/*-------------------------------------------------------------------------
 *
 * spell.h
 *
 * Declarations for ISpell dictionary
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 *
 * src/include/tsearch/dicts/spell.h
 *
 *-------------------------------------------------------------------------
 */
- #ifndef __SPELL_H__
- #define __SPELL_H__
- #include "regex/regex.h"
- #include "tsearch/dicts/regis.h"
- #include "tsearch/ts_public.h"
- /*
- * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
- * a words list.
- */
- struct SPNode;
- typedef struct
- {
- uint32 val:8,
- isword:1,
- /* Stores compound flags listed below */
- compoundflag:4,
- /* Reference to an entry of the AffixData field */
- affix:19;
- struct SPNode *node;
- } SPNodeData;
/*
 * Names of FF_ are correlated with Hunspell options in affix file
 * http://hunspell.sourceforge.net/
 *
 * Each flag is a distinct bit.  FF_COMPOUNDFLAG is the union of the
 * begin/middle/last bits, and FF_COMPOUNDFLAGMASK covers all four bits.
 */
#define FF_COMPOUNDONLY		0x01
#define FF_COMPOUNDBEGIN	0x02
#define FF_COMPOUNDMIDDLE	0x04
#define FF_COMPOUNDLAST		0x08
#define FF_COMPOUNDFLAG		( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
							FF_COMPOUNDLAST )
#define FF_COMPOUNDFLAGMASK		0x0f
- typedef struct SPNode
- {
- uint32 length;
- SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
- } SPNode;
- #define SPNHDRSZ (offsetof(SPNode,data))
- /*
- * Represents an entry in a words list.
- */
- typedef struct spell_struct
- {
- union
- {
- /*
- * flag is filled in by NIImportDictionary(). After
- * NISortDictionary(), d is used instead of flag.
- */
- char *flag;
- /* d is used in mkSPNode() */
- struct
- {
- /* Reference to an entry of the AffixData field */
- int affix;
- /* Length of the word */
- int len;
- } d;
- } p;
- char word[FLEXIBLE_ARRAY_MEMBER];
- } SPELL;
- #define SPELLHDRSZ (offsetof(SPELL, word))
- /*
- * Represents an entry in an affix list.
- */
- typedef struct aff_struct
- {
- char *flag;
- /* FF_SUFFIX or FF_PREFIX */
- uint32 type:1,
- flagflags:7,
- issimple:1,
- isregis:1,
- replen:14;
- char *find;
- char *repl;
- union
- {
- regex_t regex;
- Regis regis;
- } reg;
- } AFFIX;
/*
 * affixes use dictionary flags too
 *
 * These three values are single bits above the 0x01..0x08 range used by the
 * dictionary compound flags, so they do not collide with them.
 */
#define FF_COMPOUNDPERMITFLAG	0x10
#define FF_COMPOUNDFORBIDFLAG	0x20
#define FF_CROSSPRODUCT			0x40
/*
 * Don't change the order of these. Initialization sorts by these,
 * and expects prefixes to come first after sorting.
 *
 * (That is why FF_PREFIX must compare lower than FF_SUFFIX.)
 */
#define FF_SUFFIX				1
#define FF_PREFIX				0
- /*
- * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
- * an affix list.
- */
- struct AffixNode;
- typedef struct
- {
- uint32 val:8,
- naff:24;
- AFFIX **aff;
- struct AffixNode *node;
- } AffixNodeData;
- typedef struct AffixNode
- {
- uint32 isvoid:1,
- length:31;
- AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
- } AffixNode;
- #define ANHRDSZ (offsetof(AffixNode, data))
/*
 * Element type of the IspellDict.CompoundAffix pointer.
 */
typedef struct
{
	char	   *affix;
	/* NOTE(review): presumably the length of affix -- confirm in spell.c */
	int			len;
	bool		issuffix;
} CMPDAffix;
/*
 * Type of encoding affix flags in Hunspell dictionaries
 */
typedef enum
{
	FM_CHAR,					/* one character (like ispell) */
	FM_LONG,					/* two characters */
	FM_NUM						/* number, >= 0 and < 65536 */
} FlagMode;
- /*
- * Structure to store Hunspell options. Flag representation depends on flag
- * type. These flags are about support of compound words.
- */
- typedef struct CompoundAffixFlag
- {
- union
- {
- /* Flag name if flagMode is FM_CHAR or FM_LONG */
- char *s;
- /* Flag name if flagMode is FM_NUM */
- uint32 i;
- } flag;
- /* we don't have a bsearch_arg version, so, copy FlagMode */
- FlagMode flagMode;
- uint32 value;
- } CompoundAffixFlag;
/* 65536; matches the exclusive upper bound documented for FM_NUM flags */
#define FLAGNUM_MAXSIZE		(1 << 16)
- typedef struct
- {
- int maffixes;
- int naffixes;
- AFFIX *Affix;
- AffixNode *Suffix;
- AffixNode *Prefix;
- SPNode *Dictionary;
- /* Array of sets of affixes */
- char **AffixData;
- int lenAffixData;
- int nAffixData;
- bool useFlagAliases;
- CMPDAffix *CompoundAffix;
- bool usecompound;
- FlagMode flagMode;
- /*
- * All follow fields are actually needed only for initialization
- */
- /* Array of Hunspell options in affix file */
- CompoundAffixFlag *CompoundAffixFlags;
- /* number of entries in CompoundAffixFlags array */
- int nCompoundAffixFlag;
- /* allocated length of CompoundAffixFlags array */
- int mCompoundAffixFlag;
- /*
- * Remaining fields are only used during dictionary construction; they are
- * set up by NIStartBuild and cleared by NIFinishBuild.
- */
- MemoryContext buildCxt; /* temp context for construction */
- /* Temporary array of all words in the dict file */
- SPELL **Spell;
- int nspell; /* number of valid entries in Spell array */
- int mspell; /* allocated length of Spell array */
- /* These are used to allocate "compact" data without palloc overhead */
- char *firstfree; /* first free address (always maxaligned) */
- size_t avail; /* free space remaining at firstfree */
- } IspellDict;
- extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
- extern void NIStartBuild(IspellDict *Conf);
- extern void NIImportAffixes(IspellDict *Conf, const char *filename);
- extern void NIImportDictionary(IspellDict *Conf, const char *filename);
- extern void NISortDictionary(IspellDict *Conf);
- extern void NISortAffixes(IspellDict *Conf);
- extern void NIFinishBuild(IspellDict *Conf);
- #endif
|