spell.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
/*-------------------------------------------------------------------------
 *
 * spell.h
 *
 * Declarations for ISpell dictionary
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 *
 * src/include/tsearch/dicts/spell.h
 *
 *-------------------------------------------------------------------------
 */
  13. #ifndef __SPELL_H__
  14. #define __SPELL_H__
  15. #include "regex/regex.h"
  16. #include "tsearch/dicts/regis.h"
  17. #include "tsearch/ts_public.h"
  18. /*
  19. * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
  20. * a words list.
  21. */
  22. struct SPNode;
  23. typedef struct
  24. {
  25. uint32 val:8,
  26. isword:1,
  27. /* Stores compound flags listed below */
  28. compoundflag:4,
  29. /* Reference to an entry of the AffixData field */
  30. affix:19;
  31. struct SPNode *node;
  32. } SPNodeData;
  33. /*
  34. * Names of FF_ are correlated with Hunspell options in affix file
  35. * http://hunspell.sourceforge.net/
  36. */
  37. #define FF_COMPOUNDONLY 0x01
  38. #define FF_COMPOUNDBEGIN 0x02
  39. #define FF_COMPOUNDMIDDLE 0x04
  40. #define FF_COMPOUNDLAST 0x08
  41. #define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
  42. FF_COMPOUNDLAST )
  43. #define FF_COMPOUNDFLAGMASK 0x0f
  44. typedef struct SPNode
  45. {
  46. uint32 length;
  47. SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
  48. } SPNode;
  49. #define SPNHDRSZ (offsetof(SPNode,data))
  50. /*
  51. * Represents an entry in a words list.
  52. */
  53. typedef struct spell_struct
  54. {
  55. union
  56. {
  57. /*
  58. * flag is filled in by NIImportDictionary(). After
  59. * NISortDictionary(), d is used instead of flag.
  60. */
  61. char *flag;
  62. /* d is used in mkSPNode() */
  63. struct
  64. {
  65. /* Reference to an entry of the AffixData field */
  66. int affix;
  67. /* Length of the word */
  68. int len;
  69. } d;
  70. } p;
  71. char word[FLEXIBLE_ARRAY_MEMBER];
  72. } SPELL;
  73. #define SPELLHDRSZ (offsetof(SPELL, word))
  74. /*
  75. * Represents an entry in an affix list.
  76. */
  77. typedef struct aff_struct
  78. {
  79. char *flag;
  80. /* FF_SUFFIX or FF_PREFIX */
  81. uint32 type:1,
  82. flagflags:7,
  83. issimple:1,
  84. isregis:1,
  85. replen:14;
  86. char *find;
  87. char *repl;
  88. union
  89. {
  90. regex_t regex;
  91. Regis regis;
  92. } reg;
  93. } AFFIX;
  94. /*
  95. * affixes use dictionary flags too
  96. */
  97. #define FF_COMPOUNDPERMITFLAG 0x10
  98. #define FF_COMPOUNDFORBIDFLAG 0x20
  99. #define FF_CROSSPRODUCT 0x40
  100. /*
  101. * Don't change the order of these. Initialization sorts by these,
  102. * and expects prefixes to come first after sorting.
  103. */
  104. #define FF_SUFFIX 1
  105. #define FF_PREFIX 0
  106. /*
  107. * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
  108. * an affix list.
  109. */
  110. struct AffixNode;
  111. typedef struct
  112. {
  113. uint32 val:8,
  114. naff:24;
  115. AFFIX **aff;
  116. struct AffixNode *node;
  117. } AffixNodeData;
  118. typedef struct AffixNode
  119. {
  120. uint32 isvoid:1,
  121. length:31;
  122. AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
  123. } AffixNode;
  124. #define ANHRDSZ (offsetof(AffixNode, data))
  125. typedef struct
  126. {
  127. char *affix;
  128. int len;
  129. bool issuffix;
  130. } CMPDAffix;
  131. /*
  132. * Type of encoding affix flags in Hunspell dictionaries
  133. */
  134. typedef enum
  135. {
  136. FM_CHAR, /* one character (like ispell) */
  137. FM_LONG, /* two characters */
  138. FM_NUM /* number, >= 0 and < 65536 */
  139. } FlagMode;
  140. /*
  141. * Structure to store Hunspell options. Flag representation depends on flag
  142. * type. These flags are about support of compound words.
  143. */
  144. typedef struct CompoundAffixFlag
  145. {
  146. union
  147. {
  148. /* Flag name if flagMode is FM_CHAR or FM_LONG */
  149. char *s;
  150. /* Flag name if flagMode is FM_NUM */
  151. uint32 i;
  152. } flag;
  153. /* we don't have a bsearch_arg version, so, copy FlagMode */
  154. FlagMode flagMode;
  155. uint32 value;
  156. } CompoundAffixFlag;
  157. #define FLAGNUM_MAXSIZE (1 << 16)
  158. typedef struct
  159. {
  160. int maffixes;
  161. int naffixes;
  162. AFFIX *Affix;
  163. AffixNode *Suffix;
  164. AffixNode *Prefix;
  165. SPNode *Dictionary;
  166. /* Array of sets of affixes */
  167. char **AffixData;
  168. int lenAffixData;
  169. int nAffixData;
  170. bool useFlagAliases;
  171. CMPDAffix *CompoundAffix;
  172. bool usecompound;
  173. FlagMode flagMode;
  174. /*
  175. * All follow fields are actually needed only for initialization
  176. */
  177. /* Array of Hunspell options in affix file */
  178. CompoundAffixFlag *CompoundAffixFlags;
  179. /* number of entries in CompoundAffixFlags array */
  180. int nCompoundAffixFlag;
  181. /* allocated length of CompoundAffixFlags array */
  182. int mCompoundAffixFlag;
  183. /*
  184. * Remaining fields are only used during dictionary construction; they are
  185. * set up by NIStartBuild and cleared by NIFinishBuild.
  186. */
  187. MemoryContext buildCxt; /* temp context for construction */
  188. /* Temporary array of all words in the dict file */
  189. SPELL **Spell;
  190. int nspell; /* number of valid entries in Spell array */
  191. int mspell; /* allocated length of Spell array */
  192. /* These are used to allocate "compact" data without palloc overhead */
  193. char *firstfree; /* first free address (always maxaligned) */
  194. size_t avail; /* free space remaining at firstfree */
  195. } IspellDict;
  196. extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
  197. extern void NIStartBuild(IspellDict *Conf);
  198. extern void NIImportAffixes(IspellDict *Conf, const char *filename);
  199. extern void NIImportDictionary(IspellDict *Conf, const char *filename);
  200. extern void NISortDictionary(IspellDict *Conf);
  201. extern void NISortAffixes(IspellDict *Conf);
  202. extern void NIFinishBuild(IspellDict *Conf);
  203. #endif