123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
/*-------------------------------------------------------------------------
 *
 * ts_utils.h
 *    helper utilities for tsearch
 *
 * Copyright (c) 1998-2016, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_utils.h
 *
 *-------------------------------------------------------------------------
 */
- #ifndef _PG_TS_UTILS_H_
- #define _PG_TS_UTILS_H_
- #include "nodes/pg_list.h"
- #include "tsearch/ts_public.h"
- #include "tsearch/ts_type.h"
/*
 * Common parse definitions for tsvector and tsquery
 */
/*
 * tsvector parser support.
 *
 * TSVectorParseStateData is deliberately left incomplete here (it is an
 * opaque struct in tsvector_parser.c); users of this header only ever hold
 * the pointer typedef.
 */
struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
- extern TSVectorParseState init_tsvector_parser(char *input,
- bool oprisdelim,
- bool is_tsquery);
- extern void reset_tsvector_parser(TSVectorParseState state, char *input);
- extern bool gettoken_tsvector(TSVectorParseState state,
- char **token, int *len,
- WordEntryPos **pos, int *poslen,
- char **endptr);
- extern void close_tsvector_parser(TSVectorParseState state);
/*
 * parse_tsquery support.
 *
 * TSQueryParserStateData is private to backend/utils/adt/tsquery.c; only the
 * pointer typedef is exposed here.
 */
struct TSQueryParserStateData;
typedef struct TSQueryParserStateData *TSQueryParserState;
- typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
- char *token, int tokenlen,
- int16 tokenweights, /* bitmap as described
- * in QueryOperand
- * struct */
- bool prefix);
- extern TSQuery parse_tsquery(char *buf,
- PushFunction pushval,
- Datum opaque, bool isplain);
- /* Functions for use by PushFunction implementations */
- extern void pushValue(TSQueryParserState state,
- char *strval, int lenval, int16 weight, bool prefix);
- extern void pushStop(TSQueryParserState state);
- extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
- /*
- * parse plain text and lexize words
- */
- typedef struct
- {
- uint16 len;
- uint16 nvariant;
- union
- {
- uint16 pos;
- /*
- * When apos array is used, apos[0] is the number of elements in the
- * array (excluding apos[0]), and alen is the allocated size of the
- * array.
- */
- uint16 *apos;
- } pos;
- uint16 flags; /* currently, only TSL_PREFIX */
- char *word;
- uint32 alen;
- } ParsedWord;
- typedef struct
- {
- ParsedWord *words;
- int32 lenwords;
- int32 curwords;
- int32 pos;
- } ParsedText;
- extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
- /*
- * headline framework, flow in common to generate:
- * 1 parse text with hlparsetext
- * 2 parser-specific function to find part
- * 3 generateHeadline to generate result text
- */
- extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
- char *buf, int32 buflen);
- extern text *generateHeadline(HeadlineParsedText *prs);
/*
 * TSQuery execution support
 *
 * TS_execute() executes a tsquery against data that can be represented in
 * various forms.  The TSExecuteCallback callback function is called to check
 * whether a given primitive tsquery value is matched in the data.
 */
- /*
- * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
- * lexeme position data (because of a phrase-match operator in the tsquery).
- * The callback should fill in position data when it returns true (success).
- * If it cannot return position data, it may leave "data" unchanged, but
- * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag
- * and must arrange for a later recheck with position data available.
- *
- * The reported lexeme positions must be sorted and unique. Callers must only
- * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
- * This allows the returned "pos" to point directly to the WordEntryPos
- * portion of a tsvector value. If "allocated" is true then the pos array
- * is palloc'd workspace and caller may free it when done.
- *
- * "negate" means that the pos array contains positions where the query does
- * not match, rather than positions where it does. "width" is positive when
- * the match is wider than one lexeme. Neither of these fields normally need
- * to be touched by TSExecuteCallback functions; they are used for
- * phrase-search processing within TS_execute.
- *
- * All fields of the ExecPhraseData struct are initially zeroed by caller.
- */
- typedef struct ExecPhraseData
- {
- int npos; /* number of positions reported */
- bool allocated; /* pos points to palloc'd data? */
- bool negate; /* positions are where query is NOT matched */
- WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
- int width; /* width of match in lexemes, less 1 */
- } ExecPhraseData;
- /*
- * Signature for TSQuery lexeme check functions
- *
- * arg: opaque value passed through from caller of TS_execute
- * val: lexeme to test for presence of
- * data: to be filled with lexeme positions; NULL if position data not needed
- *
- * Return TRUE if lexeme is present in data, else FALSE. If data is not
- * NULL, it should be filled with lexeme positions, but function can leave
- * it as zeroes if position data is not available.
- */
- typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
- ExecPhraseData *data);
/*
 * Flag bits for TS_execute
 */
#define TS_EXEC_EMPTY           (0x00)

/*
 * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
 * evaluated to be true.  Useful in cases where NOT cannot be accurately
 * computed (GiST) or it isn't important (ranking).  From TS_execute's
 * perspective, !CALC_NOT means that the TSExecuteCallback function might
 * return false-positive indications of a lexeme's presence.
 */
#define TS_EXEC_CALC_NOT        (0x01)

/*
 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
 * in the absence of position information: a TRUE result indicates that the
 * phrase might be present.  Without this flag, OP_PHRASE always returns
 * false if lexeme position information is not available.
 */
#define TS_EXEC_PHRASE_NO_POS   (0x02)

/* Obsolete spelling of TS_EXEC_PHRASE_NO_POS: */
#define TS_EXEC_PHRASE_AS_AND   TS_EXEC_PHRASE_NO_POS
- extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
- TSExecuteCallback chkcond);
- extern bool tsquery_requires_match(QueryItem *curitem);
- /*
- * to_ts* - text transformation to tsvector, tsquery
- */
- extern TSVector make_tsvector(ParsedText *prs);
- extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
- extern Datum to_tsvector_byid(PG_FUNCTION_ARGS);
- extern Datum to_tsvector(PG_FUNCTION_ARGS);
- extern Datum to_tsquery_byid(PG_FUNCTION_ARGS);
- extern Datum to_tsquery(PG_FUNCTION_ARGS);
- extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS);
- extern Datum plainto_tsquery(PG_FUNCTION_ARGS);
- extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS);
- extern Datum phraseto_tsquery(PG_FUNCTION_ARGS);
- /*
- * GiST support function
- */
- extern Datum gtsvector_compress(PG_FUNCTION_ARGS);
- extern Datum gtsvector_decompress(PG_FUNCTION_ARGS);
- extern Datum gtsvector_consistent(PG_FUNCTION_ARGS);
- extern Datum gtsvector_union(PG_FUNCTION_ARGS);
- extern Datum gtsvector_same(PG_FUNCTION_ARGS);
- extern Datum gtsvector_penalty(PG_FUNCTION_ARGS);
- extern Datum gtsvector_picksplit(PG_FUNCTION_ARGS);
- extern Datum gtsvector_consistent_oldsig(PG_FUNCTION_ARGS);
- /*
- * IO functions for pseudotype gtsvector
- * used internally in tsvector GiST opclass
- */
- extern Datum gtsvectorin(PG_FUNCTION_ARGS);
- extern Datum gtsvectorout(PG_FUNCTION_ARGS);
- /*
- * GIN support function
- */
- extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
- extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS);
- extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS);
- extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
- extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS);
- extern Datum gin_tsquery_triconsistent(PG_FUNCTION_ARGS);
- extern Datum gin_extract_tsvector_2args(PG_FUNCTION_ARGS);
- extern Datum gin_extract_tsquery_5args(PG_FUNCTION_ARGS);
- extern Datum gin_tsquery_consistent_6args(PG_FUNCTION_ARGS);
- extern Datum gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS);
- extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
/*
 * Possible strategy numbers for indexes
 *    TSearchStrategyNumber  - (tsvector|text) @@ tsquery
 *    TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
 */
#define TSearchStrategyNumber           1
#define TSearchWithClassStrategyNumber  2
- /*
- * TSQuery Utilities
- */
- extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
- extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
- typedef struct QTNode
- {
- QueryItem *valnode;
- uint32 flags;
- int32 nchild;
- char *word;
- uint32 sign;
- struct QTNode **child;
- } QTNode;
/* bits in QTNode.flags (distinct single-bit values, may be OR'ed together) */
#define QTN_NEEDFREE    0x01
#define QTN_NOCHANGE    0x02
#define QTN_WORDFREE    0x04
- typedef uint64 TSQuerySign;
- #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
- #define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
- #define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
- #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
- #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
- extern QTNode *QT2QTN(QueryItem *in, char *operand);
- extern TSQuery QTN2QT(QTNode *in);
- extern void QTNFree(QTNode *in);
- extern void QTNSort(QTNode *in);
- extern void QTNTernary(QTNode *in);
- extern void QTNBinary(QTNode *in);
- extern int QTNodeCompare(QTNode *an, QTNode *bn);
- extern QTNode *QTNCopy(QTNode *in);
- extern void QTNClearFlags(QTNode *in, uint32 flags);
- extern bool QTNEq(QTNode *a, QTNode *b);
- extern TSQuerySign makeTSQuerySign(TSQuery a);
- extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
- bool *isfind);
- /*
- * TSQuery GiST support
- */
- extern Datum gtsquery_compress(PG_FUNCTION_ARGS);
- extern Datum gtsquery_decompress(PG_FUNCTION_ARGS);
- extern Datum gtsquery_consistent(PG_FUNCTION_ARGS);
- extern Datum gtsquery_union(PG_FUNCTION_ARGS);
- extern Datum gtsquery_same(PG_FUNCTION_ARGS);
- extern Datum gtsquery_penalty(PG_FUNCTION_ARGS);
- extern Datum gtsquery_picksplit(PG_FUNCTION_ARGS);
- extern Datum gtsquery_consistent_oldsig(PG_FUNCTION_ARGS);
- /*
- * Parser interface to SQL
- */
- extern Datum ts_token_type_byid(PG_FUNCTION_ARGS);
- extern Datum ts_token_type_byname(PG_FUNCTION_ARGS);
- extern Datum ts_parse_byid(PG_FUNCTION_ARGS);
- extern Datum ts_parse_byname(PG_FUNCTION_ARGS);
- /*
- * Default word parser
- */
- extern Datum prsd_start(PG_FUNCTION_ARGS);
- extern Datum prsd_nexttoken(PG_FUNCTION_ARGS);
- extern Datum prsd_end(PG_FUNCTION_ARGS);
- extern Datum prsd_headline(PG_FUNCTION_ARGS);
- extern Datum prsd_lextype(PG_FUNCTION_ARGS);
- /*
- * Dictionary interface to SQL
- */
- extern Datum ts_lexize(PG_FUNCTION_ARGS);
- /*
- * Simple built-in dictionary
- */
- extern Datum dsimple_init(PG_FUNCTION_ARGS);
- extern Datum dsimple_lexize(PG_FUNCTION_ARGS);
- /*
- * Synonym built-in dictionary
- */
- extern Datum dsynonym_init(PG_FUNCTION_ARGS);
- extern Datum dsynonym_lexize(PG_FUNCTION_ARGS);
- /*
- * ISpell dictionary
- */
- extern Datum dispell_init(PG_FUNCTION_ARGS);
- extern Datum dispell_lexize(PG_FUNCTION_ARGS);
- /*
- * Thesaurus
- */
- extern Datum thesaurus_init(PG_FUNCTION_ARGS);
- extern Datum thesaurus_lexize(PG_FUNCTION_ARGS);
- /*
- * headline
- */
- extern Datum ts_headline_byid_opt(PG_FUNCTION_ARGS);
- extern Datum ts_headline_byid(PG_FUNCTION_ARGS);
- extern Datum ts_headline(PG_FUNCTION_ARGS);
- extern Datum ts_headline_opt(PG_FUNCTION_ARGS);
- /*
- * current cfg
- */
- extern Datum get_current_ts_config(PG_FUNCTION_ARGS);
- #endif /* _PG_TS_UTILS_H_ */
|