LCOV - code coverage report
Current view: top level - src/include/tsearch - ts_utils.h (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 4 4 100.0 %
Date: 2025-01-18 03:14:54 Functions: 2 2 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ts_utils.h
       4             :  *    helper utilities for tsearch
       5             :  *
       6             :  * Copyright (c) 1998-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  * src/include/tsearch/ts_utils.h
       9             :  *
      10             :  *-------------------------------------------------------------------------
      11             :  */
      12             : #ifndef _PG_TS_UTILS_H_
      13             : #define _PG_TS_UTILS_H_
      14             : 
      15             : #include "nodes/pg_list.h"
      16             : #include "tsearch/ts_public.h"
      17             : #include "tsearch/ts_type.h"
      18             : 
      19             : /*
      20             :  * Common parse definitions for tsvector and tsquery
      21             :  */
      22             : 
      23             : /* tsvector parser support. */
      24             : 
      25             : struct TSVectorParseStateData;  /* opaque struct in tsvector_parser.c */
      26             : typedef struct TSVectorParseStateData *TSVectorParseState;
      27             : 
      28             : /* flag bits that can be passed to init_tsvector_parser: */
      29             : #define P_TSV_OPR_IS_DELIM  (1 << 0)
      30             : #define P_TSV_IS_TSQUERY    (1 << 1)
      31             : #define P_TSV_IS_WEB        (1 << 2)
      32             : 
      33             : extern TSVectorParseState init_tsvector_parser(char *input, int flags,
      34             :                                                Node *escontext);
      35             : extern void reset_tsvector_parser(TSVectorParseState state, char *input);
      36             : extern bool gettoken_tsvector(TSVectorParseState state,
      37             :                               char **strval, int *lenval,
      38             :                               WordEntryPos **pos_ptr, int *poslen,
      39             :                               char **endptr);
      40             : extern void close_tsvector_parser(TSVectorParseState state);
      41             : 
      42             : /* Tests whether x starts with a one-byte char (per pg_mblen) from the set ! & | ( ) < (a phrase operator begins with '<'); evaluates x more than once */
      43             : #define ISOPERATOR(x) \
      44             :     ( pg_mblen(x) == 1 && ( *(x) == '!' ||  \
      45             :                             *(x) == '&' ||  \
      46             :                             *(x) == '|' ||  \
      47             :                             *(x) == '(' ||  \
      48             :                             *(x) == ')' ||  \
      49             :                             *(x) == '<'      \
      50             :                           ) )
      51             : 
      52             : /* parse_tsquery */
      53             : 
      54             : struct TSQueryParserStateData;  /* private in backend/utils/adt/tsquery.c */
      55             : typedef struct TSQueryParserStateData *TSQueryParserState;
      56             : 
      57             : typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
      58             :                               char *token, int tokenlen,
      59             :                               int16 tokenweights,   /* bitmap as described in
      60             :                                                      * QueryOperand struct */
      61             :                               bool prefix);
      62             : 
      63             : /* flag bits that can be passed to parse_tsquery: */
      64             : #define P_TSQ_PLAIN     (1 << 0)
      65             : #define P_TSQ_WEB       (1 << 1)
      66             : 
      67             : extern TSQuery parse_tsquery(char *buf,
      68             :                              PushFunction pushval,
      69             :                              Datum opaque,
      70             :                              int flags,
      71             :                              Node *escontext);
      72             : 
      73             : /* Functions for use by PushFunction implementations */
      74             : extern void pushValue(TSQueryParserState state,
      75             :                       char *strval, int lenval, int16 weight, bool prefix);
      76             : extern void pushStop(TSQueryParserState state);
      77             : extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
      78             : 
      79             : /*
      80             :  * parse plain text and lexize words
      81             :  */
      82             : typedef struct
      83             : {
      84             :     uint16      flags;          /* currently, only TSL_PREFIX */
      85             :     uint16      len;            /* presumably length of word -- TODO confirm */
      86             :     uint16      nvariant;
      87             :     uint16      alen;           /* allocated size of pos.apos array, see below */
      88             :     union
      89             :     {
      90             :         uint16      pos;        /* presumably used when apos array is not -- TODO confirm */
      91             : 
      92             :         /*
      93             :          * When apos array is used, apos[0] is the number of elements in the
      94             :          * array (excluding apos[0]), and alen is the allocated size of the
      95             :          * array.  We do not allow more than MAXNUMPOS array elements.
      96             :          */
      97             :         uint16     *apos;       /* apos[0] holds the element count; entries follow */
      98             :     }           pos;
      99             :     char       *word;           /* presumably the word text -- TODO confirm */
     100             : } ParsedWord;
     101             : 
     102             : typedef struct
     103             : {
     104             :     ParsedWord *words;
     105             :     int32       lenwords;
     106             :     int32       curwords;
     107             :     int32       pos;
     108             : } ParsedText;
     109             : 
     110             : extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
     111             : 
     112             : /*
     113             :  * Headline framework.  The common flow to generate a headline is:
     114             :  *  1. parse the text with hlparsetext
     115             :  *  2. run the parser-specific function to find the part
     116             :  *  3. call generateHeadline to generate the result text
     117             :  */
     118             : 
     119             : extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
     120             :                         char *buf, int32 buflen);
     121             : extern text *generateHeadline(HeadlineParsedText *prs);
     122             : 
     123             : /*
     124             :  * TSQuery execution support
     125             :  *
     126             :  * TS_execute() executes a tsquery against data that can be represented in
     127             :  * various forms.  The TSExecuteCallback callback function is called to check
     128             :  * whether a given primitive tsquery value is matched in the data.
     129             :  */
     130             : 
     131             : /* TS_execute requires ternary logic to handle NOT with phrase matches */
     132             : typedef enum
     133             : {
     134             :     TS_NO,                      /* definitely no match */
     135             :     TS_YES,                     /* definitely does match */
     136             :     TS_MAYBE,                   /* can't verify match for lack of pos data */
     137             : } TSTernaryValue;
     138             : 
     139             : /*
     140             :  * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
     141             :  * lexeme position data (because of a phrase-match operator in the tsquery).
     142             :  * The callback should fill in position data when it returns TS_YES (success).
     143             :  * If it cannot return position data, it should leave "data" unchanged and
     144             :  * return TS_MAYBE.  The caller of TS_execute() must then arrange for a later
     145             :  * recheck with position data available.
     146             :  *
     147             :  * The reported lexeme positions must be sorted and unique.  Callers must only
     148             :  * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
     149             :  * This allows the returned "pos" to point directly to the WordEntryPos
     150             :  * portion of a tsvector value.  If "allocated" is true then the pos array
     151             :  * is palloc'd workspace and caller may free it when done.
     152             :  *
     153             :  * "negate" means that the pos array contains positions where the query does
     154             :  * not match, rather than positions where it does.  "width" is positive when
     155             :  * the match is wider than one lexeme.  Neither of these fields normally needs
     156             :  * to be touched by TSExecuteCallback functions; they are used for
     157             :  * phrase-search processing within TS_execute.
     158             :  *
     159             :  * All fields of the ExecPhraseData struct are initially zeroed by caller.
     160             :  */
     161             : typedef struct ExecPhraseData
     162             : {
     163             :     int         npos;           /* number of positions reported */
     164             :     bool        allocated;      /* pos points to palloc'd data? */
     165             :     bool        negate;         /* positions are where query is NOT matched */
     166             :     WordEntryPos *pos;          /* ordered, non-duplicate lexeme positions */
     167             :     int         width;          /* width of match in lexemes, less 1 */
     168             : } ExecPhraseData;
     169             : 
     170             : /*
     171             :  * Signature for TSQuery lexeme check functions
     172             :  *
     173             :  * arg: opaque value passed through from caller of TS_execute
     174             :  * val: lexeme to test for presence of
     175             :  * data: to be filled with lexeme positions; NULL if position data not needed
     176             :  *
     177             :  * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
     178             :  * present, TS_NO if it definitely is not present.  If data is not NULL,
     179             :  * it must be filled with lexeme positions if available.  If position data
     180             :  * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
     181             :  */
     182             : typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
     183             :                                              ExecPhraseData *data);
     184             : 
     185             : /*
     186             :  * Flag bits for TS_execute
     187             :  */
     188             : #define TS_EXEC_EMPTY           (0x00)
     189             : /*
     190             :  * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
     191             :  * evaluated to be true.  This was formerly the default behavior.  It's now
     192             :  * deprecated because it tends to give silly answers, but some applications
     193             :  * might still have a use for it.
     194             :  */
     195             : #define TS_EXEC_SKIP_NOT        (0x01)
     196             : /*
     197             :  * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
     198             :  * in the absence of position information: a true result indicates that the
     199             :  * phrase might be present.  Without this flag, OP_PHRASE always returns
     200             :  * false if lexeme position information is not available.
     201             :  */
     202             : #define TS_EXEC_PHRASE_NO_POS   (0x02)
     203             : 
     204             : extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
     205             :                        TSExecuteCallback chkcond);
     206             : extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
     207             :                                          uint32 flags,
     208             :                                          TSExecuteCallback chkcond);
     209             : extern List *TS_execute_locations(QueryItem *curitem, void *arg,
     210             :                                   uint32 flags,
     211             :                                   TSExecuteCallback chkcond);
     212             : extern bool tsquery_requires_match(QueryItem *curitem);
     213             : 
     214             : /*
     215             :  * to_ts* - text transformation to tsvector, tsquery
     216             :  */
     217             : extern TSVector make_tsvector(ParsedText *prs);
     218             : extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
     219             : 
     220             : /*
     221             :  * Possible strategy numbers for indexes
     222             :  *    TSearchStrategyNumber  - (tsvector|text) @@ tsquery
     223             :  *    TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
     224             :  */
     225             : #define TSearchStrategyNumber           1
     226             : #define TSearchWithClassStrategyNumber  2
     227             : 
     228             : /*
     229             :  * TSQuery Utilities
     230             :  */
     231             : extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
     232             : extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
     233             : 
     234             : typedef struct QTNode
     235             : {
     236             :     QueryItem  *valnode;
     237             :     uint32      flags;          /* QTN_* flag bits, defined below */
     238             :     int32       nchild;         /* presumably number of entries in child -- TODO confirm */
     239             :     char       *word;
     240             :     uint32      sign;
     241             :     struct QTNode **child;      /* presumably array of child node pointers -- TODO confirm */
     242             : } QTNode;
     243             : 
     244             : /* bits in QTNode.flags */
     245             : #define QTN_NEEDFREE    0x01
     246             : #define QTN_NOCHANGE    0x02
     247             : #define QTN_WORDFREE    0x04
     248             : 
     249             : typedef uint64 TSQuerySign;
     250             : 
     251             : #define TSQS_SIGLEN  (sizeof(TSQuerySign)*BITS_PER_BYTE)
     252             : 
     253             : static inline Datum             /* convert a TSQuerySign to a Datum */
     254          36 : TSQuerySignGetDatum(TSQuerySign X)
     255             : {
     256          36 :     return Int64GetDatum((int64) X);    /* TSQuerySign is uint64; cast to int64 for Int64GetDatum */
     257             : }
     258             : 
     259             : static inline TSQuerySign       /* convert a Datum to a TSQuerySign */
     260          84 : DatumGetTSQuerySign(Datum X)
     261             : {
     262          84 :     return (TSQuerySign) DatumGetInt64(X);  /* fetch as int64, then cast to the uint64 TSQuerySign */
     263             : }
     264             : 
     265             : #define PG_RETURN_TSQUERYSIGN(X)    return TSQuerySignGetDatum(X)
     266             : #define PG_GETARG_TSQUERYSIGN(n)    DatumGetTSQuerySign(PG_GETARG_DATUM(n))
     267             : 
     268             : 
     269             : extern QTNode *QT2QTN(QueryItem *in, char *operand);
     270             : extern TSQuery QTN2QT(QTNode *in);
     271             : extern void QTNFree(QTNode *in);
     272             : extern void QTNSort(QTNode *in);
     273             : extern void QTNTernary(QTNode *in);
     274             : extern void QTNBinary(QTNode *in);
     275             : extern int  QTNodeCompare(QTNode *an, QTNode *bn);
     276             : extern QTNode *QTNCopy(QTNode *in);
     277             : extern void QTNClearFlags(QTNode *in, uint32 flags);
     278             : extern bool QTNEq(QTNode *a, QTNode *b);
     279             : extern TSQuerySign makeTSQuerySign(TSQuery a);
     280             : extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
     281             :                             bool *isfind);
     282             : 
     283             : #endif                          /* _PG_TS_UTILS_H_ */

Generated by: LCOV version 1.14