LCOV - code coverage report
Current view: top level - src/backend/tsearch - spell.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 1037 1126 92.1 %
Date: 2021-12-09 04:09:06 Functions: 47 47 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spell.c
       4             :  *      Normalizing word with ISpell
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  *
       8             :  * Ispell dictionary
       9             :  * -----------------
      10             :  *
      11             :  * Rules of dictionaries are defined in two files with .affix and .dict
      12             :  * extensions. They are used by spell checker programs Ispell and Hunspell.
      13             :  *
      14             :  * An .affix file declares morphological rules to get a basic form of words.
      15             :  * The format of an .affix file has different structure for Ispell and Hunspell
      16             :  * dictionaries. The Hunspell format is more complicated. But when an .affix
      17             :  * file is imported and compiled, it is stored in the same structure AffixNode.
      18             :  *
      19             :  * A .dict file stores a list of basic forms of words with references to
      20             :  * affix rules. The format of a .dict file has the same structure for Ispell
      21             :  * and Hunspell dictionaries.
      22             :  *
      23             :  * Compilation of a dictionary
      24             :  * ---------------------------
      25             :  *
      26             :  * A compiled dictionary is stored in the IspellDict structure. Compilation of
      27             :  * a dictionary is divided into several steps:
      28             :  *  - NIImportDictionary() - stores each word of a .dict file in the
      29             :  *    temporary Spell field.
      30             :  *  - NIImportAffixes() - stores affix rules of an .affix file in the
      31             :  *    Affix field (not temporary) if an .affix file has the Ispell format.
      32             :  *    -> NIImportOOAffixes() - stores affix rules if an .affix file has the
      33             :  *       Hunspell format. The AffixData field is initialized if AF parameter
      34             :  *       is defined.
      35             :  *  - NISortDictionary() - builds a prefix tree (Trie) from the words list
      36             :  *    and stores it in the Dictionary field. The words list is got from the
      37             :  *    Spell field. The AffixData field is initialized if AF parameter is not
      38             :  *    defined.
      39             :  *  - NISortAffixes():
      40             :  *    - builds a list of compound affixes from the affix list and stores it
      41             :  *      in the CompoundAffix.
      42             :  *    - builds prefix trees (Trie) from the affix list for prefixes and suffixes
      43             :  *      and stores them in Suffix and Prefix fields.
      44             :  *    The affix list is got from the Affix field.
      45             :  *
      46             :  * Memory management
      47             :  * -----------------
      48             :  *
      49             :  * The IspellDict structure has the Spell field which is used only in compile
      50             :  * time. The Spell field stores a words list. It can take a lot of memory.
      51             :  * Therefore when a dictionary is compiled this field is cleared by
      52             :  * NIFinishBuild().
      53             :  *
      54             :  * All resources which should be cleared by NIFinishBuild() are allocated
      55             :  * using tmpalloc() and tmpalloc0().
      56             :  *
      57             :  * IDENTIFICATION
      58             :  *    src/backend/tsearch/spell.c
      59             :  *
      60             :  *-------------------------------------------------------------------------
      61             :  */
      62             : 
      63             : #include "postgres.h"
      64             : 
      65             : #include "catalog/pg_collation.h"
      66             : #include "tsearch/dicts/spell.h"
      67             : #include "tsearch/ts_locale.h"
      68             : #include "utils/memutils.h"
      69             : 
      70             : 
      71             : /*
      72             :  * Building a dictionary requires a lot of memory that's not needed
      73             :  * after the build is done.  During initialization, CurrentMemoryContext
      74             :  * is the long-lived memory context associated with the dictionary cache
      75             :  * entry.  Short-lived data goes into Conf->buildCxt through these macros,
      76             :  * which expand to a reference to a variable named Conf at the point of use.
      77             :  */
      78             : #define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
      79             : #define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
      80             : 
      81             : /*
      82             :  * Prepare for constructing an ISpell dictionary by creating Conf->buildCxt.
      83             :  *
      84             :  * The IspellDict struct is assumed to be zeroed when allocated.
      85             :  */
      86             : void
      87          94 : NIStartBuild(IspellDict *Conf)
      88             : {
      89             :     /*
      90             :      * The temp context is made a child of CurTransactionContext, so that it
      91             :      * will go away automatically on error.
      92             :      */
      93          94 :     Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
      94             :                                            "Ispell dictionary init context",
      95             :                                            ALLOCSET_DEFAULT_SIZES);
      96          94 : }
      97             : 
      98             : /*
      99             :  * Clean up when dictionary construction is complete: drop Conf->buildCxt.
     100             :  */
     101             : void
     102          78 : NIFinishBuild(IspellDict *Conf)
     103             : {
     104             :     /* Release no-longer-needed temp memory (everything obtained via tmpalloc) */
     105          78 :     MemoryContextDelete(Conf->buildCxt);
     106             :     /* Just for cleanliness, zero the now-dangling pointers */
     107          78 :     Conf->buildCxt = NULL;
     108          78 :     Conf->Spell = NULL;
     109          78 :     Conf->firstfree = NULL;
     110          78 :     Conf->CompoundAffixFlags = NULL;
     111          78 : }
     112             : 
     113             : 
     114             : /*
     115             :  * "Compact" palloc: allocate without extra palloc overhead.
     116             :  *
     117             :  * Since we have no need to free the ispell data items individually, there's
     118             :  * not much value in the per-chunk overhead normally consumed by palloc, and
     119             :  * ispell can allocate a lot of small nodes.  Requests are MAXALIGN'd and
     120             :  * carved out of zeroed COMPACT_ALLOC_CHUNK-byte blocks; requests larger than
     121             :  * COMPACT_MAX_REQ go straight to palloc0.  Everything is therefore pre-zeroed;
     122             :  * the cpalloc and cpalloc0 macros are just documentation to indicate which
     123             :  * allocations actually require zeroing.
     124             :  */
     125             : #define COMPACT_ALLOC_CHUNK 8192    /* amount to get from palloc at once */
     126             : #define COMPACT_MAX_REQ     1024    /* must be < COMPACT_ALLOC_CHUNK */
     127             : 
     128             : static void *
     129        8782 : compact_palloc0(IspellDict *Conf, size_t size)
     130             : {
     131             :     void       *result;
     132             : 
     133             :     /* Should only be called during init */
     134             :     Assert(Conf->buildCxt != NULL);
     135             : 
     136             :     /* No point in this for large chunks */
     137        8782 :     if (size > COMPACT_MAX_REQ)
     138           0 :         return palloc0(size);
     139             : 
     140             :     /* Keep everything maxaligned */
     141        8782 :     size = MAXALIGN(size);
     142             : 
     143             :     /* Need more space?  Start a fresh block; the old block's tail is unused */
     144        8782 :     if (size > Conf->avail)
     145             :     {
     146          90 :         Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
     147          90 :         Conf->avail = COMPACT_ALLOC_CHUNK;
     148             :     }
     149             : 
     150        8782 :     result = (void *) Conf->firstfree;
     151        8782 :     Conf->firstfree += size;
     152        8782 :     Conf->avail -= size;
     153             : 
     154        8782 :     return result;
     155             : }
     156             : 
     157             : #define cpalloc(size) compact_palloc0(Conf, size)
     158             : #define cpalloc0(size) compact_palloc0(Conf, size)
     159             : 
     160             : static char *   /* returns a copy of str (including its NUL) allocated with cpalloc */
     161        4680 : cpstrdup(IspellDict *Conf, const char *str)
     162             : {
     163        4680 :     char       *res = cpalloc(strlen(str) + 1);
     164             : 
     165        4680 :     strcpy(res, str);
     166        4680 :     return res;
     167             : }
     168             : 
     169             : 
     170             : /*
     171             :  * Apply lowerstr() while Conf->buildCxt is current; the result is temporary.
     172             :  */
     173             : static char *
     174        4010 : lowerstr_ctx(IspellDict *Conf, const char *src)
     175             : {
     176             :     MemoryContext saveCtx;
     177             :     char       *dst;
     178             : 
     179        4010 :     saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
     180        4010 :     dst = lowerstr(src);
     181        4010 :     MemoryContextSwitchTo(saveCtx); /* restore the caller's context */
     182             : 
     183        4010 :     return dst;
     184             : }
     185             : 
     186             : #define MAX_NORM 1024
     187             : #define MAXNORMLEN 256
     188             : 
     189             : #define STRNCMP(s,p)    strncmp( (s), (p), strlen(p) )  /* 0 iff s begins with p */
     190             : #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )  /* N-th byte of W (length L): counted from the front for FF_PREFIX, else from the back */
     191             : #define GETCHAR(A,N,T)    GETWCHAR( (A)->repl, (A)->replen, N, T )  /* GETWCHAR applied to affix A's repl string */
     192             : 
     193             : static char *VoidString = "";   /* stored by NIAddSpell for words with an empty flag set */
     194             : 
     195             : static int  /* comparator over SPELL * elements: orders by ->word using strcmp() */
     196        2052 : cmpspell(const void *s1, const void *s2)
     197             : {
     198        2052 :     return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
     199             : }
     200             : 
     201             : static int  /* comparator over SPELL * elements: orders by ->p.flag using strcmp() */
     202        1600 : cmpspellaffix(const void *s1, const void *s2)
     203             : {
     204        3200 :     return strcmp((*(SPELL *const *) s1)->p.flag,
     205        1600 :                   (*(SPELL *const *) s2)->p.flag);
     206             : }
     207             : 
     208             : static int  /* comparator over CompoundAffixFlag elements; both must share one flagMode */
     209        2782 : cmpcmdflag(const void *f1, const void *f2)
     210             : {
     211        2782 :     CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
     212        2782 :                *fv2 = (CompoundAffixFlag *) f2;
     213             : 
     214             :     Assert(fv1->flagMode == fv2->flagMode);
     215             : 
     216        2782 :     if (fv1->flagMode == FM_NUM)    /* numeric flags compare by value ... */
     217             :     {
     218         542 :         if (fv1->flag.i == fv2->flag.i)
     219          80 :             return 0;
     220             : 
     221         462 :         return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
     222             :     }
     223             : 
     224        2240 :     return strcmp(fv1->flag.s, fv2->flag.s);    /* ... all other modes as strings */
     225             : }
     226             : 
     227             : static char *   /* first position in str that t_iseq() matches against c, or NULL */
     228         822 : findchar(char *str, int c)
     229             : {
     230        6052 :     while (*str)
     231             :     {
     232        5962 :         if (t_iseq(str, c))
     233         732 :             return str;
     234        5230 :         str += pg_mblen(str);   /* advance by one (possibly multibyte) character */
     235             :     }
     236             : 
     237          90 :     return NULL;
     238             : }
     239             : 
     240             : static char *   /* like findchar(), but stops at the first match of either c1 or c2 */
     241          30 : findchar2(char *str, int c1, int c2)
     242             : {
     243         630 :     while (*str)
     244             :     {
     245         630 :         if (t_iseq(str, c1) || t_iseq(str, c2))
     246          30 :             return str;
     247         600 :         str += pg_mblen(str);   /* advance by one (possibly multibyte) character */
     248             :     }
     249             : 
     250           0 :     return NULL;
     251             : }
     252             : 
     253             : 
     254             : /* backward string compare (last byte first) for suffix tree operations */
     255             : static int
     256         818 : strbcmp(const unsigned char *s1, const unsigned char *s2)
     257             : {
     258         818 :     int         l1 = strlen((const char *) s1) - 1,
     259         818 :                 l2 = strlen((const char *) s2) - 1;
     260             : 
     261        1094 :     while (l1 >= 0 && l2 >= 0)
     262             :     {
     263         856 :         if (s1[l1] < s2[l2])
     264         186 :             return -1;
     265         670 :         if (s1[l1] > s2[l2])
     266         394 :             return 1;
     267         276 :         l1--;
     268         276 :         l2--;
     269             :     }
     270         238 :     if (l1 < l2)    /* s1 ran out first: the shorter string sorts lower */
     271          64 :         return -1;
     272         174 :     if (l1 > l2)
     273         146 :         return 1;
     274             : 
     275          28 :     return 0;
     276             : }
     277             : 
     278             : static int  /* like strbcmp(), but compares at most count trailing bytes */
     279          28 : strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
     280             : {
     281          28 :     int         l1 = strlen((const char *) s1) - 1,
     282          28 :                 l2 = strlen((const char *) s2) - 1,
     283          28 :                 l = count;
     284             : 
     285          42 :     while (l1 >= 0 && l2 >= 0 && l > 0)
     286             :     {
     287          28 :         if (s1[l1] < s2[l2])
     288          14 :             return -1;
     289          14 :         if (s1[l1] > s2[l2])
     290           0 :             return 1;
     291          14 :         l1--;
     292          14 :         l2--;
     293          14 :         l--;
     294             :     }
     295          14 :     if (l == 0) /* count bytes matched */
     296          14 :         return 0;
     297           0 :     if (l1 < l2)    /* a string ran out before count bytes: shorter sorts lower */
     298           0 :         return -1;
     299           0 :     if (l1 > l2)
     300           0 :         return 1;
     301           0 :     return 0;
     302             : }
     303             : 
     304             : /*
     305             :  * Compares affixes.
     306             :  * First compares the type of an affix. Prefixes should go before suffixes.
     307             :  * If types are equal then compares the replaceable (repl) strings.
     308             :  */
     309             : static int
     310        1384 : cmpaffix(const void *s1, const void *s2)
     311             : {
     312        1384 :     const AFFIX *a1 = (const AFFIX *) s1;
     313        1384 :     const AFFIX *a2 = (const AFFIX *) s2;
     314             : 
     315        1384 :     if (a1->type < a2->type)
     316         316 :         return -1;
     317        1068 :     if (a1->type > a2->type)
     318          94 :         return 1;
     319         974 :     if (a1->type == FF_PREFIX)
     320         156 :         return strcmp(a1->repl, a2->repl);
     321             :     else    /* non-prefix affixes are compared from the end of repl */
     322         818 :         return strbcmp((const unsigned char *) a1->repl,
     323         818 :                        (const unsigned char *) a2->repl);
     324             : }
     325             : 
     326             : /*
     327             :  * Gets an affix flag from the set of affix flags (sflagset).
     328             :  *
     329             :  * Several flags can be stored in a single string. Flags can be represented by:
     330             :  * - 1 character (FM_CHAR). A character may be Unicode.
     331             :  * - 2 characters (FM_LONG). A character may be Unicode.
     332             :  * - numbers from 1 to 65000 (FM_NUM).
     333             :  *
     334             :  * Depending on the flagMode an affix string can have the following format:
     335             :  * - FM_CHAR: ABCD
     336             :  *   Here we have 4 flags: A, B, C and D
     337             :  * - FM_LONG: ABCDE*
     338             :  *   Here we have 3 flags: AB, CD and E*
     339             :  * - FM_NUM: 200,205,50
     340             :  *   Here we have 3 flags: 200, 205 and 50
     341             :  *
     342             :  * Conf: current dictionary (only flagMode is consulted).
     343             :  * sflagset: in/out cursor into the set of affix flags; on return it points to
     344             :  *           the start of the next affix flag.
     345             :  * sflag: output buffer; receives one NUL-terminated flag. Errors go to ereport.
     346             :  */
     347             : static void
     348        4248 : getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
     349             : {
     350             :     int32       s;
     351             :     char       *next,
     352        4248 :                *sbuf = *sflagset;
     353             :     int         maxstep;
     354        4248 :     bool        stop = false;
     355        4248 :     bool        met_comma = false;
     356             : 
     357        4248 :     maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
     358             : 
     359        5542 :     while (**sflagset)
     360             :     {
     361        5542 :         switch (Conf->flagMode)
     362             :         {
     363        4746 :             case FM_LONG:
     364             :             case FM_CHAR:
     365        4746 :                 COPYCHAR(sflag, *sflagset);
     366        4746 :                 sflag += pg_mblen(*sflagset);
     367             : 
     368             :                 /* Go to start of the next flag */
     369        4746 :                 *sflagset += pg_mblen(*sflagset);
     370             : 
     371             :                 /* Check if we get all characters of flag */
     372        4746 :                 maxstep--;
     373        4746 :                 stop = (maxstep == 0);
     374        4746 :                 break;
     375         796 :             case FM_NUM:
     376         796 :                 errno = 0; s = strtol(*sflagset, &next, 10);    /* clear stale errno so the ERANGE test below is meaningful */
     377         796 :                 if (*sflagset == next || errno == ERANGE)
     378           4 :                     ereport(ERROR,
     379             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     380             :                              errmsg("invalid affix flag \"%s\"", *sflagset)));
     381         792 :                 if (s < 0 || s > FLAGNUM_MAXSIZE)
     382           0 :                     ereport(ERROR,
     383             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     384             :                              errmsg("affix flag \"%s\" is out of range",
     385             :                                     *sflagset)));
     386         792 :                 sflag += sprintf(sflag, "%0d", s);
     387             : 
     388             :                 /* Go to start of the next flag: skip one comma and any spaces */
     389         792 :                 *sflagset = next;
     390        1208 :                 while (**sflagset)
     391             :                 {
     392         832 :                     if (t_isdigit(*sflagset))
     393             :                     {
     394         416 :                         if (!met_comma)
     395           0 :                             ereport(ERROR,
     396             :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     397             :                                      errmsg("invalid affix flag \"%s\"",
     398             :                                             *sflagset)));
     399         416 :                         break;
     400             :                     }
     401         416 :                     else if (t_iseq(*sflagset, ','))
     402             :                     {
     403         416 :                         if (met_comma)
     404           0 :                             ereport(ERROR,
     405             :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     406             :                                      errmsg("invalid affix flag \"%s\"",
     407             :                                             *sflagset)));
     408         416 :                         met_comma = true;
     409             :                     }
     410           0 :                     else if (!t_isspace(*sflagset))
     411             :                     {
     412           0 :                         ereport(ERROR,
     413             :                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     414             :                                  errmsg("invalid character in affix flag \"%s\"",
     415             :                                         *sflagset)));
     416             :                     }
     417             : 
     418         416 :                     *sflagset += pg_mblen(*sflagset);
     419             :                 }
     420         792 :                 stop = true;
     421         792 :                 break;
     422           0 :             default:
     423           0 :                 elog(ERROR, "unrecognized type of Conf->flagMode: %d",
     424             :                      Conf->flagMode);
     425             :         }
     426             : 
     427        5538 :         if (stop)
     428        4244 :             break;
     429             :     }
     430             : 
     431        4244 :     if (Conf->flagMode == FM_LONG && maxstep > 0)   /* input ended after one character of a two-character flag */
     432           0 :         ereport(ERROR,
     433             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     434             :                  errmsg("invalid affix flag \"%s\" with \"long\" flag value",
     435             :                         sbuf)));
     436             : 
     437        4244 :     *sflag = '\0';
     438        4244 : }
     439             : 
     440             : /*
     441             :  * Checks if the affix set Conf->AffixData[affix] contains affixflag.
     442             :  * Conf->AffixData[affix] does not contain affixflag if this flag is not
     443             :  * actually used by the .dict file.
     444             :  *
     445             :  * Conf: current dictionary.
     446             :  * affix: index of the Conf->AffixData array.
     447             :  * affixflag: the affix flag.
     448             :  *
     449             :  * Returns true if the string Conf->AffixData[affix] contains affixflag,
     450             :  * otherwise returns false.
     451             :  */
     452             : static bool
     453        1530 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
     454             : {
     455             :     char       *flagcur;
     456             :     char        flag[BUFSIZ];
     457             : 
     458        1530 :     if (*affixflag == 0)    /* an empty affixflag is always considered in use */
     459         424 :         return true;
     460             : 
     461             :     Assert(affix < Conf->nAffixData);
     462             : 
     463        1106 :     flagcur = Conf->AffixData[affix];
     464             : 
     465        3218 :     while (*flagcur)
     466             :     {
     467        2448 :         getNextFlagFromString(Conf, &flagcur, flag);    /* also advances flagcur */
     468             :         /* Compare the flag just extracted from flagcur with affixflag */
     469        2448 :         if (strcmp(flag, affixflag) == 0)
     470         336 :             return true;
     471             :     }
     472             : 
     473             :     /* Could not find affixflag */
     474         770 :     return false;
     475             : }
     476             : 
     477             : /*
     478             :  * Adds the new word into the temporary array Spell.
     479             :  *
     480             :  * Conf: current dictionary.
     481             :  * word: new word.
     482             :  * flag: set of affix flags. A single flag can be extracted with
     483             :  *       getNextFlagFromString().
     484             : static void
     485         822 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
     486             : {
     487         822 :     if (Conf->nspell >= Conf->mspell)
     488             :     {
     489          90 :         if (Conf->mspell)
     490             :         {
     491           0 :             Conf->mspell *= 2;
     492           0 :             Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
     493             :         }
     494             :         else
     495             :         {
     496          90 :             Conf->mspell = 1024 * 20;
     497          90 :             Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
     498             :         }
     499             :     }
     500         822 :     Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
     501         822 :     strcpy(Conf->Spell[Conf->nspell]->word, word);
     502        1644 :     Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
     503         822 :         ? cpstrdup(Conf, flag) : VoidString;
     504         822 :     Conf->nspell++;
     505         822 : }
     506             : 
     507             : /*
     508             :  * Imports dictionary into the temporary array Spell.
     509             :  *
     510             :  * Note caller must already have applied get_tsearch_config_filename.
     511             :  *
     512             :  * Conf: current dictionary.
     513             :  * filename: path to the .dict file.
     514             :  */
     515             : void
     516          90 : NIImportDictionary(IspellDict *Conf, const char *filename)
     517             : {
     518             :     tsearch_readline_state trst;
     519             :     char       *line;
     520             : 
     521          90 :     if (!tsearch_readline_begin(&trst, filename))
     522           0 :         ereport(ERROR,
     523             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     524             :                  errmsg("could not open dictionary file \"%s\": %m",
     525             :                         filename)));
     526             : 
     527         912 :     while ((line = tsearch_readline(&trst)) != NULL)
     528             :     {
     529             :         char       *s,
     530             :                    *pstr;
     531             : 
     532             :         /* Set of affix flags */
     533             :         const char *flag;
     534             : 
     535             :         /* Extract flag from the line: everything after the first '/' */
     536         822 :         flag = NULL;
     537         822 :         if ((s = findchar(line, '/')))
     538             :         {
     539         732 :             *s++ = '\0';
     540         732 :             flag = s;
     541        2922 :             while (*s)
     542             :             {
     543             :                 /* flags may contain only single-byte printable non-space characters */
     544        2922 :                 if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
     545        2190 :                     s++;
     546             :                 else
     547             :                 {
     548         732 :                     *s = '\0';
     549         732 :                     break;
     550             :                 }
     551             :             }
     552             :         }
     553             :         else
     554          90 :             flag = "";
     555             : 
     556             :         /* Cut the word at the first whitespace character */
     557         822 :         s = line;
     558        5962 :         while (*s)
     559             :         {
     560        5230 :             if (t_isspace(s))
     561             :             {
     562          90 :                 *s = '\0';
     563          90 :                 break;
     564             :             }
     565        5140 :             s += pg_mblen(s);
     566             :         }
     567         822 :         pstr = lowerstr_ctx(Conf, line);
     568             : 
     569         822 :         NIAddSpell(Conf, pstr, flag);   /* copies both the word and the flags */
     570         822 :         pfree(pstr);
     571             : 
     572         822 :         pfree(line);
     573             :     }
     574          90 :     tsearch_readline_end(&trst);
     575          90 : }
     576             : 
     577             : /*
     578             :  * Searches for a basic form of a word in the prefix tree. This word was
     579             :  * generated using an affix rule. This rule may not be present in the affix
     580             :  * set of the basic form of the word.
     581             :  *
     582             :  * For example, we have the entry in the .dict file:
     583             :  * meter/GMD
     584             :  *
     585             :  * The affix rule with the flag S:
     586             :  * SFX S   y     ies        [^aeiou]y
     587             :  * is not presented here.
     588             :  *
     589             :  * The affix rule with the flag M:
     590             :  * SFX M   0     's         .
     591             :  * is presented here.
     592             :  *
     593             :  * Conf: current dictionary.
     594             :  * word: basic form of word.
     595             :  * affixflag: affix flag, by which a basic form of word was generated.
     596             :  * flag: compound flag used to compare with StopMiddle->compoundflag.
     597             :  *
     598             :  * Returns 1 if the word was found in the prefix tree, else returns 0.
     599             :  */
     600             : static int
     601        1996 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
     602             : {
     603        1996 :     SPNode     *node = Conf->Dictionary;
     604             :     SPNodeData *StopLow,
     605             :                *StopHigh,
     606             :                *StopMiddle;
     607        1996 :     const uint8 *ptr = (const uint8 *) word;
     608             : 
     609        1996 :     flag &= FF_COMPOUNDFLAGMASK;
     610             : 
     611        9296 :     while (node && *ptr)
     612             :     {
     613        8816 :         StopLow = node->data;
     614        8816 :         StopHigh = node->data + node->length;
     615       12612 :         while (StopLow < StopHigh)
     616             :         {
     617       11768 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
     618       11768 :             if (StopMiddle->val == *ptr)
     619             :             {
     620        7972 :                 if (*(ptr + 1) == '\0' && StopMiddle->isword)
     621             :                 {
     622         764 :                     if (flag == 0)
     623             :                     {
     624             :                         /*
     625             :                          * The word can be formed only with another word. And
     626             :                          * in the flag parameter there is not a sign that we
     627             :                          * search compound words.
     628             :                          */
     629         484 :                         if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
     630           0 :                             return 0;
     631             :                     }
     632         280 :                     else if ((flag & StopMiddle->compoundflag) == 0)
     633           0 :                         return 0;
     634             : 
     635             :                     /*
     636             :                      * Check if this affix rule is presented in the affix set
     637             :                      * with index StopMiddle->affix.
     638             :                      */
     639         764 :                     if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
     640         672 :                         return 1;
     641             :                 }
     642        7300 :                 node = StopMiddle->node;
     643        7300 :                 ptr++;
     644        7300 :                 break;
     645             :             }
     646        3796 :             else if (StopMiddle->val < *ptr)
     647        1288 :                 StopLow = StopMiddle + 1;
     648             :             else
     649        2508 :                 StopHigh = StopMiddle;
     650             :         }
     651        8144 :         if (StopLow >= StopHigh)
     652         844 :             break;
     653             :     }
     654        1324 :     return 0;
     655             : }
     656             : 
     657             : /*
     658             :  * Context reset/delete callback for a regular expression used in an affix
     659             :  */
     660             : static void
     661          76 : regex_affix_deletion_callback(void *arg)
     662             : {
     663          76 :     aff_regex_struct *pregex = (aff_regex_struct *) arg;
     664             : 
     665          76 :     pg_regfree(&(pregex->regex));
     666          76 : }
     667             : 
     668             : /*
     669             :  * Adds a new affix rule to the Affix field.
     670             :  *
     671             :  * Conf: current dictionary.
     672             :  * flag: affix flag ('\' in the below example).
     673             :  * flagflags: set of flags from the flagval field for this affix rule. This set
     674             :  *            is listed after '/' character in the added string (repl).
     675             :  *
     676             :  *            For example L flag in the hunspell_sample.affix:
     677             :  *            SFX \   0 Y/L [^Y]
     678             :  *
     679             :  * mask: condition for search ('[^Y]' in the above example).
     680             :  * find: stripping characters from beginning (at prefix) or end (at suffix)
     681             :  *       of the word ('0' in the above example, 0 means that there is not
     682             :  *       stripping character).
     683             :  * repl: adding string after stripping ('Y' in the above example).
     684             :  * type: FF_SUFFIX or FF_PREFIX.
     685             :  */
     686             : static void
     687         744 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
     688             :            const char *find, const char *repl, int type)
     689             : {
     690             :     AFFIX      *Affix;
     691             : 
     692         744 :     if (Conf->naffixes >= Conf->maffixes)
     693             :     {
     694          90 :         if (Conf->maffixes)
     695             :         {
     696           0 :             Conf->maffixes *= 2;
     697           0 :             Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
     698             :         }
     699             :         else
     700             :         {
     701          90 :             Conf->maffixes = 16;
     702          90 :             Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
     703             :         }
     704             :     }
     705             : 
     706         744 :     Affix = Conf->Affix + Conf->naffixes;
     707             : 
     708             :     /* This affix rule can be applied for words with any ending */
     709         744 :     if (strcmp(mask, ".") == 0 || *mask == '\0')
     710             :     {
     711         180 :         Affix->issimple = 1;
     712         180 :         Affix->isregis = 0;
     713             :     }
     714             :     /* This affix rule will use regis to search word ending */
     715         564 :     else if (RS_isRegis(mask))
     716             :     {
     717         472 :         Affix->issimple = 0;
     718         472 :         Affix->isregis = 1;
     719         472 :         RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
     720         472 :                    *mask ? mask : VoidString);
     721             :     }
     722             :     /* This affix rule will use regex_t to search word ending */
     723             :     else
     724             :     {
     725             :         int         masklen;
     726             :         int         wmasklen;
     727             :         int         err;
     728             :         pg_wchar   *wmask;
     729             :         char       *tmask;
     730             :         aff_regex_struct *pregex;
     731             : 
     732          92 :         Affix->issimple = 0;
     733          92 :         Affix->isregis = 0;
     734          92 :         tmask = (char *) tmpalloc(strlen(mask) + 3);
     735          92 :         if (type == FF_SUFFIX)
     736          92 :             sprintf(tmask, "%s$", mask);
     737             :         else
     738           0 :             sprintf(tmask, "^%s", mask);
     739             : 
     740          92 :         masklen = strlen(tmask);
     741          92 :         wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
     742          92 :         wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
     743             : 
     744             :         /*
     745             :          * The regex engine stores its stuff using malloc not palloc, so we
     746             :          * must arrange to explicitly clean up the regex when the dictionary's
     747             :          * context is cleared.  That means the regex_t has to stay in a fixed
     748             :          * location within the context; we can't keep it directly in the AFFIX
     749             :          * struct, since we may sort and resize the array of AFFIXes.
     750             :          */
     751          92 :         Affix->reg.pregex = pregex = palloc(sizeof(aff_regex_struct));
     752             : 
     753          92 :         err = pg_regcomp(&(pregex->regex), wmask, wmasklen,
     754             :                          REG_ADVANCED | REG_NOSUB,
     755             :                          DEFAULT_COLLATION_OID);
     756          92 :         if (err)
     757             :         {
     758             :             char        errstr[100];
     759             : 
     760           0 :             pg_regerror(err, &(pregex->regex), errstr, sizeof(errstr));
     761           0 :             ereport(ERROR,
     762             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     763             :                      errmsg("invalid regular expression: %s", errstr)));
     764             :         }
     765             : 
     766          92 :         pregex->mcallback.func = regex_affix_deletion_callback;
     767          92 :         pregex->mcallback.arg = (void *) pregex;
     768          92 :         MemoryContextRegisterResetCallback(CurrentMemoryContext,
     769             :                                            &pregex->mcallback);
     770             :     }
     771             : 
     772         744 :     Affix->flagflags = flagflags;
     773         744 :     if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
     774             :     {
     775         134 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
     776         134 :             Affix->flagflags |= FF_COMPOUNDFLAG;
     777             :     }
     778         744 :     Affix->flag = cpstrdup(Conf, flag);
     779         744 :     Affix->type = type;
     780             : 
     781         744 :     Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
     782         744 :     if ((Affix->replen = strlen(repl)) > 0)
     783         720 :         Affix->repl = cpstrdup(Conf, repl);
     784             :     else
     785          24 :         Affix->repl = VoidString;
     786         744 :     Conf->naffixes++;
     787         744 : }
     788             : 
     789             : /* Parsing states for parse_affentry(), parse_ooaffentry() and get_nextfield() */
     790             : #define PAE_WAIT_MASK   0   /* before the mask; in get_nextfield(): before any field */
     791             : #define PAE_INMASK      1   /* copying the mask; in get_nextfield(): inside a field */
     792             : #define PAE_WAIT_FIND   2   /* expecting the find part (parse_affentry(): entered after '>') */
     793             : #define PAE_INFIND      3   /* parse_affentry(): copying the find part, up to ',' */
     794             : #define PAE_WAIT_REPL   4   /* expecting the replacement part */
     795             : #define PAE_INREPL      5   /* parse_affentry(): copying the replacement part */
     796             : #define PAE_WAIT_TYPE   6   /* parse_ooaffentry(): expecting the type field */
     797             : #define PAE_WAIT_FLAG   7   /* parse_ooaffentry(): expecting the flag field */
     798             : 
     799             : /*
     800             :  * Parse next space-separated field of an .affix file line.
     801             :  *
     802             :  * *str is the input pointer (will be advanced past field)
     803             :  * next is where to copy the field value to, with null termination
     804             :  *
     805             :  * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
     806             :  *
     807             :  * Returns true if we found a field, false if not.
     808             :  */
     809             : static bool
     810        6894 : get_nextfield(char **str, char *next)
     811             : {
     812        6894 :     int         state = PAE_WAIT_MASK;
     813        6894 :     int         avail = BUFSIZ;
     814             : 
     815       29484 :     while (**str)
     816             :     {
     817       28674 :         if (state == PAE_WAIT_MASK)
     818             :         {
     819       12718 :             if (t_iseq(*str, '#'))
     820         242 :                 return false;
     821       12476 :             else if (!t_isspace(*str))
     822             :             {
     823        5842 :                 int         clen = pg_mblen(*str);
     824             : 
     825        5842 :                 if (clen < avail)
     826             :                 {
     827        5842 :                     COPYCHAR(next, *str);
     828        5842 :                     next += clen;
     829        5842 :                     avail -= clen;
     830             :                 }
     831        5842 :                 state = PAE_INMASK;
     832             :             }
     833             :         }
     834             :         else                    /* state == PAE_INMASK */
     835             :         {
     836       15956 :             if (t_isspace(*str))
     837             :             {
     838        5842 :                 *next = '\0';
     839        5842 :                 return true;
     840             :             }
     841             :             else
     842             :             {
     843       10114 :                 int         clen = pg_mblen(*str);
     844             : 
     845       10114 :                 if (clen < avail)
     846             :                 {
     847       10114 :                     COPYCHAR(next, *str);
     848       10114 :                     next += clen;
     849       10114 :                     avail -= clen;
     850             :                 }
     851             :             }
     852             :         }
     853       22590 :         *str += pg_mblen(*str);
     854             :     }
     855             : 
     856         810 :     *next = '\0';
     857             : 
     858         810 :     return (state == PAE_INMASK);   /* OK if we got a nonempty field */
     859             : }
     860             : 
     861             : /*
     862             :  * Parses entry of an .affix file of MySpell or Hunspell format.
     863             :  *
     864             :  * An .affix file entry has the following format:
     865             :  * - header
     866             :  *   <type>  <flag>  <cross_flag>  <flag_count>
     867             :  * - fields after header:
     868             :  *   <type>  <flag>  <find>  <replace>  <mask>
     869             :  *
     870             :  * str is the input line
     871             :  * field values are returned to type etc, which must be buffers of size BUFSIZ.
     872             :  *
     873             :  * Returns number of fields found; any omitted fields are set to empty strings.
     874             :  */
     875             : static int
     876        1586 : parse_ooaffentry(char *str, char *type, char *flag, char *find,
     877             :                  char *repl, char *mask)
     878             : {
     879        1586 :     int         state = PAE_WAIT_TYPE;
     880        1586 :     int         fields_read = 0;
     881        1586 :     bool        valid = false;
     882             : 
     883        1586 :     *type = *flag = *find = *repl = *mask = '\0';
     884             : 
     885        6894 :     while (*str)
     886             :     {
     887        6894 :         switch (state)
     888             :         {
     889        1586 :             case PAE_WAIT_TYPE:
     890        1586 :                 valid = get_nextfield(&str, type);
     891        1586 :                 state = PAE_WAIT_FLAG;
     892        1586 :                 break;
     893        1586 :             case PAE_WAIT_FLAG:
     894        1586 :                 valid = get_nextfield(&str, flag);
     895        1586 :                 state = PAE_WAIT_FIND;
     896        1586 :                 break;
     897        1586 :             case PAE_WAIT_FIND:
     898        1586 :                 valid = get_nextfield(&str, find);
     899        1586 :                 state = PAE_WAIT_REPL;
     900        1586 :                 break;
     901        1068 :             case PAE_WAIT_REPL:
     902        1068 :                 valid = get_nextfield(&str, repl);
     903        1068 :                 state = PAE_WAIT_MASK;
     904        1068 :                 break;
     905        1068 :             case PAE_WAIT_MASK:
     906        1068 :                 valid = get_nextfield(&str, mask);
     907        1068 :                 state = -1;     /* force loop exit */
     908        1068 :                 break;
     909           0 :             default:
     910           0 :                 elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
     911             :                      state);
     912             :                 break;
     913             :         }
     914        6894 :         if (valid)
     915        5842 :             fields_read++;
     916             :         else
     917        1052 :             break;              /* early EOL */
     918        5842 :         if (state < 0)
     919         534 :             break;              /* got all fields */
     920             :     }
     921             : 
     922        1586 :     return fields_read;
     923             : }
     924             : 
     925             : /*
     926             :  * Parses entry of an .affix file of Ispell format
     927             :  *
     928             :  * An .affix file entry has the following format:
     929             :  * <mask>  >  [-<find>,]<replace>
     930             :  */
     931             : static bool
     932         210 : parse_affentry(char *str, char *mask, char *find, char *repl)
     933             : {
     934         210 :     int         state = PAE_WAIT_MASK;
     935         210 :     char       *pmask = mask,
     936         210 :                *pfind = find,
     937         210 :                *prepl = repl;
     938             : 
     939         210 :     *mask = *find = *repl = '\0';
     940             : 
     941        5520 :     while (*str)
     942             :     {
     943        5520 :         if (state == PAE_WAIT_MASK)
     944             :         {
     945         510 :             if (t_iseq(str, '#'))
     946           0 :                 return false;
     947         510 :             else if (!t_isspace(str))
     948             :             {
     949         210 :                 COPYCHAR(pmask, str);
     950         210 :                 pmask += pg_mblen(str);
     951         210 :                 state = PAE_INMASK;
     952             :             }
     953             :         }
     954        5010 :         else if (state == PAE_INMASK)
     955             :         {
     956        2040 :             if (t_iseq(str, '>'))
     957             :             {
     958         210 :                 *pmask = '\0';
     959         210 :                 state = PAE_WAIT_FIND;
     960             :             }
     961        1830 :             else if (!t_isspace(str))
     962             :             {
     963         720 :                 COPYCHAR(pmask, str);
     964         720 :                 pmask += pg_mblen(str);
     965             :             }
     966             :         }
     967        2970 :         else if (state == PAE_WAIT_FIND)
     968             :         {
     969         840 :             if (t_iseq(str, '-'))
     970             :             {
     971          30 :                 state = PAE_INFIND;
     972             :             }
     973         810 :             else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
     974             :             {
     975         180 :                 COPYCHAR(prepl, str);
     976         180 :                 prepl += pg_mblen(str);
     977         180 :                 state = PAE_INREPL;
     978             :             }
     979         630 :             else if (!t_isspace(str))
     980           0 :                 ereport(ERROR,
     981             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     982             :                          errmsg("syntax error")));
     983             :         }
     984        2130 :         else if (state == PAE_INFIND)
     985             :         {
     986          60 :             if (t_iseq(str, ','))
     987             :             {
     988          30 :                 *pfind = '\0';
     989          30 :                 state = PAE_WAIT_REPL;
     990             :             }
     991          30 :             else if (t_isalpha(str))
     992             :             {
     993          30 :                 COPYCHAR(pfind, str);
     994          30 :                 pfind += pg_mblen(str);
     995             :             }
     996           0 :             else if (!t_isspace(str))
     997           0 :                 ereport(ERROR,
     998             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     999             :                          errmsg("syntax error")));
    1000             :         }
    1001        2070 :         else if (state == PAE_WAIT_REPL)
    1002             :         {
    1003          30 :             if (t_iseq(str, '-'))
    1004             :             {
    1005           0 :                 break;          /* void repl */
    1006             :             }
    1007          30 :             else if (t_isalpha(str))
    1008             :             {
    1009          30 :                 COPYCHAR(prepl, str);
    1010          30 :                 prepl += pg_mblen(str);
    1011          30 :                 state = PAE_INREPL;
    1012             :             }
    1013           0 :             else if (!t_isspace(str))
    1014           0 :                 ereport(ERROR,
    1015             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1016             :                          errmsg("syntax error")));
    1017             :         }
    1018        2040 :         else if (state == PAE_INREPL)
    1019             :         {
    1020        2040 :             if (t_iseq(str, '#'))
    1021             :             {
    1022         210 :                 *prepl = '\0';
    1023         210 :                 break;
    1024             :             }
    1025        1830 :             else if (t_isalpha(str))
    1026             :             {
    1027         270 :                 COPYCHAR(prepl, str);
    1028         270 :                 prepl += pg_mblen(str);
    1029             :             }
    1030        1560 :             else if (!t_isspace(str))
    1031           0 :                 ereport(ERROR,
    1032             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1033             :                          errmsg("syntax error")));
    1034             :         }
    1035             :         else
    1036           0 :             elog(ERROR, "unrecognized state in parse_affentry: %d", state);
    1037             : 
    1038        5310 :         str += pg_mblen(str);
    1039             :     }
    1040             : 
    1041         210 :     *pmask = *pfind = *prepl = '\0';
    1042             : 
    1043         210 :     return (*mask && (*find || *repl));
    1044             : }
    1045             : 
    1046             : /*
    1047             :  * Sets a Hunspell options depending on flag type.
    1048             :  */
    1049             : static void
    1050        2034 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
    1051             :                           char *s, uint32 val)
    1052             : {
    1053        2034 :     if (Conf->flagMode == FM_NUM)
    1054             :     {
    1055             :         char       *next;
    1056             :         int         i;
    1057             : 
    1058         438 :         i = strtol(s, &next, 10);
    1059         438 :         if (s == next || errno == ERANGE)
    1060           0 :             ereport(ERROR,
    1061             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1062             :                      errmsg("invalid affix flag \"%s\"", s)));
    1063         438 :         if (i < 0 || i > FLAGNUM_MAXSIZE)
    1064           0 :             ereport(ERROR,
    1065             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1066             :                      errmsg("affix flag \"%s\" is out of range", s)));
    1067             : 
    1068         438 :         entry->flag.i = i;
    1069             :     }
    1070             :     else
    1071        1596 :         entry->flag.s = cpstrdup(Conf, s);
    1072             : 
    1073        2034 :     entry->flagMode = Conf->flagMode;
    1074        2034 :     entry->value = val;
    1075        2034 : }
    1076             : 
    1077             : /*
    1078             :  * Sets up a correspondence for the affix parameter with the affix flag.
    1079             :  *
    1080             :  * Conf: current dictionary.
    1081             :  * s: affix flag in string.
    1082             :  * val: affix parameter.
    1083             :  */
    1084             : static void
    1085         238 : addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
    1086             : {
    1087             :     CompoundAffixFlag *newValue;
    1088             :     char        sbuf[BUFSIZ];
    1089             :     char       *sflag;
    1090             :     int         clen;
    1091             : 
    1092         446 :     while (*s && t_isspace(s))
    1093         208 :         s += pg_mblen(s);
    1094             : 
    1095         238 :     if (!*s)
    1096           0 :         ereport(ERROR,
    1097             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1098             :                  errmsg("syntax error")));
    1099             : 
    1100             :     /* Get flag without \n */
    1101         238 :     sflag = sbuf;
    1102         704 :     while (*s && !t_isspace(s) && *s != '\n')
    1103             :     {
    1104         466 :         clen = pg_mblen(s);
    1105         466 :         COPYCHAR(sflag, s);
    1106         466 :         sflag += clen;
    1107         466 :         s += clen;
    1108             :     }
    1109         238 :     *sflag = '\0';
    1110             : 
    1111             :     /* Resize array or allocate memory for array CompoundAffixFlag */
    1112         238 :     if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
    1113             :     {
    1114          90 :         if (Conf->mCompoundAffixFlag)
    1115             :         {
    1116           0 :             Conf->mCompoundAffixFlag *= 2;
    1117           0 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1118           0 :                 repalloc((void *) Conf->CompoundAffixFlags,
    1119           0 :                          Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1120             :         }
    1121             :         else
    1122             :         {
    1123          90 :             Conf->mCompoundAffixFlag = 10;
    1124          90 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1125          90 :                 tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1126             :         }
    1127             :     }
    1128             : 
    1129         238 :     newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
    1130             : 
    1131         238 :     setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
    1132             : 
    1133         238 :     Conf->usecompound = true;
    1134         238 :     Conf->nCompoundAffixFlag++;
    1135         238 : }
    1136             : 
    1137             : /*
    1138             :  * Returns a set of affix parameters which correspondence to the set of affix
    1139             :  * flags s.
    1140             :  */
    1141             : static int
    1142         876 : getCompoundAffixFlagValue(IspellDict *Conf, char *s)
    1143             : {
    1144         876 :     uint32      flag = 0;
    1145             :     CompoundAffixFlag *found,
    1146             :                 key;
    1147             :     char        sflag[BUFSIZ];
    1148             :     char       *flagcur;
    1149             : 
    1150         876 :     if (Conf->nCompoundAffixFlag == 0)
    1151           0 :         return 0;
    1152             : 
    1153         876 :     flagcur = s;
    1154        2672 :     while (*flagcur)
    1155             :     {
    1156        1800 :         getNextFlagFromString(Conf, &flagcur, sflag);
    1157        1796 :         setCompoundAffixFlagValue(Conf, &key, sflag, 0);
    1158             : 
    1159             :         found = (CompoundAffixFlag *)
    1160        1796 :             bsearch(&key, (void *) Conf->CompoundAffixFlags,
    1161        1796 :                     Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
    1162             :                     cmpcmdflag);
    1163        1796 :         if (found != NULL)
    1164         400 :             flag |= found->value;
    1165             :     }
    1166             : 
    1167         872 :     return flag;
    1168             : }
    1169             : 
    1170             : /*
    1171             :  * Returns a flag set using the s parameter.
    1172             :  *
    1173             :  * If Conf->useFlagAliases is true then the s parameter is index of the
    1174             :  * Conf->AffixData array and function returns its entry.
    1175             :  * Else function returns the s parameter.
    1176             :  */
    1177             : static char *
    1178         104 : getAffixFlagSet(IspellDict *Conf, char *s)
    1179             : {
    1180         104 :     if (Conf->useFlagAliases && *s != '\0')
    1181             :     {
    1182             :         int         curaffix;
    1183             :         char       *end;
    1184             : 
    1185          66 :         curaffix = strtol(s, &end, 10);
    1186          66 :         if (s == end || errno == ERANGE)
    1187           0 :             ereport(ERROR,
    1188             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1189             :                      errmsg("invalid affix alias \"%s\"", s)));
    1190             : 
    1191          66 :         if (curaffix > 0 && curaffix < Conf->nAffixData)
    1192             : 
    1193             :             /*
    1194             :              * Do not subtract 1 from curaffix because empty string was added
    1195             :              * in NIImportOOAffixes
    1196             :              */
    1197          66 :             return Conf->AffixData[curaffix];
    1198           0 :         else if (curaffix > Conf->nAffixData)
    1199           0 :             ereport(ERROR,
    1200             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1201             :                      errmsg("invalid affix alias \"%s\"", s)));
    1202           0 :         return VoidString;
    1203             :     }
    1204             :     else
    1205          38 :         return s;
    1206             : }
    1207             : 
    1208             : /*
    1209             :  * Import an affix file that follows MySpell or Hunspell format.
    1210             :  *
    1211             :  * Conf: current dictionary.
    1212             :  * filename: path to the .affix file.
    1213             :  */
    1214             : static void
    1215          60 : NIImportOOAffixes(IspellDict *Conf, const char *filename)
    1216             : {
    1217             :     char        type[BUFSIZ],
    1218          60 :                *ptype = NULL;
    1219             :     char        sflag[BUFSIZ];
    1220             :     char        mask[BUFSIZ],
    1221             :                *pmask;
    1222             :     char        find[BUFSIZ],
    1223             :                *pfind;
    1224             :     char        repl[BUFSIZ],
    1225             :                *prepl;
    1226          60 :     bool        isSuffix = false;
    1227          60 :     int         naffix = 0,
    1228          60 :                 curaffix = 0;
    1229          60 :     int         sflaglen = 0;
    1230          60 :     char        flagflags = 0;
    1231             :     tsearch_readline_state trst;
    1232             :     char       *recoded;
    1233             : 
    1234             :     /* read file to find any flag */
    1235          60 :     Conf->usecompound = false;
    1236          60 :     Conf->useFlagAliases = false;
    1237          60 :     Conf->flagMode = FM_CHAR;
    1238             : 
    1239          60 :     if (!tsearch_readline_begin(&trst, filename))
    1240           0 :         ereport(ERROR,
    1241             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1242             :                  errmsg("could not open affix file \"%s\": %m",
    1243             :                         filename)));
    1244             : 
    1245        2340 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1246             :     {
    1247        2280 :         if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
    1248             :         {
    1249         694 :             pfree(recoded);
    1250         694 :             continue;
    1251             :         }
    1252             : 
    1253        1586 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
    1254          60 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
    1255             :                                       FF_COMPOUNDFLAG);
    1256        1526 :         else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
    1257          22 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
    1258             :                                       FF_COMPOUNDBEGIN);
    1259        1504 :         else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
    1260           0 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
    1261             :                                       FF_COMPOUNDLAST);
    1262             :         /* COMPOUNDLAST and COMPOUNDEND are synonyms */
    1263        1504 :         else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
    1264          22 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
    1265             :                                       FF_COMPOUNDLAST);
    1266        1482 :         else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
    1267          22 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
    1268             :                                       FF_COMPOUNDMIDDLE);
    1269        1460 :         else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
    1270          60 :             addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
    1271             :                                       FF_COMPOUNDONLY);
    1272        1400 :         else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
    1273          22 :             addCompoundAffixFlagValue(Conf,
    1274             :                                       recoded + strlen("COMPOUNDPERMITFLAG"),
    1275             :                                       FF_COMPOUNDPERMITFLAG);
    1276        1378 :         else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
    1277           0 :             addCompoundAffixFlagValue(Conf,
    1278             :                                       recoded + strlen("COMPOUNDFORBIDFLAG"),
    1279             :                                       FF_COMPOUNDFORBIDFLAG);
    1280        1378 :         else if (STRNCMP(recoded, "FLAG") == 0)
    1281             :         {
    1282          46 :             char       *s = recoded + strlen("FLAG");
    1283             : 
    1284          92 :             while (*s && t_isspace(s))
    1285          46 :                 s += pg_mblen(s);
    1286             : 
    1287          46 :             if (*s)
    1288             :             {
    1289          46 :                 if (STRNCMP(s, "long") == 0)
    1290          22 :                     Conf->flagMode = FM_LONG;
    1291          24 :                 else if (STRNCMP(s, "num") == 0)
    1292          24 :                     Conf->flagMode = FM_NUM;
    1293           0 :                 else if (STRNCMP(s, "default") != 0)
    1294           0 :                     ereport(ERROR,
    1295             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1296             :                              errmsg("Ispell dictionary supports only "
    1297             :                                     "\"default\", \"long\", "
    1298             :                                     "and \"num\" flag values")));
    1299             :             }
    1300             :         }
    1301             : 
    1302        1586 :         pfree(recoded);
    1303             :     }
    1304          60 :     tsearch_readline_end(&trst);
    1305             : 
    1306          60 :     if (Conf->nCompoundAffixFlag > 1)
    1307          60 :         qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
    1308             :               sizeof(CompoundAffixFlag), cmpcmdflag);
    1309             : 
    1310          60 :     if (!tsearch_readline_begin(&trst, filename))
    1311           0 :         ereport(ERROR,
    1312             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1313             :                  errmsg("could not open affix file \"%s\": %m",
    1314             :                         filename)));
    1315             : 
    1316        2340 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1317             :     {
    1318             :         int         fields_read;
    1319             : 
    1320        2280 :         if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
    1321         694 :             goto nextline;
    1322             : 
    1323        1586 :         fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
    1324             : 
    1325        1586 :         if (ptype)
    1326        1526 :             pfree(ptype);
    1327        1586 :         ptype = lowerstr_ctx(Conf, type);
    1328             : 
    1329             :         /* First try to parse AF parameter (alias compression) */
    1330        1586 :         if (STRNCMP(ptype, "af") == 0)
    1331             :         {
    1332             :             /* First line is the number of aliases */
    1333         264 :             if (!Conf->useFlagAliases)
    1334             :             {
    1335          22 :                 Conf->useFlagAliases = true;
    1336          22 :                 naffix = atoi(sflag);
    1337          22 :                 if (naffix <= 0)
    1338           0 :                     ereport(ERROR,
    1339             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1340             :                              errmsg("invalid number of flag vector aliases")));
    1341             : 
    1342             :                 /* Also reserve place for empty flag set */
    1343          22 :                 naffix++;
    1344             : 
    1345          22 :                 Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
    1346          22 :                 Conf->lenAffixData = Conf->nAffixData = naffix;
    1347             : 
    1348             :                 /* Add empty flag set into AffixData */
    1349          22 :                 Conf->AffixData[curaffix] = VoidString;
    1350          22 :                 curaffix++;
    1351             :             }
    1352             :             /* Other lines are aliases */
    1353             :             else
    1354             :             {
    1355         242 :                 if (curaffix < naffix)
    1356             :                 {
    1357         242 :                     Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
    1358         242 :                     curaffix++;
    1359             :                 }
    1360             :                 else
    1361           0 :                     ereport(ERROR,
    1362             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1363             :                              errmsg("number of aliases exceeds specified number %d",
    1364             :                                     naffix - 1)));
    1365             :             }
    1366         264 :             goto nextline;
    1367             :         }
    1368             :         /* Else try to parse prefixes and suffixes */
    1369        1322 :         if (fields_read < 4 ||
    1370        1068 :             (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
    1371         254 :             goto nextline;
    1372             : 
    1373        1068 :         sflaglen = strlen(sflag);
    1374        1068 :         if (sflaglen == 0
    1375        1068 :             || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
    1376        1068 :             || (sflaglen > 2 && Conf->flagMode == FM_LONG))
    1377           0 :             goto nextline;
    1378             : 
    1379             :         /*--------
    1380             :          * Affix header. For example:
    1381             :          * SFX \ N 1
    1382             :          *--------
    1383             :          */
    1384        1068 :         if (fields_read == 4)
    1385             :         {
    1386         534 :             isSuffix = (STRNCMP(ptype, "sfx") == 0);
    1387         534 :             if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
    1388         370 :                 flagflags = FF_CROSSPRODUCT;
    1389             :             else
    1390         164 :                 flagflags = 0;
    1391             :         }
    1392             :         /*--------
    1393             :          * Affix fields. For example:
    1394             :          * SFX \   0    Y/L [^Y]
    1395             :          *--------
    1396             :          */
    1397             :         else
    1398             :         {
    1399             :             char       *ptr;
    1400         534 :             int         aflg = 0;
    1401             : 
    1402             :             /* Get flags after '/' (flags are case sensitive) */
    1403         534 :             if ((ptr = strchr(repl, '/')) != NULL)
    1404         104 :                 aflg |= getCompoundAffixFlagValue(Conf,
    1405             :                                                   getAffixFlagSet(Conf,
    1406             :                                                                   ptr + 1));
    1407             :             /* Get lowercased version of string before '/' */
    1408         534 :             prepl = lowerstr_ctx(Conf, repl);
    1409         534 :             if ((ptr = strchr(prepl, '/')) != NULL)
    1410         104 :                 *ptr = '\0';
    1411         534 :             pfind = lowerstr_ctx(Conf, find);
    1412         534 :             pmask = lowerstr_ctx(Conf, mask);
    1413         534 :             if (t_iseq(find, '0'))
    1414         450 :                 *pfind = '\0';
    1415         534 :             if (t_iseq(repl, '0'))
    1416          24 :                 *prepl = '\0';
    1417             : 
    1418         534 :             NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
    1419             :                        isSuffix ? FF_SUFFIX : FF_PREFIX);
    1420         534 :             pfree(prepl);
    1421         534 :             pfree(pfind);
    1422         534 :             pfree(pmask);
    1423             :         }
    1424             : 
    1425        2280 : nextline:
    1426        2280 :         pfree(recoded);
    1427             :     }
    1428             : 
    1429          60 :     tsearch_readline_end(&trst);
    1430          60 :     if (ptype)
    1431          60 :         pfree(ptype);
    1432          60 : }
    1433             : 
    1434             : /*
    1435             :  * import affixes
    1436             :  *
    1437             :  * Note caller must already have applied get_tsearch_config_filename
    1438             :  *
    1439             :  * This function is responsible for parsing ispell ("old format") affix files.
    1440             :  * If we realize that the file contains new-format commands, we pass off the
    1441             :  * work to NIImportOOAffixes(), which will re-read the whole file.
    1442             :  */
    1443             : void
    1444          90 : NIImportAffixes(IspellDict *Conf, const char *filename)
    1445             : {
    1446          90 :     char       *pstr = NULL;
    1447             :     char        flag[BUFSIZ];
    1448             :     char        mask[BUFSIZ];
    1449             :     char        find[BUFSIZ];
    1450             :     char        repl[BUFSIZ];
    1451             :     char       *s;
    1452          90 :     bool        suffixes = false;
    1453          90 :     bool        prefixes = false;
    1454          90 :     char        flagflags = 0;
    1455             :     tsearch_readline_state trst;
    1456          90 :     bool        oldformat = false;
    1457          90 :     char       *recoded = NULL;
    1458             : 
    1459          90 :     if (!tsearch_readline_begin(&trst, filename))
    1460           0 :         ereport(ERROR,
    1461             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1462             :                  errmsg("could not open affix file \"%s\": %m",
    1463             :                         filename)));
    1464             : 
    1465          90 :     Conf->usecompound = false;
    1466          90 :     Conf->useFlagAliases = false;
    1467          90 :     Conf->flagMode = FM_CHAR;
    1468             : 
    1469         870 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1470             :     {
    1471         840 :         pstr = lowerstr(recoded);
    1472             : 
    1473             :         /* Skip comments and empty lines */
    1474         840 :         if (*pstr == '#' || *pstr == '\n')
    1475         270 :             goto nextline;
    1476             : 
    1477         570 :         if (STRNCMP(pstr, "compoundwords") == 0)
    1478             :         {
    1479             :             /* Find case-insensitive L flag in non-lowercased string */
    1480          30 :             s = findchar2(recoded, 'l', 'L');
    1481          30 :             if (s)
    1482             :             {
    1483         150 :                 while (*s && !t_isspace(s))
    1484         120 :                     s += pg_mblen(s);
    1485          60 :                 while (*s && t_isspace(s))
    1486          30 :                     s += pg_mblen(s);
    1487             : 
    1488          30 :                 if (*s && pg_mblen(s) == 1)
    1489             :                 {
    1490          30 :                     addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
    1491          30 :                     Conf->usecompound = true;
    1492             :                 }
    1493          30 :                 oldformat = true;
    1494          30 :                 goto nextline;
    1495             :             }
    1496             :         }
    1497         540 :         if (STRNCMP(pstr, "suffixes") == 0)
    1498             :         {
    1499          30 :             suffixes = true;
    1500          30 :             prefixes = false;
    1501          30 :             oldformat = true;
    1502          30 :             goto nextline;
    1503             :         }
    1504         510 :         if (STRNCMP(pstr, "prefixes") == 0)
    1505             :         {
    1506          30 :             suffixes = false;
    1507          30 :             prefixes = true;
    1508          30 :             oldformat = true;
    1509          30 :             goto nextline;
    1510             :         }
    1511         480 :         if (STRNCMP(pstr, "flag") == 0)
    1512             :         {
    1513         256 :             s = recoded + 4;    /* we need non-lowercased string */
    1514         256 :             flagflags = 0;
    1515             : 
    1516         512 :             while (*s && t_isspace(s))
    1517         256 :                 s += pg_mblen(s);
    1518             : 
    1519         256 :             if (*s == '*')
    1520             :             {
    1521         150 :                 flagflags |= FF_CROSSPRODUCT;
    1522         150 :                 s++;
    1523             :             }
    1524         106 :             else if (*s == '~')
    1525             :             {
    1526          30 :                 flagflags |= FF_COMPOUNDONLY;
    1527          30 :                 s++;
    1528             :             }
    1529             : 
    1530         256 :             if (*s == '\\')
    1531          30 :                 s++;
    1532             : 
    1533             :             /*
    1534             :              * An old-format flag is a single ASCII character; we expect it to
    1535             :              * be followed by EOL, whitespace, or ':'.  Otherwise this is a
    1536             :              * new-format flag command.
    1537             :              */
    1538         256 :             if (*s && pg_mblen(s) == 1)
    1539             :             {
    1540         256 :                 COPYCHAR(flag, s);
    1541         256 :                 flag[1] = '\0';
    1542             : 
    1543         256 :                 s++;
    1544         302 :                 if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
    1545          46 :                     t_isspace(s))
    1546             :                 {
    1547         210 :                     oldformat = true;
    1548         210 :                     goto nextline;
    1549             :                 }
    1550             :             }
    1551          46 :             goto isnewformat;
    1552             :         }
    1553         224 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
    1554         210 :             STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
    1555         210 :             STRNCMP(recoded, "PFX") == 0 ||
    1556         210 :             STRNCMP(recoded, "SFX") == 0)
    1557          14 :             goto isnewformat;
    1558             : 
    1559         210 :         if ((!suffixes) && (!prefixes))
    1560           0 :             goto nextline;
    1561             : 
    1562         210 :         if (!parse_affentry(pstr, mask, find, repl))
    1563           0 :             goto nextline;
    1564             : 
    1565         210 :         NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
    1566             : 
    1567         780 : nextline:
    1568         780 :         pfree(recoded);
    1569         780 :         pfree(pstr);
    1570             :     }
    1571          30 :     tsearch_readline_end(&trst);
    1572          30 :     return;
    1573             : 
    1574          60 : isnewformat:
    1575          60 :     if (oldformat)
    1576           0 :         ereport(ERROR,
    1577             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1578             :                  errmsg("affix file contains both old-style and new-style commands")));
    1579          60 :     tsearch_readline_end(&trst);
    1580             : 
    1581          60 :     NIImportOOAffixes(Conf, filename);
    1582             : }
    1583             : 
    1584             : /*
    1585             :  * Merges two affix flag sets and stores a new affix flag set into
    1586             :  * Conf->AffixData.
    1587             :  *
    1588             :  * Returns the index of the merged set (a1 or a2 itself if the other is empty).
    1589             :  */
    1590             : static int
    1591          46 : MergeAffix(IspellDict *Conf, int a1, int a2)
    1592             : {
    1593             :     char      **ptr;
    1594             : 
    1595             :     Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
    1596             : 
    1597             :     /* Do not merge affix flag sets if one of them is empty */
    1598          46 :     if (*Conf->AffixData[a1] == '\0')
    1599           0 :         return a2;
    1600          46 :     else if (*Conf->AffixData[a2] == '\0')
    1601           0 :         return a1;
    1602             : 
    1603             :     /* Double the size of AffixData if there's not enough space */
    1604          46 :     if (Conf->nAffixData + 1 >= Conf->lenAffixData)
    1605             :     {
    1606          46 :         Conf->lenAffixData *= 2;
    1607          46 :         Conf->AffixData = (char **) repalloc(Conf->AffixData,
    1608          46 :                                              sizeof(char *) * Conf->lenAffixData);
    1609             :     }
    1610             : 
    1611          46 :     ptr = Conf->AffixData + Conf->nAffixData;
    1612          46 :     if (Conf->flagMode == FM_NUM)
    1613             :     {
    1614          20 :         *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
    1615             :                        strlen(Conf->AffixData[a2]) +
    1616             :                        1 /* comma */ + 1 /* \0 */ );
    1617          20 :         sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1618             :     }
    1619             :     else
    1620             :     {
    1621          26 :         *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
    1622             :                        strlen(Conf->AffixData[a2]) +
    1623             :                        1 /* \0 */ );
    1624          26 :         sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1625             :     }
    1626          46 :     ptr++;
    1627          46 :     *ptr = NULL;
    1628          46 :     Conf->nAffixData++;
    1629             : 
    1630          46 :     return Conf->nAffixData - 1;
    1631             : }
    1632             : 
    1633             : /*
    1634             :  * Returns the set of affix parameters which corresponds to the set of affix
    1635             :  * flags with the given index.
    1636             :  */
    1637             : static uint32
    1638         772 : makeCompoundFlags(IspellDict *Conf, int affix)
    1639             : {
    1640             :     Assert(affix < Conf->nAffixData);
    1641             : 
    1642         772 :     return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
    1643             :             FF_COMPOUNDFLAGMASK);
    1644             : }
    1645             : 
    1646             : /*
    1647             :  * Makes a prefix tree for the given level.
    1648             :  *
    1649             :  * Conf: current dictionary.
    1650             :  * low: lower index of the Conf->Spell array.
    1651             :  * high: upper index of the Conf->Spell array.
    1652             :  * level: current prefix tree level.
    1653             :  */
    1654             : static SPNode *
    1655        3084 : mkSPNode(IspellDict *Conf, int low, int high, int level)
    1656             : {
    1657             :     int         i;
    1658        3084 :     int         nchar = 0;
    1659        3084 :     char        lastchar = '\0';
    1660             :     SPNode     *rs;
    1661             :     SPNodeData *data;
    1662        3084 :     int         lownew = low;
    1663             : 
    1664       10134 :     for (i = low; i < high; i++)
    1665        7050 :         if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
    1666             :         {
    1667        3022 :             nchar++;
    1668        3022 :             lastchar = Conf->Spell[i]->word[level];
    1669             :         }
    1670             : 
    1671        3084 :     if (!nchar)
    1672         442 :         return NULL;
    1673             : 
    1674        2642 :     rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
    1675        2642 :     rs->length = nchar;
    1676        2642 :     data = rs->data;
    1677             : 
    1678        2642 :     lastchar = '\0';
    1679        8938 :     for (i = low; i < high; i++)
    1680        6308 :         if (Conf->Spell[i]->p.d.len > level)
    1681             :         {
    1682        4532 :             if (lastchar != Conf->Spell[i]->word[level])
    1683             :             {
    1684        3014 :                 if (lastchar)
    1685             :                 {
    1686             :                     /* Next level of the prefix tree */
    1687         372 :                     data->node = mkSPNode(Conf, lownew, i, level + 1);
    1688         364 :                     lownew = i;
    1689         364 :                     data++;
    1690             :                 }
    1691        3006 :                 lastchar = Conf->Spell[i]->word[level];
    1692             :             }
    1693        4524 :             data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
    1694        4524 :             if (Conf->Spell[i]->p.d.len == level + 1)
    1695             :             {
    1696         726 :                 bool        clearCompoundOnly = false;
    1697             : 
    1698         726 :                 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
    1699             :                 {
    1700             :                     /*
    1701             :                      * MergeAffix may be called several times. If one of the
    1702             :                      * merged words carries FF_COMPOUNDONLY and another
    1703             :                      * doesn't, then clear the FF_COMPOUNDONLY flag.
    1704             :                      */
    1705             : 
    1706          92 :                     clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
    1707          46 :                                          & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
    1708             :                         ? false : true;
    1709          46 :                     data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
    1710             :                 }
    1711             :                 else
    1712         680 :                     data->affix = Conf->Spell[i]->p.d.affix;
    1713         726 :                 data->isword = 1;
    1714             : 
    1715         726 :                 data->compoundflag = makeCompoundFlags(Conf, data->affix);
    1716             : 
    1717         722 :                 if ((data->compoundflag & FF_COMPOUNDONLY) &&
    1718           0 :                     (data->compoundflag & FF_COMPOUNDFLAG) == 0)
    1719           0 :                     data->compoundflag |= FF_COMPOUNDFLAG;
    1720             : 
    1721         722 :                 if (clearCompoundOnly)
    1722          46 :                     data->compoundflag &= ~FF_COMPOUNDONLY;
    1723             :             }
    1724             :         }
    1725             : 
    1726             :     /* Next level of the prefix tree */
    1727        2630 :     data->node = mkSPNode(Conf, lownew, high, level + 1);
    1728             : 
    1729        2626 :     return rs;
    1730             : }
    1731             : 
    1732             : /*
    1733             :  * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
    1734             :  * and affixes.
    1735             :  */
    1736             : void
    1737          90 : NISortDictionary(IspellDict *Conf)
    1738             : {
    1739             :     int         i;
    1740             :     int         naffix;
    1741             :     int         curaffix;
    1742             : 
    1743             :     /* compress affixes */
    1744             : 
    1745             :     /*
    1746             :      * If we use flag aliases then we need to use Conf->AffixData filled in
    1747             :      * by NIImportOOAffixes().
    1748             :      */
    1749          90 :     if (Conf->useFlagAliases)
    1750             :     {
    1751         176 :         for (i = 0; i < Conf->nspell; i++)
    1752             :         {
    1753             :             char       *end;
    1754             : 
    1755         162 :             if (*Conf->Spell[i]->p.flag != '\0')
    1756             :             {
    1757         148 :                 curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
    1758         148 :                 if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
    1759           4 :                     ereport(ERROR,
    1760             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1761             :                              errmsg("invalid affix alias \"%s\"",
    1762             :                                     Conf->Spell[i]->p.flag)));
    1763         144 :                 if (curaffix < 0 || curaffix >= Conf->nAffixData)
    1764           4 :                     ereport(ERROR,
    1765             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1766             :                              errmsg("invalid affix alias \"%s\"",
    1767             :                                     Conf->Spell[i]->p.flag)));
    1768         140 :                 if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
    1769           0 :                     ereport(ERROR,
    1770             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1771             :                              errmsg("invalid affix alias \"%s\"",
    1772             :                                     Conf->Spell[i]->p.flag)));
    1773             :             }
    1774             :             else
    1775             :             {
    1776             :                 /*
    1777             :                  * If Conf->Spell[i]->p.flag is empty, then get empty value of
    1778             :                  * Conf->AffixData (0 index).
    1779             :                  */
    1780          14 :                 curaffix = 0;
    1781             :             }
    1782             : 
    1783         154 :             Conf->Spell[i]->p.d.affix = curaffix;
    1784         154 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1785             :         }
    1786             :     }
    1787             :     /* Otherwise fill Conf->AffixData here */
    1788             :     else
    1789             :     {
    1790             :         /* Count the number of different flags used in the dictionary */
    1791          68 :         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
    1792             :               cmpspellaffix);
    1793             : 
    1794          68 :         naffix = 0;
    1795         668 :         for (i = 0; i < Conf->nspell; i++)
    1796             :         {
    1797         600 :             if (i == 0 ||
    1798         532 :                 strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
    1799         532 :                 naffix++;
    1800             :         }
    1801             : 
    1802             :         /*
    1803             :          * Fill in Conf->AffixData with the affixes that were used in the
    1804             :          * dictionary. Replace textual flag-field of Conf->Spell entries with
    1805             :          * indexes into Conf->AffixData array.
    1806             :          */
    1807          68 :         Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
    1808             : 
    1809          68 :         curaffix = -1;
    1810         668 :         for (i = 0; i < Conf->nspell; i++)
    1811             :         {
    1812         600 :             if (i == 0 ||
    1813         532 :                 strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
    1814             :             {
    1815         532 :                 curaffix++;
    1816             :                 Assert(curaffix < naffix);
    1817         532 :                 Conf->AffixData[curaffix] = cpstrdup(Conf,
    1818         532 :                                                      Conf->Spell[i]->p.flag);
    1819             :             }
    1820             : 
    1821         600 :             Conf->Spell[i]->p.d.affix = curaffix;
    1822         600 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1823             :         }
    1824             : 
    1825          68 :         Conf->lenAffixData = Conf->nAffixData = naffix;
    1826             :     }
    1827             : 
    1828             :     /* Start building a prefix tree */
    1829          82 :     qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
    1830          82 :     Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
    1831          78 : }
    1832             : 
    1833             : /*
    1834             :  * Makes a prefix tree for the given level using the repl string of an affix
    1835             :  * rule. Affixes with an empty replace string are not included in the prefix
    1836             :  * tree; such affixes are handled by mkVoidAffix().
    1837             :  *
    1838             :  * Conf: current dictionary.
    1839             :  * low: lower index of the Conf->Affix array.
    1840             :  * high: upper index of the Conf->Affix array.
    1841             :  * level: current prefix tree level.
    1842             :  * type: FF_SUFFIX or FF_PREFIX.
    1843             :  */
    1844             : static AffixNode *
    1845        1316 : mkANode(IspellDict *Conf, int low, int high, int level, int type)
    1846             : {
    1847             :     int         i;
    1848        1316 :     int         nchar = 0;
    1849        1316 :     uint8       lastchar = '\0';
    1850             :     AffixNode  *rs;
    1851             :     AffixNodeData *data;
    1852        1316 :     int         lownew = low;
    1853             :     int         naff;
    1854             :     AFFIX     **aff;
    1855             : 
    1856        3540 :     for (i = low; i < high; i++)
    1857        2224 :         if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
    1858             :         {
    1859        1160 :             nchar++;
    1860        1160 :             lastchar = GETCHAR(Conf->Affix + i, level, type);
    1861             :         }
    1862             : 
    1863        1316 :     if (!nchar)
    1864         502 :         return NULL;
    1865             : 
    1866         814 :     aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
    1867         814 :     naff = 0;
    1868             : 
    1869         814 :     rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
    1870         814 :     rs->length = nchar;
    1871         814 :     data = rs->data;
    1872             : 
    1873         814 :     lastchar = '\0';
    1874        2410 :     for (i = low; i < high; i++)
    1875        1596 :         if (Conf->Affix[i].replen > level)
    1876             :         {
    1877        1344 :             if (lastchar != GETCHAR(Conf->Affix + i, level, type))
    1878             :             {
    1879        1160 :                 if (lastchar)
    1880             :                 {
    1881             :                     /* Next level of the prefix tree */
    1882         346 :                     data->node = mkANode(Conf, lownew, i, level + 1, type);
    1883         346 :                     if (naff)
    1884             :                     {
    1885          78 :                         data->naff = naff;
    1886          78 :                         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1887          78 :                         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1888          78 :                         naff = 0;
    1889             :                     }
    1890         346 :                     data++;
    1891         346 :                     lownew = i;
    1892             :                 }
    1893        1160 :                 lastchar = GETCHAR(Conf->Affix + i, level, type);
    1894             :             }
    1895        1344 :             data->val = GETCHAR(Conf->Affix + i, level, type);
    1896        1344 :             if (Conf->Affix[i].replen == level + 1)
    1897             :             {                   /* affix stopped */
    1898         608 :                 aff[naff++] = Conf->Affix + i;
    1899             :             }
    1900             :         }
    1901             : 
    1902             :     /* Next level of the prefix tree */
    1903         814 :     data->node = mkANode(Conf, lownew, high, level + 1, type);
    1904         814 :     if (naff)
    1905             :     {
    1906         502 :         data->naff = naff;
    1907         502 :         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1908         502 :         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1909         502 :         naff = 0;
    1910             :     }
    1911             : 
    1912         814 :     pfree(aff);
    1913             : 
    1914         814 :     return rs;
    1915             : }
    1916             : 
    1917             : /*
    1918             :  * Makes the root void node in the prefix tree. The root void node is created
    1919             :  * for affixes which have empty replace string ("repl" field).
    1920             :  */
    1921             : static void
    1922         156 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
    1923             : {
    1924             :     int         i,
    1925         156 :                 cnt = 0;
    1926         156 :     int         start = (issuffix) ? startsuffix : 0;
    1927         156 :     int         end = (issuffix) ? Conf->naffixes : startsuffix;
    1928         156 :     AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
    1929             : 
    1930         156 :     Affix->length = 1;
    1931         156 :     Affix->isvoid = 1;
    1932             : 
    1933         156 :     if (issuffix)
    1934             :     {
    1935          78 :         Affix->data->node = Conf->Suffix;
    1936          78 :         Conf->Suffix = Affix;
    1937             :     }
    1938             :     else
    1939             :     {
    1940          78 :         Affix->data->node = Conf->Prefix;
    1941          78 :         Conf->Prefix = Affix;
    1942             :     }
    1943             : 
    1944             :     /* Count affixes with empty replace string */
    1945         784 :     for (i = start; i < end; i++)
    1946         628 :         if (Conf->Affix[i].replen == 0)
    1947          20 :             cnt++;
    1948             : 
    1949             :     /* There are no affixes with an empty replace string */
    1950         156 :     if (cnt == 0)
    1951         136 :         return;
    1952             : 
    1953          20 :     Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
    1954          20 :     Affix->data->naff = (uint32) cnt;
    1955             : 
    1956          20 :     cnt = 0;
    1957         160 :     for (i = start; i < end; i++)
    1958         140 :         if (Conf->Affix[i].replen == 0)
    1959             :         {
    1960          20 :             Affix->data->aff[cnt] = Conf->Affix + i;
    1961          20 :             cnt++;
    1962             :         }
    1963             : }
    1964             : 
    1965             : /*
    1966             :  * Checks if the affixflag is used by dictionary. Conf->AffixData does not
    1967             :  * contain affixflag if this flag is not used actually by the .dict file.
    1968             :  *
    1969             :  * Conf: current dictionary.
    1970             :  * affixflag: affix flag.
    1971             :  *
    1972             :  * Returns true if the Conf->AffixData array contains affixflag, otherwise
    1973             :  * returns false.
    1974             :  */
    1975             : static bool
    1976         106 : isAffixInUse(IspellDict *Conf, char *affixflag)
    1977             : {
    1978             :     int         i;
    1979             : 
    1980         784 :     for (i = 0; i < Conf->nAffixData; i++)
    1981         766 :         if (IsAffixFlagInUse(Conf, i, affixflag))
    1982          88 :             return true;
    1983             : 
    1984          18 :     return false;
    1985             : }
    1986             : 
    1987             : /*
    1988             :  * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
    1989             :  */
    1990             : void
    1991          78 : NISortAffixes(IspellDict *Conf)
    1992             : {
    1993             :     AFFIX      *Affix;
    1994             :     size_t      i;
    1995             :     CMPDAffix  *ptr;
    1996          78 :     int         firstsuffix = Conf->naffixes;
    1997             : 
    1998          78 :     if (Conf->naffixes == 0)
    1999           0 :         return;
    2000             : 
    2001             :     /* Store compound affixes in the Conf->CompoundAffix array */
    2002          78 :     if (Conf->naffixes > 1)
    2003          78 :         qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
    2004          78 :     Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
    2005          78 :     ptr->affix = NULL;
    2006             : 
    2007         706 :     for (i = 0; i < Conf->naffixes; i++)
    2008             :     {
    2009         628 :         Affix = &(((AFFIX *) Conf->Affix)[i]);
    2010         628 :         if (Affix->type == FF_SUFFIX && i < firstsuffix)
    2011          78 :             firstsuffix = i;
    2012             : 
    2013         734 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
    2014         106 :             isAffixInUse(Conf, Affix->flag))
    2015             :         {
    2016          88 :             bool        issuffix = (Affix->type == FF_SUFFIX);
    2017             : 
    2018          88 :             if (ptr == Conf->CompoundAffix ||
    2019          56 :                 issuffix != (ptr - 1)->issuffix ||
    2020          28 :                 strbncmp((const unsigned char *) (ptr - 1)->affix,
    2021          28 :                          (const unsigned char *) Affix->repl,
    2022          28 :                          (ptr - 1)->len))
    2023             :             {
    2024             :                 /* leave only unique and minimal suffixes */
    2025          74 :                 ptr->affix = Affix->repl;
    2026          74 :                 ptr->len = Affix->replen;
    2027          74 :                 ptr->issuffix = issuffix;
    2028          74 :                 ptr++;
    2029             :             }
    2030             :         }
    2031             :     }
    2032          78 :     ptr->affix = NULL;
    2033          78 :     Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
    2034             : 
    2035             :     /* Start building a prefix tree */
    2036          78 :     Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
    2037          78 :     Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
    2038          78 :     mkVoidAffix(Conf, true, firstsuffix);
    2039          78 :     mkVoidAffix(Conf, false, firstsuffix);
    2040             : }
    2041             : 
    2042             : static AffixNodeData *
    2043        3080 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
    2044             : {
    2045             :     AffixNodeData *StopLow,
    2046             :                *StopHigh,
    2047             :                *StopMiddle;
    2048             :     uint8 symbol;
    2049             : 
    2050        3080 :     if (node->isvoid)
    2051             :     {                           /* search void affixes */
    2052        2680 :         if (node->data->naff)
    2053         228 :             return node->data;
    2054        2452 :         node = node->data->node;
    2055             :     }
    2056             : 
    2057        3588 :     while (node && *level < wrdlen)
    2058             :     {
    2059        3572 :         StopLow = node->data;
    2060        3572 :         StopHigh = node->data + node->length;
    2061        7884 :         while (StopLow < StopHigh)
    2062             :         {
    2063        5916 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2064        5916 :             symbol = GETWCHAR(word, wrdlen, *level, type);
    2065             : 
    2066        5916 :             if (StopMiddle->val == symbol)
    2067             :             {
    2068        1604 :                 (*level)++;
    2069        1604 :                 if (StopMiddle->naff)
    2070         868 :                     return StopMiddle;
    2071         736 :                 node = StopMiddle->node;
    2072         736 :                 break;
    2073             :             }
    2074        4312 :             else if (StopMiddle->val < symbol)
    2075        1072 :                 StopLow = StopMiddle + 1;
    2076             :             else
    2077        3240 :                 StopHigh = StopMiddle;
    2078             :         }
    2079        2704 :         if (StopLow >= StopHigh)
    2080        1968 :             break;
    2081             :     }
    2082        1984 :     return NULL;
    2083             : }
    2084             : 
    2085             : static char *
    2086        1224 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
    2087             : {
    2088             :     /*
    2089             :      * Check compound allow flags
    2090             :      */
    2091             : 
    2092        1224 :     if (flagflags == 0)
    2093             :     {
    2094         844 :         if (Affix->flagflags & FF_COMPOUNDONLY)
    2095          88 :             return NULL;
    2096             :     }
    2097         380 :     else if (flagflags & FF_COMPOUNDBEGIN)
    2098             :     {
    2099           0 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2100           0 :             return NULL;
    2101           0 :         if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
    2102           0 :             if (Affix->type == FF_SUFFIX)
    2103           0 :                 return NULL;
    2104             :     }
    2105         380 :     else if (flagflags & FF_COMPOUNDMIDDLE)
    2106             :     {
    2107         272 :         if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
    2108         152 :             (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
    2109         120 :             return NULL;
    2110             :     }
    2111         108 :     else if (flagflags & FF_COMPOUNDLAST)
    2112             :     {
    2113         108 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2114           0 :             return NULL;
    2115         108 :         if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
    2116         100 :             if (Affix->type == FF_PREFIX)
    2117           0 :                 return NULL;
    2118             :     }
    2119             : 
    2120             :     /*
    2121             :      * make replace pattern of affix
    2122             :      */
    2123        1016 :     if (Affix->type == FF_SUFFIX)
    2124             :     {
    2125         696 :         strcpy(newword, word);
    2126         696 :         strcpy(newword + len - Affix->replen, Affix->find);
    2127         696 :         if (baselen)            /* store length of non-changed part of word */
    2128         696 :             *baselen = len - Affix->replen;
    2129             :     }
    2130             :     else
    2131             :     {
    2132             :         /*
    2133             :          * if the prefix is as long as the whole unchanged part, then the
    2134             :          * word consists only of prefix and suffix, so reject it
    2135             :          */
    2136         320 :         if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
    2137           0 :             return NULL;
    2138         320 :         strcpy(newword, Affix->find);
    2139         320 :         strcat(newword, word + Affix->replen);
    2140             :     }
    2141             : 
    2142             :     /*
    2143             :      * check resulting word
    2144             :      */
    2145        1016 :     if (Affix->issimple)
    2146         320 :         return newword;
    2147         696 :     else if (Affix->isregis)
    2148             :     {
    2149         472 :         if (RS_execute(&(Affix->reg.regis), newword))
    2150         448 :             return newword;
    2151             :     }
    2152             :     else
    2153             :     {
    2154             :         pg_wchar   *data;
    2155             :         size_t      data_len;
    2156             :         int         newword_len;
    2157             : 
    2158             :         /* Convert data string to wide characters */
    2159         224 :         newword_len = strlen(newword);
    2160         224 :         data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
    2161         224 :         data_len = pg_mb2wchar_with_len(newword, data, newword_len);
    2162             : 
    2163         224 :         if (pg_regexec(&(Affix->reg.pregex->regex), data, data_len,
    2164             :                        0, NULL, 0, NULL, 0) == REG_OKAY)
    2165             :         {
    2166         224 :             pfree(data);
    2167         224 :             return newword;
    2168             :         }
    2169           0 :         pfree(data);
    2170             :     }
    2171             : 
    2172          24 :     return NULL;
    2173             : }
    2174             : 
    2175             : static int
    2176         360 : addToResult(char **forms, char **cur, char *word)
    2177             : {
    2178         360 :     if (cur - forms >= MAX_NORM - 1)
    2179           0 :         return 0;
    2180         360 :     if (forms == cur || strcmp(word, *(cur - 1)) != 0)
    2181             :     {
    2182         360 :         *cur = pstrdup(word);
    2183         360 :         *(cur + 1) = NULL;
    2184         360 :         return 1;
    2185             :     }
    2186             : 
    2187           0 :     return 0;
    2188             : }
    2189             : 
    2190             : static char **
    2191        1004 : NormalizeSubWord(IspellDict *Conf, char *word, int flag)
    2192             : {
    2193        1004 :     AffixNodeData *suffix = NULL,
    2194        1004 :                *prefix = NULL;
    2195        1004 :     int         slevel = 0,
    2196        1004 :                 plevel = 0;
    2197        1004 :     int         wrdlen = strlen(word),
    2198             :                 swrdlen;
    2199             :     char      **forms;
    2200             :     char      **cur;
    2201        1004 :     char        newword[2 * MAXNORMLEN] = "";
    2202        1004 :     char        pnewword[2 * MAXNORMLEN] = "";
    2203        1004 :     AffixNode  *snode = Conf->Suffix,
    2204             :                *pnode;
    2205             :     int         i,
    2206             :                 j;
    2207             : 
    2208        1004 :     if (wrdlen > MAXNORMLEN)
    2209           0 :         return NULL;
    2210        1004 :     cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
    2211        1004 :     *cur = NULL;
    2212             : 
    2213             : 
    2214             :     /* Check that the word itself is normal form */
    2215        1004 :     if (FindWord(Conf, word, VoidString, flag))
    2216             :     {
    2217         312 :         *cur = pstrdup(word);
    2218         312 :         cur++;
    2219         312 :         *cur = NULL;
    2220             :     }
    2221             : 
    2222             :     /* Find all other NORMAL forms of the 'word' (check only prefix) */
    2223        1004 :     pnode = Conf->Prefix;
    2224        1004 :     plevel = 0;
    2225        1148 :     while (pnode)
    2226             :     {
    2227        1004 :         prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
    2228        1004 :         if (!prefix)
    2229         860 :             break;
    2230         288 :         for (j = 0; j < prefix->naff; j++)
    2231             :         {
    2232         144 :             if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
    2233             :             {
    2234             :                 /* prefix success */
    2235         128 :                 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
    2236          32 :                     cur += addToResult(forms, cur, newword);
    2237             :             }
    2238             :         }
    2239         144 :         pnode = prefix->node;
    2240             :     }
    2241             : 
    2242             :     /*
    2243             :      * Find all other NORMAL forms of the 'word' (check suffix and then
    2244             :      * prefix)
    2245             :      */
    2246        1732 :     while (snode)
    2247             :     {
    2248        1404 :         int         baselen = 0;
    2249             : 
    2250             :         /* find possible suffix */
    2251        1404 :         suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
    2252        1404 :         if (!suffix)
    2253         676 :             break;
    2254             :         /* foreach suffix check affix */
    2255        1584 :         for (i = 0; i < suffix->naff; i++)
    2256             :         {
    2257         856 :             if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
    2258             :             {
    2259             :                 /* suffix success */
    2260         672 :                 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
    2261         184 :                     cur += addToResult(forms, cur, newword);
    2262             : 
    2263             :                 /* now look up the changed word with prefixes */
    2264         672 :                 pnode = Conf->Prefix;
    2265         672 :                 plevel = 0;
    2266         672 :                 swrdlen = strlen(newword);
    2267         896 :                 while (pnode)
    2268             :                 {
    2269         672 :                     prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
    2270         672 :                     if (!prefix)
    2271         448 :                         break;
    2272         448 :                     for (j = 0; j < prefix->naff; j++)
    2273             :                     {
    2274         224 :                         if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
    2275             :                         {
    2276             :                             /* prefix success */
    2277         384 :                             char       *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
    2278         192 :                             VoidString : prefix->aff[j]->flag;
    2279             : 
    2280         192 :                             if (FindWord(Conf, pnewword, ff, flag))
    2281         144 :                                 cur += addToResult(forms, cur, pnewword);
    2282             :                         }
    2283             :                     }
    2284         224 :                     pnode = prefix->node;
    2285             :                 }
    2286             :             }
    2287             :         }
    2288             : 
    2289         728 :         snode = suffix->node;
    2290             :     }
    2291             : 
    2292        1004 :     if (cur == forms)
    2293             :     {
    2294         444 :         pfree(forms);
    2295         444 :         return NULL;
    2296             :     }
    2297         560 :     return forms;
    2298             : }
    2299             : 
    2300             : typedef struct SplitVar
    2301             : {
    2302             :     int         nstem;
    2303             :     int         lenstem;
    2304             :     char      **stem;
    2305             :     struct SplitVar *next;
    2306             : } SplitVar;
    2307             : 
    2308             : static int
    2309        4040 : CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
    2310             : {
    2311             :     bool        issuffix;
    2312             : 
    2313             :     /* in case CompoundAffix is null: */
    2314        4040 :     if (*ptr == NULL)
    2315           0 :         return -1;
    2316             : 
    2317        4040 :     if (CheckInPlace)
    2318             :     {
    2319        7712 :         while ((*ptr)->affix)
    2320             :         {
    2321        4296 :             if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
    2322             :             {
    2323          40 :                 len = (*ptr)->len;
    2324          40 :                 issuffix = (*ptr)->issuffix;
    2325          40 :                 (*ptr)++;
    2326          40 :                 return (issuffix) ? len : 0;
    2327             :             }
    2328        4256 :             (*ptr)++;
    2329             :         }
    2330             :     }
    2331             :     else
    2332             :     {
    2333             :         char       *affbegin;
    2334             : 
    2335        1128 :         while ((*ptr)->affix)
    2336             :         {
    2337         628 :             if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
    2338             :             {
    2339          84 :                 len = (*ptr)->len + (affbegin - word);
    2340          84 :                 issuffix = (*ptr)->issuffix;
    2341          84 :                 (*ptr)++;
    2342          84 :                 return (issuffix) ? len : 0;
    2343             :             }
    2344         544 :             (*ptr)++;
    2345             :         }
    2346             :     }
    2347        3916 :     return -1;
    2348             : }
    2349             : 
    2350             : static SplitVar *
    2351         940 : CopyVar(SplitVar *s, int makedup)
    2352             : {
    2353         940 :     SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));
    2354             : 
    2355         940 :     v->next = NULL;
    2356         940 :     if (s)
    2357             :     {
    2358             :         int         i;
    2359             : 
    2360         440 :         v->lenstem = s->lenstem;
    2361         440 :         v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
    2362         440 :         v->nstem = s->nstem;
    2363         668 :         for (i = 0; i < s->nstem; i++)
    2364         228 :             v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
    2365             :     }
    2366             :     else
    2367             :     {
    2368         500 :         v->lenstem = 16;
    2369         500 :         v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
    2370         500 :         v->nstem = 0;
    2371             :     }
    2372         940 :     return v;
    2373             : }
    2374             : 
    2375             : static void
    2376        1260 : AddStem(SplitVar *v, char *word)
    2377             : {
    2378        1260 :     if (v->nstem >= v->lenstem)
    2379             :     {
    2380           0 :         v->lenstem *= 2;
    2381           0 :         v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
    2382             :     }
    2383             : 
    2384        1260 :     v->stem[v->nstem] = word;
    2385        1260 :     v->nstem++;
    2386        1260 : }
    2387             : 
    2388             : static SplitVar *
    2389         880 : SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
    2390             : {
    2391         880 :     SplitVar   *var = NULL;
    2392             :     SPNodeData *StopLow,
    2393             :                *StopHigh,
    2394         880 :                *StopMiddle = NULL;
    2395         880 :     SPNode     *node = (snode) ? snode : Conf->Dictionary;
    2396         880 :     int         level = (snode) ? minpos : startpos;    /* recursive
    2397             :                                                          * minpos==level */
    2398             :     int         lenaff;
    2399             :     CMPDAffix  *caff;
    2400             :     char       *notprobed;
    2401         880 :     int         compoundflag = 0;
    2402             : 
    2403         880 :     notprobed = (char *) palloc(wordlen);
    2404         880 :     memset(notprobed, 1, wordlen);
    2405         880 :     var = CopyVar(orig, 1);
    2406             : 
    2407        4968 :     while (level < wordlen)
    2408             :     {
    2409             :         /* find word with an epenthetic and/or compound affix */
    2410        4796 :         caff = Conf->CompoundAffix;
    2411        4920 :         while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
    2412             :         {
    2413             :             /*
    2414             :              * one of the compound affixes matched, so check whether the word exists
    2415             :              */
    2416             :             char        buf[MAXNORMLEN];
    2417             :             char      **subres;
    2418             : 
    2419         124 :             lenaff = level - startpos + lenaff;
    2420             : 
    2421         124 :             if (!notprobed[startpos + lenaff - 1])
    2422           0 :                 continue;
    2423             : 
    2424         124 :             if (level + lenaff - 1 <= minpos)
    2425           0 :                 continue;
    2426             : 
    2427         124 :             if (lenaff >= MAXNORMLEN)
    2428           0 :                 continue;       /* skip too big value */
    2429         124 :             if (lenaff > 0)
    2430         124 :                 memcpy(buf, word + startpos, lenaff);
    2431         124 :             buf[lenaff] = '\0';
    2432             : 
    2433         124 :             if (level == 0)
    2434           0 :                 compoundflag = FF_COMPOUNDBEGIN;
    2435         124 :             else if (level == wordlen - 1)
    2436           0 :                 compoundflag = FF_COMPOUNDLAST;
    2437             :             else
    2438         124 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2439         124 :             subres = NormalizeSubWord(Conf, buf, compoundflag);
    2440         124 :             if (subres)
    2441             :             {
    2442             :                 /* Yes, it was a word from dictionary */
    2443          60 :                 SplitVar   *new = CopyVar(var, 0);
    2444          60 :                 SplitVar   *ptr = var;
    2445          60 :                 char      **sptr = subres;
    2446             : 
    2447          60 :                 notprobed[startpos + lenaff - 1] = 0;
    2448             : 
    2449         120 :                 while (*sptr)
    2450             :                 {
    2451          60 :                     AddStem(new, *sptr);
    2452          60 :                     sptr++;
    2453             :                 }
    2454          60 :                 pfree(subres);
    2455             : 
    2456          60 :                 while (ptr->next)
    2457           0 :                     ptr = ptr->next;
    2458          60 :                 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
    2459             : 
    2460          60 :                 pfree(new->stem);
    2461          60 :                 pfree(new);
    2462             :             }
    2463             :         }
    2464             : 
    2465        4796 :         if (!node)
    2466         500 :             break;
    2467             : 
    2468        4296 :         StopLow = node->data;
    2469        4296 :         StopHigh = node->data + node->length;
    2470        5796 :         while (StopLow < StopHigh)
    2471             :         {
    2472        5376 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2473        5376 :             if (StopMiddle->val == ((uint8 *) (word))[level])
    2474        3876 :                 break;
    2475        1500 :             else if (StopMiddle->val < ((uint8 *) (word))[level])
    2476         652 :                 StopLow = StopMiddle + 1;
    2477             :             else
    2478         848 :                 StopHigh = StopMiddle;
    2479             :         }
    2480             : 
    2481        4296 :         if (StopLow < StopHigh)
    2482             :         {
    2483        3876 :             if (startpos == 0)
    2484        2180 :                 compoundflag = FF_COMPOUNDBEGIN;
    2485        1696 :             else if (level == wordlen - 1)
    2486         192 :                 compoundflag = FF_COMPOUNDLAST;
    2487             :             else
    2488        1504 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2489             : 
    2490             :             /* find infinitive */
    2491        3876 :             if (StopMiddle->isword &&
    2492        1024 :                 (StopMiddle->compoundflag & compoundflag) &&
    2493         848 :                 notprobed[level])
    2494             :             {
    2495             :                 /* ok, we found full compoundallowed word */
    2496         848 :                 if (level > minpos)
    2497             :                 {
    2498             :                     /* and its length more than minimal */
    2499         528 :                     if (wordlen == level + 1)
    2500             :                     {
    2501             :                         /* well, it was last word */
    2502         208 :                         AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2503         208 :                         pfree(notprobed);
    2504         208 :                         return var;
    2505             :                     }
    2506             :                     else
    2507             :                     {
    2508             :                         /* then we will search more big word at the same point */
    2509         320 :                         SplitVar   *ptr = var;
    2510             : 
    2511         496 :                         while (ptr->next)
    2512         176 :                             ptr = ptr->next;
    2513         320 :                         ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
    2514             :                         /* we can find next word */
    2515         320 :                         level++;
    2516         320 :                         AddStem(var, pnstrdup(word + startpos, level - startpos));
    2517         320 :                         node = Conf->Dictionary;
    2518         320 :                         startpos = level;
    2519         320 :                         continue;
    2520             :                     }
    2521             :                 }
    2522             :             }
    2523        3348 :             node = StopMiddle->node;
    2524             :         }
    2525             :         else
    2526         420 :             node = NULL;
    2527        3768 :         level++;
    2528             :     }
    2529             : 
    2530         672 :     AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2531         672 :     pfree(notprobed);
    2532         672 :     return var;
    2533             : }
    2534             : 
    2535             : static void
    2536         876 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
    2537             : {
    2538         876 :     if (*lres == NULL)
    2539         404 :         *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
    2540             : 
    2541         876 :     if (*lcur - *lres < MAX_NORM - 1)
    2542             :     {
    2543         876 :         (*lcur)->lexeme = word;
    2544         876 :         (*lcur)->flags = flags;
    2545         876 :         (*lcur)->nvariant = NVariant;
    2546         876 :         (*lcur)++;
    2547         876 :         (*lcur)->lexeme = NULL;
    2548             :     }
    2549         876 : }
    2550             : 
    2551             : TSLexeme *
    2552         500 : NINormalizeWord(IspellDict *Conf, char *word)
    2553             : {
    2554             :     char      **res;
    2555         500 :     TSLexeme   *lcur = NULL,
    2556         500 :                *lres = NULL;
    2557         500 :     uint16      NVariant = 1;
    2558             : 
    2559         500 :     res = NormalizeSubWord(Conf, word, 0);
    2560             : 
    2561         500 :     if (res)
    2562             :     {
    2563         324 :         char      **ptr = res;
    2564             : 
    2565         760 :         while (*ptr && (lcur - lres) < MAX_NORM)
    2566             :         {
    2567         436 :             addNorm(&lres, &lcur, *ptr, 0, NVariant++);
    2568         436 :             ptr++;
    2569             :         }
    2570         324 :         pfree(res);
    2571             :     }
    2572             : 
    2573         500 :     if (Conf->usecompound)
    2574             :     {
    2575         500 :         int         wordlen = strlen(word);
    2576             :         SplitVar   *ptr,
    2577         500 :                    *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
    2578             :         int         i;
    2579             : 
    2580        1380 :         while (var)
    2581             :         {
    2582         880 :             if (var->nstem > 1)
    2583             :             {
    2584         380 :                 char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
    2585             : 
    2586         380 :                 if (subres)
    2587             :                 {
    2588         176 :                     char      **subptr = subres;
    2589             : 
    2590         352 :                     while (*subptr)
    2591             :                     {
    2592         440 :                         for (i = 0; i < var->nstem - 1; i++)
    2593             :                         {
    2594         264 :                             addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
    2595             :                         }
    2596             : 
    2597         176 :                         addNorm(&lres, &lcur, *subptr, 0, NVariant);
    2598         176 :                         subptr++;
    2599         176 :                         NVariant++;
    2600             :                     }
    2601             : 
    2602         176 :                     pfree(subres);
    2603         176 :                     var->stem[0] = NULL;
    2604         176 :                     pfree(var->stem[var->nstem - 1]);
    2605             :                 }
    2606             :             }
    2607             : 
    2608        1828 :             for (i = 0; i < var->nstem && var->stem[i]; i++)
    2609         948 :                 pfree(var->stem[i]);
    2610         880 :             ptr = var->next;
    2611         880 :             pfree(var->stem);
    2612         880 :             pfree(var);
    2613         880 :             var = ptr;
    2614             :         }
    2615             :     }
    2616             : 
    2617         500 :     return lres;
    2618             : }

Generated by: LCOV version 1.14