LCOV - code coverage report
Current view: top level - src/backend/tsearch - spell.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 1037 1126 92.1 %
Date: 2025-04-02 17:15:16 Functions: 46 46 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spell.c
       4             :  *      Normalizing word with ISpell
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  * Ispell dictionary
       9             :  * -----------------
      10             :  *
      11             :  * Rules of dictionaries are defined in two files with .affix and .dict
      12             :  * extensions. They are used by spell checker programs Ispell and Hunspell.
      13             :  *
      14             :  * An .affix file declares morphological rules to get a basic form of words.
      15             :  * The format of an .affix file has different structure for Ispell and Hunspell
      16             :  * dictionaries. The Hunspell format is more complicated. But when an .affix
      17             :  * file is imported and compiled, it is stored in the same structure AffixNode.
      18             :  *
      19             :  * A .dict file stores a list of basic forms of words with references to
      20             :  * affix rules. The format of a .dict file has the same structure for Ispell
      21             :  * and Hunspell dictionaries.
      22             :  *
      23             :  * Compilation of a dictionary
      24             :  * ---------------------------
      25             :  *
      26             :  * A compiled dictionary is stored in the IspellDict structure. Compilation of
      27             :  * a dictionary is divided into several steps:
      28             :  *  - NIImportDictionary() - stores each word of a .dict file in the
      29             :  *    temporary Spell field.
      30             :  *  - NIImportAffixes() - stores affix rules of an .affix file in the
      31             :  *    Affix field (not temporary) if an .affix file has the Ispell format.
      32             :  *    -> NIImportOOAffixes() - stores affix rules if an .affix file has the
      33             :  *       Hunspell format. The AffixData field is initialized if AF parameter
      34             :  *       is defined.
      35             :  *  - NISortDictionary() - builds a prefix tree (Trie) from the word list
      36             :  *    and stores it in the Dictionary field. The word list is taken from the
      37             :  *    Spell field. The AffixData field is initialized if AF parameter is not
      38             :  *    defined.
      39             :  *  - NISortAffixes():
      40             :  *    - builds a list of compound affixes from the affix list and stores it
      41             :  *      in the CompoundAffix.
      42             :  *    - builds prefix trees (Trie) from the affix list for prefixes and suffixes
      43             :  *      and stores them in the Prefix and Suffix fields.
      44             :  *    The affix list is taken from the Affix field.
      45             :  *
      46             :  * Memory management
      47             :  * -----------------
      48             :  *
      49             :  * The IspellDict structure has the Spell field, which is used only at compile
      50             :  * time. The Spell field stores the word list, which can take a lot of memory.
      51             :  * Therefore, once a dictionary has been compiled, this field is cleared by
      52             :  * NIFinishBuild().
      53             :  *
      54             :  * All resources that should be cleared by NIFinishBuild() are allocated using
      55             :  * tmpalloc() and tmpalloc0().
      56             :  *
      57             :  * IDENTIFICATION
      58             :  *    src/backend/tsearch/spell.c
      59             :  *
      60             :  *-------------------------------------------------------------------------
      61             :  */
      62             : 
      63             : #include "postgres.h"
      64             : 
      65             : #include "catalog/pg_collation.h"
      66             : #include "miscadmin.h"
      67             : #include "tsearch/dicts/spell.h"
      68             : #include "tsearch/ts_locale.h"
      69             : #include "utils/formatting.h"
      70             : #include "utils/memutils.h"
      71             : 
      72             : 
      73             : /*
      74             :  * Initialization requires a lot of memory that's not needed
      75             :  * after the initialization is done.  During initialization,
      76             :  * CurrentMemoryContext is the long-lived memory context associated
      77             :  * with the dictionary cache entry.  We keep the short-lived stuff
      78             :  * in the Conf->buildCxt context.
      79             :  */
      80             : #define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
      81             : #define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
      82             : 
      83             : /*
      84             :  * Prepare for constructing an ISpell dictionary.
      85             :  *
      86             :  * The IspellDict struct is assumed to be zeroed when allocated.
      87             :  */
      88             : void
      89         148 : NIStartBuild(IspellDict *Conf)
      90             : {
      91             :     /*
      92             :      * The temp context is a child of CurTransactionContext, so that it will
      93             :      * go away automatically on error.
      94             :      */
      95         148 :     Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
      96             :                                            "Ispell dictionary init context",
      97             :                                            ALLOCSET_DEFAULT_SIZES);
      98         148 : }
      99             : 
     100             : /*
     101             :  * Clean up when dictionary construction is complete.
     102             :  */
     103             : void
     104         124 : NIFinishBuild(IspellDict *Conf)
     105             : {
     106             :     /* Release no-longer-needed temp memory */
     107         124 :     MemoryContextDelete(Conf->buildCxt);
     108             :     /* Just for cleanliness, zero the now-dangling pointers */
     109         124 :     Conf->buildCxt = NULL;
     110         124 :     Conf->Spell = NULL;
     111         124 :     Conf->firstfree = NULL;
     112         124 :     Conf->CompoundAffixFlags = NULL;
     113         124 : }
     114             : 
     115             : 
     116             : /*
     117             :  * "Compact" palloc: allocate without extra palloc overhead.
     118             :  *
     119             :  * Since we have no need to free the ispell data items individually, there's
     120             :  * not much value in the per-chunk overhead normally consumed by palloc.
     121             :  * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
     122             :  *
     123             :  * We currently pre-zero all data allocated this way, even though some of it
     124             :  * doesn't need that.  The cpalloc and cpalloc0 macros are just documentation
     125             :  * to indicate which allocations actually require zeroing.
     126             :  */
     127             : #define COMPACT_ALLOC_CHUNK 8192    /* amount to get from palloc at once */
     128             : #define COMPACT_MAX_REQ     1024    /* must be < COMPACT_ALLOC_CHUNK */
     129             : 
     130             : static void *
     131       13942 : compact_palloc0(IspellDict *Conf, size_t size)
     132             : {
     133             :     void       *result;
     134             : 
     135             :     /* Should only be called during init */
     136             :     Assert(Conf->buildCxt != NULL);
     137             : 
     138             :     /* No point in this for large chunks */
     139       13942 :     if (size > COMPACT_MAX_REQ)
     140           0 :         return palloc0(size);
     141             : 
     142             :     /* Keep everything maxaligned */
     143       13942 :     size = MAXALIGN(size);
     144             : 
     145             :     /* Need more space? */
     146       13942 :     if (size > Conf->avail)
     147             :     {
     148         142 :         Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
     149         142 :         Conf->avail = COMPACT_ALLOC_CHUNK;
     150             :     }
     151             : 
     152       13942 :     result = Conf->firstfree;
     153       13942 :     Conf->firstfree += size;
     154       13942 :     Conf->avail -= size;
     155             : 
     156       13942 :     return result;
     157             : }
     158             : 
     159             : #define cpalloc(size) compact_palloc0(Conf, size)
     160             : #define cpalloc0(size) compact_palloc0(Conf, size)
     161             : 
     162             : static char *
     163        7416 : cpstrdup(IspellDict *Conf, const char *str)
     164             : {
     165        7416 :     char       *res = cpalloc(strlen(str) + 1);
     166             : 
     167        7416 :     strcpy(res, str);
     168        7416 :     return res;
     169             : }
     170             : 
     171             : 
     172             : /*
     173             :  * Apply str_tolower(), producing a temporary result (in the buildCxt).
     174             :  */
     175             : static char *
     176        6284 : lowerstr_ctx(IspellDict *Conf, const char *src)
     177             : {
     178             :     MemoryContext saveCtx;
     179             :     char       *dst;
     180             : 
     181        6284 :     saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
     182        6284 :     dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
     183        6284 :     MemoryContextSwitchTo(saveCtx);
     184             : 
     185        6284 :     return dst;
     186             : }
     187             : 
     188             : #define MAX_NORM 1024
     189             : #define MAXNORMLEN 256
     190             : 
     191             : #define STRNCMP(s,p)    strncmp( (s), (p), strlen(p) )
     192             : #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
     193             : #define GETCHAR(A,N,T)    GETWCHAR( (A)->repl, (A)->replen, N, T )
     194             : 
     195             : static const char *VoidString = "";
     196             : 
     197             : static int
     198        3264 : cmpspell(const void *s1, const void *s2)
     199             : {
     200        3264 :     return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
     201             : }
     202             : 
     203             : static int
     204        2544 : cmpspellaffix(const void *s1, const void *s2)
     205             : {
     206        5088 :     return strcmp((*(SPELL *const *) s1)->p.flag,
     207        2544 :                   (*(SPELL *const *) s2)->p.flag);
     208             : }
     209             : 
     210             : static int
     211        4422 : cmpcmdflag(const void *f1, const void *f2)
     212             : {
     213        4422 :     CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
     214        4422 :                *fv2 = (CompoundAffixFlag *) f2;
     215             : 
     216             :     Assert(fv1->flagMode == fv2->flagMode);
     217             : 
     218        4422 :     if (fv1->flagMode == FM_NUM)
     219             :     {
     220         866 :         if (fv1->flag.i == fv2->flag.i)
     221         126 :             return 0;
     222             : 
     223         740 :         return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
     224             :     }
     225             : 
     226        3556 :     return strcmp(fv1->flag.s, fv2->flag.s);
     227             : }
     228             : 
     229             : static char *
     230        1300 : findchar(char *str, int c)
     231             : {
     232        9566 :     while (*str)
     233             :     {
     234        9424 :         if (t_iseq(str, c))
     235        1158 :             return str;
     236        8266 :         str += pg_mblen(str);
     237             :     }
     238             : 
     239         142 :     return NULL;
     240             : }
     241             : 
     242             : static char *
     243          48 : findchar2(char *str, int c1, int c2)
     244             : {
     245        1008 :     while (*str)
     246             :     {
     247        1008 :         if (t_iseq(str, c1) || t_iseq(str, c2))
     248          48 :             return str;
     249         960 :         str += pg_mblen(str);
     250             :     }
     251             : 
     252           0 :     return NULL;
     253             : }
     254             : 
     255             : 
     256             : /* backward string compare for suffix tree operations */
     257             : static int
     258        1300 : strbcmp(const unsigned char *s1, const unsigned char *s2)
     259             : {
     260        1300 :     int         l1 = strlen((const char *) s1) - 1,
     261        1300 :                 l2 = strlen((const char *) s2) - 1;
     262             : 
     263        1738 :     while (l1 >= 0 && l2 >= 0)
     264             :     {
     265        1360 :         if (s1[l1] < s2[l2])
     266         296 :             return -1;
     267        1064 :         if (s1[l1] > s2[l2])
     268         626 :             return 1;
     269         438 :         l1--;
     270         438 :         l2--;
     271             :     }
     272         378 :     if (l1 < l2)
     273         102 :         return -1;
     274         276 :     if (l1 > l2)
     275         232 :         return 1;
     276             : 
     277          44 :     return 0;
     278             : }
     279             : 
     280             : static int
     281          44 : strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
     282             : {
     283          44 :     int         l1 = strlen((const char *) s1) - 1,
     284          44 :                 l2 = strlen((const char *) s2) - 1,
     285          44 :                 l = count;
     286             : 
     287          66 :     while (l1 >= 0 && l2 >= 0 && l > 0)
     288             :     {
     289          44 :         if (s1[l1] < s2[l2])
     290          22 :             return -1;
     291          22 :         if (s1[l1] > s2[l2])
     292           0 :             return 1;
     293          22 :         l1--;
     294          22 :         l2--;
     295          22 :         l--;
     296             :     }
     297          22 :     if (l == 0)
     298          22 :         return 0;
     299           0 :     if (l1 < l2)
     300           0 :         return -1;
     301           0 :     if (l1 > l2)
     302           0 :         return 1;
     303           0 :     return 0;
     304             : }
     305             : 
     306             : /*
     307             :  * Compares affixes.
     308             :  * First compares the type of an affix. Prefixes should go before suffixes.
     309             :  * If the types are equal, then compares the replaceable strings.
     310             :  */
     311             : static int
     312        2200 : cmpaffix(const void *s1, const void *s2)
     313             : {
     314        2200 :     const AFFIX *a1 = (const AFFIX *) s1;
     315        2200 :     const AFFIX *a2 = (const AFFIX *) s2;
     316             : 
     317        2200 :     if (a1->type < a2->type)
     318         502 :         return -1;
     319        1698 :     if (a1->type > a2->type)
     320         150 :         return 1;
     321        1548 :     if (a1->type == FF_PREFIX)
     322         248 :         return strcmp(a1->repl, a2->repl);
     323             :     else
     324        1300 :         return strbcmp((const unsigned char *) a1->repl,
     325        1300 :                        (const unsigned char *) a2->repl);
     326             : }
     327             : 
     328             : /*
     329             :  * Gets an affix flag from the set of affix flags (sflagset).
     330             :  *
     331             :  * Several flags can be stored in a single string. Flags can be represented by:
     332             :  * - 1 character (FM_CHAR). A character may be Unicode.
     333             :  * - 2 characters (FM_LONG). A character may be Unicode.
     334             :  * - numbers from 1 to 65000 (FM_NUM).
     335             :  *
     336             :  * Depending on the flagMode an affix string can have the following format:
     337             :  * - FM_CHAR: ABCD
     338             :  *   Here we have 4 flags: A, B, C and D
     339             :  * - FM_LONG: ABCDE*
     340             :  *   Here we have 3 flags: AB, CD and E*
     341             :  * - FM_NUM: 200,205,50
     342             :  *   Here we have 3 flags: 200, 205 and 50
     343             :  *
     344             :  * Conf: current dictionary.
     345             :  * sflagset: the set of affix flags. Returns a reference to the start of the next
     346             :  *           affix flag.
     347             :  * sflag: returns an affix flag from sflagset.
     348             :  */
     349             : static void
     350        6724 : getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
     351             : {
     352             :     int32       s;
     353             :     char       *next;
     354        6724 :     const char *sbuf = *sflagset;
     355             :     int         maxstep;
     356        6724 :     bool        stop = false;
     357        6724 :     bool        met_comma = false;
     358             : 
     359        6724 :     maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
     360             : 
     361        8740 :     while (**sflagset)
     362             :     {
     363        8740 :         switch (Conf->flagMode)
     364             :         {
     365        7490 :             case FM_LONG:
     366             :             case FM_CHAR:
     367        7490 :                 COPYCHAR(sflag, *sflagset);
     368        7490 :                 sflag += pg_mblen(*sflagset);
     369             : 
     370             :                 /* Go to start of the next flag */
     371        7490 :                 *sflagset += pg_mblen(*sflagset);
     372             : 
     373             :                 /* Check if we get all characters of flag */
     374        7490 :                 maxstep--;
     375        7490 :                 stop = (maxstep == 0);
     376        7490 :                 break;
     377        1250 :             case FM_NUM:
     378        1250 :                 errno = 0;
     379        1250 :                 s = strtol(*sflagset, &next, 10);
     380        1250 :                 if (*sflagset == next || errno == ERANGE)
     381           6 :                     ereport(ERROR,
     382             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     383             :                              errmsg("invalid affix flag \"%s\"", *sflagset)));
     384        1244 :                 if (s < 0 || s > FLAGNUM_MAXSIZE)
     385           0 :                     ereport(ERROR,
     386             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     387             :                              errmsg("affix flag \"%s\" is out of range",
     388             :                                     *sflagset)));
     389        1244 :                 sflag += sprintf(sflag, "%0d", s);
     390             : 
     391             :                 /* Go to start of the next flag */
     392        1244 :                 *sflagset = next;
     393        1888 :                 while (**sflagset)
     394             :                 {
     395        1288 :                     if (isdigit((unsigned char) **sflagset))
     396             :                     {
     397         644 :                         if (!met_comma)
     398           0 :                             ereport(ERROR,
     399             :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     400             :                                      errmsg("invalid affix flag \"%s\"",
     401             :                                             *sflagset)));
     402         644 :                         break;
     403             :                     }
     404         644 :                     else if (t_iseq(*sflagset, ','))
     405             :                     {
     406         644 :                         if (met_comma)
     407           0 :                             ereport(ERROR,
     408             :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     409             :                                      errmsg("invalid affix flag \"%s\"",
     410             :                                             *sflagset)));
     411         644 :                         met_comma = true;
     412             :                     }
     413           0 :                     else if (!isspace((unsigned char) **sflagset))
     414             :                     {
     415           0 :                         ereport(ERROR,
     416             :                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     417             :                                  errmsg("invalid character in affix flag \"%s\"",
     418             :                                         *sflagset)));
     419             :                     }
     420             : 
     421         644 :                     *sflagset += pg_mblen(*sflagset);
     422             :                 }
     423        1244 :                 stop = true;
     424        1244 :                 break;
     425           0 :             default:
     426           0 :                 elog(ERROR, "unrecognized type of Conf->flagMode: %d",
     427             :                      Conf->flagMode);
     428             :         }
     429             : 
     430        8734 :         if (stop)
     431        6718 :             break;
     432             :     }
     433             : 
     434        6718 :     if (Conf->flagMode == FM_LONG && maxstep > 0)
     435           0 :         ereport(ERROR,
     436             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     437             :                  errmsg("invalid affix flag \"%s\" with \"long\" flag value",
     438             :                         sbuf)));
     439             : 
     440        6718 :     *sflag = '\0';
     441        6718 : }
     442             : 
     443             : /*
     444             :  * Checks if the affix set Conf->AffixData[affix] contains affixflag.
     445             :  * Conf->AffixData[affix] does not contain affixflag if this flag is not
     446             :  * actually used by the .dict file.
     447             :  *
     448             :  * Conf: current dictionary.
     449             :  * affix: index of the Conf->AffixData array.
     450             :  * affixflag: the affix flag.
     451             :  *
     452             :  * Returns true if the string Conf->AffixData[affix] contains affixflag,
     453             :  * otherwise returns false.
     454             :  */
     455             : static bool
     456        2366 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
     457             : {
     458             :     const char *flagcur;
     459             :     char        flag[BUFSIZ];
     460             : 
     461        2366 :     if (*affixflag == 0)
     462         636 :         return true;
     463             : 
     464             :     Assert(affix < Conf->nAffixData);
     465             : 
     466        1730 :     flagcur = Conf->AffixData[affix];
     467             : 
     468        5064 :     while (*flagcur)
     469             :     {
     470        3844 :         getNextFlagFromString(Conf, &flagcur, flag);
     471             :         /* Compare first affix flag in flagcur with affixflag */
     472        3844 :         if (strcmp(flag, affixflag) == 0)
     473         510 :             return true;
     474             :     }
     475             : 
     476             :     /* Could not find affixflag */
     477        1220 :     return false;
     478             : }
     479             : 
     480             : /*
     481             :  * Adds the new word into the temporary array Spell.
     482             :  *
     483             :  * Conf: current dictionary.
     484             :  * word: new word.
     485             :  * flag: set of affix flags. Use getNextFlagFromString() to get a single flag.
     486             :  */
     487             : static void
     488        1300 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
     489             : {
     490        1300 :     if (Conf->nspell >= Conf->mspell)
     491             :     {
     492         142 :         if (Conf->mspell)
     493             :         {
     494           0 :             Conf->mspell *= 2;
     495           0 :             Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
     496             :         }
     497             :         else
     498             :         {
     499         142 :             Conf->mspell = 1024 * 20;
     500         142 :             Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
     501             :         }
     502             :     }
     503        1300 :     Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
     504        1300 :     strcpy(Conf->Spell[Conf->nspell]->word, word);
     505        2600 :     Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
     506        1300 :         ? cpstrdup(Conf, flag) : VoidString;
     507        1300 :     Conf->nspell++;
     508        1300 : }
     509             : 
     510             : /*
     511             :  * Imports dictionary into the temporary array Spell.
     512             :  *
     513             :  * Note caller must already have applied get_tsearch_config_filename.
     514             :  *
     515             :  * Conf: current dictionary.
     516             :  * filename: path to the .dict file.
     517             :  */
     518             : void
     519         142 : NIImportDictionary(IspellDict *Conf, const char *filename)
     520             : {
     521             :     tsearch_readline_state trst;
     522             :     char       *line;
     523             : 
     524         142 :     if (!tsearch_readline_begin(&trst, filename))
     525           0 :         ereport(ERROR,
     526             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     527             :                  errmsg("could not open dictionary file \"%s\": %m",
     528             :                         filename)));
     529             : 
     530        1442 :     while ((line = tsearch_readline(&trst)) != NULL)
     531             :     {
     532             :         char       *s,
     533             :                    *pstr;
     534             : 
     535             :         /* Set of affix flags */
     536             :         const char *flag;
     537             : 
     538             :         /* Extract flag from the line */
     539        1300 :         flag = NULL;
     540        1300 :         if ((s = findchar(line, '/')))
     541             :         {
     542        1158 :             *s++ = '\0';
     543        1158 :             flag = s;
     544        4616 :             while (*s)
     545             :             {
     546             :                 /* we allow only single-byte flag characters, for faster processing */
     547        4616 :                 if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
     548        3458 :                     s++;
     549             :                 else
     550             :                 {
     551        1158 :                     *s = '\0';
     552        1158 :                     break;
     553             :                 }
     554             :             }
     555             :         }
     556             :         else
     557         142 :             flag = "";
     558             : 
     559             :         /* Remove trailing spaces */
     560        1300 :         s = line;
     561        9424 :         while (*s)
     562             :         {
     563        8266 :             if (isspace((unsigned char) *s))
     564             :             {
     565         142 :                 *s = '\0';
     566         142 :                 break;
     567             :             }
     568        8124 :             s += pg_mblen(s);
     569             :         }
     570        1300 :         pstr = lowerstr_ctx(Conf, line);
     571             : 
     572        1300 :         NIAddSpell(Conf, pstr, flag);
     573        1300 :         pfree(pstr);
     574             : 
     575        1300 :         pfree(line);
     576             :     }
     577         142 :     tsearch_readline_end(&trst);
     578         142 : }
     579             : 
     580             : /*
     581             :  * Searches for a basic form of a word in the prefix tree. This form was
     582             :  * generated using an affix rule. This rule may not be present in the affix
     583             :  * set of the basic form.
     584             :  *
     585             :  * For example, we have the entry in the .dict file:
     586             :  * meter/GMD
     587             :  *
     588             :  * The affix rule with the flag S:
     589             :  * SFX S   y     ies        [^aeiou]y
     590             :  * is not present here.
     591             :  *
     592             :  * The affix rule with the flag M:
     593             :  * SFX M   0     's         .
     594             :  * is present here.
     595             :  *
     596             :  * Conf: current dictionary.
     597             :  * word: basic form of word.
     598             :  * affixflag: affix flag, by which a basic form of word was generated.
     599             :  * flag: compound flag used to compare with StopMiddle->compoundflag.
     600             :  *
     601             :  * Returns 1 if the word was found in the prefix tree, else returns 0.
     602             :  */
     603             : static int
     604        2994 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
     605             : {
     606        2994 :     SPNode     *node = Conf->Dictionary;
     607             :     SPNodeData *StopLow,
     608             :                *StopHigh,
     609             :                *StopMiddle;
     610        2994 :     const uint8 *ptr = (const uint8 *) word;
     611             : 
     612        2994 :     flag &= FF_COMPOUNDFLAGMASK;
     613             : 
     614       13944 :     while (node && *ptr)
     615             :     {
     616       13224 :         StopLow = node->data;
     617       13224 :         StopHigh = node->data + node->length;
     618       18918 :         while (StopLow < StopHigh)
     619             :         {
     620       17652 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
     621       17652 :             if (StopMiddle->val == *ptr)
     622             :             {
     623       11958 :                 if (*(ptr + 1) == '\0' && StopMiddle->isword)
     624             :                 {
     625        1146 :                     if (flag == 0)
     626             :                     {
     627             :                         /*
     628             :                          * The word can be formed only with another word. And
     629             :                          * in the flag parameter there is not a sign that we
     630             :                          * search compound words.
     631             :                          */
     632         726 :                         if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
     633           0 :                             return 0;
     634             :                     }
     635         420 :                     else if ((flag & StopMiddle->compoundflag) == 0)
     636           0 :                         return 0;
     637             : 
     638             :                     /*
     639             :                      * Check if this affix rule is presented in the affix set
     640             :                      * with index StopMiddle->affix.
     641             :                      */
     642        1146 :                     if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
     643        1008 :                         return 1;
     644             :                 }
     645       10950 :                 node = StopMiddle->node;
     646       10950 :                 ptr++;
     647       10950 :                 break;
     648             :             }
     649        5694 :             else if (StopMiddle->val < *ptr)
     650        1932 :                 StopLow = StopMiddle + 1;
     651             :             else
     652        3762 :                 StopHigh = StopMiddle;
     653             :         }
     654       12216 :         if (StopLow >= StopHigh)
     655        1266 :             break;
     656             :     }
     657        1986 :     return 0;
     658             : }
     659             : 
     660             : /*
     661             :  * Adds a new affix rule to the Affix field.
     662             :  *
     663             :  * Conf: current dictionary.
     664             :  * flag: affix flag ('\' in the below example).
     665             :  * flagflags: set of flags from the flagval field for this affix rule. This set
     666             :  *            is listed after '/' character in the added string (repl).
     667             :  *
     668             :  *            For example L flag in the hunspell_sample.affix:
     669             :  *            SFX \   0 Y/L [^Y]
     670             :  *
     671             :  * mask: condition for search ('[^Y]' in the above example).
     672             :  * find: stripping characters from beginning (at prefix) or end (at suffix)
     673             :  *       of the word ('0' in the above example, 0 means that there is not
     674             :  *       stripping character).
     675             :  * repl: adding string after stripping ('Y' in the above example).
     676             :  * type: FF_SUFFIX or FF_PREFIX.
     677             :  */
     678             : static void
     679        1172 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
     680             :            const char *find, const char *repl, int type)
     681             : {
     682             :     AFFIX      *Affix;
     683             : 
     684        1172 :     if (Conf->naffixes >= Conf->maffixes)
     685             :     {
     686         142 :         if (Conf->maffixes)
     687             :         {
     688           0 :             Conf->maffixes *= 2;
     689           0 :             Conf->Affix = (AFFIX *) repalloc(Conf->Affix, Conf->maffixes * sizeof(AFFIX));
     690             :         }
     691             :         else
     692             :         {
     693         142 :             Conf->maffixes = 16;
     694         142 :             Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
     695             :         }
     696             :     }
     697             : 
     698        1172 :     Affix = Conf->Affix + Conf->naffixes;
     699             : 
     700             :     /* This affix rule can be applied for words with any ending */
     701        1172 :     if (strcmp(mask, ".") == 0 || *mask == '\0')
     702             :     {
     703         284 :         Affix->issimple = 1;
     704         284 :         Affix->isregis = 0;
     705             :     }
     706             :     /* This affix rule will use regis to search word ending */
     707         888 :     else if (RS_isRegis(mask))
     708             :     {
     709         744 :         Affix->issimple = 0;
     710         744 :         Affix->isregis = 1;
     711         744 :         RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
     712         744 :                    *mask ? mask : VoidString);
     713             :     }
     714             :     /* This affix rule will use regex_t to search word ending */
     715             :     else
     716             :     {
     717             :         int         masklen;
     718             :         int         wmasklen;
     719             :         int         err;
     720             :         pg_wchar   *wmask;
     721             :         char       *tmask;
     722             : 
     723         144 :         Affix->issimple = 0;
     724         144 :         Affix->isregis = 0;
     725         144 :         tmask = (char *) tmpalloc(strlen(mask) + 3);
     726         144 :         if (type == FF_SUFFIX)
     727         144 :             sprintf(tmask, "%s$", mask);
     728             :         else
     729           0 :             sprintf(tmask, "^%s", mask);
     730             : 
     731         144 :         masklen = strlen(tmask);
     732         144 :         wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
     733         144 :         wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
     734             : 
     735             :         /*
     736             :          * The regex and all internal state created by pg_regcomp are
     737             :          * allocated in the dictionary's memory context, and will be freed
     738             :          * automatically when it is destroyed.
     739             :          */
     740         144 :         Affix->reg.pregex = palloc(sizeof(regex_t));
     741         144 :         err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen,
     742             :                          REG_ADVANCED | REG_NOSUB,
     743             :                          DEFAULT_COLLATION_OID);
     744         144 :         if (err)
     745             :         {
     746             :             char        errstr[100];
     747             : 
     748           0 :             pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr));
     749           0 :             ereport(ERROR,
     750             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     751             :                      errmsg("invalid regular expression: %s", errstr)));
     752             :         }
     753             :     }
     754             : 
     755        1172 :     Affix->flagflags = flagflags;
     756        1172 :     if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
     757             :     {
     758         210 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
     759         210 :             Affix->flagflags |= FF_COMPOUNDFLAG;
     760             :     }
     761        1172 :     Affix->flag = cpstrdup(Conf, flag);
     762        1172 :     Affix->type = type;
     763             : 
     764        1172 :     Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
     765        1172 :     if ((Affix->replen = strlen(repl)) > 0)
     766        1134 :         Affix->repl = cpstrdup(Conf, repl);
     767             :     else
     768          38 :         Affix->repl = VoidString;
     769        1172 :     Conf->naffixes++;
     770        1172 : }
     771             : 
     772             : /* Parsing states for parse_affentry() and friends (get_nextfield(), parse_ooaffentry()) */
     773             : #define PAE_WAIT_MASK   0   /* skipping whitespace before the mask (or before the next field) */
     774             : #define PAE_INMASK      1   /* copying the mask (or the current field) */
     775             : #define PAE_WAIT_FIND   2   /* expecting the find part; in parse_affentry(), '-' starts it */
     776             : #define PAE_INFIND      3   /* parse_affentry(): copying the find string, ended by ',' */
     777             : #define PAE_WAIT_REPL   4   /* expecting the replacement string */
     778             : #define PAE_INREPL      5   /* parse_affentry(): copying the replacement string */
     779             : #define PAE_WAIT_TYPE   6   /* parse_ooaffentry(): expecting the type field */
     780             : #define PAE_WAIT_FLAG   7   /* parse_ooaffentry(): expecting the flag field */
     781             : 
     782             : /*
     783             :  * Parse next space-separated field of an .affix file line.
     784             :  *
     785             :  * *str is the input pointer (will be advanced past field)
     786             :  * next is where to copy the field value to, with null termination
     787             :  *
     788             :  * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
     789             :  *
     790             :  * Returns true if we found a field, false if not.
     791             :  */
     792             : static bool
     793       10772 : get_nextfield(char **str, char *next)
     794             : {
     795       10772 :     int         state = PAE_WAIT_MASK;
     796       10772 :     int         avail = BUFSIZ;
     797             : 
     798       46068 :     while (**str)
     799             :     {
     800       44802 :         if (state == PAE_WAIT_MASK)
     801             :         {
     802       19874 :             if (t_iseq(*str, '#'))
     803         374 :                 return false;
     804       19500 :             else if (!isspace((unsigned char) **str))
     805             :             {
     806        9132 :                 int         clen = pg_mblen(*str);
     807             : 
     808        9132 :                 if (clen < avail)
     809             :                 {
     810        9132 :                     COPYCHAR(next, *str);
     811        9132 :                     next += clen;
     812        9132 :                     avail -= clen;
     813             :                 }
     814        9132 :                 state = PAE_INMASK;
     815             :             }
     816             :         }
     817             :         else                    /* state == PAE_INMASK */
     818             :         {
     819       24928 :             if (isspace((unsigned char) **str))
     820             :             {
     821        9132 :                 *next = '\0';
     822        9132 :                 return true;
     823             :             }
     824             :             else
     825             :             {
     826       15796 :                 int         clen = pg_mblen(*str);
     827             : 
     828       15796 :                 if (clen < avail)
     829             :                 {
     830       15796 :                     COPYCHAR(next, *str);
     831       15796 :                     next += clen;
     832       15796 :                     avail -= clen;
     833             :                 }
     834             :             }
     835             :         }
     836       35296 :         *str += pg_mblen(*str);
     837             :     }
     838             : 
     839        1266 :     *next = '\0';
     840             : 
     841        1266 :     return (state == PAE_INMASK);   /* OK if we got a nonempty field */
     842             : }
     843             : 
     844             : /*
     845             :  * Parses entry of an .affix file of MySpell or Hunspell format.
     846             :  *
     847             :  * An .affix file entry has the following format:
     848             :  * - header
     849             :  *   <type>  <flag>  <cross_flag>  <flag_count>
     850             :  * - fields after header:
     851             :  *   <type>  <flag>  <find>  <replace>  <mask>
     852             :  *
     853             :  * str is the input line
     854             :  * field values are returned to type etc, which must be buffers of size BUFSIZ.
     855             :  *
     856             :  * Returns number of fields found; any omitted fields are set to empty strings.
     857             :  */
     858             : static int
     859        2476 : parse_ooaffentry(char *str, char *type, char *flag, char *find,
     860             :                  char *repl, char *mask)
     861             : {
     862        2476 :     int         state = PAE_WAIT_TYPE;
     863        2476 :     int         fields_read = 0;
     864        2476 :     bool        valid = false;
     865             : 
     866        2476 :     *type = *flag = *find = *repl = *mask = '\0';
     867             : 
     868       10772 :     while (*str)
     869             :     {
     870       10772 :         switch (state)
     871             :         {
     872        2476 :             case PAE_WAIT_TYPE:
     873        2476 :                 valid = get_nextfield(&str, type);
     874        2476 :                 state = PAE_WAIT_FLAG;
     875        2476 :                 break;
     876        2476 :             case PAE_WAIT_FLAG:
     877        2476 :                 valid = get_nextfield(&str, flag);
     878        2476 :                 state = PAE_WAIT_FIND;
     879        2476 :                 break;
     880        2476 :             case PAE_WAIT_FIND:
     881        2476 :                 valid = get_nextfield(&str, find);
     882        2476 :                 state = PAE_WAIT_REPL;
     883        2476 :                 break;
     884        1672 :             case PAE_WAIT_REPL:
     885        1672 :                 valid = get_nextfield(&str, repl);
     886        1672 :                 state = PAE_WAIT_MASK;
     887        1672 :                 break;
     888        1672 :             case PAE_WAIT_MASK:
     889        1672 :                 valid = get_nextfield(&str, mask);
     890        1672 :                 state = -1;     /* force loop exit */
     891        1672 :                 break;
     892           0 :             default:
     893           0 :                 elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
     894             :                      state);
     895             :                 break;
     896             :         }
     897       10772 :         if (valid)
     898        9132 :             fields_read++;
     899             :         else
     900        1640 :             break;              /* early EOL */
     901        9132 :         if (state < 0)
     902         836 :             break;              /* got all fields */
     903             :     }
     904             : 
     905        2476 :     return fields_read;
     906             : }
     907             : 
     908             : /*
     909             :  * Parses entry of an .affix file of Ispell format
     910             :  *
     911             :  * An .affix file entry has the following format:
     912             :  * <mask>  >  [-<find>,]<replace>
     913             :  */
     914             : static bool
     915         336 : parse_affentry(char *str, char *mask, char *find, char *repl)
     916             : {
     917         336 :     int         state = PAE_WAIT_MASK;
     918         336 :     char       *pmask = mask,
     919         336 :                *pfind = find,
     920         336 :                *prepl = repl;
     921             : 
     922         336 :     *mask = *find = *repl = '\0';
     923             : 
     924        8832 :     while (*str)
     925             :     {
     926        8832 :         if (state == PAE_WAIT_MASK)
     927             :         {
     928         816 :             if (t_iseq(str, '#'))
     929           0 :                 return false;
     930         816 :             else if (!isspace((unsigned char) *str))
     931             :             {
     932         336 :                 COPYCHAR(pmask, str);
     933         336 :                 pmask += pg_mblen(str);
     934         336 :                 state = PAE_INMASK;
     935             :             }
     936             :         }
     937        8016 :         else if (state == PAE_INMASK)
     938             :         {
     939        3264 :             if (t_iseq(str, '>'))
     940             :             {
     941         336 :                 *pmask = '\0';
     942         336 :                 state = PAE_WAIT_FIND;
     943             :             }
     944        2928 :             else if (!isspace((unsigned char) *str))
     945             :             {
     946        1152 :                 COPYCHAR(pmask, str);
     947        1152 :                 pmask += pg_mblen(str);
     948             :             }
     949             :         }
     950        4752 :         else if (state == PAE_WAIT_FIND)
     951             :         {
     952        1344 :             if (t_iseq(str, '-'))
     953             :             {
     954          48 :                 state = PAE_INFIND;
     955             :             }
     956        1296 :             else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
     957             :             {
     958         288 :                 COPYCHAR(prepl, str);
     959         288 :                 prepl += pg_mblen(str);
     960         288 :                 state = PAE_INREPL;
     961             :             }
     962        1008 :             else if (!isspace((unsigned char) *str))
     963           0 :                 ereport(ERROR,
     964             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     965             :                          errmsg("syntax error")));
     966             :         }
     967        3408 :         else if (state == PAE_INFIND)
     968             :         {
     969          96 :             if (t_iseq(str, ','))
     970             :             {
     971          48 :                 *pfind = '\0';
     972          48 :                 state = PAE_WAIT_REPL;
     973             :             }
     974          48 :             else if (t_isalpha(str))
     975             :             {
     976          48 :                 COPYCHAR(pfind, str);
     977          48 :                 pfind += pg_mblen(str);
     978             :             }
     979           0 :             else if (!isspace((unsigned char) *str))
     980           0 :                 ereport(ERROR,
     981             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     982             :                          errmsg("syntax error")));
     983             :         }
     984        3312 :         else if (state == PAE_WAIT_REPL)
     985             :         {
     986          48 :             if (t_iseq(str, '-'))
     987             :             {
     988           0 :                 break;          /* void repl */
     989             :             }
     990          48 :             else if (t_isalpha(str))
     991             :             {
     992          48 :                 COPYCHAR(prepl, str);
     993          48 :                 prepl += pg_mblen(str);
     994          48 :                 state = PAE_INREPL;
     995             :             }
     996           0 :             else if (!isspace((unsigned char) *str))
     997           0 :                 ereport(ERROR,
     998             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     999             :                          errmsg("syntax error")));
    1000             :         }
    1001        3264 :         else if (state == PAE_INREPL)
    1002             :         {
    1003        3264 :             if (t_iseq(str, '#'))
    1004             :             {
    1005         336 :                 *prepl = '\0';
    1006         336 :                 break;
    1007             :             }
    1008        2928 :             else if (t_isalpha(str))
    1009             :             {
    1010         432 :                 COPYCHAR(prepl, str);
    1011         432 :                 prepl += pg_mblen(str);
    1012             :             }
    1013        2496 :             else if (!isspace((unsigned char) *str))
    1014           0 :                 ereport(ERROR,
    1015             :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1016             :                          errmsg("syntax error")));
    1017             :         }
    1018             :         else
    1019           0 :             elog(ERROR, "unrecognized state in parse_affentry: %d", state);
    1020             : 
    1021        8496 :         str += pg_mblen(str);
    1022             :     }
    1023             : 
    1024         336 :     *pmask = *pfind = *prepl = '\0';
    1025             : 
    1026         336 :     return (*mask && (*find || *repl));
    1027             : }
    1028             : 
    1029             : /*
    1030             :  * Sets a Hunspell options depending on flag type.
    1031             :  */
    1032             : static void
    1033        3246 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
    1034             :                           char *s, uint32 val)
    1035             : {
    1036        3246 :     if (Conf->flagMode == FM_NUM)
    1037             :     {
    1038             :         char       *next;
    1039             :         int         i;
    1040             : 
    1041         696 :         errno = 0;
    1042         696 :         i = strtol(s, &next, 10);
    1043         696 :         if (s == next || errno == ERANGE)
    1044           0 :             ereport(ERROR,
    1045             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1046             :                      errmsg("invalid affix flag \"%s\"", s)));
    1047         696 :         if (i < 0 || i > FLAGNUM_MAXSIZE)
    1048           0 :             ereport(ERROR,
    1049             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1050             :                      errmsg("affix flag \"%s\" is out of range", s)));
    1051             : 
    1052         696 :         entry->flag.i = i;
    1053             :     }
    1054             :     else
    1055        2550 :         entry->flag.s = cpstrdup(Conf, s);
    1056             : 
    1057        3246 :     entry->flagMode = Conf->flagMode;
    1058        3246 :     entry->value = val;
    1059        3246 : }
    1060             : 
    1061             : /*
    1062             :  * Sets up a correspondence for the affix parameter with the affix flag.
    1063             :  *
    1064             :  * Conf: current dictionary.
    1065             :  * s: affix flag in string.
    1066             :  * val: affix parameter.
    1067             :  */
    1068             : static void
    1069         372 : addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
    1070             : {
    1071             :     CompoundAffixFlag *newValue;
    1072             :     char        sbuf[BUFSIZ];
    1073             :     char       *sflag;
    1074             :     int         clen;
    1075             : 
    1076         696 :     while (*s && isspace((unsigned char) *s))
    1077         324 :         s += pg_mblen(s);
    1078             : 
    1079         372 :     if (!*s)
    1080           0 :         ereport(ERROR,
    1081             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1082             :                  errmsg("syntax error")));
    1083             : 
    1084             :     /* Get flag without \n */
    1085         372 :     sflag = sbuf;
    1086        1100 :     while (*s && !isspace((unsigned char) *s) && *s != '\n')
    1087             :     {
    1088         728 :         clen = pg_mblen(s);
    1089         728 :         COPYCHAR(sflag, s);
    1090         728 :         sflag += clen;
    1091         728 :         s += clen;
    1092             :     }
    1093         372 :     *sflag = '\0';
    1094             : 
    1095             :     /* Resize array or allocate memory for array CompoundAffixFlag */
    1096         372 :     if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
    1097             :     {
    1098         142 :         if (Conf->mCompoundAffixFlag)
    1099             :         {
    1100           0 :             Conf->mCompoundAffixFlag *= 2;
    1101           0 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1102           0 :                 repalloc(Conf->CompoundAffixFlags,
    1103           0 :                          Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1104             :         }
    1105             :         else
    1106             :         {
    1107         142 :             Conf->mCompoundAffixFlag = 10;
    1108         142 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1109         142 :                 tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1110             :         }
    1111             :     }
    1112             : 
    1113         372 :     newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
    1114             : 
    1115         372 :     setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
    1116             : 
    1117         372 :     Conf->usecompound = true;
    1118         372 :     Conf->nCompoundAffixFlag++;
    1119         372 : }
    1120             : 
    1121             : /*
    1122             :  * Returns a set of affix parameters which correspondence to the set of affix
    1123             :  * flags s.
    1124             :  */
    1125             : static int
    1126        1392 : getCompoundAffixFlagValue(IspellDict *Conf, const char *s)
    1127             : {
    1128        1392 :     uint32      flag = 0;
    1129             :     CompoundAffixFlag *found,
    1130             :                 key;
    1131             :     char        sflag[BUFSIZ];
    1132             :     const char *flagcur;
    1133             : 
    1134        1392 :     if (Conf->nCompoundAffixFlag == 0)
    1135           0 :         return 0;
    1136             : 
    1137        1392 :     flagcur = s;
    1138        4266 :     while (*flagcur)
    1139             :     {
    1140        2880 :         getNextFlagFromString(Conf, &flagcur, sflag);
    1141        2874 :         setCompoundAffixFlagValue(Conf, &key, sflag, 0);
    1142             : 
    1143             :         found = (CompoundAffixFlag *)
    1144        2874 :             bsearch(&key, Conf->CompoundAffixFlags,
    1145        2874 :                     Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
    1146             :                     cmpcmdflag);
    1147        2874 :         if (found != NULL)
    1148         626 :             flag |= found->value;
    1149             :     }
    1150             : 
    1151        1386 :     return flag;
    1152             : }
    1153             : 
    1154             : /*
    1155             :  * Returns a flag set using the s parameter.
    1156             :  *
    1157             :  * If Conf->useFlagAliases is true then the s parameter is index of the
    1158             :  * Conf->AffixData array and function returns its entry.
    1159             :  * Else function returns the s parameter.
    1160             :  */
    1161             : static const char *
    1162         162 : getAffixFlagSet(IspellDict *Conf, char *s)
    1163             : {
    1164         162 :     if (Conf->useFlagAliases && *s != '\0')
    1165             :     {
    1166             :         int         curaffix;
    1167             :         char       *end;
    1168             : 
    1169         102 :         errno = 0;
    1170         102 :         curaffix = strtol(s, &end, 10);
    1171         102 :         if (s == end || errno == ERANGE)
    1172           0 :             ereport(ERROR,
    1173             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1174             :                      errmsg("invalid affix alias \"%s\"", s)));
    1175             : 
    1176         102 :         if (curaffix > 0 && curaffix < Conf->nAffixData)
    1177             : 
    1178             :             /*
    1179             :              * Do not subtract 1 from curaffix because empty string was added
    1180             :              * in NIImportOOAffixes
    1181             :              */
    1182         102 :             return Conf->AffixData[curaffix];
    1183           0 :         else if (curaffix > Conf->nAffixData)
    1184           0 :             ereport(ERROR,
    1185             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1186             :                      errmsg("invalid affix alias \"%s\"", s)));
    1187           0 :         return VoidString;
    1188             :     }
    1189             :     else
    1190          60 :         return s;
    1191             : }
    1192             : 
    1193             : /*
    1194             :  * Import an affix file that follows MySpell or Hunspell format.
    1195             :  *
    1196             :  * Conf: current dictionary.
    1197             :  * filename: path to the .affix file.
                     :  *
                     :  * The file is read twice.  The first pass only collects the compound flag
                     :  * declarations (COMPOUNDFLAG, COMPOUNDBEGIN, ...) and the FLAG mode; the
                     :  * collected flags are then sorted with cmpcmdflag when there is more than
                     :  * one.  The second pass parses every line with parse_ooaffentry(), stores
                     :  * AF aliases in Conf->AffixData and passes PFX/SFX rules to NIAddAffix().
                     :  *
                     :  * Reports ERROR if the file cannot be opened, if a FLAG value is not
                     :  * "default", "long" or "num", if the declared number of AF aliases is not
                     :  * positive, or if more AF aliases follow than were declared.
    1198             :  */
    1199             : static void
    1200          94 : NIImportOOAffixes(IspellDict *Conf, const char *filename)
    1201             : {
    1202             :     char        type[BUFSIZ],
    1203          94 :                *ptype = NULL;
    1204             :     char        sflag[BUFSIZ];
    1205             :     char        mask[BUFSIZ],
    1206             :                *pmask;
    1207             :     char        find[BUFSIZ],
    1208             :                *pfind;
    1209             :     char        repl[BUFSIZ],
    1210             :                *prepl;
    1211          94 :     bool        isSuffix = false;
    1212          94 :     int         naffix = 0,
    1213          94 :                 curaffix = 0;
    1214          94 :     int         sflaglen = 0;
    1215          94 :     char        flagflags = 0;
    1216             :     tsearch_readline_state trst;
    1217             :     char       *recoded;
    1218             : 
    1219             :     /* First pass: read the file only to collect flag declarations */
    1220          94 :     Conf->usecompound = false;
    1221          94 :     Conf->useFlagAliases = false;
    1222          94 :     Conf->flagMode = FM_CHAR;
    1223             : 
    1224          94 :     if (!tsearch_readline_begin(&trst, filename))
    1225           0 :         ereport(ERROR,
    1226             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1227             :                  errmsg("could not open affix file \"%s\": %m",
    1228             :                         filename)));
    1229             : 
    1230        3656 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1231             :     {
    1232        3562 :         if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
    1233             :         {                       /* empty, whitespace-led or '#' line: skip */
    1234        1086 :             pfree(recoded);
    1235        1086 :             continue;
    1236             :         }
    1237             : 
    1238        2476 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
    1239          94 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
    1240             :                                       FF_COMPOUNDFLAG);
    1241        2382 :         else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
    1242          34 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
    1243             :                                       FF_COMPOUNDBEGIN);
    1244        2348 :         else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
    1245           0 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
    1246             :                                       FF_COMPOUNDLAST);
    1247             :         /* COMPOUNDLAST and COMPOUNDEND are synonyms */
    1248        2348 :         else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
    1249          34 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
    1250             :                                       FF_COMPOUNDLAST);
    1251        2314 :         else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
    1252          34 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
    1253             :                                       FF_COMPOUNDMIDDLE);
    1254        2280 :         else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
    1255          94 :             addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
    1256             :                                       FF_COMPOUNDONLY);
    1257        2186 :         else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
    1258          34 :             addCompoundAffixFlagValue(Conf,
    1259             :                                       recoded + strlen("COMPOUNDPERMITFLAG"),
    1260             :                                       FF_COMPOUNDPERMITFLAG);
    1261        2152 :         else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
    1262           0 :             addCompoundAffixFlagValue(Conf,
    1263             :                                       recoded + strlen("COMPOUNDFORBIDFLAG"),
    1264             :                                       FF_COMPOUNDFORBIDFLAG);
    1265        2152 :         else if (STRNCMP(recoded, "FLAG") == 0)
    1266             :         {
    1267          72 :             char       *s = recoded + strlen("FLAG");
    1268             : 
    1269         144 :             while (*s && isspace((unsigned char) *s))
    1270          72 :                 s += pg_mblen(s);
    1271             : 
    1272          72 :             if (*s)             /* a FLAG line without a value keeps FM_CHAR */
    1273             :             {
    1274          72 :                 if (STRNCMP(s, "long") == 0)
    1275          34 :                     Conf->flagMode = FM_LONG;
    1276          38 :                 else if (STRNCMP(s, "num") == 0)
    1277          38 :                     Conf->flagMode = FM_NUM;
    1278           0 :                 else if (STRNCMP(s, "default") != 0)
    1279           0 :                     ereport(ERROR,
    1280             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1281             :                              errmsg("Ispell dictionary supports only "
    1282             :                                     "\"default\", \"long\", "
    1283             :                                     "and \"num\" flag values")));
    1284             :             }
    1285             :         }
    1286             : 
    1287        2476 :         pfree(recoded);
    1288             :     }
    1289          94 :     tsearch_readline_end(&trst);
    1290             : 
    1291          94 :     if (Conf->nCompoundAffixFlag > 1)   /* nothing to order for 0 or 1 entries */
    1292          94 :         qsort(Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
    1293             :               sizeof(CompoundAffixFlag), cmpcmdflag);
    1294             :     /* Second pass: open the same file again and import aliases and affixes */
    1295          94 :     if (!tsearch_readline_begin(&trst, filename))
    1296           0 :         ereport(ERROR,
    1297             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1298             :                  errmsg("could not open affix file \"%s\": %m",
    1299             :                         filename)));
    1300             : 
    1301        3656 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1302             :     {
    1303             :         int         fields_read;
    1304             : 
    1305        3562 :         if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
    1306        1086 :             goto nextline;
    1307             : 
    1308        2476 :         fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
    1309             :         /* Replace the previous line's lowercased type with this line's */
    1310        2476 :         if (ptype)
    1311        2382 :             pfree(ptype);
    1312        2476 :         ptype = lowerstr_ctx(Conf, type);
    1313             : 
    1314             :         /* First try to parse AF parameter (alias compression) */
    1315        2476 :         if (STRNCMP(ptype, "af") == 0)
    1316             :         {
    1317             :             /* First line is the number of aliases */
    1318         408 :             if (!Conf->useFlagAliases)
    1319             :             {
    1320          34 :                 Conf->useFlagAliases = true;
    1321          34 :                 naffix = atoi(sflag);
    1322          34 :                 if (naffix <= 0)
    1323           0 :                     ereport(ERROR,
    1324             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1325             :                              errmsg("invalid number of flag vector aliases")));
    1326             : 
    1327             :                 /* Also reserve place for empty flag set */
    1328          34 :                 naffix++;
    1329             : 
    1330          34 :                 Conf->AffixData = (const char **) palloc0(naffix * sizeof(char *));
    1331          34 :                 Conf->lenAffixData = Conf->nAffixData = naffix;
    1332             : 
    1333             :                 /* Add empty flag set into AffixData */
    1334          34 :                 Conf->AffixData[curaffix] = VoidString;
    1335          34 :                 curaffix++;
    1336             :             }
    1337             :             /* Other lines are aliases */
    1338             :             else
    1339             :             {
    1340         374 :                 if (curaffix < naffix)
    1341             :                 {
    1342         374 :                     Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
    1343         374 :                     curaffix++;
    1344             :                 }
    1345             :                 else
    1346           0 :                     ereport(ERROR,
    1347             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1348             :                              errmsg("number of aliases exceeds specified number %d",
    1349             :                                     naffix - 1)));  /* minus the reserved empty set */
    1350             :             }
    1351         408 :             goto nextline;
    1352             :         }
    1353             :         /* Else try to parse prefixes and suffixes */
    1354        2068 :         if (fields_read < 4 ||
    1355        1672 :             (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
    1356         396 :             goto nextline;      /* neither a PFX nor an SFX entry with enough fields */
    1357             :         /* The flag must be non-empty, 1 byte in FM_CHAR and at most 2 in FM_LONG */
    1358        1672 :         sflaglen = strlen(sflag);
    1359        1672 :         if (sflaglen == 0
    1360        1672 :             || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
    1361        1672 :             || (sflaglen > 2 && Conf->flagMode == FM_LONG))
    1362           0 :             goto nextline;
    1363             : 
    1364             :         /*--------
    1365             :          * Affix header. For example:
    1366             :          * SFX \ N 1
                     :          * The header only sets isSuffix and flagflags, which apply to the
                     :          * affix field lines that follow it.
    1367             :          *--------
    1368             :          */
    1369        1672 :         if (fields_read == 4)
    1370             :         {
    1371         836 :             isSuffix = (STRNCMP(ptype, "sfx") == 0);
    1372         836 :             if (t_iseq(find, 'y') || t_iseq(find, 'Y')) /* header's Y/N field is parsed into find */
    1373         580 :                 flagflags = FF_CROSSPRODUCT;
    1374             :             else
    1375         256 :                 flagflags = 0;
    1376             :         }
    1377             :         /*--------
    1378             :          * Affix fields. For example:
    1379             :          * SFX \   0    Y/L [^Y]
    1380             :          *--------
    1381             :          */
    1382             :         else
    1383             :         {
    1384             :             char       *ptr;
    1385         836 :             int         aflg = 0;
    1386             : 
    1387             :             /* Get flags after '/' (flags are case sensitive) */
    1388         836 :             if ((ptr = strchr(repl, '/')) != NULL)
    1389         162 :                 aflg |= getCompoundAffixFlagValue(Conf,
    1390             :                                                   getAffixFlagSet(Conf,
    1391             :                                                                   ptr + 1));
    1392             :             /* Get lowercased version of string before '/' */
    1393         836 :             prepl = lowerstr_ctx(Conf, repl);
    1394         836 :             if ((ptr = strchr(prepl, '/')) != NULL)
    1395         162 :                 *ptr = '\0';
    1396         836 :             pfind = lowerstr_ctx(Conf, find);
    1397         836 :             pmask = lowerstr_ctx(Conf, mask);
    1398         836 :             if (t_iseq(find, '0'))  /* '0' stands for an empty find string */
    1399         704 :                 *pfind = '\0';
    1400         836 :             if (t_iseq(repl, '0'))  /* '0' stands for an empty replacement */
    1401          38 :                 *prepl = '\0';
    1402             : 
    1403         836 :             NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
    1404             :                        isSuffix ? FF_SUFFIX : FF_PREFIX);
    1405         836 :             pfree(prepl);
    1406         836 :             pfree(pfind);
    1407         836 :             pfree(pmask);
    1408             :         }
    1409             : 
    1410        3562 : nextline:
    1411        3562 :         pfree(recoded);
    1412             :     }
    1413             : 
    1414          94 :     tsearch_readline_end(&trst);
    1415          94 :     if (ptype)
    1416          94 :         pfree(ptype);
    1417          94 : }
    1418             : 
    1419             : /*
    1420             :  * import affixes
    1421             :  *
    1422             :  * Note caller must already have applied get_tsearch_config_filename
    1423             :  *
    1424             :  * This function is responsible for parsing ispell ("old format") affix files.
    1425             :  * If we realize that the file contains new-format commands, we pass off the
    1426             :  * work to NIImportOOAffixes(), which will re-read the whole file.
                     :  *
                     :  * Conf: current dictionary.
                     :  * filename: path to the affix file.
                     :  *
                     :  * Reports ERROR if the file cannot be opened, or if a new-format command is
                     :  * found after old-format commands have already been seen.
    1427             :  */
    1428             : void
    1429         142 : NIImportAffixes(IspellDict *Conf, const char *filename)
    1430             : {
    1431         142 :     char       *pstr = NULL;
    1432             :     char        flag[BUFSIZ];
    1433             :     char        mask[BUFSIZ];
    1434             :     char        find[BUFSIZ];
    1435             :     char        repl[BUFSIZ];
    1436             :     char       *s;
    1437         142 :     bool        suffixes = false;
    1438         142 :     bool        prefixes = false;
    1439         142 :     char        flagflags = 0;
    1440             :     tsearch_readline_state trst;
    1441         142 :     bool        oldformat = false;
    1442         142 :     char       *recoded = NULL;
    1443             : 
    1444         142 :     if (!tsearch_readline_begin(&trst, filename))
    1445           0 :         ereport(ERROR,
    1446             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1447             :                  errmsg("could not open affix file \"%s\": %m",
    1448             :                         filename)));
    1449             : 
    1450         142 :     Conf->usecompound = false;
    1451         142 :     Conf->useFlagAliases = false;
    1452         142 :     Conf->flagMode = FM_CHAR;
    1453             : 
    1454        1390 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1455             :     {                           /* pstr is the lowercased copy; recoded keeps the original case */
    1456        1342 :         pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
    1457             : 
    1458             :         /* Skip comments and empty lines */
    1459        1342 :         if (*pstr == '#' || *pstr == '\n')
    1460         432 :             goto nextline;
    1461             : 
    1462         910 :         if (STRNCMP(pstr, "compoundwords") == 0)
    1463             :         {
    1464             :             /* Find case-insensitive L flag in non-lowercased string */
    1465          48 :             s = findchar2(recoded, 'l', 'L');
    1466          48 :             if (s)
    1467             :             {                   /* skip the rest of that word, then the whitespace after it */
    1468         240 :                 while (*s && !isspace((unsigned char) *s))
    1469         192 :                     s += pg_mblen(s);
    1470          96 :                 while (*s && isspace((unsigned char) *s))
    1471          48 :                     s += pg_mblen(s);
    1472             :                 /* Only a single-byte character is registered as the compound flag */
    1473          48 :                 if (*s && pg_mblen(s) == 1)
    1474             :                 {
    1475          48 :                     addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
    1476          48 :                     Conf->usecompound = true;
    1477             :                 }
    1478          48 :                 oldformat = true;
    1479          48 :                 goto nextline;
    1480             :             }
    1481             :         }
    1482         862 :         if (STRNCMP(pstr, "suffixes") == 0)
    1483             :         {
    1484          48 :             suffixes = true;
    1485          48 :             prefixes = false;
    1486          48 :             oldformat = true;
    1487          48 :             goto nextline;
    1488             :         }
    1489         814 :         if (STRNCMP(pstr, "prefixes") == 0)
    1490             :         {
    1491          48 :             suffixes = false;
    1492          48 :             prefixes = true;
    1493          48 :             oldformat = true;
    1494          48 :             goto nextline;
    1495             :         }
    1496         766 :         if (STRNCMP(pstr, "flag") == 0)
    1497             :         {
    1498         408 :             s = recoded + 4;    /* we need non-lowercased string */
    1499         408 :             flagflags = 0;
    1500             : 
    1501         816 :             while (*s && isspace((unsigned char) *s))
    1502         408 :                 s += pg_mblen(s);
    1503             :             /* Optional modifier: '*' sets FF_CROSSPRODUCT, '~' sets FF_COMPOUNDONLY */
    1504         408 :             if (*s == '*')
    1505             :             {
    1506         240 :                 flagflags |= FF_CROSSPRODUCT;
    1507         240 :                 s++;
    1508             :             }
    1509         168 :             else if (*s == '~')
    1510             :             {
    1511          48 :                 flagflags |= FF_COMPOUNDONLY;
    1512          48 :                 s++;
    1513             :             }
    1514             : 
    1515         408 :             if (*s == '\\')     /* step over a backslash preceding the flag character */
    1516          48 :                 s++;
    1517             : 
    1518             :             /*
    1519             :              * An old-format flag is a single ASCII character; we expect it to
    1520             :              * be followed by EOL, whitespace, or ':'.  Otherwise this is a
    1521             :              * new-format flag command.
    1522             :              */
    1523         408 :             if (*s && pg_mblen(s) == 1)
    1524             :             {
    1525         408 :                 COPYCHAR(flag, s);
    1526         408 :                 flag[1] = '\0';
    1527             : 
    1528         408 :                 s++;
    1529         408 :                 if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
    1530          72 :                     isspace((unsigned char) *s))
    1531             :                 {
    1532         336 :                     oldformat = true;
    1533         336 :                     goto nextline;
    1534             :                 }
    1535             :             }
    1536          72 :             goto isnewformat;
    1537             :         }
    1538         358 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
    1539         336 :             STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
    1540         336 :             STRNCMP(recoded, "PFX") == 0 ||
    1541         336 :             STRNCMP(recoded, "SFX") == 0)
    1542          22 :             goto isnewformat;
    1543             :         /* Affix rules count only after a "suffixes" or "prefixes" line */
    1544         336 :         if ((!suffixes) && (!prefixes))
    1545           0 :             goto nextline;
    1546             : 
    1547         336 :         if (!parse_affentry(pstr, mask, find, repl))
    1548           0 :             goto nextline;      /* parse_affentry() did not accept the line */
    1549             :         /* flag and flagflags come from the most recent "flag" line */
    1550         336 :         NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
    1551             : 
    1552        1248 : nextline:
    1553        1248 :         pfree(recoded);
    1554        1248 :         pfree(pstr);
    1555             :     }
    1556          48 :     tsearch_readline_end(&trst);
    1557          48 :     return;
    1558             :     /* NOTE(review): recoded and pstr of the current line are not pfree'd on the path below; presumably left to the memory context -- confirm */
    1559          94 : isnewformat:
    1560          94 :     if (oldformat)
    1561           0 :         ereport(ERROR,
    1562             :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1563             :                  errmsg("affix file contains both old-style and new-style commands")));
    1564          94 :     tsearch_readline_end(&trst);
    1565             : 
    1566          94 :     NIImportOOAffixes(Conf, filename);
    1567             : }
    1568             : 
    1569             : /*
    1570             :  * Merges two affix flag sets and stores a new affix flag set into
    1571             :  * Conf->AffixData.
    1572             :  *
                     :  * Conf: current dictionary.
                     :  * a1, a2: indexes into Conf->AffixData of the flag sets to merge.
                     :  *
                     :  * If either flag set is an empty string nothing is stored and the index of
                     :  * the other set is returned.  In FM_NUM mode the two sets are joined with a
                     :  * comma, otherwise they are concatenated directly.
                     :  *
    1573             :  * Returns index of a new affix flag set.
    1574             :  */
    1575             : static int
    1576          74 : MergeAffix(IspellDict *Conf, int a1, int a2)
    1577             : {
    1578             :     const char **ptr;
    1579             : 
    1580             :     Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
    1581             : 
    1582             :     /* Do not merge affix flags if one of affix flags is empty */
    1583          74 :     if (*Conf->AffixData[a1] == '\0')
    1584           0 :         return a2;
    1585          74 :     else if (*Conf->AffixData[a2] == '\0')
    1586           0 :         return a1;
    1587             : 
    1588             :     /* Double the size of AffixData if there's not enough space */
    1589          74 :     if (Conf->nAffixData + 1 >= Conf->lenAffixData) /* +1: room for the NULL slot written below */
    1590             :     {
    1591          74 :         Conf->lenAffixData *= 2;
    1592          74 :         Conf->AffixData = (const char **) repalloc(Conf->AffixData,
    1593          74 :                                                    sizeof(char *) * Conf->lenAffixData);
    1594             :     }
    1595             :     /* ptr addresses the first unused slot, where the merged set goes */
    1596          74 :     ptr = Conf->AffixData + Conf->nAffixData;
    1597          74 :     if (Conf->flagMode == FM_NUM)
    1598             :     {
    1599          32 :         char       *p = cpalloc(strlen(Conf->AffixData[a1]) +
    1600             :                                 strlen(Conf->AffixData[a2]) +
    1601             :                                 1 /* comma */ + 1 /* \0 */ );
    1602             : 
    1603          32 :         sprintf(p, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1604          32 :         *ptr = p;
    1605             :     }
    1606             :     else
    1607             :     {
    1608          42 :         char       *p = cpalloc(strlen(Conf->AffixData[a1]) +
    1609             :                                 strlen(Conf->AffixData[a2]) +
    1610             :                                 1 /* \0 */ );
    1611             : 
    1612          42 :         sprintf(p, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1613          42 :         *ptr = p;
    1614             :     }
    1615          74 :     ptr++;
    1616          74 :     *ptr = NULL;                /* the slot after the new entry is set to NULL */
    1617          74 :     Conf->nAffixData++;
    1618             : 
    1619          74 :     return Conf->nAffixData - 1;
    1620             : }
    1621             : 
    1622             : /*
    1623             :  * Returns the set of affix parameters that corresponds to the set of affix
    1624             :  * flags with the given index.
                     :  *
                     :  * Conf: current dictionary.
                     :  * affix: index into Conf->AffixData.
                     :  *
                     :  * The value of getCompoundAffixFlagValue() for that flag set is masked with
                     :  * FF_COMPOUNDFLAGMASK, so only those bits are returned.
    1625             :  */
    1626             : static uint32
    1627        1230 : makeCompoundFlags(IspellDict *Conf, int affix)
    1628             : {
    1629             :     Assert(affix < Conf->nAffixData);
    1630             : 
    1631        1230 :     return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
    1632             :             FF_COMPOUNDFLAGMASK);
    1633             : }
    1634             : 
    1635             : /*
    1636             :  * Makes a prefix tree for the given level.
    1637             :  *
    1638             :  * Conf: current dictionary.
    1639             :  * low: lower index of the Conf->Spell array.
    1640             :  * high: upper index of the Conf->Spell array (exclusive: loops run while
                     :  *       i < high).
    1641             :  * level: current prefix tree level.
                     :  *
                     :  * Returns the node built for this level, or NULL when no word in the range
                     :  * is longer than level.  Child nodes are built by recursing with level + 1.
                     :  * The range is presumably expected to be sorted so that equal characters
                     :  * at this level are adjacent (the visible caller sorts with cmpspell
                     :  * first) -- confirm.
    1642             :  */
    1643             : static SPNode *
    1644        4908 : mkSPNode(IspellDict *Conf, int low, int high, int level)
    1645             : {
    1646             :     int         i;
    1647        4908 :     int         nchar = 0;
    1648        4908 :     char        lastchar = '\0';
    1649             :     SPNode     *rs;
    1650             :     SPNodeData *data;
    1651        4908 :     int         lownew = low;
    1652             :     /* Count how often the character at position level changes; this sizes the node */
    1653       16126 :     for (i = low; i < high; i++)
    1654       11218 :         if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
    1655             :         {
    1656        4808 :             nchar++;
    1657        4808 :             lastchar = Conf->Spell[i]->word[level];
    1658             :         }
    1659             : 
    1660        4908 :     if (!nchar)
    1661         704 :         return NULL;
    1662             : 
    1663        4204 :     rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
    1664        4204 :     rs->length = nchar;
    1665        4204 :     data = rs->data;
    1666             :     /* Second scan: fill one SPNodeData per character group */
    1667        4204 :     lastchar = '\0';
    1668       14224 :     for (i = low; i < high; i++)
    1669       10038 :         if (Conf->Spell[i]->p.d.len > level)
    1670             :         {
    1671        7212 :             if (lastchar != Conf->Spell[i]->word[level])
    1672             :             {
    1673        4796 :                 if (lastchar)   /* a previous group exists: close it */
    1674             :                 {
    1675             :                     /* Next level of the prefix tree */
    1676         592 :                     data->node = mkSPNode(Conf, lownew, i, level + 1);
    1677         580 :                     lownew = i;
    1678         580 :                     data++;
    1679             :                 }
    1680        4784 :                 lastchar = Conf->Spell[i]->word[level];
    1681             :             }
    1682        7200 :             data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
    1683        7200 :             if (Conf->Spell[i]->p.d.len == level + 1)   /* the word ends at this level */
    1684             :             {
    1685        1156 :                 bool        clearCompoundOnly = false;
    1686             : 
    1687        1156 :                 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
    1688             :                 {
    1689             :                     /*
    1690             :                      * MergeAffix may be called several times for the same
    1691             :                      * word. Unless both the already stored entry and this
    1692             :                      * one carry FF_COMPOUNDONLY, clear the FF_COMPOUNDONLY flag.
    1693             :                      */
    1694             : 
    1695         148 :                     clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
    1696          74 :                                          & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
    1697             :                         ? false : true;
    1698          74 :                     data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
    1699             :                 }
    1700             :                 else
    1701        1082 :                     data->affix = Conf->Spell[i]->p.d.affix;
    1702        1156 :                 data->isword = 1;
    1703             : 
    1704        1156 :                 data->compoundflag = makeCompoundFlags(Conf, data->affix);
    1705             :                 /* FF_COMPOUNDONLY without FF_COMPOUNDFLAG gets FF_COMPOUNDFLAG added */
    1706        1150 :                 if ((data->compoundflag & FF_COMPOUNDONLY) &&
    1707           0 :                     (data->compoundflag & FF_COMPOUNDFLAG) == 0)
    1708           0 :                     data->compoundflag |= FF_COMPOUNDFLAG;
    1709             : 
    1710        1150 :                 if (clearCompoundOnly)
    1711          74 :                     data->compoundflag &= ~FF_COMPOUNDONLY;
    1712             :             }
    1713             :         }
    1714             : 
    1715             :     /* Next level of the prefix tree (for the last character group) */
    1716        4186 :     data->node = mkSPNode(Conf, lownew, high, level + 1);
    1717             : 
    1718        4180 :     return rs;
    1719             : }
    1720             : 
    1721             : /*
    1722             :  * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
    1723             :  * and affixes.
                     :  *
                     :  * Conf: current dictionary.
                     :  *
                     :  * For every Conf->Spell entry the textual flag field is turned into an index
                     :  * into Conf->AffixData (p.d.affix) and the word length is stored (p.d.len).
                     :  * With flag aliases the flag field is read as a decimal index into the
                     :  * existing AffixData; otherwise AffixData is built here from the distinct
                     :  * flag strings.  Finally the entries are sorted with cmpspell and the
                     :  * prefix tree is built with mkSPNode().
                     :  *
                     :  * Reports ERROR ("invalid affix alias") if an alias is not a number, is out
                     :  * of range, or is followed by an unexpected character.
    1724             :  */
    1725             : void
    1726         142 : NISortDictionary(IspellDict *Conf)
    1727             : {
    1728             :     int         i;
    1729             :     int         naffix;
    1730             :     int         curaffix;
    1731             : 
    1732             :     /* compress affixes */
    1733             : 
    1734             :     /*
    1735             :      * If we use flag aliases then we need to use Conf->AffixData filled in
    1736             :      * the NIImportOOAffixes().
    1737             :      */
    1738         142 :     if (Conf->useFlagAliases)
    1739             :     {
    1740         276 :         for (i = 0; i < Conf->nspell; i++)
    1741             :         {
    1742             :             char       *end;
    1743             : 
    1744         254 :             if (*Conf->Spell[i]->p.flag != '\0')
    1745             :             {
    1746         232 :                 errno = 0;      /* so that ERANGE below can only come from this strtol() */
    1747         232 :                 curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
    1748         232 :                 if (Conf->Spell[i]->p.flag == end || errno == ERANGE)   /* no digits, or overflow */
    1749           6 :                     ereport(ERROR,
    1750             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1751             :                              errmsg("invalid affix alias \"%s\"",
    1752             :                                     Conf->Spell[i]->p.flag)));
    1753         226 :                 if (curaffix < 0 || curaffix >= Conf->nAffixData)   /* not an AffixData index */
    1754           6 :                     ereport(ERROR,
    1755             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1756             :                              errmsg("invalid affix alias \"%s\"",
    1757             :                                     Conf->Spell[i]->p.flag)));
    1758         220 :                 if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
    1759           0 :                     ereport(ERROR,  /* only end of string, a digit or whitespace may follow */
    1760             :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1761             :                              errmsg("invalid affix alias \"%s\"",
    1762             :                                     Conf->Spell[i]->p.flag)));
    1763             :             }
    1764             :             else
    1765             :             {
    1766             :                 /*
    1767             :                  * If Conf->Spell[i]->p.flag is empty, then get empty value of
    1768             :                  * Conf->AffixData (0 index).
    1769             :                  */
    1770          22 :                 curaffix = 0;
    1771             :             }
    1772             : 
    1773         242 :             Conf->Spell[i]->p.d.affix = curaffix;
    1774         242 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1775             :         }
    1776             :     }
    1777             :     /* Otherwise fill Conf->AffixData here */
    1778             :     else
    1779             :     {
    1780             :         /* Count the number of different flags used in the dictionary */
    1781         108 :         qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *),
    1782             :               cmpspellaffix);
    1783             : 
    1784         108 :         naffix = 0;
    1785        1064 :         for (i = 0; i < Conf->nspell; i++)
    1786             :         {
    1787         956 :             if (i == 0 ||
    1788         848 :                 strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
    1789         848 :                 naffix++;
    1790             :         }
    1791             : 
    1792             :         /*
    1793             :          * Fill in Conf->AffixData with the affixes that were used in the
    1794             :          * dictionary. Replace textual flag-field of Conf->Spell entries with
    1795             :          * indexes into Conf->AffixData array.
                     :          *
                     :          * NOTE(review): this loop compares against the stored copy in
                     :          * AffixData instead of Spell[i - 1]->p.flag, presumably because
                     :          * p.d of entry i - 1 has already been written -- confirm against
                     :          * the declaration of SPELL.
    1796             :          */
    1797         108 :         Conf->AffixData = (const char **) palloc0(naffix * sizeof(const char *));
    1798             : 
    1799         108 :         curaffix = -1;
    1800        1064 :         for (i = 0; i < Conf->nspell; i++)
    1801             :         {
    1802         956 :             if (i == 0 ||
    1803         848 :                 strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
    1804             :             {
    1805         848 :                 curaffix++;
    1806             :                 Assert(curaffix < naffix);
    1807         848 :                 Conf->AffixData[curaffix] = cpstrdup(Conf,
    1808         848 :                                                      Conf->Spell[i]->p.flag);
    1809             :             }
    1810             : 
    1811         956 :             Conf->Spell[i]->p.d.affix = curaffix;
    1812         956 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1813             :         }
    1814             : 
    1815         108 :         Conf->lenAffixData = Conf->nAffixData = naffix;
    1816             :     }
    1817             : 
    1818             :     /* Start building the prefix tree */
    1819         130 :     qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
    1820         130 :     Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
    1821         124 : }
    1822             : 
    1823             : /*
    1824             :  * Makes a prefix tree for the given level using the repl string of an affix
    1825             :  * rule. Affixes with empty replace string do not include in the prefix tree.
    1826             :  * This affixes are included by mkVoidAffix().
    1827             :  *
    1828             :  * Conf: current dictionary.
    1829             :  * low: lower index of the Conf->Affix array.
    1830             :  * high: upper index of the Conf->Affix array.
    1831             :  * level: current prefix tree level.
    1832             :  * type: FF_SUFFIX or FF_PREFIX.
    1833             :  */
    1834             : static AffixNode *
    1835        2092 : mkANode(IspellDict *Conf, int low, int high, int level, int type)
    1836             : {
    1837             :     int         i;
    1838        2092 :     int         nchar = 0;
    1839        2092 :     uint8       lastchar = '\0';
    1840             :     AffixNode  *rs;
    1841             :     AffixNodeData *data;
    1842        2092 :     int         lownew = low;
    1843             :     int         naff;
    1844             :     AFFIX     **aff;
    1845             : 
    1846        5626 :     for (i = low; i < high; i++)
    1847        3534 :         if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
    1848             :         {
    1849        1844 :             nchar++;
    1850        1844 :             lastchar = GETCHAR(Conf->Affix + i, level, type);
    1851             :         }
    1852             : 
    1853        2092 :     if (!nchar)
    1854         798 :         return NULL;
    1855             : 
    1856        1294 :     aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
    1857        1294 :     naff = 0;
    1858             : 
    1859        1294 :     rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
    1860        1294 :     rs->length = nchar;
    1861        1294 :     data = rs->data;
    1862             : 
    1863        1294 :     lastchar = '\0';
    1864        3830 :     for (i = low; i < high; i++)
    1865        2536 :         if (Conf->Affix[i].replen > level)
    1866             :         {
    1867        2136 :             if (lastchar != GETCHAR(Conf->Affix + i, level, type))
    1868             :             {
    1869        1844 :                 if (lastchar)
    1870             :                 {
    1871             :                     /* Next level of the prefix tree */
    1872         550 :                     data->node = mkANode(Conf, lownew, i, level + 1, type);
    1873         550 :                     if (naff)
    1874             :                     {
    1875         124 :                         data->naff = naff;
    1876         124 :                         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1877         124 :                         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1878         124 :                         naff = 0;
    1879             :                     }
    1880         550 :                     data++;
    1881         550 :                     lownew = i;
    1882             :                 }
    1883        1844 :                 lastchar = GETCHAR(Conf->Affix + i, level, type);
    1884             :             }
    1885        2136 :             data->val = GETCHAR(Conf->Affix + i, level, type);
    1886        2136 :             if (Conf->Affix[i].replen == level + 1)
    1887             :             {                   /* affix stopped */
    1888         966 :                 aff[naff++] = Conf->Affix + i;
    1889             :             }
    1890             :         }
    1891             : 
    1892             :     /* Next level of the prefix tree */
    1893        1294 :     data->node = mkANode(Conf, lownew, high, level + 1, type);
    1894        1294 :     if (naff)
    1895             :     {
    1896         798 :         data->naff = naff;
    1897         798 :         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1898         798 :         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1899         798 :         naff = 0;
    1900             :     }
    1901             : 
    1902        1294 :     pfree(aff);
    1903             : 
    1904        1294 :     return rs;
    1905             : }
    1906             : 
    1907             : /*
    1908             :  * Makes the root void node in the prefix tree. The root void node is created
    1909             :  * for affixes which have empty replace string ("repl" field).
    1910             :  */
    1911             : static void
    1912         248 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
    1913             : {
    1914             :     int         i,
    1915         248 :                 cnt = 0;
    1916         248 :     int         start = (issuffix) ? startsuffix : 0;
    1917         248 :     int         end = (issuffix) ? Conf->naffixes : startsuffix;
    1918         248 :     AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
    1919             : 
    1920         248 :     Affix->length = 1;
    1921         248 :     Affix->isvoid = 1;
    1922             : 
    1923         248 :     if (issuffix)
    1924             :     {
    1925         124 :         Affix->data->node = Conf->Suffix;
    1926         124 :         Conf->Suffix = Affix;
    1927             :     }
    1928             :     else
    1929             :     {
    1930         124 :         Affix->data->node = Conf->Prefix;
    1931         124 :         Conf->Prefix = Affix;
    1932             :     }
    1933             : 
    1934             :     /* Count affixes with empty replace string */
    1935        1246 :     for (i = start; i < end; i++)
    1936         998 :         if (Conf->Affix[i].replen == 0)
    1937          32 :             cnt++;
    1938             : 
    1939             :     /* There is not affixes with empty replace string */
    1940         248 :     if (cnt == 0)
    1941         216 :         return;
    1942             : 
    1943          32 :     Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
    1944          32 :     Affix->data->naff = (uint32) cnt;
    1945             : 
    1946          32 :     cnt = 0;
    1947         256 :     for (i = start; i < end; i++)
    1948         224 :         if (Conf->Affix[i].replen == 0)
    1949             :         {
    1950          32 :             Affix->data->aff[cnt] = Conf->Affix + i;
    1951          32 :             cnt++;
    1952             :         }
    1953             : }
    1954             : 
    1955             : /*
    1956             :  * Checks if the affixflag is used by dictionary. Conf->AffixData does not
    1957             :  * contain affixflag if this flag is not used actually by the .dict file.
    1958             :  *
    1959             :  * Conf: current dictionary.
    1960             :  * affixflag: affix flag.
    1961             :  *
    1962             :  * Returns true if the Conf->AffixData array contains affixflag, otherwise
    1963             :  * returns false.
    1964             :  */
    1965             : static bool
    1966         168 : isAffixInUse(IspellDict *Conf, const char *affixflag)
    1967             : {
    1968             :     int         i;
    1969             : 
    1970        1250 :     for (i = 0; i < Conf->nAffixData; i++)
    1971        1220 :         if (IsAffixFlagInUse(Conf, i, affixflag))
    1972         138 :             return true;
    1973             : 
    1974          30 :     return false;
    1975             : }
    1976             : 
    1977             : /*
    1978             :  * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
    1979             :  */
    1980             : void
    1981         124 : NISortAffixes(IspellDict *Conf)
    1982             : {
    1983             :     AFFIX      *Affix;
    1984             :     size_t      i;
    1985             :     CMPDAffix  *ptr;
    1986         124 :     int         firstsuffix = Conf->naffixes;
    1987             : 
    1988         124 :     if (Conf->naffixes == 0)
    1989           0 :         return;
    1990             : 
    1991             :     /* Store compound affixes in the Conf->CompoundAffix array */
    1992         124 :     if (Conf->naffixes > 1)
    1993         124 :         qsort(Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
    1994         124 :     Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
    1995         124 :     ptr->affix = NULL;
    1996             : 
    1997        1122 :     for (i = 0; i < Conf->naffixes; i++)
    1998             :     {
    1999         998 :         Affix = &(((AFFIX *) Conf->Affix)[i]);
    2000         998 :         if (Affix->type == FF_SUFFIX && i < firstsuffix)
    2001         124 :             firstsuffix = i;
    2002             : 
    2003        1166 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
    2004         168 :             isAffixInUse(Conf, Affix->flag))
    2005             :         {
    2006         138 :             bool        issuffix = (Affix->type == FF_SUFFIX);
    2007             : 
    2008         138 :             if (ptr == Conf->CompoundAffix ||
    2009          88 :                 issuffix != (ptr - 1)->issuffix ||
    2010          44 :                 strbncmp((const unsigned char *) (ptr - 1)->affix,
    2011          44 :                          (const unsigned char *) Affix->repl,
    2012          44 :                          (ptr - 1)->len))
    2013             :             {
    2014             :                 /* leave only unique and minimal suffixes */
    2015         116 :                 ptr->affix = Affix->repl;
    2016         116 :                 ptr->len = Affix->replen;
    2017         116 :                 ptr->issuffix = issuffix;
    2018         116 :                 ptr++;
    2019             :             }
    2020             :         }
    2021             :     }
    2022         124 :     ptr->affix = NULL;
    2023         124 :     Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
    2024             : 
    2025             :     /* Start build a prefix tree */
    2026         124 :     Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
    2027         124 :     Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
    2028         124 :     mkVoidAffix(Conf, true, firstsuffix);
    2029         124 :     mkVoidAffix(Conf, false, firstsuffix);
    2030             : }
    2031             : 
    2032             : static AffixNodeData *
    2033        4620 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
    2034             : {
    2035             :     AffixNodeData *StopLow,
    2036             :                *StopHigh,
    2037             :                *StopMiddle;
    2038             :     uint8 symbol;
    2039             : 
    2040        4620 :     if (node->isvoid)
    2041             :     {                           /* search void affixes */
    2042        4020 :         if (node->data->naff)
    2043         342 :             return node->data;
    2044        3678 :         node = node->data->node;
    2045             :     }
    2046             : 
    2047        5382 :     while (node && *level < wrdlen)
    2048             :     {
    2049        5358 :         StopLow = node->data;
    2050        5358 :         StopHigh = node->data + node->length;
    2051       11826 :         while (StopLow < StopHigh)
    2052             :         {
    2053        8874 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2054        8874 :             symbol = GETWCHAR(word, wrdlen, *level, type);
    2055             : 
    2056        8874 :             if (StopMiddle->val == symbol)
    2057             :             {
    2058        2406 :                 (*level)++;
    2059        2406 :                 if (StopMiddle->naff)
    2060        1302 :                     return StopMiddle;
    2061        1104 :                 node = StopMiddle->node;
    2062        1104 :                 break;
    2063             :             }
    2064        6468 :             else if (StopMiddle->val < symbol)
    2065        1608 :                 StopLow = StopMiddle + 1;
    2066             :             else
    2067        4860 :                 StopHigh = StopMiddle;
    2068             :         }
    2069        4056 :         if (StopLow >= StopHigh)
    2070        2952 :             break;
    2071             :     }
    2072        2976 :     return NULL;
    2073             : }
    2074             : 
    2075             : static char *
    2076        1836 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
    2077             : {
    2078             :     /*
    2079             :      * Check compound allow flags
    2080             :      */
    2081             : 
    2082        1836 :     if (flagflags == 0)
    2083             :     {
    2084        1266 :         if (Affix->flagflags & FF_COMPOUNDONLY)
    2085         132 :             return NULL;
    2086             :     }
    2087         570 :     else if (flagflags & FF_COMPOUNDBEGIN)
    2088             :     {
    2089           0 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2090           0 :             return NULL;
    2091           0 :         if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
    2092           0 :             if (Affix->type == FF_SUFFIX)
    2093           0 :                 return NULL;
    2094             :     }
    2095         570 :     else if (flagflags & FF_COMPOUNDMIDDLE)
    2096             :     {
    2097         408 :         if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
    2098         228 :             (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
    2099         180 :             return NULL;
    2100             :     }
    2101         162 :     else if (flagflags & FF_COMPOUNDLAST)
    2102             :     {
    2103         162 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2104           0 :             return NULL;
    2105         162 :         if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
    2106         150 :             if (Affix->type == FF_PREFIX)
    2107           0 :                 return NULL;
    2108             :     }
    2109             : 
    2110             :     /*
    2111             :      * make replace pattern of affix
    2112             :      */
    2113        1524 :     if (Affix->type == FF_SUFFIX)
    2114             :     {
    2115        1044 :         strcpy(newword, word);
    2116        1044 :         strcpy(newword + len - Affix->replen, Affix->find);
    2117        1044 :         if (baselen)            /* store length of non-changed part of word */
    2118        1044 :             *baselen = len - Affix->replen;
    2119             :     }
    2120             :     else
    2121             :     {
    2122             :         /*
    2123             :          * if prefix is an all non-changed part's length then all word
    2124             :          * contains only prefix and suffix, so out
    2125             :          */
    2126         480 :         if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
    2127           0 :             return NULL;
    2128         480 :         strcpy(newword, Affix->find);
    2129         480 :         strcat(newword, word + Affix->replen);
    2130             :     }
    2131             : 
    2132             :     /*
    2133             :      * check resulting word
    2134             :      */
    2135        1524 :     if (Affix->issimple)
    2136         480 :         return newword;
    2137        1044 :     else if (Affix->isregis)
    2138             :     {
    2139         708 :         if (RS_execute(&(Affix->reg.regis), newword))
    2140         672 :             return newword;
    2141             :     }
    2142             :     else
    2143             :     {
    2144             :         pg_wchar   *data;
    2145             :         size_t      data_len;
    2146             :         int         newword_len;
    2147             : 
    2148             :         /* Convert data string to wide characters */
    2149         336 :         newword_len = strlen(newword);
    2150         336 :         data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
    2151         336 :         data_len = pg_mb2wchar_with_len(newword, data, newword_len);
    2152             : 
    2153         336 :         if (pg_regexec(Affix->reg.pregex, data, data_len,
    2154             :                        0, NULL, 0, NULL, 0) == REG_OKAY)
    2155             :         {
    2156         336 :             pfree(data);
    2157         336 :             return newword;
    2158             :         }
    2159           0 :         pfree(data);
    2160             :     }
    2161             : 
    2162          36 :     return NULL;
    2163             : }
    2164             : 
    2165             : static int
    2166         540 : addToResult(char **forms, char **cur, char *word)
    2167             : {
    2168         540 :     if (cur - forms >= MAX_NORM - 1)
    2169           0 :         return 0;
    2170         540 :     if (forms == cur || strcmp(word, *(cur - 1)) != 0)
    2171             :     {
    2172         540 :         *cur = pstrdup(word);
    2173         540 :         *(cur + 1) = NULL;
    2174         540 :         return 1;
    2175             :     }
    2176             : 
    2177           0 :     return 0;
    2178             : }
    2179             : 
    2180             : static char **
    2181        1506 : NormalizeSubWord(IspellDict *Conf, const char *word, int flag)
    2182             : {
    2183        1506 :     AffixNodeData *suffix = NULL,
    2184        1506 :                *prefix = NULL;
    2185        1506 :     int         slevel = 0,
    2186        1506 :                 plevel = 0;
    2187        1506 :     int         wrdlen = strlen(word),
    2188             :                 swrdlen;
    2189             :     char      **forms;
    2190             :     char      **cur;
    2191        1506 :     char        newword[2 * MAXNORMLEN] = "";
    2192        1506 :     char        pnewword[2 * MAXNORMLEN] = "";
    2193        1506 :     AffixNode  *snode = Conf->Suffix,
    2194             :                *pnode;
    2195             :     int         i,
    2196             :                 j;
    2197             : 
    2198        1506 :     if (wrdlen > MAXNORMLEN)
    2199           0 :         return NULL;
    2200        1506 :     cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
    2201        1506 :     *cur = NULL;
    2202             : 
    2203             : 
    2204             :     /* Check that the word itself is normal form */
    2205        1506 :     if (FindWord(Conf, word, VoidString, flag))
    2206             :     {
    2207         468 :         *cur = pstrdup(word);
    2208         468 :         cur++;
    2209         468 :         *cur = NULL;
    2210             :     }
    2211             : 
    2212             :     /* Find all other NORMAL forms of the 'word' (check only prefix) */
    2213        1506 :     pnode = Conf->Prefix;
    2214        1506 :     plevel = 0;
    2215        1722 :     while (pnode)
    2216             :     {
    2217        1506 :         prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
    2218        1506 :         if (!prefix)
    2219        1290 :             break;
    2220         432 :         for (j = 0; j < prefix->naff; j++)
    2221             :         {
    2222         216 :             if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
    2223             :             {
    2224             :                 /* prefix success */
    2225         192 :                 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
    2226          48 :                     cur += addToResult(forms, cur, newword);
    2227             :             }
    2228             :         }
    2229         216 :         pnode = prefix->node;
    2230             :     }
    2231             : 
    2232             :     /*
    2233             :      * Find all other NORMAL forms of the 'word' (check suffix and then
    2234             :      * prefix)
    2235             :      */
    2236        2598 :     while (snode)
    2237             :     {
    2238        2106 :         int         baselen = 0;
    2239             : 
    2240             :         /* find possible suffix */
    2241        2106 :         suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
    2242        2106 :         if (!suffix)
    2243        1014 :             break;
    2244             :         /* foreach suffix check affix */
    2245        2376 :         for (i = 0; i < suffix->naff; i++)
    2246             :         {
    2247        1284 :             if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
    2248             :             {
    2249             :                 /* suffix success */
    2250        1008 :                 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
    2251         276 :                     cur += addToResult(forms, cur, newword);
    2252             : 
    2253             :                 /* now we will look changed word with prefixes */
    2254        1008 :                 pnode = Conf->Prefix;
    2255        1008 :                 plevel = 0;
    2256        1008 :                 swrdlen = strlen(newword);
    2257        1344 :                 while (pnode)
    2258             :                 {
    2259        1008 :                     prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
    2260        1008 :                     if (!prefix)
    2261         672 :                         break;
    2262         672 :                     for (j = 0; j < prefix->naff; j++)
    2263             :                     {
    2264         336 :                         if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
    2265             :                         {
    2266             :                             /* prefix success */
    2267         576 :                             const char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
    2268         288 :                                 VoidString : prefix->aff[j]->flag;
    2269             : 
    2270         288 :                             if (FindWord(Conf, pnewword, ff, flag))
    2271         216 :                                 cur += addToResult(forms, cur, pnewword);
    2272             :                         }
    2273             :                     }
    2274         336 :                     pnode = prefix->node;
    2275             :                 }
    2276             :             }
    2277             :         }
    2278             : 
    2279        1092 :         snode = suffix->node;
    2280             :     }
    2281             : 
    2282        1506 :     if (cur == forms)
    2283             :     {
    2284         666 :         pfree(forms);
    2285         666 :         return NULL;
    2286             :     }
    2287         840 :     return forms;
    2288             : }
    2289             : 
    2290             : typedef struct SplitVar     /* one variant of splitting a word into stems */
    2291             : {
    2292             :     int         nstem;      /* number of entries used in stem[] */
    2293             :     int         lenstem;    /* allocated length of stem[] */
    2294             :     char      **stem;       /* array of stem strings */
    2295             :     struct SplitVar *next;  /* next variant in the list, or NULL */
    2296             : } SplitVar;
    2297             : 
    2298             : static int
    2299        6060 : CheckCompoundAffixes(CMPDAffix **ptr, const char *word, int len, bool CheckInPlace)
    2300             : {
    2301             :     bool        issuffix;
    2302             : 
    2303             :     /* in case CompoundAffix is null: */
    2304        6060 :     if (*ptr == NULL)
    2305           0 :         return -1;
    2306             : 
    2307        6060 :     if (CheckInPlace)
    2308             :     {
    2309       11568 :         while ((*ptr)->affix)
    2310             :         {
    2311        6444 :             if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
    2312             :             {
    2313          60 :                 len = (*ptr)->len;
    2314          60 :                 issuffix = (*ptr)->issuffix;
    2315          60 :                 (*ptr)++;
    2316          60 :                 return (issuffix) ? len : 0;
    2317             :             }
    2318        6384 :             (*ptr)++;
    2319             :         }
    2320             :     }
    2321             :     else
    2322             :     {
    2323             :         char       *affbegin;
    2324             : 
    2325        1692 :         while ((*ptr)->affix)
    2326             :         {
    2327         942 :             if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
    2328             :             {
    2329         126 :                 len = (*ptr)->len + (affbegin - word);
    2330         126 :                 issuffix = (*ptr)->issuffix;
    2331         126 :                 (*ptr)++;
    2332         126 :                 return (issuffix) ? len : 0;
    2333             :             }
    2334         816 :             (*ptr)++;
    2335             :         }
    2336             :     }
    2337        5874 :     return -1;
    2338             : }
    2339             : 
    2340             : static SplitVar *
    2341        1410 : CopyVar(SplitVar *s, int makedup)
    2342             : {
    2343        1410 :     SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));
    2344             : 
    2345        1410 :     v->next = NULL;
    2346        1410 :     if (s)
    2347             :     {
    2348             :         int         i;
    2349             : 
    2350         660 :         v->lenstem = s->lenstem;
    2351         660 :         v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
    2352         660 :         v->nstem = s->nstem;
    2353        1002 :         for (i = 0; i < s->nstem; i++)
    2354         342 :             v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
    2355             :     }
    2356             :     else
    2357             :     {
    2358         750 :         v->lenstem = 16;
    2359         750 :         v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
    2360         750 :         v->nstem = 0;
    2361             :     }
    2362        1410 :     return v;
    2363             : }
    2364             : 
    2365             : static void
    2366        1890 : AddStem(SplitVar *v, char *word)
    2367             : {
    2368        1890 :     if (v->nstem >= v->lenstem)
    2369             :     {
    2370           0 :         v->lenstem *= 2;
    2371           0 :         v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
    2372             :     }
    2373             : 
    2374        1890 :     v->stem[v->nstem] = word;
    2375        1890 :     v->nstem++;
    2376        1890 : }
    2377             : 
    2378             : static SplitVar *
    2379        1320 : SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, const char *word, int wordlen, int startpos, int minpos)
    2380             : {
    2381        1320 :     SplitVar   *var = NULL;
    2382             :     SPNodeData *StopLow,
    2383             :                *StopHigh,
    2384        1320 :                *StopMiddle = NULL;
    2385        1320 :     SPNode     *node = (snode) ? snode : Conf->Dictionary;
    2386        1320 :     int         level = (snode) ? minpos : startpos;    /* recursive
    2387             :                                                          * minpos==level */
    2388             :     int         lenaff;
    2389             :     CMPDAffix  *caff;
    2390             :     char       *notprobed;
    2391        1320 :     int         compoundflag = 0;
    2392             : 
    2393             :     /* since this function recurses, it could be driven to stack overflow */
    2394        1320 :     check_stack_depth();
    2395             : 
    2396        1320 :     notprobed = (char *) palloc(wordlen);
    2397        1320 :     memset(notprobed, 1, wordlen);
    2398        1320 :     var = CopyVar(orig, 1);
    2399             : 
    2400        7452 :     while (level < wordlen)
    2401             :     {
    2402             :         /* find word with epenthetic or/and compound affix */
    2403        7194 :         caff = Conf->CompoundAffix;
    2404        7380 :         while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
    2405             :         {
    2406             :             /*
    2407             :              * there is one of compound affixes, so check word for existings
    2408             :              */
    2409             :             char        buf[MAXNORMLEN];
    2410             :             char      **subres;
    2411             : 
    2412         186 :             lenaff = level - startpos + lenaff;
    2413             : 
    2414         186 :             if (!notprobed[startpos + lenaff - 1])
    2415           0 :                 continue;
    2416             : 
    2417         186 :             if (level + lenaff - 1 <= minpos)
    2418           0 :                 continue;
    2419             : 
    2420         186 :             if (lenaff >= MAXNORMLEN)
    2421           0 :                 continue;       /* skip too big value */
    2422         186 :             if (lenaff > 0)
    2423         186 :                 memcpy(buf, word + startpos, lenaff);
    2424         186 :             buf[lenaff] = '\0';
    2425             : 
    2426         186 :             if (level == 0)
    2427           0 :                 compoundflag = FF_COMPOUNDBEGIN;
    2428         186 :             else if (level == wordlen - 1)
    2429           0 :                 compoundflag = FF_COMPOUNDLAST;
    2430             :             else
    2431         186 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2432         186 :             subres = NormalizeSubWord(Conf, buf, compoundflag);
    2433         186 :             if (subres)
    2434             :             {
    2435             :                 /* Yes, it was a word from dictionary */
    2436          90 :                 SplitVar   *new = CopyVar(var, 0);
    2437          90 :                 SplitVar   *ptr = var;
    2438          90 :                 char      **sptr = subres;
    2439             : 
    2440          90 :                 notprobed[startpos + lenaff - 1] = 0;
    2441             : 
    2442         180 :                 while (*sptr)
    2443             :                 {
    2444          90 :                     AddStem(new, *sptr);
    2445          90 :                     sptr++;
    2446             :                 }
    2447          90 :                 pfree(subres);
    2448             : 
    2449          90 :                 while (ptr->next)
    2450           0 :                     ptr = ptr->next;
    2451          90 :                 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
    2452             : 
    2453          90 :                 pfree(new->stem);
    2454          90 :                 pfree(new);
    2455             :             }
    2456             :         }
    2457             : 
    2458        7194 :         if (!node)
    2459         750 :             break;
    2460             : 
    2461        6444 :         StopLow = node->data;
    2462        6444 :         StopHigh = node->data + node->length;
    2463        8694 :         while (StopLow < StopHigh)
    2464             :         {
    2465        8064 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2466        8064 :             if (StopMiddle->val == ((uint8 *) (word))[level])
    2467        5814 :                 break;
    2468        2250 :             else if (StopMiddle->val < ((uint8 *) (word))[level])
    2469         978 :                 StopLow = StopMiddle + 1;
    2470             :             else
    2471        1272 :                 StopHigh = StopMiddle;
    2472             :         }
    2473             : 
    2474        6444 :         if (StopLow < StopHigh)
    2475             :         {
    2476        5814 :             if (startpos == 0)
    2477        3270 :                 compoundflag = FF_COMPOUNDBEGIN;
    2478        2544 :             else if (level == wordlen - 1)
    2479         288 :                 compoundflag = FF_COMPOUNDLAST;
    2480             :             else
    2481        2256 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2482             : 
    2483             :             /* find infinitive */
    2484        5814 :             if (StopMiddle->isword &&
    2485        1536 :                 (StopMiddle->compoundflag & compoundflag) &&
    2486        1272 :                 notprobed[level])
    2487             :             {
    2488             :                 /* ok, we found full compoundallowed word */
    2489        1272 :                 if (level > minpos)
    2490             :                 {
    2491             :                     /* and its length more than minimal */
    2492         792 :                     if (wordlen == level + 1)
    2493             :                     {
    2494             :                         /* well, it was last word */
    2495         312 :                         AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2496         312 :                         pfree(notprobed);
    2497         312 :                         return var;
    2498             :                     }
    2499             :                     else
    2500             :                     {
    2501             :                         /* then we will search more big word at the same point */
    2502         480 :                         SplitVar   *ptr = var;
    2503             : 
    2504         744 :                         while (ptr->next)
    2505         264 :                             ptr = ptr->next;
    2506         480 :                         ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
    2507             :                         /* we can find next word */
    2508         480 :                         level++;
    2509         480 :                         AddStem(var, pnstrdup(word + startpos, level - startpos));
    2510         480 :                         node = Conf->Dictionary;
    2511         480 :                         startpos = level;
    2512         480 :                         continue;
    2513             :                     }
    2514             :                 }
    2515             :             }
    2516        5022 :             node = StopMiddle->node;
    2517             :         }
    2518             :         else
    2519         630 :             node = NULL;
    2520        5652 :         level++;
    2521             :     }
    2522             : 
    2523        1008 :     AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2524        1008 :     pfree(notprobed);
    2525        1008 :     return var;
    2526             : }
    2527             : 
    /*
     * addNorm
     *      Append one entry to the lexeme array *lres at the cursor *lcur.
     *
     * lres     - in/out: start of the result array; when it is still NULL the
     *            array is allocated here with room for MAX_NORM entries.
     * lcur     - in/out: next free slot of that array; advanced by one after
     *            an entry has been stored.
     * word     - string pointer stored in the entry as-is (it is not copied).
     * flags    - value stored in the entry's flags field.
     * NVariant - value stored in the entry's nvariant field.
     *
     * When MAX_NORM - 1 entries are already in use the call stores nothing:
     * the word is dropped and no error is reported.
     */
    2528             : static void
    2529        1314 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
    2530             : {
                           /* no array yet: allocate it and put the cursor at its first slot */
    2531        1314 :     if (*lres == NULL)
    2532         606 :         *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
    2533             : 
                           /*
                            * Only append while at least one further slot remains after this
                            * one, because the slot following the new entry is written below.
                            */
    2534        1314 :     if (*lcur - *lres < MAX_NORM - 1)
    2535             :     {
    2536        1314 :         (*lcur)->lexeme = word;
    2537        1314 :         (*lcur)->flags = flags;
    2538        1314 :         (*lcur)->nvariant = NVariant;
    2539        1314 :         (*lcur)++;
                               /* mark the end of the filled part with a NULL lexeme */
    2540        1314 :         (*lcur)->lexeme = NULL;
    2541             :     }
    2542        1314 : }
    2543             : 
    /*
     * NINormalizeWord
     *      Build the array of lexemes for "word" using the dictionary Conf.
     *
     * First the whole word is passed to NormalizeSubWord(); every string it
     * returns becomes one lexeme with its own nvariant number, counting from 1.
     * Then, if Conf->usecompound is set, SplitToVariants() is asked for splits
     * of the word.  For each split that has more than one stem, the last stem
     * is passed to NormalizeSubWord() with FF_COMPOUNDLAST, and every string
     * returned yields one variant: the leading stems followed by that string,
     * all carrying the same nvariant number.
     *
     * Returns the array filled through addNorm(), or NULL when no lexeme was
     * added.
     */
    2544             : TSLexeme *
    2545         750 : NINormalizeWord(IspellDict *Conf, const char *word)
    2546             : {
    2547             :     char      **res;
    2548         750 :     TSLexeme   *lcur = NULL,
    2549         750 :                *lres = NULL;
    2550         750 :     uint16      NVariant = 1;
    2551             : 
                           /* try the word as a whole, with no compound flag */
    2552         750 :     res = NormalizeSubWord(Conf, word, 0);
    2553             : 
    2554         750 :     if (res)
    2555             :     {
    2556         486 :         char      **ptr = res;
    2557             : 
                               /* res is a NULL-terminated list; each string is its own variant */
    2558        1140 :         while (*ptr && (lcur - lres) < MAX_NORM)
    2559             :         {
    2560         654 :             addNorm(&lres, &lcur, *ptr, 0, NVariant++);
    2561         654 :             ptr++;
    2562             :         }
                               /* free only the pointer list; the strings were handed to addNorm() */
    2563         486 :         pfree(res);
    2564             :     }
    2565             : 
    2566         750 :     if (Conf->usecompound)
    2567             :     {
    2568         750 :         int         wordlen = strlen(word);
    2569             :         SplitVar   *ptr,
    2570         750 :                    *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
    2571             :         int         i;
    2572             : 
                               /* walk the linked list of splits, freeing each node as we go */
    2573        2070 :         while (var)
    2574             :         {
                                   /* a split with a single stem is not a compound; just free it */
    2575        1320 :             if (var->nstem > 1)
    2576             :             {
                                       /* normalize the trailing stem as the last part of a compound */
    2577         570 :                 char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
    2578             : 
    2579         570 :                 if (subres)
    2580             :                 {
    2581         264 :                     char      **subptr = subres;
    2582             : 
    2583         528 :                     while (*subptr)
    2584             :                     {
                                               /*
                                                * Emit the leading stems.  The first result reuses the
                                                * stem strings themselves; later results get pstrdup'd
                                                * copies.
                                                */
    2585         660 :                         for (i = 0; i < var->nstem - 1; i++)
    2586             :                         {
    2587         396 :                             addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
    2588             :                         }
    2589             : 
                                               /* then the normalized last part, under the same variant number */
    2590         264 :                         addNorm(&lres, &lcur, *subptr, 0, NVariant);
    2591         264 :                         subptr++;
    2592         264 :                         NVariant++;
    2593             :                     }
    2594             : 
    2595         264 :                     pfree(subres);
                                           /*
                                            * The leading stems were passed to addNorm() above, so they
                                            * must not be freed below: a NULL in stem[0] makes the
                                            * cleanup loop stop at once.  The last stem was only used as
                                            * input to NormalizeSubWord(), so it is freed here.
                                            *
                                            * NOTE(review): if addNorm() dropped an entry because the
                                            * array was full, that stem is neither stored nor freed here
                                            * -- presumably left to memory-context cleanup; confirm.
                                            */
    2596         264 :                     var->stem[0] = NULL;
    2597         264 :                     pfree(var->stem[var->nstem - 1]);
    2598             :                 }
    2599             :             }
    2600             : 
                                   /* free the stems still owned by this split (stops at a NULL entry) */
    2601        2742 :             for (i = 0; i < var->nstem && var->stem[i]; i++)
    2602        1422 :                 pfree(var->stem[i]);
                                   /* remember the successor before releasing the node itself */
    2603        1320 :             ptr = var->next;
    2604        1320 :             pfree(var->stem);
    2605        1320 :             pfree(var);
    2606        1320 :             var = ptr;
    2607             :         }
    2608             :     }
    2609             : 
    2610         750 :     return lres;
    2611             : }

Generated by: LCOV version 1.14