LCOV - code coverage report
Current view: top level - src/backend/tsearch - spell.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.9 % 1137 1045
Test Date: 2026-05-16 06:16:24 Functions: 100.0 % 46 46
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * spell.c
       4              :  *      Normalizing word with ISpell
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  * Ispell dictionary
       9              :  * -----------------
      10              :  *
      11              :  * Rules of dictionaries are defined in two files with .affix and .dict
      12              :  * extensions. They are used by spell checker programs Ispell and Hunspell.
      13              :  *
      14              :  * An .affix file declares morphological rules to get a basic form of words.
      15              :  * The format of an .affix file has different structure for Ispell and Hunspell
      16              :  * dictionaries. The Hunspell format is more complicated. But when an .affix
      17              :  * file is imported and compiled, it is stored in the same structure AffixNode.
      18              :  *
      19              :  * A .dict file stores a list of basic forms of words with references to
      20              :  * affix rules. The format of a .dict file has the same structure for Ispell
      21              :  * and Hunspell dictionaries.
      22              :  *
      23              :  * Compilation of a dictionary
      24              :  * ---------------------------
      25              :  *
      26              :  * A compiled dictionary is stored in the IspellDict structure. Compilation of
      27              :  * a dictionary is divided into several steps:
      28              :  *  - NIImportDictionary() - stores each word of a .dict file in the
      29              :  *    temporary Spell field.
      30              :  *  - NIImportAffixes() - stores affix rules of an .affix file in the
      31              :  *    Affix field (not temporary) if an .affix file has the Ispell format.
      32              :  *    -> NIImportOOAffixes() - stores affix rules if an .affix file has the
      33              :  *       Hunspell format. The AffixData field is initialized if AF parameter
      34              :  *       is defined.
      35              :  *  - NISortDictionary() - builds a prefix tree (Trie) from the word list
      36              :  *    and stores it in the Dictionary field. The word list is taken from the
      37              :  *    Spell field. The AffixData field is initialized if the AF parameter is
      38              :  *    not defined.
      39              :  *  - NISortAffixes():
      40              :  *    - builds a list of compound affixes from the affix list and stores it
      41              :  *      in the CompoundAffix.
      42              :  *    - builds prefix trees (Trie) from the affix list for prefixes and suffixes
      43              :  *      and stores them in Suffix and Prefix fields.
      44              :  *    The affix list is taken from the Affix field.
      45              :  *
      46              :  * Memory management
      47              :  * -----------------
      48              :  *
      49              :  * The IspellDict structure has a Spell field which is used only at compile
      50              :  * time. The Spell field stores a word list, which can take a lot of memory.
      51              :  * Therefore, once a dictionary is compiled, this field is cleared by
      52              :  * NIFinishBuild().
      53              :  *
      54              :  * All resources that should be released by NIFinishBuild() are allocated using
      55              :  * tmpalloc() and tmpalloc0().
      56              :  *
      57              :  * IDENTIFICATION
      58              :  *    src/backend/tsearch/spell.c
      59              :  *
      60              :  *-------------------------------------------------------------------------
      61              :  */
      62              : 
      63              : #include "postgres.h"
      64              : 
      65              : #include "catalog/pg_collation.h"
      66              : #include "miscadmin.h"
      67              : #include "tsearch/dicts/spell.h"
      68              : #include "tsearch/ts_locale.h"
      69              : #include "utils/formatting.h"
      70              : #include "utils/memutils.h"
      71              : 
      72              : 
      73              : /*
      74              :  * Initialization requires a lot of memory that's not needed
      75              :  * after the initialization is done.  During initialization,
      76              :  * CurrentMemoryContext is the long-lived memory context associated
      77              :  * with the dictionary cache entry.  We keep the short-lived stuff
      78              :  * in the Conf->buildCxt context.
      79              :  */
      80              : #define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
      81              : #define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
      82              : 
      83              : /*
      84              :  * Prepare for constructing an ISpell dictionary.
      85              :  *
      86              :  * The IspellDict struct is assumed to be zeroed when allocated.
      87              :  */
      88              : void
      89           87 : NIStartBuild(IspellDict *Conf)
      90              : {
      91              :     /*
      92              :      * The temp context is a child of CurTransactionContext, so that it will
      93              :      * go away automatically on error.
      94              :      */
      95           87 :     Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
      96              :                                            "Ispell dictionary init context",
      97              :                                            ALLOCSET_DEFAULT_SIZES);
      98           87 : }
      99              : 
     100              : /*
     101              :  * Clean up when dictionary construction is complete.
     102              :  */
     103              : void
     104           71 : NIFinishBuild(IspellDict *Conf)
     105              : {
     106              :     /* Release no-longer-needed temp memory */
     107           71 :     MemoryContextDelete(Conf->buildCxt);
     108              :     /* Just for cleanliness, zero the now-dangling pointers */
     109           71 :     Conf->buildCxt = NULL;
     110           71 :     Conf->Spell = NULL;
     111           71 :     Conf->firstfree = NULL;
     112           71 :     Conf->CompoundAffixFlags = NULL;
     113           71 : }
     114              : 
     115              : 
     116              : /*
     117              :  * "Compact" palloc: allocate without extra palloc overhead.
     118              :  *
     119              :  * Since we have no need to free the ispell data items individually, there's
     120              :  * not much value in the per-chunk overhead normally consumed by palloc.
     121              :  * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
     122              :  *
     123              :  * We currently pre-zero all data allocated this way, even though some of it
     124              :  * doesn't need that.  The cpalloc and cpalloc0 macros are just documentation
     125              :  * to indicate which allocations actually require zeroing.
     126              :  */
     127              : #define COMPACT_ALLOC_CHUNK 8192    /* amount to get from palloc at once */
     128              : #define COMPACT_MAX_REQ     1024    /* must be < COMPACT_ALLOC_CHUNK */
     129              : 
     130              : static void *
     131         8013 : compact_palloc0(IspellDict *Conf, size_t size)
     132              : {
     133              :     void       *result;
     134              : 
     135              :     /* Should only be called during init */
     136              :     Assert(Conf->buildCxt != NULL);
     137              : 
     138              :     /* No point in this for large chunks */
     139         8013 :     if (size > COMPACT_MAX_REQ)
     140            0 :         return palloc0(size);
     141              : 
     142              :     /* Keep everything maxaligned */
     143         8013 :     size = MAXALIGN(size);
     144              : 
     145              :     /* Need more space? */
     146         8013 :     if (size > Conf->avail)
     147              :     {
     148           83 :         Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
     149           83 :         Conf->avail = COMPACT_ALLOC_CHUNK;
     150              :     }
     151              : 
     152         8013 :     result = Conf->firstfree;
     153         8013 :     Conf->firstfree += size;
     154         8013 :     Conf->avail -= size;
     155              : 
     156         8013 :     return result;
     157              : }
     158              : 
     159              : #define cpalloc(size) compact_palloc0(Conf, size)
     160              : #define cpalloc0(size) compact_palloc0(Conf, size)
     161              : 
     162              : static char *
     163         4284 : cpstrdup(IspellDict *Conf, const char *str)
     164              : {
     165         4284 :     char       *res = cpalloc(strlen(str) + 1);
     166              : 
     167         4284 :     strcpy(res, str);
     168         4284 :     return res;
     169              : }
     170              : 
     171              : 
     172              : /*
     173              :  * Apply str_tolower(), producing a temporary result (in the buildCxt).
     174              :  */
     175              : static char *
     176         3741 : lowerstr_ctx(IspellDict *Conf, const char *src)
     177              : {
     178              :     MemoryContext saveCtx;
     179              :     char       *dst;
     180              : 
     181         3741 :     saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
     182         3741 :     dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
     183         3741 :     MemoryContextSwitchTo(saveCtx);
     184              : 
     185         3741 :     return dst;
     186              : }
     187              : 
     188              : #define MAX_NORM 1024
     189              : #define MAXNORMLEN 256
     190              : 
     191              : #define STRNCMP(s,p)    strncmp( (s), (p), strlen(p) )
     192              : #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
     193              : #define GETCHAR(A,N,T)    GETWCHAR( (A)->repl, (A)->replen, N, T )
     194              : 
     195              : static const char *VoidString = "";
     196              : 
     197              : static int
     198         1866 : cmpspell(const void *s1, const void *s2)
     199              : {
     200         1866 :     return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
     201              : }
     202              : 
     203              : static int
     204         1456 : cmpspellaffix(const void *s1, const void *s2)
     205              : {
     206         2912 :     return strcmp((*(SPELL *const *) s1)->p.flag,
     207         1456 :                   (*(SPELL *const *) s2)->p.flag);
     208              : }
     209              : 
     210              : static int
     211         2533 : cmpcmdflag(const void *f1, const void *f2)
     212              : {
     213         2533 :     const CompoundAffixFlag *fv1 = f1;
     214         2533 :     const CompoundAffixFlag *fv2 = f2;
     215              : 
     216              :     Assert(fv1->flagMode == fv2->flagMode);
     217              : 
     218         2533 :     if (fv1->flagMode == FM_NUM)
     219              :     {
     220          489 :         if (fv1->flag.i == fv2->flag.i)
     221           74 :             return 0;
     222              : 
     223          415 :         return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
     224              :     }
     225              : 
     226         2044 :     return strcmp(fv1->flag.s, fv2->flag.s);
     227              : }
     228              : 
     229              : static char *
     230          755 : findchar(char *str, int c)
     231              : {
     232         5564 :     while (*str)
     233              :     {
     234         5481 :         if (t_iseq(str, c))
     235          672 :             return str;
     236         4809 :         str += pg_mblen_cstr(str);
     237              :     }
     238              : 
     239           83 :     return NULL;
     240              : }
     241              : 
     242              : static char *
     243           27 : findchar2(char *str, int c1, int c2)
     244              : {
     245          567 :     while (*str)
     246              :     {
     247          567 :         if (t_iseq(str, c1) || t_iseq(str, c2))
     248           27 :             return str;
     249          540 :         str += pg_mblen_cstr(str);
     250              :     }
     251              : 
     252            0 :     return NULL;
     253              : }
     254              : 
     255              : 
     256              : /* backward string compare for suffix tree operations */
     257              : static int
     258          745 : strbcmp(const unsigned char *s1, const unsigned char *s2)
     259              : {
     260          745 :     int         l1 = strlen((const char *) s1) - 1,
     261          745 :                 l2 = strlen((const char *) s2) - 1;
     262              : 
     263          997 :     while (l1 >= 0 && l2 >= 0)
     264              :     {
     265          780 :         if (s1[l1] < s2[l2])
     266          169 :             return -1;
     267          611 :         if (s1[l1] > s2[l2])
     268          359 :             return 1;
     269          252 :         l1--;
     270          252 :         l2--;
     271              :     }
     272          217 :     if (l1 < l2)
     273           58 :         return -1;
     274          159 :     if (l1 > l2)
     275          133 :         return 1;
     276              : 
     277           26 :     return 0;
     278              : }
     279              : 
     280              : static int
     281           26 : strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
     282              : {
     283           26 :     int         l1 = strlen((const char *) s1) - 1,
     284           26 :                 l2 = strlen((const char *) s2) - 1,
     285           26 :                 l = count;
     286              : 
     287           39 :     while (l1 >= 0 && l2 >= 0 && l > 0)
     288              :     {
     289           26 :         if (s1[l1] < s2[l2])
     290           13 :             return -1;
     291           13 :         if (s1[l1] > s2[l2])
     292            0 :             return 1;
     293           13 :         l1--;
     294           13 :         l2--;
     295           13 :         l--;
     296              :     }
     297           13 :     if (l == 0)
     298           13 :         return 0;
     299            0 :     if (l1 < l2)
     300            0 :         return -1;
     301            0 :     if (l1 > l2)
     302            0 :         return 1;
     303            0 :     return 0;
     304              : }
     305              : 
     306              : /*
     307              :  * Compares affixes.
     308              :  * First compares the affix types; prefixes should sort before suffixes.
     309              :  * If the types are equal, compares repl (forward for prefixes, else backward).
     310              :  */
     311              : static int
     312         1260 : cmpaffix(const void *s1, const void *s2)
     313              : {
     314         1260 :     const AFFIX *a1 = (const AFFIX *) s1;
     315         1260 :     const AFFIX *a2 = (const AFFIX *) s2;
     316              : 
     317         1260 :     if (a1->type < a2->type)
     318          288 :         return -1;
     319          972 :     if (a1->type > a2->type)
     320           85 :         return 1;
     321          887 :     if (a1->type == FF_PREFIX)
     322          142 :         return strcmp(a1->repl, a2->repl);
     323              :     else
     324          745 :         return strbcmp((const unsigned char *) a1->repl,
     325          745 :                        (const unsigned char *) a2->repl);
     326              : }
     327              : 
     328              : /*
     329              :  * Gets an affix flag from the set of affix flags (sflagset).
     330              :  *
     331              :  * Several flags can be stored in a single string. Flags can be represented by:
     332              :  * - 1 character (FM_CHAR). A character may be Unicode.
     333              :  * - 2 characters (FM_LONG). A character may be Unicode.
     334              :  * - numbers from 1 to 65000 (FM_NUM).
     335              :  *
     336              :  * Depending on the flagMode an affix string can have the following format:
     337              :  * - FM_CHAR: ABCD
     338              :  *   Here we have 4 flags: A, B, C and D
     339              :  * - FM_LONG: ABCDE*
     340              :  *   Here we have 3 flags: AB, CD and E*
     341              :  * - FM_NUM: 200,205,50
     342              :  *   Here we have 3 flags: 200, 205 and 50
     343              :  *
     344              :  * Conf: current dictionary.
     345              :  * sflagset: the set of affix flags. On return, points to the start of the
     346              :  *           next affix flag.
     347              :  * sflag: returns an affix flag from sflagset.
     348              :  */
     349              : static void
     350         4086 : getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
     351              : {
     352              :     int32       s;
     353              :     char       *next;
     354         4086 :     const char *sbuf = *sflagset;
     355              :     int         maxstep;
     356              :     int         clen;
     357         4086 :     bool        stop = false;
     358         4086 :     bool        met_comma = false;
     359              : 
     360         4086 :     maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
     361              : 
     362         5360 :     while (**sflagset)
     363              :     {
     364         5360 :         switch (Conf->flagMode)
     365              :         {
     366         4571 :             case FM_LONG:
     367              :             case FM_CHAR:
     368         4571 :                 clen = ts_copychar_cstr(sflag, *sflagset);
     369         4571 :                 sflag += clen;
     370              : 
     371              :                 /* Go to start of the next flag */
     372         4571 :                 *sflagset += clen;
     373              : 
     374              :                 /* Check if we get all characters of flag */
     375         4571 :                 maxstep--;
     376         4571 :                 stop = (maxstep == 0);
     377         4571 :                 break;
     378          789 :             case FM_NUM:
     379          789 :                 errno = 0;
     380          789 :                 s = strtol(*sflagset, &next, 10);
     381          789 :                 if (*sflagset == next || errno == ERANGE)
     382            4 :                     ereport(ERROR,
     383              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     384              :                              errmsg("invalid affix flag \"%s\"", *sflagset)));
     385          785 :                 if (s < 0 || s > FLAGNUM_MAXSIZE)
     386            0 :                     ereport(ERROR,
     387              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
     388              :                              errmsg("affix flag \"%s\" is out of range",
     389              :                                     *sflagset)));
     390          785 :                 sflag += sprintf(sflag, "%0d", s);
     391              : 
     392              :                 /* Go to start of the next flag */
     393          785 :                 *sflagset = next;
     394         1219 :                 while (**sflagset)
     395              :                 {
     396          868 :                     if (isdigit((unsigned char) **sflagset))
     397              :                     {
     398          434 :                         if (!met_comma)
     399            0 :                             ereport(ERROR,
     400              :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     401              :                                      errmsg("invalid affix flag \"%s\"",
     402              :                                             *sflagset)));
     403          434 :                         break;
     404              :                     }
     405          434 :                     else if (t_iseq(*sflagset, ','))
     406              :                     {
     407          434 :                         if (met_comma)
     408            0 :                             ereport(ERROR,
     409              :                                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
     410              :                                      errmsg("invalid affix flag \"%s\"",
     411              :                                             *sflagset)));
     412          434 :                         met_comma = true;
     413              :                     }
     414            0 :                     else if (!isspace((unsigned char) **sflagset))
     415              :                     {
     416            0 :                         ereport(ERROR,
     417              :                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     418              :                                  errmsg("invalid character in affix flag \"%s\"",
     419              :                                         *sflagset)));
     420              :                     }
     421              : 
     422          434 :                     *sflagset += pg_mblen_cstr(*sflagset);
     423              :                 }
     424          785 :                 stop = true;
     425          785 :                 break;
     426            0 :             default:
     427            0 :                 elog(ERROR, "unrecognized type of Conf->flagMode: %d",
     428              :                      Conf->flagMode);
     429              :         }
     430              : 
     431         5356 :         if (stop)
     432         4082 :             break;
     433              :     }
     434              : 
     435         4082 :     if (Conf->flagMode == FM_LONG && maxstep > 0)
     436            0 :         ereport(ERROR,
     437              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     438              :                  errmsg("invalid affix flag \"%s\" with \"long\" flag value",
     439              :                         sbuf)));
     440              : 
     441         4082 :     *sflag = '\0';
     442         4082 : }
     443              : 
     444              : /*
     445              :  * Checks if the affix set Conf->AffixData[affix] contains affixflag.
     446              :  * Conf->AffixData[affix] does not contain affixflag if this flag is not
     447              :  * actually used by the .dict file.
     448              :  *
     449              :  * Conf: current dictionary.
     450              :  * affix: index of the Conf->AffixData array.
     451              :  * affixflag: the affix flag.
     452              :  *
     453              :  * Returns true if the string Conf->AffixData[affix] contains affixflag,
     454              :  * otherwise returns false.
     455              :  */
     456              : static bool
     457         1650 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
     458              : {
     459              :     const char *flagcur;
     460              :     char        flag[BUFSIZ];
     461              : 
     462         1650 :     if (*affixflag == 0)
     463          530 :         return true;
     464              : 
     465              :     Assert(affix < Conf->nAffixData);
     466              : 
     467         1120 :     flagcur = Conf->AffixData[affix];
     468              : 
     469         3194 :     while (*flagcur)
     470              :     {
     471         2466 :         getNextFlagFromString(Conf, &flagcur, flag);
     472              :         /* Compare first affix flag in flagcur with affixflag */
     473         2466 :         if (strcmp(flag, affixflag) == 0)
     474          392 :             return true;
     475              :     }
     476              : 
     477              :     /* Could not find affixflag */
     478          728 :     return false;
     479              : }
     480              : 
     481              : /*
     482              :  * Adds the new word into the temporary array Spell.
     483              :  *
     484              :  * Conf: current dictionary.
     485              :  * word: new word.
     486              :  * flag: set of affix flags. A single flag can be obtained with getNextFlagFromString().
     487              :  */
     488              : static void
     489          755 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
     490              : {
     491          755 :     if (Conf->nspell >= Conf->mspell)
     492              :     {
     493           83 :         if (Conf->mspell)
     494              :         {
     495            0 :             Conf->mspell *= 2;
     496            0 :             Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
     497              :         }
     498              :         else
     499              :         {
     500           83 :             Conf->mspell = 1024 * 20;
     501           83 :             Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
     502              :         }
     503              :     }
     504          755 :     Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
     505          755 :     strcpy(Conf->Spell[Conf->nspell]->word, word);
     506         1510 :     Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
     507          755 :         ? cpstrdup(Conf, flag) : VoidString;
     508          755 :     Conf->nspell++;
     509          755 : }
     510              : 
     511              : /*
     512              :  * Imports dictionary into the temporary array Spell.
     513              :  *
     514              :  * Note caller must already have applied get_tsearch_config_filename.
     515              :  *
     516              :  * Conf: current dictionary.
     517              :  * filename: path to the .dict file.
     518              :  */
     519              : void
     520           83 : NIImportDictionary(IspellDict *Conf, const char *filename)
     521              : {
     522              :     tsearch_readline_state trst;
     523              :     char       *line;
     524              : 
     525           83 :     if (!tsearch_readline_begin(&trst, filename))
     526            0 :         ereport(ERROR,
     527              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     528              :                  errmsg("could not open dictionary file \"%s\": %m",
     529              :                         filename)));
     530              : 
     531          838 :     while ((line = tsearch_readline(&trst)) != NULL)
     532              :     {
     533              :         char       *s,
     534              :                    *pstr;
     535              : 
     536              :         /* Set of affix flags */
     537              :         const char *flag;
     538              : 
     539              :         /* Extract flag from the line */
     540          755 :         flag = NULL;
     541          755 :         if ((s = findchar(line, '/')))
     542              :         {
     543          672 :             *s++ = '\0';
     544          672 :             flag = s;
     545         2689 :             while (*s)
     546              :             {
     547              :                 /* for speed, accept only single-byte, printable, non-space flag characters */
     548         2689 :                 if (pg_mblen_cstr(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
     549         2017 :                     s++;
     550              :                 else
     551              :                 {
     552          672 :                     *s = '\0';
     553          672 :                     break;
     554              :                 }
     555              :             }
     556              :         }
     557              :         else
     558           83 :             flag = "";
     559              : 
     560              :         /* Truncate the word at the first whitespace character */
     561          755 :         s = line;
     562         5481 :         while (*s)
     563              :         {
     564         4809 :             if (isspace((unsigned char) *s))
     565              :             {
     566           83 :                 *s = '\0';
     567           83 :                 break;
     568              :             }
     569         4726 :             s += pg_mblen_cstr(s);
     570              :         }
     571          755 :         pstr = lowerstr_ctx(Conf, line);
     572              : 
     573          755 :         NIAddSpell(Conf, pstr, flag);
     574          755 :         pfree(pstr);
     575              : 
     576          755 :         pfree(line);
     577              :     }
     578           83 :     tsearch_readline_end(&trst);
     579           83 : }
     580              : 
     581              : /*
     582              :  * Searches for the basic form of a word in the prefix tree. This word was
     583              :  * generated using an affix rule. This rule may not be present in the affix
     584              :  * set of the basic form of the word.
     585              :  *
     586              :  * For example, we have the entry in the .dict file:
     587              :  * meter/GMD
     588              :  *
     589              :  * The affix rule with the flag S:
     590              :  * SFX S   y     ies        [^aeiou]y
     591              :  * is not present here.
     592              :  *
     593              :  * The affix rule with the flag M:
     594              :  * SFX M   0     's         .
     595              :  * is present here.
     596              :  *
     597              :  * Conf: current dictionary.
     598              :  * word: basic form of word.
     599              :  * affixflag: affix flag, by which a basic form of word was generated.
     600              :  * flag: compound flag used to compare with StopMiddle->compoundflag.
     601              :  *
     602              :  * Returns 1 if the word was found in the prefix tree, else returns 0.
     603              :  */
     604              : static int
     605         2495 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
     606              : {
     607         2495 :     SPNode     *node = Conf->Dictionary;
     608              :     SPNodeData *StopLow,
     609              :                *StopHigh,
     610              :                *StopMiddle;
     611         2495 :     const uint8 *ptr = (const uint8 *) word;
     612              : 
     613         2495 :     flag &= FF_COMPOUNDFLAGMASK;
     614              : 
     615        11620 :     while (node && *ptr)
     616              :     {
     617        11020 :         StopLow = node->data;
     618        11020 :         StopHigh = node->data + node->length;
     619        15765 :         while (StopLow < StopHigh)
     620              :         {
     621        14710 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
     622        14710 :             if (StopMiddle->val == *ptr)
     623              :             {
     624         9965 :                 if (*(ptr + 1) == '\0' && StopMiddle->isword)
     625              :                 {
     626          955 :                     if (flag == 0)
     627              :                     {
     628              :                         /*
     629              :                          * The word can be formed only with another word. And
     630              :                          * in the flag parameter there is not a sign that we
     631              :                          * search compound words.
     632              :                          */
     633          605 :                         if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
     634            0 :                             return 0;
     635              :                     }
     636          350 :                     else if ((flag & StopMiddle->compoundflag) == 0)
     637            0 :                         return 0;
     638              : 
     639              :                     /*
     640              :                      * Check if this affix rule is presented in the affix set
     641              :                      * with index StopMiddle->affix.
     642              :                      */
     643          955 :                     if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
     644          840 :                         return 1;
     645              :                 }
     646         9125 :                 node = StopMiddle->node;
     647         9125 :                 ptr++;
     648         9125 :                 break;
     649              :             }
     650         4745 :             else if (StopMiddle->val < *ptr)
     651         1610 :                 StopLow = StopMiddle + 1;
     652              :             else
     653         3135 :                 StopHigh = StopMiddle;
     654              :         }
     655        10180 :         if (StopLow >= StopHigh)
     656         1055 :             break;
     657              :     }
     658         1655 :     return 0;
     659              : }
     660              : 
     661              : /*
     662              :  * Appends a new affix rule to the Conf->Affix array, growing it when full.
     663              :  *
     664              :  * Conf: current dictionary.
     665              :  * flag: affix flag ('\' in the example below).
     666              :  * flagflags: set of flags from the flagval field for this affix rule. This set
     667              :  *            is listed after the '/' character in the added string (repl).
     668              :  *
     669              :  *            For example, the L flag in hunspell_sample.affix:
     670              :  *            SFX \   0 Y/L [^Y]
     671              :  *
     672              :  * mask: condition for search ('[^Y]' in the example above).
     673              :  * find: characters to strip from the beginning (for a prefix) or the end (for
     674              :  *       a suffix) of the word ('0' in the example above; 0 means that no
     675              :  *       characters are stripped).
     676              :  * repl: string to add after stripping ('Y' in the example above).
     677              :  * type: FF_SUFFIX or FF_PREFIX.
     678              :  */
     679              : static void
     680          688 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
     681              :            const char *find, const char *repl, int type)
     682              : {
     683              :     AFFIX      *Affix;
     684              : 
     685          688 :     if (Conf->naffixes >= Conf->maffixes)   /* array is full, or not allocated yet */
     686              :     {
     687           83 :         if (Conf->maffixes)
     688              :         {
     689            0 :             Conf->maffixes *= 2;    /* double the capacity */
     690            0 :             Conf->Affix = (AFFIX *) repalloc(Conf->Affix, Conf->maffixes * sizeof(AFFIX));
     691              :         }
     692              :         else
     693              :         {
     694           83 :             Conf->maffixes = 16;    /* initial capacity */
     695           83 :             Conf->Affix = palloc_array(AFFIX, Conf->maffixes);
     696              :         }
     697              :     }
     698              : 
     699          688 :     Affix = Conf->Affix + Conf->naffixes;   /* next free slot */
     700              : 
     701              :     /* A mask of "." or an empty mask: the rule applies to words with any ending */
     702          688 :     if (strcmp(mask, ".") == 0 || *mask == '\0')
     703              :     {
     704          166 :         Affix->issimple = 1;
     705          166 :         Affix->isregis = 0;
     706              :     }
     707              :     /* RS_isRegis() accepts the mask: match word endings with regis */
     708          522 :     else if (RS_isRegis(mask))
     709              :     {
     710          436 :         Affix->issimple = 0;
     711          436 :         Affix->isregis = 1;
     712          436 :         RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
     713          436 :                    *mask ? mask : VoidString);
     714              :     }
     715              :     /* Otherwise compile the mask into a regex_t to match word endings */
     716              :     else
     717              :     {
     718              :         int         masklen;
     719              :         int         wmasklen;
     720              :         int         err;
     721              :         pg_wchar   *wmask;
     722              :         char       *tmask;
     723              : 
     724           86 :         Affix->issimple = 0;
     725           86 :         Affix->isregis = 0;
     726           86 :         tmask = (char *) tmpalloc(strlen(mask) + 3);    /* room for the one-character anchor and the NUL */
     727           86 :         if (type == FF_SUFFIX)
     728           86 :             sprintf(tmask, "%s$", mask);    /* suffix mask: anchor at the end */
     729              :         else
     730            0 :             sprintf(tmask, "^%s", mask);    /* prefix mask: anchor at the start */
     731              : 
     732           86 :         masklen = strlen(tmask);
     733           86 :         wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
     734           86 :         wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);    /* pg_regcomp takes pg_wchar input */
     735              : 
     736              :         /*
     737              :          * The regex and all internal state created by pg_regcomp are
     738              :          * allocated in the dictionary's memory context, and will be freed
     739              :          * automatically when it is destroyed.
     740              :          */
     741           86 :         Affix->reg.pregex = palloc_object(regex_t);
     742           86 :         err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen,
     743              :                          REG_ADVANCED | REG_NOSUB,
     744              :                          DEFAULT_COLLATION_OID);
     745           86 :         if (err)
     746              :         {
     747              :             char        errstr[100];
     748              : 
     749            0 :             pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr));
     750            0 :             ereport(ERROR,
     751              :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     752              :                      errmsg("invalid regular expression: %s", errstr)));
     753              :         }
     754              :     }
     755              : 
     756          688 :     Affix->flagflags = flagflags;
     757          688 :     if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))    /* either bit implies FF_COMPOUNDFLAG */
     758              :     {
     759          125 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
     760          125 :             Affix->flagflags |= FF_COMPOUNDFLAG;
     761              :     }
     762          688 :     Affix->flag = cpstrdup(Conf, flag);
     763          688 :     Affix->type = type;
     764              : 
     765          688 :     Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;    /* NULL or empty find shares VoidString */
     766          688 :     if ((Affix->replen = strlen(repl)) > 0)
     767          666 :         Affix->repl = cpstrdup(Conf, repl);
     768              :     else
     769           22 :         Affix->repl = VoidString;    /* empty repl shares VoidString as well */
     770          688 :     Conf->naffixes++;
     771          688 : }
     772              : 
     773              : /* Parsing states shared by parse_affentry(), parse_ooaffentry() and get_nextfield() */
     774              : #define PAE_WAIT_MASK   0   /* skipping whitespace before the mask (or before a field) */
     775              : #define PAE_INMASK      1   /* copying the mask (or the current field) */
     776              : #define PAE_WAIT_FIND   2   /* parse_affentry: after '>', expecting '-' or the start of repl */
     777              : #define PAE_INFIND      3   /* parse_affentry: copying find, until ',' */
     778              : #define PAE_WAIT_REPL   4   /* parse_affentry: after ',', expecting repl ('-' means empty repl) */
     779              : #define PAE_INREPL      5   /* parse_affentry: copying repl, until '#' or end of line */
     780              : #define PAE_WAIT_TYPE   6   /* parse_ooaffentry: the next field is the type */
     781              : #define PAE_WAIT_FLAG   7   /* parse_ooaffentry: the next field is the flag */
     782              : 
     783              : /*
     784              :  * Parse the next whitespace-separated field of an .affix file line.
     785              :  *
     786              :  * *str is the input pointer (it is advanced to the end of the field)
     787              :  * next is where to copy the field value to, with null termination
     788              :  *
     789              :  * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
     790              :  *
     791              :  * Returns true if a field was found; false if the line ends or a '#' is seen before a field starts.
     792              :  */
     793              : static bool
     794         6463 : get_nextfield(char **str, char *next)
     795              : {
     796         6463 :     int         state = PAE_WAIT_MASK;
     797         6463 :     int         avail = BUFSIZ;     /* bytes still free in "next" */
     798              : 
     799        27642 :     while (**str)
     800              :     {
     801        26883 :         int         clen = pg_mblen_cstr(*str);    /* byte length of the current character */
     802              : 
     803        26883 :         if (state == PAE_WAIT_MASK)
     804              :         {
     805        11921 :             if (t_iseq(*str, '#'))  /* '#' before any field: nothing more to read */
     806          231 :                 return false;
     807        11690 :             else if (!isspace((unsigned char) **str))
     808              :             {
     809         5473 :                 if (clen < avail)   /* strict '<' keeps one byte for the terminator */
     810              :                 {
     811         5473 :                     ts_copychar_with_len(next, *str, clen);
     812         5473 :                     next += clen;
     813         5473 :                     avail -= clen;
     814              :                 }
     815         5473 :                 state = PAE_INMASK;
     816              :             }
     817              :         }
     818              :         else                    /* state == PAE_INMASK */
     819              :         {
     820        14962 :             if (isspace((unsigned char) **str))    /* whitespace ends the field; *str is left on it */
     821              :             {
     822         5473 :                 *next = '\0';
     823         5473 :                 return true;
     824              :             }
     825              :             else
     826              :             {
     827         9489 :                 if (clen < avail)   /* characters that do not fit are dropped; a '#' here is copied as data */
     828              :                 {
     829         9489 :                     ts_copychar_with_len(next, *str, clen);
     830         9489 :                     next += clen;
     831         9489 :                     avail -= clen;
     832              :                 }
     833              :             }
     834              :         }
     835        21179 :         *str += clen;   /* input advances even when the character was not copied */
     836              :     }
     837              : 
     838          759 :     *next = '\0';
     839              : 
     840          759 :     return (state == PAE_INMASK);   /* OK if we got a nonempty field */
     841              : }
     842              : 
     843              : /*
     844              :  * Parses an entry of an .affix file in MySpell or Hunspell format.
     845              :  *
     846              :  * An .affix file entry has the following format:
     847              :  * - header
     848              :  *   <type>  <flag>  <cross_flag>  <flag_count>
     849              :  * - fields after header:
     850              :  *   <type>  <flag>  <find>  <replace>  <mask>
     851              :  *
     852              :  * str is the input line; fields are stored by position, so a header's third and fourth fields land in find and repl.
     853              :  * Field values are returned in type, flag, find, repl and mask, which must be buffers of size BUFSIZ.
     854              :  *
     855              :  * Returns number of fields found; any omitted fields are set to empty strings.
     856              :  */
     857              : static int
     858         1489 : parse_ooaffentry(char *str, char *type, char *flag, char *find,
     859              :                  char *repl, char *mask)
     860              : {
     861         1489 :     int         state = PAE_WAIT_TYPE;
     862         1489 :     int         fields_read = 0;
     863         1489 :     bool        valid = false;
     864              : 
     865         1489 :     *type = *flag = *find = *repl = *mask = '\0';    /* omitted fields stay empty */
     866              : 
     867         6463 :     while (*str)
     868              :     {
     869         6463 :         switch (state)      /* each state reads one field and selects the next one */
     870              :         {
     871         1489 :             case PAE_WAIT_TYPE:
     872         1489 :                 valid = get_nextfield(&str, type);
     873         1489 :                 state = PAE_WAIT_FLAG;
     874         1489 :                 break;
     875         1489 :             case PAE_WAIT_FLAG:
     876         1489 :                 valid = get_nextfield(&str, flag);
     877         1489 :                 state = PAE_WAIT_FIND;
     878         1489 :                 break;
     879         1489 :             case PAE_WAIT_FIND:
     880         1489 :                 valid = get_nextfield(&str, find);
     881         1489 :                 state = PAE_WAIT_REPL;
     882         1489 :                 break;
     883          998 :             case PAE_WAIT_REPL:
     884          998 :                 valid = get_nextfield(&str, repl);
     885          998 :                 state = PAE_WAIT_MASK;
     886          998 :                 break;
     887          998 :             case PAE_WAIT_MASK:
     888          998 :                 valid = get_nextfield(&str, mask);
     889          998 :                 state = -1;     /* force loop exit */
     890          998 :                 break;
     891            0 :             default:
     892            0 :                 elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
     893              :                      state);
     894              :                 break;
     895              :         }
     896         6463 :         if (valid)
     897         5473 :             fields_read++;
     898              :         else
     899          990 :             break;              /* early EOL */
     900         5473 :         if (state < 0)
     901          499 :             break;              /* got all fields */
     902              :     }
     903              : 
     904         1489 :     return fields_read;
     905              : }
     906              : 
     907              : /*
     908              :  * Parses an entry of an .affix file in Ispell format.
     909              :  *
     910              :  * An .affix file entry has the following format:
     911              :  * <mask>  >  [-<find>,]<replace>
     912              :  *
     913              :  * Output buffers mask, find, repl must be of length BUFSIZ;
     914              :  * we truncate the input to fit.  Returns true if a mask and at least one of find/repl were read.
     915              :  */
     916              : static bool
     917          189 : parse_affentry(const char *str, char *mask, char *find, char *repl)
     918              : {
     919          189 :     int         state = PAE_WAIT_MASK;
     920          189 :     char       *pmask = mask,
     921          189 :                *pfind = find,
     922          189 :                *prepl = repl;
     923          189 :     char       *emask = mask + BUFSIZ;  /* one past the end of each output buffer */
     924          189 :     char       *efind = find + BUFSIZ;
     925          189 :     char       *erepl = repl + BUFSIZ;
     926              : 
     927          189 :     *mask = *find = *repl = '\0';
     928              : 
     929         4968 :     while (*str)
     930              :     {
     931         4968 :         int         clen = pg_mblen_cstr(str);  /* byte length of the current character */
     932              : 
     933         4968 :         if (state == PAE_WAIT_MASK)
     934              :         {
     935          459 :             if (t_iseq(str, '#'))   /* '#' before the mask: no entry on this line */
     936            0 :                 return false;
     937          459 :             else if (!isspace((unsigned char) *str))
     938              :             {
     939          189 :                 if (pmask < emask - clen)   /* copy only if a terminator still fits afterwards */
     940          189 :                     pmask += ts_copychar_with_len(pmask, str, clen);
     941          189 :                 state = PAE_INMASK;
     942              :             }
     943              :         }
     944         4509 :         else if (state == PAE_INMASK)
     945              :         {
     946         1836 :             if (t_iseq(str, '>'))   /* '>' ends the mask */
     947              :             {
     948          189 :                 *pmask = '\0';
     949          189 :                 state = PAE_WAIT_FIND;
     950              :             }
     951         1647 :             else if (!isspace((unsigned char) *str))    /* whitespace inside the mask is skipped */
     952              :             {
     953          648 :                 if (pmask < emask - clen)
     954          648 :                     pmask += ts_copychar_with_len(pmask, str, clen);
     955              :             }
     956              :         }
     957         2673 :         else if (state == PAE_WAIT_FIND)
     958              :         {
     959          756 :             if (t_iseq(str, '-'))   /* '-' introduces the optional find part */
     960              :             {
     961           27 :                 state = PAE_INFIND;
     962              :             }
     963          729 :             else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* English 's */ )
     964              :             {
     965          162 :                 if (prepl < erepl - clen)   /* no find part: this is already the start of repl */
     966          162 :                     prepl += ts_copychar_with_len(prepl, str, clen);
     967          162 :                 state = PAE_INREPL;
     968              :             }
     969          567 :             else if (!isspace((unsigned char) *str))
     970            0 :                 ereport(ERROR,
     971              :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     972              :                          errmsg("syntax error")));
     973              :         }
     974         1917 :         else if (state == PAE_INFIND)
     975              :         {
     976           54 :             if (t_iseq(str, ','))   /* ',' ends find; repl follows */
     977              :             {
     978           27 :                 *pfind = '\0';
     979           27 :                 state = PAE_WAIT_REPL;
     980              :             }
     981           27 :             else if (t_isalpha_cstr(str))
     982              :             {
     983           27 :                 if (pfind < efind - clen)
     984           27 :                     pfind += ts_copychar_with_len(pfind, str, clen);
     985              :             }
     986            0 :             else if (!isspace((unsigned char) *str))
     987            0 :                 ereport(ERROR,
     988              :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     989              :                          errmsg("syntax error")));
     990              :         }
     991         1863 :         else if (state == PAE_WAIT_REPL)
     992              :         {
     993           27 :             if (t_iseq(str, '-'))
     994              :             {
     995            0 :                 break;          /* void repl */
     996              :             }
     997           27 :             else if (t_isalpha_cstr(str))
     998              :             {
     999           27 :                 if (prepl < erepl - clen)
    1000           27 :                     prepl += ts_copychar_with_len(prepl, str, clen);
    1001           27 :                 state = PAE_INREPL;
    1002              :             }
    1003            0 :             else if (!isspace((unsigned char) *str))
    1004            0 :                 ereport(ERROR,
    1005              :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1006              :                          errmsg("syntax error")));
    1007              :         }
    1008         1836 :         else if (state == PAE_INREPL)
    1009              :         {
    1010         1836 :             if (t_iseq(str, '#'))   /* a trailing comment ends repl */
    1011              :             {
    1012          189 :                 *prepl = '\0';
    1013          189 :                 break;
    1014              :             }
    1015         1647 :             else if (t_isalpha_cstr(str))
    1016              :             {
    1017          243 :                 if (prepl < erepl - clen)
    1018          243 :                     prepl += ts_copychar_with_len(prepl, str, clen);
    1019              :             }
    1020         1404 :             else if (!isspace((unsigned char) *str))
    1021            0 :                 ereport(ERROR,
    1022              :                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1023              :                          errmsg("syntax error")));
    1024              :         }
    1025              :         else
    1026            0 :             elog(ERROR, "unrecognized state in parse_affentry: %d", state);
    1027              : 
    1028         4779 :         str += clen;
    1029              :     }
    1030              : 
    1031          189 :     *pmask = *pfind = *prepl = '\0';    /* terminate all three outputs, whichever state we stopped in */
    1032              : 
    1033          189 :     return (*mask && (*find || *repl));    /* need a mask plus a find or a repl */
    1034              : }
    1035              : 
    1036              : /*
    1037              :  * Fills in *entry: stores the flag s as a number (FM_NUM mode) or as a copied string (other modes), plus the flag mode and val.
    1038              :  */
    1039              : static void
    1040         1839 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
    1041              :                           char *s, uint32 val)
    1042              : {
    1043         1839 :     if (Conf->flagMode == FM_NUM)
    1044              :     {
    1045              :         char       *next;
    1046              :         int         i;
    1047              : 
    1048          399 :         errno = 0;      /* so that ERANGE can be detected after strtol */
    1049          399 :         i = strtol(s, &next, 10);   /* NOTE(review): the long result is narrowed to int before the range check; text after the digits is not checked */
    1050          399 :         if (s == next || errno == ERANGE)   /* no digits consumed, or value out of range for long */
    1051            0 :             ereport(ERROR,
    1052              :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1053              :                      errmsg("invalid affix flag \"%s\"", s)));
    1054          399 :         if (i < 0 || i > FLAGNUM_MAXSIZE)
    1055            0 :             ereport(ERROR,
    1056              :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1057              :                      errmsg("affix flag \"%s\" is out of range", s)));
    1058              : 
    1059          399 :         entry->flag.i = i;
    1060              :     }
    1061              :     else
    1062         1440 :         entry->flag.s = cpstrdup(Conf, s);  /* non-numeric modes keep a copy of the flag string */
    1063              : 
    1064         1839 :     entry->flagMode = Conf->flagMode;
    1065         1839 :     entry->value = val;
    1066         1839 : }
    1067              : 
    1068              : /*
    1069              :  * Registers a correspondence between an affix flag and an affix parameter.
    1070              :  *
    1071              :  * Conf: current dictionary.
    1072              :  * s: affix flag as a string; leading whitespace is skipped and the flag ends at the next whitespace.
    1073              :  * val: affix parameter.
    1074              :  */
    1075              : static void
    1076          223 : addCompoundAffixFlagValue(IspellDict *Conf, const char *s, uint32 val)
    1077              : {
    1078              :     CompoundAffixFlag *newValue;
    1079              :     char        sbuf[BUFSIZ];
    1080              :     char       *sflag;
    1081              : 
    1082          419 :     while (*s && isspace((unsigned char) *s))   /* skip leading whitespace */
    1083          196 :         s += pg_mblen_cstr(s);
    1084              : 
    1085          223 :     if (!*s)    /* nothing but whitespace: there is no flag */
    1086            0 :         ereport(ERROR,
    1087              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1088              :                  errmsg("syntax error")));
    1089              : 
    1090              :     /* Copy the flag into sbuf, stopping at whitespace or \n */
    1091          223 :     sflag = sbuf;
    1092          660 :     while (*s && !isspace((unsigned char) *s) && *s != '\n')
    1093              :     {
    1094          437 :         int         clen = pg_mblen_cstr(s);
    1095              : 
    1096              :         /* Truncate the input to fit in BUFSIZ */
    1097          437 :         if (sflag < sbuf + BUFSIZ - clen)
    1098          437 :             sflag += ts_copychar_with_len(sflag, s, clen);
    1099          437 :         s += clen;
    1100              :     }
    1101          223 :     *sflag = '\0';
    1102              : 
    1103              :     /* Allocate the CompoundAffixFlag array on first use, or double it when full */
    1104          223 :     if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
    1105              :     {
    1106           83 :         if (Conf->mCompoundAffixFlag)
    1107              :         {
    1108            0 :             Conf->mCompoundAffixFlag *= 2;
    1109            0 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1110            0 :                 repalloc(Conf->CompoundAffixFlags,
    1111            0 :                          Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1112              :         }
    1113              :         else
    1114              :         {
    1115           83 :             Conf->mCompoundAffixFlag = 10;  /* initial capacity */
    1116           83 :             Conf->CompoundAffixFlags = (CompoundAffixFlag *)
    1117           83 :                 tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
    1118              :         }
    1119              :     }
    1120              : 
    1121          223 :     newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;    /* next free slot */
    1122              : 
    1123          223 :     setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
    1124              : 
    1125          223 :     Conf->usecompound = true;   /* set whenever a compound affix flag is registered */
    1126          223 :     Conf->nCompoundAffixFlag++;
    1127          223 : }
    1128              : 
    1129              : /*
    1130              :  * Returns the bitwise OR of the affix parameters that correspond to the
    1131              :  * affix flags listed in s; flags with no registered parameter contribute nothing.
    1132              :  */
    1133              : static int
    1134          798 : getCompoundAffixFlagValue(IspellDict *Conf, const char *s)
    1135              : {
    1136          798 :     uint32      flag = 0;
    1137              :     CompoundAffixFlag *found,
    1138              :                 key;
    1139              :     char        sflag[BUFSIZ];
    1140              :     const char *flagcur;
    1141              : 
    1142          798 :     if (Conf->nCompoundAffixFlag == 0)  /* nothing registered, so nothing can match */
    1143            0 :         return 0;
    1144              : 
    1145          798 :     flagcur = s;
    1146         2414 :     while (*flagcur)
    1147              :     {
    1148         1620 :         getNextFlagFromString(Conf, &flagcur, sflag);   /* extracts the next flag into sflag and advances flagcur */
    1149         1616 :         setCompoundAffixFlagValue(Conf, &key, sflag, 0);    /* build a search key; its value field is unused */
    1150              : 
    1151              :         found = (CompoundAffixFlag *)
    1152         1616 :             bsearch(&key, Conf->CompoundAffixFlags,     /* assumes the array is already sorted by cmpcmdflag -- TODO confirm, the sort is not in this view */
    1153         1616 :                     Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
    1154              :                     cmpcmdflag);
    1155         1616 :         if (found != NULL)
    1156          374 :             flag |= found->value;
    1157              :     }
    1158              : 
    1159          794 :     return flag;    /* NOTE(review): a uint32 accumulator is returned through an int return type */
    1160              : }
    1161              : 
    1162              : /*
    1163              :  * Returns a flag set using the s parameter.
    1164              :  *
    1165              :  * If Conf->useFlagAliases is true and s is not empty, s is parsed as an index
    1166              :  * into the Conf->AffixData array and the function returns that entry.
    1167              :  * Otherwise the function returns the s parameter itself.
    1168              :  */
    1169              : static const char *
    1170           98 : getAffixFlagSet(IspellDict *Conf, char *s)
    1171              : {
    1172           98 :     if (Conf->useFlagAliases && *s != '\0')
    1173              :     {
    1174              :         int         curaffix;
    1175              :         char       *end;
    1176              : 
    1177           63 :         errno = 0;      /* so that ERANGE can be detected after strtol */
    1178           63 :         curaffix = strtol(s, &end, 10);
    1179           63 :         if (s == end || errno == ERANGE)    /* no digits consumed, or value out of range for long */
    1180            0 :             ereport(ERROR,
    1181              :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1182              :                      errmsg("invalid affix alias \"%s\"", s)));
    1183              : 
    1184           63 :         if (curaffix > 0 && curaffix < Conf->nAffixData)
    1185              : 
    1186              :             /*
    1187              :              * Do not subtract 1 from curaffix, because an empty string was
    1188              :              * added in NIImportOOAffixes
    1189              :              */
    1190           63 :             return Conf->AffixData[curaffix];
    1191            0 :         else if (curaffix > Conf->nAffixData)
    1192            0 :             ereport(ERROR,
    1193              :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1194              :                      errmsg("invalid affix alias \"%s\"", s)));
    1195            0 :         return VoidString;      /* NOTE(review): reached for curaffix <= 0 and for curaffix == Conf->nAffixData; confirm the latter should not be an error */
    1196              :     }
    1197              :     else
    1198           35 :         return s;
    1199              : }
    1200              : 
    1201              : /*
    1202              :  * Import an affix file that follows MySpell or Hunspell format.
    1203              :  *
    1204              :  * Conf: current dictionary.
    1205              :  * filename: path to the .affix file.
    1206              :  */
    1207              : static void
    1208           56 : NIImportOOAffixes(IspellDict *Conf, const char *filename)
    1209              : {
    1210              :     char        type[BUFSIZ],
    1211           56 :                *ptype = NULL;
    1212              :     char        sflag[BUFSIZ];
    1213              :     char        mask[BUFSIZ],
    1214              :                *pmask;
    1215              :     char        find[BUFSIZ],
    1216              :                *pfind;
    1217              :     char        repl[BUFSIZ],
    1218              :                *prepl;
    1219           56 :     bool        isSuffix = false;
    1220           56 :     int         naffix = 0,
    1221           56 :                 curaffix = 0;
    1222           56 :     int         sflaglen = 0;
    1223           56 :     char        flagflags = 0;
    1224              :     tsearch_readline_state trst;
    1225              :     char       *recoded;
    1226              : 
    1227              :     /* read file to find any flag */
    1228           56 :     Conf->usecompound = false;
    1229           56 :     Conf->useFlagAliases = false;
    1230           56 :     Conf->flagMode = FM_CHAR;
    1231              : 
    1232           56 :     if (!tsearch_readline_begin(&trst, filename))
    1233            0 :         ereport(ERROR,
    1234              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1235              :                  errmsg("could not open affix file \"%s\": %m",
    1236              :                         filename)));
    1237              : 
    1238         2194 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1239              :     {
    1240         2138 :         if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
    1241              :         {
    1242          649 :             pfree(recoded);
    1243          649 :             continue;
    1244              :         }
    1245              : 
    1246         1489 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
    1247           56 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
    1248              :                                       FF_COMPOUNDFLAG);
    1249         1433 :         else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
    1250           21 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
    1251              :                                       FF_COMPOUNDBEGIN);
    1252         1412 :         else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
    1253            0 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
    1254              :                                       FF_COMPOUNDLAST);
    1255              :         /* COMPOUNDLAST and COMPOUNDEND are synonyms */
    1256         1412 :         else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
    1257           21 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
    1258              :                                       FF_COMPOUNDLAST);
    1259         1391 :         else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
    1260           21 :             addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
    1261              :                                       FF_COMPOUNDMIDDLE);
    1262         1370 :         else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
    1263           56 :             addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
    1264              :                                       FF_COMPOUNDONLY);
    1265         1314 :         else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
    1266           21 :             addCompoundAffixFlagValue(Conf,
    1267           21 :                                       recoded + strlen("COMPOUNDPERMITFLAG"),
    1268              :                                       FF_COMPOUNDPERMITFLAG);
    1269         1293 :         else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
    1270            0 :             addCompoundAffixFlagValue(Conf,
    1271            0 :                                       recoded + strlen("COMPOUNDFORBIDFLAG"),
    1272              :                                       FF_COMPOUNDFORBIDFLAG);
    1273         1293 :         else if (STRNCMP(recoded, "FLAG") == 0)
    1274              :         {
    1275           43 :             char       *s = recoded + strlen("FLAG");
    1276              : 
    1277           86 :             while (*s && isspace((unsigned char) *s))
    1278           43 :                 s += pg_mblen_cstr(s);
    1279              : 
    1280           43 :             if (*s)
    1281              :             {
    1282           43 :                 if (STRNCMP(s, "long") == 0)
    1283           21 :                     Conf->flagMode = FM_LONG;
    1284           22 :                 else if (STRNCMP(s, "num") == 0)
    1285           22 :                     Conf->flagMode = FM_NUM;
    1286            0 :                 else if (STRNCMP(s, "default") != 0)
    1287            0 :                     ereport(ERROR,
    1288              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1289              :                              errmsg("Ispell dictionary supports only "
    1290              :                                     "\"default\", \"long\", "
    1291              :                                     "and \"num\" flag values")));
    1292              :             }
    1293              :         }
    1294              : 
    1295         1489 :         pfree(recoded);
    1296              :     }
    1297           56 :     tsearch_readline_end(&trst);
    1298              : 
    1299           56 :     if (Conf->nCompoundAffixFlag > 1)
    1300           56 :         qsort(Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
    1301              :               sizeof(CompoundAffixFlag), cmpcmdflag);
    1302              : 
    1303           56 :     if (!tsearch_readline_begin(&trst, filename))
    1304            0 :         ereport(ERROR,
    1305              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1306              :                  errmsg("could not open affix file \"%s\": %m",
    1307              :                         filename)));
    1308              : 
    1309         2194 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1310              :     {
    1311              :         int         fields_read;
    1312              : 
    1313         2138 :         if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
    1314          649 :             goto nextline;
    1315              : 
    1316         1489 :         fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
    1317              : 
    1318         1489 :         if (ptype)
    1319         1433 :             pfree(ptype);
    1320         1489 :         ptype = lowerstr_ctx(Conf, type);
    1321              : 
    1322              :         /* First try to parse AF parameter (alias compression) */
    1323         1489 :         if (STRNCMP(ptype, "af") == 0)
    1324              :         {
    1325              :             /* First line is the number of aliases */
    1326          252 :             if (!Conf->useFlagAliases)
    1327              :             {
    1328           21 :                 Conf->useFlagAliases = true;
    1329           21 :                 naffix = atoi(sflag);
    1330           21 :                 if (naffix <= 0)
    1331            0 :                     ereport(ERROR,
    1332              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1333              :                              errmsg("invalid number of flag vector aliases")));
    1334              : 
    1335              :                 /* Also reserve place for empty flag set */
    1336           21 :                 naffix++;
    1337              : 
    1338           21 :                 Conf->AffixData = palloc0_array(const char *, naffix);
    1339           21 :                 Conf->lenAffixData = Conf->nAffixData = naffix;
    1340              : 
    1341              :                 /* Add empty flag set into AffixData */
    1342           21 :                 Conf->AffixData[curaffix] = VoidString;
    1343           21 :                 curaffix++;
    1344              :             }
    1345              :             /* Other lines are aliases */
    1346              :             else
    1347              :             {
    1348          231 :                 if (curaffix < naffix)
    1349              :                 {
    1350          231 :                     Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
    1351          231 :                     curaffix++;
    1352              :                 }
    1353              :                 else
    1354            0 :                     ereport(ERROR,
    1355              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1356              :                              errmsg("number of aliases exceeds specified number %d",
    1357              :                                     naffix - 1)));
    1358              :             }
    1359          252 :             goto nextline;
    1360              :         }
    1361              :         /* Else try to parse prefixes and suffixes */
    1362         1237 :         if (fields_read < 4 ||
    1363          998 :             (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
    1364          239 :             goto nextline;
    1365              : 
    1366          998 :         sflaglen = strlen(sflag);
    1367          998 :         if (sflaglen == 0
    1368          998 :             || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
    1369          998 :             || (sflaglen > 2 && Conf->flagMode == FM_LONG))
    1370            0 :             goto nextline;
    1371              : 
    1372              :         /*--------
    1373              :          * Affix header. For example:
    1374              :          * SFX \ N 1
    1375              :          *--------
    1376              :          */
    1377          998 :         if (fields_read == 4)
    1378              :         {
    1379          499 :             isSuffix = (STRNCMP(ptype, "sfx") == 0);
    1380          499 :             if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
    1381          345 :                 flagflags = FF_CROSSPRODUCT;
    1382              :             else
    1383          154 :                 flagflags = 0;
    1384              :         }
    1385              :         /*--------
    1386              :          * Affix fields. For example:
    1387              :          * SFX \   0    Y/L [^Y]
    1388              :          *--------
    1389              :          */
    1390              :         else
    1391              :         {
    1392              :             char       *ptr;
    1393          499 :             int         aflg = 0;
    1394              : 
    1395              :             /* Get flags after '/' (flags are case sensitive) */
    1396          499 :             if ((ptr = strchr(repl, '/')) != NULL)
    1397           98 :                 aflg |= getCompoundAffixFlagValue(Conf,
    1398              :                                                   getAffixFlagSet(Conf,
    1399              :                                                                   ptr + 1));
    1400              :             /* Get lowercased version of string before '/' */
    1401          499 :             prepl = lowerstr_ctx(Conf, repl);
    1402          499 :             if ((ptr = strchr(prepl, '/')) != NULL)
    1403           98 :                 *ptr = '\0';
    1404          499 :             pfind = lowerstr_ctx(Conf, find);
    1405          499 :             pmask = lowerstr_ctx(Conf, mask);
    1406          499 :             if (t_iseq(find, '0'))
    1407          421 :                 *pfind = '\0';
    1408          499 :             if (t_iseq(repl, '0'))
    1409           22 :                 *prepl = '\0';
    1410              : 
    1411          499 :             NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
    1412              :                        isSuffix ? FF_SUFFIX : FF_PREFIX);
    1413          499 :             pfree(prepl);
    1414          499 :             pfree(pfind);
    1415          499 :             pfree(pmask);
    1416              :         }
    1417              : 
    1418         2138 : nextline:
    1419         2138 :         pfree(recoded);
    1420              :     }
    1421              : 
    1422           56 :     tsearch_readline_end(&trst);
    1423           56 :     if (ptype)
    1424           56 :         pfree(ptype);
    1425           56 : }
    1426              : 
    1427              : /*
    1428              :  * import affixes
    1429              :  *
    1430              :  * Note caller must already have applied get_tsearch_config_filename
    1431              :  *
    1432              :  * This function is responsible for parsing ispell ("old format") affix files.
    1433              :  * If we realize that the file contains new-format commands, we pass off the
    1434              :  * work to NIImportOOAffixes(), which will re-read the whole file.
    1435              :  */
    1436              : void
    1437           83 : NIImportAffixes(IspellDict *Conf, const char *filename)
    1438              : {
    1439           83 :     char       *pstr = NULL;
    1440              :     char        flag[BUFSIZ];
    1441              :     char        mask[BUFSIZ];
    1442              :     char        find[BUFSIZ];
    1443              :     char        repl[BUFSIZ];
    1444              :     char       *s;
    1445           83 :     bool        suffixes = false;
    1446           83 :     bool        prefixes = false;
    1447           83 :     char        flagflags = 0;
    1448              :     tsearch_readline_state trst;
    1449           83 :     bool        oldformat = false;
    1450           83 :     char       *recoded = NULL;
    1451              : 
    1452           83 :     if (!tsearch_readline_begin(&trst, filename))
    1453            0 :         ereport(ERROR,
    1454              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1455              :                  errmsg("could not open affix file \"%s\": %m",
    1456              :                         filename)));
    1457              : 
    1458           83 :     Conf->usecompound = false;
    1459           83 :     Conf->useFlagAliases = false;
    1460           83 :     Conf->flagMode = FM_CHAR;
    1461              : 
    1462          785 :     while ((recoded = tsearch_readline(&trst)) != NULL)
    1463              :     {
    1464          758 :         pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
    1465              : 
    1466              :         /* Skip comments and empty lines */
    1467          758 :         if (*pstr == '#' || *pstr == '\n')
    1468          243 :             goto nextline;
    1469              : 
    1470          515 :         if (STRNCMP(pstr, "compoundwords") == 0)
    1471              :         {
    1472              :             /* Find case-insensitive L flag in non-lowercased string */
    1473           27 :             s = findchar2(recoded, 'l', 'L');
    1474           27 :             if (s)
    1475              :             {
    1476          135 :                 while (*s && !isspace((unsigned char) *s))
    1477          108 :                     s += pg_mblen_cstr(s);
    1478           54 :                 while (*s && isspace((unsigned char) *s))
    1479           27 :                     s += pg_mblen_cstr(s);
    1480              : 
    1481           27 :                 if (*s && pg_mblen_cstr(s) == 1)
    1482              :                 {
    1483           27 :                     addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
    1484           27 :                     Conf->usecompound = true;
    1485              :                 }
    1486           27 :                 oldformat = true;
    1487           27 :                 goto nextline;
    1488              :             }
    1489              :         }
    1490          488 :         if (STRNCMP(pstr, "suffixes") == 0)
    1491              :         {
    1492           27 :             suffixes = true;
    1493           27 :             prefixes = false;
    1494           27 :             oldformat = true;
    1495           27 :             goto nextline;
    1496              :         }
    1497          461 :         if (STRNCMP(pstr, "prefixes") == 0)
    1498              :         {
    1499           27 :             suffixes = false;
    1500           27 :             prefixes = true;
    1501           27 :             oldformat = true;
    1502           27 :             goto nextline;
    1503              :         }
    1504          434 :         if (STRNCMP(pstr, "flag") == 0)
    1505              :         {
    1506          232 :             s = recoded + 4;    /* we need non-lowercased string */
    1507          232 :             flagflags = 0;
    1508              : 
    1509          464 :             while (*s && isspace((unsigned char) *s))
    1510          232 :                 s += pg_mblen_cstr(s);
    1511              : 
    1512          232 :             if (*s == '*')
    1513              :             {
    1514          135 :                 flagflags |= FF_CROSSPRODUCT;
    1515          135 :                 s++;
    1516              :             }
    1517           97 :             else if (*s == '~')
    1518              :             {
    1519           27 :                 flagflags |= FF_COMPOUNDONLY;
    1520           27 :                 s++;
    1521              :             }
    1522              : 
    1523          232 :             if (*s == '\\')
    1524           27 :                 s++;
    1525              : 
    1526              :             /*
    1527              :              * An old-format flag is a single ASCII character; we expect it to
    1528              :              * be followed by EOL, whitespace, or ':'.  Otherwise this is a
    1529              :              * new-format flag command.
    1530              :              */
    1531          232 :             if (*s && pg_mblen_cstr(s) == 1)
    1532              :             {
    1533          232 :                 flag[0] = *s++;
    1534          232 :                 flag[1] = '\0';
    1535              : 
    1536          232 :                 if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
    1537           43 :                     isspace((unsigned char) *s))
    1538              :                 {
    1539          189 :                     oldformat = true;
    1540          189 :                     goto nextline;
    1541              :                 }
    1542              :             }
    1543           43 :             goto isnewformat;
    1544              :         }
    1545          202 :         if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
    1546          189 :             STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
    1547          189 :             STRNCMP(recoded, "PFX") == 0 ||
    1548          189 :             STRNCMP(recoded, "SFX") == 0)
    1549           13 :             goto isnewformat;
    1550              : 
    1551          189 :         if ((!suffixes) && (!prefixes))
    1552            0 :             goto nextline;
    1553              : 
    1554          189 :         if (!parse_affentry(pstr, mask, find, repl))
    1555            0 :             goto nextline;
    1556              : 
    1557          189 :         NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
    1558              : 
    1559          702 : nextline:
    1560          702 :         pfree(recoded);
    1561          702 :         pfree(pstr);
    1562              :     }
    1563           27 :     tsearch_readline_end(&trst);
    1564           27 :     return;
    1565              : 
    1566           56 : isnewformat:
    1567           56 :     if (oldformat)
    1568            0 :         ereport(ERROR,
    1569              :                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1570              :                  errmsg("affix file contains both old-style and new-style commands")));
    1571           56 :     tsearch_readline_end(&trst);
    1572              : 
    1573           56 :     NIImportOOAffixes(Conf, filename);
    1574              : }
    1575              : 
    1576              : /*
    1577              :  * Merges two affix flag sets and stores a new affix flag set into
    1578              :  * Conf->AffixData.
    1579              :  *
    1580              :  * Returns index of a new affix flag set.
    1581              :  */
    1582              : static int
    1583           41 : MergeAffix(IspellDict *Conf, int a1, int a2)
    1584              : {
    1585              :     const char **ptr;
    1586              : 
    1587              :     Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
    1588              : 
    1589              :     /* Do not merge affix flags if one of affix flags is empty */
    1590           41 :     if (*Conf->AffixData[a1] == '\0')
    1591            0 :         return a2;
    1592           41 :     else if (*Conf->AffixData[a2] == '\0')
    1593            0 :         return a1;
    1594              : 
    1595              :     /* Double the size of AffixData if there's not enough space */
    1596           41 :     if (Conf->nAffixData + 1 >= Conf->lenAffixData)
    1597              :     {
    1598           41 :         Conf->lenAffixData *= 2;
    1599           41 :         Conf->AffixData = (const char **) repalloc(Conf->AffixData,
    1600           41 :                                                    sizeof(char *) * Conf->lenAffixData);
    1601              :     }
    1602              : 
    1603           41 :     ptr = Conf->AffixData + Conf->nAffixData;
    1604           41 :     if (Conf->flagMode == FM_NUM)
    1605              :     {
    1606           18 :         char       *p = cpalloc(strlen(Conf->AffixData[a1]) +
    1607              :                                 strlen(Conf->AffixData[a2]) +
    1608              :                                 1 /* comma */ + 1 /* \0 */ );
    1609              : 
    1610           18 :         sprintf(p, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1611           18 :         *ptr = p;
    1612              :     }
    1613              :     else
    1614              :     {
    1615           23 :         char       *p = cpalloc(strlen(Conf->AffixData[a1]) +
    1616              :                                 strlen(Conf->AffixData[a2]) +
    1617              :                                 1 /* \0 */ );
    1618              : 
    1619           23 :         sprintf(p, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
    1620           23 :         *ptr = p;
    1621              :     }
    1622           41 :     ptr++;
    1623           41 :     *ptr = NULL;
    1624           41 :     Conf->nAffixData++;
    1625              : 
    1626           41 :     return Conf->nAffixData - 1;
    1627              : }
    1628              : 
    1629              : /*
    1630              :  * Returns a set of affix parameters which correspondence to the set of affix
    1631              :  * flags with the given index.
    1632              :  */
    1633              : static uint32
    1634          700 : makeCompoundFlags(IspellDict *Conf, int affix)
    1635              : {
    1636              :     Assert(affix < Conf->nAffixData);
    1637              : 
    1638          700 :     return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
    1639              :             FF_COMPOUNDFLAGMASK);
    1640              : }
    1641              : 
    1642              : /*
    1643              :  * Makes a prefix tree for the given level.
    1644              :  *
    1645              :  * Conf: current dictionary.
    1646              :  * low: lower index of the Conf->Spell array.
    1647              :  * high: upper index of the Conf->Spell array.
    1648              :  * level: current prefix tree level.
    1649              :  */
    1650              : static SPNode *
    1651         2802 : mkSPNode(IspellDict *Conf, int low, int high, int level)
    1652              : {
    1653              :     int         i;
    1654         2802 :     int         nchar = 0;
    1655         2802 :     char        lastchar = '\0';
    1656              :     SPNode     *rs;
    1657              :     SPNodeData *data;
    1658         2802 :     int         lownew = low;
    1659              : 
    1660         9209 :     for (i = low; i < high; i++)
    1661         6407 :         if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
    1662              :         {
    1663         2747 :             nchar++;
    1664         2747 :             lastchar = Conf->Spell[i]->word[level];
    1665              :         }
    1666              : 
    1667         2802 :     if (!nchar)
    1668          401 :         return NULL;
    1669              : 
    1670         2401 :     rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
    1671         2401 :     rs->length = nchar;
    1672         2401 :     data = rs->data;
    1673              : 
    1674         2401 :     lastchar = '\0';
    1675         8121 :     for (i = low; i < high; i++)
    1676         5732 :         if (Conf->Spell[i]->p.d.len > level)
    1677              :         {
    1678         4118 :             if (lastchar != Conf->Spell[i]->word[level])
    1679              :             {
    1680         2739 :                 if (lastchar)
    1681              :                 {
    1682              :                     /* Next level of the prefix tree */
    1683          338 :                     data->node = mkSPNode(Conf, lownew, i, level + 1);
    1684          330 :                     lownew = i;
    1685          330 :                     data++;
    1686              :                 }
    1687         2731 :                 lastchar = Conf->Spell[i]->word[level];
    1688              :             }
    1689         4110 :             data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
    1690         4110 :             if (Conf->Spell[i]->p.d.len == level + 1)
    1691              :             {
    1692          659 :                 bool        clearCompoundOnly = false;
    1693              : 
    1694          659 :                 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
    1695              :                 {
    1696              :                     /*
    1697              :                      * MergeAffix called a few times. If one of word is
    1698              :                      * allowed to be in compound word and another isn't, then
    1699              :                      * clear FF_COMPOUNDONLY flag.
    1700              :                      */
    1701              : 
    1702           82 :                     clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
    1703           41 :                                          & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
    1704              :                         ? false : true;
    1705           41 :                     data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
    1706              :                 }
    1707              :                 else
    1708          618 :                     data->affix = Conf->Spell[i]->p.d.affix;
    1709          659 :                 data->isword = 1;
    1710              : 
    1711          659 :                 data->compoundflag = makeCompoundFlags(Conf, data->affix);
    1712              : 
    1713          655 :                 if ((data->compoundflag & FF_COMPOUNDONLY) &&
    1714            0 :                     (data->compoundflag & FF_COMPOUNDFLAG) == 0)
    1715            0 :                     data->compoundflag |= FF_COMPOUNDFLAG;
    1716              : 
    1717          655 :                 if (clearCompoundOnly)
    1718           41 :                     data->compoundflag &= ~FF_COMPOUNDONLY;
    1719              :             }
    1720              :         }
    1721              : 
    1722              :     /* Next level of the prefix tree */
    1723         2389 :     data->node = mkSPNode(Conf, lownew, high, level + 1);
    1724              : 
    1725         2385 :     return rs;
    1726              : }
    1727              : 
    1728              : /*
    1729              :  * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
    1730              :  * and affixes.
    1731              :  */
    1732              : void
    1733           83 : NISortDictionary(IspellDict *Conf)
    1734              : {
    1735              :     int         i;
    1736              :     int         naffix;
    1737              :     int         curaffix;
    1738              : 
    1739              :     /* compress affixes */
    1740              : 
    1741              :     /*
    1742              :      * If we use flag aliases then we need to use Conf->AffixData filled in
    1743              :      * the NIImportOOAffixes().
    1744              :      */
    1745           83 :     if (Conf->useFlagAliases)
    1746              :     {
    1747          164 :         for (i = 0; i < Conf->nspell; i++)
    1748              :         {
    1749              :             char       *end;
    1750              : 
    1751          151 :             if (*Conf->Spell[i]->p.flag != '\0')
    1752              :             {
    1753          138 :                 errno = 0;
    1754          138 :                 curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
    1755          138 :                 if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
    1756            4 :                     ereport(ERROR,
    1757              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1758              :                              errmsg("invalid affix alias \"%s\"",
    1759              :                                     Conf->Spell[i]->p.flag)));
    1760          134 :                 if (curaffix < 0 || curaffix >= Conf->nAffixData)
    1761            4 :                     ereport(ERROR,
    1762              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1763              :                              errmsg("invalid affix alias \"%s\"",
    1764              :                                     Conf->Spell[i]->p.flag)));
    1765          130 :                 if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
    1766            0 :                     ereport(ERROR,
    1767              :                             (errcode(ERRCODE_CONFIG_FILE_ERROR),
    1768              :                              errmsg("invalid affix alias \"%s\"",
    1769              :                                     Conf->Spell[i]->p.flag)));
    1770              :             }
    1771              :             else
    1772              :             {
    1773              :                 /*
    1774              :                  * If Conf->Spell[i]->p.flag is empty, then get empty value of
    1775              :                  * Conf->AffixData (0 index).
    1776              :                  */
    1777           13 :                 curaffix = 0;
    1778              :             }
    1779              : 
    1780          143 :             Conf->Spell[i]->p.d.affix = curaffix;
    1781          143 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1782              :         }
    1783              :     }
    1784              :     /* Otherwise fill Conf->AffixData here */
    1785              :     else
    1786              :     {
    1787              :         /* Count the number of different flags used in the dictionary */
    1788           62 :         qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *),
    1789              :               cmpspellaffix);
    1790              : 
    1791           62 :         naffix = 0;
    1792          606 :         for (i = 0; i < Conf->nspell; i++)
    1793              :         {
    1794          544 :             if (i == 0 ||
    1795          482 :                 strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
    1796          482 :                 naffix++;
    1797              :         }
    1798              : 
    1799              :         /*
    1800              :          * Fill in Conf->AffixData with the affixes that were used in the
    1801              :          * dictionary. Replace textual flag-field of Conf->Spell entries with
    1802              :          * indexes into Conf->AffixData array.
    1803              :          */
    1804           62 :         Conf->AffixData = palloc0_array(const char *, naffix);
    1805              : 
    1806           62 :         curaffix = -1;
    1807          606 :         for (i = 0; i < Conf->nspell; i++)
    1808              :         {
    1809          544 :             if (i == 0 ||
    1810          482 :                 strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
    1811              :             {
    1812          482 :                 curaffix++;
    1813              :                 Assert(curaffix < naffix);
    1814          482 :                 Conf->AffixData[curaffix] = cpstrdup(Conf,
    1815          482 :                                                      Conf->Spell[i]->p.flag);
    1816              :             }
    1817              : 
    1818          544 :             Conf->Spell[i]->p.d.affix = curaffix;
    1819          544 :             Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
    1820              :         }
    1821              : 
    1822           62 :         Conf->lenAffixData = Conf->nAffixData = naffix;
    1823              :     }
    1824              : 
    1825              :     /* Start build a prefix tree */
    1826           75 :     qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
    1827           75 :     Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
    1828           71 : }
    1829              : 
    1830              : /*
    1831              :  * Makes a prefix tree for the given level using the repl string of an affix
    1832              :  * rule. Affixes with empty replace string do not include in the prefix tree.
    1833              :  * This affixes are included by mkVoidAffix().
    1834              :  *
    1835              :  * Conf: current dictionary.
    1836              :  * low: lower index of the Conf->Affix array.
    1837              :  * high: upper index of the Conf->Affix array.
    1838              :  * level: current prefix tree level.
    1839              :  * type: FF_SUFFIX or FF_PREFIX.
    1840              :  */
    1841              : static AffixNode *
    1842         1198 : mkANode(IspellDict *Conf, int low, int high, int level, int type)
    1843              : {
    1844              :     int         i;
    1845         1198 :     int         nchar = 0;
    1846         1198 :     uint8       lastchar = '\0';
    1847              :     AffixNode  *rs;
    1848              :     AffixNodeData *data;
    1849         1198 :     int         lownew = low;
    1850              :     int         naff;
    1851              :     AFFIX     **aff;
    1852              : 
    1853         3224 :     for (i = low; i < high; i++)
    1854         2026 :         if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
    1855              :         {
    1856         1056 :             nchar++;
    1857         1056 :             lastchar = GETCHAR(Conf->Affix + i, level, type);
    1858              :         }
    1859              : 
    1860         1198 :     if (!nchar)
    1861          457 :         return NULL;
    1862              : 
    1863          741 :     aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
    1864          741 :     naff = 0;
    1865              : 
    1866          741 :     rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
    1867          741 :     rs->length = nchar;
    1868          741 :     data = rs->data;
    1869              : 
    1870          741 :     lastchar = '\0';
    1871         2195 :     for (i = low; i < high; i++)
    1872         1454 :         if (Conf->Affix[i].replen > level)
    1873              :         {
    1874         1224 :             if (lastchar != GETCHAR(Conf->Affix + i, level, type))
    1875              :             {
    1876         1056 :                 if (lastchar)
    1877              :                 {
    1878              :                     /* Next level of the prefix tree */
    1879          315 :                     data->node = mkANode(Conf, lownew, i, level + 1, type);
    1880          315 :                     if (naff)
    1881              :                     {
    1882           71 :                         data->naff = naff;
    1883           71 :                         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1884           71 :                         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1885           71 :                         naff = 0;
    1886              :                     }
    1887          315 :                     data++;
    1888          315 :                     lownew = i;
    1889              :                 }
    1890         1056 :                 lastchar = GETCHAR(Conf->Affix + i, level, type);
    1891              :             }
    1892         1224 :             data->val = GETCHAR(Conf->Affix + i, level, type);
    1893         1224 :             if (Conf->Affix[i].replen == level + 1)
    1894              :             {                   /* affix stopped */
    1895          554 :                 aff[naff++] = Conf->Affix + i;
    1896              :             }
    1897              :         }
    1898              : 
    1899              :     /* Next level of the prefix tree */
    1900          741 :     data->node = mkANode(Conf, lownew, high, level + 1, type);
    1901          741 :     if (naff)
    1902              :     {
    1903          457 :         data->naff = naff;
    1904          457 :         data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
    1905          457 :         memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
    1906          457 :         naff = 0;
    1907              :     }
    1908              : 
    1909          741 :     pfree(aff);
    1910              : 
    1911          741 :     return rs;
    1912              : }
    1913              : 
    1914              : /*
    1915              :  * Makes the root void node in the prefix tree. The root void node is created
    1916              :  * for affixes which have empty replace string ("repl" field).
    1917              :  */
    1918              : static void
    1919          142 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
    1920              : {
    1921              :     int         i,
    1922          142 :                 cnt = 0;
    1923          142 :     int         start = (issuffix) ? startsuffix : 0;
    1924          142 :     int         end = (issuffix) ? Conf->naffixes : startsuffix;
    1925          142 :     AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
    1926              : 
    1927          142 :     Affix->length = 1;
    1928          142 :     Affix->isvoid = 1;
    1929              : 
    1930          142 :     if (issuffix)
    1931              :     {
    1932           71 :         Affix->data->node = Conf->Suffix;
    1933           71 :         Conf->Suffix = Affix;
    1934              :     }
    1935              :     else
    1936              :     {
    1937           71 :         Affix->data->node = Conf->Prefix;
    1938           71 :         Conf->Prefix = Affix;
    1939              :     }
    1940              : 
    1941              :     /* Count affixes with empty replace string */
    1942          714 :     for (i = start; i < end; i++)
    1943          572 :         if (Conf->Affix[i].replen == 0)
    1944           18 :             cnt++;
    1945              : 
    1946              :     /* There is not affixes with empty replace string */
    1947          142 :     if (cnt == 0)
    1948          124 :         return;
    1949              : 
    1950           18 :     Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
    1951           18 :     Affix->data->naff = (uint32) cnt;
    1952              : 
    1953           18 :     cnt = 0;
    1954          144 :     for (i = start; i < end; i++)
    1955          126 :         if (Conf->Affix[i].replen == 0)
    1956              :         {
    1957           18 :             Affix->data->aff[cnt] = Conf->Affix + i;
    1958           18 :             cnt++;
    1959              :         }
    1960              : }
    1961              : 
    1962              : /*
    1963              :  * Checks if the affixflag is used by dictionary. Conf->AffixData does not
    1964              :  * contain affixflag if this flag is not used actually by the .dict file.
    1965              :  *
    1966              :  * Conf: current dictionary.
    1967              :  * affixflag: affix flag.
    1968              :  *
    1969              :  * Returns true if the Conf->AffixData array contains affixflag, otherwise
    1970              :  * returns false.
    1971              :  */
    1972              : static bool
    1973           97 : isAffixInUse(IspellDict *Conf, const char *affixflag)
    1974              : {
    1975              :     int         i;
    1976              : 
    1977          710 :     for (i = 0; i < Conf->nAffixData; i++)
    1978          695 :         if (IsAffixFlagInUse(Conf, i, affixflag))
    1979           82 :             return true;
    1980              : 
    1981           15 :     return false;
    1982              : }
    1983              : 
    1984              : /*
    1985              :  * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
    1986              :  */
    1987              : void
    1988           71 : NISortAffixes(IspellDict *Conf)
    1989              : {
    1990              :     AFFIX      *Affix;
    1991              :     size_t      i;
    1992              :     CMPDAffix  *ptr;
    1993           71 :     int         firstsuffix = Conf->naffixes;
    1994              : 
    1995           71 :     if (Conf->naffixes == 0)
    1996            0 :         return;
    1997              : 
    1998              :     /* Store compound affixes in the Conf->CompoundAffix array */
    1999           71 :     if (Conf->naffixes > 1)
    2000           71 :         qsort(Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
    2001           71 :     Conf->CompoundAffix = ptr = palloc_array(CMPDAffix, Conf->naffixes);
    2002           71 :     ptr->affix = NULL;
    2003              : 
    2004          643 :     for (i = 0; i < Conf->naffixes; i++)
    2005              :     {
    2006          572 :         Affix = &(((AFFIX *) Conf->Affix)[i]);
    2007          572 :         if (Affix->type == FF_SUFFIX && i < firstsuffix)
    2008           71 :             firstsuffix = i;
    2009              : 
    2010          669 :         if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
    2011           97 :             isAffixInUse(Conf, Affix->flag))
    2012              :         {
    2013           82 :             bool        issuffix = (Affix->type == FF_SUFFIX);
    2014              : 
    2015           82 :             if (ptr == Conf->CompoundAffix ||
    2016           52 :                 issuffix != (ptr - 1)->issuffix ||
    2017           26 :                 strbncmp((const unsigned char *) (ptr - 1)->affix,
    2018           26 :                          (const unsigned char *) Affix->repl,
    2019           26 :                          (ptr - 1)->len))
    2020              :             {
    2021              :                 /* leave only unique and minimal suffixes */
    2022           69 :                 ptr->affix = Affix->repl;
    2023           69 :                 ptr->len = Affix->replen;
    2024           69 :                 ptr->issuffix = issuffix;
    2025           69 :                 ptr++;
    2026              :             }
    2027              :         }
    2028              :     }
    2029           71 :     ptr->affix = NULL;
    2030           71 :     Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
    2031              : 
    2032              :     /* Start build a prefix tree */
    2033           71 :     Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
    2034           71 :     Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
    2035           71 :     mkVoidAffix(Conf, true, firstsuffix);
    2036           71 :     mkVoidAffix(Conf, false, firstsuffix);
    2037              : }
    2038              : 
    2039              : static AffixNodeData *
    2040         3850 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
    2041              : {
    2042              :     AffixNodeData *StopLow,
    2043              :                *StopHigh,
    2044              :                *StopMiddle;
    2045              :     uint8 symbol;
    2046              : 
    2047         3850 :     if (node->isvoid)
    2048              :     {                           /* search void affixes */
    2049         3350 :         if (node->data->naff)
    2050          285 :             return node->data;
    2051         3065 :         node = node->data->node;
    2052              :     }
    2053              : 
    2054         4485 :     while (node && *level < wrdlen)
    2055              :     {
    2056         4465 :         StopLow = node->data;
    2057         4465 :         StopHigh = node->data + node->length;
    2058         9855 :         while (StopLow < StopHigh)
    2059              :         {
    2060         7395 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2061         7395 :             symbol = GETWCHAR(word, wrdlen, *level, type);
    2062              : 
    2063         7395 :             if (StopMiddle->val == symbol)
    2064              :             {
    2065         2005 :                 (*level)++;
    2066         2005 :                 if (StopMiddle->naff)
    2067         1085 :                     return StopMiddle;
    2068          920 :                 node = StopMiddle->node;
    2069          920 :                 break;
    2070              :             }
    2071         5390 :             else if (StopMiddle->val < symbol)
    2072         1340 :                 StopLow = StopMiddle + 1;
    2073              :             else
    2074         4050 :                 StopHigh = StopMiddle;
    2075              :         }
    2076         3380 :         if (StopLow >= StopHigh)
    2077         2460 :             break;
    2078              :     }
    2079         2480 :     return NULL;
    2080              : }
    2081              : 
    2082              : /*
    2083              :  * Checks to see if affix applies to word, transforms word if so.
    2084              :  * The transformation consists of replacing Affix->replen leading or
    2085              :  * trailing bytes with the Affix->find string.
    2086              :  *
    2087              :  * word: input word
    2088              :  * len: length of input word
    2089              :  * Affix: affix to consider
    2090              :  * flagflags: context flags showing whether we are handling a compound word
    2091              :  * newword: output buffer (MUST be of length 2 * MAXNORMLEN)
    2092              :  * baselen: input/output argument
    2093              :  *
    2094              :  * If baselen isn't NULL, then *baselen is used to return the length of
    2095              :  * the non-changed part of the word when applying a suffix, and is used
    2096              :  * to detect whether the input contained only a prefix and suffix when
    2097              :  * later applying a prefix.
    2098              :  *
    2099              :  * Returns newword on success, or NULL if the affix can't be applied.
    2100              :  * On success, the modified word is stored into newword.
    2101              :  */
    2102              : static char *
    2103         1530 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
    2104              : {
    2105              :     size_t      keeplen,
    2106              :                 findlen;
    2107              : 
    2108              :     /*
    2109              :      * Check compound allow flags
    2110              :      */
    2111              : 
    2112         1530 :     if (flagflags == 0)
    2113              :     {
    2114         1055 :         if (Affix->flagflags & FF_COMPOUNDONLY)
    2115          110 :             return NULL;
    2116              :     }
    2117          475 :     else if (flagflags & FF_COMPOUNDBEGIN)
    2118              :     {
    2119            0 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2120            0 :             return NULL;
    2121            0 :         if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
    2122            0 :             if (Affix->type == FF_SUFFIX)
    2123            0 :                 return NULL;
    2124              :     }
    2125          475 :     else if (flagflags & FF_COMPOUNDMIDDLE)
    2126              :     {
    2127          340 :         if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
    2128          190 :             (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
    2129          150 :             return NULL;
    2130              :     }
    2131          135 :     else if (flagflags & FF_COMPOUNDLAST)
    2132              :     {
    2133          135 :         if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
    2134            0 :             return NULL;
    2135          135 :         if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
    2136          125 :             if (Affix->type == FF_PREFIX)
    2137            0 :                 return NULL;
    2138              :     }
    2139              : 
    2140              :     /*
    2141              :      * Protect against output buffer overrun (len < Affix->replen would be
    2142              :      * caller error, but check anyway)
    2143              :      */
    2144              :     Assert(len == strlen(word));
    2145         1270 :     if (len < Affix->replen)
    2146            0 :         return NULL;
    2147         1270 :     keeplen = len - Affix->replen;   /* how much of word we will keep */
    2148         1270 :     findlen = strlen(Affix->find);
    2149         1270 :     if (keeplen + findlen >= 2 * MAXNORMLEN)
    2150            0 :         return NULL;
    2151              : 
    2152              :     /*
    2153              :      * make replace pattern of affix
    2154              :      */
    2155         1270 :     if (Affix->type == FF_SUFFIX)
    2156              :     {
    2157          870 :         memcpy(newword, word, keeplen);
    2158          870 :         strcpy(newword + keeplen, Affix->find);
    2159          870 :         if (baselen)            /* store length of non-changed part of word */
    2160          870 :             *baselen = keeplen;
    2161              :     }
    2162              :     else
    2163              :     {
    2164              :         /*
    2165              :          * if prefix is an all non-changed part's length then all word
    2166              :          * contains only prefix and suffix, so out
    2167              :          */
    2168          400 :         if (baselen && *baselen + findlen <= Affix->replen)
    2169            0 :             return NULL;
    2170          400 :         memcpy(newword, Affix->find, findlen);
    2171          400 :         strcpy(newword + findlen, word + Affix->replen);
    2172              :     }
    2173              : 
    2174              :     /*
    2175              :      * check resulting word
    2176              :      */
    2177         1270 :     if (Affix->issimple)
    2178          400 :         return newword;
    2179          870 :     else if (Affix->isregis)
    2180              :     {
    2181          590 :         if (RS_execute(&(Affix->reg.regis), newword))
    2182          560 :             return newword;
    2183              :     }
    2184              :     else
    2185              :     {
    2186              :         pg_wchar   *data;
    2187              :         size_t      data_len;
    2188              :         int         newword_len;
    2189              : 
    2190              :         /* Convert data string to wide characters */
    2191          280 :         newword_len = strlen(newword);
    2192          280 :         data = palloc_array(pg_wchar, newword_len + 1);
    2193          280 :         data_len = pg_mb2wchar_with_len(newword, data, newword_len);
    2194              : 
    2195          280 :         if (pg_regexec(Affix->reg.pregex, data, data_len,
    2196              :                        0, NULL, 0, NULL, 0) == REG_OKAY)
    2197              :         {
    2198          280 :             pfree(data);
    2199          280 :             return newword;
    2200              :         }
    2201            0 :         pfree(data);
    2202              :     }
    2203              : 
    2204           30 :     return NULL;
    2205              : }
    2206              : 
    2207              : static int
    2208          450 : addToResult(char **forms, char **cur, char *word)
    2209              : {
    2210          450 :     if (cur - forms >= MAX_NORM - 1)
    2211            0 :         return 0;
    2212          450 :     if (forms == cur || strcmp(word, *(cur - 1)) != 0)
    2213              :     {
    2214          450 :         *cur = pstrdup(word);
    2215          450 :         *(cur + 1) = NULL;
    2216          450 :         return 1;
    2217              :     }
    2218              : 
    2219            0 :     return 0;
    2220              : }
    2221              : 
    2222              : static char **
    2223         1255 : NormalizeSubWord(IspellDict *Conf, const char *word, int flag)
    2224              : {
    2225         1255 :     AffixNodeData *suffix = NULL,
    2226         1255 :                *prefix = NULL;
    2227         1255 :     int         slevel = 0,
    2228         1255 :                 plevel = 0;
    2229         1255 :     int         wrdlen = strlen(word),
    2230              :                 swrdlen;
    2231              :     char      **forms;
    2232              :     char      **cur;
    2233         1255 :     char        newword[2 * MAXNORMLEN] = "";
    2234         1255 :     char        pnewword[2 * MAXNORMLEN] = "";
    2235         1255 :     AffixNode  *snode = Conf->Suffix,
    2236              :                *pnode;
    2237              :     int         i,
    2238              :                 j;
    2239              : 
    2240         1255 :     if (wrdlen > MAXNORMLEN)
    2241            0 :         return NULL;
    2242         1255 :     cur = forms = palloc_array(char *, MAX_NORM);
    2243         1255 :     *cur = NULL;
    2244              : 
    2245              : 
    2246              :     /* Check that the word itself is normal form */
    2247         1255 :     if (FindWord(Conf, word, VoidString, flag))
    2248              :     {
    2249          390 :         *cur = pstrdup(word);
    2250          390 :         cur++;
    2251          390 :         *cur = NULL;
    2252              :     }
    2253              : 
    2254              :     /* Find all other NORMAL forms of the 'word' (check only prefix) */
    2255         1255 :     pnode = Conf->Prefix;
    2256         1255 :     plevel = 0;
    2257         1435 :     while (pnode)
    2258              :     {
    2259         1255 :         prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
    2260         1255 :         if (!prefix)
    2261         1075 :             break;
    2262          360 :         for (j = 0; j < prefix->naff; j++)
    2263              :         {
    2264          180 :             if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
    2265              :             {
    2266              :                 /* prefix success */
    2267          160 :                 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
    2268           40 :                     cur += addToResult(forms, cur, newword);
    2269              :             }
    2270              :         }
    2271          180 :         pnode = prefix->node;
    2272              :     }
    2273              : 
    2274              :     /*
    2275              :      * Find all other NORMAL forms of the 'word' (check suffix and then
    2276              :      * prefix)
    2277              :      */
    2278         2165 :     while (snode)
    2279              :     {
    2280         1755 :         int         baselen = 0;
    2281              : 
    2282              :         /* find possible suffix */
    2283         1755 :         suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
    2284         1755 :         if (!suffix)
    2285          845 :             break;
    2286              :         /* foreach suffix check affix */
    2287         1980 :         for (i = 0; i < suffix->naff; i++)
    2288              :         {
    2289         1070 :             if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
    2290              :             {
    2291              :                 /* suffix success */
    2292          840 :                 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
    2293          230 :                     cur += addToResult(forms, cur, newword);
    2294              : 
    2295              :                 /* now we will look changed word with prefixes */
    2296          840 :                 pnode = Conf->Prefix;
    2297          840 :                 plevel = 0;
    2298          840 :                 swrdlen = strlen(newword);
    2299         1120 :                 while (pnode)
    2300              :                 {
    2301          840 :                     prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
    2302          840 :                     if (!prefix)
    2303          560 :                         break;
    2304          560 :                     for (j = 0; j < prefix->naff; j++)
    2305              :                     {
    2306          280 :                         if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
    2307              :                         {
    2308              :                             /* prefix success */
    2309          480 :                             const char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
    2310          240 :                                 VoidString : prefix->aff[j]->flag;
    2311              : 
    2312          240 :                             if (FindWord(Conf, pnewword, ff, flag))
    2313          180 :                                 cur += addToResult(forms, cur, pnewword);
    2314              :                         }
    2315              :                     }
    2316          280 :                     pnode = prefix->node;
    2317              :                 }
    2318              :             }
    2319              :         }
    2320              : 
    2321          910 :         snode = suffix->node;
    2322              :     }
    2323              : 
    2324         1255 :     if (cur == forms)
    2325              :     {
    2326          555 :         pfree(forms);
    2327          555 :         return NULL;
    2328              :     }
    2329          700 :     return forms;
    2330              : }
    2331              : 
    2332              : typedef struct SplitVar
    2333              : {
    2334              :     int         nstem;
    2335              :     int         lenstem;
    2336              :     char      **stem;
    2337              :     struct SplitVar *next;
    2338              : } SplitVar;
    2339              : 
                        /*
                         * CheckCompoundAffixes
                         *      Find the next compound affix that occurs in "word".
                         *
                         * *ptr points into an array of CMPDAffix entries that ends with an entry
                         * whose affix is NULL.  The array is scanned from *ptr onwards; only
                         * affixes strictly shorter than "len" are considered.  If CheckInPlace is
                         * true the affix must match at the very start of "word", otherwise it may
                         * occur anywhere in "word" (strstr).
                         *
                         * On a match, *ptr is advanced past the matching entry, so that calling
                         * again continues with the following affixes.  The result is then the
                         * offset in "word" just past the affix if the entry has issuffix set, and
                         * 0 if it has not.  Returns -1 if *ptr is NULL or no remaining affix
                         * matches; in the latter case *ptr is left at the terminating entry.
                         */
    2340              : static int
    2341         5050 : CheckCompoundAffixes(CMPDAffix **ptr, const char *word, int len, bool CheckInPlace)
    2342              : {
    2343              :     bool        issuffix;
    2344              : 
    2345              :     /* in case CompoundAffix is null: */
    2346         5050 :     if (*ptr == NULL)
    2347            0 :         return -1;
    2348              : 
    2349         5050 :     if (CheckInPlace)
    2350              :     {
                            /* the affix has to be a prefix of "word" */
    2351         9640 :         while ((*ptr)->affix)
    2352              :         {
    2353         5370 :             if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
    2354              :             {
    2355           50 :                 len = (*ptr)->len;
    2356           50 :                 issuffix = (*ptr)->issuffix;
                                    /* step past this entry before returning, so the caller can resume */
    2357           50 :                 (*ptr)++;
    2358           50 :                 return (issuffix) ? len : 0;
    2359              :             }
    2360         5320 :             (*ptr)++;
    2361              :         }
    2362              :     }
    2363              :     else
    2364              :     {
    2365              :         const char *affbegin;
    2366              : 
                            /* the affix may start at any position of "word" */
    2367         1410 :         while ((*ptr)->affix)
    2368              :         {
    2369          785 :             if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
    2370              :             {
                                    /* offset of the first byte after the affix, counted from "word" */
    2371          105 :                 len = (*ptr)->len + (affbegin - word);
    2372          105 :                 issuffix = (*ptr)->issuffix;
    2373          105 :                 (*ptr)++;
    2374          105 :                 return (issuffix) ? len : 0;
    2375              :             }
    2376          680 :             (*ptr)++;
    2377              :         }
    2378              :     }
                        /* reached the terminating entry without a match */
    2379         4895 :     return -1;
    2380              : }
    2381              : 
                        /*
                         * CopyVar
                         *      Allocate a new SplitVar, optionally initialized from "s".
                         *
                         * With s == NULL the result has no stems and room for 16.  Otherwise the
                         * result has the same capacity and stems as "s"; the stem strings are
                         * duplicated with pstrdup() if makedup is nonzero, and shared with "s" if
                         * it is zero.  The "next" link is never copied; it starts out NULL.
                         */
    2382              : static SplitVar *
    2383         1175 : CopyVar(SplitVar *s, int makedup)
    2384              : {
    2385         1175 :     SplitVar   *v = palloc_object(SplitVar);
    2386              : 
    2387         1175 :     v->next = NULL;
    2388         1175 :     if (s)
    2389              :     {
    2390              :         int         i;
    2391              : 
    2392          550 :         v->lenstem = s->lenstem;
    2393          550 :         v->stem = palloc_array(char *, v->lenstem);
    2394          550 :         v->nstem = s->nstem;
                            /* with makedup == 0 both variants point at the same strings */
    2395          835 :         for (i = 0; i < s->nstem; i++)
    2396          285 :             v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
    2397              :     }
    2398              :     else
    2399              :     {
                            /* fresh, empty variant with an initial capacity of 16 stems */
    2400          625 :         v->lenstem = 16;
    2401          625 :         v->stem = palloc_array(char *, v->lenstem);
    2402          625 :         v->nstem = 0;
    2403              :     }
    2404         1175 :     return v;
    2405              : }
    2406              : 
                        /*
                         * AddStem
                         *      Append "word" to the stems of "v", doubling the array when full.
                         *
                         * The pointer is stored as is; the string is not copied.
                         */
    2407              : static void
    2408         1575 : AddStem(SplitVar *v, char *word)
    2409              : {
                        /* grow the stem array before it overflows */
    2410         1575 :     if (v->nstem >= v->lenstem)
    2411              :     {
    2412            0 :         v->lenstem *= 2;
    2413            0 :         v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
    2414              :     }
    2415              : 
    2416         1575 :     v->stem[v->nstem] = word;
    2417         1575 :     v->nstem++;
    2418         1575 : }
    2419              : 
                        /*
                         * SplitToVariants
                         *      Build the list of possible splits of "word" into compound parts.
                         *
                         * Conf      dictionary; its Dictionary tree and CompoundAffix array are used
                         * snode     tree node to continue the lookup from, or NULL to start at the
                         *           root (Conf->Dictionary)
                         * orig      split to start from (copied, including its strings), or NULL
                         *           for an empty one
                         * word      the word, wordlen bytes long
                         * startpos  offset in "word" where the stem being matched begins
                         * minpos    a dictionary word is accepted as a stem only if it ends after
                         *           this offset; when snode is given, matching also resumes here
                         *
                         * Returns a newly allocated SplitVar.  Alternative splits found along the
                         * way are chained to it through "next".  The text from the last split
                         * point to the end of the word is always added as the final stem, whether
                         * or not it was found in the dictionary.
                         */
    2420              : static SplitVar *
    2421         1100 : SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, const char *word, int wordlen, int startpos, int minpos)
    2422              : {
    2423         1100 :     SplitVar   *var = NULL;
    2424              :     SPNodeData *StopLow,
    2425              :                *StopHigh,
    2426         1100 :                *StopMiddle = NULL;
    2427         1100 :     SPNode     *node = (snode) ? snode : Conf->Dictionary;
    2428         1100 :     int         level = (snode) ? minpos : startpos;    /* recursive
    2429              :                                                          * minpos==level */
    2430              :     int         lenaff;
    2431              :     CMPDAffix  *caff;
    2432              :     char       *notprobed;
    2433         1100 :     int         compoundflag = 0;
    2434              : 
    2435              :     /* since this function recurses, it could be driven to stack overflow */
    2436         1100 :     check_stack_depth();
    2437              : 
                        /*
                         * notprobed[i] stays 1 until a sub-word ending at byte i has been
                         * accepted through the compound-affix path below.
                         */
    2438         1100 :     notprobed = (char *) palloc(wordlen);
    2439         1100 :     memset(notprobed, 1, wordlen);
                        /* work on a private copy; the stem strings are duplicated too */
    2440         1100 :     var = CopyVar(orig, 1);
    2441              : 
                        /* "level" is the offset of the byte of "word" examined in this pass */
    2442         6210 :     while (level < wordlen)
    2443              :     {
    2444              :         /* find word with epenthetic or/and compound affix */
    2445         5995 :         caff = Conf->CompoundAffix;
                            /*
                             * CheckCompoundAffixes() advances caff, so every evaluation of the
                             * loop condition, including the one after a "continue", moves on to
                             * the next affix.  With node == NULL the affix is searched anywhere
                             * in the rest of the word instead of exactly at "level".
                             */
    2446         6150 :         while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
    2447              :         {
    2448              :             /*
    2449              :              * a compound affix matched; check whether the sub-word ending with it is in the dictionary
    2450              :              */
    2451              :             char        buf[MAXNORMLEN];
    2452              :             char      **subres;
    2453              : 
                                /*
                                 * Turn the offset returned by CheckCompoundAffixes() (relative to
                                 * word + level) into the length of the candidate sub-word that
                                 * starts at startpos.
                                 */
    2454          155 :             lenaff = level - startpos + lenaff;
    2455              : 
                                /* a sub-word ending here was already accepted */
    2456          155 :             if (!notprobed[startpos + lenaff - 1])
    2457            0 :                 continue;
    2458              : 
    2459          155 :             if (level + lenaff - 1 <= minpos)
    2460            0 :                 continue;
    2461              : 
    2462          155 :             if (lenaff >= MAXNORMLEN)
    2463            0 :                 continue;       /* skip too big value */
                                /* copy the candidate sub-word into buf as a NUL-terminated string */
    2464          155 :             if (lenaff > 0)
    2465          155 :                 memcpy(buf, word + startpos, lenaff);
    2466          155 :             buf[lenaff] = '\0';
    2467              : 
                                /*
                                 * NOTE(review): the loop condition requires level > startpos, and
                                 * the callers visible here never pass a negative startpos, so the
                                 * level == 0 branch looks unreachable -- confirm.
                                 */
    2468          155 :             if (level == 0)
    2469            0 :                 compoundflag = FF_COMPOUNDBEGIN;
    2470          155 :             else if (level == wordlen - 1)
    2471            0 :                 compoundflag = FF_COMPOUNDLAST;
    2472              :             else
    2473          155 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2474          155 :             subres = NormalizeSubWord(Conf, buf, compoundflag);
    2475          155 :             if (subres)
    2476              :             {
    2477              :                 /* Yes, it was a word from dictionary */
                                    /* "new" shares var's stem strings (makedup == 0) */
    2478           75 :                 SplitVar   *new = CopyVar(var, 0);
    2479           75 :                 SplitVar   *ptr = var;
    2480           75 :                 char      **sptr = subres;
    2481              : 
    2482           75 :                 notprobed[startpos + lenaff - 1] = 0;
    2483              : 
                                    /* add every string returned for the sub-word as a stem of "new" */
    2484          150 :                 while (*sptr)
    2485              :                 {
    2486           75 :                     AddStem(new, *sptr);
    2487           75 :                     sptr++;
    2488              :                 }
    2489           75 :                 pfree(subres);
    2490              : 
                                    /*
                                     * Split the rest of the word starting right after the
                                     * sub-word, and append the resulting variants to the end of
                                     * our list.
                                     */
    2491           75 :                 while (ptr->next)
    2492            0 :                     ptr = ptr->next;
    2493           75 :                 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
    2494              : 
                                    /*
                                     * The recursive call made its own copy.  Free only the array
                                     * and the struct: the strings copied from var are still owned
                                     * by var.
                                     */
    2495           75 :                 pfree(new->stem);
    2496           75 :                 pfree(new);
    2497              :             }
    2498              :         }
    2499              : 
                            /* no dictionary entry can continue from here; stop scanning */
    2500         5995 :         if (!node)
    2501          625 :             break;
    2502              : 
                            /* binary search among this node's entries for the byte word[level] */
    2503         5370 :         StopLow = node->data;
    2504         5370 :         StopHigh = node->data + node->length;
    2505         7245 :         while (StopLow < StopHigh)
    2506              :         {
    2507         6720 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    2508         6720 :             if (StopMiddle->val == ((const uint8 *) (word))[level])
    2509         4845 :                 break;
    2510         1875 :             else if (StopMiddle->val < ((const uint8 *) (word))[level])
    2511          815 :                 StopLow = StopMiddle + 1;
    2512              :             else
    2513         1060 :                 StopHigh = StopMiddle;
    2514              :         }
    2515              : 
                            /* StopLow < StopHigh only if the search left via "break", i.e. found */
    2516         5370 :         if (StopLow < StopHigh)
    2517              :         {
                                /* pick the compound flag for a stem from startpos through level */
    2518         4845 :             if (startpos == 0)
    2519         2725 :                 compoundflag = FF_COMPOUNDBEGIN;
    2520         2120 :             else if (level == wordlen - 1)
    2521          240 :                 compoundflag = FF_COMPOUNDLAST;
    2522              :             else
    2523         1880 :                 compoundflag = FF_COMPOUNDMIDDLE;
    2524              : 
    2525              :             /* find infinitive */
    2526         4845 :             if (StopMiddle->isword &&
    2527         1280 :                 (StopMiddle->compoundflag & compoundflag) &&
    2528         1060 :                 notprobed[level])
    2529              :             {
    2530              :                 /* ok, we found full compoundallowed word */
    2531         1060 :                 if (level > minpos)
    2532              :                 {
    2533              :                     /* and its length more than minimal */
    2534          660 :                     if (wordlen == level + 1)
    2535              :                     {
    2536              :                         /* well, it was last word */
    2537          260 :                         AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2538          260 :                         pfree(notprobed);
    2539          260 :                         return var;
    2540              :                     }
    2541              :                     else
    2542          400 :                     {
    2543              :                         /* first search for a longer word starting at the same point */
    2544          400 :                         SplitVar   *ptr = var;
    2545              : 
    2546          620 :                         while (ptr->next)
    2547          220 :                             ptr = ptr->next;
                                            /*
                                             * The recursive call resumes at this node and level
                                             * with minpos == level, so it cannot accept the word
                                             * just found and has to look further.
                                             */
    2548          400 :                         ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
    2549              :                         /* we can find next word */
                                            /*
                                             * In this variant, accept the word found: store it as
                                             * a stem and restart at the tree root for the next one.
                                             */
    2550          400 :                         level++;
    2551          400 :                         AddStem(var, pnstrdup(word + startpos, level - startpos));
    2552          400 :                         node = Conf->Dictionary;
    2553          400 :                         startpos = level;
    2554          400 :                         continue;
    2555              :                     }
    2556              :                 }
    2557              :             }
                                /* descend to the child node for the next byte */
    2558         4185 :             node = StopMiddle->node;
    2559              :         }
    2560              :         else
                                /* no entry for this byte; the check at the top of the next pass ends the loop */
    2561          525 :             node = NULL;
    2562         4710 :         level++;
    2563              :     }
    2564              : 
                        /* whatever is left from startpos to the end of the word is the last stem */
    2565          840 :     AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
    2566          840 :     pfree(notprobed);
    2567          840 :     return var;
    2568              : }
    2569              : 
                        /*
                         * addNorm
                         *      Append one lexeme to the result array *lres.
                         *
                         * The array is allocated with MAX_NORM entries on first use.  *lcur points
                         * to the next free entry, whose lexeme is kept NULL to terminate the array.
                         * Once MAX_NORM - 1 lexemes are stored, further calls do nothing, so the
                         * terminator always fits.  "word" is stored as is, not copied.
                         */
    2570              : static void
    2571         1095 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
    2572              : {
                        /* allocate the array lazily, on the first lexeme */
    2573         1095 :     if (*lres == NULL)
    2574          505 :         *lcur = *lres = palloc_array(TSLexeme, MAX_NORM);
    2575              : 
                        /* keep the last slot free for the terminating entry */
    2576         1095 :     if (*lcur - *lres < MAX_NORM - 1)
    2577              :     {
    2578         1095 :         (*lcur)->lexeme = word;
    2579         1095 :         (*lcur)->flags = flags;
    2580         1095 :         (*lcur)->nvariant = NVariant;
    2581         1095 :         (*lcur)++;
                            /* mark the new end of the array */
    2582         1095 :         (*lcur)->lexeme = NULL;
    2583              :     }
    2584         1095 : }
    2585              : 
                        /*
                         * NINormalizeWord
                         *      Return the normal forms of "word" as a TSLexeme array, or NULL.
                         *
                         * First the whole word is normalized; each normal form found gets its own
                         * nvariant number.  Then, if Conf->usecompound is set, the word is split
                         * into stems by SplitToVariants() and, for every split with more than one
                         * stem, the last stem is normalized with FF_COMPOUNDLAST.  Each normal form
                         * of the last stem yields one variant consisting of the leading stems plus
                         * that normal form, all sharing one nvariant number.
                         *
                         * The returned array ends with an entry whose lexeme is NULL.  NULL is
                         * returned if no lexeme was produced at all.
                         */
    2586              : TSLexeme *
    2587          625 : NINormalizeWord(IspellDict *Conf, const char *word)
    2588              : {
    2589              :     char      **res;
    2590          625 :     TSLexeme   *lcur = NULL,
    2591          625 :                *lres = NULL;
    2592          625 :     uint16      NVariant = 1;
    2593              : 
                        /* normal forms of the word taken as a whole */
    2594          625 :     res = NormalizeSubWord(Conf, word, 0);
    2595              : 
    2596          625 :     if (res)
    2597              :     {
    2598          405 :         char      **ptr = res;
    2599              : 
                            /* the strings are handed over to the result; only the array is freed */
    2600          950 :         while (*ptr && (lcur - lres) < MAX_NORM)
    2601              :         {
    2602          545 :             addNorm(&lres, &lcur, *ptr, 0, NVariant++);
    2603          545 :             ptr++;
    2604              :         }
    2605          405 :         pfree(res);
    2606              :     }
    2607              : 
    2608          625 :     if (Conf->usecompound)
    2609              :     {
    2610          625 :         int         wordlen = strlen(word);
    2611              :         SplitVar   *ptr,
    2612          625 :                    *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
    2613              :         int         i;
    2614              : 
                            /* walk the list of candidate splits, freeing each one after use */
    2615         1725 :         while (var)
    2616              :         {
                                /* a split with a single stem is the whole word, handled above */
    2617         1100 :             if (var->nstem > 1)
    2618              :             {
    2619          475 :                 char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
    2620              : 
    2621          475 :                 if (subres)
    2622              :                 {
    2623          220 :                     char      **subptr = subres;
    2624              : 
    2625          440 :                     while (*subptr)
    2626              :                     {
                                            /*
                                             * The first variant takes over the leading stem
                                             * strings themselves; later variants get copies.
                                             */
    2627          550 :                         for (i = 0; i < var->nstem - 1; i++)
    2628              :                         {
    2629          330 :                             addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
    2630              :                         }
    2631              : 
    2632          220 :                         addNorm(&lres, &lcur, *subptr, 0, NVariant);
    2633          220 :                         subptr++;
    2634          220 :                         NVariant++;
    2635              :                     }
    2636              : 
    2637          220 :                     pfree(subres);
                                        /*
                                         * The leading stems were passed to addNorm() above, so
                                         * they must not be freed here: clearing stem[0] makes the
                                         * cleanup loop below stop at once.  The last stem was
                                         * replaced by its normal forms, so free it explicitly.
                                         */
    2638          220 :                     var->stem[0] = NULL;
    2639          220 :                     pfree(var->stem[var->nstem - 1]);
    2640              :                 }
    2641              :             }
    2642              : 
                                /* free the stems of a split that was not used; stops at a NULL entry */
    2643         2285 :             for (i = 0; i < var->nstem && var->stem[i]; i++)
    2644         1185 :                 pfree(var->stem[i]);
    2645         1100 :             ptr = var->next;
    2646         1100 :             pfree(var->stem);
    2647         1100 :             pfree(var);
    2648         1100 :             var = ptr;
    2649              :         }
    2650              :     }
    2651              : 
    2652          625 :     return lres;
    2653              : }
        

Generated by: LCOV version 2.0-1