Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spell.c
4 : * Normalizing word with ISpell
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : *
8 : * Ispell dictionary
9 : * -----------------
10 : *
11 : * Rules of dictionaries are defined in two files with .affix and .dict
12 : * extensions. They are used by spell checker programs Ispell and Hunspell.
13 : *
14 : * An .affix file declares morphological rules to get a basic form of words.
15 : * The format of an .affix file has different structure for Ispell and Hunspell
16 : * dictionaries. The Hunspell format is more complicated. But when an .affix
17 : * file is imported and compiled, it is stored in the same structure AffixNode.
18 : *
19 : * A .dict file stores a list of basic forms of words with references to
20 : * affix rules. The format of a .dict file has the same structure for Ispell
21 : * and Hunspell dictionaries.
22 : *
23 : * Compilation of a dictionary
24 : * ---------------------------
25 : *
26 : * A compiled dictionary is stored in the IspellDict structure. Compilation of
27 : * a dictionary is divided into several steps:
28 : * - NIImportDictionary() - stores each word of a .dict file in the
29 : * temporary Spell field.
30 : * - NIImportAffixes() - stores affix rules of an .affix file in the
31 : * Affix field (not temporary) if an .affix file has the Ispell format.
32 : * -> NIImportOOAffixes() - stores affix rules if an .affix file has the
33 : * Hunspell format. The AffixData field is initialized if AF parameter
34 : * is defined.
35 : * - NISortDictionary() - builds a prefix tree (Trie) from the words list
36 : * and stores it in the Dictionary field. The words list is got from the
37 : * Spell field. The AffixData field is initialized if AF parameter is not
38 : * defined.
39 : * - NISortAffixes():
40 : * - builds a list of compound affixes from the affix list and stores it
41 : * in the CompoundAffix.
42 : * - builds prefix trees (Trie) from the affix list for prefixes and suffixes
43 : * and stores them in Suffix and Prefix fields.
44 : * The affix list is got from the Affix field.
45 : *
46 : * Memory management
47 : * -----------------
48 : *
49 : * The IspellDict structure has the Spell field which is used only in compile
50 : * time. The Spell field stores a words list. It can take a lot of memory.
51 : * Therefore when a dictionary is compiled this field is cleared by
52 : * NIFinishBuild().
53 : *
54 : * All resources which should be cleared by NIFinishBuild() are initialized using
55 : * tmpalloc() and tmpalloc0().
56 : *
57 : * IDENTIFICATION
58 : * src/backend/tsearch/spell.c
59 : *
60 : *-------------------------------------------------------------------------
61 : */
62 :
63 : #include "postgres.h"
64 :
65 : #include "catalog/pg_collation.h"
66 : #include "miscadmin.h"
67 : #include "tsearch/dicts/spell.h"
68 : #include "tsearch/ts_locale.h"
69 : #include "utils/formatting.h"
70 : #include "utils/memutils.h"
71 :
72 :
/*
 * Initialization requires a lot of memory that's not needed
 * after the initialization is done. During initialization,
 * CurrentMemoryContext is the long-lived memory context associated
 * with the dictionary cache entry. We keep the short-lived stuff
 * in the Conf->buildCxt context.
 *
 * Both macros refer to a variable named "Conf", so they can only be used
 * where a pointer to the dictionary being built is in scope under that name.
 * tmpalloc0() differs from tmpalloc() only in calling the zero-filling
 * allocator.
 */
#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz))
#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz))
82 :
83 : /*
84 : * Prepare for constructing an ISpell dictionary.
85 : *
86 : * The IspellDict struct is assumed to be zeroed when allocated.
87 : */
88 : void
89 148 : NIStartBuild(IspellDict *Conf)
90 : {
91 : /*
92 : * The temp context is a child of CurTransactionContext, so that it will
93 : * go away automatically on error.
94 : */
95 148 : Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
96 : "Ispell dictionary init context",
97 : ALLOCSET_DEFAULT_SIZES);
98 148 : }
99 :
100 : /*
101 : * Clean up when dictionary construction is complete.
102 : */
103 : void
104 124 : NIFinishBuild(IspellDict *Conf)
105 : {
106 : /* Release no-longer-needed temp memory */
107 124 : MemoryContextDelete(Conf->buildCxt);
108 : /* Just for cleanliness, zero the now-dangling pointers */
109 124 : Conf->buildCxt = NULL;
110 124 : Conf->Spell = NULL;
111 124 : Conf->firstfree = NULL;
112 124 : Conf->CompoundAffixFlags = NULL;
113 124 : }
114 :
115 :
116 : /*
117 : * "Compact" palloc: allocate without extra palloc overhead.
118 : *
119 : * Since we have no need to free the ispell data items individually, there's
120 : * not much value in the per-chunk overhead normally consumed by palloc.
121 : * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
122 : *
123 : * We currently pre-zero all data allocated this way, even though some of it
124 : * doesn't need that. The cpalloc and cpalloc0 macros are just documentation
125 : * to indicate which allocations actually require zeroing.
126 : */
127 : #define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
128 : #define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
129 :
130 : static void *
131 13942 : compact_palloc0(IspellDict *Conf, size_t size)
132 : {
133 : void *result;
134 :
135 : /* Should only be called during init */
136 : Assert(Conf->buildCxt != NULL);
137 :
138 : /* No point in this for large chunks */
139 13942 : if (size > COMPACT_MAX_REQ)
140 0 : return palloc0(size);
141 :
142 : /* Keep everything maxaligned */
143 13942 : size = MAXALIGN(size);
144 :
145 : /* Need more space? */
146 13942 : if (size > Conf->avail)
147 : {
148 142 : Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
149 142 : Conf->avail = COMPACT_ALLOC_CHUNK;
150 : }
151 :
152 13942 : result = Conf->firstfree;
153 13942 : Conf->firstfree += size;
154 13942 : Conf->avail -= size;
155 :
156 13942 : return result;
157 : }
158 :
159 : #define cpalloc(size) compact_palloc0(Conf, size)
160 : #define cpalloc0(size) compact_palloc0(Conf, size)
161 :
162 : static char *
163 7416 : cpstrdup(IspellDict *Conf, const char *str)
164 : {
165 7416 : char *res = cpalloc(strlen(str) + 1);
166 :
167 7416 : strcpy(res, str);
168 7416 : return res;
169 : }
170 :
171 :
172 : /*
173 : * Apply str_tolower(), producing a temporary result (in the buildCxt).
174 : */
175 : static char *
176 6284 : lowerstr_ctx(IspellDict *Conf, const char *src)
177 : {
178 : MemoryContext saveCtx;
179 : char *dst;
180 :
181 6284 : saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
182 6284 : dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
183 6284 : MemoryContextSwitchTo(saveCtx);
184 :
185 6284 : return dst;
186 : }
187 :
/*
 * NOTE(review): neither limit is referenced in this part of the file;
 * presumably they bound the word-normalization code further down -- confirm.
 */
#define MAX_NORM 1024
#define MAXNORMLEN 256

/* Compare the first strlen(p) bytes of s with p; note that p is evaluated twice */
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
/*
 * Fetch byte number N of the L-byte string W, as a uint8.  For T == FF_PREFIX
 * bytes are counted from the front of the string, for any other T they are
 * counted backwards from the last byte.
 */
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* GETWCHAR applied to the repl string (length replen) of the affix A */
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )

/* Shared empty string, stored wherever a string field has no value */
static const char *VoidString = "";
196 :
197 : static int
198 3264 : cmpspell(const void *s1, const void *s2)
199 : {
200 3264 : return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
201 : }
202 :
203 : static int
204 2544 : cmpspellaffix(const void *s1, const void *s2)
205 : {
206 5088 : return strcmp((*(SPELL *const *) s1)->p.flag,
207 2544 : (*(SPELL *const *) s2)->p.flag);
208 : }
209 :
210 : static int
211 4422 : cmpcmdflag(const void *f1, const void *f2)
212 : {
213 4422 : CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
214 4422 : *fv2 = (CompoundAffixFlag *) f2;
215 :
216 : Assert(fv1->flagMode == fv2->flagMode);
217 :
218 4422 : if (fv1->flagMode == FM_NUM)
219 : {
220 866 : if (fv1->flag.i == fv2->flag.i)
221 126 : return 0;
222 :
223 740 : return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
224 : }
225 :
226 3556 : return strcmp(fv1->flag.s, fv2->flag.s);
227 : }
228 :
/*
 * Find the first character of str that matches c, stepping through the
 * string one (possibly multibyte) character at a time.
 *
 * Returns a pointer to the matching character, or NULL if there is none.
 */
static char *
findchar(char *str, int c)
{
	char	   *cur;

	for (cur = str; *cur != '\0'; cur += pg_mblen(cur))
	{
		if (t_iseq(cur, c))
			return cur;
	}

	return NULL;
}
241 :
/*
 * Like findchar(), but stops at the first character that matches either
 * c1 or c2.
 *
 * Returns a pointer to the matching character, or NULL if there is none.
 */
static char *
findchar2(char *str, int c1, int c2)
{
	char	   *cur;

	for (cur = str; *cur != '\0'; cur += pg_mblen(cur))
	{
		if (t_iseq(cur, c1) || t_iseq(cur, c2))
			return cur;
	}

	return NULL;
}
254 :
255 :
/*
 * Backward string compare, for suffix tree operations.
 *
 * The strings are compared byte by byte starting from their last bytes.  If
 * one string is exhausted first (i.e. it is a suffix of the other), the
 * shorter one sorts first.
 *
 * Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	int			i1 = (int) (strlen((const char *) s1) - 1);
	int			i2 = (int) (strlen((const char *) s2) - 1);

	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* All compared bytes matched; the remaining lengths decide */
	if (i1 == i2)
		return 0;

	return (i1 < i2) ? -1 : 1;
}
279 :
/*
 * Backward string compare limited to the last "count" bytes.
 *
 * Works like strbcmp(), except that the strings are considered equal as soon
 * as "count" trailing bytes have been found to match.
 *
 * Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	int			i1 = (int) (strlen((const char *) s1) - 1);
	int			i2 = (int) (strlen((const char *) s2) - 1);
	int			remaining = (int) count;

	for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* Either enough bytes matched, or both strings ran out together */
	if (remaining == 0 || i1 == i2)
		return 0;

	/* One string ran out early: the shorter one sorts first */
	return (i1 < i2) ? -1 : 1;
}
305 :
306 : /*
307 : * Compares affixes.
308 : * First compares the type of an affix. Prefixes should go before affixes.
309 : * If types are equal then compares replaceable string.
310 : */
311 : static int
312 2200 : cmpaffix(const void *s1, const void *s2)
313 : {
314 2200 : const AFFIX *a1 = (const AFFIX *) s1;
315 2200 : const AFFIX *a2 = (const AFFIX *) s2;
316 :
317 2200 : if (a1->type < a2->type)
318 502 : return -1;
319 1698 : if (a1->type > a2->type)
320 150 : return 1;
321 1548 : if (a1->type == FF_PREFIX)
322 248 : return strcmp(a1->repl, a2->repl);
323 : else
324 1300 : return strbcmp((const unsigned char *) a1->repl,
325 1300 : (const unsigned char *) a2->repl);
326 : }
327 :
328 : /*
329 : * Gets an affix flag from the set of affix flags (sflagset).
330 : *
331 : * Several flags can be stored in a single string. Flags can be represented by:
332 : * - 1 character (FM_CHAR). A character may be Unicode.
333 : * - 2 characters (FM_LONG). A character may be Unicode.
334 : * - numbers from 1 to 65000 (FM_NUM).
335 : *
336 : * Depending on the flagMode an affix string can have the following format:
337 : * - FM_CHAR: ABCD
338 : * Here we have 4 flags: A, B, C and D
339 : * - FM_LONG: ABCDE*
340 : * Here we have 3 flags: AB, CD and E*
341 : * - FM_NUM: 200,205,50
342 : * Here we have 3 flags: 200, 205 and 50
343 : *
344 : * Conf: current dictionary.
345 : * sflagset: the set of affix flags. Returns a reference to the start of a next
346 : * affix flag.
347 : * sflag: returns an affix flag from sflagset.
348 : */
349 : static void
350 6724 : getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
351 : {
352 : int32 s;
353 : char *next;
354 6724 : const char *sbuf = *sflagset;
355 : int maxstep;
356 6724 : bool stop = false;
357 6724 : bool met_comma = false;
358 :
359 6724 : maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
360 :
361 8740 : while (**sflagset)
362 : {
363 8740 : switch (Conf->flagMode)
364 : {
365 7490 : case FM_LONG:
366 : case FM_CHAR:
367 7490 : COPYCHAR(sflag, *sflagset);
368 7490 : sflag += pg_mblen(*sflagset);
369 :
370 : /* Go to start of the next flag */
371 7490 : *sflagset += pg_mblen(*sflagset);
372 :
373 : /* Check if we get all characters of flag */
374 7490 : maxstep--;
375 7490 : stop = (maxstep == 0);
376 7490 : break;
377 1250 : case FM_NUM:
378 1250 : errno = 0;
379 1250 : s = strtol(*sflagset, &next, 10);
380 1250 : if (*sflagset == next || errno == ERANGE)
381 6 : ereport(ERROR,
382 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
383 : errmsg("invalid affix flag \"%s\"", *sflagset)));
384 1244 : if (s < 0 || s > FLAGNUM_MAXSIZE)
385 0 : ereport(ERROR,
386 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
387 : errmsg("affix flag \"%s\" is out of range",
388 : *sflagset)));
389 1244 : sflag += sprintf(sflag, "%0d", s);
390 :
391 : /* Go to start of the next flag */
392 1244 : *sflagset = next;
393 1888 : while (**sflagset)
394 : {
395 1288 : if (isdigit((unsigned char) **sflagset))
396 : {
397 644 : if (!met_comma)
398 0 : ereport(ERROR,
399 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
400 : errmsg("invalid affix flag \"%s\"",
401 : *sflagset)));
402 644 : break;
403 : }
404 644 : else if (t_iseq(*sflagset, ','))
405 : {
406 644 : if (met_comma)
407 0 : ereport(ERROR,
408 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
409 : errmsg("invalid affix flag \"%s\"",
410 : *sflagset)));
411 644 : met_comma = true;
412 : }
413 0 : else if (!isspace((unsigned char) **sflagset))
414 : {
415 0 : ereport(ERROR,
416 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
417 : errmsg("invalid character in affix flag \"%s\"",
418 : *sflagset)));
419 : }
420 :
421 644 : *sflagset += pg_mblen(*sflagset);
422 : }
423 1244 : stop = true;
424 1244 : break;
425 0 : default:
426 0 : elog(ERROR, "unrecognized type of Conf->flagMode: %d",
427 : Conf->flagMode);
428 : }
429 :
430 8734 : if (stop)
431 6718 : break;
432 : }
433 :
434 6718 : if (Conf->flagMode == FM_LONG && maxstep > 0)
435 0 : ereport(ERROR,
436 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
437 : errmsg("invalid affix flag \"%s\" with \"long\" flag value",
438 : sbuf)));
439 :
440 6718 : *sflag = '\0';
441 6718 : }
442 :
443 : /*
444 : * Checks if the affix set Conf->AffixData[affix] contains affixflag.
445 : * Conf->AffixData[affix] does not contain affixflag if this flag is not used
446 : * actually by the .dict file.
447 : *
448 : * Conf: current dictionary.
449 : * affix: index of the Conf->AffixData array.
450 : * affixflag: the affix flag.
451 : *
452 : * Returns true if the string Conf->AffixData[affix] contains affixflag,
453 : * otherwise returns false.
454 : */
455 : static bool
456 2366 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
457 : {
458 : const char *flagcur;
459 : char flag[BUFSIZ];
460 :
461 2366 : if (*affixflag == 0)
462 636 : return true;
463 :
464 : Assert(affix < Conf->nAffixData);
465 :
466 1730 : flagcur = Conf->AffixData[affix];
467 :
468 5064 : while (*flagcur)
469 : {
470 3844 : getNextFlagFromString(Conf, &flagcur, flag);
471 : /* Compare first affix flag in flagcur with affixflag */
472 3844 : if (strcmp(flag, affixflag) == 0)
473 510 : return true;
474 : }
475 :
476 : /* Could not find affixflag */
477 1220 : return false;
478 : }
479 :
480 : /*
481 : * Adds the new word into the temporary array Spell.
482 : *
483 : * Conf: current dictionary.
484 : * word: new word.
485 : * flag: set of affix flags. Single flag can be get by getNextFlagFromString().
486 : */
487 : static void
488 1300 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
489 : {
490 1300 : if (Conf->nspell >= Conf->mspell)
491 : {
492 142 : if (Conf->mspell)
493 : {
494 0 : Conf->mspell *= 2;
495 0 : Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
496 : }
497 : else
498 : {
499 142 : Conf->mspell = 1024 * 20;
500 142 : Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
501 : }
502 : }
503 1300 : Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
504 1300 : strcpy(Conf->Spell[Conf->nspell]->word, word);
505 2600 : Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
506 1300 : ? cpstrdup(Conf, flag) : VoidString;
507 1300 : Conf->nspell++;
508 1300 : }
509 :
510 : /*
511 : * Imports dictionary into the temporary array Spell.
512 : *
513 : * Note caller must already have applied get_tsearch_config_filename.
514 : *
515 : * Conf: current dictionary.
516 : * filename: path to the .dict file.
517 : */
518 : void
519 142 : NIImportDictionary(IspellDict *Conf, const char *filename)
520 : {
521 : tsearch_readline_state trst;
522 : char *line;
523 :
524 142 : if (!tsearch_readline_begin(&trst, filename))
525 0 : ereport(ERROR,
526 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
527 : errmsg("could not open dictionary file \"%s\": %m",
528 : filename)));
529 :
530 1442 : while ((line = tsearch_readline(&trst)) != NULL)
531 : {
532 : char *s,
533 : *pstr;
534 :
535 : /* Set of affix flags */
536 : const char *flag;
537 :
538 : /* Extract flag from the line */
539 1300 : flag = NULL;
540 1300 : if ((s = findchar(line, '/')))
541 : {
542 1158 : *s++ = '\0';
543 1158 : flag = s;
544 4616 : while (*s)
545 : {
546 : /* we allow only single encoded flags for faster works */
547 4616 : if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
548 3458 : s++;
549 : else
550 : {
551 1158 : *s = '\0';
552 1158 : break;
553 : }
554 : }
555 : }
556 : else
557 142 : flag = "";
558 :
559 : /* Remove trailing spaces */
560 1300 : s = line;
561 9424 : while (*s)
562 : {
563 8266 : if (isspace((unsigned char) *s))
564 : {
565 142 : *s = '\0';
566 142 : break;
567 : }
568 8124 : s += pg_mblen(s);
569 : }
570 1300 : pstr = lowerstr_ctx(Conf, line);
571 :
572 1300 : NIAddSpell(Conf, pstr, flag);
573 1300 : pfree(pstr);
574 :
575 1300 : pfree(line);
576 : }
577 142 : tsearch_readline_end(&trst);
578 142 : }
579 :
580 : /*
581 : * Searches a basic form of word in the prefix tree. This word was generated
582 : * using an affix rule. This rule may not be presented in an affix set of
583 : * a basic form of word.
584 : *
585 : * For example, we have the entry in the .dict file:
586 : * meter/GMD
587 : *
588 : * The affix rule with the flag S:
589 : * SFX S y ies [^aeiou]y
590 : * is not presented here.
591 : *
592 : * The affix rule with the flag M:
593 : * SFX M 0 's .
594 : * is presented here.
595 : *
596 : * Conf: current dictionary.
597 : * word: basic form of word.
598 : * affixflag: affix flag, by which a basic form of word was generated.
599 : * flag: compound flag used to compare with StopMiddle->compoundflag.
600 : *
601 : * Returns 1 if the word was found in the prefix tree, else returns 0.
602 : */
603 : static int
604 2994 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
605 : {
606 2994 : SPNode *node = Conf->Dictionary;
607 : SPNodeData *StopLow,
608 : *StopHigh,
609 : *StopMiddle;
610 2994 : const uint8 *ptr = (const uint8 *) word;
611 :
612 2994 : flag &= FF_COMPOUNDFLAGMASK;
613 :
614 13944 : while (node && *ptr)
615 : {
616 13224 : StopLow = node->data;
617 13224 : StopHigh = node->data + node->length;
618 18918 : while (StopLow < StopHigh)
619 : {
620 17652 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
621 17652 : if (StopMiddle->val == *ptr)
622 : {
623 11958 : if (*(ptr + 1) == '\0' && StopMiddle->isword)
624 : {
625 1146 : if (flag == 0)
626 : {
627 : /*
628 : * The word can be formed only with another word. And
629 : * in the flag parameter there is not a sign that we
630 : * search compound words.
631 : */
632 726 : if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
633 0 : return 0;
634 : }
635 420 : else if ((flag & StopMiddle->compoundflag) == 0)
636 0 : return 0;
637 :
638 : /*
639 : * Check if this affix rule is presented in the affix set
640 : * with index StopMiddle->affix.
641 : */
642 1146 : if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
643 1008 : return 1;
644 : }
645 10950 : node = StopMiddle->node;
646 10950 : ptr++;
647 10950 : break;
648 : }
649 5694 : else if (StopMiddle->val < *ptr)
650 1932 : StopLow = StopMiddle + 1;
651 : else
652 3762 : StopHigh = StopMiddle;
653 : }
654 12216 : if (StopLow >= StopHigh)
655 1266 : break;
656 : }
657 1986 : return 0;
658 : }
659 :
660 : /*
661 : * Adds a new affix rule to the Affix field.
662 : *
663 : * Conf: current dictionary.
664 : * flag: affix flag ('\' in the below example).
665 : * flagflags: set of flags from the flagval field for this affix rule. This set
666 : * is listed after '/' character in the added string (repl).
667 : *
668 : * For example L flag in the hunspell_sample.affix:
669 : * SFX \ 0 Y/L [^Y]
670 : *
671 : * mask: condition for search ('[^Y]' in the above example).
672 : * find: stripping characters from beginning (at prefix) or end (at suffix)
673 : * of the word ('0' in the above example, 0 means that there is not
674 : * stripping character).
675 : * repl: adding string after stripping ('Y' in the above example).
676 : * type: FF_SUFFIX or FF_PREFIX.
677 : */
678 : static void
679 1172 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
680 : const char *find, const char *repl, int type)
681 : {
682 : AFFIX *Affix;
683 :
684 1172 : if (Conf->naffixes >= Conf->maffixes)
685 : {
686 142 : if (Conf->maffixes)
687 : {
688 0 : Conf->maffixes *= 2;
689 0 : Conf->Affix = (AFFIX *) repalloc(Conf->Affix, Conf->maffixes * sizeof(AFFIX));
690 : }
691 : else
692 : {
693 142 : Conf->maffixes = 16;
694 142 : Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
695 : }
696 : }
697 :
698 1172 : Affix = Conf->Affix + Conf->naffixes;
699 :
700 : /* This affix rule can be applied for words with any ending */
701 1172 : if (strcmp(mask, ".") == 0 || *mask == '\0')
702 : {
703 284 : Affix->issimple = 1;
704 284 : Affix->isregis = 0;
705 : }
706 : /* This affix rule will use regis to search word ending */
707 888 : else if (RS_isRegis(mask))
708 : {
709 744 : Affix->issimple = 0;
710 744 : Affix->isregis = 1;
711 744 : RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
712 744 : *mask ? mask : VoidString);
713 : }
714 : /* This affix rule will use regex_t to search word ending */
715 : else
716 : {
717 : int masklen;
718 : int wmasklen;
719 : int err;
720 : pg_wchar *wmask;
721 : char *tmask;
722 :
723 144 : Affix->issimple = 0;
724 144 : Affix->isregis = 0;
725 144 : tmask = (char *) tmpalloc(strlen(mask) + 3);
726 144 : if (type == FF_SUFFIX)
727 144 : sprintf(tmask, "%s$", mask);
728 : else
729 0 : sprintf(tmask, "^%s", mask);
730 :
731 144 : masklen = strlen(tmask);
732 144 : wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
733 144 : wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
734 :
735 : /*
736 : * The regex and all internal state created by pg_regcomp are
737 : * allocated in the dictionary's memory context, and will be freed
738 : * automatically when it is destroyed.
739 : */
740 144 : Affix->reg.pregex = palloc(sizeof(regex_t));
741 144 : err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen,
742 : REG_ADVANCED | REG_NOSUB,
743 : DEFAULT_COLLATION_OID);
744 144 : if (err)
745 : {
746 : char errstr[100];
747 :
748 0 : pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr));
749 0 : ereport(ERROR,
750 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
751 : errmsg("invalid regular expression: %s", errstr)));
752 : }
753 : }
754 :
755 1172 : Affix->flagflags = flagflags;
756 1172 : if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
757 : {
758 210 : if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
759 210 : Affix->flagflags |= FF_COMPOUNDFLAG;
760 : }
761 1172 : Affix->flag = cpstrdup(Conf, flag);
762 1172 : Affix->type = type;
763 :
764 1172 : Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
765 1172 : if ((Affix->replen = strlen(repl)) > 0)
766 1134 : Affix->repl = cpstrdup(Conf, repl);
767 : else
768 38 : Affix->repl = VoidString;
769 1172 : Conf->naffixes++;
770 1172 : }
771 :
/*
 * Parsing states for parse_affentry() and friends.
 *
 * In parse_affentry(), a PAE_WAIT_xxx state means the parser is still looking
 * for the start of field xxx, and a PAE_INxxx state means it is collecting
 * that field.  parse_ooaffentry() uses the PAE_WAIT_xxx values to record
 * which field it will read next.
 */
#define PAE_WAIT_MASK 0
#define PAE_INMASK 1
#define PAE_WAIT_FIND 2
#define PAE_INFIND 3
#define PAE_WAIT_REPL 4
#define PAE_INREPL 5
#define PAE_WAIT_TYPE 6
#define PAE_WAIT_FLAG 7
781 :
/*
 * Parse next space-separated field of an .affix file line.
 *
 * *str is the input pointer (will be advanced past field)
 * next is where to copy the field value to, with null termination
 *
 * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
 *
 * A '#' found where a field should start introduces a comment: we return
 * false right away and do not touch the output buffer.  A '#' inside a field
 * is ordinary data.
 *
 * Returns true if we found a field, false if not.
 */
static bool
get_nextfield(char **str, char *next)
{
	int			avail = BUFSIZ;

	/* Step 1: skip the whitespace in front of the field */
	while (**str != '\0' && isspace((unsigned char) **str))
		*str += pg_mblen(*str);

	/* End of line: hand back an empty field */
	if (**str == '\0')
	{
		*next = '\0';
		return false;
	}

	/* Rest of the line is a comment */
	if (t_iseq(*str, '#'))
		return false;

	/* Step 2: copy characters up to the next whitespace or end of line */
	while (**str != '\0' && !isspace((unsigned char) **str))
	{
		int			clen = pg_mblen(*str);

		/* Copy only while there is still room for this char plus the NUL */
		if (clen < avail)
		{
			COPYCHAR(next, *str);
			next += clen;
			avail -= clen;
		}

		*str += clen;
	}

	*next = '\0';

	return true;				/* we got a nonempty field */
}
843 :
844 : /*
845 : * Parses entry of an .affix file of MySpell or Hunspell format.
846 : *
847 : * An .affix file entry has the following format:
848 : * - header
849 : * <type> <flag> <cross_flag> <flag_count>
850 : * - fields after header:
851 : * <type> <flag> <find> <replace> <mask>
852 : *
853 : * str is the input line
854 : * field values are returned to type etc, which must be buffers of size BUFSIZ.
855 : *
856 : * Returns number of fields found; any omitted fields are set to empty strings.
857 : */
858 : static int
859 2476 : parse_ooaffentry(char *str, char *type, char *flag, char *find,
860 : char *repl, char *mask)
861 : {
862 2476 : int state = PAE_WAIT_TYPE;
863 2476 : int fields_read = 0;
864 2476 : bool valid = false;
865 :
866 2476 : *type = *flag = *find = *repl = *mask = '\0';
867 :
868 10772 : while (*str)
869 : {
870 10772 : switch (state)
871 : {
872 2476 : case PAE_WAIT_TYPE:
873 2476 : valid = get_nextfield(&str, type);
874 2476 : state = PAE_WAIT_FLAG;
875 2476 : break;
876 2476 : case PAE_WAIT_FLAG:
877 2476 : valid = get_nextfield(&str, flag);
878 2476 : state = PAE_WAIT_FIND;
879 2476 : break;
880 2476 : case PAE_WAIT_FIND:
881 2476 : valid = get_nextfield(&str, find);
882 2476 : state = PAE_WAIT_REPL;
883 2476 : break;
884 1672 : case PAE_WAIT_REPL:
885 1672 : valid = get_nextfield(&str, repl);
886 1672 : state = PAE_WAIT_MASK;
887 1672 : break;
888 1672 : case PAE_WAIT_MASK:
889 1672 : valid = get_nextfield(&str, mask);
890 1672 : state = -1; /* force loop exit */
891 1672 : break;
892 0 : default:
893 0 : elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
894 : state);
895 : break;
896 : }
897 10772 : if (valid)
898 9132 : fields_read++;
899 : else
900 1640 : break; /* early EOL */
901 9132 : if (state < 0)
902 836 : break; /* got all fields */
903 : }
904 :
905 2476 : return fields_read;
906 : }
907 :
908 : /*
909 : * Parses entry of an .affix file of Ispell format
910 : *
911 : * An .affix file entry has the following format:
912 : * <mask> > [-<find>,]<replace>
913 : */
914 : static bool
915 336 : parse_affentry(char *str, char *mask, char *find, char *repl)
916 : {
917 336 : int state = PAE_WAIT_MASK;
918 336 : char *pmask = mask,
919 336 : *pfind = find,
920 336 : *prepl = repl;
921 :
922 336 : *mask = *find = *repl = '\0';
923 :
924 8832 : while (*str)
925 : {
926 8832 : if (state == PAE_WAIT_MASK)
927 : {
928 816 : if (t_iseq(str, '#'))
929 0 : return false;
930 816 : else if (!isspace((unsigned char) *str))
931 : {
932 336 : COPYCHAR(pmask, str);
933 336 : pmask += pg_mblen(str);
934 336 : state = PAE_INMASK;
935 : }
936 : }
937 8016 : else if (state == PAE_INMASK)
938 : {
939 3264 : if (t_iseq(str, '>'))
940 : {
941 336 : *pmask = '\0';
942 336 : state = PAE_WAIT_FIND;
943 : }
944 2928 : else if (!isspace((unsigned char) *str))
945 : {
946 1152 : COPYCHAR(pmask, str);
947 1152 : pmask += pg_mblen(str);
948 : }
949 : }
950 4752 : else if (state == PAE_WAIT_FIND)
951 : {
952 1344 : if (t_iseq(str, '-'))
953 : {
954 48 : state = PAE_INFIND;
955 : }
956 1296 : else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
957 : {
958 288 : COPYCHAR(prepl, str);
959 288 : prepl += pg_mblen(str);
960 288 : state = PAE_INREPL;
961 : }
962 1008 : else if (!isspace((unsigned char) *str))
963 0 : ereport(ERROR,
964 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
965 : errmsg("syntax error")));
966 : }
967 3408 : else if (state == PAE_INFIND)
968 : {
969 96 : if (t_iseq(str, ','))
970 : {
971 48 : *pfind = '\0';
972 48 : state = PAE_WAIT_REPL;
973 : }
974 48 : else if (t_isalpha(str))
975 : {
976 48 : COPYCHAR(pfind, str);
977 48 : pfind += pg_mblen(str);
978 : }
979 0 : else if (!isspace((unsigned char) *str))
980 0 : ereport(ERROR,
981 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
982 : errmsg("syntax error")));
983 : }
984 3312 : else if (state == PAE_WAIT_REPL)
985 : {
986 48 : if (t_iseq(str, '-'))
987 : {
988 0 : break; /* void repl */
989 : }
990 48 : else if (t_isalpha(str))
991 : {
992 48 : COPYCHAR(prepl, str);
993 48 : prepl += pg_mblen(str);
994 48 : state = PAE_INREPL;
995 : }
996 0 : else if (!isspace((unsigned char) *str))
997 0 : ereport(ERROR,
998 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
999 : errmsg("syntax error")));
1000 : }
1001 3264 : else if (state == PAE_INREPL)
1002 : {
1003 3264 : if (t_iseq(str, '#'))
1004 : {
1005 336 : *prepl = '\0';
1006 336 : break;
1007 : }
1008 2928 : else if (t_isalpha(str))
1009 : {
1010 432 : COPYCHAR(prepl, str);
1011 432 : prepl += pg_mblen(str);
1012 : }
1013 2496 : else if (!isspace((unsigned char) *str))
1014 0 : ereport(ERROR,
1015 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1016 : errmsg("syntax error")));
1017 : }
1018 : else
1019 0 : elog(ERROR, "unrecognized state in parse_affentry: %d", state);
1020 :
1021 8496 : str += pg_mblen(str);
1022 : }
1023 :
1024 336 : *pmask = *pfind = *prepl = '\0';
1025 :
1026 336 : return (*mask && (*find || *repl));
1027 : }
1028 :
1029 : /*
1030 : * Sets a Hunspell options depending on flag type.
1031 : */
1032 : static void
1033 3246 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
1034 : char *s, uint32 val)
1035 : {
1036 3246 : if (Conf->flagMode == FM_NUM)
1037 : {
1038 : char *next;
1039 : int i;
1040 :
1041 696 : errno = 0;
1042 696 : i = strtol(s, &next, 10);
1043 696 : if (s == next || errno == ERANGE)
1044 0 : ereport(ERROR,
1045 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1046 : errmsg("invalid affix flag \"%s\"", s)));
1047 696 : if (i < 0 || i > FLAGNUM_MAXSIZE)
1048 0 : ereport(ERROR,
1049 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1050 : errmsg("affix flag \"%s\" is out of range", s)));
1051 :
1052 696 : entry->flag.i = i;
1053 : }
1054 : else
1055 2550 : entry->flag.s = cpstrdup(Conf, s);
1056 :
1057 3246 : entry->flagMode = Conf->flagMode;
1058 3246 : entry->value = val;
1059 3246 : }
1060 :
1061 : /*
1062 : * Sets up a correspondence for the affix parameter with the affix flag.
1063 : *
1064 : * Conf: current dictionary.
1065 : * s: affix flag in string.
1066 : * val: affix parameter.
1067 : */
1068 : static void
1069 372 : addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
1070 : {
1071 : CompoundAffixFlag *newValue;
1072 : char sbuf[BUFSIZ];
1073 : char *sflag;
1074 : int clen;
1075 :
1076 696 : while (*s && isspace((unsigned char) *s))
1077 324 : s += pg_mblen(s);
1078 :
1079 372 : if (!*s)
1080 0 : ereport(ERROR,
1081 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1082 : errmsg("syntax error")));
1083 :
1084 : /* Get flag without \n */
1085 372 : sflag = sbuf;
1086 1100 : while (*s && !isspace((unsigned char) *s) && *s != '\n')
1087 : {
1088 728 : clen = pg_mblen(s);
1089 728 : COPYCHAR(sflag, s);
1090 728 : sflag += clen;
1091 728 : s += clen;
1092 : }
1093 372 : *sflag = '\0';
1094 :
1095 : /* Resize array or allocate memory for array CompoundAffixFlag */
1096 372 : if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
1097 : {
1098 142 : if (Conf->mCompoundAffixFlag)
1099 : {
1100 0 : Conf->mCompoundAffixFlag *= 2;
1101 0 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1102 0 : repalloc(Conf->CompoundAffixFlags,
1103 0 : Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1104 : }
1105 : else
1106 : {
1107 142 : Conf->mCompoundAffixFlag = 10;
1108 142 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1109 142 : tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1110 : }
1111 : }
1112 :
1113 372 : newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
1114 :
1115 372 : setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
1116 :
1117 372 : Conf->usecompound = true;
1118 372 : Conf->nCompoundAffixFlag++;
1119 372 : }
1120 :
1121 : /*
1122 : * Returns a set of affix parameters which correspondence to the set of affix
1123 : * flags s.
1124 : */
1125 : static int
1126 1392 : getCompoundAffixFlagValue(IspellDict *Conf, const char *s)
1127 : {
1128 1392 : uint32 flag = 0;
1129 : CompoundAffixFlag *found,
1130 : key;
1131 : char sflag[BUFSIZ];
1132 : const char *flagcur;
1133 :
1134 1392 : if (Conf->nCompoundAffixFlag == 0)
1135 0 : return 0;
1136 :
1137 1392 : flagcur = s;
1138 4266 : while (*flagcur)
1139 : {
1140 2880 : getNextFlagFromString(Conf, &flagcur, sflag);
1141 2874 : setCompoundAffixFlagValue(Conf, &key, sflag, 0);
1142 :
1143 : found = (CompoundAffixFlag *)
1144 2874 : bsearch(&key, Conf->CompoundAffixFlags,
1145 2874 : Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
1146 : cmpcmdflag);
1147 2874 : if (found != NULL)
1148 626 : flag |= found->value;
1149 : }
1150 :
1151 1386 : return flag;
1152 : }
1153 :
1154 : /*
1155 : * Returns a flag set using the s parameter.
1156 : *
1157 : * If Conf->useFlagAliases is true then the s parameter is index of the
1158 : * Conf->AffixData array and function returns its entry.
1159 : * Else function returns the s parameter.
1160 : */
1161 : static const char *
1162 162 : getAffixFlagSet(IspellDict *Conf, char *s)
1163 : {
1164 162 : if (Conf->useFlagAliases && *s != '\0')
1165 : {
1166 : int curaffix;
1167 : char *end;
1168 :
1169 102 : errno = 0;
1170 102 : curaffix = strtol(s, &end, 10);
1171 102 : if (s == end || errno == ERANGE)
1172 0 : ereport(ERROR,
1173 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1174 : errmsg("invalid affix alias \"%s\"", s)));
1175 :
1176 102 : if (curaffix > 0 && curaffix < Conf->nAffixData)
1177 :
1178 : /*
1179 : * Do not subtract 1 from curaffix because empty string was added
1180 : * in NIImportOOAffixes
1181 : */
1182 102 : return Conf->AffixData[curaffix];
1183 0 : else if (curaffix > Conf->nAffixData)
1184 0 : ereport(ERROR,
1185 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1186 : errmsg("invalid affix alias \"%s\"", s)));
1187 0 : return VoidString;
1188 : }
1189 : else
1190 60 : return s;
1191 : }
1192 :
1193 : /*
1194 : * Import an affix file that follows MySpell or Hunspell format.
1195 : *
1196 : * Conf: current dictionary.
1197 : * filename: path to the .affix file.
1198 : */
1199 : static void
1200 94 : NIImportOOAffixes(IspellDict *Conf, const char *filename)
1201 : {
1202 : char type[BUFSIZ],
1203 94 : *ptype = NULL;
1204 : char sflag[BUFSIZ];
1205 : char mask[BUFSIZ],
1206 : *pmask;
1207 : char find[BUFSIZ],
1208 : *pfind;
1209 : char repl[BUFSIZ],
1210 : *prepl;
1211 94 : bool isSuffix = false;
1212 94 : int naffix = 0,
1213 94 : curaffix = 0;
1214 94 : int sflaglen = 0;
1215 94 : char flagflags = 0;
1216 : tsearch_readline_state trst;
1217 : char *recoded;
1218 :
1219 : /* read file to find any flag */
1220 94 : Conf->usecompound = false;
1221 94 : Conf->useFlagAliases = false;
1222 94 : Conf->flagMode = FM_CHAR;
1223 :
1224 94 : if (!tsearch_readline_begin(&trst, filename))
1225 0 : ereport(ERROR,
1226 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1227 : errmsg("could not open affix file \"%s\": %m",
1228 : filename)));
1229 :
1230 3656 : while ((recoded = tsearch_readline(&trst)) != NULL)
1231 : {
1232 3562 : if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
1233 : {
1234 1086 : pfree(recoded);
1235 1086 : continue;
1236 : }
1237 :
1238 2476 : if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
1239 94 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
1240 : FF_COMPOUNDFLAG);
1241 2382 : else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
1242 34 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
1243 : FF_COMPOUNDBEGIN);
1244 2348 : else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
1245 0 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
1246 : FF_COMPOUNDLAST);
1247 : /* COMPOUNDLAST and COMPOUNDEND are synonyms */
1248 2348 : else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
1249 34 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
1250 : FF_COMPOUNDLAST);
1251 2314 : else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
1252 34 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
1253 : FF_COMPOUNDMIDDLE);
1254 2280 : else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
1255 94 : addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
1256 : FF_COMPOUNDONLY);
1257 2186 : else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
1258 34 : addCompoundAffixFlagValue(Conf,
1259 : recoded + strlen("COMPOUNDPERMITFLAG"),
1260 : FF_COMPOUNDPERMITFLAG);
1261 2152 : else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
1262 0 : addCompoundAffixFlagValue(Conf,
1263 : recoded + strlen("COMPOUNDFORBIDFLAG"),
1264 : FF_COMPOUNDFORBIDFLAG);
1265 2152 : else if (STRNCMP(recoded, "FLAG") == 0)
1266 : {
1267 72 : char *s = recoded + strlen("FLAG");
1268 :
1269 144 : while (*s && isspace((unsigned char) *s))
1270 72 : s += pg_mblen(s);
1271 :
1272 72 : if (*s)
1273 : {
1274 72 : if (STRNCMP(s, "long") == 0)
1275 34 : Conf->flagMode = FM_LONG;
1276 38 : else if (STRNCMP(s, "num") == 0)
1277 38 : Conf->flagMode = FM_NUM;
1278 0 : else if (STRNCMP(s, "default") != 0)
1279 0 : ereport(ERROR,
1280 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1281 : errmsg("Ispell dictionary supports only "
1282 : "\"default\", \"long\", "
1283 : "and \"num\" flag values")));
1284 : }
1285 : }
1286 :
1287 2476 : pfree(recoded);
1288 : }
1289 94 : tsearch_readline_end(&trst);
1290 :
1291 94 : if (Conf->nCompoundAffixFlag > 1)
1292 94 : qsort(Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
1293 : sizeof(CompoundAffixFlag), cmpcmdflag);
1294 :
1295 94 : if (!tsearch_readline_begin(&trst, filename))
1296 0 : ereport(ERROR,
1297 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1298 : errmsg("could not open affix file \"%s\": %m",
1299 : filename)));
1300 :
1301 3656 : while ((recoded = tsearch_readline(&trst)) != NULL)
1302 : {
1303 : int fields_read;
1304 :
1305 3562 : if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
1306 1086 : goto nextline;
1307 :
1308 2476 : fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
1309 :
1310 2476 : if (ptype)
1311 2382 : pfree(ptype);
1312 2476 : ptype = lowerstr_ctx(Conf, type);
1313 :
1314 : /* First try to parse AF parameter (alias compression) */
1315 2476 : if (STRNCMP(ptype, "af") == 0)
1316 : {
1317 : /* First line is the number of aliases */
1318 408 : if (!Conf->useFlagAliases)
1319 : {
1320 34 : Conf->useFlagAliases = true;
1321 34 : naffix = atoi(sflag);
1322 34 : if (naffix <= 0)
1323 0 : ereport(ERROR,
1324 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1325 : errmsg("invalid number of flag vector aliases")));
1326 :
1327 : /* Also reserve place for empty flag set */
1328 34 : naffix++;
1329 :
1330 34 : Conf->AffixData = (const char **) palloc0(naffix * sizeof(char *));
1331 34 : Conf->lenAffixData = Conf->nAffixData = naffix;
1332 :
1333 : /* Add empty flag set into AffixData */
1334 34 : Conf->AffixData[curaffix] = VoidString;
1335 34 : curaffix++;
1336 : }
1337 : /* Other lines are aliases */
1338 : else
1339 : {
1340 374 : if (curaffix < naffix)
1341 : {
1342 374 : Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
1343 374 : curaffix++;
1344 : }
1345 : else
1346 0 : ereport(ERROR,
1347 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1348 : errmsg("number of aliases exceeds specified number %d",
1349 : naffix - 1)));
1350 : }
1351 408 : goto nextline;
1352 : }
1353 : /* Else try to parse prefixes and suffixes */
1354 2068 : if (fields_read < 4 ||
1355 1672 : (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
1356 396 : goto nextline;
1357 :
1358 1672 : sflaglen = strlen(sflag);
1359 1672 : if (sflaglen == 0
1360 1672 : || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
1361 1672 : || (sflaglen > 2 && Conf->flagMode == FM_LONG))
1362 0 : goto nextline;
1363 :
1364 : /*--------
1365 : * Affix header. For example:
1366 : * SFX \ N 1
1367 : *--------
1368 : */
1369 1672 : if (fields_read == 4)
1370 : {
1371 836 : isSuffix = (STRNCMP(ptype, "sfx") == 0);
1372 836 : if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
1373 580 : flagflags = FF_CROSSPRODUCT;
1374 : else
1375 256 : flagflags = 0;
1376 : }
1377 : /*--------
1378 : * Affix fields. For example:
1379 : * SFX \ 0 Y/L [^Y]
1380 : *--------
1381 : */
1382 : else
1383 : {
1384 : char *ptr;
1385 836 : int aflg = 0;
1386 :
1387 : /* Get flags after '/' (flags are case sensitive) */
1388 836 : if ((ptr = strchr(repl, '/')) != NULL)
1389 162 : aflg |= getCompoundAffixFlagValue(Conf,
1390 : getAffixFlagSet(Conf,
1391 : ptr + 1));
1392 : /* Get lowercased version of string before '/' */
1393 836 : prepl = lowerstr_ctx(Conf, repl);
1394 836 : if ((ptr = strchr(prepl, '/')) != NULL)
1395 162 : *ptr = '\0';
1396 836 : pfind = lowerstr_ctx(Conf, find);
1397 836 : pmask = lowerstr_ctx(Conf, mask);
1398 836 : if (t_iseq(find, '0'))
1399 704 : *pfind = '\0';
1400 836 : if (t_iseq(repl, '0'))
1401 38 : *prepl = '\0';
1402 :
1403 836 : NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
1404 : isSuffix ? FF_SUFFIX : FF_PREFIX);
1405 836 : pfree(prepl);
1406 836 : pfree(pfind);
1407 836 : pfree(pmask);
1408 : }
1409 :
1410 3562 : nextline:
1411 3562 : pfree(recoded);
1412 : }
1413 :
1414 94 : tsearch_readline_end(&trst);
1415 94 : if (ptype)
1416 94 : pfree(ptype);
1417 94 : }
1418 :
1419 : /*
1420 : * import affixes
1421 : *
1422 : * Note caller must already have applied get_tsearch_config_filename
1423 : *
1424 : * This function is responsible for parsing ispell ("old format") affix files.
1425 : * If we realize that the file contains new-format commands, we pass off the
1426 : * work to NIImportOOAffixes(), which will re-read the whole file.
1427 : */
1428 : void
1429 142 : NIImportAffixes(IspellDict *Conf, const char *filename)
1430 : {
1431 142 : char *pstr = NULL;
1432 : char flag[BUFSIZ];
1433 : char mask[BUFSIZ];
1434 : char find[BUFSIZ];
1435 : char repl[BUFSIZ];
1436 : char *s;
1437 142 : bool suffixes = false;
1438 142 : bool prefixes = false;
1439 142 : char flagflags = 0;
1440 : tsearch_readline_state trst;
1441 142 : bool oldformat = false;
1442 142 : char *recoded = NULL;
1443 :
1444 142 : if (!tsearch_readline_begin(&trst, filename))
1445 0 : ereport(ERROR,
1446 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1447 : errmsg("could not open affix file \"%s\": %m",
1448 : filename)));
1449 :
1450 142 : Conf->usecompound = false;
1451 142 : Conf->useFlagAliases = false;
1452 142 : Conf->flagMode = FM_CHAR;
1453 :
1454 1390 : while ((recoded = tsearch_readline(&trst)) != NULL)
1455 : {
1456 1342 : pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
1457 :
1458 : /* Skip comments and empty lines */
1459 1342 : if (*pstr == '#' || *pstr == '\n')
1460 432 : goto nextline;
1461 :
1462 910 : if (STRNCMP(pstr, "compoundwords") == 0)
1463 : {
1464 : /* Find case-insensitive L flag in non-lowercased string */
1465 48 : s = findchar2(recoded, 'l', 'L');
1466 48 : if (s)
1467 : {
1468 240 : while (*s && !isspace((unsigned char) *s))
1469 192 : s += pg_mblen(s);
1470 96 : while (*s && isspace((unsigned char) *s))
1471 48 : s += pg_mblen(s);
1472 :
1473 48 : if (*s && pg_mblen(s) == 1)
1474 : {
1475 48 : addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
1476 48 : Conf->usecompound = true;
1477 : }
1478 48 : oldformat = true;
1479 48 : goto nextline;
1480 : }
1481 : }
1482 862 : if (STRNCMP(pstr, "suffixes") == 0)
1483 : {
1484 48 : suffixes = true;
1485 48 : prefixes = false;
1486 48 : oldformat = true;
1487 48 : goto nextline;
1488 : }
1489 814 : if (STRNCMP(pstr, "prefixes") == 0)
1490 : {
1491 48 : suffixes = false;
1492 48 : prefixes = true;
1493 48 : oldformat = true;
1494 48 : goto nextline;
1495 : }
1496 766 : if (STRNCMP(pstr, "flag") == 0)
1497 : {
1498 408 : s = recoded + 4; /* we need non-lowercased string */
1499 408 : flagflags = 0;
1500 :
1501 816 : while (*s && isspace((unsigned char) *s))
1502 408 : s += pg_mblen(s);
1503 :
1504 408 : if (*s == '*')
1505 : {
1506 240 : flagflags |= FF_CROSSPRODUCT;
1507 240 : s++;
1508 : }
1509 168 : else if (*s == '~')
1510 : {
1511 48 : flagflags |= FF_COMPOUNDONLY;
1512 48 : s++;
1513 : }
1514 :
1515 408 : if (*s == '\\')
1516 48 : s++;
1517 :
1518 : /*
1519 : * An old-format flag is a single ASCII character; we expect it to
1520 : * be followed by EOL, whitespace, or ':'. Otherwise this is a
1521 : * new-format flag command.
1522 : */
1523 408 : if (*s && pg_mblen(s) == 1)
1524 : {
1525 408 : COPYCHAR(flag, s);
1526 408 : flag[1] = '\0';
1527 :
1528 408 : s++;
1529 408 : if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
1530 72 : isspace((unsigned char) *s))
1531 : {
1532 336 : oldformat = true;
1533 336 : goto nextline;
1534 : }
1535 : }
1536 72 : goto isnewformat;
1537 : }
1538 358 : if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
1539 336 : STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
1540 336 : STRNCMP(recoded, "PFX") == 0 ||
1541 336 : STRNCMP(recoded, "SFX") == 0)
1542 22 : goto isnewformat;
1543 :
1544 336 : if ((!suffixes) && (!prefixes))
1545 0 : goto nextline;
1546 :
1547 336 : if (!parse_affentry(pstr, mask, find, repl))
1548 0 : goto nextline;
1549 :
1550 336 : NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
1551 :
1552 1248 : nextline:
1553 1248 : pfree(recoded);
1554 1248 : pfree(pstr);
1555 : }
1556 48 : tsearch_readline_end(&trst);
1557 48 : return;
1558 :
1559 94 : isnewformat:
1560 94 : if (oldformat)
1561 0 : ereport(ERROR,
1562 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1563 : errmsg("affix file contains both old-style and new-style commands")));
1564 94 : tsearch_readline_end(&trst);
1565 :
1566 94 : NIImportOOAffixes(Conf, filename);
1567 : }
1568 :
1569 : /*
1570 : * Merges two affix flag sets and stores a new affix flag set into
1571 : * Conf->AffixData.
1572 : *
1573 : * Returns index of a new affix flag set.
1574 : */
1575 : static int
1576 74 : MergeAffix(IspellDict *Conf, int a1, int a2)
1577 : {
1578 : const char **ptr;
1579 :
1580 : Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
1581 :
1582 : /* Do not merge affix flags if one of affix flags is empty */
1583 74 : if (*Conf->AffixData[a1] == '\0')
1584 0 : return a2;
1585 74 : else if (*Conf->AffixData[a2] == '\0')
1586 0 : return a1;
1587 :
1588 : /* Double the size of AffixData if there's not enough space */
1589 74 : if (Conf->nAffixData + 1 >= Conf->lenAffixData)
1590 : {
1591 74 : Conf->lenAffixData *= 2;
1592 74 : Conf->AffixData = (const char **) repalloc(Conf->AffixData,
1593 74 : sizeof(char *) * Conf->lenAffixData);
1594 : }
1595 :
1596 74 : ptr = Conf->AffixData + Conf->nAffixData;
1597 74 : if (Conf->flagMode == FM_NUM)
1598 : {
1599 32 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1600 : strlen(Conf->AffixData[a2]) +
1601 : 1 /* comma */ + 1 /* \0 */ );
1602 :
1603 32 : sprintf(p, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1604 32 : *ptr = p;
1605 : }
1606 : else
1607 : {
1608 42 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1609 : strlen(Conf->AffixData[a2]) +
1610 : 1 /* \0 */ );
1611 :
1612 42 : sprintf(p, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1613 42 : *ptr = p;
1614 : }
1615 74 : ptr++;
1616 74 : *ptr = NULL;
1617 74 : Conf->nAffixData++;
1618 :
1619 74 : return Conf->nAffixData - 1;
1620 : }
1621 :
1622 : /*
1623 : * Returns a set of affix parameters which correspondence to the set of affix
1624 : * flags with the given index.
1625 : */
1626 : static uint32
1627 1230 : makeCompoundFlags(IspellDict *Conf, int affix)
1628 : {
1629 : Assert(affix < Conf->nAffixData);
1630 :
1631 1230 : return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
1632 : FF_COMPOUNDFLAGMASK);
1633 : }
1634 :
1635 : /*
1636 : * Makes a prefix tree for the given level.
1637 : *
1638 : * Conf: current dictionary.
1639 : * low: lower index of the Conf->Spell array.
1640 : * high: upper index of the Conf->Spell array.
1641 : * level: current prefix tree level.
1642 : */
1643 : static SPNode *
1644 4908 : mkSPNode(IspellDict *Conf, int low, int high, int level)
1645 : {
1646 : int i;
1647 4908 : int nchar = 0;
1648 4908 : char lastchar = '\0';
1649 : SPNode *rs;
1650 : SPNodeData *data;
1651 4908 : int lownew = low;
1652 :
1653 16126 : for (i = low; i < high; i++)
1654 11218 : if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
1655 : {
1656 4808 : nchar++;
1657 4808 : lastchar = Conf->Spell[i]->word[level];
1658 : }
1659 :
1660 4908 : if (!nchar)
1661 704 : return NULL;
1662 :
1663 4204 : rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
1664 4204 : rs->length = nchar;
1665 4204 : data = rs->data;
1666 :
1667 4204 : lastchar = '\0';
1668 14224 : for (i = low; i < high; i++)
1669 10038 : if (Conf->Spell[i]->p.d.len > level)
1670 : {
1671 7212 : if (lastchar != Conf->Spell[i]->word[level])
1672 : {
1673 4796 : if (lastchar)
1674 : {
1675 : /* Next level of the prefix tree */
1676 592 : data->node = mkSPNode(Conf, lownew, i, level + 1);
1677 580 : lownew = i;
1678 580 : data++;
1679 : }
1680 4784 : lastchar = Conf->Spell[i]->word[level];
1681 : }
1682 7200 : data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
1683 7200 : if (Conf->Spell[i]->p.d.len == level + 1)
1684 : {
1685 1156 : bool clearCompoundOnly = false;
1686 :
1687 1156 : if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
1688 : {
1689 : /*
1690 : * MergeAffix called a few times. If one of word is
1691 : * allowed to be in compound word and another isn't, then
1692 : * clear FF_COMPOUNDONLY flag.
1693 : */
1694 :
1695 148 : clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
1696 74 : & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
1697 : ? false : true;
1698 74 : data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
1699 : }
1700 : else
1701 1082 : data->affix = Conf->Spell[i]->p.d.affix;
1702 1156 : data->isword = 1;
1703 :
1704 1156 : data->compoundflag = makeCompoundFlags(Conf, data->affix);
1705 :
1706 1150 : if ((data->compoundflag & FF_COMPOUNDONLY) &&
1707 0 : (data->compoundflag & FF_COMPOUNDFLAG) == 0)
1708 0 : data->compoundflag |= FF_COMPOUNDFLAG;
1709 :
1710 1150 : if (clearCompoundOnly)
1711 74 : data->compoundflag &= ~FF_COMPOUNDONLY;
1712 : }
1713 : }
1714 :
1715 : /* Next level of the prefix tree */
1716 4186 : data->node = mkSPNode(Conf, lownew, high, level + 1);
1717 :
1718 4180 : return rs;
1719 : }
1720 :
1721 : /*
1722 : * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
1723 : * and affixes.
1724 : */
1725 : void
1726 142 : NISortDictionary(IspellDict *Conf)
1727 : {
1728 : int i;
1729 : int naffix;
1730 : int curaffix;
1731 :
1732 : /* compress affixes */
1733 :
1734 : /*
1735 : * If we use flag aliases then we need to use Conf->AffixData filled in
1736 : * the NIImportOOAffixes().
1737 : */
1738 142 : if (Conf->useFlagAliases)
1739 : {
1740 276 : for (i = 0; i < Conf->nspell; i++)
1741 : {
1742 : char *end;
1743 :
1744 254 : if (*Conf->Spell[i]->p.flag != '\0')
1745 : {
1746 232 : errno = 0;
1747 232 : curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
1748 232 : if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
1749 6 : ereport(ERROR,
1750 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1751 : errmsg("invalid affix alias \"%s\"",
1752 : Conf->Spell[i]->p.flag)));
1753 226 : if (curaffix < 0 || curaffix >= Conf->nAffixData)
1754 6 : ereport(ERROR,
1755 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1756 : errmsg("invalid affix alias \"%s\"",
1757 : Conf->Spell[i]->p.flag)));
1758 220 : if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
1759 0 : ereport(ERROR,
1760 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1761 : errmsg("invalid affix alias \"%s\"",
1762 : Conf->Spell[i]->p.flag)));
1763 : }
1764 : else
1765 : {
1766 : /*
1767 : * If Conf->Spell[i]->p.flag is empty, then get empty value of
1768 : * Conf->AffixData (0 index).
1769 : */
1770 22 : curaffix = 0;
1771 : }
1772 :
1773 242 : Conf->Spell[i]->p.d.affix = curaffix;
1774 242 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1775 : }
1776 : }
1777 : /* Otherwise fill Conf->AffixData here */
1778 : else
1779 : {
1780 : /* Count the number of different flags used in the dictionary */
1781 108 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *),
1782 : cmpspellaffix);
1783 :
1784 108 : naffix = 0;
1785 1064 : for (i = 0; i < Conf->nspell; i++)
1786 : {
1787 956 : if (i == 0 ||
1788 848 : strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
1789 848 : naffix++;
1790 : }
1791 :
1792 : /*
1793 : * Fill in Conf->AffixData with the affixes that were used in the
1794 : * dictionary. Replace textual flag-field of Conf->Spell entries with
1795 : * indexes into Conf->AffixData array.
1796 : */
1797 108 : Conf->AffixData = (const char **) palloc0(naffix * sizeof(const char *));
1798 :
1799 108 : curaffix = -1;
1800 1064 : for (i = 0; i < Conf->nspell; i++)
1801 : {
1802 956 : if (i == 0 ||
1803 848 : strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
1804 : {
1805 848 : curaffix++;
1806 : Assert(curaffix < naffix);
1807 848 : Conf->AffixData[curaffix] = cpstrdup(Conf,
1808 848 : Conf->Spell[i]->p.flag);
1809 : }
1810 :
1811 956 : Conf->Spell[i]->p.d.affix = curaffix;
1812 956 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1813 : }
1814 :
1815 108 : Conf->lenAffixData = Conf->nAffixData = naffix;
1816 : }
1817 :
1818 : /* Start build a prefix tree */
1819 130 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
1820 130 : Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
1821 124 : }
1822 :
1823 : /*
1824 : * Makes a prefix tree for the given level using the repl string of an affix
1825 : * rule. Affixes with empty replace string do not include in the prefix tree.
1826 : * This affixes are included by mkVoidAffix().
1827 : *
1828 : * Conf: current dictionary.
1829 : * low: lower index of the Conf->Affix array.
1830 : * high: upper index of the Conf->Affix array.
1831 : * level: current prefix tree level.
1832 : * type: FF_SUFFIX or FF_PREFIX.
1833 : */
1834 : static AffixNode *
1835 2092 : mkANode(IspellDict *Conf, int low, int high, int level, int type)
1836 : {
1837 : int i;
1838 2092 : int nchar = 0;
1839 2092 : uint8 lastchar = '\0';
1840 : AffixNode *rs;
1841 : AffixNodeData *data;
1842 2092 : int lownew = low;
1843 : int naff;
1844 : AFFIX **aff;
1845 :
1846 5626 : for (i = low; i < high; i++)
1847 3534 : if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1848 : {
1849 1844 : nchar++;
1850 1844 : lastchar = GETCHAR(Conf->Affix + i, level, type);
1851 : }
1852 :
1853 2092 : if (!nchar)
1854 798 : return NULL;
1855 :
1856 1294 : aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
1857 1294 : naff = 0;
1858 :
1859 1294 : rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1860 1294 : rs->length = nchar;
1861 1294 : data = rs->data;
1862 :
1863 1294 : lastchar = '\0';
1864 3830 : for (i = low; i < high; i++)
1865 2536 : if (Conf->Affix[i].replen > level)
1866 : {
1867 2136 : if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1868 : {
1869 1844 : if (lastchar)
1870 : {
1871 : /* Next level of the prefix tree */
1872 550 : data->node = mkANode(Conf, lownew, i, level + 1, type);
1873 550 : if (naff)
1874 : {
1875 124 : data->naff = naff;
1876 124 : data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
1877 124 : memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1878 124 : naff = 0;
1879 : }
1880 550 : data++;
1881 550 : lownew = i;
1882 : }
1883 1844 : lastchar = GETCHAR(Conf->Affix + i, level, type);
1884 : }
1885 2136 : data->val = GETCHAR(Conf->Affix + i, level, type);
1886 2136 : if (Conf->Affix[i].replen == level + 1)
1887 : { /* affix stopped */
1888 966 : aff[naff++] = Conf->Affix + i;
1889 : }
1890 : }
1891 :
1892 : /* Next level of the prefix tree */
1893 1294 : data->node = mkANode(Conf, lownew, high, level + 1, type);
1894 1294 : if (naff)
1895 : {
1896 798 : data->naff = naff;
1897 798 : data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
1898 798 : memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1899 798 : naff = 0;
1900 : }
1901 :
1902 1294 : pfree(aff);
1903 :
1904 1294 : return rs;
1905 : }
1906 :
1907 : /*
1908 : * Makes the root void node in the prefix tree. The root void node is created
1909 : * for affixes which have empty replace string ("repl" field).
1910 : */
1911 : static void
1912 248 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1913 : {
1914 : int i,
1915 248 : cnt = 0;
1916 248 : int start = (issuffix) ? startsuffix : 0;
1917 248 : int end = (issuffix) ? Conf->naffixes : startsuffix;
1918 248 : AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1919 :
1920 248 : Affix->length = 1;
1921 248 : Affix->isvoid = 1;
1922 :
1923 248 : if (issuffix)
1924 : {
1925 124 : Affix->data->node = Conf->Suffix;
1926 124 : Conf->Suffix = Affix;
1927 : }
1928 : else
1929 : {
1930 124 : Affix->data->node = Conf->Prefix;
1931 124 : Conf->Prefix = Affix;
1932 : }
1933 :
1934 : /* Count affixes with empty replace string */
1935 1246 : for (i = start; i < end; i++)
1936 998 : if (Conf->Affix[i].replen == 0)
1937 32 : cnt++;
1938 :
1939 : /* There is not affixes with empty replace string */
1940 248 : if (cnt == 0)
1941 216 : return;
1942 :
1943 32 : Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
1944 32 : Affix->data->naff = (uint32) cnt;
1945 :
1946 32 : cnt = 0;
1947 256 : for (i = start; i < end; i++)
1948 224 : if (Conf->Affix[i].replen == 0)
1949 : {
1950 32 : Affix->data->aff[cnt] = Conf->Affix + i;
1951 32 : cnt++;
1952 : }
1953 : }
1954 :
1955 : /*
1956 : * Checks if the affixflag is used by dictionary. Conf->AffixData does not
1957 : * contain affixflag if this flag is not used actually by the .dict file.
1958 : *
1959 : * Conf: current dictionary.
1960 : * affixflag: affix flag.
1961 : *
1962 : * Returns true if the Conf->AffixData array contains affixflag, otherwise
1963 : * returns false.
1964 : */
1965 : static bool
1966 168 : isAffixInUse(IspellDict *Conf, const char *affixflag)
1967 : {
1968 : int i;
1969 :
1970 1250 : for (i = 0; i < Conf->nAffixData; i++)
1971 1220 : if (IsAffixFlagInUse(Conf, i, affixflag))
1972 138 : return true;
1973 :
1974 30 : return false;
1975 : }
1976 :
1977 : /*
1978 : * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
1979 : */
1980 : void
1981 124 : NISortAffixes(IspellDict *Conf)
1982 : {
1983 : AFFIX *Affix;
1984 : size_t i;
1985 : CMPDAffix *ptr;
1986 124 : int firstsuffix = Conf->naffixes;
1987 :
1988 124 : if (Conf->naffixes == 0)
1989 0 : return;
1990 :
1991 : /* Store compound affixes in the Conf->CompoundAffix array */
1992 124 : if (Conf->naffixes > 1)
1993 124 : qsort(Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1994 124 : Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
1995 124 : ptr->affix = NULL;
1996 :
1997 1122 : for (i = 0; i < Conf->naffixes; i++)
1998 : {
1999 998 : Affix = &(((AFFIX *) Conf->Affix)[i]);
2000 998 : if (Affix->type == FF_SUFFIX && i < firstsuffix)
2001 124 : firstsuffix = i;
2002 :
2003 1166 : if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
2004 168 : isAffixInUse(Conf, Affix->flag))
2005 : {
2006 138 : bool issuffix = (Affix->type == FF_SUFFIX);
2007 :
2008 138 : if (ptr == Conf->CompoundAffix ||
2009 88 : issuffix != (ptr - 1)->issuffix ||
2010 44 : strbncmp((const unsigned char *) (ptr - 1)->affix,
2011 44 : (const unsigned char *) Affix->repl,
2012 44 : (ptr - 1)->len))
2013 : {
2014 : /* leave only unique and minimal suffixes */
2015 116 : ptr->affix = Affix->repl;
2016 116 : ptr->len = Affix->replen;
2017 116 : ptr->issuffix = issuffix;
2018 116 : ptr++;
2019 : }
2020 : }
2021 : }
2022 124 : ptr->affix = NULL;
2023 124 : Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
2024 :
2025 : /* Start build a prefix tree */
2026 124 : Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
2027 124 : Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
2028 124 : mkVoidAffix(Conf, true, firstsuffix);
2029 124 : mkVoidAffix(Conf, false, firstsuffix);
2030 : }
2031 :
2032 : static AffixNodeData *
2033 4620 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
2034 : {
2035 : AffixNodeData *StopLow,
2036 : *StopHigh,
2037 : *StopMiddle;
2038 : uint8 symbol;
2039 :
2040 4620 : if (node->isvoid)
2041 : { /* search void affixes */
2042 4020 : if (node->data->naff)
2043 342 : return node->data;
2044 3678 : node = node->data->node;
2045 : }
2046 :
2047 5382 : while (node && *level < wrdlen)
2048 : {
2049 5358 : StopLow = node->data;
2050 5358 : StopHigh = node->data + node->length;
2051 11826 : while (StopLow < StopHigh)
2052 : {
2053 8874 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
2054 8874 : symbol = GETWCHAR(word, wrdlen, *level, type);
2055 :
2056 8874 : if (StopMiddle->val == symbol)
2057 : {
2058 2406 : (*level)++;
2059 2406 : if (StopMiddle->naff)
2060 1302 : return StopMiddle;
2061 1104 : node = StopMiddle->node;
2062 1104 : break;
2063 : }
2064 6468 : else if (StopMiddle->val < symbol)
2065 1608 : StopLow = StopMiddle + 1;
2066 : else
2067 4860 : StopHigh = StopMiddle;
2068 : }
2069 4056 : if (StopLow >= StopHigh)
2070 2952 : break;
2071 : }
2072 2976 : return NULL;
2073 : }
2074 :
2075 : static char *
2076 1836 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
2077 : {
2078 : /*
2079 : * Check compound allow flags
2080 : */
2081 :
2082 1836 : if (flagflags == 0)
2083 : {
2084 1266 : if (Affix->flagflags & FF_COMPOUNDONLY)
2085 132 : return NULL;
2086 : }
2087 570 : else if (flagflags & FF_COMPOUNDBEGIN)
2088 : {
2089 0 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2090 0 : return NULL;
2091 0 : if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
2092 0 : if (Affix->type == FF_SUFFIX)
2093 0 : return NULL;
2094 : }
2095 570 : else if (flagflags & FF_COMPOUNDMIDDLE)
2096 : {
2097 408 : if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
2098 228 : (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
2099 180 : return NULL;
2100 : }
2101 162 : else if (flagflags & FF_COMPOUNDLAST)
2102 : {
2103 162 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2104 0 : return NULL;
2105 162 : if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
2106 150 : if (Affix->type == FF_PREFIX)
2107 0 : return NULL;
2108 : }
2109 :
2110 : /*
2111 : * make replace pattern of affix
2112 : */
2113 1524 : if (Affix->type == FF_SUFFIX)
2114 : {
2115 1044 : strcpy(newword, word);
2116 1044 : strcpy(newword + len - Affix->replen, Affix->find);
2117 1044 : if (baselen) /* store length of non-changed part of word */
2118 1044 : *baselen = len - Affix->replen;
2119 : }
2120 : else
2121 : {
2122 : /*
2123 : * if prefix is an all non-changed part's length then all word
2124 : * contains only prefix and suffix, so out
2125 : */
2126 480 : if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
2127 0 : return NULL;
2128 480 : strcpy(newword, Affix->find);
2129 480 : strcat(newword, word + Affix->replen);
2130 : }
2131 :
2132 : /*
2133 : * check resulting word
2134 : */
2135 1524 : if (Affix->issimple)
2136 480 : return newword;
2137 1044 : else if (Affix->isregis)
2138 : {
2139 708 : if (RS_execute(&(Affix->reg.regis), newword))
2140 672 : return newword;
2141 : }
2142 : else
2143 : {
2144 : pg_wchar *data;
2145 : size_t data_len;
2146 : int newword_len;
2147 :
2148 : /* Convert data string to wide characters */
2149 336 : newword_len = strlen(newword);
2150 336 : data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
2151 336 : data_len = pg_mb2wchar_with_len(newword, data, newword_len);
2152 :
2153 336 : if (pg_regexec(Affix->reg.pregex, data, data_len,
2154 : 0, NULL, 0, NULL, 0) == REG_OKAY)
2155 : {
2156 336 : pfree(data);
2157 336 : return newword;
2158 : }
2159 0 : pfree(data);
2160 : }
2161 :
2162 36 : return NULL;
2163 : }
2164 :
2165 : static int
2166 540 : addToResult(char **forms, char **cur, char *word)
2167 : {
2168 540 : if (cur - forms >= MAX_NORM - 1)
2169 0 : return 0;
2170 540 : if (forms == cur || strcmp(word, *(cur - 1)) != 0)
2171 : {
2172 540 : *cur = pstrdup(word);
2173 540 : *(cur + 1) = NULL;
2174 540 : return 1;
2175 : }
2176 :
2177 0 : return 0;
2178 : }
2179 :
2180 : static char **
2181 1506 : NormalizeSubWord(IspellDict *Conf, const char *word, int flag)
2182 : {
2183 1506 : AffixNodeData *suffix = NULL,
2184 1506 : *prefix = NULL;
2185 1506 : int slevel = 0,
2186 1506 : plevel = 0;
2187 1506 : int wrdlen = strlen(word),
2188 : swrdlen;
2189 : char **forms;
2190 : char **cur;
2191 1506 : char newword[2 * MAXNORMLEN] = "";
2192 1506 : char pnewword[2 * MAXNORMLEN] = "";
2193 1506 : AffixNode *snode = Conf->Suffix,
2194 : *pnode;
2195 : int i,
2196 : j;
2197 :
2198 1506 : if (wrdlen > MAXNORMLEN)
2199 0 : return NULL;
2200 1506 : cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
2201 1506 : *cur = NULL;
2202 :
2203 :
2204 : /* Check that the word itself is normal form */
2205 1506 : if (FindWord(Conf, word, VoidString, flag))
2206 : {
2207 468 : *cur = pstrdup(word);
2208 468 : cur++;
2209 468 : *cur = NULL;
2210 : }
2211 :
2212 : /* Find all other NORMAL forms of the 'word' (check only prefix) */
2213 1506 : pnode = Conf->Prefix;
2214 1506 : plevel = 0;
2215 1722 : while (pnode)
2216 : {
2217 1506 : prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
2218 1506 : if (!prefix)
2219 1290 : break;
2220 432 : for (j = 0; j < prefix->naff; j++)
2221 : {
2222 216 : if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
2223 : {
2224 : /* prefix success */
2225 192 : if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
2226 48 : cur += addToResult(forms, cur, newword);
2227 : }
2228 : }
2229 216 : pnode = prefix->node;
2230 : }
2231 :
2232 : /*
2233 : * Find all other NORMAL forms of the 'word' (check suffix and then
2234 : * prefix)
2235 : */
2236 2598 : while (snode)
2237 : {
2238 2106 : int baselen = 0;
2239 :
2240 : /* find possible suffix */
2241 2106 : suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
2242 2106 : if (!suffix)
2243 1014 : break;
2244 : /* foreach suffix check affix */
2245 2376 : for (i = 0; i < suffix->naff; i++)
2246 : {
2247 1284 : if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
2248 : {
2249 : /* suffix success */
2250 1008 : if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
2251 276 : cur += addToResult(forms, cur, newword);
2252 :
2253 : /* now we will look changed word with prefixes */
2254 1008 : pnode = Conf->Prefix;
2255 1008 : plevel = 0;
2256 1008 : swrdlen = strlen(newword);
2257 1344 : while (pnode)
2258 : {
2259 1008 : prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
2260 1008 : if (!prefix)
2261 672 : break;
2262 672 : for (j = 0; j < prefix->naff; j++)
2263 : {
2264 336 : if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
2265 : {
2266 : /* prefix success */
2267 576 : const char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
2268 288 : VoidString : prefix->aff[j]->flag;
2269 :
2270 288 : if (FindWord(Conf, pnewword, ff, flag))
2271 216 : cur += addToResult(forms, cur, pnewword);
2272 : }
2273 : }
2274 336 : pnode = prefix->node;
2275 : }
2276 : }
2277 : }
2278 :
2279 1092 : snode = suffix->node;
2280 : }
2281 :
2282 1506 : if (cur == forms)
2283 : {
2284 666 : pfree(forms);
2285 666 : return NULL;
2286 : }
2287 840 : return forms;
2288 : }
2289 :
/*
 * One candidate split of a compound word into stems.  Candidates are
 * chained through 'next' to form a singly linked list.
 */
typedef struct SplitVar SplitVar;

struct SplitVar
{
	int			nstem;			/* number of entries of stem[] in use */
	int			lenstem;		/* number of entries allocated in stem[] */
	char	  **stem;			/* the stems, in word order */
	SplitVar   *next;			/* next candidate, or NULL */
};
2297 :
2298 : static int
2299 6060 : CheckCompoundAffixes(CMPDAffix **ptr, const char *word, int len, bool CheckInPlace)
2300 : {
2301 : bool issuffix;
2302 :
2303 : /* in case CompoundAffix is null: */
2304 6060 : if (*ptr == NULL)
2305 0 : return -1;
2306 :
2307 6060 : if (CheckInPlace)
2308 : {
2309 11568 : while ((*ptr)->affix)
2310 : {
2311 6444 : if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
2312 : {
2313 60 : len = (*ptr)->len;
2314 60 : issuffix = (*ptr)->issuffix;
2315 60 : (*ptr)++;
2316 60 : return (issuffix) ? len : 0;
2317 : }
2318 6384 : (*ptr)++;
2319 : }
2320 : }
2321 : else
2322 : {
2323 : char *affbegin;
2324 :
2325 1692 : while ((*ptr)->affix)
2326 : {
2327 942 : if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
2328 : {
2329 126 : len = (*ptr)->len + (affbegin - word);
2330 126 : issuffix = (*ptr)->issuffix;
2331 126 : (*ptr)++;
2332 126 : return (issuffix) ? len : 0;
2333 : }
2334 816 : (*ptr)++;
2335 : }
2336 : }
2337 5874 : return -1;
2338 : }
2339 :
2340 : static SplitVar *
2341 1410 : CopyVar(SplitVar *s, int makedup)
2342 : {
2343 1410 : SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar));
2344 :
2345 1410 : v->next = NULL;
2346 1410 : if (s)
2347 : {
2348 : int i;
2349 :
2350 660 : v->lenstem = s->lenstem;
2351 660 : v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
2352 660 : v->nstem = s->nstem;
2353 1002 : for (i = 0; i < s->nstem; i++)
2354 342 : v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
2355 : }
2356 : else
2357 : {
2358 750 : v->lenstem = 16;
2359 750 : v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
2360 750 : v->nstem = 0;
2361 : }
2362 1410 : return v;
2363 : }
2364 :
2365 : static void
2366 1890 : AddStem(SplitVar *v, char *word)
2367 : {
2368 1890 : if (v->nstem >= v->lenstem)
2369 : {
2370 0 : v->lenstem *= 2;
2371 0 : v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
2372 : }
2373 :
2374 1890 : v->stem[v->nstem] = word;
2375 1890 : v->nstem++;
2376 1890 : }
2377 :
2378 : static SplitVar *
2379 1320 : SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, const char *word, int wordlen, int startpos, int minpos)
2380 : {
2381 1320 : SplitVar *var = NULL;
2382 : SPNodeData *StopLow,
2383 : *StopHigh,
2384 1320 : *StopMiddle = NULL;
2385 1320 : SPNode *node = (snode) ? snode : Conf->Dictionary;
2386 1320 : int level = (snode) ? minpos : startpos; /* recursive
2387 : * minpos==level */
2388 : int lenaff;
2389 : CMPDAffix *caff;
2390 : char *notprobed;
2391 1320 : int compoundflag = 0;
2392 :
2393 : /* since this function recurses, it could be driven to stack overflow */
2394 1320 : check_stack_depth();
2395 :
2396 1320 : notprobed = (char *) palloc(wordlen);
2397 1320 : memset(notprobed, 1, wordlen);
2398 1320 : var = CopyVar(orig, 1);
2399 :
2400 7452 : while (level < wordlen)
2401 : {
2402 : /* find word with epenthetic or/and compound affix */
2403 7194 : caff = Conf->CompoundAffix;
2404 7380 : while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
2405 : {
2406 : /*
2407 : * there is one of compound affixes, so check word for existings
2408 : */
2409 : char buf[MAXNORMLEN];
2410 : char **subres;
2411 :
2412 186 : lenaff = level - startpos + lenaff;
2413 :
2414 186 : if (!notprobed[startpos + lenaff - 1])
2415 0 : continue;
2416 :
2417 186 : if (level + lenaff - 1 <= minpos)
2418 0 : continue;
2419 :
2420 186 : if (lenaff >= MAXNORMLEN)
2421 0 : continue; /* skip too big value */
2422 186 : if (lenaff > 0)
2423 186 : memcpy(buf, word + startpos, lenaff);
2424 186 : buf[lenaff] = '\0';
2425 :
2426 186 : if (level == 0)
2427 0 : compoundflag = FF_COMPOUNDBEGIN;
2428 186 : else if (level == wordlen - 1)
2429 0 : compoundflag = FF_COMPOUNDLAST;
2430 : else
2431 186 : compoundflag = FF_COMPOUNDMIDDLE;
2432 186 : subres = NormalizeSubWord(Conf, buf, compoundflag);
2433 186 : if (subres)
2434 : {
2435 : /* Yes, it was a word from dictionary */
2436 90 : SplitVar *new = CopyVar(var, 0);
2437 90 : SplitVar *ptr = var;
2438 90 : char **sptr = subres;
2439 :
2440 90 : notprobed[startpos + lenaff - 1] = 0;
2441 :
2442 180 : while (*sptr)
2443 : {
2444 90 : AddStem(new, *sptr);
2445 90 : sptr++;
2446 : }
2447 90 : pfree(subres);
2448 :
2449 90 : while (ptr->next)
2450 0 : ptr = ptr->next;
2451 90 : ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
2452 :
2453 90 : pfree(new->stem);
2454 90 : pfree(new);
2455 : }
2456 : }
2457 :
2458 7194 : if (!node)
2459 750 : break;
2460 :
2461 6444 : StopLow = node->data;
2462 6444 : StopHigh = node->data + node->length;
2463 8694 : while (StopLow < StopHigh)
2464 : {
2465 8064 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
2466 8064 : if (StopMiddle->val == ((uint8 *) (word))[level])
2467 5814 : break;
2468 2250 : else if (StopMiddle->val < ((uint8 *) (word))[level])
2469 978 : StopLow = StopMiddle + 1;
2470 : else
2471 1272 : StopHigh = StopMiddle;
2472 : }
2473 :
2474 6444 : if (StopLow < StopHigh)
2475 : {
2476 5814 : if (startpos == 0)
2477 3270 : compoundflag = FF_COMPOUNDBEGIN;
2478 2544 : else if (level == wordlen - 1)
2479 288 : compoundflag = FF_COMPOUNDLAST;
2480 : else
2481 2256 : compoundflag = FF_COMPOUNDMIDDLE;
2482 :
2483 : /* find infinitive */
2484 5814 : if (StopMiddle->isword &&
2485 1536 : (StopMiddle->compoundflag & compoundflag) &&
2486 1272 : notprobed[level])
2487 : {
2488 : /* ok, we found full compoundallowed word */
2489 1272 : if (level > minpos)
2490 : {
2491 : /* and its length more than minimal */
2492 792 : if (wordlen == level + 1)
2493 : {
2494 : /* well, it was last word */
2495 312 : AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2496 312 : pfree(notprobed);
2497 312 : return var;
2498 : }
2499 : else
2500 : {
2501 : /* then we will search more big word at the same point */
2502 480 : SplitVar *ptr = var;
2503 :
2504 744 : while (ptr->next)
2505 264 : ptr = ptr->next;
2506 480 : ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
2507 : /* we can find next word */
2508 480 : level++;
2509 480 : AddStem(var, pnstrdup(word + startpos, level - startpos));
2510 480 : node = Conf->Dictionary;
2511 480 : startpos = level;
2512 480 : continue;
2513 : }
2514 : }
2515 : }
2516 5022 : node = StopMiddle->node;
2517 : }
2518 : else
2519 630 : node = NULL;
2520 5652 : level++;
2521 : }
2522 :
2523 1008 : AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2524 1008 : pfree(notprobed);
2525 1008 : return var;
2526 : }
2527 :
2528 : static void
2529 1314 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
2530 : {
2531 1314 : if (*lres == NULL)
2532 606 : *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
2533 :
2534 1314 : if (*lcur - *lres < MAX_NORM - 1)
2535 : {
2536 1314 : (*lcur)->lexeme = word;
2537 1314 : (*lcur)->flags = flags;
2538 1314 : (*lcur)->nvariant = NVariant;
2539 1314 : (*lcur)++;
2540 1314 : (*lcur)->lexeme = NULL;
2541 : }
2542 1314 : }
2543 :
2544 : TSLexeme *
2545 750 : NINormalizeWord(IspellDict *Conf, const char *word)
2546 : {
2547 : char **res;
2548 750 : TSLexeme *lcur = NULL,
2549 750 : *lres = NULL;
2550 750 : uint16 NVariant = 1;
2551 :
2552 750 : res = NormalizeSubWord(Conf, word, 0);
2553 :
2554 750 : if (res)
2555 : {
2556 486 : char **ptr = res;
2557 :
2558 1140 : while (*ptr && (lcur - lres) < MAX_NORM)
2559 : {
2560 654 : addNorm(&lres, &lcur, *ptr, 0, NVariant++);
2561 654 : ptr++;
2562 : }
2563 486 : pfree(res);
2564 : }
2565 :
2566 750 : if (Conf->usecompound)
2567 : {
2568 750 : int wordlen = strlen(word);
2569 : SplitVar *ptr,
2570 750 : *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
2571 : int i;
2572 :
2573 2070 : while (var)
2574 : {
2575 1320 : if (var->nstem > 1)
2576 : {
2577 570 : char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
2578 :
2579 570 : if (subres)
2580 : {
2581 264 : char **subptr = subres;
2582 :
2583 528 : while (*subptr)
2584 : {
2585 660 : for (i = 0; i < var->nstem - 1; i++)
2586 : {
2587 396 : addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
2588 : }
2589 :
2590 264 : addNorm(&lres, &lcur, *subptr, 0, NVariant);
2591 264 : subptr++;
2592 264 : NVariant++;
2593 : }
2594 :
2595 264 : pfree(subres);
2596 264 : var->stem[0] = NULL;
2597 264 : pfree(var->stem[var->nstem - 1]);
2598 : }
2599 : }
2600 :
2601 2742 : for (i = 0; i < var->nstem && var->stem[i]; i++)
2602 1422 : pfree(var->stem[i]);
2603 1320 : ptr = var->next;
2604 1320 : pfree(var->stem);
2605 1320 : pfree(var);
2606 1320 : var = ptr;
2607 : }
2608 : }
2609 :
2610 750 : return lres;
2611 : }
|