Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spell.c
4 : * Normalizing word with ISpell
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : *
8 : * Ispell dictionary
9 : * -----------------
10 : *
11 : * Rules of dictionaries are defined in two files with .affix and .dict
12 : * extensions. They are used by spell checker programs Ispell and Hunspell.
13 : *
14 : * An .affix file declares morphological rules to get a basic form of words.
15 : * The format of an .affix file has different structure for Ispell and Hunspell
16 : * dictionaries. The Hunspell format is more complicated. But when an .affix
17 : * file is imported and compiled, it is stored in the same structure AffixNode.
18 : *
19 : * A .dict file stores a list of basic forms of words with references to
20 : * affix rules. The format of a .dict file has the same structure for Ispell
21 : * and Hunspell dictionaries.
22 : *
23 : * Compilation of a dictionary
24 : * ---------------------------
25 : *
26 : * A compiled dictionary is stored in the IspellDict structure. Compilation of
27 : * a dictionary is divided into several steps:
28 : * - NIImportDictionary() - stores each word of a .dict file in the
29 : * temporary Spell field.
30 : * - NIImportAffixes() - stores affix rules of an .affix file in the
31 : * Affix field (not temporary) if an .affix file has the Ispell format.
32 : * -> NIImportOOAffixes() - stores affix rules if an .affix file has the
33 : * Hunspell format. The AffixData field is initialized if AF parameter
34 : * is defined.
35 : * - NISortDictionary() - builds a prefix tree (Trie) from the words list
36 : * and stores it in the Dictionary field. The words list is got from the
37 : * Spell field. The AffixData field is initialized if AF parameter is not
38 : * defined.
39 : * - NISortAffixes():
40 : * - builds a list of compound affixes from the affix list and stores it
41 : * in the CompoundAffix.
42 : * - builds prefix trees (Trie) from the affix list for prefixes and suffixes
43 : * and stores them in Suffix and Prefix fields.
44 : * The affix list is got from the Affix field.
45 : *
46 : * Memory management
47 : * -----------------
48 : *
49 : * The IspellDict structure has the Spell field which is used only in compile
50 : * time. The Spell field stores a words list. It can take a lot of memory.
51 : * Therefore when a dictionary is compiled this field is cleared by
52 : * NIFinishBuild().
53 : *
54 : * All resources that should be released by NIFinishBuild() are allocated using
55 : * tmpalloc() and tmpalloc0().
56 : *
57 : * IDENTIFICATION
58 : * src/backend/tsearch/spell.c
59 : *
60 : *-------------------------------------------------------------------------
61 : */
62 :
63 : #include "postgres.h"
64 :
65 : #include "catalog/pg_collation.h"
66 : #include "miscadmin.h"
67 : #include "tsearch/dicts/spell.h"
68 : #include "tsearch/ts_locale.h"
69 : #include "utils/formatting.h"
70 : #include "utils/memutils.h"
71 :
72 :
/*
 * Initialization requires a lot of memory that's not needed
 * after the initialization is done.  During initialization,
 * CurrentMemoryContext is the long-lived memory context associated
 * with the dictionary cache entry.  We keep the short-lived stuff
 * in the Conf->buildCxt context.
 *
 * Both macros expand to an expression that references a variable named
 * "Conf", so they can only be used where such a variable is in scope.
 * tmpalloc() maps to MemoryContextAlloc() and tmpalloc0() to
 * MemoryContextAllocZero(), both targeting Conf->buildCxt.
 */
#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz))
#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz))
82 :
83 : /*
84 : * Prepare for constructing an ISpell dictionary.
85 : *
86 : * The IspellDict struct is assumed to be zeroed when allocated.
87 : */
88 : void
89 87 : NIStartBuild(IspellDict *Conf)
90 : {
91 : /*
92 : * The temp context is a child of CurTransactionContext, so that it will
93 : * go away automatically on error.
94 : */
95 87 : Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
96 : "Ispell dictionary init context",
97 : ALLOCSET_DEFAULT_SIZES);
98 87 : }
99 :
100 : /*
101 : * Clean up when dictionary construction is complete.
102 : */
103 : void
104 71 : NIFinishBuild(IspellDict *Conf)
105 : {
106 : /* Release no-longer-needed temp memory */
107 71 : MemoryContextDelete(Conf->buildCxt);
108 : /* Just for cleanliness, zero the now-dangling pointers */
109 71 : Conf->buildCxt = NULL;
110 71 : Conf->Spell = NULL;
111 71 : Conf->firstfree = NULL;
112 71 : Conf->CompoundAffixFlags = NULL;
113 71 : }
114 :
115 :
116 : /*
117 : * "Compact" palloc: allocate without extra palloc overhead.
118 : *
119 : * Since we have no need to free the ispell data items individually, there's
120 : * not much value in the per-chunk overhead normally consumed by palloc.
121 : * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
122 : *
123 : * We currently pre-zero all data allocated this way, even though some of it
124 : * doesn't need that. The cpalloc and cpalloc0 macros are just documentation
125 : * to indicate which allocations actually require zeroing.
126 : */
127 : #define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
128 : #define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
129 :
130 : static void *
131 8013 : compact_palloc0(IspellDict *Conf, size_t size)
132 : {
133 : void *result;
134 :
135 : /* Should only be called during init */
136 : Assert(Conf->buildCxt != NULL);
137 :
138 : /* No point in this for large chunks */
139 8013 : if (size > COMPACT_MAX_REQ)
140 0 : return palloc0(size);
141 :
142 : /* Keep everything maxaligned */
143 8013 : size = MAXALIGN(size);
144 :
145 : /* Need more space? */
146 8013 : if (size > Conf->avail)
147 : {
148 83 : Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
149 83 : Conf->avail = COMPACT_ALLOC_CHUNK;
150 : }
151 :
152 8013 : result = Conf->firstfree;
153 8013 : Conf->firstfree += size;
154 8013 : Conf->avail -= size;
155 :
156 8013 : return result;
157 : }
158 :
159 : #define cpalloc(size) compact_palloc0(Conf, size)
160 : #define cpalloc0(size) compact_palloc0(Conf, size)
161 :
162 : static char *
163 4284 : cpstrdup(IspellDict *Conf, const char *str)
164 : {
165 4284 : char *res = cpalloc(strlen(str) + 1);
166 :
167 4284 : strcpy(res, str);
168 4284 : return res;
169 : }
170 :
171 :
172 : /*
173 : * Apply str_tolower(), producing a temporary result (in the buildCxt).
174 : */
175 : static char *
176 3741 : lowerstr_ctx(IspellDict *Conf, const char *src)
177 : {
178 : MemoryContext saveCtx;
179 : char *dst;
180 :
181 3741 : saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
182 3741 : dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
183 3741 : MemoryContextSwitchTo(saveCtx);
184 :
185 3741 : return dst;
186 : }
187 :
/*
 * NOTE(review): MAX_NORM and MAXNORMLEN are not referenced in the visible
 * part of this file; presumably they bound the normalization results
 * produced further down -- confirm against their users.
 */
#define MAX_NORM 1024
#define MAXNORMLEN 256

/* strncmp() limited to the length of p: zero when s begins with p */
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
/*
 * Fetch one byte of W (whose length is L) as uint8: byte N counted from the
 * start when T is FF_PREFIX, otherwise byte N counted back from the end.
 */
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* GETWCHAR applied to the repl string (length replen) of affix A */
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )

/* Shared empty string, stored in place of a freshly allocated empty copy */
static const char *VoidString = "";
196 :
197 : static int
198 1866 : cmpspell(const void *s1, const void *s2)
199 : {
200 1866 : return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
201 : }
202 :
203 : static int
204 1456 : cmpspellaffix(const void *s1, const void *s2)
205 : {
206 2912 : return strcmp((*(SPELL *const *) s1)->p.flag,
207 1456 : (*(SPELL *const *) s2)->p.flag);
208 : }
209 :
210 : static int
211 2533 : cmpcmdflag(const void *f1, const void *f2)
212 : {
213 2533 : const CompoundAffixFlag *fv1 = f1;
214 2533 : const CompoundAffixFlag *fv2 = f2;
215 :
216 : Assert(fv1->flagMode == fv2->flagMode);
217 :
218 2533 : if (fv1->flagMode == FM_NUM)
219 : {
220 489 : if (fv1->flag.i == fv2->flag.i)
221 74 : return 0;
222 :
223 415 : return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
224 : }
225 :
226 2044 : return strcmp(fv1->flag.s, fv2->flag.s);
227 : }
228 :
/*
 * Locate the first character of str that t_iseq() reports as equal to c,
 * stepping through the string one (possibly multibyte) character at a time.
 *
 * Returns a pointer to that character, or NULL if there is none.
 */
static char *
findchar(char *str, int c)
{
	char	   *p;

	for (p = str; *p != '\0'; p += pg_mblen_cstr(p))
	{
		if (t_iseq(p, c))
			return p;
	}

	return NULL;
}
241 :
/*
 * Like findchar(), but stops at the first character matching either c1
 * or c2.
 *
 * Returns a pointer to that character, or NULL if neither occurs.
 */
static char *
findchar2(char *str, int c1, int c2)
{
	char	   *p;

	for (p = str; *p != '\0'; p += pg_mblen_cstr(p))
	{
		if (t_iseq(p, c1) || t_iseq(p, c2))
			return p;
	}

	return NULL;
}
254 :
255 :
/*
 * Backward string comparison, used for suffix tree operations.
 *
 * The strings are compared byte by byte starting from their last bytes.
 * If one string is exhausted first (i.e. it is a tail of the other), the
 * shorter string sorts first.  Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	int			i1 = strlen((const char *) s1) - 1;
	int			i2 = strlen((const char *) s2) - 1;

	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* All compared bytes matched; decide by what is left over */
	if (i1 == i2)
		return 0;

	return (i1 < i2) ? -1 : 1;
}
279 :
/*
 * Backward string comparison limited to "count" bytes.
 *
 * Works like strbcmp(), but reports equality as soon as count trailing
 * bytes have been compared without finding a difference.  If a string runs
 * out before that, the shorter one sorts first.  Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	int			i1 = strlen((const char *) s1) - 1;
	int			i2 = strlen((const char *) s2) - 1;
	int			remaining = count;

	for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* The requested number of bytes matched */
	if (remaining == 0)
		return 0;

	/* A string ended early; decide by what is left over */
	if (i1 == i2)
		return 0;

	return (i1 < i2) ? -1 : 1;
}
305 :
306 : /*
307 : * Compares affixes.
308 : * First compares the type of an affix. Prefixes should go before affixes.
309 : * If types are equal then compares replaceable string.
310 : */
311 : static int
312 1260 : cmpaffix(const void *s1, const void *s2)
313 : {
314 1260 : const AFFIX *a1 = (const AFFIX *) s1;
315 1260 : const AFFIX *a2 = (const AFFIX *) s2;
316 :
317 1260 : if (a1->type < a2->type)
318 288 : return -1;
319 972 : if (a1->type > a2->type)
320 85 : return 1;
321 887 : if (a1->type == FF_PREFIX)
322 142 : return strcmp(a1->repl, a2->repl);
323 : else
324 745 : return strbcmp((const unsigned char *) a1->repl,
325 745 : (const unsigned char *) a2->repl);
326 : }
327 :
328 : /*
329 : * Gets an affix flag from the set of affix flags (sflagset).
330 : *
331 : * Several flags can be stored in a single string. Flags can be represented by:
332 : * - 1 character (FM_CHAR). A character may be Unicode.
333 : * - 2 characters (FM_LONG). A character may be Unicode.
334 : * - numbers from 1 to 65000 (FM_NUM).
335 : *
336 : * Depending on the flagMode an affix string can have the following format:
337 : * - FM_CHAR: ABCD
338 : * Here we have 4 flags: A, B, C and D
339 : * - FM_LONG: ABCDE*
340 : * Here we have 3 flags: AB, CD and E*
341 : * - FM_NUM: 200,205,50
342 : * Here we have 3 flags: 200, 205 and 50
343 : *
344 : * Conf: current dictionary.
345 : * sflagset: the set of affix flags. Returns a reference to the start of a next
346 : * affix flag.
347 : * sflag: returns an affix flag from sflagset.
348 : */
349 : static void
350 4086 : getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
351 : {
352 : int32 s;
353 : char *next;
354 4086 : const char *sbuf = *sflagset;
355 : int maxstep;
356 : int clen;
357 4086 : bool stop = false;
358 4086 : bool met_comma = false;
359 :
360 4086 : maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
361 :
362 5360 : while (**sflagset)
363 : {
364 5360 : switch (Conf->flagMode)
365 : {
366 4571 : case FM_LONG:
367 : case FM_CHAR:
368 4571 : clen = ts_copychar_cstr(sflag, *sflagset);
369 4571 : sflag += clen;
370 :
371 : /* Go to start of the next flag */
372 4571 : *sflagset += clen;
373 :
374 : /* Check if we get all characters of flag */
375 4571 : maxstep--;
376 4571 : stop = (maxstep == 0);
377 4571 : break;
378 789 : case FM_NUM:
379 789 : errno = 0;
380 789 : s = strtol(*sflagset, &next, 10);
381 789 : if (*sflagset == next || errno == ERANGE)
382 4 : ereport(ERROR,
383 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
384 : errmsg("invalid affix flag \"%s\"", *sflagset)));
385 785 : if (s < 0 || s > FLAGNUM_MAXSIZE)
386 0 : ereport(ERROR,
387 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
388 : errmsg("affix flag \"%s\" is out of range",
389 : *sflagset)));
390 785 : sflag += sprintf(sflag, "%0d", s);
391 :
392 : /* Go to start of the next flag */
393 785 : *sflagset = next;
394 1219 : while (**sflagset)
395 : {
396 868 : if (isdigit((unsigned char) **sflagset))
397 : {
398 434 : if (!met_comma)
399 0 : ereport(ERROR,
400 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
401 : errmsg("invalid affix flag \"%s\"",
402 : *sflagset)));
403 434 : break;
404 : }
405 434 : else if (t_iseq(*sflagset, ','))
406 : {
407 434 : if (met_comma)
408 0 : ereport(ERROR,
409 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
410 : errmsg("invalid affix flag \"%s\"",
411 : *sflagset)));
412 434 : met_comma = true;
413 : }
414 0 : else if (!isspace((unsigned char) **sflagset))
415 : {
416 0 : ereport(ERROR,
417 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
418 : errmsg("invalid character in affix flag \"%s\"",
419 : *sflagset)));
420 : }
421 :
422 434 : *sflagset += pg_mblen_cstr(*sflagset);
423 : }
424 785 : stop = true;
425 785 : break;
426 0 : default:
427 0 : elog(ERROR, "unrecognized type of Conf->flagMode: %d",
428 : Conf->flagMode);
429 : }
430 :
431 5356 : if (stop)
432 4082 : break;
433 : }
434 :
435 4082 : if (Conf->flagMode == FM_LONG && maxstep > 0)
436 0 : ereport(ERROR,
437 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
438 : errmsg("invalid affix flag \"%s\" with \"long\" flag value",
439 : sbuf)));
440 :
441 4082 : *sflag = '\0';
442 4082 : }
443 :
444 : /*
445 : * Checks if the affix set Conf->AffixData[affix] contains affixflag.
446 : * Conf->AffixData[affix] does not contain affixflag if this flag is not used
447 : * actually by the .dict file.
448 : *
449 : * Conf: current dictionary.
450 : * affix: index of the Conf->AffixData array.
451 : * affixflag: the affix flag.
452 : *
453 : * Returns true if the string Conf->AffixData[affix] contains affixflag,
454 : * otherwise returns false.
455 : */
456 : static bool
457 1650 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
458 : {
459 : const char *flagcur;
460 : char flag[BUFSIZ];
461 :
462 1650 : if (*affixflag == 0)
463 530 : return true;
464 :
465 : Assert(affix < Conf->nAffixData);
466 :
467 1120 : flagcur = Conf->AffixData[affix];
468 :
469 3194 : while (*flagcur)
470 : {
471 2466 : getNextFlagFromString(Conf, &flagcur, flag);
472 : /* Compare first affix flag in flagcur with affixflag */
473 2466 : if (strcmp(flag, affixflag) == 0)
474 392 : return true;
475 : }
476 :
477 : /* Could not find affixflag */
478 728 : return false;
479 : }
480 :
481 : /*
482 : * Adds the new word into the temporary array Spell.
483 : *
484 : * Conf: current dictionary.
485 : * word: new word.
486 : * flag: set of affix flags. Single flag can be get by getNextFlagFromString().
487 : */
488 : static void
489 755 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
490 : {
491 755 : if (Conf->nspell >= Conf->mspell)
492 : {
493 83 : if (Conf->mspell)
494 : {
495 0 : Conf->mspell *= 2;
496 0 : Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
497 : }
498 : else
499 : {
500 83 : Conf->mspell = 1024 * 20;
501 83 : Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
502 : }
503 : }
504 755 : Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
505 755 : strcpy(Conf->Spell[Conf->nspell]->word, word);
506 1510 : Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
507 755 : ? cpstrdup(Conf, flag) : VoidString;
508 755 : Conf->nspell++;
509 755 : }
510 :
511 : /*
512 : * Imports dictionary into the temporary array Spell.
513 : *
514 : * Note caller must already have applied get_tsearch_config_filename.
515 : *
516 : * Conf: current dictionary.
517 : * filename: path to the .dict file.
518 : */
519 : void
520 83 : NIImportDictionary(IspellDict *Conf, const char *filename)
521 : {
522 : tsearch_readline_state trst;
523 : char *line;
524 :
525 83 : if (!tsearch_readline_begin(&trst, filename))
526 0 : ereport(ERROR,
527 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
528 : errmsg("could not open dictionary file \"%s\": %m",
529 : filename)));
530 :
531 838 : while ((line = tsearch_readline(&trst)) != NULL)
532 : {
533 : char *s,
534 : *pstr;
535 :
536 : /* Set of affix flags */
537 : const char *flag;
538 :
539 : /* Extract flag from the line */
540 755 : flag = NULL;
541 755 : if ((s = findchar(line, '/')))
542 : {
543 672 : *s++ = '\0';
544 672 : flag = s;
545 2689 : while (*s)
546 : {
547 : /* we allow only single encoded flags for faster works */
548 2689 : if (pg_mblen_cstr(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
549 2017 : s++;
550 : else
551 : {
552 672 : *s = '\0';
553 672 : break;
554 : }
555 : }
556 : }
557 : else
558 83 : flag = "";
559 :
560 : /* Remove trailing spaces */
561 755 : s = line;
562 5481 : while (*s)
563 : {
564 4809 : if (isspace((unsigned char) *s))
565 : {
566 83 : *s = '\0';
567 83 : break;
568 : }
569 4726 : s += pg_mblen_cstr(s);
570 : }
571 755 : pstr = lowerstr_ctx(Conf, line);
572 :
573 755 : NIAddSpell(Conf, pstr, flag);
574 755 : pfree(pstr);
575 :
576 755 : pfree(line);
577 : }
578 83 : tsearch_readline_end(&trst);
579 83 : }
580 :
581 : /*
582 : * Searches a basic form of word in the prefix tree. This word was generated
583 : * using an affix rule. This rule may not be presented in an affix set of
584 : * a basic form of word.
585 : *
586 : * For example, we have the entry in the .dict file:
587 : * meter/GMD
588 : *
589 : * The affix rule with the flag S:
590 : * SFX S y ies [^aeiou]y
591 : * is not presented here.
592 : *
593 : * The affix rule with the flag M:
594 : * SFX M 0 's .
595 : * is presented here.
596 : *
597 : * Conf: current dictionary.
598 : * word: basic form of word.
599 : * affixflag: affix flag, by which a basic form of word was generated.
600 : * flag: compound flag used to compare with StopMiddle->compoundflag.
601 : *
602 : * Returns 1 if the word was found in the prefix tree, else returns 0.
603 : */
604 : static int
605 2495 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
606 : {
607 2495 : SPNode *node = Conf->Dictionary;
608 : SPNodeData *StopLow,
609 : *StopHigh,
610 : *StopMiddle;
611 2495 : const uint8 *ptr = (const uint8 *) word;
612 :
613 2495 : flag &= FF_COMPOUNDFLAGMASK;
614 :
615 11620 : while (node && *ptr)
616 : {
617 11020 : StopLow = node->data;
618 11020 : StopHigh = node->data + node->length;
619 15765 : while (StopLow < StopHigh)
620 : {
621 14710 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
622 14710 : if (StopMiddle->val == *ptr)
623 : {
624 9965 : if (*(ptr + 1) == '\0' && StopMiddle->isword)
625 : {
626 955 : if (flag == 0)
627 : {
628 : /*
629 : * The word can be formed only with another word. And
630 : * in the flag parameter there is not a sign that we
631 : * search compound words.
632 : */
633 605 : if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
634 0 : return 0;
635 : }
636 350 : else if ((flag & StopMiddle->compoundflag) == 0)
637 0 : return 0;
638 :
639 : /*
640 : * Check if this affix rule is presented in the affix set
641 : * with index StopMiddle->affix.
642 : */
643 955 : if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
644 840 : return 1;
645 : }
646 9125 : node = StopMiddle->node;
647 9125 : ptr++;
648 9125 : break;
649 : }
650 4745 : else if (StopMiddle->val < *ptr)
651 1610 : StopLow = StopMiddle + 1;
652 : else
653 3135 : StopHigh = StopMiddle;
654 : }
655 10180 : if (StopLow >= StopHigh)
656 1055 : break;
657 : }
658 1655 : return 0;
659 : }
660 :
661 : /*
662 : * Adds a new affix rule to the Affix field.
663 : *
664 : * Conf: current dictionary.
665 : * flag: affix flag ('\' in the below example).
666 : * flagflags: set of flags from the flagval field for this affix rule. This set
667 : * is listed after '/' character in the added string (repl).
668 : *
669 : * For example L flag in the hunspell_sample.affix:
670 : * SFX \ 0 Y/L [^Y]
671 : *
672 : * mask: condition for search ('[^Y]' in the above example).
673 : * find: stripping characters from beginning (at prefix) or end (at suffix)
674 : * of the word ('0' in the above example, 0 means that there is not
675 : * stripping character).
676 : * repl: adding string after stripping ('Y' in the above example).
677 : * type: FF_SUFFIX or FF_PREFIX.
678 : */
679 : static void
680 688 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
681 : const char *find, const char *repl, int type)
682 : {
683 : AFFIX *Affix;
684 :
685 688 : if (Conf->naffixes >= Conf->maffixes)
686 : {
687 83 : if (Conf->maffixes)
688 : {
689 0 : Conf->maffixes *= 2;
690 0 : Conf->Affix = (AFFIX *) repalloc(Conf->Affix, Conf->maffixes * sizeof(AFFIX));
691 : }
692 : else
693 : {
694 83 : Conf->maffixes = 16;
695 83 : Conf->Affix = palloc_array(AFFIX, Conf->maffixes);
696 : }
697 : }
698 :
699 688 : Affix = Conf->Affix + Conf->naffixes;
700 :
701 : /* This affix rule can be applied for words with any ending */
702 688 : if (strcmp(mask, ".") == 0 || *mask == '\0')
703 : {
704 166 : Affix->issimple = 1;
705 166 : Affix->isregis = 0;
706 : }
707 : /* This affix rule will use regis to search word ending */
708 522 : else if (RS_isRegis(mask))
709 : {
710 436 : Affix->issimple = 0;
711 436 : Affix->isregis = 1;
712 436 : RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
713 436 : *mask ? mask : VoidString);
714 : }
715 : /* This affix rule will use regex_t to search word ending */
716 : else
717 : {
718 : int masklen;
719 : int wmasklen;
720 : int err;
721 : pg_wchar *wmask;
722 : char *tmask;
723 :
724 86 : Affix->issimple = 0;
725 86 : Affix->isregis = 0;
726 86 : tmask = (char *) tmpalloc(strlen(mask) + 3);
727 86 : if (type == FF_SUFFIX)
728 86 : sprintf(tmask, "%s$", mask);
729 : else
730 0 : sprintf(tmask, "^%s", mask);
731 :
732 86 : masklen = strlen(tmask);
733 86 : wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
734 86 : wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
735 :
736 : /*
737 : * The regex and all internal state created by pg_regcomp are
738 : * allocated in the dictionary's memory context, and will be freed
739 : * automatically when it is destroyed.
740 : */
741 86 : Affix->reg.pregex = palloc_object(regex_t);
742 86 : err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen,
743 : REG_ADVANCED | REG_NOSUB,
744 : DEFAULT_COLLATION_OID);
745 86 : if (err)
746 : {
747 : char errstr[100];
748 :
749 0 : pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr));
750 0 : ereport(ERROR,
751 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
752 : errmsg("invalid regular expression: %s", errstr)));
753 : }
754 : }
755 :
756 688 : Affix->flagflags = flagflags;
757 688 : if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
758 : {
759 125 : if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
760 125 : Affix->flagflags |= FF_COMPOUNDFLAG;
761 : }
762 688 : Affix->flag = cpstrdup(Conf, flag);
763 688 : Affix->type = type;
764 :
765 688 : Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
766 688 : if ((Affix->replen = strlen(repl)) > 0)
767 666 : Affix->repl = cpstrdup(Conf, repl);
768 : else
769 22 : Affix->repl = VoidString;
770 688 : Conf->naffixes++;
771 688 : }
772 :
/*
 * Parsing states for parse_affentry() and friends.
 *
 * get_nextfield() uses only PAE_WAIT_MASK ("no field character seen yet")
 * and PAE_INMASK ("inside a field").  parse_ooaffentry() uses the WAIT_*
 * states to record which field it expects next, and parse_affentry() uses
 * the WAIT_ and IN states of the mask, find and repl parts.
 */
#define PAE_WAIT_MASK 0
#define PAE_INMASK 1
#define PAE_WAIT_FIND 2
#define PAE_INFIND 3
#define PAE_WAIT_REPL 4
#define PAE_INREPL 5
#define PAE_WAIT_TYPE 6
#define PAE_WAIT_FLAG 7
782 :
783 : /*
784 : * Parse next space-separated field of an .affix file line.
785 : *
786 : * *str is the input pointer (will be advanced past field)
787 : * next is where to copy the field value to, with null termination
788 : *
789 : * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
790 : *
791 : * Returns true if we found a field, false if not.
792 : */
793 : static bool
794 6463 : get_nextfield(char **str, char *next)
795 : {
796 6463 : int state = PAE_WAIT_MASK;
797 6463 : int avail = BUFSIZ;
798 :
799 27642 : while (**str)
800 : {
801 26883 : int clen = pg_mblen_cstr(*str);
802 :
803 26883 : if (state == PAE_WAIT_MASK)
804 : {
805 11921 : if (t_iseq(*str, '#'))
806 231 : return false;
807 11690 : else if (!isspace((unsigned char) **str))
808 : {
809 5473 : if (clen < avail)
810 : {
811 5473 : ts_copychar_with_len(next, *str, clen);
812 5473 : next += clen;
813 5473 : avail -= clen;
814 : }
815 5473 : state = PAE_INMASK;
816 : }
817 : }
818 : else /* state == PAE_INMASK */
819 : {
820 14962 : if (isspace((unsigned char) **str))
821 : {
822 5473 : *next = '\0';
823 5473 : return true;
824 : }
825 : else
826 : {
827 9489 : if (clen < avail)
828 : {
829 9489 : ts_copychar_with_len(next, *str, clen);
830 9489 : next += clen;
831 9489 : avail -= clen;
832 : }
833 : }
834 : }
835 21179 : *str += clen;
836 : }
837 :
838 759 : *next = '\0';
839 :
840 759 : return (state == PAE_INMASK); /* OK if we got a nonempty field */
841 : }
842 :
843 : /*
844 : * Parses entry of an .affix file of MySpell or Hunspell format.
845 : *
846 : * An .affix file entry has the following format:
847 : * - header
848 : * <type> <flag> <cross_flag> <flag_count>
849 : * - fields after header:
850 : * <type> <flag> <find> <replace> <mask>
851 : *
852 : * str is the input line
853 : * field values are returned to type etc, which must be buffers of size BUFSIZ.
854 : *
855 : * Returns number of fields found; any omitted fields are set to empty strings.
856 : */
857 : static int
858 1489 : parse_ooaffentry(char *str, char *type, char *flag, char *find,
859 : char *repl, char *mask)
860 : {
861 1489 : int state = PAE_WAIT_TYPE;
862 1489 : int fields_read = 0;
863 1489 : bool valid = false;
864 :
865 1489 : *type = *flag = *find = *repl = *mask = '\0';
866 :
867 6463 : while (*str)
868 : {
869 6463 : switch (state)
870 : {
871 1489 : case PAE_WAIT_TYPE:
872 1489 : valid = get_nextfield(&str, type);
873 1489 : state = PAE_WAIT_FLAG;
874 1489 : break;
875 1489 : case PAE_WAIT_FLAG:
876 1489 : valid = get_nextfield(&str, flag);
877 1489 : state = PAE_WAIT_FIND;
878 1489 : break;
879 1489 : case PAE_WAIT_FIND:
880 1489 : valid = get_nextfield(&str, find);
881 1489 : state = PAE_WAIT_REPL;
882 1489 : break;
883 998 : case PAE_WAIT_REPL:
884 998 : valid = get_nextfield(&str, repl);
885 998 : state = PAE_WAIT_MASK;
886 998 : break;
887 998 : case PAE_WAIT_MASK:
888 998 : valid = get_nextfield(&str, mask);
889 998 : state = -1; /* force loop exit */
890 998 : break;
891 0 : default:
892 0 : elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
893 : state);
894 : break;
895 : }
896 6463 : if (valid)
897 5473 : fields_read++;
898 : else
899 990 : break; /* early EOL */
900 5473 : if (state < 0)
901 499 : break; /* got all fields */
902 : }
903 :
904 1489 : return fields_read;
905 : }
906 :
907 : /*
908 : * Parses entry of an .affix file of Ispell format
909 : *
910 : * An .affix file entry has the following format:
911 : * <mask> > [-<find>,]<replace>
912 : */
913 : static bool
914 189 : parse_affentry(char *str, char *mask, char *find, char *repl)
915 : {
916 189 : int state = PAE_WAIT_MASK;
917 189 : char *pmask = mask,
918 189 : *pfind = find,
919 189 : *prepl = repl;
920 :
921 189 : *mask = *find = *repl = '\0';
922 :
923 4968 : while (*str)
924 : {
925 4968 : int clen = pg_mblen_cstr(str);
926 :
927 4968 : if (state == PAE_WAIT_MASK)
928 : {
929 459 : if (t_iseq(str, '#'))
930 0 : return false;
931 459 : else if (!isspace((unsigned char) *str))
932 : {
933 189 : pmask += ts_copychar_with_len(pmask, str, clen);
934 189 : state = PAE_INMASK;
935 : }
936 : }
937 4509 : else if (state == PAE_INMASK)
938 : {
939 1836 : if (t_iseq(str, '>'))
940 : {
941 189 : *pmask = '\0';
942 189 : state = PAE_WAIT_FIND;
943 : }
944 1647 : else if (!isspace((unsigned char) *str))
945 : {
946 648 : pmask += ts_copychar_with_len(pmask, str, clen);
947 : }
948 : }
949 2673 : else if (state == PAE_WAIT_FIND)
950 : {
951 756 : if (t_iseq(str, '-'))
952 : {
953 27 : state = PAE_INFIND;
954 : }
955 729 : else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
956 : {
957 162 : prepl += ts_copychar_with_len(prepl, str, clen);
958 162 : state = PAE_INREPL;
959 : }
960 567 : else if (!isspace((unsigned char) *str))
961 0 : ereport(ERROR,
962 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
963 : errmsg("syntax error")));
964 : }
965 1917 : else if (state == PAE_INFIND)
966 : {
967 54 : if (t_iseq(str, ','))
968 : {
969 27 : *pfind = '\0';
970 27 : state = PAE_WAIT_REPL;
971 : }
972 27 : else if (t_isalpha_cstr(str))
973 : {
974 27 : pfind += ts_copychar_with_len(pfind, str, clen);
975 : }
976 0 : else if (!isspace((unsigned char) *str))
977 0 : ereport(ERROR,
978 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
979 : errmsg("syntax error")));
980 : }
981 1863 : else if (state == PAE_WAIT_REPL)
982 : {
983 27 : if (t_iseq(str, '-'))
984 : {
985 0 : break; /* void repl */
986 : }
987 27 : else if (t_isalpha_cstr(str))
988 : {
989 27 : prepl += ts_copychar_with_len(prepl, str, clen);
990 27 : state = PAE_INREPL;
991 : }
992 0 : else if (!isspace((unsigned char) *str))
993 0 : ereport(ERROR,
994 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
995 : errmsg("syntax error")));
996 : }
997 1836 : else if (state == PAE_INREPL)
998 : {
999 1836 : if (t_iseq(str, '#'))
1000 : {
1001 189 : *prepl = '\0';
1002 189 : break;
1003 : }
1004 1647 : else if (t_isalpha_cstr(str))
1005 : {
1006 243 : prepl += ts_copychar_with_len(prepl, str, clen);
1007 : }
1008 1404 : else if (!isspace((unsigned char) *str))
1009 0 : ereport(ERROR,
1010 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1011 : errmsg("syntax error")));
1012 : }
1013 : else
1014 0 : elog(ERROR, "unrecognized state in parse_affentry: %d", state);
1015 :
1016 4779 : str += clen;
1017 : }
1018 :
1019 189 : *pmask = *pfind = *prepl = '\0';
1020 :
1021 189 : return (*mask && (*find || *repl));
1022 : }
1023 :
1024 : /*
1025 : * Sets a Hunspell options depending on flag type.
1026 : */
1027 : static void
1028 1839 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
1029 : char *s, uint32 val)
1030 : {
1031 1839 : if (Conf->flagMode == FM_NUM)
1032 : {
1033 : char *next;
1034 : int i;
1035 :
1036 399 : errno = 0;
1037 399 : i = strtol(s, &next, 10);
1038 399 : if (s == next || errno == ERANGE)
1039 0 : ereport(ERROR,
1040 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1041 : errmsg("invalid affix flag \"%s\"", s)));
1042 399 : if (i < 0 || i > FLAGNUM_MAXSIZE)
1043 0 : ereport(ERROR,
1044 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1045 : errmsg("affix flag \"%s\" is out of range", s)));
1046 :
1047 399 : entry->flag.i = i;
1048 : }
1049 : else
1050 1440 : entry->flag.s = cpstrdup(Conf, s);
1051 :
1052 1839 : entry->flagMode = Conf->flagMode;
1053 1839 : entry->value = val;
1054 1839 : }
1055 :
1056 : /*
1057 : * Sets up a correspondence for the affix parameter with the affix flag.
1058 : *
1059 : * Conf: current dictionary.
1060 : * s: affix flag in string.
1061 : * val: affix parameter.
1062 : */
1063 : static void
1064 223 : addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
1065 : {
1066 : CompoundAffixFlag *newValue;
1067 : char sbuf[BUFSIZ];
1068 : char *sflag;
1069 :
1070 419 : while (*s && isspace((unsigned char) *s))
1071 196 : s += pg_mblen_cstr(s);
1072 :
1073 223 : if (!*s)
1074 0 : ereport(ERROR,
1075 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1076 : errmsg("syntax error")));
1077 :
1078 : /* Get flag without \n */
1079 223 : sflag = sbuf;
1080 660 : while (*s && !isspace((unsigned char) *s) && *s != '\n')
1081 : {
1082 437 : int clen = ts_copychar_cstr(sflag, s);
1083 :
1084 437 : sflag += clen;
1085 437 : s += clen;
1086 : }
1087 223 : *sflag = '\0';
1088 :
1089 : /* Resize array or allocate memory for array CompoundAffixFlag */
1090 223 : if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
1091 : {
1092 83 : if (Conf->mCompoundAffixFlag)
1093 : {
1094 0 : Conf->mCompoundAffixFlag *= 2;
1095 0 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1096 0 : repalloc(Conf->CompoundAffixFlags,
1097 0 : Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1098 : }
1099 : else
1100 : {
1101 83 : Conf->mCompoundAffixFlag = 10;
1102 83 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1103 83 : tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1104 : }
1105 : }
1106 :
1107 223 : newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
1108 :
1109 223 : setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
1110 :
1111 223 : Conf->usecompound = true;
1112 223 : Conf->nCompoundAffixFlag++;
1113 223 : }
1114 :
1115 : /*
1116 : * Returns a set of affix parameters which correspondence to the set of affix
1117 : * flags s.
1118 : */
1119 : static int
1120 798 : getCompoundAffixFlagValue(IspellDict *Conf, const char *s)
1121 : {
1122 798 : uint32 flag = 0;
1123 : CompoundAffixFlag *found,
1124 : key;
1125 : char sflag[BUFSIZ];
1126 : const char *flagcur;
1127 :
1128 798 : if (Conf->nCompoundAffixFlag == 0)
1129 0 : return 0;
1130 :
1131 798 : flagcur = s;
1132 2414 : while (*flagcur)
1133 : {
1134 1620 : getNextFlagFromString(Conf, &flagcur, sflag);
1135 1616 : setCompoundAffixFlagValue(Conf, &key, sflag, 0);
1136 :
1137 : found = (CompoundAffixFlag *)
1138 1616 : bsearch(&key, Conf->CompoundAffixFlags,
1139 1616 : Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
1140 : cmpcmdflag);
1141 1616 : if (found != NULL)
1142 374 : flag |= found->value;
1143 : }
1144 :
1145 794 : return flag;
1146 : }
1147 :
1148 : /*
1149 : * Returns a flag set using the s parameter.
1150 : *
1151 : * If Conf->useFlagAliases is true then the s parameter is index of the
1152 : * Conf->AffixData array and function returns its entry.
1153 : * Else function returns the s parameter.
1154 : */
1155 : static const char *
1156 98 : getAffixFlagSet(IspellDict *Conf, char *s)
1157 : {
1158 98 : if (Conf->useFlagAliases && *s != '\0')
1159 : {
1160 : int curaffix;
1161 : char *end;
1162 :
1163 63 : errno = 0;
1164 63 : curaffix = strtol(s, &end, 10);
1165 63 : if (s == end || errno == ERANGE)
1166 0 : ereport(ERROR,
1167 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1168 : errmsg("invalid affix alias \"%s\"", s)));
1169 :
1170 63 : if (curaffix > 0 && curaffix < Conf->nAffixData)
1171 :
1172 : /*
1173 : * Do not subtract 1 from curaffix because empty string was added
1174 : * in NIImportOOAffixes
1175 : */
1176 63 : return Conf->AffixData[curaffix];
1177 0 : else if (curaffix > Conf->nAffixData)
1178 0 : ereport(ERROR,
1179 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1180 : errmsg("invalid affix alias \"%s\"", s)));
1181 0 : return VoidString;
1182 : }
1183 : else
1184 35 : return s;
1185 : }
1186 :
/*
 * Import an affix file that follows MySpell or Hunspell format.
 *
 * Conf: current dictionary.
 * filename: path to the .affix file.
 *
 * The file is read twice.  The first pass only looks for the compound-related
 * directives (COMPOUNDFLAG, COMPOUNDBEGIN, ...) and the FLAG directive, so
 * that Conf->CompoundAffixFlags and Conf->flagMode are known (and the flag
 * array is sorted for bsearch) before any affix rule is interpreted.  The
 * second pass handles AF (alias) lines and the PFX/SFX rules themselves.
 */
static void
NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
    char        type[BUFSIZ],
               *ptype = NULL;
    char        sflag[BUFSIZ];
    char        mask[BUFSIZ],
               *pmask;
    char        find[BUFSIZ],
               *pfind;
    char        repl[BUFSIZ],
               *prepl;
    bool        isSuffix = false;
    int         naffix = 0,
                curaffix = 0;
    int         sflaglen = 0;
    char        flagflags = 0;
    tsearch_readline_state trst;
    char       *recoded;

    /* read file to find any flag */
    Conf->usecompound = false;
    Conf->useFlagAliases = false;
    Conf->flagMode = FM_CHAR;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    /* First pass: collect compound flags and the flag mode */
    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        /* Skip empty lines, lines starting with whitespace, and comments */
        if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
        {
            pfree(recoded);
            continue;
        }

        if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
                                      FF_COMPOUNDFLAG);
        else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
                                      FF_COMPOUNDBEGIN);
        else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
                                      FF_COMPOUNDLAST);
        /* COMPOUNDLAST and COMPOUNDEND are synonyms */
        else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
                                      FF_COMPOUNDLAST);
        else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
                                      FF_COMPOUNDMIDDLE);
        else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
            addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
                                      FF_COMPOUNDONLY);
        else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
            addCompoundAffixFlagValue(Conf,
                                      recoded + strlen("COMPOUNDPERMITFLAG"),
                                      FF_COMPOUNDPERMITFLAG);
        else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
            addCompoundAffixFlagValue(Conf,
                                      recoded + strlen("COMPOUNDFORBIDFLAG"),
                                      FF_COMPOUNDFORBIDFLAG);
        else if (STRNCMP(recoded, "FLAG") == 0)
        {
            char       *s = recoded + strlen("FLAG");

            while (*s && isspace((unsigned char) *s))
                s += pg_mblen_cstr(s);

            /* A bare FLAG line keeps the FM_CHAR mode set above */
            if (*s)
            {
                if (STRNCMP(s, "long") == 0)
                    Conf->flagMode = FM_LONG;
                else if (STRNCMP(s, "num") == 0)
                    Conf->flagMode = FM_NUM;
                else if (STRNCMP(s, "default") != 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("Ispell dictionary supports only "
                                    "\"default\", \"long\", "
                                    "and \"num\" flag values")));
            }
        }

        pfree(recoded);
    }
    tsearch_readline_end(&trst);

    /* Sort the collected flags; getCompoundAffixFlagValue() uses bsearch */
    if (Conf->nCompoundAffixFlag > 1)
        qsort(Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
              sizeof(CompoundAffixFlag), cmpcmdflag);

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open affix file \"%s\": %m",
                        filename)));

    /* Second pass: read aliases and affix rules */
    while ((recoded = tsearch_readline(&trst)) != NULL)
    {
        int         fields_read;

        if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
            goto nextline;

        /* presumably returns the number of fields it filled in -- confirm */
        fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);

        /* Keep only the lowercased type of the current line */
        if (ptype)
            pfree(ptype);
        ptype = lowerstr_ctx(Conf, type);

        /* First try to parse AF parameter (alias compression) */
        if (STRNCMP(ptype, "af") == 0)
        {
            /* First line is the number of aliases */
            if (!Conf->useFlagAliases)
            {
                Conf->useFlagAliases = true;
                naffix = atoi(sflag);
                if (naffix <= 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("invalid number of flag vector aliases")));

                /* Also reserve place for empty flag set */
                naffix++;

                /* Zero-filled, so aliases not yet read are NULL entries */
                Conf->AffixData = palloc0_array(const char *, naffix);
                Conf->lenAffixData = Conf->nAffixData = naffix;

                /* Add empty flag set into AffixData */
                Conf->AffixData[curaffix] = VoidString;
                curaffix++;
            }
            /* Other lines are aliases */
            else
            {
                if (curaffix < naffix)
                {
                    Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
                    curaffix++;
                }
                else
                    ereport(ERROR,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("number of aliases exceeds specified number %d",
                                    naffix - 1)));
            }
            goto nextline;
        }
        /* Else try to parse prefixes and suffixes */
        if (fields_read < 4 ||
            (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
            goto nextline;

        /* Ignore rules whose flag length does not suit the flag mode */
        sflaglen = strlen(sflag);
        if (sflaglen == 0
            || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
            || (sflaglen > 2 && Conf->flagMode == FM_LONG))
            goto nextline;

        /*--------
         * Affix header. For example:
         * SFX \ N 1
         *
         * The header sets isSuffix and flagflags, which stay in effect for
         * the affix field lines that follow it.
         *--------
         */
        if (fields_read == 4)
        {
            isSuffix = (STRNCMP(ptype, "sfx") == 0);
            if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
                flagflags = FF_CROSSPRODUCT;
            else
                flagflags = 0;
        }
        /*--------
         * Affix fields. For example:
         * SFX \ 0 Y/L [^Y]
         *--------
         */
        else
        {
            char       *ptr;
            int         aflg = 0;

            /* Get flags after '/' (flags are case sensitive) */
            if ((ptr = strchr(repl, '/')) != NULL)
                aflg |= getCompoundAffixFlagValue(Conf,
                                                  getAffixFlagSet(Conf,
                                                                  ptr + 1));
            /* Get lowercased version of string before '/' */
            prepl = lowerstr_ctx(Conf, repl);
            if ((ptr = strchr(prepl, '/')) != NULL)
                *ptr = '\0';
            pfind = lowerstr_ctx(Conf, find);
            pmask = lowerstr_ctx(Conf, mask);
            /* A leading '0' in find or repl stands for the empty string */
            if (t_iseq(find, '0'))
                *pfind = '\0';
            if (t_iseq(repl, '0'))
                *prepl = '\0';

            NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
                       isSuffix ? FF_SUFFIX : FF_PREFIX);
            pfree(prepl);
            pfree(pfind);
            pfree(pmask);
        }

nextline:
        pfree(recoded);
    }

    tsearch_readline_end(&trst);
    if (ptype)
        pfree(ptype);
}
1412 :
1413 : /*
1414 : * import affixes
1415 : *
1416 : * Note caller must already have applied get_tsearch_config_filename
1417 : *
1418 : * This function is responsible for parsing ispell ("old format") affix files.
1419 : * If we realize that the file contains new-format commands, we pass off the
1420 : * work to NIImportOOAffixes(), which will re-read the whole file.
1421 : */
1422 : void
1423 83 : NIImportAffixes(IspellDict *Conf, const char *filename)
1424 : {
1425 83 : char *pstr = NULL;
1426 : char flag[BUFSIZ];
1427 : char mask[BUFSIZ];
1428 : char find[BUFSIZ];
1429 : char repl[BUFSIZ];
1430 : char *s;
1431 83 : bool suffixes = false;
1432 83 : bool prefixes = false;
1433 83 : char flagflags = 0;
1434 : tsearch_readline_state trst;
1435 83 : bool oldformat = false;
1436 83 : char *recoded = NULL;
1437 :
1438 83 : if (!tsearch_readline_begin(&trst, filename))
1439 0 : ereport(ERROR,
1440 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1441 : errmsg("could not open affix file \"%s\": %m",
1442 : filename)));
1443 :
1444 83 : Conf->usecompound = false;
1445 83 : Conf->useFlagAliases = false;
1446 83 : Conf->flagMode = FM_CHAR;
1447 :
1448 785 : while ((recoded = tsearch_readline(&trst)) != NULL)
1449 : {
1450 758 : pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
1451 :
1452 : /* Skip comments and empty lines */
1453 758 : if (*pstr == '#' || *pstr == '\n')
1454 243 : goto nextline;
1455 :
1456 515 : if (STRNCMP(pstr, "compoundwords") == 0)
1457 : {
1458 : /* Find case-insensitive L flag in non-lowercased string */
1459 27 : s = findchar2(recoded, 'l', 'L');
1460 27 : if (s)
1461 : {
1462 135 : while (*s && !isspace((unsigned char) *s))
1463 108 : s += pg_mblen_cstr(s);
1464 54 : while (*s && isspace((unsigned char) *s))
1465 27 : s += pg_mblen_cstr(s);
1466 :
1467 27 : if (*s && pg_mblen_cstr(s) == 1)
1468 : {
1469 27 : addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
1470 27 : Conf->usecompound = true;
1471 : }
1472 27 : oldformat = true;
1473 27 : goto nextline;
1474 : }
1475 : }
1476 488 : if (STRNCMP(pstr, "suffixes") == 0)
1477 : {
1478 27 : suffixes = true;
1479 27 : prefixes = false;
1480 27 : oldformat = true;
1481 27 : goto nextline;
1482 : }
1483 461 : if (STRNCMP(pstr, "prefixes") == 0)
1484 : {
1485 27 : suffixes = false;
1486 27 : prefixes = true;
1487 27 : oldformat = true;
1488 27 : goto nextline;
1489 : }
1490 434 : if (STRNCMP(pstr, "flag") == 0)
1491 : {
1492 232 : s = recoded + 4; /* we need non-lowercased string */
1493 232 : flagflags = 0;
1494 :
1495 464 : while (*s && isspace((unsigned char) *s))
1496 232 : s += pg_mblen_cstr(s);
1497 :
1498 232 : if (*s == '*')
1499 : {
1500 135 : flagflags |= FF_CROSSPRODUCT;
1501 135 : s++;
1502 : }
1503 97 : else if (*s == '~')
1504 : {
1505 27 : flagflags |= FF_COMPOUNDONLY;
1506 27 : s++;
1507 : }
1508 :
1509 232 : if (*s == '\\')
1510 27 : s++;
1511 :
1512 : /*
1513 : * An old-format flag is a single ASCII character; we expect it to
1514 : * be followed by EOL, whitespace, or ':'. Otherwise this is a
1515 : * new-format flag command.
1516 : */
1517 232 : if (*s && pg_mblen_cstr(s) == 1)
1518 : {
1519 232 : flag[0] = *s++;
1520 232 : flag[1] = '\0';
1521 :
1522 232 : if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
1523 43 : isspace((unsigned char) *s))
1524 : {
1525 189 : oldformat = true;
1526 189 : goto nextline;
1527 : }
1528 : }
1529 43 : goto isnewformat;
1530 : }
1531 202 : if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
1532 189 : STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
1533 189 : STRNCMP(recoded, "PFX") == 0 ||
1534 189 : STRNCMP(recoded, "SFX") == 0)
1535 13 : goto isnewformat;
1536 :
1537 189 : if ((!suffixes) && (!prefixes))
1538 0 : goto nextline;
1539 :
1540 189 : if (!parse_affentry(pstr, mask, find, repl))
1541 0 : goto nextline;
1542 :
1543 189 : NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
1544 :
1545 702 : nextline:
1546 702 : pfree(recoded);
1547 702 : pfree(pstr);
1548 : }
1549 27 : tsearch_readline_end(&trst);
1550 27 : return;
1551 :
1552 56 : isnewformat:
1553 56 : if (oldformat)
1554 0 : ereport(ERROR,
1555 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1556 : errmsg("affix file contains both old-style and new-style commands")));
1557 56 : tsearch_readline_end(&trst);
1558 :
1559 56 : NIImportOOAffixes(Conf, filename);
1560 : }
1561 :
1562 : /*
1563 : * Merges two affix flag sets and stores a new affix flag set into
1564 : * Conf->AffixData.
1565 : *
1566 : * Returns index of a new affix flag set.
1567 : */
1568 : static int
1569 41 : MergeAffix(IspellDict *Conf, int a1, int a2)
1570 : {
1571 : const char **ptr;
1572 :
1573 : Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
1574 :
1575 : /* Do not merge affix flags if one of affix flags is empty */
1576 41 : if (*Conf->AffixData[a1] == '\0')
1577 0 : return a2;
1578 41 : else if (*Conf->AffixData[a2] == '\0')
1579 0 : return a1;
1580 :
1581 : /* Double the size of AffixData if there's not enough space */
1582 41 : if (Conf->nAffixData + 1 >= Conf->lenAffixData)
1583 : {
1584 41 : Conf->lenAffixData *= 2;
1585 41 : Conf->AffixData = (const char **) repalloc(Conf->AffixData,
1586 41 : sizeof(char *) * Conf->lenAffixData);
1587 : }
1588 :
1589 41 : ptr = Conf->AffixData + Conf->nAffixData;
1590 41 : if (Conf->flagMode == FM_NUM)
1591 : {
1592 18 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1593 : strlen(Conf->AffixData[a2]) +
1594 : 1 /* comma */ + 1 /* \0 */ );
1595 :
1596 18 : sprintf(p, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1597 18 : *ptr = p;
1598 : }
1599 : else
1600 : {
1601 23 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1602 : strlen(Conf->AffixData[a2]) +
1603 : 1 /* \0 */ );
1604 :
1605 23 : sprintf(p, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1606 23 : *ptr = p;
1607 : }
1608 41 : ptr++;
1609 41 : *ptr = NULL;
1610 41 : Conf->nAffixData++;
1611 :
1612 41 : return Conf->nAffixData - 1;
1613 : }
1614 :
1615 : /*
1616 : * Returns a set of affix parameters which correspondence to the set of affix
1617 : * flags with the given index.
1618 : */
1619 : static uint32
1620 700 : makeCompoundFlags(IspellDict *Conf, int affix)
1621 : {
1622 : Assert(affix < Conf->nAffixData);
1623 :
1624 700 : return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
1625 : FF_COMPOUNDFLAGMASK);
1626 : }
1627 :
/*
 * Makes a prefix tree for the given level.
 *
 * Conf: current dictionary.
 * low: lower index of the Conf->Spell array.
 * high: upper index of the Conf->Spell array.
 * level: current prefix tree level.
 *
 * Builds one node covering Conf->Spell[low .. high - 1], with one entry per
 * distinct byte found at position "level" of those words, and recurses for
 * each run of words sharing that byte.  Returns NULL if no word in the range
 * is longer than "level".
 *
 * The run detection compares each word only with the previous one, so the
 * range is presumably expected to be sorted (NISortDictionary sorts
 * Conf->Spell with cmpspell before the top-level call).
 */
static SPNode *
mkSPNode(IspellDict *Conf, int low, int high, int level)
{
    int         i;
    int         nchar = 0;
    char        lastchar = '\0';
    SPNode     *rs;
    SPNodeData *data;
    int         lownew = low;

    /* First pass: count distinct characters at this level */
    for (i = low; i < high; i++)
        if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
        {
            nchar++;
            lastchar = Conf->Spell[i]->word[level];
        }

    if (!nchar)
        return NULL;

    rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
    rs->length = nchar;
    data = rs->data;

    /* Second pass: fill one SPNodeData per distinct character */
    lastchar = '\0';
    for (i = low; i < high; i++)
        if (Conf->Spell[i]->p.d.len > level)
        {
            if (lastchar != Conf->Spell[i]->word[level])
            {
                /* A new run starts; close the previous one, if any */
                if (lastchar)
                {
                    /* Next level of the prefix tree */
                    data->node = mkSPNode(Conf, lownew, i, level + 1);
                    lownew = i;
                    data++;
                }
                lastchar = Conf->Spell[i]->word[level];
            }
            data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
            /* The word ends exactly at this level */
            if (Conf->Spell[i]->p.d.len == level + 1)
            {
                bool        clearCompoundOnly = false;

                /* Same word seen before with a different affix flag set */
                if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
                {
                    /*
                     * MergeAffix called a few times. If one of word is
                     * allowed to be in compound word and another isn't, then
                     * clear FF_COMPOUNDONLY flag.
                     */

                    clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
                                         & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
                        ? false : true;
                    data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
                }
                else
                    data->affix = Conf->Spell[i]->p.d.affix;
                data->isword = 1;

                data->compoundflag = makeCompoundFlags(Conf, data->affix);

                /* FF_COMPOUNDONLY implies FF_COMPOUNDFLAG */
                if ((data->compoundflag & FF_COMPOUNDONLY) &&
                    (data->compoundflag & FF_COMPOUNDFLAG) == 0)
                    data->compoundflag |= FF_COMPOUNDFLAG;

                if (clearCompoundOnly)
                    data->compoundflag &= ~FF_COMPOUNDONLY;
            }
        }

    /* Next level of the prefix tree */
    data->node = mkSPNode(Conf, lownew, high, level + 1);

    return rs;
}
1713 :
1714 : /*
1715 : * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
1716 : * and affixes.
1717 : */
1718 : void
1719 83 : NISortDictionary(IspellDict *Conf)
1720 : {
1721 : int i;
1722 : int naffix;
1723 : int curaffix;
1724 :
1725 : /* compress affixes */
1726 :
1727 : /*
1728 : * If we use flag aliases then we need to use Conf->AffixData filled in
1729 : * the NIImportOOAffixes().
1730 : */
1731 83 : if (Conf->useFlagAliases)
1732 : {
1733 164 : for (i = 0; i < Conf->nspell; i++)
1734 : {
1735 : char *end;
1736 :
1737 151 : if (*Conf->Spell[i]->p.flag != '\0')
1738 : {
1739 138 : errno = 0;
1740 138 : curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
1741 138 : if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
1742 4 : ereport(ERROR,
1743 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1744 : errmsg("invalid affix alias \"%s\"",
1745 : Conf->Spell[i]->p.flag)));
1746 134 : if (curaffix < 0 || curaffix >= Conf->nAffixData)
1747 4 : ereport(ERROR,
1748 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1749 : errmsg("invalid affix alias \"%s\"",
1750 : Conf->Spell[i]->p.flag)));
1751 130 : if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
1752 0 : ereport(ERROR,
1753 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1754 : errmsg("invalid affix alias \"%s\"",
1755 : Conf->Spell[i]->p.flag)));
1756 : }
1757 : else
1758 : {
1759 : /*
1760 : * If Conf->Spell[i]->p.flag is empty, then get empty value of
1761 : * Conf->AffixData (0 index).
1762 : */
1763 13 : curaffix = 0;
1764 : }
1765 :
1766 143 : Conf->Spell[i]->p.d.affix = curaffix;
1767 143 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1768 : }
1769 : }
1770 : /* Otherwise fill Conf->AffixData here */
1771 : else
1772 : {
1773 : /* Count the number of different flags used in the dictionary */
1774 62 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *),
1775 : cmpspellaffix);
1776 :
1777 62 : naffix = 0;
1778 606 : for (i = 0; i < Conf->nspell; i++)
1779 : {
1780 544 : if (i == 0 ||
1781 482 : strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
1782 482 : naffix++;
1783 : }
1784 :
1785 : /*
1786 : * Fill in Conf->AffixData with the affixes that were used in the
1787 : * dictionary. Replace textual flag-field of Conf->Spell entries with
1788 : * indexes into Conf->AffixData array.
1789 : */
1790 62 : Conf->AffixData = palloc0_array(const char *, naffix);
1791 :
1792 62 : curaffix = -1;
1793 606 : for (i = 0; i < Conf->nspell; i++)
1794 : {
1795 544 : if (i == 0 ||
1796 482 : strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
1797 : {
1798 482 : curaffix++;
1799 : Assert(curaffix < naffix);
1800 482 : Conf->AffixData[curaffix] = cpstrdup(Conf,
1801 482 : Conf->Spell[i]->p.flag);
1802 : }
1803 :
1804 544 : Conf->Spell[i]->p.d.affix = curaffix;
1805 544 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1806 : }
1807 :
1808 62 : Conf->lenAffixData = Conf->nAffixData = naffix;
1809 : }
1810 :
1811 : /* Start build a prefix tree */
1812 75 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
1813 75 : Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
1814 71 : }
1815 :
/*
 * Makes a prefix tree for the given level using the repl string of an affix
 * rule. Affixes with an empty replace string are not included in the prefix
 * tree; they are handled by mkVoidAffix().
 *
 * Conf: current dictionary.
 * low: lower index of the Conf->Affix array.
 * high: upper index of the Conf->Affix array.
 * level: current prefix tree level.
 * type: FF_SUFFIX or FF_PREFIX.
 *
 * Builds one node covering Conf->Affix[low .. high - 1], with one entry per
 * distinct character that GETCHAR() yields at "level", and recurses for each
 * run of affixes sharing that character.  Affixes whose repl string ends at
 * this level are attached to the entry of their character.  Returns NULL if
 * no affix in the range has a repl string longer than "level".
 */
static AffixNode *
mkANode(IspellDict *Conf, int low, int high, int level, int type)
{
    int         i;
    int         nchar = 0;
    uint8       lastchar = '\0';
    AffixNode  *rs;
    AffixNodeData *data;
    int         lownew = low;
    int         naff;
    AFFIX     **aff;

    /* First pass: count distinct characters at this level */
    for (i = low; i < high; i++)
        if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
        {
            nchar++;
            lastchar = GETCHAR(Conf->Affix + i, level, type);
        }

    if (!nchar)
        return NULL;

    /* Scratch list of the affixes ending at the current character */
    aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
    naff = 0;

    rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
    rs->length = nchar;
    data = rs->data;

    /* Second pass: fill one AffixNodeData per distinct character */
    lastchar = '\0';
    for (i = low; i < high; i++)
        if (Conf->Affix[i].replen > level)
        {
            if (lastchar != GETCHAR(Conf->Affix + i, level, type))
            {
                /* A new run starts; close the previous one, if any */
                if (lastchar)
                {
                    /* Next level of the prefix tree */
                    data->node = mkANode(Conf, lownew, i, level + 1, type);
                    if (naff)
                    {
                        /* Move the collected affixes into the entry */
                        data->naff = naff;
                        data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                        memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                        naff = 0;
                    }
                    data++;
                    lownew = i;
                }
                lastchar = GETCHAR(Conf->Affix + i, level, type);
            }
            data->val = GETCHAR(Conf->Affix + i, level, type);
            if (Conf->Affix[i].replen == level + 1)
            {                   /* affix stopped */
                aff[naff++] = Conf->Affix + i;
            }
        }

    /* Next level of the prefix tree */
    data->node = mkANode(Conf, lownew, high, level + 1, type);
    if (naff)
    {
        data->naff = naff;
        data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
        memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
        naff = 0;
    }

    pfree(aff);

    return rs;
}
1899 :
1900 : /*
1901 : * Makes the root void node in the prefix tree. The root void node is created
1902 : * for affixes which have empty replace string ("repl" field).
1903 : */
1904 : static void
1905 142 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1906 : {
1907 : int i,
1908 142 : cnt = 0;
1909 142 : int start = (issuffix) ? startsuffix : 0;
1910 142 : int end = (issuffix) ? Conf->naffixes : startsuffix;
1911 142 : AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1912 :
1913 142 : Affix->length = 1;
1914 142 : Affix->isvoid = 1;
1915 :
1916 142 : if (issuffix)
1917 : {
1918 71 : Affix->data->node = Conf->Suffix;
1919 71 : Conf->Suffix = Affix;
1920 : }
1921 : else
1922 : {
1923 71 : Affix->data->node = Conf->Prefix;
1924 71 : Conf->Prefix = Affix;
1925 : }
1926 :
1927 : /* Count affixes with empty replace string */
1928 714 : for (i = start; i < end; i++)
1929 572 : if (Conf->Affix[i].replen == 0)
1930 18 : cnt++;
1931 :
1932 : /* There is not affixes with empty replace string */
1933 142 : if (cnt == 0)
1934 124 : return;
1935 :
1936 18 : Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
1937 18 : Affix->data->naff = (uint32) cnt;
1938 :
1939 18 : cnt = 0;
1940 144 : for (i = start; i < end; i++)
1941 126 : if (Conf->Affix[i].replen == 0)
1942 : {
1943 18 : Affix->data->aff[cnt] = Conf->Affix + i;
1944 18 : cnt++;
1945 : }
1946 : }
1947 :
1948 : /*
1949 : * Checks if the affixflag is used by dictionary. Conf->AffixData does not
1950 : * contain affixflag if this flag is not used actually by the .dict file.
1951 : *
1952 : * Conf: current dictionary.
1953 : * affixflag: affix flag.
1954 : *
1955 : * Returns true if the Conf->AffixData array contains affixflag, otherwise
1956 : * returns false.
1957 : */
1958 : static bool
1959 97 : isAffixInUse(IspellDict *Conf, const char *affixflag)
1960 : {
1961 : int i;
1962 :
1963 710 : for (i = 0; i < Conf->nAffixData; i++)
1964 695 : if (IsAffixFlagInUse(Conf, i, affixflag))
1965 82 : return true;
1966 :
1967 15 : return false;
1968 : }
1969 :
1970 : /*
1971 : * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
1972 : */
1973 : void
1974 71 : NISortAffixes(IspellDict *Conf)
1975 : {
1976 : AFFIX *Affix;
1977 : size_t i;
1978 : CMPDAffix *ptr;
1979 71 : int firstsuffix = Conf->naffixes;
1980 :
1981 71 : if (Conf->naffixes == 0)
1982 0 : return;
1983 :
1984 : /* Store compound affixes in the Conf->CompoundAffix array */
1985 71 : if (Conf->naffixes > 1)
1986 71 : qsort(Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1987 71 : Conf->CompoundAffix = ptr = palloc_array(CMPDAffix, Conf->naffixes);
1988 71 : ptr->affix = NULL;
1989 :
1990 643 : for (i = 0; i < Conf->naffixes; i++)
1991 : {
1992 572 : Affix = &(((AFFIX *) Conf->Affix)[i]);
1993 572 : if (Affix->type == FF_SUFFIX && i < firstsuffix)
1994 71 : firstsuffix = i;
1995 :
1996 669 : if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
1997 97 : isAffixInUse(Conf, Affix->flag))
1998 : {
1999 82 : bool issuffix = (Affix->type == FF_SUFFIX);
2000 :
2001 82 : if (ptr == Conf->CompoundAffix ||
2002 52 : issuffix != (ptr - 1)->issuffix ||
2003 26 : strbncmp((const unsigned char *) (ptr - 1)->affix,
2004 26 : (const unsigned char *) Affix->repl,
2005 26 : (ptr - 1)->len))
2006 : {
2007 : /* leave only unique and minimal suffixes */
2008 69 : ptr->affix = Affix->repl;
2009 69 : ptr->len = Affix->replen;
2010 69 : ptr->issuffix = issuffix;
2011 69 : ptr++;
2012 : }
2013 : }
2014 : }
2015 71 : ptr->affix = NULL;
2016 71 : Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
2017 :
2018 : /* Start build a prefix tree */
2019 71 : Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
2020 71 : Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
2021 71 : mkVoidAffix(Conf, true, firstsuffix);
2022 71 : mkVoidAffix(Conf, false, firstsuffix);
2023 : }
2024 :
2025 : static AffixNodeData *
2026 3850 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
2027 : {
2028 : AffixNodeData *StopLow,
2029 : *StopHigh,
2030 : *StopMiddle;
2031 : uint8 symbol;
2032 :
2033 3850 : if (node->isvoid)
2034 : { /* search void affixes */
2035 3350 : if (node->data->naff)
2036 285 : return node->data;
2037 3065 : node = node->data->node;
2038 : }
2039 :
2040 4485 : while (node && *level < wrdlen)
2041 : {
2042 4465 : StopLow = node->data;
2043 4465 : StopHigh = node->data + node->length;
2044 9855 : while (StopLow < StopHigh)
2045 : {
2046 7395 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
2047 7395 : symbol = GETWCHAR(word, wrdlen, *level, type);
2048 :
2049 7395 : if (StopMiddle->val == symbol)
2050 : {
2051 2005 : (*level)++;
2052 2005 : if (StopMiddle->naff)
2053 1085 : return StopMiddle;
2054 920 : node = StopMiddle->node;
2055 920 : break;
2056 : }
2057 5390 : else if (StopMiddle->val < symbol)
2058 1340 : StopLow = StopMiddle + 1;
2059 : else
2060 4050 : StopHigh = StopMiddle;
2061 : }
2062 3380 : if (StopLow >= StopHigh)
2063 2460 : break;
2064 : }
2065 2480 : return NULL;
2066 : }
2067 :
2068 : static char *
2069 1530 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
2070 : {
2071 : /*
2072 : * Check compound allow flags
2073 : */
2074 :
2075 1530 : if (flagflags == 0)
2076 : {
2077 1055 : if (Affix->flagflags & FF_COMPOUNDONLY)
2078 110 : return NULL;
2079 : }
2080 475 : else if (flagflags & FF_COMPOUNDBEGIN)
2081 : {
2082 0 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2083 0 : return NULL;
2084 0 : if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
2085 0 : if (Affix->type == FF_SUFFIX)
2086 0 : return NULL;
2087 : }
2088 475 : else if (flagflags & FF_COMPOUNDMIDDLE)
2089 : {
2090 340 : if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
2091 190 : (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
2092 150 : return NULL;
2093 : }
2094 135 : else if (flagflags & FF_COMPOUNDLAST)
2095 : {
2096 135 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2097 0 : return NULL;
2098 135 : if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
2099 125 : if (Affix->type == FF_PREFIX)
2100 0 : return NULL;
2101 : }
2102 :
2103 : /*
2104 : * make replace pattern of affix
2105 : */
2106 1270 : if (Affix->type == FF_SUFFIX)
2107 : {
2108 870 : strcpy(newword, word);
2109 870 : strcpy(newword + len - Affix->replen, Affix->find);
2110 870 : if (baselen) /* store length of non-changed part of word */
2111 870 : *baselen = len - Affix->replen;
2112 : }
2113 : else
2114 : {
2115 : /*
2116 : * if prefix is an all non-changed part's length then all word
2117 : * contains only prefix and suffix, so out
2118 : */
2119 400 : if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
2120 0 : return NULL;
2121 400 : strcpy(newword, Affix->find);
2122 400 : strcat(newword, word + Affix->replen);
2123 : }
2124 :
2125 : /*
2126 : * check resulting word
2127 : */
2128 1270 : if (Affix->issimple)
2129 400 : return newword;
2130 870 : else if (Affix->isregis)
2131 : {
2132 590 : if (RS_execute(&(Affix->reg.regis), newword))
2133 560 : return newword;
2134 : }
2135 : else
2136 : {
2137 : pg_wchar *data;
2138 : size_t data_len;
2139 : int newword_len;
2140 :
2141 : /* Convert data string to wide characters */
2142 280 : newword_len = strlen(newword);
2143 280 : data = palloc_array(pg_wchar, newword_len + 1);
2144 280 : data_len = pg_mb2wchar_with_len(newword, data, newword_len);
2145 :
2146 280 : if (pg_regexec(Affix->reg.pregex, data, data_len,
2147 : 0, NULL, 0, NULL, 0) == REG_OKAY)
2148 : {
2149 280 : pfree(data);
2150 280 : return newword;
2151 : }
2152 0 : pfree(data);
2153 : }
2154 :
2155 30 : return NULL;
2156 : }
2157 :
2158 : static int
2159 450 : addToResult(char **forms, char **cur, char *word)
2160 : {
2161 450 : if (cur - forms >= MAX_NORM - 1)
2162 0 : return 0;
2163 450 : if (forms == cur || strcmp(word, *(cur - 1)) != 0)
2164 : {
2165 450 : *cur = pstrdup(word);
2166 450 : *(cur + 1) = NULL;
2167 450 : return 1;
2168 : }
2169 :
2170 0 : return 0;
2171 : }
2172 :
2173 : static char **
2174 1255 : NormalizeSubWord(IspellDict *Conf, const char *word, int flag)
2175 : {
2176 1255 : AffixNodeData *suffix = NULL,
2177 1255 : *prefix = NULL;
2178 1255 : int slevel = 0,
2179 1255 : plevel = 0;
2180 1255 : int wrdlen = strlen(word),
2181 : swrdlen;
2182 : char **forms;
2183 : char **cur;
2184 1255 : char newword[2 * MAXNORMLEN] = "";
2185 1255 : char pnewword[2 * MAXNORMLEN] = "";
2186 1255 : AffixNode *snode = Conf->Suffix,
2187 : *pnode;
2188 : int i,
2189 : j;
2190 :
2191 1255 : if (wrdlen > MAXNORMLEN)
2192 0 : return NULL;
2193 1255 : cur = forms = palloc_array(char *, MAX_NORM);
2194 1255 : *cur = NULL;
2195 :
2196 :
2197 : /* Check that the word itself is normal form */
2198 1255 : if (FindWord(Conf, word, VoidString, flag))
2199 : {
2200 390 : *cur = pstrdup(word);
2201 390 : cur++;
2202 390 : *cur = NULL;
2203 : }
2204 :
2205 : /* Find all other NORMAL forms of the 'word' (check only prefix) */
2206 1255 : pnode = Conf->Prefix;
2207 1255 : plevel = 0;
2208 1435 : while (pnode)
2209 : {
2210 1255 : prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
2211 1255 : if (!prefix)
2212 1075 : break;
2213 360 : for (j = 0; j < prefix->naff; j++)
2214 : {
2215 180 : if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
2216 : {
2217 : /* prefix success */
2218 160 : if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
2219 40 : cur += addToResult(forms, cur, newword);
2220 : }
2221 : }
2222 180 : pnode = prefix->node;
2223 : }
2224 :
2225 : /*
2226 : * Find all other NORMAL forms of the 'word' (check suffix and then
2227 : * prefix)
2228 : */
2229 2165 : while (snode)
2230 : {
2231 1755 : int baselen = 0;
2232 :
2233 : /* find possible suffix */
2234 1755 : suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
2235 1755 : if (!suffix)
2236 845 : break;
2237 : /* foreach suffix check affix */
2238 1980 : for (i = 0; i < suffix->naff; i++)
2239 : {
2240 1070 : if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
2241 : {
2242 : /* suffix success */
2243 840 : if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
2244 230 : cur += addToResult(forms, cur, newword);
2245 :
2246 : /* now we will look changed word with prefixes */
2247 840 : pnode = Conf->Prefix;
2248 840 : plevel = 0;
2249 840 : swrdlen = strlen(newword);
2250 1120 : while (pnode)
2251 : {
2252 840 : prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
2253 840 : if (!prefix)
2254 560 : break;
2255 560 : for (j = 0; j < prefix->naff; j++)
2256 : {
2257 280 : if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
2258 : {
2259 : /* prefix success */
2260 480 : const char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
2261 240 : VoidString : prefix->aff[j]->flag;
2262 :
2263 240 : if (FindWord(Conf, pnewword, ff, flag))
2264 180 : cur += addToResult(forms, cur, pnewword);
2265 : }
2266 : }
2267 280 : pnode = prefix->node;
2268 : }
2269 : }
2270 : }
2271 :
2272 910 : snode = suffix->node;
2273 : }
2274 :
2275 1255 : if (cur == forms)
2276 : {
2277 555 : pfree(forms);
2278 555 : return NULL;
2279 : }
2280 700 : return forms;
2281 : }
2282 :
/*
 * One way of splitting a word into parts.  Alternative splits of the same
 * word are chained through 'next'.
 */
typedef struct SplitVar SplitVar;

struct SplitVar
{
	int			nstem;			/* number of entries in use in stem[] */
	int			lenstem;		/* number of entries allocated for stem[] */
	char	  **stem;			/* the parts, in word order */
	SplitVar   *next;			/* next alternative split, or NULL */
};
2290 :
2291 : static int
2292 5050 : CheckCompoundAffixes(CMPDAffix **ptr, const char *word, int len, bool CheckInPlace)
2293 : {
2294 : bool issuffix;
2295 :
2296 : /* in case CompoundAffix is null: */
2297 5050 : if (*ptr == NULL)
2298 0 : return -1;
2299 :
2300 5050 : if (CheckInPlace)
2301 : {
2302 9640 : while ((*ptr)->affix)
2303 : {
2304 5370 : if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
2305 : {
2306 50 : len = (*ptr)->len;
2307 50 : issuffix = (*ptr)->issuffix;
2308 50 : (*ptr)++;
2309 50 : return (issuffix) ? len : 0;
2310 : }
2311 5320 : (*ptr)++;
2312 : }
2313 : }
2314 : else
2315 : {
2316 : const char *affbegin;
2317 :
2318 1410 : while ((*ptr)->affix)
2319 : {
2320 785 : if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
2321 : {
2322 105 : len = (*ptr)->len + (affbegin - word);
2323 105 : issuffix = (*ptr)->issuffix;
2324 105 : (*ptr)++;
2325 105 : return (issuffix) ? len : 0;
2326 : }
2327 680 : (*ptr)++;
2328 : }
2329 : }
2330 4895 : return -1;
2331 : }
2332 :
2333 : static SplitVar *
2334 1175 : CopyVar(SplitVar *s, int makedup)
2335 : {
2336 1175 : SplitVar *v = palloc_object(SplitVar);
2337 :
2338 1175 : v->next = NULL;
2339 1175 : if (s)
2340 : {
2341 : int i;
2342 :
2343 550 : v->lenstem = s->lenstem;
2344 550 : v->stem = palloc_array(char *, v->lenstem);
2345 550 : v->nstem = s->nstem;
2346 835 : for (i = 0; i < s->nstem; i++)
2347 285 : v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
2348 : }
2349 : else
2350 : {
2351 625 : v->lenstem = 16;
2352 625 : v->stem = palloc_array(char *, v->lenstem);
2353 625 : v->nstem = 0;
2354 : }
2355 1175 : return v;
2356 : }
2357 :
2358 : static void
2359 1575 : AddStem(SplitVar *v, char *word)
2360 : {
2361 1575 : if (v->nstem >= v->lenstem)
2362 : {
2363 0 : v->lenstem *= 2;
2364 0 : v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
2365 : }
2366 :
2367 1575 : v->stem[v->nstem] = word;
2368 1575 : v->nstem++;
2369 1575 : }
2370 :
2371 : static SplitVar *
2372 1100 : SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, const char *word, int wordlen, int startpos, int minpos)
2373 : {
2374 1100 : SplitVar *var = NULL;
2375 : SPNodeData *StopLow,
2376 : *StopHigh,
2377 1100 : *StopMiddle = NULL;
2378 1100 : SPNode *node = (snode) ? snode : Conf->Dictionary;
2379 1100 : int level = (snode) ? minpos : startpos; /* recursive
2380 : * minpos==level */
2381 : int lenaff;
2382 : CMPDAffix *caff;
2383 : char *notprobed;
2384 1100 : int compoundflag = 0;
2385 :
2386 : /* since this function recurses, it could be driven to stack overflow */
2387 1100 : check_stack_depth();
2388 :
2389 1100 : notprobed = (char *) palloc(wordlen);
2390 1100 : memset(notprobed, 1, wordlen);
2391 1100 : var = CopyVar(orig, 1);
2392 :
2393 6210 : while (level < wordlen)
2394 : {
2395 : /* find word with epenthetic or/and compound affix */
2396 5995 : caff = Conf->CompoundAffix;
2397 6150 : while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
2398 : {
2399 : /*
2400 : * there is one of compound affixes, so check word for existings
2401 : */
2402 : char buf[MAXNORMLEN];
2403 : char **subres;
2404 :
2405 155 : lenaff = level - startpos + lenaff;
2406 :
2407 155 : if (!notprobed[startpos + lenaff - 1])
2408 0 : continue;
2409 :
2410 155 : if (level + lenaff - 1 <= minpos)
2411 0 : continue;
2412 :
2413 155 : if (lenaff >= MAXNORMLEN)
2414 0 : continue; /* skip too big value */
2415 155 : if (lenaff > 0)
2416 155 : memcpy(buf, word + startpos, lenaff);
2417 155 : buf[lenaff] = '\0';
2418 :
2419 155 : if (level == 0)
2420 0 : compoundflag = FF_COMPOUNDBEGIN;
2421 155 : else if (level == wordlen - 1)
2422 0 : compoundflag = FF_COMPOUNDLAST;
2423 : else
2424 155 : compoundflag = FF_COMPOUNDMIDDLE;
2425 155 : subres = NormalizeSubWord(Conf, buf, compoundflag);
2426 155 : if (subres)
2427 : {
2428 : /* Yes, it was a word from dictionary */
2429 75 : SplitVar *new = CopyVar(var, 0);
2430 75 : SplitVar *ptr = var;
2431 75 : char **sptr = subres;
2432 :
2433 75 : notprobed[startpos + lenaff - 1] = 0;
2434 :
2435 150 : while (*sptr)
2436 : {
2437 75 : AddStem(new, *sptr);
2438 75 : sptr++;
2439 : }
2440 75 : pfree(subres);
2441 :
2442 75 : while (ptr->next)
2443 0 : ptr = ptr->next;
2444 75 : ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
2445 :
2446 75 : pfree(new->stem);
2447 75 : pfree(new);
2448 : }
2449 : }
2450 :
2451 5995 : if (!node)
2452 625 : break;
2453 :
2454 5370 : StopLow = node->data;
2455 5370 : StopHigh = node->data + node->length;
2456 7245 : while (StopLow < StopHigh)
2457 : {
2458 6720 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
2459 6720 : if (StopMiddle->val == ((const uint8 *) (word))[level])
2460 4845 : break;
2461 1875 : else if (StopMiddle->val < ((const uint8 *) (word))[level])
2462 815 : StopLow = StopMiddle + 1;
2463 : else
2464 1060 : StopHigh = StopMiddle;
2465 : }
2466 :
2467 5370 : if (StopLow < StopHigh)
2468 : {
2469 4845 : if (startpos == 0)
2470 2725 : compoundflag = FF_COMPOUNDBEGIN;
2471 2120 : else if (level == wordlen - 1)
2472 240 : compoundflag = FF_COMPOUNDLAST;
2473 : else
2474 1880 : compoundflag = FF_COMPOUNDMIDDLE;
2475 :
2476 : /* find infinitive */
2477 4845 : if (StopMiddle->isword &&
2478 1280 : (StopMiddle->compoundflag & compoundflag) &&
2479 1060 : notprobed[level])
2480 : {
2481 : /* ok, we found full compoundallowed word */
2482 1060 : if (level > minpos)
2483 : {
2484 : /* and its length more than minimal */
2485 660 : if (wordlen == level + 1)
2486 : {
2487 : /* well, it was last word */
2488 260 : AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2489 260 : pfree(notprobed);
2490 260 : return var;
2491 : }
2492 : else
2493 400 : {
2494 : /* then we will search more big word at the same point */
2495 400 : SplitVar *ptr = var;
2496 :
2497 620 : while (ptr->next)
2498 220 : ptr = ptr->next;
2499 400 : ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
2500 : /* we can find next word */
2501 400 : level++;
2502 400 : AddStem(var, pnstrdup(word + startpos, level - startpos));
2503 400 : node = Conf->Dictionary;
2504 400 : startpos = level;
2505 400 : continue;
2506 : }
2507 : }
2508 : }
2509 4185 : node = StopMiddle->node;
2510 : }
2511 : else
2512 525 : node = NULL;
2513 4710 : level++;
2514 : }
2515 :
2516 840 : AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2517 840 : pfree(notprobed);
2518 840 : return var;
2519 : }
2520 :
2521 : static void
2522 1095 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
2523 : {
2524 1095 : if (*lres == NULL)
2525 505 : *lcur = *lres = palloc_array(TSLexeme, MAX_NORM);
2526 :
2527 1095 : if (*lcur - *lres < MAX_NORM - 1)
2528 : {
2529 1095 : (*lcur)->lexeme = word;
2530 1095 : (*lcur)->flags = flags;
2531 1095 : (*lcur)->nvariant = NVariant;
2532 1095 : (*lcur)++;
2533 1095 : (*lcur)->lexeme = NULL;
2534 : }
2535 1095 : }
2536 :
2537 : TSLexeme *
2538 625 : NINormalizeWord(IspellDict *Conf, const char *word)
2539 : {
2540 : char **res;
2541 625 : TSLexeme *lcur = NULL,
2542 625 : *lres = NULL;
2543 625 : uint16 NVariant = 1;
2544 :
2545 625 : res = NormalizeSubWord(Conf, word, 0);
2546 :
2547 625 : if (res)
2548 : {
2549 405 : char **ptr = res;
2550 :
2551 950 : while (*ptr && (lcur - lres) < MAX_NORM)
2552 : {
2553 545 : addNorm(&lres, &lcur, *ptr, 0, NVariant++);
2554 545 : ptr++;
2555 : }
2556 405 : pfree(res);
2557 : }
2558 :
2559 625 : if (Conf->usecompound)
2560 : {
2561 625 : int wordlen = strlen(word);
2562 : SplitVar *ptr,
2563 625 : *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
2564 : int i;
2565 :
2566 1725 : while (var)
2567 : {
2568 1100 : if (var->nstem > 1)
2569 : {
2570 475 : char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
2571 :
2572 475 : if (subres)
2573 : {
2574 220 : char **subptr = subres;
2575 :
2576 440 : while (*subptr)
2577 : {
2578 550 : for (i = 0; i < var->nstem - 1; i++)
2579 : {
2580 330 : addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
2581 : }
2582 :
2583 220 : addNorm(&lres, &lcur, *subptr, 0, NVariant);
2584 220 : subptr++;
2585 220 : NVariant++;
2586 : }
2587 :
2588 220 : pfree(subres);
2589 220 : var->stem[0] = NULL;
2590 220 : pfree(var->stem[var->nstem - 1]);
2591 : }
2592 : }
2593 :
2594 2285 : for (i = 0; i < var->nstem && var->stem[i]; i++)
2595 1185 : pfree(var->stem[i]);
2596 1100 : ptr = var->next;
2597 1100 : pfree(var->stem);
2598 1100 : pfree(var);
2599 1100 : var = ptr;
2600 : }
2601 : }
2602 :
2603 625 : return lres;
2604 : }
|