Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spell.c
4 : * Normalizing word with ISpell
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : *
8 : * Ispell dictionary
9 : * -----------------
10 : *
11 : * Rules of dictionaries are defined in two files with .affix and .dict
12 : * extensions. They are used by spell checker programs Ispell and Hunspell.
13 : *
14 : * An .affix file declares morphological rules to get a basic form of words.
15 : * The format of an .affix file has different structure for Ispell and Hunspell
16 : * dictionaries. The Hunspell format is more complicated. But when an .affix
17 : * file is imported and compiled, it is stored in the same structure AffixNode.
18 : *
19 : * A .dict file stores a list of basic forms of words with references to
20 : * affix rules. The format of a .dict file has the same structure for Ispell
21 : * and Hunspell dictionaries.
22 : *
23 : * Compilation of a dictionary
24 : * ---------------------------
25 : *
26 : * A compiled dictionary is stored in the IspellDict structure. Compilation of
27 : * a dictionary is divided into several steps:
28 : * - NIImportDictionary() - stores each word of a .dict file in the
29 : * temporary Spell field.
30 : * - NIImportAffixes() - stores affix rules of an .affix file in the
31 : * Affix field (not temporary) if an .affix file has the Ispell format.
32 : * -> NIImportOOAffixes() - stores affix rules if an .affix file has the
33 : * Hunspell format. The AffixData field is initialized if AF parameter
34 : * is defined.
35 : * - NISortDictionary() - builds a prefix tree (Trie) from the words list
36 : * and stores it in the Dictionary field. The words list is got from the
37 : * Spell field. The AffixData field is initialized if AF parameter is not
38 : * defined.
39 : * - NISortAffixes():
40 : * - builds a list of compound affixes from the affix list and stores it
41 : * in the CompoundAffix.
42 : * - builds prefix trees (Trie) from the affix list for prefixes and suffixes
43 : * and stores them in Suffix and Prefix fields.
44 : * The affix list is got from the Affix field.
45 : *
46 : * Memory management
47 : * -----------------
48 : *
49 : * The IspellDict structure has the Spell field which is used only in compile
50 : * time. The Spell field stores a words list. It can take a lot of memory.
51 : * Therefore when a dictionary is compiled this field is cleared by
52 : * NIFinishBuild().
53 : *
54 : * All resources which should be cleared by NIFinishBuild() are initialized
55 : * using tmpalloc() and tmpalloc0().
56 : *
57 : * IDENTIFICATION
58 : * src/backend/tsearch/spell.c
59 : *
60 : *-------------------------------------------------------------------------
61 : */
62 :
63 : #include "postgres.h"
64 :
65 : #include "catalog/pg_collation.h"
66 : #include "miscadmin.h"
67 : #include "tsearch/dicts/spell.h"
68 : #include "tsearch/ts_locale.h"
69 : #include "utils/formatting.h"
70 : #include "utils/memutils.h"
71 :
72 :
/*
 * Building a dictionary needs a lot of scratch memory that is of no use
 * once the build is finished.  While building, CurrentMemoryContext is the
 * long-lived context that belongs to the dictionary cache entry, so all
 * short-lived data is allocated in Conf->buildCxt through these macros.
 *
 * Both macros expect a variable named "Conf" to be in scope where they are
 * used.
 */
#define tmpalloc(nbytes)	MemoryContextAlloc(Conf->buildCxt, (nbytes))
#define tmpalloc0(nbytes)	MemoryContextAllocZero(Conf->buildCxt, (nbytes))
82 :
83 : /*
84 : * Prepare for constructing an ISpell dictionary.
85 : *
86 : * The IspellDict struct is assumed to be zeroed when allocated.
87 : */
88 : void
89 134 : NIStartBuild(IspellDict *Conf)
90 : {
91 : /*
92 : * The temp context is a child of CurTransactionContext, so that it will
93 : * go away automatically on error.
94 : */
95 134 : Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
96 : "Ispell dictionary init context",
97 : ALLOCSET_DEFAULT_SIZES);
98 134 : }
99 :
100 : /*
101 : * Clean up when dictionary construction is complete.
102 : */
103 : void
104 110 : NIFinishBuild(IspellDict *Conf)
105 : {
106 : /* Release no-longer-needed temp memory */
107 110 : MemoryContextDelete(Conf->buildCxt);
108 : /* Just for cleanliness, zero the now-dangling pointers */
109 110 : Conf->buildCxt = NULL;
110 110 : Conf->Spell = NULL;
111 110 : Conf->firstfree = NULL;
112 110 : Conf->CompoundAffixFlags = NULL;
113 110 : }
114 :
115 :
116 : /*
117 : * "Compact" palloc: allocate without extra palloc overhead.
118 : *
119 : * Since we have no need to free the ispell data items individually, there's
120 : * not much value in the per-chunk overhead normally consumed by palloc.
121 : * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
122 : *
123 : * We currently pre-zero all data allocated this way, even though some of it
124 : * doesn't need that. The cpalloc and cpalloc0 macros are just documentation
125 : * to indicate which allocations actually require zeroing.
126 : */
127 : #define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
128 : #define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
129 :
130 : static void *
131 12404 : compact_palloc0(IspellDict *Conf, size_t size)
132 : {
133 : void *result;
134 :
135 : /* Should only be called during init */
136 : Assert(Conf->buildCxt != NULL);
137 :
138 : /* No point in this for large chunks */
139 12404 : if (size > COMPACT_MAX_REQ)
140 0 : return palloc0(size);
141 :
142 : /* Keep everything maxaligned */
143 12404 : size = MAXALIGN(size);
144 :
145 : /* Need more space? */
146 12404 : if (size > Conf->avail)
147 : {
148 128 : Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
149 128 : Conf->avail = COMPACT_ALLOC_CHUNK;
150 : }
151 :
152 12404 : result = Conf->firstfree;
153 12404 : Conf->firstfree += size;
154 12404 : Conf->avail -= size;
155 :
156 12404 : return result;
157 : }
158 :
159 : #define cpalloc(size) compact_palloc0(Conf, size)
160 : #define cpalloc0(size) compact_palloc0(Conf, size)
161 :
162 : static char *
163 6624 : cpstrdup(IspellDict *Conf, const char *str)
164 : {
165 6624 : char *res = cpalloc(strlen(str) + 1);
166 :
167 6624 : strcpy(res, str);
168 6624 : return res;
169 : }
170 :
171 :
172 : /*
173 : * Apply str_tolower(), producing a temporary result (in the buildCxt).
174 : */
175 : static char *
176 5746 : lowerstr_ctx(IspellDict *Conf, const char *src)
177 : {
178 : MemoryContext saveCtx;
179 : char *dst;
180 :
181 5746 : saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
182 5746 : dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
183 5746 : MemoryContextSwitchTo(saveCtx);
184 :
185 5746 : return dst;
186 : }
187 :
/* Size limits; their users are outside the part of the file shown here */
#define MAX_NORM	1024
#define MAXNORMLEN	256

/* Compare only the first strlen(prefix) bytes of str against prefix */
#define STRNCMP(str,prefix) strncmp((str), (prefix), strlen(prefix))

/*
 * Fetch byte number "pos" of a string "buf" of length "len": counted from
 * the start when "kind" is FF_PREFIX, otherwise counted from the end.
 */
#define GETWCHAR(buf,len,pos,kind) \
	(((const uint8 *) (buf))[((kind) == FF_PREFIX) ? (pos) : ((len) - 1 - (pos))])

/* Same as GETWCHAR, applied to the replacement string of an AFFIX */
#define GETCHAR(aff,pos,kind) GETWCHAR((aff)->repl, (aff)->replen, pos, kind)

/* Shared empty string, used wherever a flag/find/repl value is empty */
static const char *VoidString = "";
196 :
197 : static int
198 2892 : cmpspell(const void *s1, const void *s2)
199 : {
200 2892 : return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
201 : }
202 :
203 : static int
204 2256 : cmpspellaffix(const void *s1, const void *s2)
205 : {
206 4512 : return strcmp((*(SPELL *const *) s1)->p.flag,
207 2256 : (*(SPELL *const *) s2)->p.flag);
208 : }
209 :
210 : static int
211 3924 : cmpcmdflag(const void *f1, const void *f2)
212 : {
213 3924 : CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
214 3924 : *fv2 = (CompoundAffixFlag *) f2;
215 :
216 : Assert(fv1->flagMode == fv2->flagMode);
217 :
218 3924 : if (fv1->flagMode == FM_NUM)
219 : {
220 760 : if (fv1->flag.i == fv2->flag.i)
221 114 : return 0;
222 :
223 646 : return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
224 : }
225 :
226 3164 : return strcmp(fv1->flag.s, fv2->flag.s);
227 : }
228 :
229 : static char *
230 1166 : findchar(char *str, int c)
231 : {
232 8590 : while (*str)
233 : {
234 8462 : if (t_iseq(str, c))
235 1038 : return str;
236 7424 : str += pg_mblen(str);
237 : }
238 :
239 128 : return NULL;
240 : }
241 :
242 : static char *
243 42 : findchar2(char *str, int c1, int c2)
244 : {
245 882 : while (*str)
246 : {
247 882 : if (t_iseq(str, c1) || t_iseq(str, c2))
248 42 : return str;
249 840 : str += pg_mblen(str);
250 : }
251 :
252 0 : return NULL;
253 : }
254 :
255 :
/*
 * Backward string comparison, for suffix tree operations.
 *
 * The two strings are compared byte by byte starting from their LAST byte
 * and moving toward the first.  If one string runs out while all compared
 * bytes were equal, the shorter string sorts first.
 *
 * Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	/* Number of not-yet-compared bytes remaining in each string */
	int			rest1 = (int) strlen((const char *) s1);
	int			rest2 = (int) strlen((const char *) s2);

	while (rest1 > 0 && rest2 > 0)
	{
		unsigned char c1 = s1[--rest1];
		unsigned char c2 = s2[--rest2];

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
	}

	/* Common tail is identical; decide on the remaining length */
	if (rest1 != rest2)
		return (rest1 < rest2) ? -1 : 1;

	return 0;
}
279 :
/*
 * Bounded backward string comparison.
 *
 * Works like strbcmp(), but compares at most "count" bytes, counted from the
 * end of each string.  If "count" bytes were compared without finding a
 * difference the strings are considered equal; otherwise, when one string
 * runs out first, the shorter one sorts first.
 *
 * Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	/* Number of not-yet-compared bytes remaining in each string */
	int			rest1 = (int) strlen((const char *) s1);
	int			rest2 = (int) strlen((const char *) s2);
	int			budget = count;

	while (rest1 > 0 && rest2 > 0 && budget > 0)
	{
		unsigned char c1 = s1[--rest1];
		unsigned char c2 = s2[--rest2];

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		budget--;
	}

	/* Compared everything we were asked to compare */
	if (budget == 0)
		return 0;

	/* One string ended early; decide on the remaining length */
	if (rest1 != rest2)
		return (rest1 < rest2) ? -1 : 1;

	return 0;
}
305 :
306 : /*
307 : * Compares affixes.
308 : * First compares the type of an affix. Prefixes should go before affixes.
309 : * If types are equal then compares replaceable string.
310 : */
311 : static int
312 1952 : cmpaffix(const void *s1, const void *s2)
313 : {
314 1952 : const AFFIX *a1 = (const AFFIX *) s1;
315 1952 : const AFFIX *a2 = (const AFFIX *) s2;
316 :
317 1952 : if (a1->type < a2->type)
318 446 : return -1;
319 1506 : if (a1->type > a2->type)
320 132 : return 1;
321 1374 : if (a1->type == FF_PREFIX)
322 220 : return strcmp(a1->repl, a2->repl);
323 : else
324 1154 : return strbcmp((const unsigned char *) a1->repl,
325 1154 : (const unsigned char *) a2->repl);
326 : }
327 :
328 : /*
329 : * Gets an affix flag from the set of affix flags (sflagset).
330 : *
331 : * Several flags can be stored in a single string. Flags can be represented by:
332 : * - 1 character (FM_CHAR). A character may be Unicode.
333 : * - 2 characters (FM_LONG). A character may be Unicode.
334 : * - numbers from 1 to 65000 (FM_NUM).
335 : *
336 : * Depending on the flagMode an affix string can have the following format:
337 : * - FM_CHAR: ABCD
338 : * Here we have 4 flags: A, B, C and D
339 : * - FM_LONG: ABCDE*
340 : * Here we have 3 flags: AB, CD and E*
341 : * - FM_NUM: 200,205,50
342 : * Here we have 3 flags: 200, 205 and 50
343 : *
344 : * Conf: current dictionary.
345 : * sflagset: the set of affix flags. Returns a reference to the start of a next
346 : * affix flag.
347 : * sflag: returns an affix flag from sflagset.
348 : */
349 : static void
350 6020 : getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
351 : {
352 : int32 s;
353 : char *next;
354 6020 : const char *sbuf = *sflagset;
355 : int maxstep;
356 6020 : bool stop = false;
357 6020 : bool met_comma = false;
358 :
359 6020 : maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
360 :
361 7886 : while (**sflagset)
362 : {
363 7886 : switch (Conf->flagMode)
364 : {
365 6748 : case FM_LONG:
366 : case FM_CHAR:
367 6748 : COPYCHAR(sflag, *sflagset);
368 6748 : sflag += pg_mblen(*sflagset);
369 :
370 : /* Go to start of the next flag */
371 6748 : *sflagset += pg_mblen(*sflagset);
372 :
373 : /* Check if we get all characters of flag */
374 6748 : maxstep--;
375 6748 : stop = (maxstep == 0);
376 6748 : break;
377 1138 : case FM_NUM:
378 1138 : s = strtol(*sflagset, &next, 10);
379 1138 : if (*sflagset == next || errno == ERANGE)
380 6 : ereport(ERROR,
381 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
382 : errmsg("invalid affix flag \"%s\"", *sflagset)));
383 1132 : if (s < 0 || s > FLAGNUM_MAXSIZE)
384 0 : ereport(ERROR,
385 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
386 : errmsg("affix flag \"%s\" is out of range",
387 : *sflagset)));
388 1132 : sflag += sprintf(sflag, "%0d", s);
389 :
390 : /* Go to start of the next flag */
391 1132 : *sflagset = next;
392 1736 : while (**sflagset)
393 : {
394 1208 : if (isdigit((unsigned char) **sflagset))
395 : {
396 604 : if (!met_comma)
397 0 : ereport(ERROR,
398 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
399 : errmsg("invalid affix flag \"%s\"",
400 : *sflagset)));
401 604 : break;
402 : }
403 604 : else if (t_iseq(*sflagset, ','))
404 : {
405 604 : if (met_comma)
406 0 : ereport(ERROR,
407 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
408 : errmsg("invalid affix flag \"%s\"",
409 : *sflagset)));
410 604 : met_comma = true;
411 : }
412 0 : else if (!isspace((unsigned char) **sflagset))
413 : {
414 0 : ereport(ERROR,
415 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
416 : errmsg("invalid character in affix flag \"%s\"",
417 : *sflagset)));
418 : }
419 :
420 604 : *sflagset += pg_mblen(*sflagset);
421 : }
422 1132 : stop = true;
423 1132 : break;
424 0 : default:
425 0 : elog(ERROR, "unrecognized type of Conf->flagMode: %d",
426 : Conf->flagMode);
427 : }
428 :
429 7880 : if (stop)
430 6014 : break;
431 : }
432 :
433 6014 : if (Conf->flagMode == FM_LONG && maxstep > 0)
434 0 : ereport(ERROR,
435 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
436 : errmsg("invalid affix flag \"%s\" with \"long\" flag value",
437 : sbuf)));
438 :
439 6014 : *sflag = '\0';
440 6014 : }
441 :
442 : /*
443 : * Checks if the affix set Conf->AffixData[affix] contains affixflag.
444 : * Conf->AffixData[affix] does not contain affixflag if this flag is not used
445 : * actually by the .dict file.
446 : *
447 : * Conf: current dictionary.
448 : * affix: index of the Conf->AffixData array.
449 : * affixflag: the affix flag.
450 : *
451 : * Returns true if the string Conf->AffixData[affix] contains affixflag,
452 : * otherwise returns false.
453 : */
454 : static bool
455 2224 : IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
456 : {
457 : const char *flagcur;
458 : char flag[BUFSIZ];
459 :
460 2224 : if (*affixflag == 0)
461 636 : return true;
462 :
463 : Assert(affix < Conf->nAffixData);
464 :
465 1588 : flagcur = Conf->AffixData[affix];
466 :
467 4590 : while (*flagcur)
468 : {
469 3500 : getNextFlagFromString(Conf, &flagcur, flag);
470 : /* Compare first affix flag in flagcur with affixflag */
471 3500 : if (strcmp(flag, affixflag) == 0)
472 498 : return true;
473 : }
474 :
475 : /* Could not find affixflag */
476 1090 : return false;
477 : }
478 :
479 : /*
480 : * Adds the new word into the temporary array Spell.
481 : *
482 : * Conf: current dictionary.
483 : * word: new word.
484 : * flag: set of affix flags. Single flag can be get by getNextFlagFromString().
485 : */
486 : static void
487 1166 : NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
488 : {
489 1166 : if (Conf->nspell >= Conf->mspell)
490 : {
491 128 : if (Conf->mspell)
492 : {
493 0 : Conf->mspell *= 2;
494 0 : Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
495 : }
496 : else
497 : {
498 128 : Conf->mspell = 1024 * 20;
499 128 : Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
500 : }
501 : }
502 1166 : Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
503 1166 : strcpy(Conf->Spell[Conf->nspell]->word, word);
504 2332 : Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
505 1166 : ? cpstrdup(Conf, flag) : VoidString;
506 1166 : Conf->nspell++;
507 1166 : }
508 :
509 : /*
510 : * Imports dictionary into the temporary array Spell.
511 : *
512 : * Note caller must already have applied get_tsearch_config_filename.
513 : *
514 : * Conf: current dictionary.
515 : * filename: path to the .dict file.
516 : */
517 : void
518 128 : NIImportDictionary(IspellDict *Conf, const char *filename)
519 : {
520 : tsearch_readline_state trst;
521 : char *line;
522 :
523 128 : if (!tsearch_readline_begin(&trst, filename))
524 0 : ereport(ERROR,
525 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
526 : errmsg("could not open dictionary file \"%s\": %m",
527 : filename)));
528 :
529 1294 : while ((line = tsearch_readline(&trst)) != NULL)
530 : {
531 : char *s,
532 : *pstr;
533 :
534 : /* Set of affix flags */
535 : const char *flag;
536 :
537 : /* Extract flag from the line */
538 1166 : flag = NULL;
539 1166 : if ((s = findchar(line, '/')))
540 : {
541 1038 : *s++ = '\0';
542 1038 : flag = s;
543 4150 : while (*s)
544 : {
545 : /* we allow only single encoded flags for faster works */
546 4150 : if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
547 3112 : s++;
548 : else
549 : {
550 1038 : *s = '\0';
551 1038 : break;
552 : }
553 : }
554 : }
555 : else
556 128 : flag = "";
557 :
558 : /* Remove trailing spaces */
559 1166 : s = line;
560 8462 : while (*s)
561 : {
562 7424 : if (isspace((unsigned char) *s))
563 : {
564 128 : *s = '\0';
565 128 : break;
566 : }
567 7296 : s += pg_mblen(s);
568 : }
569 1166 : pstr = lowerstr_ctx(Conf, line);
570 :
571 1166 : NIAddSpell(Conf, pstr, flag);
572 1166 : pfree(pstr);
573 :
574 1166 : pfree(line);
575 : }
576 128 : tsearch_readline_end(&trst);
577 128 : }
578 :
579 : /*
580 : * Searches a basic form of word in the prefix tree. This word was generated
581 : * using an affix rule. This rule may not be presented in an affix set of
582 : * a basic form of word.
583 : *
584 : * For example, we have the entry in the .dict file:
585 : * meter/GMD
586 : *
587 : * The affix rule with the flag S:
588 : * SFX S y ies [^aeiou]y
589 : * is not presented here.
590 : *
591 : * The affix rule with the flag M:
592 : * SFX M 0 's .
593 : * is presented here.
594 : *
595 : * Conf: current dictionary.
596 : * word: basic form of word.
597 : * affixflag: affix flag, by which a basic form of word was generated.
598 : * flag: compound flag used to compare with StopMiddle->compoundflag.
599 : *
600 : * Returns 1 if the word was found in the prefix tree, else returns 0.
601 : */
602 : static int
603 2994 : FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
604 : {
605 2994 : SPNode *node = Conf->Dictionary;
606 : SPNodeData *StopLow,
607 : *StopHigh,
608 : *StopMiddle;
609 2994 : const uint8 *ptr = (const uint8 *) word;
610 :
611 2994 : flag &= FF_COMPOUNDFLAGMASK;
612 :
613 13944 : while (node && *ptr)
614 : {
615 13224 : StopLow = node->data;
616 13224 : StopHigh = node->data + node->length;
617 18918 : while (StopLow < StopHigh)
618 : {
619 17652 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
620 17652 : if (StopMiddle->val == *ptr)
621 : {
622 11958 : if (*(ptr + 1) == '\0' && StopMiddle->isword)
623 : {
624 1146 : if (flag == 0)
625 : {
626 : /*
627 : * The word can be formed only with another word. And
628 : * in the flag parameter there is not a sign that we
629 : * search compound words.
630 : */
631 726 : if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
632 0 : return 0;
633 : }
634 420 : else if ((flag & StopMiddle->compoundflag) == 0)
635 0 : return 0;
636 :
637 : /*
638 : * Check if this affix rule is presented in the affix set
639 : * with index StopMiddle->affix.
640 : */
641 1146 : if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
642 1008 : return 1;
643 : }
644 10950 : node = StopMiddle->node;
645 10950 : ptr++;
646 10950 : break;
647 : }
648 5694 : else if (StopMiddle->val < *ptr)
649 1932 : StopLow = StopMiddle + 1;
650 : else
651 3762 : StopHigh = StopMiddle;
652 : }
653 12216 : if (StopLow >= StopHigh)
654 1266 : break;
655 : }
656 1986 : return 0;
657 : }
658 :
659 : /*
660 : * Adds a new affix rule to the Affix field.
661 : *
662 : * Conf: current dictionary.
663 : * flag: affix flag ('\' in the below example).
664 : * flagflags: set of flags from the flagval field for this affix rule. This set
665 : * is listed after '/' character in the added string (repl).
666 : *
667 : * For example L flag in the hunspell_sample.affix:
668 : * SFX \ 0 Y/L [^Y]
669 : *
670 : * mask: condition for search ('[^Y]' in the above example).
671 : * find: stripping characters from beginning (at prefix) or end (at suffix)
672 : * of the word ('0' in the above example, 0 means that there is not
673 : * stripping character).
674 : * repl: adding string after stripping ('Y' in the above example).
675 : * type: FF_SUFFIX or FF_PREFIX.
676 : */
677 : static void
678 1060 : NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
679 : const char *find, const char *repl, int type)
680 : {
681 : AFFIX *Affix;
682 :
683 1060 : if (Conf->naffixes >= Conf->maffixes)
684 : {
685 128 : if (Conf->maffixes)
686 : {
687 0 : Conf->maffixes *= 2;
688 0 : Conf->Affix = (AFFIX *) repalloc(Conf->Affix, Conf->maffixes * sizeof(AFFIX));
689 : }
690 : else
691 : {
692 128 : Conf->maffixes = 16;
693 128 : Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
694 : }
695 : }
696 :
697 1060 : Affix = Conf->Affix + Conf->naffixes;
698 :
699 : /* This affix rule can be applied for words with any ending */
700 1060 : if (strcmp(mask, ".") == 0 || *mask == '\0')
701 : {
702 256 : Affix->issimple = 1;
703 256 : Affix->isregis = 0;
704 : }
705 : /* This affix rule will use regis to search word ending */
706 804 : else if (RS_isRegis(mask))
707 : {
708 672 : Affix->issimple = 0;
709 672 : Affix->isregis = 1;
710 672 : RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
711 672 : *mask ? mask : VoidString);
712 : }
713 : /* This affix rule will use regex_t to search word ending */
714 : else
715 : {
716 : int masklen;
717 : int wmasklen;
718 : int err;
719 : pg_wchar *wmask;
720 : char *tmask;
721 :
722 132 : Affix->issimple = 0;
723 132 : Affix->isregis = 0;
724 132 : tmask = (char *) tmpalloc(strlen(mask) + 3);
725 132 : if (type == FF_SUFFIX)
726 132 : sprintf(tmask, "%s$", mask);
727 : else
728 0 : sprintf(tmask, "^%s", mask);
729 :
730 132 : masklen = strlen(tmask);
731 132 : wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
732 132 : wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
733 :
734 : /*
735 : * The regex and all internal state created by pg_regcomp are
736 : * allocated in the dictionary's memory context, and will be freed
737 : * automatically when it is destroyed.
738 : */
739 132 : Affix->reg.pregex = palloc(sizeof(regex_t));
740 132 : err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen,
741 : REG_ADVANCED | REG_NOSUB,
742 : DEFAULT_COLLATION_OID);
743 132 : if (err)
744 : {
745 : char errstr[100];
746 :
747 0 : pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr));
748 0 : ereport(ERROR,
749 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
750 : errmsg("invalid regular expression: %s", errstr)));
751 : }
752 : }
753 :
754 1060 : Affix->flagflags = flagflags;
755 1060 : if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
756 : {
757 192 : if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
758 192 : Affix->flagflags |= FF_COMPOUNDFLAG;
759 : }
760 1060 : Affix->flag = cpstrdup(Conf, flag);
761 1060 : Affix->type = type;
762 :
763 1060 : Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
764 1060 : if ((Affix->replen = strlen(repl)) > 0)
765 1026 : Affix->repl = cpstrdup(Conf, repl);
766 : else
767 34 : Affix->repl = VoidString;
768 1060 : Conf->naffixes++;
769 1060 : }
770 :
/*
 * Parsing states for parse_affentry() and friends.
 *
 * The WAIT states skip input until the named item begins; the IN states
 * collect the characters of that item.
 */
#define PAE_WAIT_MASK	0		/* before the mask (or before any field) */
#define PAE_INMASK		1		/* inside the mask (or inside a field) */
#define PAE_WAIT_FIND	2		/* before the find string */
#define PAE_INFIND		3		/* inside the find string */
#define PAE_WAIT_REPL	4		/* before the replacement string */
#define PAE_INREPL		5		/* inside the replacement string */
#define PAE_WAIT_TYPE	6		/* before the type field */
#define PAE_WAIT_FLAG	7		/* before the flag field */
780 :
/*
 * Extracts the next whitespace-separated field of an .affix file line.
 *
 * *str is the input pointer (will be advanced past field)
 * next is where to copy the field value to, with null termination
 *
 * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
 *
 * A '#' met before the field has started is taken as the start of a
 * comment, and no field is returned.
 *
 * Returns true if we found a field, false if not.
 */
static bool
get_nextfield(char **str, char *next)
{
	bool		infield = false;	/* have we seen the field's first char? */
	int			room = BUFSIZ;		/* bytes left in the output buffer */

	while (**str)
	{
		if (!infield)
		{
			/* Still skipping whitespace in front of the field */
			if (t_iseq(*str, '#'))
				return false;

			if (!isspace((unsigned char) **str))
			{
				int			clen = pg_mblen(*str);

				/* Strictly less, to keep one byte for the terminator */
				if (clen < room)
				{
					COPYCHAR(next, *str);
					next += clen;
					room -= clen;
				}
				infield = true;
			}
		}
		else if (isspace((unsigned char) **str))
		{
			/* Whitespace ends the field */
			*next = '\0';
			return true;
		}
		else
		{
			int			clen = pg_mblen(*str);

			/* Characters that do not fit are silently dropped */
			if (clen < room)
			{
				COPYCHAR(next, *str);
				next += clen;
				room -= clen;
			}
		}

		*str += pg_mblen(*str);
	}

	*next = '\0';

	/* Reaching end of line is fine as long as the field is not empty */
	return infield;
}
842 :
/*
 * Parses entry of an .affix file of MySpell or Hunspell format.
 *
 * An .affix file entry has the following format:
 * - header
 *	 <type>  <flag>  <cross_flag>  <flag_count>
 * - fields after header:
 *	 <type>  <flag>  <find>  <replace>	<mask>
 *
 * str is the input line
 * field values are returned to type etc, which must be buffers of size BUFSIZ.
 *
 * Returns number of fields found; any omitted fields are set to empty strings.
 */
static int
parse_ooaffentry(char *str, char *type, char *flag, char *find,
				 char *repl, char *mask)
{
	/* Output buffers, in the order the fields appear on the line */
	char	   *dest[5];
	int			ndest = (int) (sizeof(dest) / sizeof(dest[0]));
	int			nfound = 0;
	int			i;

	dest[0] = type;
	dest[1] = flag;
	dest[2] = find;
	dest[3] = repl;
	dest[4] = mask;

	/* Fields that turn out to be missing are reported as empty strings */
	for (i = 0; i < ndest; i++)
		*dest[i] = '\0';

	/* Read fields until all are filled or the line runs out */
	for (i = 0; i < ndest && *str; i++)
	{
		if (!get_nextfield(&str, dest[i]))
			break;				/* early EOL */
		nfound++;
	}

	return nfound;
}
906 :
907 : /*
908 : * Parses entry of an .affix file of Ispell format
909 : *
910 : * An .affix file entry has the following format:
911 : * <mask> > [-<find>,]<replace>
912 : */
913 : static bool
914 294 : parse_affentry(char *str, char *mask, char *find, char *repl)
915 : {
916 294 : int state = PAE_WAIT_MASK;
917 294 : char *pmask = mask,
918 294 : *pfind = find,
919 294 : *prepl = repl;
920 :
921 294 : *mask = *find = *repl = '\0';
922 :
923 7728 : while (*str)
924 : {
925 7728 : if (state == PAE_WAIT_MASK)
926 : {
927 714 : if (t_iseq(str, '#'))
928 0 : return false;
929 714 : else if (!isspace((unsigned char) *str))
930 : {
931 294 : COPYCHAR(pmask, str);
932 294 : pmask += pg_mblen(str);
933 294 : state = PAE_INMASK;
934 : }
935 : }
936 7014 : else if (state == PAE_INMASK)
937 : {
938 2856 : if (t_iseq(str, '>'))
939 : {
940 294 : *pmask = '\0';
941 294 : state = PAE_WAIT_FIND;
942 : }
943 2562 : else if (!isspace((unsigned char) *str))
944 : {
945 1008 : COPYCHAR(pmask, str);
946 1008 : pmask += pg_mblen(str);
947 : }
948 : }
949 4158 : else if (state == PAE_WAIT_FIND)
950 : {
951 1176 : if (t_iseq(str, '-'))
952 : {
953 42 : state = PAE_INFIND;
954 : }
955 1134 : else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
956 : {
957 252 : COPYCHAR(prepl, str);
958 252 : prepl += pg_mblen(str);
959 252 : state = PAE_INREPL;
960 : }
961 882 : else if (!isspace((unsigned char) *str))
962 0 : ereport(ERROR,
963 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
964 : errmsg("syntax error")));
965 : }
966 2982 : else if (state == PAE_INFIND)
967 : {
968 84 : if (t_iseq(str, ','))
969 : {
970 42 : *pfind = '\0';
971 42 : state = PAE_WAIT_REPL;
972 : }
973 42 : else if (t_isalpha(str))
974 : {
975 42 : COPYCHAR(pfind, str);
976 42 : pfind += pg_mblen(str);
977 : }
978 0 : else if (!isspace((unsigned char) *str))
979 0 : ereport(ERROR,
980 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
981 : errmsg("syntax error")));
982 : }
983 2898 : else if (state == PAE_WAIT_REPL)
984 : {
985 42 : if (t_iseq(str, '-'))
986 : {
987 0 : break; /* void repl */
988 : }
989 42 : else if (t_isalpha(str))
990 : {
991 42 : COPYCHAR(prepl, str);
992 42 : prepl += pg_mblen(str);
993 42 : state = PAE_INREPL;
994 : }
995 0 : else if (!isspace((unsigned char) *str))
996 0 : ereport(ERROR,
997 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
998 : errmsg("syntax error")));
999 : }
1000 2856 : else if (state == PAE_INREPL)
1001 : {
1002 2856 : if (t_iseq(str, '#'))
1003 : {
1004 294 : *prepl = '\0';
1005 294 : break;
1006 : }
1007 2562 : else if (t_isalpha(str))
1008 : {
1009 378 : COPYCHAR(prepl, str);
1010 378 : prepl += pg_mblen(str);
1011 : }
1012 2184 : else if (!isspace((unsigned char) *str))
1013 0 : ereport(ERROR,
1014 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1015 : errmsg("syntax error")));
1016 : }
1017 : else
1018 0 : elog(ERROR, "unrecognized state in parse_affentry: %d", state);
1019 :
1020 7434 : str += pg_mblen(str);
1021 : }
1022 :
1023 294 : *pmask = *pfind = *prepl = '\0';
1024 :
1025 294 : return (*mask && (*find || *repl));
1026 : }
1027 :
1028 : /*
1029 : * Sets a Hunspell options depending on flag type.
1030 : */
1031 : static void
1032 2856 : setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
1033 : char *s, uint32 val)
1034 : {
1035 2856 : if (Conf->flagMode == FM_NUM)
1036 : {
1037 : char *next;
1038 : int i;
1039 :
1040 618 : i = strtol(s, &next, 10);
1041 618 : if (s == next || errno == ERANGE)
1042 0 : ereport(ERROR,
1043 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1044 : errmsg("invalid affix flag \"%s\"", s)));
1045 618 : if (i < 0 || i > FLAGNUM_MAXSIZE)
1046 0 : ereport(ERROR,
1047 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1048 : errmsg("affix flag \"%s\" is out of range", s)));
1049 :
1050 618 : entry->flag.i = i;
1051 : }
1052 : else
1053 2238 : entry->flag.s = cpstrdup(Conf, s);
1054 :
1055 2856 : entry->flagMode = Conf->flagMode;
1056 2856 : entry->value = val;
1057 2856 : }
1058 :
1059 : /*
1060 : * Sets up a correspondence for the affix parameter with the affix flag.
1061 : *
1062 : * Conf: current dictionary.
1063 : * s: affix flag in string.
1064 : * val: affix parameter.
1065 : */
1066 : static void
1067 342 : addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
1068 : {
1069 : CompoundAffixFlag *newValue;
1070 : char sbuf[BUFSIZ];
1071 : char *sflag;
1072 : int clen;
1073 :
1074 642 : while (*s && isspace((unsigned char) *s))
1075 300 : s += pg_mblen(s);
1076 :
1077 342 : if (!*s)
1078 0 : ereport(ERROR,
1079 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1080 : errmsg("syntax error")));
1081 :
1082 : /* Get flag without \n */
1083 342 : sflag = sbuf;
1084 1012 : while (*s && !isspace((unsigned char) *s) && *s != '\n')
1085 : {
1086 670 : clen = pg_mblen(s);
1087 670 : COPYCHAR(sflag, s);
1088 670 : sflag += clen;
1089 670 : s += clen;
1090 : }
1091 342 : *sflag = '\0';
1092 :
1093 : /* Resize array or allocate memory for array CompoundAffixFlag */
1094 342 : if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
1095 : {
1096 128 : if (Conf->mCompoundAffixFlag)
1097 : {
1098 0 : Conf->mCompoundAffixFlag *= 2;
1099 0 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1100 0 : repalloc(Conf->CompoundAffixFlags,
1101 0 : Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1102 : }
1103 : else
1104 : {
1105 128 : Conf->mCompoundAffixFlag = 10;
1106 128 : Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1107 128 : tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1108 : }
1109 : }
1110 :
1111 342 : newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
1112 :
1113 342 : setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
1114 :
1115 342 : Conf->usecompound = true;
1116 342 : Conf->nCompoundAffixFlag++;
1117 342 : }
1118 :
1119 : /*
1120 : * Returns a set of affix parameters which correspondence to the set of affix
1121 : * flags s.
1122 : */
1123 : static int
1124 1236 : getCompoundAffixFlagValue(IspellDict *Conf, const char *s)
1125 : {
1126 1236 : uint32 flag = 0;
1127 : CompoundAffixFlag *found,
1128 : key;
1129 : char sflag[BUFSIZ];
1130 : const char *flagcur;
1131 :
1132 1236 : if (Conf->nCompoundAffixFlag == 0)
1133 0 : return 0;
1134 :
1135 1236 : flagcur = s;
1136 3750 : while (*flagcur)
1137 : {
1138 2520 : getNextFlagFromString(Conf, &flagcur, sflag);
1139 2514 : setCompoundAffixFlagValue(Conf, &key, sflag, 0);
1140 :
1141 : found = (CompoundAffixFlag *)
1142 2514 : bsearch(&key, Conf->CompoundAffixFlags,
1143 2514 : Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
1144 : cmpcmdflag);
1145 2514 : if (found != NULL)
1146 574 : flag |= found->value;
1147 : }
1148 :
1149 1230 : return flag;
1150 : }
1151 :
1152 : /*
1153 : * Returns a flag set using the s parameter.
1154 : *
1155 : * If Conf->useFlagAliases is true then the s parameter is index of the
1156 : * Conf->AffixData array and function returns its entry.
1157 : * Else function returns the s parameter.
1158 : */
1159 : static const char *
1160 150 : getAffixFlagSet(IspellDict *Conf, char *s)
1161 : {
1162 150 : if (Conf->useFlagAliases && *s != '\0')
1163 : {
1164 : int curaffix;
1165 : char *end;
1166 :
1167 96 : curaffix = strtol(s, &end, 10);
1168 96 : if (s == end || errno == ERANGE)
1169 0 : ereport(ERROR,
1170 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1171 : errmsg("invalid affix alias \"%s\"", s)));
1172 :
1173 96 : if (curaffix > 0 && curaffix < Conf->nAffixData)
1174 :
1175 : /*
1176 : * Do not subtract 1 from curaffix because empty string was added
1177 : * in NIImportOOAffixes
1178 : */
1179 96 : return Conf->AffixData[curaffix];
1180 0 : else if (curaffix > Conf->nAffixData)
1181 0 : ereport(ERROR,
1182 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1183 : errmsg("invalid affix alias \"%s\"", s)));
1184 0 : return VoidString;
1185 : }
1186 : else
1187 54 : return s;
1188 : }
1189 :
1190 : /*
1191 : * Import an affix file that follows MySpell or Hunspell format.
1192 : *
1193 : * Conf: current dictionary.
1194 : * filename: path to the .affix file.
1195 : */
1196 : static void
1197 86 : NIImportOOAffixes(IspellDict *Conf, const char *filename)
1198 : {
1199 : char type[BUFSIZ],
1200 86 : *ptype = NULL;
1201 : char sflag[BUFSIZ];
1202 : char mask[BUFSIZ],
1203 : *pmask;
1204 : char find[BUFSIZ],
1205 : *pfind;
1206 : char repl[BUFSIZ],
1207 : *prepl;
1208 86 : bool isSuffix = false;
1209 86 : int naffix = 0,
1210 86 : curaffix = 0;
1211 86 : int sflaglen = 0;
1212 86 : char flagflags = 0;
1213 : tsearch_readline_state trst;
1214 : char *recoded;
1215 :
1216 : /* read file to find any flag */
1217 86 : Conf->usecompound = false;
1218 86 : Conf->useFlagAliases = false;
1219 86 : Conf->flagMode = FM_CHAR;
1220 :
1221 86 : if (!tsearch_readline_begin(&trst, filename))
1222 0 : ereport(ERROR,
1223 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1224 : errmsg("could not open affix file \"%s\": %m",
1225 : filename)));
1226 :
1227 3364 : while ((recoded = tsearch_readline(&trst)) != NULL)
1228 : {
1229 3278 : if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
1230 : {
1231 996 : pfree(recoded);
1232 996 : continue;
1233 : }
1234 :
1235 2282 : if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
1236 86 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
1237 : FF_COMPOUNDFLAG);
1238 2196 : else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
1239 32 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
1240 : FF_COMPOUNDBEGIN);
1241 2164 : else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
1242 0 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
1243 : FF_COMPOUNDLAST);
1244 : /* COMPOUNDLAST and COMPOUNDEND are synonyms */
1245 2164 : else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
1246 32 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
1247 : FF_COMPOUNDLAST);
1248 2132 : else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
1249 32 : addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
1250 : FF_COMPOUNDMIDDLE);
1251 2100 : else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
1252 86 : addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
1253 : FF_COMPOUNDONLY);
1254 2014 : else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
1255 32 : addCompoundAffixFlagValue(Conf,
1256 : recoded + strlen("COMPOUNDPERMITFLAG"),
1257 : FF_COMPOUNDPERMITFLAG);
1258 1982 : else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
1259 0 : addCompoundAffixFlagValue(Conf,
1260 : recoded + strlen("COMPOUNDFORBIDFLAG"),
1261 : FF_COMPOUNDFORBIDFLAG);
1262 1982 : else if (STRNCMP(recoded, "FLAG") == 0)
1263 : {
1264 66 : char *s = recoded + strlen("FLAG");
1265 :
1266 132 : while (*s && isspace((unsigned char) *s))
1267 66 : s += pg_mblen(s);
1268 :
1269 66 : if (*s)
1270 : {
1271 66 : if (STRNCMP(s, "long") == 0)
1272 32 : Conf->flagMode = FM_LONG;
1273 34 : else if (STRNCMP(s, "num") == 0)
1274 34 : Conf->flagMode = FM_NUM;
1275 0 : else if (STRNCMP(s, "default") != 0)
1276 0 : ereport(ERROR,
1277 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1278 : errmsg("Ispell dictionary supports only "
1279 : "\"default\", \"long\", "
1280 : "and \"num\" flag values")));
1281 : }
1282 : }
1283 :
1284 2282 : pfree(recoded);
1285 : }
1286 86 : tsearch_readline_end(&trst);
1287 :
1288 86 : if (Conf->nCompoundAffixFlag > 1)
1289 86 : qsort(Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
1290 : sizeof(CompoundAffixFlag), cmpcmdflag);
1291 :
1292 86 : if (!tsearch_readline_begin(&trst, filename))
1293 0 : ereport(ERROR,
1294 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1295 : errmsg("could not open affix file \"%s\": %m",
1296 : filename)));
1297 :
1298 3364 : while ((recoded = tsearch_readline(&trst)) != NULL)
1299 : {
1300 : int fields_read;
1301 :
1302 3278 : if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
1303 996 : goto nextline;
1304 :
1305 2282 : fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
1306 :
1307 2282 : if (ptype)
1308 2196 : pfree(ptype);
1309 2282 : ptype = lowerstr_ctx(Conf, type);
1310 :
1311 : /* First try to parse AF parameter (alias compression) */
1312 2282 : if (STRNCMP(ptype, "af") == 0)
1313 : {
1314 : /* First line is the number of aliases */
1315 384 : if (!Conf->useFlagAliases)
1316 : {
1317 32 : Conf->useFlagAliases = true;
1318 32 : naffix = atoi(sflag);
1319 32 : if (naffix <= 0)
1320 0 : ereport(ERROR,
1321 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1322 : errmsg("invalid number of flag vector aliases")));
1323 :
1324 : /* Also reserve place for empty flag set */
1325 32 : naffix++;
1326 :
1327 32 : Conf->AffixData = (const char **) palloc0(naffix * sizeof(char *));
1328 32 : Conf->lenAffixData = Conf->nAffixData = naffix;
1329 :
1330 : /* Add empty flag set into AffixData */
1331 32 : Conf->AffixData[curaffix] = VoidString;
1332 32 : curaffix++;
1333 : }
1334 : /* Other lines are aliases */
1335 : else
1336 : {
1337 352 : if (curaffix < naffix)
1338 : {
1339 352 : Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
1340 352 : curaffix++;
1341 : }
1342 : else
1343 0 : ereport(ERROR,
1344 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1345 : errmsg("number of aliases exceeds specified number %d",
1346 : naffix - 1)));
1347 : }
1348 384 : goto nextline;
1349 : }
1350 : /* Else try to parse prefixes and suffixes */
1351 1898 : if (fields_read < 4 ||
1352 1532 : (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
1353 366 : goto nextline;
1354 :
1355 1532 : sflaglen = strlen(sflag);
1356 1532 : if (sflaglen == 0
1357 1532 : || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
1358 1532 : || (sflaglen > 2 && Conf->flagMode == FM_LONG))
1359 0 : goto nextline;
1360 :
1361 : /*--------
1362 : * Affix header. For example:
1363 : * SFX \ N 1
1364 : *--------
1365 : */
1366 1532 : if (fields_read == 4)
1367 : {
1368 766 : isSuffix = (STRNCMP(ptype, "sfx") == 0);
1369 766 : if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
1370 530 : flagflags = FF_CROSSPRODUCT;
1371 : else
1372 236 : flagflags = 0;
1373 : }
1374 : /*--------
1375 : * Affix fields. For example:
1376 : * SFX \ 0 Y/L [^Y]
1377 : *--------
1378 : */
1379 : else
1380 : {
1381 : char *ptr;
1382 766 : int aflg = 0;
1383 :
1384 : /* Get flags after '/' (flags are case sensitive) */
1385 766 : if ((ptr = strchr(repl, '/')) != NULL)
1386 150 : aflg |= getCompoundAffixFlagValue(Conf,
1387 : getAffixFlagSet(Conf,
1388 : ptr + 1));
1389 : /* Get lowercased version of string before '/' */
1390 766 : prepl = lowerstr_ctx(Conf, repl);
1391 766 : if ((ptr = strchr(prepl, '/')) != NULL)
1392 150 : *ptr = '\0';
1393 766 : pfind = lowerstr_ctx(Conf, find);
1394 766 : pmask = lowerstr_ctx(Conf, mask);
1395 766 : if (t_iseq(find, '0'))
1396 646 : *pfind = '\0';
1397 766 : if (t_iseq(repl, '0'))
1398 34 : *prepl = '\0';
1399 :
1400 766 : NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
1401 : isSuffix ? FF_SUFFIX : FF_PREFIX);
1402 766 : pfree(prepl);
1403 766 : pfree(pfind);
1404 766 : pfree(pmask);
1405 : }
1406 :
1407 3278 : nextline:
1408 3278 : pfree(recoded);
1409 : }
1410 :
1411 86 : tsearch_readline_end(&trst);
1412 86 : if (ptype)
1413 86 : pfree(ptype);
1414 86 : }
1415 :
1416 : /*
1417 : * import affixes
1418 : *
1419 : * Note caller must already have applied get_tsearch_config_filename
1420 : *
1421 : * This function is responsible for parsing ispell ("old format") affix files.
1422 : * If we realize that the file contains new-format commands, we pass off the
1423 : * work to NIImportOOAffixes(), which will re-read the whole file.
1424 : */
1425 : void
1426 128 : NIImportAffixes(IspellDict *Conf, const char *filename)
1427 : {
1428 128 : char *pstr = NULL;
1429 : char flag[BUFSIZ];
1430 : char mask[BUFSIZ];
1431 : char find[BUFSIZ];
1432 : char repl[BUFSIZ];
1433 : char *s;
1434 128 : bool suffixes = false;
1435 128 : bool prefixes = false;
1436 128 : char flagflags = 0;
1437 : tsearch_readline_state trst;
1438 128 : bool oldformat = false;
1439 128 : char *recoded = NULL;
1440 :
1441 128 : if (!tsearch_readline_begin(&trst, filename))
1442 0 : ereport(ERROR,
1443 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1444 : errmsg("could not open affix file \"%s\": %m",
1445 : filename)));
1446 :
1447 128 : Conf->usecompound = false;
1448 128 : Conf->useFlagAliases = false;
1449 128 : Conf->flagMode = FM_CHAR;
1450 :
1451 1220 : while ((recoded = tsearch_readline(&trst)) != NULL)
1452 : {
1453 1178 : pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
1454 :
1455 : /* Skip comments and empty lines */
1456 1178 : if (*pstr == '#' || *pstr == '\n')
1457 378 : goto nextline;
1458 :
1459 800 : if (STRNCMP(pstr, "compoundwords") == 0)
1460 : {
1461 : /* Find case-insensitive L flag in non-lowercased string */
1462 42 : s = findchar2(recoded, 'l', 'L');
1463 42 : if (s)
1464 : {
1465 210 : while (*s && !isspace((unsigned char) *s))
1466 168 : s += pg_mblen(s);
1467 84 : while (*s && isspace((unsigned char) *s))
1468 42 : s += pg_mblen(s);
1469 :
1470 42 : if (*s && pg_mblen(s) == 1)
1471 : {
1472 42 : addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
1473 42 : Conf->usecompound = true;
1474 : }
1475 42 : oldformat = true;
1476 42 : goto nextline;
1477 : }
1478 : }
1479 758 : if (STRNCMP(pstr, "suffixes") == 0)
1480 : {
1481 42 : suffixes = true;
1482 42 : prefixes = false;
1483 42 : oldformat = true;
1484 42 : goto nextline;
1485 : }
1486 716 : if (STRNCMP(pstr, "prefixes") == 0)
1487 : {
1488 42 : suffixes = false;
1489 42 : prefixes = true;
1490 42 : oldformat = true;
1491 42 : goto nextline;
1492 : }
1493 674 : if (STRNCMP(pstr, "flag") == 0)
1494 : {
1495 360 : s = recoded + 4; /* we need non-lowercased string */
1496 360 : flagflags = 0;
1497 :
1498 720 : while (*s && isspace((unsigned char) *s))
1499 360 : s += pg_mblen(s);
1500 :
1501 360 : if (*s == '*')
1502 : {
1503 210 : flagflags |= FF_CROSSPRODUCT;
1504 210 : s++;
1505 : }
1506 150 : else if (*s == '~')
1507 : {
1508 42 : flagflags |= FF_COMPOUNDONLY;
1509 42 : s++;
1510 : }
1511 :
1512 360 : if (*s == '\\')
1513 42 : s++;
1514 :
1515 : /*
1516 : * An old-format flag is a single ASCII character; we expect it to
1517 : * be followed by EOL, whitespace, or ':'. Otherwise this is a
1518 : * new-format flag command.
1519 : */
1520 360 : if (*s && pg_mblen(s) == 1)
1521 : {
1522 360 : COPYCHAR(flag, s);
1523 360 : flag[1] = '\0';
1524 :
1525 360 : s++;
1526 360 : if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
1527 66 : isspace((unsigned char) *s))
1528 : {
1529 294 : oldformat = true;
1530 294 : goto nextline;
1531 : }
1532 : }
1533 66 : goto isnewformat;
1534 : }
1535 314 : if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
1536 294 : STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
1537 294 : STRNCMP(recoded, "PFX") == 0 ||
1538 294 : STRNCMP(recoded, "SFX") == 0)
1539 20 : goto isnewformat;
1540 :
1541 294 : if ((!suffixes) && (!prefixes))
1542 0 : goto nextline;
1543 :
1544 294 : if (!parse_affentry(pstr, mask, find, repl))
1545 0 : goto nextline;
1546 :
1547 294 : NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
1548 :
1549 1092 : nextline:
1550 1092 : pfree(recoded);
1551 1092 : pfree(pstr);
1552 : }
1553 42 : tsearch_readline_end(&trst);
1554 42 : return;
1555 :
1556 86 : isnewformat:
1557 86 : if (oldformat)
1558 0 : ereport(ERROR,
1559 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1560 : errmsg("affix file contains both old-style and new-style commands")));
1561 86 : tsearch_readline_end(&trst);
1562 :
1563 86 : NIImportOOAffixes(Conf, filename);
1564 : }
1565 :
1566 : /*
1567 : * Merges two affix flag sets and stores a new affix flag set into
1568 : * Conf->AffixData.
1569 : *
1570 : * Returns index of a new affix flag set.
1571 : */
1572 : static int
1573 64 : MergeAffix(IspellDict *Conf, int a1, int a2)
1574 : {
1575 : const char **ptr;
1576 :
1577 : Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
1578 :
1579 : /* Do not merge affix flags if one of affix flags is empty */
1580 64 : if (*Conf->AffixData[a1] == '\0')
1581 0 : return a2;
1582 64 : else if (*Conf->AffixData[a2] == '\0')
1583 0 : return a1;
1584 :
1585 : /* Double the size of AffixData if there's not enough space */
1586 64 : if (Conf->nAffixData + 1 >= Conf->lenAffixData)
1587 : {
1588 64 : Conf->lenAffixData *= 2;
1589 64 : Conf->AffixData = (const char **) repalloc(Conf->AffixData,
1590 64 : sizeof(char *) * Conf->lenAffixData);
1591 : }
1592 :
1593 64 : ptr = Conf->AffixData + Conf->nAffixData;
1594 64 : if (Conf->flagMode == FM_NUM)
1595 : {
1596 28 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1597 : strlen(Conf->AffixData[a2]) +
1598 : 1 /* comma */ + 1 /* \0 */ );
1599 :
1600 28 : sprintf(p, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1601 28 : *ptr = p;
1602 : }
1603 : else
1604 : {
1605 36 : char *p = cpalloc(strlen(Conf->AffixData[a1]) +
1606 : strlen(Conf->AffixData[a2]) +
1607 : 1 /* \0 */ );
1608 :
1609 36 : sprintf(p, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1610 36 : *ptr = p;
1611 : }
1612 64 : ptr++;
1613 64 : *ptr = NULL;
1614 64 : Conf->nAffixData++;
1615 :
1616 64 : return Conf->nAffixData - 1;
1617 : }
1618 :
1619 : /*
1620 : * Returns a set of affix parameters which correspondence to the set of affix
1621 : * flags with the given index.
1622 : */
1623 : static uint32
1624 1086 : makeCompoundFlags(IspellDict *Conf, int affix)
1625 : {
1626 : Assert(affix < Conf->nAffixData);
1627 :
1628 1086 : return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
1629 : FF_COMPOUNDFLAGMASK);
1630 : }
1631 :
1632 : /*
1633 : * Makes a prefix tree for the given level.
1634 : *
1635 : * Conf: current dictionary.
1636 : * low: lower index of the Conf->Spell array.
1637 : * high: upper index of the Conf->Spell array.
1638 : * level: current prefix tree level.
1639 : */
1640 : static SPNode *
1641 4344 : mkSPNode(IspellDict *Conf, int low, int high, int level)
1642 : {
1643 : int i;
1644 4344 : int nchar = 0;
1645 4344 : char lastchar = '\0';
1646 : SPNode *rs;
1647 : SPNodeData *data;
1648 4344 : int lownew = low;
1649 :
1650 14276 : for (i = low; i < high; i++)
1651 9932 : if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
1652 : {
1653 4258 : nchar++;
1654 4258 : lastchar = Conf->Spell[i]->word[level];
1655 : }
1656 :
1657 4344 : if (!nchar)
1658 622 : return NULL;
1659 :
1660 3722 : rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
1661 3722 : rs->length = nchar;
1662 3722 : data = rs->data;
1663 :
1664 3722 : lastchar = '\0';
1665 12590 : for (i = low; i < high; i++)
1666 8886 : if (Conf->Spell[i]->p.d.len > level)
1667 : {
1668 6384 : if (lastchar != Conf->Spell[i]->word[level])
1669 : {
1670 4246 : if (lastchar)
1671 : {
1672 : /* Next level of the prefix tree */
1673 524 : data->node = mkSPNode(Conf, lownew, i, level + 1);
1674 512 : lownew = i;
1675 512 : data++;
1676 : }
1677 4234 : lastchar = Conf->Spell[i]->word[level];
1678 : }
1679 6372 : data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
1680 6372 : if (Conf->Spell[i]->p.d.len == level + 1)
1681 : {
1682 1022 : bool clearCompoundOnly = false;
1683 :
1684 1022 : if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
1685 : {
1686 : /*
1687 : * MergeAffix called a few times. If one of word is
1688 : * allowed to be in compound word and another isn't, then
1689 : * clear FF_COMPOUNDONLY flag.
1690 : */
1691 :
1692 128 : clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
1693 64 : & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
1694 : ? false : true;
1695 64 : data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
1696 : }
1697 : else
1698 958 : data->affix = Conf->Spell[i]->p.d.affix;
1699 1022 : data->isword = 1;
1700 :
1701 1022 : data->compoundflag = makeCompoundFlags(Conf, data->affix);
1702 :
1703 1016 : if ((data->compoundflag & FF_COMPOUNDONLY) &&
1704 0 : (data->compoundflag & FF_COMPOUNDFLAG) == 0)
1705 0 : data->compoundflag |= FF_COMPOUNDFLAG;
1706 :
1707 1016 : if (clearCompoundOnly)
1708 64 : data->compoundflag &= ~FF_COMPOUNDONLY;
1709 : }
1710 : }
1711 :
1712 : /* Next level of the prefix tree */
1713 3704 : data->node = mkSPNode(Conf, lownew, high, level + 1);
1714 :
1715 3698 : return rs;
1716 : }
1717 :
1718 : /*
1719 : * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
1720 : * and affixes.
1721 : */
1722 : void
1723 128 : NISortDictionary(IspellDict *Conf)
1724 : {
1725 : int i;
1726 : int naffix;
1727 : int curaffix;
1728 :
1729 : /* compress affixes */
1730 :
1731 : /*
1732 : * If we use flag aliases then we need to use Conf->AffixData filled in
1733 : * the NIImportOOAffixes().
1734 : */
1735 128 : if (Conf->useFlagAliases)
1736 : {
1737 252 : for (i = 0; i < Conf->nspell; i++)
1738 : {
1739 : char *end;
1740 :
1741 232 : if (*Conf->Spell[i]->p.flag != '\0')
1742 : {
1743 212 : curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
1744 212 : if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
1745 6 : ereport(ERROR,
1746 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1747 : errmsg("invalid affix alias \"%s\"",
1748 : Conf->Spell[i]->p.flag)));
1749 206 : if (curaffix < 0 || curaffix >= Conf->nAffixData)
1750 6 : ereport(ERROR,
1751 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1752 : errmsg("invalid affix alias \"%s\"",
1753 : Conf->Spell[i]->p.flag)));
1754 200 : if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
1755 0 : ereport(ERROR,
1756 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1757 : errmsg("invalid affix alias \"%s\"",
1758 : Conf->Spell[i]->p.flag)));
1759 : }
1760 : else
1761 : {
1762 : /*
1763 : * If Conf->Spell[i]->p.flag is empty, then get empty value of
1764 : * Conf->AffixData (0 index).
1765 : */
1766 20 : curaffix = 0;
1767 : }
1768 :
1769 220 : Conf->Spell[i]->p.d.affix = curaffix;
1770 220 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1771 : }
1772 : }
1773 : /* Otherwise fill Conf->AffixData here */
1774 : else
1775 : {
1776 : /* Count the number of different flags used in the dictionary */
1777 96 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *),
1778 : cmpspellaffix);
1779 :
1780 96 : naffix = 0;
1781 940 : for (i = 0; i < Conf->nspell; i++)
1782 : {
1783 844 : if (i == 0 ||
1784 748 : strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
1785 748 : naffix++;
1786 : }
1787 :
1788 : /*
1789 : * Fill in Conf->AffixData with the affixes that were used in the
1790 : * dictionary. Replace textual flag-field of Conf->Spell entries with
1791 : * indexes into Conf->AffixData array.
1792 : */
1793 96 : Conf->AffixData = (const char **) palloc0(naffix * sizeof(const char *));
1794 :
1795 96 : curaffix = -1;
1796 940 : for (i = 0; i < Conf->nspell; i++)
1797 : {
1798 844 : if (i == 0 ||
1799 748 : strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
1800 : {
1801 748 : curaffix++;
1802 : Assert(curaffix < naffix);
1803 748 : Conf->AffixData[curaffix] = cpstrdup(Conf,
1804 748 : Conf->Spell[i]->p.flag);
1805 : }
1806 :
1807 844 : Conf->Spell[i]->p.d.affix = curaffix;
1808 844 : Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1809 : }
1810 :
1811 96 : Conf->lenAffixData = Conf->nAffixData = naffix;
1812 : }
1813 :
1814 : /* Start build a prefix tree */
1815 116 : qsort(Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
1816 116 : Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
1817 110 : }
1818 :
1819 : /*
1820 : * Makes a prefix tree for the given level using the repl string of an affix
1821 : * rule. Affixes with empty replace string do not include in the prefix tree.
1822 : * This affixes are included by mkVoidAffix().
1823 : *
1824 : * Conf: current dictionary.
1825 : * low: lower index of the Conf->Affix array.
1826 : * high: upper index of the Conf->Affix array.
1827 : * level: current prefix tree level.
1828 : * type: FF_SUFFIX or FF_PREFIX.
1829 : */
1830 : static AffixNode *
1831 1856 : mkANode(IspellDict *Conf, int low, int high, int level, int type)
1832 : {
1833 : int i;
1834 1856 : int nchar = 0;
1835 1856 : uint8 lastchar = '\0';
1836 : AffixNode *rs;
1837 : AffixNodeData *data;
1838 1856 : int lownew = low;
1839 : int naff;
1840 : AFFIX **aff;
1841 :
1842 4994 : for (i = low; i < high; i++)
1843 3138 : if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1844 : {
1845 1636 : nchar++;
1846 1636 : lastchar = GETCHAR(Conf->Affix + i, level, type);
1847 : }
1848 :
1849 1856 : if (!nchar)
1850 708 : return NULL;
1851 :
1852 1148 : aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
1853 1148 : naff = 0;
1854 :
1855 1148 : rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1856 1148 : rs->length = nchar;
1857 1148 : data = rs->data;
1858 :
1859 1148 : lastchar = '\0';
1860 3400 : for (i = low; i < high; i++)
1861 2252 : if (Conf->Affix[i].replen > level)
1862 : {
1863 1896 : if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1864 : {
1865 1636 : if (lastchar)
1866 : {
1867 : /* Next level of the prefix tree */
1868 488 : data->node = mkANode(Conf, lownew, i, level + 1, type);
1869 488 : if (naff)
1870 : {
1871 110 : data->naff = naff;
1872 110 : data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
1873 110 : memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1874 110 : naff = 0;
1875 : }
1876 488 : data++;
1877 488 : lownew = i;
1878 : }
1879 1636 : lastchar = GETCHAR(Conf->Affix + i, level, type);
1880 : }
1881 1896 : data->val = GETCHAR(Conf->Affix + i, level, type);
1882 1896 : if (Conf->Affix[i].replen == level + 1)
1883 : { /* affix stopped */
1884 858 : aff[naff++] = Conf->Affix + i;
1885 : }
1886 : }
1887 :
1888 : /* Next level of the prefix tree */
1889 1148 : data->node = mkANode(Conf, lownew, high, level + 1, type);
1890 1148 : if (naff)
1891 : {
1892 708 : data->naff = naff;
1893 708 : data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
1894 708 : memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1895 708 : naff = 0;
1896 : }
1897 :
1898 1148 : pfree(aff);
1899 :
1900 1148 : return rs;
1901 : }
1902 :
1903 : /*
1904 : * Makes the root void node in the prefix tree. The root void node is created
1905 : * for affixes which have empty replace string ("repl" field).
1906 : */
1907 : static void
1908 220 : mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1909 : {
1910 : int i,
1911 220 : cnt = 0;
1912 220 : int start = (issuffix) ? startsuffix : 0;
1913 220 : int end = (issuffix) ? Conf->naffixes : startsuffix;
1914 220 : AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1915 :
1916 220 : Affix->length = 1;
1917 220 : Affix->isvoid = 1;
1918 :
1919 220 : if (issuffix)
1920 : {
1921 110 : Affix->data->node = Conf->Suffix;
1922 110 : Conf->Suffix = Affix;
1923 : }
1924 : else
1925 : {
1926 110 : Affix->data->node = Conf->Prefix;
1927 110 : Conf->Prefix = Affix;
1928 : }
1929 :
1930 : /* Count affixes with empty replace string */
1931 1106 : for (i = start; i < end; i++)
1932 886 : if (Conf->Affix[i].replen == 0)
1933 28 : cnt++;
1934 :
1935 : /* There is not affixes with empty replace string */
1936 220 : if (cnt == 0)
1937 192 : return;
1938 :
1939 28 : Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
1940 28 : Affix->data->naff = (uint32) cnt;
1941 :
1942 28 : cnt = 0;
1943 224 : for (i = start; i < end; i++)
1944 196 : if (Conf->Affix[i].replen == 0)
1945 : {
1946 28 : Affix->data->aff[cnt] = Conf->Affix + i;
1947 28 : cnt++;
1948 : }
1949 : }
1950 :
1951 : /*
1952 : * Checks if the affixflag is used by dictionary. Conf->AffixData does not
1953 : * contain affixflag if this flag is not used actually by the .dict file.
1954 : *
1955 : * Conf: current dictionary.
1956 : * affixflag: affix flag.
1957 : *
1958 : * Returns true if the Conf->AffixData array contains affixflag, otherwise
1959 : * returns false.
1960 : */
1961 : static bool
1962 150 : isAffixInUse(IspellDict *Conf, const char *affixflag)
1963 : {
1964 : int i;
1965 :
1966 1102 : for (i = 0; i < Conf->nAffixData; i++)
1967 1078 : if (IsAffixFlagInUse(Conf, i, affixflag))
1968 126 : return true;
1969 :
1970 24 : return false;
1971 : }
1972 :
1973 : /*
1974 : * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
1975 : */
1976 : void
1977 110 : NISortAffixes(IspellDict *Conf)
1978 : {
1979 : AFFIX *Affix;
1980 : size_t i;
1981 : CMPDAffix *ptr;
1982 110 : int firstsuffix = Conf->naffixes;
1983 :
1984 110 : if (Conf->naffixes == 0)
1985 0 : return;
1986 :
1987 : /* Store compound affixes in the Conf->CompoundAffix array */
1988 110 : if (Conf->naffixes > 1)
1989 110 : qsort(Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1990 110 : Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
1991 110 : ptr->affix = NULL;
1992 :
1993 996 : for (i = 0; i < Conf->naffixes; i++)
1994 : {
1995 886 : Affix = &(((AFFIX *) Conf->Affix)[i]);
1996 886 : if (Affix->type == FF_SUFFIX && i < firstsuffix)
1997 110 : firstsuffix = i;
1998 :
1999 1036 : if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
2000 150 : isAffixInUse(Conf, Affix->flag))
2001 : {
2002 126 : bool issuffix = (Affix->type == FF_SUFFIX);
2003 :
2004 126 : if (ptr == Conf->CompoundAffix ||
2005 80 : issuffix != (ptr - 1)->issuffix ||
2006 40 : strbncmp((const unsigned char *) (ptr - 1)->affix,
2007 40 : (const unsigned char *) Affix->repl,
2008 40 : (ptr - 1)->len))
2009 : {
2010 : /* leave only unique and minimal suffixes */
2011 106 : ptr->affix = Affix->repl;
2012 106 : ptr->len = Affix->replen;
2013 106 : ptr->issuffix = issuffix;
2014 106 : ptr++;
2015 : }
2016 : }
2017 : }
2018 110 : ptr->affix = NULL;
2019 110 : Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
2020 :
2021 : /* Start build a prefix tree */
2022 110 : Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
2023 110 : Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
2024 110 : mkVoidAffix(Conf, true, firstsuffix);
2025 110 : mkVoidAffix(Conf, false, firstsuffix);
2026 : }
2027 :
2028 : static AffixNodeData *
2029 4620 : FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
2030 : {
2031 : AffixNodeData *StopLow,
2032 : *StopHigh,
2033 : *StopMiddle;
2034 : uint8 symbol;
2035 :
2036 4620 : if (node->isvoid)
2037 : { /* search void affixes */
2038 4020 : if (node->data->naff)
2039 342 : return node->data;
2040 3678 : node = node->data->node;
2041 : }
2042 :
2043 5382 : while (node && *level < wrdlen)
2044 : {
2045 5358 : StopLow = node->data;
2046 5358 : StopHigh = node->data + node->length;
2047 11826 : while (StopLow < StopHigh)
2048 : {
2049 8874 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
2050 8874 : symbol = GETWCHAR(word, wrdlen, *level, type);
2051 :
2052 8874 : if (StopMiddle->val == symbol)
2053 : {
2054 2406 : (*level)++;
2055 2406 : if (StopMiddle->naff)
2056 1302 : return StopMiddle;
2057 1104 : node = StopMiddle->node;
2058 1104 : break;
2059 : }
2060 6468 : else if (StopMiddle->val < symbol)
2061 1608 : StopLow = StopMiddle + 1;
2062 : else
2063 4860 : StopHigh = StopMiddle;
2064 : }
2065 4056 : if (StopLow >= StopHigh)
2066 2952 : break;
2067 : }
2068 2976 : return NULL;
2069 : }
2070 :
2071 : static char *
2072 1836 : CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
2073 : {
2074 : /*
2075 : * Check compound allow flags
2076 : */
2077 :
2078 1836 : if (flagflags == 0)
2079 : {
2080 1266 : if (Affix->flagflags & FF_COMPOUNDONLY)
2081 132 : return NULL;
2082 : }
2083 570 : else if (flagflags & FF_COMPOUNDBEGIN)
2084 : {
2085 0 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2086 0 : return NULL;
2087 0 : if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
2088 0 : if (Affix->type == FF_SUFFIX)
2089 0 : return NULL;
2090 : }
2091 570 : else if (flagflags & FF_COMPOUNDMIDDLE)
2092 : {
2093 408 : if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
2094 228 : (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
2095 180 : return NULL;
2096 : }
2097 162 : else if (flagflags & FF_COMPOUNDLAST)
2098 : {
2099 162 : if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2100 0 : return NULL;
2101 162 : if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
2102 150 : if (Affix->type == FF_PREFIX)
2103 0 : return NULL;
2104 : }
2105 :
2106 : /*
2107 : * make replace pattern of affix
2108 : */
2109 1524 : if (Affix->type == FF_SUFFIX)
2110 : {
2111 1044 : strcpy(newword, word);
2112 1044 : strcpy(newword + len - Affix->replen, Affix->find);
2113 1044 : if (baselen) /* store length of non-changed part of word */
2114 1044 : *baselen = len - Affix->replen;
2115 : }
2116 : else
2117 : {
2118 : /*
2119 : * if prefix is an all non-changed part's length then all word
2120 : * contains only prefix and suffix, so out
2121 : */
2122 480 : if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
2123 0 : return NULL;
2124 480 : strcpy(newword, Affix->find);
2125 480 : strcat(newword, word + Affix->replen);
2126 : }
2127 :
2128 : /*
2129 : * check resulting word
2130 : */
2131 1524 : if (Affix->issimple)
2132 480 : return newword;
2133 1044 : else if (Affix->isregis)
2134 : {
2135 708 : if (RS_execute(&(Affix->reg.regis), newword))
2136 672 : return newword;
2137 : }
2138 : else
2139 : {
2140 : pg_wchar *data;
2141 : size_t data_len;
2142 : int newword_len;
2143 :
2144 : /* Convert data string to wide characters */
2145 336 : newword_len = strlen(newword);
2146 336 : data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
2147 336 : data_len = pg_mb2wchar_with_len(newword, data, newword_len);
2148 :
2149 336 : if (pg_regexec(Affix->reg.pregex, data, data_len,
2150 : 0, NULL, 0, NULL, 0) == REG_OKAY)
2151 : {
2152 336 : pfree(data);
2153 336 : return newword;
2154 : }
2155 0 : pfree(data);
2156 : }
2157 :
2158 36 : return NULL;
2159 : }
2160 :
2161 : static int
2162 540 : addToResult(char **forms, char **cur, char *word)
2163 : {
2164 540 : if (cur - forms >= MAX_NORM - 1)
2165 0 : return 0;
2166 540 : if (forms == cur || strcmp(word, *(cur - 1)) != 0)
2167 : {
2168 540 : *cur = pstrdup(word);
2169 540 : *(cur + 1) = NULL;
2170 540 : return 1;
2171 : }
2172 :
2173 0 : return 0;
2174 : }
2175 :
2176 : static char **
2177 1506 : NormalizeSubWord(IspellDict *Conf, const char *word, int flag)
2178 : {
2179 1506 : AffixNodeData *suffix = NULL,
2180 1506 : *prefix = NULL;
2181 1506 : int slevel = 0,
2182 1506 : plevel = 0;
2183 1506 : int wrdlen = strlen(word),
2184 : swrdlen;
2185 : char **forms;
2186 : char **cur;
2187 1506 : char newword[2 * MAXNORMLEN] = "";
2188 1506 : char pnewword[2 * MAXNORMLEN] = "";
2189 1506 : AffixNode *snode = Conf->Suffix,
2190 : *pnode;
2191 : int i,
2192 : j;
2193 :
2194 1506 : if (wrdlen > MAXNORMLEN)
2195 0 : return NULL;
2196 1506 : cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
2197 1506 : *cur = NULL;
2198 :
2199 :
2200 : /* Check that the word itself is normal form */
2201 1506 : if (FindWord(Conf, word, VoidString, flag))
2202 : {
2203 468 : *cur = pstrdup(word);
2204 468 : cur++;
2205 468 : *cur = NULL;
2206 : }
2207 :
2208 : /* Find all other NORMAL forms of the 'word' (check only prefix) */
2209 1506 : pnode = Conf->Prefix;
2210 1506 : plevel = 0;
2211 1722 : while (pnode)
2212 : {
2213 1506 : prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
2214 1506 : if (!prefix)
2215 1290 : break;
2216 432 : for (j = 0; j < prefix->naff; j++)
2217 : {
2218 216 : if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
2219 : {
2220 : /* prefix success */
2221 192 : if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
2222 48 : cur += addToResult(forms, cur, newword);
2223 : }
2224 : }
2225 216 : pnode = prefix->node;
2226 : }
2227 :
2228 : /*
2229 : * Find all other NORMAL forms of the 'word' (check suffix and then
2230 : * prefix)
2231 : */
2232 2598 : while (snode)
2233 : {
2234 2106 : int baselen = 0;
2235 :
2236 : /* find possible suffix */
2237 2106 : suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
2238 2106 : if (!suffix)
2239 1014 : break;
2240 : /* foreach suffix check affix */
2241 2376 : for (i = 0; i < suffix->naff; i++)
2242 : {
2243 1284 : if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
2244 : {
2245 : /* suffix success */
2246 1008 : if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
2247 276 : cur += addToResult(forms, cur, newword);
2248 :
2249 : /* now we will look changed word with prefixes */
2250 1008 : pnode = Conf->Prefix;
2251 1008 : plevel = 0;
2252 1008 : swrdlen = strlen(newword);
2253 1344 : while (pnode)
2254 : {
2255 1008 : prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
2256 1008 : if (!prefix)
2257 672 : break;
2258 672 : for (j = 0; j < prefix->naff; j++)
2259 : {
2260 336 : if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
2261 : {
2262 : /* prefix success */
2263 576 : const char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
2264 288 : VoidString : prefix->aff[j]->flag;
2265 :
2266 288 : if (FindWord(Conf, pnewword, ff, flag))
2267 216 : cur += addToResult(forms, cur, pnewword);
2268 : }
2269 : }
2270 336 : pnode = prefix->node;
2271 : }
2272 : }
2273 : }
2274 :
2275 1092 : snode = suffix->node;
2276 : }
2277 :
2278 1506 : if (cur == forms)
2279 : {
2280 666 : pfree(forms);
2281 666 : return NULL;
2282 : }
2283 840 : return forms;
2284 : }
2285 :
/*
 * One candidate decomposition of a compound word into stems.  Candidates are
 * chained into a singly linked list through 'next'.
 */
typedef struct SplitVar
{
	int			nstem;			/* number of entries of stem[] in use */
	int			lenstem;		/* allocated number of entries in stem[] */
	char	  **stem;			/* array of stem strings */
	struct SplitVar *next;		/* next candidate in the list, or NULL */
} SplitVar;
2293 :
2294 : static int
2295 6060 : CheckCompoundAffixes(CMPDAffix **ptr, const char *word, int len, bool CheckInPlace)
2296 : {
2297 : bool issuffix;
2298 :
2299 : /* in case CompoundAffix is null: */
2300 6060 : if (*ptr == NULL)
2301 0 : return -1;
2302 :
2303 6060 : if (CheckInPlace)
2304 : {
2305 11568 : while ((*ptr)->affix)
2306 : {
2307 6444 : if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
2308 : {
2309 60 : len = (*ptr)->len;
2310 60 : issuffix = (*ptr)->issuffix;
2311 60 : (*ptr)++;
2312 60 : return (issuffix) ? len : 0;
2313 : }
2314 6384 : (*ptr)++;
2315 : }
2316 : }
2317 : else
2318 : {
2319 : char *affbegin;
2320 :
2321 1692 : while ((*ptr)->affix)
2322 : {
2323 942 : if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
2324 : {
2325 126 : len = (*ptr)->len + (affbegin - word);
2326 126 : issuffix = (*ptr)->issuffix;
2327 126 : (*ptr)++;
2328 126 : return (issuffix) ? len : 0;
2329 : }
2330 816 : (*ptr)++;
2331 : }
2332 : }
2333 5874 : return -1;
2334 : }
2335 :
2336 : static SplitVar *
2337 1410 : CopyVar(SplitVar *s, int makedup)
2338 : {
2339 1410 : SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar));
2340 :
2341 1410 : v->next = NULL;
2342 1410 : if (s)
2343 : {
2344 : int i;
2345 :
2346 660 : v->lenstem = s->lenstem;
2347 660 : v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
2348 660 : v->nstem = s->nstem;
2349 1002 : for (i = 0; i < s->nstem; i++)
2350 342 : v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
2351 : }
2352 : else
2353 : {
2354 750 : v->lenstem = 16;
2355 750 : v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
2356 750 : v->nstem = 0;
2357 : }
2358 1410 : return v;
2359 : }
2360 :
2361 : static void
2362 1890 : AddStem(SplitVar *v, char *word)
2363 : {
2364 1890 : if (v->nstem >= v->lenstem)
2365 : {
2366 0 : v->lenstem *= 2;
2367 0 : v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
2368 : }
2369 :
2370 1890 : v->stem[v->nstem] = word;
2371 1890 : v->nstem++;
2372 1890 : }
2373 :
/*
 * SplitToVariants
 *		Build the list of candidate splits of a compound word.
 *
 * Conf		dictionary; its Dictionary trie and CompoundAffix table are used
 * snode	trie node to continue the walk from, or NULL to start from
 *			Conf->Dictionary
 * orig		variant whose stems are copied (with duplicated strings) into the
 *			variant built here; may be NULL for an empty start
 * word, wordlen	the whole word and its length in bytes
 * startpos	offset in 'word' where the stem currently being matched begins
 * minpos	a dictionary word ending at position 'level' is only accepted
 *			when level > minpos; also the starting level when snode is given
 *
 * Returns the variant built by this call.  Variants produced by recursive
 * calls are appended to its 'next' chain.  The function recurses, so the
 * stack depth is checked on entry.
 */
static SplitVar *
SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, const char *word, int wordlen, int startpos, int minpos)
{
	SplitVar   *var = NULL;
	SPNodeData *StopLow,
			   *StopHigh,
			   *StopMiddle = NULL;
	SPNode	   *node = (snode) ? snode : Conf->Dictionary;
	int			level = (snode) ? minpos : startpos;	/* recursive
														 * minpos==level */
	int			lenaff;
	CMPDAffix  *caff;
	char	   *notprobed;		/* one flag byte per position of 'word' */
	int			compoundflag = 0;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	/* initially every position is marked as not yet probed */
	notprobed = (char *) palloc(wordlen);
	memset(notprobed, 1, wordlen);
	var = CopyVar(orig, 1);

	while (level < wordlen)
	{
		/* find word with epenthetic or/and compound affix */
		caff = Conf->CompoundAffix;

		/*
		 * Each evaluation of the loop condition advances caff, so "continue"
		 * below moves on to the next matching compound affix.
		 */
		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
		{
			/*
			 * there is one of compound affixes, so check word for existings
			 */
			char		buf[MAXNORMLEN];
			char	  **subres;

			/* from here on lenaff is the candidate's length from startpos */
			lenaff = level - startpos + lenaff;

			/* a split ending at this position was already recorded */
			if (!notprobed[startpos + lenaff - 1])
				continue;

			if (level + lenaff - 1 <= minpos)
				continue;

			if (lenaff >= MAXNORMLEN)
				continue;		/* skip too big value */
			if (lenaff > 0)
				memcpy(buf, word + startpos, lenaff);
			buf[lenaff] = '\0';

			if (level == 0)
				compoundflag = FF_COMPOUNDBEGIN;
			else if (level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;
			subres = NormalizeSubWord(Conf, buf, compoundflag);
			if (subres)
			{
				/* Yes, it was a word from dictionary */
				SplitVar   *new = CopyVar(var, 0);	/* shares var's strings */
				SplitVar   *ptr = var;
				char	  **sptr = subres;

				notprobed[startpos + lenaff - 1] = 0;

				/* the normalized forms become stems of the new variant */
				while (*sptr)
				{
					AddStem(new, *sptr);
					sptr++;
				}
				pfree(subres);

				/* append the variants for the rest of the word to the list */
				while (ptr->next)
					ptr = ptr->next;
				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);

				/* only the container is freed; the callee duplicated stems */
				pfree(new->stem);
				pfree(new);
			}
		}

		if (!node)
			break;

		/* binary search of the current trie node for the byte at 'level' */
		StopLow = node->data;
		StopHigh = node->data + node->length;
		while (StopLow < StopHigh)
		{
			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
			if (StopMiddle->val == ((uint8 *) (word))[level])
				break;
			else if (StopMiddle->val < ((uint8 *) (word))[level])
				StopLow = StopMiddle + 1;
			else
				StopHigh = StopMiddle;
		}

		/* StopLow < StopHigh here means the search above found the byte */
		if (StopLow < StopHigh)
		{
			if (startpos == 0)
				compoundflag = FF_COMPOUNDBEGIN;
			else if (level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;

			/* find infinitive */
			if (StopMiddle->isword &&
				(StopMiddle->compoundflag & compoundflag) &&
				notprobed[level])
			{
				/* ok, we found full compoundallowed word */
				if (level > minpos)
				{
					/* and its length more than minimal */
					if (wordlen == level + 1)
					{
						/* well, it was last word */
						AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
						pfree(notprobed);
						return var;
					}
					else
					{
						/* then we will search more big word at the same point */
						SplitVar   *ptr = var;

						while (ptr->next)
							ptr = ptr->next;
						ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
						/* we can find next word */
						level++;
						AddStem(var, pnstrdup(word + startpos, level - startpos));
						/* restart the trie walk for the following stem */
						node = Conf->Dictionary;
						startpos = level;
						continue;
					}
				}
			}
			node = StopMiddle->node;
		}
		else
			node = NULL;
		level++;
	}

	/* whatever is left from startpos becomes the final stem */
	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
	pfree(notprobed);
	return var;
}
2523 :
2524 : static void
2525 1314 : addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
2526 : {
2527 1314 : if (*lres == NULL)
2528 606 : *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
2529 :
2530 1314 : if (*lcur - *lres < MAX_NORM - 1)
2531 : {
2532 1314 : (*lcur)->lexeme = word;
2533 1314 : (*lcur)->flags = flags;
2534 1314 : (*lcur)->nvariant = NVariant;
2535 1314 : (*lcur)++;
2536 1314 : (*lcur)->lexeme = NULL;
2537 : }
2538 1314 : }
2539 :
2540 : TSLexeme *
2541 750 : NINormalizeWord(IspellDict *Conf, const char *word)
2542 : {
2543 : char **res;
2544 750 : TSLexeme *lcur = NULL,
2545 750 : *lres = NULL;
2546 750 : uint16 NVariant = 1;
2547 :
2548 750 : res = NormalizeSubWord(Conf, word, 0);
2549 :
2550 750 : if (res)
2551 : {
2552 486 : char **ptr = res;
2553 :
2554 1140 : while (*ptr && (lcur - lres) < MAX_NORM)
2555 : {
2556 654 : addNorm(&lres, &lcur, *ptr, 0, NVariant++);
2557 654 : ptr++;
2558 : }
2559 486 : pfree(res);
2560 : }
2561 :
2562 750 : if (Conf->usecompound)
2563 : {
2564 750 : int wordlen = strlen(word);
2565 : SplitVar *ptr,
2566 750 : *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
2567 : int i;
2568 :
2569 2070 : while (var)
2570 : {
2571 1320 : if (var->nstem > 1)
2572 : {
2573 570 : char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
2574 :
2575 570 : if (subres)
2576 : {
2577 264 : char **subptr = subres;
2578 :
2579 528 : while (*subptr)
2580 : {
2581 660 : for (i = 0; i < var->nstem - 1; i++)
2582 : {
2583 396 : addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
2584 : }
2585 :
2586 264 : addNorm(&lres, &lcur, *subptr, 0, NVariant);
2587 264 : subptr++;
2588 264 : NVariant++;
2589 : }
2590 :
2591 264 : pfree(subres);
2592 264 : var->stem[0] = NULL;
2593 264 : pfree(var->stem[var->nstem - 1]);
2594 : }
2595 : }
2596 :
2597 2742 : for (i = 0; i < var->nstem && var->stem[i]; i++)
2598 1422 : pfree(var->stem[i]);
2599 1320 : ptr = var->next;
2600 1320 : pfree(var->stem);
2601 1320 : pfree(var);
2602 1320 : var = ptr;
2603 : }
2604 : }
2605 :
2606 750 : return lres;
2607 : }
|