Line data Source code
1 : /*
2 : * colorings of characters
3 : * This file is #included by regcomp.c.
4 : *
5 : * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 : *
7 : * Development of this software was funded, in part, by Cray Research Inc.,
8 : * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 : * Corporation, none of whom are responsible for the results. The author
10 : * thanks all of them.
11 : *
12 : * Redistribution and use in source and binary forms -- with or without
13 : * modification -- are permitted for any purpose, provided that
14 : * redistributions in source form retain this entire copyright notice and
15 : * indicate the origin and nature of any modifications.
16 : *
17 : * I'd appreciate being given credit for this package in the documentation
18 : * of software which uses it, but that is not a requirement.
19 : *
20 : * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 : * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 : * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 : * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 : * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 : * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 : * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 : *
31 : * src/backend/regex/regc_color.c
32 : *
33 : *
34 : * Note that there are some incestuous relationships between this code and
35 : * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
36 : */
37 :
38 :
39 :
40 : #define CISERR() VISERR(cm->v)
41 : #define CERR(e) VERR(cm->v, (e))
42 :
43 :
44 :
45 : /*
46 : * initcm - set up new colormap
47 : */
48 : static void
49 4182 : initcm(struct vars *v,
50 : struct colormap *cm)
51 : {
52 : struct colordesc *cd;
53 :
54 4182 : cm->magic = CMMAGIC;
55 4182 : cm->v = v;
56 :
57 4182 : cm->ncds = NINLINECDS;
58 4182 : cm->cd = cm->cdspace;
59 4182 : cm->max = 0;
60 4182 : cm->free = 0;
61 :
62 4182 : cd = cm->cd; /* cm->cd[WHITE] */
63 4182 : cd->nschrs = MAX_SIMPLE_CHR - CHR_MIN + 1;
64 4182 : cd->nuchrs = 1;
65 4182 : cd->sub = NOSUB;
66 4182 : cd->arcs = NULL;
67 4182 : cd->firstchr = CHR_MIN;
68 4182 : cd->flags = 0;
69 :
70 4182 : cm->locolormap = (color *)
71 4182 : MALLOC((MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
72 4182 : if (cm->locolormap == NULL)
73 : {
74 0 : CERR(REG_ESPACE);
75 0 : cm->cmranges = NULL; /* prevent failure during freecm */
76 0 : cm->hicolormap = NULL;
77 0 : return;
78 : }
79 : /* this memset relies on WHITE being zero: */
80 4182 : memset(cm->locolormap, WHITE,
81 : (MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
82 :
83 4182 : memset(cm->classbits, 0, sizeof(cm->classbits));
84 4182 : cm->numcmranges = 0;
85 4182 : cm->cmranges = NULL;
86 4182 : cm->maxarrayrows = 4; /* arbitrary initial allocation */
87 4182 : cm->hiarrayrows = 1; /* but we have only one row/col initially */
88 4182 : cm->hiarraycols = 1;
89 4182 : cm->hicolormap = (color *) MALLOC(cm->maxarrayrows * sizeof(color));
90 4182 : if (cm->hicolormap == NULL)
91 : {
92 0 : CERR(REG_ESPACE);
93 0 : return;
94 : }
95 : /* initialize the "all other characters" row to WHITE */
96 4182 : cm->hicolormap[0] = WHITE;
97 : }
98 :
99 : /*
100 : * freecm - free dynamically-allocated things in a colormap
101 : */
102 : static void
103 772 : freecm(struct colormap *cm)
104 : {
105 772 : cm->magic = 0;
106 772 : if (cm->cd != cm->cdspace)
107 60 : FREE(cm->cd);
108 772 : if (cm->locolormap != NULL)
109 772 : FREE(cm->locolormap);
110 772 : if (cm->cmranges != NULL)
111 8 : FREE(cm->cmranges);
112 772 : if (cm->hicolormap != NULL)
113 772 : FREE(cm->hicolormap);
114 772 : }
115 :
116 : /*
117 : * pg_reg_getcolor - slow case of GETCOLOR()
118 : */
119 : color
120 61 : pg_reg_getcolor(struct colormap *cm, chr c)
121 : {
122 : int rownum,
123 : colnum,
124 : low,
125 : high;
126 :
127 : /* Should not be used for chrs in the locolormap */
128 : assert(c > MAX_SIMPLE_CHR);
129 :
130 : /*
131 : * Find which row it's in. The colormapranges are in order, so we can use
132 : * binary search.
133 : */
134 61 : rownum = 0; /* if no match, use array row zero */
135 61 : low = 0;
136 61 : high = cm->numcmranges;
137 94 : while (low < high)
138 : {
139 56 : int middle = low + (high - low) / 2;
140 56 : const colormaprange *cmr = &cm->cmranges[middle];
141 :
142 56 : if (c < cmr->cmin)
143 28 : high = middle;
144 28 : else if (c > cmr->cmax)
145 5 : low = middle + 1;
146 : else
147 : {
148 23 : rownum = cmr->rownum; /* found a match */
149 23 : break;
150 : }
151 : }
152 :
153 : /*
154 : * Find which column it's in --- this is all locale-dependent.
155 : */
156 61 : if (cm->hiarraycols > 1)
157 : {
158 38 : colnum = cclass_column_index(cm, c);
159 38 : return cm->hicolormap[rownum * cm->hiarraycols + colnum];
160 : }
161 : else
162 : {
163 : /* fast path if no relevant cclasses */
164 23 : return cm->hicolormap[rownum];
165 : }
166 : }
167 :
168 : /*
169 : * maxcolor - report largest color number in use
170 : */
171 : static color
172 10066 : maxcolor(struct colormap *cm)
173 : {
174 10066 : if (CISERR())
175 0 : return COLORLESS;
176 :
177 10066 : return (color) cm->max;
178 : }
179 :
180 : /*
181 : * newcolor - find a new color (must be assigned at once)
182 : * Beware: may relocate the colordescs.
183 : */
184 : static color /* COLORLESS for error */
185 40882 : newcolor(struct colormap *cm)
186 : {
187 : struct colordesc *cd;
188 : size_t n;
189 :
190 40882 : if (CISERR())
191 2 : return COLORLESS;
192 :
193 40880 : if (cm->free != 0)
194 : {
195 : assert(cm->free > 0);
196 : assert((size_t) cm->free < cm->ncds);
197 179 : cd = &cm->cd[cm->free];
198 : assert(UNUSEDCOLOR(cd));
199 : assert(cd->arcs == NULL);
200 179 : cm->free = cd->sub;
201 : }
202 40701 : else if (cm->max < cm->ncds - 1)
203 : {
204 38460 : cm->max++;
205 38460 : cd = &cm->cd[cm->max];
206 : }
207 : else
208 : {
209 : /* oops, must allocate more */
210 : struct colordesc *newCd;
211 :
212 2241 : if (cm->max == MAX_COLOR)
213 : {
214 0 : CERR(REG_ECOLORS);
215 0 : return COLORLESS; /* too many colors */
216 : }
217 :
218 2241 : n = cm->ncds * 2;
219 2241 : if (n > MAX_COLOR + 1)
220 0 : n = MAX_COLOR + 1;
221 2241 : if (cm->cd == cm->cdspace)
222 : {
223 2184 : newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
224 2184 : if (newCd != NULL)
225 2184 : memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
226 : sizeof(struct colordesc));
227 : }
228 : else
229 : newCd = (struct colordesc *)
230 57 : REALLOC(cm->cd, n * sizeof(struct colordesc));
231 2241 : if (newCd == NULL)
232 : {
233 0 : CERR(REG_ESPACE);
234 0 : return COLORLESS;
235 : }
236 2241 : cm->cd = newCd;
237 2241 : cm->ncds = n;
238 : assert(cm->max < cm->ncds - 1);
239 2241 : cm->max++;
240 2241 : cd = &cm->cd[cm->max];
241 : }
242 :
243 40880 : cd->nschrs = 0;
244 40880 : cd->nuchrs = 0;
245 40880 : cd->sub = NOSUB;
246 40880 : cd->arcs = NULL;
247 40880 : cd->firstchr = CHR_MIN; /* in case never set otherwise */
248 40880 : cd->flags = 0;
249 :
250 40880 : return (color) (cd - cm->cd);
251 : }
252 :
253 : /*
254 : * freecolor - free a color (must have no arcs or subcolor)
255 : */
256 : static void
257 226 : freecolor(struct colormap *cm,
258 : color co)
259 : {
260 226 : struct colordesc *cd = &cm->cd[co];
261 : color pco,
262 : nco; /* for freelist scan */
263 :
264 : assert(co >= 0);
265 226 : if (co == WHITE)
266 0 : return;
267 :
268 : assert(cd->arcs == NULL);
269 : assert(cd->sub == NOSUB);
270 : assert(cd->nschrs == 0);
271 : assert(cd->nuchrs == 0);
272 226 : cd->flags = FREECOL;
273 :
274 226 : if ((size_t) co == cm->max)
275 : {
276 92 : while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
277 47 : cm->max--;
278 : assert(cm->free >= 0);
279 47 : while ((size_t) cm->free > cm->max)
280 2 : cm->free = cm->cd[cm->free].sub;
281 45 : if (cm->free > 0)
282 : {
283 : assert(cm->free < cm->max);
284 3 : pco = cm->free;
285 3 : nco = cm->cd[pco].sub;
286 3 : while (nco > 0)
287 0 : if ((size_t) nco > cm->max)
288 : {
289 : /* take this one out of freelist */
290 0 : nco = cm->cd[nco].sub;
291 0 : cm->cd[pco].sub = nco;
292 : }
293 : else
294 : {
295 : assert(nco < cm->max);
296 0 : pco = nco;
297 0 : nco = cm->cd[pco].sub;
298 : }
299 : }
300 : }
301 : else
302 : {
303 181 : cd->sub = cm->free;
304 181 : cm->free = (color) (cd - cm->cd);
305 : }
306 : }
307 :
308 : /*
309 : * pseudocolor - allocate a false color, to be managed by other means
310 : */
311 : static color
312 16252 : pseudocolor(struct colormap *cm)
313 : {
314 : color co;
315 : struct colordesc *cd;
316 :
317 16252 : co = newcolor(cm);
318 16252 : if (CISERR())
319 0 : return COLORLESS;
320 16252 : cd = &cm->cd[co];
321 16252 : cd->nschrs = 0;
322 16252 : cd->nuchrs = 1; /* pretend it is in the upper map */
323 16252 : cd->sub = NOSUB;
324 16252 : cd->arcs = NULL;
325 16252 : cd->firstchr = CHR_MIN;
326 16252 : cd->flags = PSEUDO;
327 16252 : return co;
328 : }
329 :
330 : /*
331 : * subcolor - allocate a new subcolor (if necessary) to this chr
332 : *
333 : * This works only for chrs that map into the low color map.
334 : */
335 : static color
336 329471 : subcolor(struct colormap *cm, chr c)
337 : {
338 : color co; /* current color of c */
339 : color sco; /* new subcolor */
340 :
341 : assert(c <= MAX_SIMPLE_CHR);
342 :
343 329471 : co = cm->locolormap[c - CHR_MIN];
344 329471 : sco = newsub(cm, co);
345 329471 : if (CISERR())
346 2 : return COLORLESS;
347 : assert(sco != COLORLESS);
348 :
349 329469 : if (co == sco) /* already in an open subcolor */
350 19870 : return co; /* rest is redundant */
351 309599 : cm->cd[co].nschrs--;
352 309599 : if (cm->cd[sco].nschrs == 0)
353 24580 : cm->cd[sco].firstchr = c;
354 309599 : cm->cd[sco].nschrs++;
355 309599 : cm->locolormap[c - CHR_MIN] = sco;
356 309599 : return sco;
357 : }
358 :
359 : /*
360 : * subcolorhi - allocate a new subcolor (if necessary) to this colormap entry
361 : *
362 : * This is the same processing as subcolor(), but for entries in the high
363 : * colormap, which do not necessarily correspond to exactly one chr code.
364 : */
365 : static color
366 963 : subcolorhi(struct colormap *cm, color *pco)
367 : {
368 : color co; /* current color of entry */
369 : color sco; /* new subcolor */
370 :
371 963 : co = *pco;
372 963 : sco = newsub(cm, co);
373 963 : if (CISERR())
374 0 : return COLORLESS;
375 : assert(sco != COLORLESS);
376 :
377 963 : if (co == sco) /* already in an open subcolor */
378 10 : return co; /* rest is redundant */
379 953 : cm->cd[co].nuchrs--;
380 953 : cm->cd[sco].nuchrs++;
381 953 : *pco = sco;
382 953 : return sco;
383 : }
384 :
385 : /*
386 : * newsub - allocate a new subcolor (if necessary) for a color
387 : */
388 : static color
389 330434 : newsub(struct colormap *cm,
390 : color co)
391 : {
392 : color sco; /* new subcolor */
393 :
394 330434 : sco = cm->cd[co].sub;
395 330434 : if (sco == NOSUB)
396 : { /* color has no open subcolor */
397 : /* optimization: singly-referenced color need not be subcolored */
398 44498 : if ((cm->cd[co].nschrs + cm->cd[co].nuchrs) == 1)
399 19868 : return co;
400 24630 : sco = newcolor(cm); /* must create subcolor */
401 24630 : if (sco == COLORLESS)
402 : {
403 : assert(CISERR());
404 2 : return COLORLESS;
405 : }
406 24628 : cm->cd[co].sub = sco;
407 24628 : cm->cd[sco].sub = sco; /* open subcolor points to self */
408 : }
409 : assert(sco != NOSUB);
410 :
411 310564 : return sco;
412 : }
413 :
414 : /*
415 : * newhicolorrow - get a new row in the hicolormap, cloning it from oldrow
416 : *
417 : * Returns array index of new row. Note the array might move.
418 : */
419 : static int
420 146 : newhicolorrow(struct colormap *cm,
421 : int oldrow)
422 : {
423 146 : int newrow = cm->hiarrayrows;
424 : color *newrowptr;
425 : int i;
426 :
427 : /* Assign a fresh array row index, enlarging storage if needed */
428 146 : if (newrow >= cm->maxarrayrows)
429 : {
430 : color *newarray;
431 :
432 7 : if (cm->maxarrayrows >= INT_MAX / (cm->hiarraycols * 2))
433 : {
434 0 : CERR(REG_ESPACE);
435 0 : return 0;
436 : }
437 7 : newarray = (color *) REALLOC(cm->hicolormap,
438 : cm->maxarrayrows * 2 *
439 : cm->hiarraycols * sizeof(color));
440 7 : if (newarray == NULL)
441 : {
442 0 : CERR(REG_ESPACE);
443 0 : return 0;
444 : }
445 7 : cm->hicolormap = newarray;
446 7 : cm->maxarrayrows *= 2;
447 : }
448 146 : cm->hiarrayrows++;
449 :
450 : /* Copy old row data */
451 146 : newrowptr = &cm->hicolormap[newrow * cm->hiarraycols];
452 146 : memcpy(newrowptr,
453 146 : &cm->hicolormap[oldrow * cm->hiarraycols],
454 146 : cm->hiarraycols * sizeof(color));
455 :
456 : /* Increase color reference counts to reflect new colormap entries */
457 691 : for (i = 0; i < cm->hiarraycols; i++)
458 545 : cm->cd[newrowptr[i]].nuchrs++;
459 :
460 146 : return newrow;
461 : }
462 :
463 : /*
464 : * newhicolorcols - create a new set of columns in the high colormap
465 : *
466 : * Essentially, extends the 2-D array to the right with a copy of itself.
467 : */
468 : static void
469 362 : newhicolorcols(struct colormap *cm)
470 : {
471 : color *newarray;
472 : int r,
473 : c;
474 :
475 362 : if (cm->hiarraycols >= INT_MAX / (cm->maxarrayrows * 2))
476 : {
477 0 : CERR(REG_ESPACE);
478 0 : return;
479 : }
480 362 : newarray = (color *) REALLOC(cm->hicolormap,
481 : cm->maxarrayrows *
482 : cm->hiarraycols * 2 * sizeof(color));
483 362 : if (newarray == NULL)
484 : {
485 0 : CERR(REG_ESPACE);
486 0 : return;
487 : }
488 362 : cm->hicolormap = newarray;
489 :
490 : /* Duplicate existing columns to the right, and increase ref counts */
491 : /* Must work backwards in the array because we realloc'd in place */
492 724 : for (r = cm->hiarrayrows - 1; r >= 0; r--)
493 : {
494 362 : color *oldrowptr = &newarray[r * cm->hiarraycols];
495 362 : color *newrowptr = &newarray[r * cm->hiarraycols * 2];
496 362 : color *newrowptr2 = newrowptr + cm->hiarraycols;
497 :
498 738 : for (c = 0; c < cm->hiarraycols; c++)
499 : {
500 376 : color co = oldrowptr[c];
501 :
502 376 : newrowptr[c] = newrowptr2[c] = co;
503 376 : cm->cd[co].nuchrs++;
504 : }
505 : }
506 :
507 362 : cm->hiarraycols *= 2;
508 : }
509 :
510 : /*
511 : * subcolorcvec - allocate new subcolors to cvec members, fill in arcs
512 : *
513 : * For each chr "c" represented by the cvec, do the equivalent of
514 : * newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
515 : *
516 : * Note that in typical cases, many of the subcolors are the same.
517 : * While newarc() would discard duplicate arc requests, we can save
518 : * some cycles by not calling it repetitively to begin with. This is
519 : * mechanized with the "lastsubcolor" state variable.
520 : */
521 : static void
522 1615 : subcolorcvec(struct vars *v,
523 : struct cvec *cv,
524 : struct state *lp,
525 : struct state *rp)
526 : {
527 1615 : struct colormap *cm = v->cm;
528 1615 : color lastsubcolor = COLORLESS;
529 : chr ch,
530 : from,
531 : to;
532 : const chr *p;
533 : int i;
534 :
535 : /* ordinary characters */
536 10282 : for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
537 : {
538 8667 : ch = *p;
539 8667 : subcoloronechr(v, ch, lp, rp, &lastsubcolor);
540 8667 : NOERR();
541 : }
542 :
543 : /* and the ranges */
544 8505 : for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
545 : {
546 6890 : from = *p;
547 6890 : to = *(p + 1);
548 6890 : if (from <= MAX_SIMPLE_CHR)
549 : {
550 : /* deal with simple chars one at a time */
551 6882 : chr lim = (to <= MAX_SIMPLE_CHR) ? to : MAX_SIMPLE_CHR;
552 :
553 286055 : while (from <= lim)
554 : {
555 279173 : color sco = subcolor(cm, from);
556 :
557 279173 : NOERR();
558 279173 : if (sco != lastsubcolor)
559 : {
560 2040 : newarc(v->nfa, PLAIN, sco, lp, rp);
561 2040 : NOERR();
562 2040 : lastsubcolor = sco;
563 : }
564 279173 : from++;
565 : }
566 : }
567 : /* deal with any part of the range that's above MAX_SIMPLE_CHR */
568 6890 : if (from < to)
569 8 : subcoloronerange(v, from, to, lp, rp, &lastsubcolor);
570 6882 : else if (from == to)
571 0 : subcoloronechr(v, from, lp, rp, &lastsubcolor);
572 6890 : NOERR();
573 : }
574 :
575 : /* and deal with cclass if any */
576 1615 : if (cv->cclasscode >= 0)
577 : {
578 : int classbit;
579 : color *pco;
580 : int r,
581 : c;
582 :
583 : /* Enlarge array if we don't have a column bit assignment for cclass */
584 415 : if (cm->classbits[cv->cclasscode] == 0)
585 : {
586 362 : cm->classbits[cv->cclasscode] = cm->hiarraycols;
587 362 : newhicolorcols(cm);
588 362 : NOERR();
589 : }
590 : /* Apply subcolorhi() and make arc for each entry in relevant cols */
591 415 : classbit = cm->classbits[cv->cclasscode];
592 415 : pco = cm->hicolormap;
593 830 : for (r = 0; r < cm->hiarrayrows; r++)
594 : {
595 1273 : for (c = 0; c < cm->hiarraycols; c++)
596 : {
597 858 : if (c & classbit)
598 : {
599 429 : color sco = subcolorhi(cm, pco);
600 :
601 429 : NOERR();
602 : /* add the arc if needed */
603 429 : if (sco != lastsubcolor)
604 : {
605 39 : newarc(v->nfa, PLAIN, sco, lp, rp);
606 39 : NOERR();
607 39 : lastsubcolor = sco;
608 : }
609 : }
610 858 : pco++;
611 : }
612 : }
613 : }
614 : }
615 :
616 : /*
617 : * subcoloronechr - do subcolorcvec's work for a singleton chr
618 : *
619 : * We could just let subcoloronerange do this, but it's a bit more efficient
620 : * if we exploit the single-chr case. Also, callers find it useful for this
621 : * to be able to handle both low and high chr codes.
622 : */
623 : static void
624 50096 : subcoloronechr(struct vars *v,
625 : chr ch,
626 : struct state *lp,
627 : struct state *rp,
628 : color *lastsubcolor)
629 : {
630 50096 : struct colormap *cm = v->cm;
631 : colormaprange *newranges;
632 : int numnewranges;
633 : colormaprange *oldrange;
634 : int oldrangen;
635 : int newrow;
636 :
637 : /* Easy case for low chr codes */
638 50096 : if (ch <= MAX_SIMPLE_CHR)
639 : {
640 49963 : color sco = subcolor(cm, ch);
641 :
642 49963 : NOERR();
643 49961 : if (sco != *lastsubcolor)
644 : {
645 42584 : newarc(v->nfa, PLAIN, sco, lp, rp);
646 42584 : *lastsubcolor = sco;
647 : }
648 49961 : return;
649 : }
650 :
651 : /*
652 : * Potentially, we could need two more colormapranges than we have now, if
653 : * the given chr is in the middle of some existing range.
654 : */
655 : newranges = (colormaprange *)
656 133 : MALLOC((cm->numcmranges + 2) * sizeof(colormaprange));
657 133 : if (newranges == NULL)
658 : {
659 0 : CERR(REG_ESPACE);
660 0 : return;
661 : }
662 133 : numnewranges = 0;
663 :
664 : /* Ranges before target are unchanged */
665 133 : for (oldrange = cm->cmranges, oldrangen = 0;
666 7400 : oldrangen < cm->numcmranges;
667 7267 : oldrange++, oldrangen++)
668 : {
669 7276 : if (oldrange->cmax >= ch)
670 9 : break;
671 7267 : newranges[numnewranges++] = *oldrange;
672 : }
673 :
674 : /* Match target chr against current range */
675 133 : if (oldrangen >= cm->numcmranges || oldrange->cmin > ch)
676 : {
677 : /* chr does not belong to any existing range, make a new one */
678 128 : newranges[numnewranges].cmin = ch;
679 128 : newranges[numnewranges].cmax = ch;
680 : /* row state should be cloned from the "all others" row */
681 128 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
682 128 : numnewranges++;
683 : }
684 5 : else if (oldrange->cmin == oldrange->cmax)
685 : {
686 : /* we have an existing singleton range matching the chr */
687 1 : newranges[numnewranges++] = *oldrange;
688 1 : newrow = oldrange->rownum;
689 : /* we've now fully processed this old range */
690 1 : oldrange++, oldrangen++;
691 : }
692 : else
693 : {
694 : /* chr is a subset of this existing range, must split it */
695 4 : if (ch > oldrange->cmin)
696 : {
697 : /* emit portion of old range before chr */
698 4 : newranges[numnewranges].cmin = oldrange->cmin;
699 4 : newranges[numnewranges].cmax = ch - 1;
700 4 : newranges[numnewranges].rownum = oldrange->rownum;
701 4 : numnewranges++;
702 : }
703 : /* emit chr as singleton range, initially cloning from range */
704 4 : newranges[numnewranges].cmin = ch;
705 4 : newranges[numnewranges].cmax = ch;
706 4 : newranges[numnewranges].rownum = newrow =
707 4 : newhicolorrow(cm, oldrange->rownum);
708 4 : numnewranges++;
709 4 : if (ch < oldrange->cmax)
710 : {
711 : /* emit portion of old range after chr */
712 4 : newranges[numnewranges].cmin = ch + 1;
713 4 : newranges[numnewranges].cmax = oldrange->cmax;
714 : /* must clone the row if we are making two new ranges from old */
715 4 : newranges[numnewranges].rownum =
716 4 : (ch > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
717 : oldrange->rownum;
718 4 : numnewranges++;
719 : }
720 : /* we've now fully processed this old range */
721 4 : oldrange++, oldrangen++;
722 : }
723 :
724 : /* Update colors in newrow and create arcs as needed */
725 133 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
726 :
727 : /* Ranges after target are unchanged */
728 630 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
729 : {
730 497 : newranges[numnewranges++] = *oldrange;
731 : }
732 :
733 : /* Assert our original space estimate was adequate */
734 : assert(numnewranges <= (cm->numcmranges + 2));
735 :
736 : /* And finally, store back the updated list of ranges */
737 133 : if (cm->cmranges != NULL)
738 128 : FREE(cm->cmranges);
739 133 : cm->cmranges = newranges;
740 133 : cm->numcmranges = numnewranges;
741 : }
742 :
743 : /*
744 : * subcoloronerange - do subcolorcvec's work for a high range
745 : */
746 : static void
747 8 : subcoloronerange(struct vars *v,
748 : chr from,
749 : chr to,
750 : struct state *lp,
751 : struct state *rp,
752 : color *lastsubcolor)
753 : {
754 8 : struct colormap *cm = v->cm;
755 : colormaprange *newranges;
756 : int numnewranges;
757 : colormaprange *oldrange;
758 : int oldrangen;
759 : int newrow;
760 :
761 : /* Caller should take care of non-high-range cases */
762 : assert(from > MAX_SIMPLE_CHR);
763 : assert(from < to);
764 :
765 : /*
766 : * Potentially, if we have N non-adjacent ranges, we could need as many as
767 : * 2N+1 result ranges (consider case where new range spans 'em all).
768 : */
769 : newranges = (colormaprange *)
770 8 : MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange));
771 8 : if (newranges == NULL)
772 : {
773 0 : CERR(REG_ESPACE);
774 0 : return;
775 : }
776 8 : numnewranges = 0;
777 :
778 : /* Ranges before target are unchanged */
779 8 : for (oldrange = cm->cmranges, oldrangen = 0;
780 14 : oldrangen < cm->numcmranges;
781 6 : oldrange++, oldrangen++)
782 : {
783 10 : if (oldrange->cmax >= from)
784 4 : break;
785 6 : newranges[numnewranges++] = *oldrange;
786 : }
787 :
788 : /*
789 : * Deal with ranges that (partially) overlap the target. As we process
790 : * each such range, increase "from" to remove the dealt-with characters
791 : * from the target range.
792 : */
793 11 : while (oldrangen < cm->numcmranges && oldrange->cmin <= to)
794 : {
795 3 : if (from < oldrange->cmin)
796 : {
797 : /* Handle portion of new range that corresponds to no old range */
798 1 : newranges[numnewranges].cmin = from;
799 1 : newranges[numnewranges].cmax = oldrange->cmin - 1;
800 : /* row state should be cloned from the "all others" row */
801 1 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
802 1 : numnewranges++;
803 : /* Update colors in newrow and create arcs as needed */
804 1 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
805 : /* We've now fully processed the part of new range before old */
806 1 : from = oldrange->cmin;
807 : }
808 :
809 3 : if (from <= oldrange->cmin && to >= oldrange->cmax)
810 : {
811 : /* old range is fully contained in new, process it in-place */
812 1 : newranges[numnewranges++] = *oldrange;
813 1 : newrow = oldrange->rownum;
814 1 : from = oldrange->cmax + 1;
815 : }
816 : else
817 : {
818 : /* some part of old range does not overlap new range */
819 2 : if (from > oldrange->cmin)
820 : {
821 : /* emit portion of old range before new range */
822 1 : newranges[numnewranges].cmin = oldrange->cmin;
823 1 : newranges[numnewranges].cmax = from - 1;
824 1 : newranges[numnewranges].rownum = oldrange->rownum;
825 1 : numnewranges++;
826 : }
827 : /* emit common subrange, initially cloning from old range */
828 2 : newranges[numnewranges].cmin = from;
829 2 : newranges[numnewranges].cmax =
830 2 : (to < oldrange->cmax) ? to : oldrange->cmax;
831 2 : newranges[numnewranges].rownum = newrow =
832 2 : newhicolorrow(cm, oldrange->rownum);
833 2 : numnewranges++;
834 2 : if (to < oldrange->cmax)
835 : {
836 : /* emit portion of old range after new range */
837 1 : newranges[numnewranges].cmin = to + 1;
838 1 : newranges[numnewranges].cmax = oldrange->cmax;
839 : /* must clone the row if we are making two new ranges from old */
840 1 : newranges[numnewranges].rownum =
841 1 : (from > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
842 : oldrange->rownum;
843 1 : numnewranges++;
844 : }
845 2 : from = oldrange->cmax + 1;
846 : }
847 : /* Update colors in newrow and create arcs as needed */
848 3 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
849 : /* we've now fully processed this old range */
850 3 : oldrange++, oldrangen++;
851 : }
852 :
853 8 : if (from <= to)
854 : {
855 : /* Handle portion of new range that corresponds to no old range */
856 7 : newranges[numnewranges].cmin = from;
857 7 : newranges[numnewranges].cmax = to;
858 : /* row state should be cloned from the "all others" row */
859 7 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
860 7 : numnewranges++;
861 : /* Update colors in newrow and create arcs as needed */
862 7 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
863 : }
864 :
865 : /* Ranges after target are unchanged */
866 135 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
867 : {
868 127 : newranges[numnewranges++] = *oldrange;
869 : }
870 :
871 : /* Assert our original space estimate was adequate */
872 : assert(numnewranges <= (cm->numcmranges * 2 + 1));
873 :
874 : /* And finally, store back the updated list of ranges */
875 8 : if (cm->cmranges != NULL)
876 5 : FREE(cm->cmranges);
877 8 : cm->cmranges = newranges;
878 8 : cm->numcmranges = numnewranges;
879 : }
880 :
881 : /*
882 : * subcoloronerow - do subcolorcvec's work for one new row in the high colormap
883 : */
884 : static void
885 144 : subcoloronerow(struct vars *v,
886 : int rownum,
887 : struct state *lp,
888 : struct state *rp,
889 : color *lastsubcolor)
890 : {
891 144 : struct colormap *cm = v->cm;
892 : color *pco;
893 : int i;
894 :
895 : /* Apply subcolorhi() and make arc for each entry in row */
896 144 : pco = &cm->hicolormap[rownum * cm->hiarraycols];
897 678 : for (i = 0; i < cm->hiarraycols; pco++, i++)
898 : {
899 534 : color sco = subcolorhi(cm, pco);
900 :
901 534 : NOERR();
902 : /* make the arc if needed */
903 534 : if (sco != *lastsubcolor)
904 : {
905 534 : newarc(v->nfa, PLAIN, sco, lp, rp);
906 534 : NOERR();
907 534 : *lastsubcolor = sco;
908 : }
909 : }
910 : }
911 :
912 : /*
913 : * okcolors - promote subcolors to full colors
914 : */
915 : static void
916 43041 : okcolors(struct nfa *nfa,
917 : struct colormap *cm)
918 : {
919 : struct colordesc *cd;
920 43041 : struct colordesc *end = CDEND(cm);
921 : struct colordesc *scd;
922 : struct arc *a;
923 : color co;
924 : color sco;
925 :
926 341875 : for (cd = cm->cd, co = 0; cd < end; cd++, co++)
927 : {
928 298834 : sco = cd->sub;
929 298834 : if (UNUSEDCOLOR(cd) || sco == NOSUB)
930 : {
931 : /* has no subcolor, no further action */
932 : }
933 24693 : else if (sco == co)
934 : {
935 : /* is subcolor, let parent deal with it */
936 : }
937 24628 : else if (cd->nschrs == 0 && cd->nuchrs == 0)
938 : {
939 : /*
940 : * Parent is now empty, so just change all its arcs to the
941 : * subcolor, then free the parent.
942 : *
943 : * It is not obvious that simply relabeling the arcs like this is
944 : * OK; it appears to risk creating duplicate arcs. We are
945 : * basically relying on the assumption that processing of a
946 : * bracket expression can't create arcs of both a color and its
947 : * subcolor between the bracket's endpoints.
948 : */
949 226 : cd->sub = NOSUB;
950 226 : scd = &cm->cd[sco];
951 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
952 : assert(scd->sub == sco);
953 226 : scd->sub = NOSUB;
954 1474 : while ((a = cd->arcs) != NULL)
955 : {
956 : assert(a->co == co);
957 1248 : uncolorchain(cm, a);
958 1248 : a->co = sco;
959 1248 : colorchain(cm, a);
960 : }
961 226 : freecolor(cm, co);
962 : }
963 : else
964 : {
965 : /* parent's arcs must gain parallel subcolor arcs */
966 24402 : cd->sub = NOSUB;
967 24402 : scd = &cm->cd[sco];
968 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
969 : assert(scd->sub == sco);
970 24402 : scd->sub = NOSUB;
971 24926 : for (a = cd->arcs; a != NULL; a = a->colorchain)
972 : {
973 : assert(a->co == co);
974 524 : newarc(nfa, a->type, sco, a->from, a->to);
975 : }
976 : }
977 : }
978 43041 : }
979 :
980 : /*
981 : * colorchain - add this arc to the color chain of its color
982 : */
983 : static void
984 804557 : colorchain(struct colormap *cm,
985 : struct arc *a)
986 : {
987 804557 : struct colordesc *cd = &cm->cd[a->co];
988 :
989 : assert(a->co >= 0);
990 804557 : if (cd->arcs != NULL)
991 768009 : cd->arcs->colorchainRev = a;
992 804557 : a->colorchain = cd->arcs;
993 804557 : a->colorchainRev = NULL;
994 804557 : cd->arcs = a;
995 804557 : }
996 :
997 : /*
998 : * uncolorchain - delete this arc from the color chain of its color
999 : */
1000 : static void
1001 372671 : uncolorchain(struct colormap *cm,
1002 : struct arc *a)
1003 : {
1004 372671 : struct colordesc *cd = &cm->cd[a->co];
1005 372671 : struct arc *aa = a->colorchainRev;
1006 :
1007 : assert(a->co >= 0);
1008 372671 : if (aa == NULL)
1009 : {
1010 : assert(cd->arcs == a);
1011 91084 : cd->arcs = a->colorchain;
1012 : }
1013 : else
1014 : {
1015 : assert(aa->colorchain == a);
1016 281587 : aa->colorchain = a->colorchain;
1017 : }
1018 372671 : if (a->colorchain != NULL)
1019 343498 : a->colorchain->colorchainRev = aa;
1020 372671 : a->colorchain = NULL; /* paranoia */
1021 372671 : a->colorchainRev = NULL;
1022 372671 : }
1023 :
1024 : /*
1025 : * rainbow - add arcs of all full colors (but one) between specified states
1026 : *
1027 : * If there isn't an exception color, we now generate just a single arc
1028 : * labeled RAINBOW, saving lots of arc-munging later on.
1029 : */
1030 : static void
1031 23567 : rainbow(struct nfa *nfa,
1032 : struct colormap *cm,
1033 : int type,
1034 : color but, /* COLORLESS if no exceptions */
1035 : struct state *from,
1036 : struct state *to)
1037 : {
1038 : struct colordesc *cd;
1039 23567 : struct colordesc *end = CDEND(cm);
1040 : color co;
1041 :
1042 23567 : if (but == COLORLESS)
1043 : {
1044 23305 : newarc(nfa, type, RAINBOW, from, to);
1045 23305 : return;
1046 : }
1047 :
1048 : /* Gotta do it the hard way. Skip subcolors, pseudocolors, and "but" */
1049 1975 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1050 1713 : if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
1051 1451 : !(cd->flags & PSEUDO))
1052 1451 : newarc(nfa, type, co, from, to);
1053 : }
1054 :
1055 : /*
1056 : * colorcomplement - add arcs of complementary colors
1057 : *
1058 : * We add arcs of all colors that are not pseudocolors and do not match
1059 : * any of the "of" state's PLAIN outarcs.
1060 : *
1061 : * The calling sequence ought to be reconciled with cloneouts().
1062 : */
1063 : static void
1064 319 : colorcomplement(struct nfa *nfa,
1065 : struct colormap *cm,
1066 : int type,
1067 : struct state *of,
1068 : struct state *from,
1069 : struct state *to)
1070 : {
1071 : struct colordesc *cd;
1072 319 : struct colordesc *end = CDEND(cm);
1073 : color co;
1074 : struct arc *a;
1075 :
1076 : assert(of != from);
1077 :
1078 : /*
1079 : * A RAINBOW arc matches all colors, making the complement empty. But we
1080 : * can't just return without making any arcs, because that would leave the
1081 : * NFA disconnected which would break any future delsub(). Instead, make
1082 : * a CANTMATCH arc. Also set the HASCANTMATCH flag so we know we need to
1083 : * clean that up at the start of NFA optimization.
1084 : */
1085 319 : if (findarc(of, PLAIN, RAINBOW) != NULL)
1086 : {
1087 3 : newarc(nfa, CANTMATCH, 0, from, to);
1088 3 : nfa->flags |= HASCANTMATCH;
1089 3 : return;
1090 : }
1091 :
1092 : /* Otherwise, transiently mark the colors that appear in of's out-arcs */
1093 959 : for (a = of->outs; a != NULL; a = a->outchain)
1094 : {
1095 643 : if (a->type == PLAIN)
1096 : {
1097 : assert(a->co >= 0);
1098 643 : cd = &cm->cd[a->co];
1099 : assert(!UNUSEDCOLOR(cd));
1100 643 : cd->flags |= COLMARK;
1101 : }
1102 :
1103 : /*
1104 : * There's no syntax for re-complementing a color set, so we cannot
1105 : * see CANTMATCH arcs here.
1106 : */
1107 : assert(a->type != CANTMATCH);
1108 : }
1109 :
1110 : /* Scan colors, clear transient marks, add arcs for unmarked colors */
1111 1554 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1112 : {
1113 1238 : if (cd->flags & COLMARK)
1114 643 : cd->flags &= ~COLMARK;
1115 595 : else if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
1116 580 : newarc(nfa, type, co, from, to);
1117 : }
1118 : }
1119 :
1120 :
1121 : #ifdef REG_DEBUG
1122 :
1123 : /*
1124 : * dumpcolors - debugging output
1125 : */
1126 : static void
1127 : dumpcolors(struct colormap *cm,
1128 : FILE *f)
1129 : {
1130 : struct colordesc *cd;
1131 : struct colordesc *end;
1132 : color co;
1133 : chr c;
1134 :
1135 : fprintf(f, "max %ld\n", (long) cm->max);
1136 : end = CDEND(cm);
1137 : for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
1138 : {
1139 : if (!UNUSEDCOLOR(cd))
1140 : {
1141 : assert(cd->nschrs > 0 || cd->nuchrs > 0);
1142 : if (cd->flags & PSEUDO)
1143 : fprintf(f, "#%2ld(ps): ", (long) co);
1144 : else
1145 : fprintf(f, "#%2ld(%2d): ", (long) co, cd->nschrs + cd->nuchrs);
1146 :
1147 : /*
1148 : * Unfortunately, it's hard to do this next bit more efficiently.
1149 : */
1150 : for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
1151 : if (GETCOLOR(cm, c) == co)
1152 : dumpchr(c, f);
1153 : fprintf(f, "\n");
1154 : }
1155 : }
1156 : /* dump the high colormap if it contains anything interesting */
1157 : if (cm->hiarrayrows > 1 || cm->hiarraycols > 1)
1158 : {
1159 : int r,
1160 : c;
1161 : const color *rowptr;
1162 :
1163 : fprintf(f, "other:\t");
1164 : for (c = 0; c < cm->hiarraycols; c++)
1165 : {
1166 : fprintf(f, "\t%ld", (long) cm->hicolormap[c]);
1167 : }
1168 : fprintf(f, "\n");
1169 : for (r = 0; r < cm->numcmranges; r++)
1170 : {
1171 : dumpchr(cm->cmranges[r].cmin, f);
1172 : fprintf(f, "..");
1173 : dumpchr(cm->cmranges[r].cmax, f);
1174 : fprintf(f, ":");
1175 : rowptr = &cm->hicolormap[cm->cmranges[r].rownum * cm->hiarraycols];
1176 : for (c = 0; c < cm->hiarraycols; c++)
1177 : {
1178 : fprintf(f, "\t%ld", (long) rowptr[c]);
1179 : }
1180 : fprintf(f, "\n");
1181 : }
1182 : }
1183 : }
1184 :
1185 : /*
1186 : * dumpchr - print a chr
1187 : *
1188 : * Kind of char-centric but works well enough for debug use.
1189 : */
1190 : static void
1191 : dumpchr(chr c,
1192 : FILE *f)
1193 : {
1194 : if (c == '\\')
1195 : fprintf(f, "\\\\");
1196 : else if (c > ' ' && c <= '~')
1197 : putc((char) c, f);
1198 : else
1199 : fprintf(f, "\\u%04lx", (long) c);
1200 : }
1201 :
1202 : #endif /* REG_DEBUG */
|