Line data Source code
1 : /*
2 : * colorings of characters
3 : * This file is #included by regcomp.c.
4 : *
5 : * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 : *
7 : * Development of this software was funded, in part, by Cray Research Inc.,
8 : * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 : * Corporation, none of whom are responsible for the results. The author
10 : * thanks all of them.
11 : *
12 : * Redistribution and use in source and binary forms -- with or without
13 : * modification -- are permitted for any purpose, provided that
14 : * redistributions in source form retain this entire copyright notice and
15 : * indicate the origin and nature of any modifications.
16 : *
17 : * I'd appreciate being given credit for this package in the documentation
18 : * of software which uses it, but that is not a requirement.
19 : *
20 : * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 : * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 : * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 : * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 : * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 : * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 : * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 : *
31 : * src/backend/regex/regc_color.c
32 : *
33 : *
34 : * Note that there are some incestuous relationships between this code and
35 : * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
36 : */
37 :
38 :
39 :
40 : #define CISERR() VISERR(cm->v)
41 : #define CERR(e) VERR(cm->v, (e))
42 :
43 :
44 :
45 : /*
46 : * initcm - set up new colormap
47 : */
48 : static void
49 5144 : initcm(struct vars *v,
50 : struct colormap *cm)
51 : {
52 : struct colordesc *cd;
53 :
54 5144 : cm->magic = CMMAGIC;
55 5144 : cm->v = v;
56 :
57 5144 : cm->ncds = NINLINECDS;
58 5144 : cm->cd = cm->cdspace;
59 5144 : cm->max = 0;
60 5144 : cm->free = 0;
61 :
62 5144 : cd = cm->cd; /* cm->cd[WHITE] */
63 5144 : cd->nschrs = MAX_SIMPLE_CHR - CHR_MIN + 1;
64 5144 : cd->nuchrs = 1;
65 5144 : cd->sub = NOSUB;
66 5144 : cd->arcs = NULL;
67 5144 : cd->firstchr = CHR_MIN;
68 5144 : cd->flags = 0;
69 :
70 5144 : cm->locolormap = (color *)
71 5144 : MALLOC((MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
72 5144 : if (cm->locolormap == NULL)
73 : {
74 0 : CERR(REG_ESPACE);
75 0 : cm->cmranges = NULL; /* prevent failure during freecm */
76 0 : cm->hicolormap = NULL;
77 0 : return;
78 : }
79 : /* this memset relies on WHITE being zero: */
80 5144 : memset(cm->locolormap, WHITE,
81 : (MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
82 :
83 5144 : memset(cm->classbits, 0, sizeof(cm->classbits));
84 5144 : cm->numcmranges = 0;
85 5144 : cm->cmranges = NULL;
86 5144 : cm->maxarrayrows = 4; /* arbitrary initial allocation */
87 5144 : cm->hiarrayrows = 1; /* but we have only one row/col initially */
88 5144 : cm->hiarraycols = 1;
89 5144 : cm->hicolormap = (color *) MALLOC(cm->maxarrayrows * sizeof(color));
90 5144 : if (cm->hicolormap == NULL)
91 : {
92 0 : CERR(REG_ESPACE);
93 0 : return;
94 : }
95 : /* initialize the "all other characters" row to WHITE */
96 5144 : cm->hicolormap[0] = WHITE;
97 : }
98 :
99 : /*
100 : * freecm - free dynamically-allocated things in a colormap
101 : */
102 : static void
103 799 : freecm(struct colormap *cm)
104 : {
105 799 : cm->magic = 0;
106 799 : if (cm->cd != cm->cdspace)
107 71 : FREE(cm->cd);
108 799 : if (cm->locolormap != NULL)
109 799 : FREE(cm->locolormap);
110 799 : if (cm->cmranges != NULL)
111 8 : FREE(cm->cmranges);
112 799 : if (cm->hicolormap != NULL)
113 799 : FREE(cm->hicolormap);
114 799 : }
115 :
116 : /*
117 : * pg_reg_getcolor - slow case of GETCOLOR()
118 : */
119 : color
120 65 : pg_reg_getcolor(struct colormap *cm, chr c)
121 : {
122 : int rownum,
123 : colnum,
124 : low,
125 : high;
126 :
127 : /* Should not be used for chrs in the locolormap */
128 : assert(c > MAX_SIMPLE_CHR);
129 :
130 : /*
131 : * Find which row it's in. The colormapranges are in order, so we can use
132 : * binary search.
133 : */
134 65 : rownum = 0; /* if no match, use array row zero */
135 65 : low = 0;
136 65 : high = cm->numcmranges;
137 98 : while (low < high)
138 : {
139 56 : int middle = low + (high - low) / 2;
140 56 : const colormaprange *cmr = &cm->cmranges[middle];
141 :
142 56 : if (c < cmr->cmin)
143 28 : high = middle;
144 28 : else if (c > cmr->cmax)
145 5 : low = middle + 1;
146 : else
147 : {
148 23 : rownum = cmr->rownum; /* found a match */
149 23 : break;
150 : }
151 : }
152 :
153 : /*
154 : * Find which column it's in --- this is all locale-dependent.
155 : */
156 65 : if (cm->hiarraycols > 1)
157 : {
158 42 : colnum = cclass_column_index(cm, c);
159 42 : return cm->hicolormap[rownum * cm->hiarraycols + colnum];
160 : }
161 : else
162 : {
163 : /* fast path if no relevant cclasses */
164 23 : return cm->hicolormap[rownum];
165 : }
166 : }
167 :
168 : /*
169 : * maxcolor - report largest color number in use
170 : */
171 : static color
172 12301 : maxcolor(struct colormap *cm)
173 : {
174 12301 : if (CISERR())
175 0 : return COLORLESS;
176 :
177 12301 : return (color) cm->max;
178 : }
179 :
180 : /*
181 : * newcolor - find a new color (must be assigned at once)
182 : * Beware: may relocate the colordescs.
183 : */
184 : static color /* COLORLESS for error */
185 51290 : newcolor(struct colormap *cm)
186 : {
187 : struct colordesc *cd;
188 : size_t n;
189 :
190 51290 : if (CISERR())
191 2 : return COLORLESS;
192 :
193 51288 : if (cm->free != 0)
194 : {
195 : assert(cm->free > 0);
196 : assert((size_t) cm->free < cm->ncds);
197 202 : cd = &cm->cd[cm->free];
198 : assert(UNUSEDCOLOR(cd));
199 : assert(cd->arcs == NULL);
200 202 : cm->free = cd->sub;
201 : }
202 51086 : else if (cm->max < cm->ncds - 1)
203 : {
204 48249 : cm->max++;
205 48249 : cd = &cm->cd[cm->max];
206 : }
207 : else
208 : {
209 : /* oops, must allocate more */
210 : struct colordesc *newCd;
211 :
212 2837 : if (cm->max == MAX_COLOR)
213 : {
214 0 : CERR(REG_ECOLORS);
215 0 : return COLORLESS; /* too many colors */
216 : }
217 :
218 2837 : n = cm->ncds * 2;
219 2837 : if (n > MAX_COLOR + 1)
220 0 : n = MAX_COLOR + 1;
221 : /* the MAX_COLOR+1 limit ensures these alloc sizes can't overflow: */
222 2837 : if (cm->cd == cm->cdspace)
223 : {
224 2765 : newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
225 2765 : if (newCd != NULL)
226 2765 : memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
227 : sizeof(struct colordesc));
228 : }
229 : else
230 : newCd = (struct colordesc *)
231 72 : REALLOC(cm->cd, n * sizeof(struct colordesc));
232 2837 : if (newCd == NULL)
233 : {
234 0 : CERR(REG_ESPACE);
235 0 : return COLORLESS;
236 : }
237 2837 : cm->cd = newCd;
238 2837 : cm->ncds = n;
239 : assert(cm->max < cm->ncds - 1);
240 2837 : cm->max++;
241 2837 : cd = &cm->cd[cm->max];
242 : }
243 :
244 51288 : cd->nschrs = 0;
245 51288 : cd->nuchrs = 0;
246 51288 : cd->sub = NOSUB;
247 51288 : cd->arcs = NULL;
248 51288 : cd->firstchr = CHR_MIN; /* in case never set otherwise */
249 51288 : cd->flags = 0;
250 :
251 51288 : return (color) (cd - cm->cd);
252 : }
253 :
254 : /*
255 : * freecolor - free a color (must have no arcs or subcolor)
256 : */
257 : static void
258 253 : freecolor(struct colormap *cm,
259 : color co)
260 : {
261 253 : struct colordesc *cd = &cm->cd[co];
262 : color pco,
263 : nco; /* for freelist scan */
264 :
265 : assert(co >= 0);
266 253 : if (co == WHITE)
267 0 : return;
268 :
269 : assert(cd->arcs == NULL);
270 : assert(cd->sub == NOSUB);
271 : assert(cd->nschrs == 0);
272 : assert(cd->nuchrs == 0);
273 253 : cd->flags = FREECOL;
274 :
275 253 : if ((size_t) co == cm->max)
276 : {
277 100 : while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
278 51 : cm->max--;
279 : assert(cm->free >= 0);
280 51 : while ((size_t) cm->free > cm->max)
281 2 : cm->free = cm->cd[cm->free].sub;
282 49 : if (cm->free > 0)
283 : {
284 : assert(cm->free < cm->max);
285 4 : pco = cm->free;
286 4 : nco = cm->cd[pco].sub;
287 4 : while (nco > 0)
288 0 : if ((size_t) nco > cm->max)
289 : {
290 : /* take this one out of freelist */
291 0 : nco = cm->cd[nco].sub;
292 0 : cm->cd[pco].sub = nco;
293 : }
294 : else
295 : {
296 : assert(nco < cm->max);
297 0 : pco = nco;
298 0 : nco = cm->cd[pco].sub;
299 : }
300 : }
301 : }
302 : else
303 : {
304 204 : cd->sub = cm->free;
305 204 : cm->free = (color) (cd - cm->cd);
306 : }
307 : }
308 :
309 : /*
310 : * pseudocolor - allocate a false color, to be managed by other means
311 : */
312 : static color
313 20076 : pseudocolor(struct colormap *cm)
314 : {
315 : color co;
316 : struct colordesc *cd;
317 :
318 20076 : co = newcolor(cm);
319 20076 : if (CISERR())
320 0 : return COLORLESS;
321 20076 : cd = &cm->cd[co];
322 20076 : cd->nschrs = 0;
323 20076 : cd->nuchrs = 1; /* pretend it is in the upper map */
324 20076 : cd->sub = NOSUB;
325 20076 : cd->arcs = NULL;
326 20076 : cd->firstchr = CHR_MIN;
327 20076 : cd->flags = PSEUDO;
328 20076 : return co;
329 : }
330 :
331 : /*
332 : * subcolor - allocate a new subcolor (if necessary) to this chr
333 : *
334 : * This works only for chrs that map into the low color map.
335 : */
336 : static color
337 381738 : subcolor(struct colormap *cm, chr c)
338 : {
339 : color co; /* current color of c */
340 : color sco; /* new subcolor */
341 :
342 : assert(c <= MAX_SIMPLE_CHR);
343 :
344 381738 : co = cm->locolormap[c - CHR_MIN];
345 381738 : sco = newsub(cm, co);
346 381738 : if (CISERR())
347 2 : return COLORLESS;
348 : assert(sco != COLORLESS);
349 :
350 381736 : if (co == sco) /* already in an open subcolor */
351 25340 : return co; /* rest is redundant */
352 356396 : cm->cd[co].nschrs--;
353 356396 : if (cm->cd[sco].nschrs == 0)
354 31160 : cm->cd[sco].firstchr = c;
355 356396 : cm->cd[sco].nschrs++;
356 356396 : cm->locolormap[c - CHR_MIN] = sco;
357 356396 : return sco;
358 : }
359 :
360 : /*
361 : * subcolorhi - allocate a new subcolor (if necessary) to this colormap entry
362 : *
363 : * This is the same processing as subcolor(), but for entries in the high
364 : * colormap, which do not necessarily correspond to exactly one chr code.
365 : */
366 : static color
367 1047 : subcolorhi(struct colormap *cm, color *pco)
368 : {
369 : color co; /* current color of entry */
370 : color sco; /* new subcolor */
371 :
372 1047 : co = *pco;
373 1047 : sco = newsub(cm, co);
374 1047 : if (CISERR())
375 0 : return COLORLESS;
376 : assert(sco != COLORLESS);
377 :
378 1047 : if (co == sco) /* already in an open subcolor */
379 10 : return co; /* rest is redundant */
380 1037 : cm->cd[co].nuchrs--;
381 1037 : cm->cd[sco].nuchrs++;
382 1037 : *pco = sco;
383 1037 : return sco;
384 : }
385 :
386 : /*
387 : * newsub - allocate a new subcolor (if necessary) for a color
388 : */
389 : static color
390 382785 : newsub(struct colormap *cm,
391 : color co)
392 : {
393 : color sco; /* new subcolor */
394 :
395 382785 : sco = cm->cd[co].sub;
396 382785 : if (sco == NOSUB)
397 : { /* color has no open subcolor */
398 : /* optimization: singly-referenced color need not be subcolored */
399 56549 : if ((cm->cd[co].nschrs + cm->cd[co].nuchrs) == 1)
400 25335 : return co;
401 31214 : sco = newcolor(cm); /* must create subcolor */
402 31214 : if (sco == COLORLESS)
403 : {
404 : assert(CISERR());
405 2 : return COLORLESS;
406 : }
407 31212 : cm->cd[co].sub = sco;
408 31212 : cm->cd[sco].sub = sco; /* open subcolor points to self */
409 : }
410 : assert(sco != NOSUB);
411 :
412 357448 : return sco;
413 : }
414 :
415 : /*
416 : * newhicolorrow - get a new row in the hicolormap, cloning it from oldrow
417 : *
418 : * Returns array index of new row. Note the array might move.
419 : */
420 : static int
421 146 : newhicolorrow(struct colormap *cm,
422 : int oldrow)
423 : {
424 146 : int newrow = cm->hiarrayrows;
425 : color *newrowptr;
426 : int i;
427 :
428 : /* Assign a fresh array row index, enlarging storage if needed */
429 146 : if (newrow >= cm->maxarrayrows)
430 : {
431 : color *newarray;
432 :
433 7 : if (cm->maxarrayrows >= INT_MAX / (cm->hiarraycols * 2))
434 : {
435 0 : CERR(REG_ESPACE);
436 0 : return 0;
437 : }
438 7 : newarray = REALLOC_ARRAY(cm->hicolormap, color,
439 : cm->maxarrayrows * 2 * cm->hiarraycols);
440 7 : if (newarray == NULL)
441 : {
442 0 : CERR(REG_ESPACE);
443 0 : return 0;
444 : }
445 7 : cm->hicolormap = newarray;
446 7 : cm->maxarrayrows *= 2;
447 : }
448 146 : cm->hiarrayrows++;
449 :
450 : /* Copy old row data */
451 146 : newrowptr = &cm->hicolormap[newrow * cm->hiarraycols];
452 146 : memcpy(newrowptr,
453 146 : &cm->hicolormap[oldrow * cm->hiarraycols],
454 146 : cm->hiarraycols * sizeof(color));
455 :
456 : /* Increase color reference counts to reflect new colormap entries */
457 691 : for (i = 0; i < cm->hiarraycols; i++)
458 545 : cm->cd[newrowptr[i]].nuchrs++;
459 :
460 146 : return newrow;
461 : }
462 :
463 : /*
464 : * newhicolorcols - create a new set of columns in the high colormap
465 : *
466 : * Essentially, extends the 2-D array to the right with a copy of itself.
467 : */
468 : static void
469 431 : newhicolorcols(struct colormap *cm)
470 : {
471 : color *newarray;
472 : int r,
473 : c;
474 :
475 431 : if (cm->hiarraycols >= INT_MAX / (cm->maxarrayrows * 2))
476 : {
477 0 : CERR(REG_ESPACE);
478 0 : return;
479 : }
480 431 : newarray = REALLOC_ARRAY(cm->hicolormap, color,
481 : cm->maxarrayrows * cm->hiarraycols * 2);
482 431 : if (newarray == NULL)
483 : {
484 0 : CERR(REG_ESPACE);
485 0 : return;
486 : }
487 431 : cm->hicolormap = newarray;
488 :
489 : /* Duplicate existing columns to the right, and increase ref counts */
490 : /* Must work backwards in the array because we realloc'd in place */
491 862 : for (r = cm->hiarrayrows - 1; r >= 0; r--)
492 : {
493 431 : color *oldrowptr = &newarray[r * cm->hiarraycols];
494 431 : color *newrowptr = &newarray[r * cm->hiarraycols * 2];
495 431 : color *newrowptr2 = newrowptr + cm->hiarraycols;
496 :
497 880 : for (c = 0; c < cm->hiarraycols; c++)
498 : {
499 449 : color co = oldrowptr[c];
500 :
501 449 : newrowptr[c] = newrowptr2[c] = co;
502 449 : cm->cd[co].nuchrs++;
503 : }
504 : }
505 :
506 431 : cm->hiarraycols *= 2;
507 : }
508 :
509 : /*
510 : * subcolorcvec - allocate new subcolors to cvec members, fill in arcs
511 : *
512 : * For each chr "c" represented by the cvec, do the equivalent of
513 : * newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
514 : *
515 : * Note that in typical cases, many of the subcolors are the same.
516 : * While newarc() would discard duplicate arc requests, we can save
517 : * some cycles by not calling it repetitively to begin with. This is
518 : * mechanized with the "lastsubcolor" state variable.
519 : */
520 : static void
521 1904 : subcolorcvec(struct vars *v,
522 : struct cvec *cv,
523 : struct state *lp,
524 : struct state *rp)
525 : {
526 1904 : struct colormap *cm = v->cm;
527 1904 : color lastsubcolor = COLORLESS;
528 : chr ch,
529 : from,
530 : to;
531 : const chr *p;
532 : int i;
533 :
534 : /* ordinary characters */
535 12282 : for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
536 : {
537 10378 : ch = *p;
538 10378 : subcoloronechr(v, ch, lp, rp, &lastsubcolor);
539 10378 : NOERR();
540 : }
541 :
542 : /* and the ranges */
543 9807 : for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
544 : {
545 7903 : from = *p;
546 7903 : to = *(p + 1);
547 7903 : if (from <= MAX_SIMPLE_CHR)
548 : {
549 : /* deal with simple chars one at a time */
550 7895 : chr lim = (to <= MAX_SIMPLE_CHR) ? to : MAX_SIMPLE_CHR;
551 :
552 325873 : while (from <= lim)
553 : {
554 317978 : color sco = subcolor(cm, from);
555 :
556 317978 : NOERR();
557 317978 : if (sco != lastsubcolor)
558 : {
559 2189 : newarc(v->nfa, PLAIN, sco, lp, rp);
560 2189 : NOERR();
561 2189 : lastsubcolor = sco;
562 : }
563 317978 : from++;
564 : }
565 : }
566 : /* deal with any part of the range that's above MAX_SIMPLE_CHR */
567 7903 : if (from < to)
568 8 : subcoloronerange(v, from, to, lp, rp, &lastsubcolor);
569 7895 : else if (from == to)
570 0 : subcoloronechr(v, from, lp, rp, &lastsubcolor);
571 7903 : NOERR();
572 : }
573 :
574 : /* and deal with cclass if any */
575 1904 : if (cv->cclasscode >= 0)
576 : {
577 : int classbit;
578 : color *pco;
579 : int r,
580 : c;
581 :
582 : /* Enlarge array if we don't have a column bit assignment for cclass */
583 495 : if (cm->classbits[cv->cclasscode] == 0)
584 : {
585 431 : cm->classbits[cv->cclasscode] = cm->hiarraycols;
586 431 : newhicolorcols(cm);
587 431 : NOERR();
588 : }
589 : /* Apply subcolorhi() and make arc for each entry in relevant cols */
590 495 : classbit = cm->classbits[cv->cclasscode];
591 495 : pco = cm->hicolormap;
592 990 : for (r = 0; r < cm->hiarrayrows; r++)
593 : {
594 1521 : for (c = 0; c < cm->hiarraycols; c++)
595 : {
596 1026 : if (c & classbit)
597 : {
598 513 : color sco = subcolorhi(cm, pco);
599 :
600 513 : NOERR();
601 : /* add the arc if needed */
602 513 : if (sco != lastsubcolor)
603 : {
604 46 : newarc(v->nfa, PLAIN, sco, lp, rp);
605 46 : NOERR();
606 46 : lastsubcolor = sco;
607 : }
608 : }
609 1026 : pco++;
610 : }
611 : }
612 : }
613 : }
614 :
615 : /*
616 : * subcoloronechr - do subcolorcvec's work for a singleton chr
617 : *
618 : * We could just let subcoloronerange do this, but it's a bit more efficient
619 : * if we exploit the single-chr case. Also, callers find it useful for this
620 : * to be able to handle both low and high chr codes.
621 : */
622 : static void
623 63513 : subcoloronechr(struct vars *v,
624 : chr ch,
625 : struct state *lp,
626 : struct state *rp,
627 : color *lastsubcolor)
628 : {
629 63513 : struct colormap *cm = v->cm;
630 : colormaprange *newranges;
631 : int numnewranges;
632 : colormaprange *oldrange;
633 : int oldrangen;
634 : int newrow;
635 :
636 : /* Easy case for low chr codes */
637 63513 : if (ch <= MAX_SIMPLE_CHR)
638 : {
639 63380 : color sco = subcolor(cm, ch);
640 :
641 63380 : NOERR();
642 63378 : if (sco != *lastsubcolor)
643 : {
644 54513 : newarc(v->nfa, PLAIN, sco, lp, rp);
645 54513 : *lastsubcolor = sco;
646 : }
647 63378 : return;
648 : }
649 :
650 : /*
651 : * Potentially, we could need two more colormapranges than we have now, if
652 : * the given chr is in the middle of some existing range.
653 : */
654 133 : newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges + 2);
655 133 : if (newranges == NULL)
656 : {
657 0 : CERR(REG_ESPACE);
658 0 : return;
659 : }
660 133 : numnewranges = 0;
661 :
662 : /* Ranges before target are unchanged */
663 133 : for (oldrange = cm->cmranges, oldrangen = 0;
664 7400 : oldrangen < cm->numcmranges;
665 7267 : oldrange++, oldrangen++)
666 : {
667 7276 : if (oldrange->cmax >= ch)
668 9 : break;
669 7267 : newranges[numnewranges++] = *oldrange;
670 : }
671 :
672 : /* Match target chr against current range */
673 133 : if (oldrangen >= cm->numcmranges || oldrange->cmin > ch)
674 : {
675 : /* chr does not belong to any existing range, make a new one */
676 128 : newranges[numnewranges].cmin = ch;
677 128 : newranges[numnewranges].cmax = ch;
678 : /* row state should be cloned from the "all others" row */
679 128 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
680 128 : numnewranges++;
681 : }
682 5 : else if (oldrange->cmin == oldrange->cmax)
683 : {
684 : /* we have an existing singleton range matching the chr */
685 1 : newranges[numnewranges++] = *oldrange;
686 1 : newrow = oldrange->rownum;
687 : /* we've now fully processed this old range */
688 1 : oldrange++, oldrangen++;
689 : }
690 : else
691 : {
692 : /* chr is a subset of this existing range, must split it */
693 4 : if (ch > oldrange->cmin)
694 : {
695 : /* emit portion of old range before chr */
696 4 : newranges[numnewranges].cmin = oldrange->cmin;
697 4 : newranges[numnewranges].cmax = ch - 1;
698 4 : newranges[numnewranges].rownum = oldrange->rownum;
699 4 : numnewranges++;
700 : }
701 : /* emit chr as singleton range, initially cloning from range */
702 4 : newranges[numnewranges].cmin = ch;
703 4 : newranges[numnewranges].cmax = ch;
704 4 : newranges[numnewranges].rownum = newrow =
705 4 : newhicolorrow(cm, oldrange->rownum);
706 4 : numnewranges++;
707 4 : if (ch < oldrange->cmax)
708 : {
709 : /* emit portion of old range after chr */
710 4 : newranges[numnewranges].cmin = ch + 1;
711 4 : newranges[numnewranges].cmax = oldrange->cmax;
712 : /* must clone the row if we are making two new ranges from old */
713 4 : newranges[numnewranges].rownum =
714 4 : (ch > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
715 : oldrange->rownum;
716 4 : numnewranges++;
717 : }
718 : /* we've now fully processed this old range */
719 4 : oldrange++, oldrangen++;
720 : }
721 :
722 : /* Update colors in newrow and create arcs as needed */
723 133 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
724 :
725 : /* Ranges after target are unchanged */
726 630 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
727 : {
728 497 : newranges[numnewranges++] = *oldrange;
729 : }
730 :
731 : /* Assert our original space estimate was adequate */
732 : assert(numnewranges <= (cm->numcmranges + 2));
733 :
734 : /* And finally, store back the updated list of ranges */
735 133 : if (cm->cmranges != NULL)
736 128 : FREE(cm->cmranges);
737 133 : cm->cmranges = newranges;
738 133 : cm->numcmranges = numnewranges;
739 : }
740 :
741 : /*
742 : * subcoloronerange - do subcolorcvec's work for a high range
743 : */
744 : static void
745 8 : subcoloronerange(struct vars *v,
746 : chr from,
747 : chr to,
748 : struct state *lp,
749 : struct state *rp,
750 : color *lastsubcolor)
751 : {
752 8 : struct colormap *cm = v->cm;
753 : colormaprange *newranges;
754 : int numnewranges;
755 : colormaprange *oldrange;
756 : int oldrangen;
757 : int newrow;
758 :
759 : /* Caller should take care of non-high-range cases */
760 : assert(from > MAX_SIMPLE_CHR);
761 : assert(from < to);
762 :
763 : /*
764 : * Potentially, if we have N non-adjacent ranges, we could need as many as
765 : * 2N+1 result ranges (consider case where new range spans 'em all).
766 : */
767 8 : newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges * 2 + 1);
768 8 : if (newranges == NULL)
769 : {
770 0 : CERR(REG_ESPACE);
771 0 : return;
772 : }
773 8 : numnewranges = 0;
774 :
775 : /* Ranges before target are unchanged */
776 8 : for (oldrange = cm->cmranges, oldrangen = 0;
777 14 : oldrangen < cm->numcmranges;
778 6 : oldrange++, oldrangen++)
779 : {
780 10 : if (oldrange->cmax >= from)
781 4 : break;
782 6 : newranges[numnewranges++] = *oldrange;
783 : }
784 :
785 : /*
786 : * Deal with ranges that (partially) overlap the target. As we process
787 : * each such range, increase "from" to remove the dealt-with characters
788 : * from the target range.
789 : */
790 11 : while (oldrangen < cm->numcmranges && oldrange->cmin <= to)
791 : {
792 3 : if (from < oldrange->cmin)
793 : {
794 : /* Handle portion of new range that corresponds to no old range */
795 1 : newranges[numnewranges].cmin = from;
796 1 : newranges[numnewranges].cmax = oldrange->cmin - 1;
797 : /* row state should be cloned from the "all others" row */
798 1 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
799 1 : numnewranges++;
800 : /* Update colors in newrow and create arcs as needed */
801 1 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
802 : /* We've now fully processed the part of new range before old */
803 1 : from = oldrange->cmin;
804 : }
805 :
806 3 : if (from <= oldrange->cmin && to >= oldrange->cmax)
807 : {
808 : /* old range is fully contained in new, process it in-place */
809 1 : newranges[numnewranges++] = *oldrange;
810 1 : newrow = oldrange->rownum;
811 1 : from = oldrange->cmax + 1;
812 : }
813 : else
814 : {
815 : /* some part of old range does not overlap new range */
816 2 : if (from > oldrange->cmin)
817 : {
818 : /* emit portion of old range before new range */
819 1 : newranges[numnewranges].cmin = oldrange->cmin;
820 1 : newranges[numnewranges].cmax = from - 1;
821 1 : newranges[numnewranges].rownum = oldrange->rownum;
822 1 : numnewranges++;
823 : }
824 : /* emit common subrange, initially cloning from old range */
825 2 : newranges[numnewranges].cmin = from;
826 2 : newranges[numnewranges].cmax =
827 2 : (to < oldrange->cmax) ? to : oldrange->cmax;
828 2 : newranges[numnewranges].rownum = newrow =
829 2 : newhicolorrow(cm, oldrange->rownum);
830 2 : numnewranges++;
831 2 : if (to < oldrange->cmax)
832 : {
833 : /* emit portion of old range after new range */
834 1 : newranges[numnewranges].cmin = to + 1;
835 1 : newranges[numnewranges].cmax = oldrange->cmax;
836 : /* must clone the row if we are making two new ranges from old */
837 1 : newranges[numnewranges].rownum =
838 1 : (from > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
839 : oldrange->rownum;
840 1 : numnewranges++;
841 : }
842 2 : from = oldrange->cmax + 1;
843 : }
844 : /* Update colors in newrow and create arcs as needed */
845 3 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
846 : /* we've now fully processed this old range */
847 3 : oldrange++, oldrangen++;
848 : }
849 :
850 8 : if (from <= to)
851 : {
852 : /* Handle portion of new range that corresponds to no old range */
853 7 : newranges[numnewranges].cmin = from;
854 7 : newranges[numnewranges].cmax = to;
855 : /* row state should be cloned from the "all others" row */
856 7 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
857 7 : numnewranges++;
858 : /* Update colors in newrow and create arcs as needed */
859 7 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
860 : }
861 :
862 : /* Ranges after target are unchanged */
863 135 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
864 : {
865 127 : newranges[numnewranges++] = *oldrange;
866 : }
867 :
868 : /* Assert our original space estimate was adequate */
869 : assert(numnewranges <= (cm->numcmranges * 2 + 1));
870 :
871 : /* And finally, store back the updated list of ranges */
872 8 : if (cm->cmranges != NULL)
873 5 : FREE(cm->cmranges);
874 8 : cm->cmranges = newranges;
875 8 : cm->numcmranges = numnewranges;
876 : }
877 :
878 : /*
879 : * subcoloronerow - do subcolorcvec's work for one new row in the high colormap
880 : */
881 : static void
882 144 : subcoloronerow(struct vars *v,
883 : int rownum,
884 : struct state *lp,
885 : struct state *rp,
886 : color *lastsubcolor)
887 : {
888 144 : struct colormap *cm = v->cm;
889 : color *pco;
890 : int i;
891 :
892 : /* Apply subcolorhi() and make arc for each entry in row */
893 144 : pco = &cm->hicolormap[rownum * cm->hiarraycols];
894 678 : for (i = 0; i < cm->hiarraycols; pco++, i++)
895 : {
896 534 : color sco = subcolorhi(cm, pco);
897 :
898 534 : NOERR();
899 : /* make the arc if needed */
900 534 : if (sco != *lastsubcolor)
901 : {
902 534 : newarc(v->nfa, PLAIN, sco, lp, rp);
903 534 : NOERR();
904 534 : *lastsubcolor = sco;
905 : }
906 : }
907 : }
908 :
909 : /*
910 : * okcolors - promote subcolors to full colors
911 : */
912 : static void
913 55019 : okcolors(struct nfa *nfa,
914 : struct colormap *cm)
915 : {
916 : struct colordesc *cd;
917 55019 : struct colordesc *end = CDEND(cm);
918 : struct colordesc *scd;
919 : struct arc *a;
920 : color co;
921 : color sco;
922 :
923 430673 : for (cd = cm->cd, co = 0; cd < end; cd++, co++)
924 : {
925 375654 : sco = cd->sub;
926 375654 : if (UNUSEDCOLOR(cd) || sco == NOSUB)
927 : {
928 : /* has no subcolor, no further action */
929 : }
930 31284 : else if (sco == co)
931 : {
932 : /* is subcolor, let parent deal with it */
933 : }
934 31212 : else if (cd->nschrs == 0 && cd->nuchrs == 0)
935 : {
936 : /*
937 : * Parent is now empty, so just change all its arcs to the
938 : * subcolor, then free the parent.
939 : *
940 : * It is not obvious that simply relabeling the arcs like this is
941 : * OK; it appears to risk creating duplicate arcs. We are
942 : * basically relying on the assumption that processing of a
943 : * bracket expression can't create arcs of both a color and its
944 : * subcolor between the bracket's endpoints.
945 : */
946 253 : cd->sub = NOSUB;
947 253 : scd = &cm->cd[sco];
948 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
949 : assert(scd->sub == sco);
950 253 : scd->sub = NOSUB;
951 1550 : while ((a = cd->arcs) != NULL)
952 : {
953 : assert(a->co == co);
954 1297 : uncolorchain(cm, a);
955 1297 : a->co = sco;
956 1297 : colorchain(cm, a);
957 : }
958 253 : freecolor(cm, co);
959 : }
960 : else
961 : {
962 : /* parent's arcs must gain parallel subcolor arcs */
963 30959 : cd->sub = NOSUB;
964 30959 : scd = &cm->cd[sco];
965 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
966 : assert(scd->sub == sco);
967 30959 : scd->sub = NOSUB;
968 31535 : for (a = cd->arcs; a != NULL; a = a->colorchain)
969 : {
970 : assert(a->co == co);
971 576 : newarc(nfa, a->type, sco, a->from, a->to);
972 : }
973 : }
974 : }
975 55019 : }
976 :
977 : /*
978 : * colorchain - add this arc to the color chain of its color
979 : */
980 : static void
981 846337 : colorchain(struct colormap *cm,
982 : struct arc *a)
983 : {
984 846337 : struct colordesc *cd = &cm->cd[a->co];
985 :
986 : assert(a->co >= 0);
987 846337 : if (cd->arcs != NULL)
988 800729 : cd->arcs->colorchainRev = a;
989 846337 : a->colorchain = cd->arcs;
990 846337 : a->colorchainRev = NULL;
991 846337 : cd->arcs = a;
992 846337 : }
993 :
994 : /*
995 : * uncolorchain - delete this arc from the color chain of its color
996 : */
997 : static void
998 397195 : uncolorchain(struct colormap *cm,
999 : struct arc *a)
1000 : {
1001 397195 : struct colordesc *cd = &cm->cd[a->co];
1002 397195 : struct arc *aa = a->colorchainRev;
1003 :
1004 : assert(a->co >= 0);
1005 397195 : if (aa == NULL)
1006 : {
1007 : assert(cd->arcs == a);
1008 95161 : cd->arcs = a->colorchain;
1009 : }
1010 : else
1011 : {
1012 : assert(aa->colorchain == a);
1013 302034 : aa->colorchain = a->colorchain;
1014 : }
1015 397195 : if (a->colorchain != NULL)
1016 360806 : a->colorchain->colorchainRev = aa;
1017 397195 : a->colorchain = NULL; /* paranoia */
1018 397195 : a->colorchainRev = NULL;
1019 397195 : }
1020 :
1021 : /*
1022 : * rainbow - add arcs of all full colors (but one) between specified states
1023 : *
1024 : * If there isn't an exception color, we now generate just a single arc
1025 : * labeled RAINBOW, saving lots of arc-munging later on.
1026 : */
1027 : static void
1028 28762 : rainbow(struct nfa *nfa,
1029 : struct colormap *cm,
1030 : int type,
1031 : color but, /* COLORLESS if no exceptions */
1032 : struct state *from,
1033 : struct state *to)
1034 : {
1035 : struct colordesc *cd;
1036 28762 : struct colordesc *end = CDEND(cm);
1037 : color co;
1038 :
1039 28762 : if (but == COLORLESS)
1040 : {
1041 28466 : newarc(nfa, type, RAINBOW, from, to);
1042 28466 : return;
1043 : }
1044 :
1045 : /* Gotta do it the hard way. Skip subcolors, pseudocolors, and "but" */
1046 2215 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1047 1919 : if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
1048 1623 : !(cd->flags & PSEUDO))
1049 1623 : newarc(nfa, type, co, from, to);
1050 : }
1051 :
1052 : /*
1053 : * colorcomplement - add arcs of complementary colors
1054 : *
1055 : * We add arcs of all colors that are not pseudocolors and do not match
1056 : * any of the "of" state's PLAIN outarcs.
1057 : *
1058 : * The calling sequence ought to be reconciled with cloneouts().
1059 : */
1060 : static void
1061 361 : colorcomplement(struct nfa *nfa,
1062 : struct colormap *cm,
1063 : int type,
1064 : struct state *of,
1065 : struct state *from,
1066 : struct state *to)
1067 : {
1068 : struct colordesc *cd;
1069 361 : struct colordesc *end = CDEND(cm);
1070 : color co;
1071 : struct arc *a;
1072 :
1073 : assert(of != from);
1074 :
1075 : /*
1076 : * A RAINBOW arc matches all colors, making the complement empty. But we
1077 : * can't just return without making any arcs, because that would leave the
1078 : * NFA disconnected which would break any future delsub(). Instead, make
1079 : * a CANTMATCH arc. Also set the HASCANTMATCH flag so we know we need to
1080 : * clean that up at the start of NFA optimization.
1081 : */
1082 361 : if (findarc(of, PLAIN, RAINBOW) != NULL)
1083 : {
1084 3 : newarc(nfa, CANTMATCH, 0, from, to);
1085 3 : nfa->flags |= HASCANTMATCH;
1086 3 : return;
1087 : }
1088 :
1089 : /* Otherwise, transiently mark the colors that appear in of's out-arcs */
1090 1067 : for (a = of->outs; a != NULL; a = a->outchain)
1091 : {
1092 709 : if (a->type == PLAIN)
1093 : {
1094 : assert(a->co >= 0);
1095 709 : cd = &cm->cd[a->co];
1096 : assert(!UNUSEDCOLOR(cd));
1097 709 : cd->flags |= COLMARK;
1098 : }
1099 :
1100 : /*
1101 : * There's no syntax for re-complementing a color set, so we cannot
1102 : * see CANTMATCH arcs here.
1103 : */
1104 : assert(a->type != CANTMATCH);
1105 : }
1106 :
1107 : /* Scan colors, clear transient marks, add arcs for unmarked colors */
1108 1754 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1109 : {
1110 1396 : if (cd->flags & COLMARK)
1111 709 : cd->flags &= ~COLMARK;
1112 687 : else if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
1113 671 : newarc(nfa, type, co, from, to);
1114 : }
1115 : }
1116 :
1117 :
1118 : #ifdef REG_DEBUG
1119 :
1120 : /*
1121 : * dumpcolors - debugging output
1122 : */
1123 : static void
1124 : dumpcolors(struct colormap *cm,
1125 : FILE *f)
1126 : {
1127 : struct colordesc *cd;
1128 : struct colordesc *end;
1129 : color co;
1130 : chr c;
1131 :
1132 : fprintf(f, "max %ld\n", (long) cm->max);
1133 : end = CDEND(cm);
1134 : for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
1135 : {
1136 : if (!UNUSEDCOLOR(cd))
1137 : {
1138 : assert(cd->nschrs > 0 || cd->nuchrs > 0);
1139 : if (cd->flags & PSEUDO)
1140 : fprintf(f, "#%2ld(ps): ", (long) co);
1141 : else
1142 : fprintf(f, "#%2ld(%2d): ", (long) co, cd->nschrs + cd->nuchrs);
1143 :
1144 : /*
1145 : * Unfortunately, it's hard to do this next bit more efficiently.
1146 : */
1147 : for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
1148 : if (GETCOLOR(cm, c) == co)
1149 : dumpchr(c, f);
1150 : fprintf(f, "\n");
1151 : }
1152 : }
1153 : /* dump the high colormap if it contains anything interesting */
1154 : if (cm->hiarrayrows > 1 || cm->hiarraycols > 1)
1155 : {
1156 : int r,
1157 : c;
1158 : const color *rowptr;
1159 :
1160 : fprintf(f, "other:\t");
1161 : for (c = 0; c < cm->hiarraycols; c++)
1162 : {
1163 : fprintf(f, "\t%ld", (long) cm->hicolormap[c]);
1164 : }
1165 : fprintf(f, "\n");
1166 : for (r = 0; r < cm->numcmranges; r++)
1167 : {
1168 : dumpchr(cm->cmranges[r].cmin, f);
1169 : fprintf(f, "..");
1170 : dumpchr(cm->cmranges[r].cmax, f);
1171 : fprintf(f, ":");
1172 : rowptr = &cm->hicolormap[cm->cmranges[r].rownum * cm->hiarraycols];
1173 : for (c = 0; c < cm->hiarraycols; c++)
1174 : {
1175 : fprintf(f, "\t%ld", (long) rowptr[c]);
1176 : }
1177 : fprintf(f, "\n");
1178 : }
1179 : }
1180 : }
1181 :
1182 : /*
1183 : * dumpchr - print a chr
1184 : *
1185 : * Kind of char-centric but works well enough for debug use.
1186 : */
1187 : static void
1188 : dumpchr(chr c,
1189 : FILE *f)
1190 : {
1191 : if (c == '\\')
1192 : fprintf(f, "\\\\");
1193 : else if (c > ' ' && c <= '~')
1194 : putc((char) c, f);
1195 : else
1196 : fprintf(f, "\\u%04lx", (long) c);
1197 : }
1198 :
1199 : #endif /* REG_DEBUG */
|