Line data Source code
1 : /*
2 : * colorings of characters
3 : * This file is #included by regcomp.c.
4 : *
5 : * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 : *
7 : * Development of this software was funded, in part, by Cray Research Inc.,
8 : * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 : * Corporation, none of whom are responsible for the results. The author
10 : * thanks all of them.
11 : *
12 : * Redistribution and use in source and binary forms -- with or without
13 : * modification -- are permitted for any purpose, provided that
14 : * redistributions in source form retain this entire copyright notice and
15 : * indicate the origin and nature of any modifications.
16 : *
17 : * I'd appreciate being given credit for this package in the documentation
18 : * of software which uses it, but that is not a requirement.
19 : *
20 : * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 : * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 : * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 : * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 : * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 : * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 : * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 : *
31 : * src/backend/regex/regc_color.c
32 : *
33 : *
34 : * Note that there are some incestuous relationships between this code and
35 : * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
36 : */
37 :
38 :
39 :
/*
 * Error-state shorthands for code in this file.  Both expand to the
 * vars-level macros applied to cm->v, so they can only be used where a
 * local "cm" (struct colormap *) is in scope.  As used below, CISERR() is
 * tested to see whether an error is already pending and CERR(e) is invoked
 * to report error code "e".
 */
40 : #define CISERR() VISERR(cm->v)
41 : #define CERR(e) VERR(cm->v, (e))
42 :
43 :
44 :
45 : /*
46 : * initcm - set up new colormap
47 : */
48 : static void
49 7462 : initcm(struct vars *v,
50 : struct colormap *cm)
51 : {
52 : struct colordesc *cd;
53 :
54 7462 : cm->magic = CMMAGIC;
55 7462 : cm->v = v;
56 :
57 7462 : cm->ncds = NINLINECDS;
58 7462 : cm->cd = cm->cdspace;
59 7462 : cm->max = 0;
60 7462 : cm->free = 0;
61 :
62 7462 : cd = cm->cd; /* cm->cd[WHITE] */
63 7462 : cd->nschrs = MAX_SIMPLE_CHR - CHR_MIN + 1;
64 7462 : cd->nuchrs = 1;
65 7462 : cd->sub = NOSUB;
66 7462 : cd->arcs = NULL;
67 7462 : cd->firstchr = CHR_MIN;
68 7462 : cd->flags = 0;
69 :
70 7462 : cm->locolormap = (color *)
71 7462 : MALLOC((MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
72 7462 : if (cm->locolormap == NULL)
73 : {
74 0 : CERR(REG_ESPACE);
75 0 : cm->cmranges = NULL; /* prevent failure during freecm */
76 0 : cm->hicolormap = NULL;
77 0 : return;
78 : }
79 : /* this memset relies on WHITE being zero: */
80 7462 : memset(cm->locolormap, WHITE,
81 : (MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
82 :
83 7462 : memset(cm->classbits, 0, sizeof(cm->classbits));
84 7462 : cm->numcmranges = 0;
85 7462 : cm->cmranges = NULL;
86 7462 : cm->maxarrayrows = 4; /* arbitrary initial allocation */
87 7462 : cm->hiarrayrows = 1; /* but we have only one row/col initially */
88 7462 : cm->hiarraycols = 1;
89 7462 : cm->hicolormap = (color *) MALLOC(cm->maxarrayrows * sizeof(color));
90 7462 : if (cm->hicolormap == NULL)
91 : {
92 0 : CERR(REG_ESPACE);
93 0 : return;
94 : }
95 : /* initialize the "all other characters" row to WHITE */
96 7462 : cm->hicolormap[0] = WHITE;
97 : }
98 :
99 : /*
100 : * freecm - free dynamically-allocated things in a colormap
101 : */
102 : static void
103 1544 : freecm(struct colormap *cm)
104 : {
105 1544 : cm->magic = 0;
106 1544 : if (cm->cd != cm->cdspace)
107 120 : FREE(cm->cd);
108 1544 : if (cm->locolormap != NULL)
109 1544 : FREE(cm->locolormap);
110 1544 : if (cm->cmranges != NULL)
111 16 : FREE(cm->cmranges);
112 1544 : if (cm->hicolormap != NULL)
113 1544 : FREE(cm->hicolormap);
114 1544 : }
115 :
116 : /*
117 : * pg_reg_getcolor - slow case of GETCOLOR()
118 : */
119 : color
120 122 : pg_reg_getcolor(struct colormap *cm, chr c)
121 : {
122 : int rownum,
123 : colnum,
124 : low,
125 : high;
126 :
127 : /* Should not be used for chrs in the locolormap */
128 : assert(c > MAX_SIMPLE_CHR);
129 :
130 : /*
131 : * Find which row it's in. The colormapranges are in order, so we can use
132 : * binary search.
133 : */
134 122 : rownum = 0; /* if no match, use array row zero */
135 122 : low = 0;
136 122 : high = cm->numcmranges;
137 188 : while (low < high)
138 : {
139 112 : int middle = low + (high - low) / 2;
140 112 : const colormaprange *cmr = &cm->cmranges[middle];
141 :
142 112 : if (c < cmr->cmin)
143 56 : high = middle;
144 56 : else if (c > cmr->cmax)
145 10 : low = middle + 1;
146 : else
147 : {
148 46 : rownum = cmr->rownum; /* found a match */
149 46 : break;
150 : }
151 : }
152 :
153 : /*
154 : * Find which column it's in --- this is all locale-dependent.
155 : */
156 122 : if (cm->hiarraycols > 1)
157 : {
158 76 : colnum = cclass_column_index(cm, c);
159 76 : return cm->hicolormap[rownum * cm->hiarraycols + colnum];
160 : }
161 : else
162 : {
163 : /* fast path if no relevant cclasses */
164 46 : return cm->hicolormap[rownum];
165 : }
166 : }
167 :
168 : /*
169 : * maxcolor - report largest color number in use
170 : */
171 : static color
172 18218 : maxcolor(struct colormap *cm)
173 : {
174 18218 : if (CISERR())
175 0 : return COLORLESS;
176 :
177 18218 : return (color) cm->max;
178 : }
179 :
180 : /*
181 : * newcolor - find a new color (must be assigned at once)
182 : * Beware: may relocate the colordescs.
183 : */
184 : static color /* COLORLESS for error */
185 72000 : newcolor(struct colormap *cm)
186 : {
187 : struct colordesc *cd;
188 : size_t n;
189 :
190 72000 : if (CISERR())
191 4 : return COLORLESS;
192 :
193 71996 : if (cm->free != 0)
194 : {
195 : assert(cm->free > 0);
196 : assert((size_t) cm->free < cm->ncds);
197 338 : cd = &cm->cd[cm->free];
198 : assert(UNUSEDCOLOR(cd));
199 : assert(cd->arcs == NULL);
200 338 : cm->free = cd->sub;
201 : }
202 71658 : else if (cm->max < cm->ncds - 1)
203 : {
204 67864 : cm->max++;
205 67864 : cd = &cm->cd[cm->max];
206 : }
207 : else
208 : {
209 : /* oops, must allocate more */
210 : struct colordesc *newCd;
211 :
212 3794 : if (cm->max == MAX_COLOR)
213 : {
214 0 : CERR(REG_ECOLORS);
215 0 : return COLORLESS; /* too many colors */
216 : }
217 :
218 3794 : n = cm->ncds * 2;
219 3794 : if (n > MAX_COLOR + 1)
220 0 : n = MAX_COLOR + 1;
221 3794 : if (cm->cd == cm->cdspace)
222 : {
223 3696 : newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
224 3696 : if (newCd != NULL)
225 3696 : memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
226 : sizeof(struct colordesc));
227 : }
228 : else
229 : newCd = (struct colordesc *)
230 98 : REALLOC(cm->cd, n * sizeof(struct colordesc));
231 3794 : if (newCd == NULL)
232 : {
233 0 : CERR(REG_ESPACE);
234 0 : return COLORLESS;
235 : }
236 3794 : cm->cd = newCd;
237 3794 : cm->ncds = n;
238 : assert(cm->max < cm->ncds - 1);
239 3794 : cm->max++;
240 3794 : cd = &cm->cd[cm->max];
241 : }
242 :
243 71996 : cd->nschrs = 0;
244 71996 : cd->nuchrs = 0;
245 71996 : cd->sub = NOSUB;
246 71996 : cd->arcs = NULL;
247 71996 : cd->firstchr = CHR_MIN; /* in case never set otherwise */
248 71996 : cd->flags = 0;
249 :
250 71996 : return (color) (cd - cm->cd);
251 : }
252 :
253 : /*
254 : * freecolor - free a color (must have no arcs or subcolor)
255 : */
256 : static void
257 432 : freecolor(struct colormap *cm,
258 : color co)
259 : {
260 432 : struct colordesc *cd = &cm->cd[co];
261 : color pco,
262 : nco; /* for freelist scan */
263 :
264 : assert(co >= 0);
265 432 : if (co == WHITE)
266 0 : return;
267 :
268 : assert(cd->arcs == NULL);
269 : assert(cd->sub == NOSUB);
270 : assert(cd->nschrs == 0);
271 : assert(cd->nuchrs == 0);
272 432 : cd->flags = FREECOL;
273 :
274 432 : if ((size_t) co == cm->max)
275 : {
276 184 : while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
277 94 : cm->max--;
278 : assert(cm->free >= 0);
279 94 : while ((size_t) cm->free > cm->max)
280 4 : cm->free = cm->cd[cm->free].sub;
281 90 : if (cm->free > 0)
282 : {
283 : assert(cm->free < cm->max);
284 6 : pco = cm->free;
285 6 : nco = cm->cd[pco].sub;
286 6 : while (nco > 0)
287 0 : if ((size_t) nco > cm->max)
288 : {
289 : /* take this one out of freelist */
290 0 : nco = cm->cd[nco].sub;
291 0 : cm->cd[pco].sub = nco;
292 : }
293 : else
294 : {
295 : assert(nco < cm->max);
296 0 : pco = nco;
297 0 : nco = cm->cd[pco].sub;
298 : }
299 : }
300 : }
301 : else
302 : {
303 342 : cd->sub = cm->free;
304 342 : cm->free = (color) (cd - cm->cd);
305 : }
306 : }
307 :
308 : /*
309 : * pseudocolor - allocate a false color, to be managed by other means
310 : */
311 : static color
312 28896 : pseudocolor(struct colormap *cm)
313 : {
314 : color co;
315 : struct colordesc *cd;
316 :
317 28896 : co = newcolor(cm);
318 28896 : if (CISERR())
319 0 : return COLORLESS;
320 28896 : cd = &cm->cd[co];
321 28896 : cd->nschrs = 0;
322 28896 : cd->nuchrs = 1; /* pretend it is in the upper map */
323 28896 : cd->sub = NOSUB;
324 28896 : cd->arcs = NULL;
325 28896 : cd->firstchr = CHR_MIN;
326 28896 : cd->flags = PSEUDO;
327 28896 : return co;
328 : }
329 :
330 : /*
331 : * subcolor - allocate a new subcolor (if necessary) to this chr
332 : *
333 : * This works only for chrs that map into the low color map.
334 : */
335 : static color
336 583668 : subcolor(struct colormap *cm, chr c)
337 : {
338 : color co; /* current color of c */
339 : color sco; /* new subcolor */
340 :
341 : assert(c <= MAX_SIMPLE_CHR);
342 :
343 583668 : co = cm->locolormap[c - CHR_MIN];
344 583668 : sco = newsub(cm, co);
345 583668 : if (CISERR())
346 4 : return COLORLESS;
347 : assert(sco != COLORLESS);
348 :
349 583664 : if (co == sco) /* already in an open subcolor */
350 37470 : return co; /* rest is redundant */
351 546194 : cm->cd[co].nschrs--;
352 546194 : if (cm->cd[sco].nschrs == 0)
353 43008 : cm->cd[sco].firstchr = c;
354 546194 : cm->cd[sco].nschrs++;
355 546194 : cm->locolormap[c - CHR_MIN] = sco;
356 546194 : return sco;
357 : }
358 :
359 : /*
360 : * subcolorhi - allocate a new subcolor (if necessary) to this colormap entry
361 : *
362 : * This is the same processing as subcolor(), but for entries in the high
363 : * colormap, which do not necessarily correspond to exactly one chr code.
364 : */
365 : static color
366 1838 : subcolorhi(struct colormap *cm, color *pco)
367 : {
368 : color co; /* current color of entry */
369 : color sco; /* new subcolor */
370 :
371 1838 : co = *pco;
372 1838 : sco = newsub(cm, co);
373 1838 : if (CISERR())
374 0 : return COLORLESS;
375 : assert(sco != COLORLESS);
376 :
377 1838 : if (co == sco) /* already in an open subcolor */
378 20 : return co; /* rest is redundant */
379 1818 : cm->cd[co].nuchrs--;
380 1818 : cm->cd[sco].nuchrs++;
381 1818 : *pco = sco;
382 1818 : return sco;
383 : }
384 :
385 : /*
386 : * newsub - allocate a new subcolor (if necessary) for a color
387 : */
388 : static color
389 585506 : newsub(struct colormap *cm,
390 : color co)
391 : {
392 : color sco; /* new subcolor */
393 :
394 585506 : sco = cm->cd[co].sub;
395 585506 : if (sco == NOSUB)
396 : { /* color has no open subcolor */
397 : /* optimization: singly-referenced color need not be subcolored */
398 80588 : if ((cm->cd[co].nschrs + cm->cd[co].nuchrs) == 1)
399 37484 : return co;
400 43104 : sco = newcolor(cm); /* must create subcolor */
401 43104 : if (sco == COLORLESS)
402 : {
403 : assert(CISERR());
404 4 : return COLORLESS;
405 : }
406 43100 : cm->cd[co].sub = sco;
407 43100 : cm->cd[sco].sub = sco; /* open subcolor points to self */
408 : }
409 : assert(sco != NOSUB);
410 :
411 548018 : return sco;
412 : }
413 :
414 : /*
415 : * newhicolorrow - get a new row in the hicolormap, cloning it from oldrow
416 : *
417 : * Returns array index of new row. Note the array might move.
418 : */
419 : static int
420 292 : newhicolorrow(struct colormap *cm,
421 : int oldrow)
422 : {
423 292 : int newrow = cm->hiarrayrows;
424 : color *newrowptr;
425 : int i;
426 :
427 : /* Assign a fresh array row index, enlarging storage if needed */
428 292 : if (newrow >= cm->maxarrayrows)
429 : {
430 : color *newarray;
431 :
432 14 : if (cm->maxarrayrows >= INT_MAX / (cm->hiarraycols * 2))
433 : {
434 0 : CERR(REG_ESPACE);
435 0 : return 0;
436 : }
437 14 : newarray = (color *) REALLOC(cm->hicolormap,
438 : cm->maxarrayrows * 2 *
439 : cm->hiarraycols * sizeof(color));
440 14 : if (newarray == NULL)
441 : {
442 0 : CERR(REG_ESPACE);
443 0 : return 0;
444 : }
445 14 : cm->hicolormap = newarray;
446 14 : cm->maxarrayrows *= 2;
447 : }
448 292 : cm->hiarrayrows++;
449 :
450 : /* Copy old row data */
451 292 : newrowptr = &cm->hicolormap[newrow * cm->hiarraycols];
452 292 : memcpy(newrowptr,
453 292 : &cm->hicolormap[oldrow * cm->hiarraycols],
454 292 : cm->hiarraycols * sizeof(color));
455 :
456 : /* Increase color reference counts to reflect new colormap entries */
457 1382 : for (i = 0; i < cm->hiarraycols; i++)
458 1090 : cm->cd[newrowptr[i]].nuchrs++;
459 :
460 292 : return newrow;
461 : }
462 :
463 : /*
464 : * newhicolorcols - create a new set of columns in the high colormap
465 : *
466 : * Essentially, extends the 2-D array to the right with a copy of itself.
467 : */
468 : static void
469 654 : newhicolorcols(struct colormap *cm)
470 : {
471 : color *newarray;
472 : int r,
473 : c;
474 :
475 654 : if (cm->hiarraycols >= INT_MAX / (cm->maxarrayrows * 2))
476 : {
477 0 : CERR(REG_ESPACE);
478 0 : return;
479 : }
480 654 : newarray = (color *) REALLOC(cm->hicolormap,
481 : cm->maxarrayrows *
482 : cm->hiarraycols * 2 * sizeof(color));
483 654 : if (newarray == NULL)
484 : {
485 0 : CERR(REG_ESPACE);
486 0 : return;
487 : }
488 654 : cm->hicolormap = newarray;
489 :
490 : /* Duplicate existing columns to the right, and increase ref counts */
491 : /* Must work backwards in the array because we realloc'd in place */
492 1308 : for (r = cm->hiarrayrows - 1; r >= 0; r--)
493 : {
494 654 : color *oldrowptr = &newarray[r * cm->hiarraycols];
495 654 : color *newrowptr = &newarray[r * cm->hiarraycols * 2];
496 654 : color *newrowptr2 = newrowptr + cm->hiarraycols;
497 :
498 1332 : for (c = 0; c < cm->hiarraycols; c++)
499 : {
500 678 : color co = oldrowptr[c];
501 :
502 678 : newrowptr[c] = newrowptr2[c] = co;
503 678 : cm->cd[co].nuchrs++;
504 : }
505 : }
506 :
507 654 : cm->hiarraycols *= 2;
508 : }
509 :
510 : /*
511 : * subcolorcvec - allocate new subcolors to cvec members, fill in arcs
512 : *
513 : * For each chr "c" represented by the cvec, do the equivalent of
514 : * newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
515 : *
516 : * Note that in typical cases, many of the subcolors are the same.
517 : * While newarc() would discard duplicate arc requests, we can save
518 : * some cycles by not calling it repetitively to begin with. This is
519 : * mechanized with the "lastsubcolor" state variable.
520 : */
521 : static void
522 3122 : subcolorcvec(struct vars *v,
523 : struct cvec *cv,
524 : struct state *lp,
525 : struct state *rp)
526 : {
527 3122 : struct colormap *cm = v->cm;
528 3122 : color lastsubcolor = COLORLESS;
529 : chr ch,
530 : from,
531 : to;
532 : const chr *p;
533 : int i;
534 :
535 : /* ordinary characters */
536 19892 : for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
537 : {
538 16770 : ch = *p;
539 16770 : subcoloronechr(v, ch, lp, rp, &lastsubcolor);
540 16770 : NOERR();
541 : }
542 :
543 : /* and the ranges */
544 15474 : for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
545 : {
546 12352 : from = *p;
547 12352 : to = *(p + 1);
548 12352 : if (from <= MAX_SIMPLE_CHR)
549 : {
550 : /* deal with simple chars one at a time */
551 12336 : chr lim = (to <= MAX_SIMPLE_CHR) ? to : MAX_SIMPLE_CHR;
552 :
553 504274 : while (from <= lim)
554 : {
555 491938 : color sco = subcolor(cm, from);
556 :
557 491938 : NOERR();
558 491938 : if (sco != lastsubcolor)
559 : {
560 3924 : newarc(v->nfa, PLAIN, sco, lp, rp);
561 3924 : NOERR();
562 3924 : lastsubcolor = sco;
563 : }
564 491938 : from++;
565 : }
566 : }
567 : /* deal with any part of the range that's above MAX_SIMPLE_CHR */
568 12352 : if (from < to)
569 16 : subcoloronerange(v, from, to, lp, rp, &lastsubcolor);
570 12336 : else if (from == to)
571 0 : subcoloronechr(v, from, lp, rp, &lastsubcolor);
572 12352 : NOERR();
573 : }
574 :
575 : /* and deal with cclass if any */
576 3122 : if (cv->cclasscode >= 0)
577 : {
578 : int classbit;
579 : color *pco;
580 : int r,
581 : c;
582 :
583 : /* Enlarge array if we don't have a column bit assignment for cclass */
584 746 : if (cm->classbits[cv->cclasscode] == 0)
585 : {
586 654 : cm->classbits[cv->cclasscode] = cm->hiarraycols;
587 654 : newhicolorcols(cm);
588 654 : NOERR();
589 : }
590 : /* Apply subcolorhi() and make arc for each entry in relevant cols */
591 746 : classbit = cm->classbits[cv->cclasscode];
592 746 : pco = cm->hicolormap;
593 1492 : for (r = 0; r < cm->hiarrayrows; r++)
594 : {
595 2286 : for (c = 0; c < cm->hiarraycols; c++)
596 : {
597 1540 : if (c & classbit)
598 : {
599 770 : color sco = subcolorhi(cm, pco);
600 :
601 770 : NOERR();
602 : /* add the arc if needed */
603 770 : if (sco != lastsubcolor)
604 : {
605 70 : newarc(v->nfa, PLAIN, sco, lp, rp);
606 70 : NOERR();
607 70 : lastsubcolor = sco;
608 : }
609 : }
610 1540 : pco++;
611 : }
612 : }
613 : }
614 : }
615 :
616 : /*
617 : * subcoloronechr - do subcolorcvec's work for a singleton chr
618 : *
619 : * We could just let subcoloronerange do this, but it's a bit more efficient
620 : * if we exploit the single-chr case. Also, callers find it useful for this
621 : * to be able to handle both low and high chr codes.
622 : */
623 : static void
624 91420 : subcoloronechr(struct vars *v,
625 : chr ch,
626 : struct state *lp,
627 : struct state *rp,
628 : color *lastsubcolor)
629 : {
630 91420 : struct colormap *cm = v->cm;
631 : colormaprange *newranges;
632 : int numnewranges;
633 : colormaprange *oldrange;
634 : int oldrangen;
635 : int newrow;
636 :
637 : /* Easy case for low chr codes */
638 91420 : if (ch <= MAX_SIMPLE_CHR)
639 : {
640 91154 : color sco = subcolor(cm, ch);
641 :
642 91154 : NOERR();
643 91150 : if (sco != *lastsubcolor)
644 : {
645 76920 : newarc(v->nfa, PLAIN, sco, lp, rp);
646 76920 : *lastsubcolor = sco;
647 : }
648 91150 : return;
649 : }
650 :
651 : /*
652 : * Potentially, we could need two more colormapranges than we have now, if
653 : * the given chr is in the middle of some existing range.
654 : */
655 : newranges = (colormaprange *)
656 266 : MALLOC((cm->numcmranges + 2) * sizeof(colormaprange));
657 266 : if (newranges == NULL)
658 : {
659 0 : CERR(REG_ESPACE);
660 0 : return;
661 : }
662 266 : numnewranges = 0;
663 :
664 : /* Ranges before target are unchanged */
665 266 : for (oldrange = cm->cmranges, oldrangen = 0;
666 14800 : oldrangen < cm->numcmranges;
667 14534 : oldrange++, oldrangen++)
668 : {
669 14552 : if (oldrange->cmax >= ch)
670 18 : break;
671 14534 : newranges[numnewranges++] = *oldrange;
672 : }
673 :
674 : /* Match target chr against current range */
675 266 : if (oldrangen >= cm->numcmranges || oldrange->cmin > ch)
676 : {
677 : /* chr does not belong to any existing range, make a new one */
678 256 : newranges[numnewranges].cmin = ch;
679 256 : newranges[numnewranges].cmax = ch;
680 : /* row state should be cloned from the "all others" row */
681 256 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
682 256 : numnewranges++;
683 : }
684 10 : else if (oldrange->cmin == oldrange->cmax)
685 : {
686 : /* we have an existing singleton range matching the chr */
687 2 : newranges[numnewranges++] = *oldrange;
688 2 : newrow = oldrange->rownum;
689 : /* we've now fully processed this old range */
690 2 : oldrange++, oldrangen++;
691 : }
692 : else
693 : {
694 : /* chr is a subset of this existing range, must split it */
695 8 : if (ch > oldrange->cmin)
696 : {
697 : /* emit portion of old range before chr */
698 8 : newranges[numnewranges].cmin = oldrange->cmin;
699 8 : newranges[numnewranges].cmax = ch - 1;
700 8 : newranges[numnewranges].rownum = oldrange->rownum;
701 8 : numnewranges++;
702 : }
703 : /* emit chr as singleton range, initially cloning from range */
704 8 : newranges[numnewranges].cmin = ch;
705 8 : newranges[numnewranges].cmax = ch;
706 8 : newranges[numnewranges].rownum = newrow =
707 8 : newhicolorrow(cm, oldrange->rownum);
708 8 : numnewranges++;
709 8 : if (ch < oldrange->cmax)
710 : {
711 : /* emit portion of old range after chr */
712 8 : newranges[numnewranges].cmin = ch + 1;
713 8 : newranges[numnewranges].cmax = oldrange->cmax;
714 : /* must clone the row if we are making two new ranges from old */
715 8 : newranges[numnewranges].rownum =
716 8 : (ch > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
717 : oldrange->rownum;
718 8 : numnewranges++;
719 : }
720 : /* we've now fully processed this old range */
721 8 : oldrange++, oldrangen++;
722 : }
723 :
724 : /* Update colors in newrow and create arcs as needed */
725 266 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
726 :
727 : /* Ranges after target are unchanged */
728 1260 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
729 : {
730 994 : newranges[numnewranges++] = *oldrange;
731 : }
732 :
733 : /* Assert our original space estimate was adequate */
734 : assert(numnewranges <= (cm->numcmranges + 2));
735 :
736 : /* And finally, store back the updated list of ranges */
737 266 : if (cm->cmranges != NULL)
738 256 : FREE(cm->cmranges);
739 266 : cm->cmranges = newranges;
740 266 : cm->numcmranges = numnewranges;
741 : }
742 :
743 : /*
744 : * subcoloronerange - do subcolorcvec's work for a high range
745 : */
746 : static void
747 16 : subcoloronerange(struct vars *v,
748 : chr from,
749 : chr to,
750 : struct state *lp,
751 : struct state *rp,
752 : color *lastsubcolor)
753 : {
754 16 : struct colormap *cm = v->cm;
755 : colormaprange *newranges;
756 : int numnewranges;
757 : colormaprange *oldrange;
758 : int oldrangen;
759 : int newrow;
760 :
761 : /* Caller should take care of non-high-range cases */
762 : assert(from > MAX_SIMPLE_CHR);
763 : assert(from < to);
764 :
765 : /*
766 : * Potentially, if we have N non-adjacent ranges, we could need as many as
767 : * 2N+1 result ranges (consider case where new range spans 'em all).
768 : */
769 : newranges = (colormaprange *)
770 16 : MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange));
771 16 : if (newranges == NULL)
772 : {
773 0 : CERR(REG_ESPACE);
774 0 : return;
775 : }
776 16 : numnewranges = 0;
777 :
778 : /* Ranges before target are unchanged */
779 16 : for (oldrange = cm->cmranges, oldrangen = 0;
780 28 : oldrangen < cm->numcmranges;
781 12 : oldrange++, oldrangen++)
782 : {
783 20 : if (oldrange->cmax >= from)
784 8 : break;
785 12 : newranges[numnewranges++] = *oldrange;
786 : }
787 :
788 : /*
789 : * Deal with ranges that (partially) overlap the target. As we process
790 : * each such range, increase "from" to remove the dealt-with characters
791 : * from the target range.
792 : */
793 22 : while (oldrangen < cm->numcmranges && oldrange->cmin <= to)
794 : {
795 6 : if (from < oldrange->cmin)
796 : {
797 : /* Handle portion of new range that corresponds to no old range */
798 2 : newranges[numnewranges].cmin = from;
799 2 : newranges[numnewranges].cmax = oldrange->cmin - 1;
800 : /* row state should be cloned from the "all others" row */
801 2 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
802 2 : numnewranges++;
803 : /* Update colors in newrow and create arcs as needed */
804 2 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
805 : /* We've now fully processed the part of new range before old */
806 2 : from = oldrange->cmin;
807 : }
808 :
809 6 : if (from <= oldrange->cmin && to >= oldrange->cmax)
810 : {
811 : /* old range is fully contained in new, process it in-place */
812 2 : newranges[numnewranges++] = *oldrange;
813 2 : newrow = oldrange->rownum;
814 2 : from = oldrange->cmax + 1;
815 : }
816 : else
817 : {
818 : /* some part of old range does not overlap new range */
819 4 : if (from > oldrange->cmin)
820 : {
821 : /* emit portion of old range before new range */
822 2 : newranges[numnewranges].cmin = oldrange->cmin;
823 2 : newranges[numnewranges].cmax = from - 1;
824 2 : newranges[numnewranges].rownum = oldrange->rownum;
825 2 : numnewranges++;
826 : }
827 : /* emit common subrange, initially cloning from old range */
828 4 : newranges[numnewranges].cmin = from;
829 4 : newranges[numnewranges].cmax =
830 4 : (to < oldrange->cmax) ? to : oldrange->cmax;
831 4 : newranges[numnewranges].rownum = newrow =
832 4 : newhicolorrow(cm, oldrange->rownum);
833 4 : numnewranges++;
834 4 : if (to < oldrange->cmax)
835 : {
836 : /* emit portion of old range after new range */
837 2 : newranges[numnewranges].cmin = to + 1;
838 2 : newranges[numnewranges].cmax = oldrange->cmax;
839 : /* must clone the row if we are making two new ranges from old */
840 2 : newranges[numnewranges].rownum =
841 2 : (from > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
842 : oldrange->rownum;
843 2 : numnewranges++;
844 : }
845 4 : from = oldrange->cmax + 1;
846 : }
847 : /* Update colors in newrow and create arcs as needed */
848 6 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
849 : /* we've now fully processed this old range */
850 6 : oldrange++, oldrangen++;
851 : }
852 :
853 16 : if (from <= to)
854 : {
855 : /* Handle portion of new range that corresponds to no old range */
856 14 : newranges[numnewranges].cmin = from;
857 14 : newranges[numnewranges].cmax = to;
858 : /* row state should be cloned from the "all others" row */
859 14 : newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
860 14 : numnewranges++;
861 : /* Update colors in newrow and create arcs as needed */
862 14 : subcoloronerow(v, newrow, lp, rp, lastsubcolor);
863 : }
864 :
865 : /* Ranges after target are unchanged */
866 270 : for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
867 : {
868 254 : newranges[numnewranges++] = *oldrange;
869 : }
870 :
871 : /* Assert our original space estimate was adequate */
872 : assert(numnewranges <= (cm->numcmranges * 2 + 1));
873 :
874 : /* And finally, store back the updated list of ranges */
875 16 : if (cm->cmranges != NULL)
876 10 : FREE(cm->cmranges);
877 16 : cm->cmranges = newranges;
878 16 : cm->numcmranges = numnewranges;
879 : }
880 :
881 : /*
882 : * subcoloronerow - do subcolorcvec's work for one new row in the high colormap
883 : */
884 : static void
885 288 : subcoloronerow(struct vars *v,
886 : int rownum,
887 : struct state *lp,
888 : struct state *rp,
889 : color *lastsubcolor)
890 : {
891 288 : struct colormap *cm = v->cm;
892 : color *pco;
893 : int i;
894 :
895 : /* Apply subcolorhi() and make arc for each entry in row */
896 288 : pco = &cm->hicolormap[rownum * cm->hiarraycols];
897 1356 : for (i = 0; i < cm->hiarraycols; pco++, i++)
898 : {
899 1068 : color sco = subcolorhi(cm, pco);
900 :
901 1068 : NOERR();
902 : /* make the arc if needed */
903 1068 : if (sco != *lastsubcolor)
904 : {
905 1068 : newarc(v->nfa, PLAIN, sco, lp, rp);
906 1068 : NOERR();
907 1068 : *lastsubcolor = sco;
908 : }
909 : }
910 : }
911 :
912 : /*
913 : * okcolors - promote subcolors to full colors
914 : */
915 : static void
916 77786 : okcolors(struct nfa *nfa,
917 : struct colormap *cm)
918 : {
919 : struct colordesc *cd;
920 77786 : struct colordesc *end = CDEND(cm);
921 : struct colordesc *scd;
922 : struct arc *a;
923 : color co;
924 : color sco;
925 :
926 621904 : for (cd = cm->cd, co = 0; cd < end; cd++, co++)
927 : {
928 544118 : sco = cd->sub;
929 544118 : if (UNUSEDCOLOR(cd) || sco == NOSUB)
930 : {
931 : /* has no subcolor, no further action */
932 : }
933 43224 : else if (sco == co)
934 : {
935 : /* is subcolor, let parent deal with it */
936 : }
937 43100 : else if (cd->nschrs == 0 && cd->nuchrs == 0)
938 : {
939 : /*
940 : * Parent is now empty, so just change all its arcs to the
941 : * subcolor, then free the parent.
942 : *
943 : * It is not obvious that simply relabeling the arcs like this is
944 : * OK; it appears to risk creating duplicate arcs. We are
945 : * basically relying on the assumption that processing of a
946 : * bracket expression can't create arcs of both a color and its
947 : * subcolor between the bracket's endpoints.
948 : */
949 432 : cd->sub = NOSUB;
950 432 : scd = &cm->cd[sco];
951 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
952 : assert(scd->sub == sco);
953 432 : scd->sub = NOSUB;
954 2902 : while ((a = cd->arcs) != NULL)
955 : {
956 : assert(a->co == co);
957 2470 : uncolorchain(cm, a);
958 2470 : a->co = sco;
959 2470 : colorchain(cm, a);
960 : }
961 432 : freecolor(cm, co);
962 : }
963 : else
964 : {
965 : /* parent's arcs must gain parallel subcolor arcs */
966 42668 : cd->sub = NOSUB;
967 42668 : scd = &cm->cd[sco];
968 : assert(scd->nschrs > 0 || scd->nuchrs > 0);
969 : assert(scd->sub == sco);
970 42668 : scd->sub = NOSUB;
971 43684 : for (a = cd->arcs; a != NULL; a = a->colorchain)
972 : {
973 : assert(a->co == co);
974 1016 : newarc(nfa, a->type, sco, a->from, a->to);
975 : }
976 : }
977 : }
978 77786 : }
979 :
980 : /*
981 : * colorchain - add this arc to the color chain of its color
982 : */
983 : static void
984 1584968 : colorchain(struct colormap *cm,
985 : struct arc *a)
986 : {
987 1584968 : struct colordesc *cd = &cm->cd[a->co];
988 :
989 : assert(a->co >= 0);
990 1584968 : if (cd->arcs != NULL)
991 1520608 : cd->arcs->colorchainRev = a;
992 1584968 : a->colorchain = cd->arcs;
993 1584968 : a->colorchainRev = NULL;
994 1584968 : cd->arcs = a;
995 1584968 : }
996 :
997 : /*
998 : * uncolorchain - delete this arc from the color chain of its color
999 : */
1000 : static void
1001 733028 : uncolorchain(struct colormap *cm,
1002 : struct arc *a)
1003 : {
1004 733028 : struct colordesc *cd = &cm->cd[a->co];
1005 733028 : struct arc *aa = a->colorchainRev;
1006 :
1007 : assert(a->co >= 0);
1008 733028 : if (aa == NULL)
1009 : {
1010 : assert(cd->arcs == a);
1011 180956 : cd->arcs = a->colorchain;
1012 : }
1013 : else
1014 : {
1015 : assert(aa->colorchain == a);
1016 552072 : aa->colorchain = a->colorchain;
1017 : }
1018 733028 : if (a->colorchain != NULL)
1019 681518 : a->colorchain->colorchainRev = aa;
1020 733028 : a->colorchain = NULL; /* paranoia */
1021 733028 : a->colorchainRev = NULL;
1022 733028 : }
1023 :
1024 : /*
1025 : * rainbow - add arcs of all full colors (but one) between specified states
1026 : *
1027 : * If there isn't an exception color, we now generate just a single arc
1028 : * labeled RAINBOW, saving lots of arc-munging later on.
1029 : */
1030 : static void
1031 42976 : rainbow(struct nfa *nfa,
1032 : struct colormap *cm,
1033 : int type,
1034 : color but, /* COLORLESS if no exceptions */
1035 : struct state *from,
1036 : struct state *to)
1037 : {
1038 : struct colordesc *cd;
1039 42976 : struct colordesc *end = CDEND(cm);
1040 : color co;
1041 :
1042 42976 : if (but == COLORLESS)
1043 : {
1044 42546 : newarc(nfa, type, RAINBOW, from, to);
1045 42546 : return;
1046 : }
1047 :
1048 : /* Gotta do it the hard way. Skip subcolors, pseudocolors, and "but" */
1049 3198 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1050 2768 : if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
1051 2338 : !(cd->flags & PSEUDO))
1052 2338 : newarc(nfa, type, co, from, to);
1053 : }
1054 :
1055 : /*
1056 : * colorcomplement - add arcs of complementary colors
1057 : *
1058 : * We add arcs of all colors that are not pseudocolors and do not match
1059 : * any of the "of" state's PLAIN outarcs.
1060 : *
1061 : * The calling sequence ought to be reconciled with cloneouts().
1062 : */
1063 : static void
1064 592 : colorcomplement(struct nfa *nfa,
1065 : struct colormap *cm,
1066 : int type,
1067 : struct state *of,
1068 : struct state *from,
1069 : struct state *to)
1070 : {
1071 : struct colordesc *cd;
1072 592 : struct colordesc *end = CDEND(cm);
1073 : color co;
1074 : struct arc *a;
1075 :
1076 : assert(of != from);
1077 :
1078 : /*
1079 : * A RAINBOW arc matches all colors, making the complement empty. But we
1080 : * can't just return without making any arcs, because that would leave the
1081 : * NFA disconnected which would break any future delsub(). Instead, make
1082 : * a CANTMATCH arc. Also set the HASCANTMATCH flag so we know we need to
1083 : * clean that up at the start of NFA optimization.
1084 : */
1085 592 : if (findarc(of, PLAIN, RAINBOW) != NULL)
1086 : {
1087 6 : newarc(nfa, CANTMATCH, 0, from, to);
1088 6 : nfa->flags |= HASCANTMATCH;
1089 6 : return;
1090 : }
1091 :
1092 : /* Otherwise, transiently mark the colors that appear in of's out-arcs */
1093 1720 : for (a = of->outs; a != NULL; a = a->outchain)
1094 : {
1095 1134 : if (a->type == PLAIN)
1096 : {
1097 : assert(a->co >= 0);
1098 1134 : cd = &cm->cd[a->co];
1099 : assert(!UNUSEDCOLOR(cd));
1100 1134 : cd->flags |= COLMARK;
1101 : }
1102 :
1103 : /*
1104 : * There's no syntax for re-complementing a color set, so we cannot
1105 : * see CANTMATCH arcs here.
1106 : */
1107 : assert(a->type != CANTMATCH);
1108 : }
1109 :
1110 : /* Scan colors, clear transient marks, add arcs for unmarked colors */
1111 2808 : for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1112 : {
1113 2222 : if (cd->flags & COLMARK)
1114 1134 : cd->flags &= ~COLMARK;
1115 1088 : else if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
1116 1064 : newarc(nfa, type, co, from, to);
1117 : }
1118 : }
1119 :
1120 :
1121 : #ifdef REG_DEBUG
1122 :
1123 : /*
1124 : * dumpcolors - debugging output
1125 : */
1126 : static void
1127 : dumpcolors(struct colormap *cm,
1128 : FILE *f)
1129 : {
1130 : struct colordesc *cd;
1131 : struct colordesc *end;
1132 : color co;
1133 : chr c;
1134 :
1135 : fprintf(f, "max %ld\n", (long) cm->max);
1136 : end = CDEND(cm);
1137 : for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
1138 : {
1139 : if (!UNUSEDCOLOR(cd))
1140 : {
1141 : assert(cd->nschrs > 0 || cd->nuchrs > 0);
1142 : if (cd->flags & PSEUDO)
1143 : fprintf(f, "#%2ld(ps): ", (long) co);
1144 : else
1145 : fprintf(f, "#%2ld(%2d): ", (long) co, cd->nschrs + cd->nuchrs);
1146 :
1147 : /*
1148 : * Unfortunately, it's hard to do this next bit more efficiently.
1149 : */
1150 : for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
1151 : if (GETCOLOR(cm, c) == co)
1152 : dumpchr(c, f);
1153 : fprintf(f, "\n");
1154 : }
1155 : }
1156 : /* dump the high colormap if it contains anything interesting */
1157 : if (cm->hiarrayrows > 1 || cm->hiarraycols > 1)
1158 : {
1159 : int r,
1160 : c;
1161 : const color *rowptr;
1162 :
1163 : fprintf(f, "other:\t");
1164 : for (c = 0; c < cm->hiarraycols; c++)
1165 : {
1166 : fprintf(f, "\t%ld", (long) cm->hicolormap[c]);
1167 : }
1168 : fprintf(f, "\n");
1169 : for (r = 0; r < cm->numcmranges; r++)
1170 : {
1171 : dumpchr(cm->cmranges[r].cmin, f);
1172 : fprintf(f, "..");
1173 : dumpchr(cm->cmranges[r].cmax, f);
1174 : fprintf(f, ":");
1175 : rowptr = &cm->hicolormap[cm->cmranges[r].rownum * cm->hiarraycols];
1176 : for (c = 0; c < cm->hiarraycols; c++)
1177 : {
1178 : fprintf(f, "\t%ld", (long) rowptr[c]);
1179 : }
1180 : fprintf(f, "\n");
1181 : }
1182 : }
1183 : }
1184 :
1185 : /*
1186 : * dumpchr - print a chr
1187 : *
1188 : * Kind of char-centric but works well enough for debug use.
1189 : */
1190 : static void
1191 : dumpchr(chr c,
1192 : FILE *f)
1193 : {
1194 : if (c == '\\')
1195 : fprintf(f, "\\\\");
1196 : else if (c > ' ' && c <= '~')
1197 : putc((char) c, f);
1198 : else
1199 : fprintf(f, "\\u%04lx", (long) c);
1200 : }
1201 :
1202 : #endif /* REG_DEBUG */
|