Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * test_regex.c
4 : * Test harness for the regular expression package.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/test/modules/test_regex/test_regex.c
11 : *
12 : * -------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "funcapi.h"
18 : #include "regex/regex.h"
19 : #include "utils/array.h"
20 : #include "utils/builtins.h"
21 :
22 4 : PG_MODULE_MAGIC;
23 :
24 :
25 : /* Options decoded from test_regex()'s flags argument by parse_test_flags() */
26 : typedef struct test_re_flags
27 : {
28 : int cflags; /* compile flags (REG_xxx) passed to pg_regcomp */
29 : int eflags; /* execute flags (REG_xxx) passed to pg_regexec */
30 : long info; /* re_info bits the test expects to be set */
31 : bool glob; /* match globally (once per occurrence) */
32 : bool indices; /* report match indices, not the matched strings */
33 : bool partial; /* a partial match is expected */
34 : } test_re_flags;
35 :
36 : /* State kept across calls of the set-returning function test_regex() */
37 : typedef struct test_regex_ctx
38 : {
39 : test_re_flags re_flags; /* copy of the parsed option flags */
40 : rm_detail_t details; /* "details" filled in by the last pg_regexec call */
41 : text *orig_str; /* data string in original TEXT form */
42 : int nmatches; /* number of result rows recorded in match_locs */
43 : int npatterns; /* entries per match: whole match plus subpatterns */
44 : /* For each entry we store the start char index and the end+1 char index, */
45 : /* so the number of entries in match_locs is nmatches * npatterns * 2 */
46 : int *match_locs; /* 0-based character indexes, -1 if not set */
47 : int next_match; /* 0-based index of next match to return */
48 : /* workspace for build_test_match_result() */
49 : Datum *elems; /* has npatterns+1 elements */
50 : bool *nulls; /* has npatterns+1 elements */
51 : pg_wchar *wide_str; /* wide-char form of orig_str, or NULL if not kept */
52 : char *conv_buf; /* wchar-to-multibyte conversion buffer, or NULL */
53 : int conv_bufsiz; /* allocated size of conv_buf in bytes */
54 : } test_regex_ctx;
55 :
56 : /* Forward declarations of local functions (test_re_execute is defined before its first use, so it is not listed) */
57 : static void test_re_compile(text *text_re, int cflags, Oid collation,
58 : regex_t *result_re);
59 : static void parse_test_flags(test_re_flags *flags, text *opts);
60 : static test_regex_ctx *setup_test_matches(text *orig_str,
61 : regex_t *cpattern,
62 : test_re_flags *re_flags,
63 : Oid collation,
64 : bool use_subpatterns);
65 : static ArrayType *build_test_info_result(regex_t *cpattern,
66 : test_re_flags *flags);
67 : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
68 :
69 :
70 : /*
71 : * test_regex(pattern text, string text, flags text) returns setof text[]
72 : *
73 : * This is largely based on regexp.c's regexp_matches, with additions
74 : * for debugging purposes.
75 : */
76 6 : PG_FUNCTION_INFO_V1(test_regex);
77 :
78 : Datum
79 3534 : test_regex(PG_FUNCTION_ARGS)
80 : {
81 : FuncCallContext *funcctx;
82 : test_regex_ctx *matchctx;
83 : ArrayType *result_ary;
84 :
85 3534 : if (SRF_IS_FIRSTCALL())
86 : {
87 1392 : text *pattern = PG_GETARG_TEXT_PP(0);
88 1392 : text *flags = PG_GETARG_TEXT_PP(2);
89 1392 : Oid collation = PG_GET_COLLATION();
90 : test_re_flags re_flags;
91 : regex_t cpattern;
92 : MemoryContext oldcontext;
93 :
94 1392 : funcctx = SRF_FIRSTCALL_INIT();
95 1392 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
96 :
97 : /* Determine options */
98 1392 : parse_test_flags(&re_flags, flags);
99 :
100 : /* set up the compiled pattern */
101 1392 : test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
102 :
103 : /* be sure to copy the input string into the multi-call ctx */
104 1180 : matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
105 : &re_flags,
106 : collation,
107 : true);
108 :
109 : /* Pre-create workspace that build_test_match_result needs */
110 1180 : matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
111 1180 : matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
112 :
113 1180 : MemoryContextSwitchTo(oldcontext);
114 1180 : funcctx->user_fctx = matchctx;
115 :
116 : /*
117 : * Return the first result row, which is info equivalent to Tcl's
118 : * "regexp -about" output
119 : */
120 1180 : result_ary = build_test_info_result(&cpattern, &re_flags);
121 :
122 1180 : pg_regfree(&cpattern);
123 :
124 1180 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
125 : }
126 : else
127 : {
128 : /* Each subsequent row describes one match */
129 2142 : funcctx = SRF_PERCALL_SETUP();
130 2142 : matchctx = (test_regex_ctx *) funcctx->user_fctx;
131 :
132 2142 : if (matchctx->next_match < matchctx->nmatches)
133 : {
134 962 : result_ary = build_test_match_result(matchctx);
135 962 : matchctx->next_match++;
136 962 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
137 : }
138 : }
139 :
140 1180 : SRF_RETURN_DONE(funcctx);
141 : }
142 :
143 :
144 : /*
145 : * test_re_compile - compile a RE
146 : *
147 : * text_re --- the pattern, expressed as a TEXT object
148 : * cflags --- compile options for the pattern
149 : * collation --- collation to use for LC_CTYPE-dependent behavior
150 : * result_re --- output, compiled RE is stored here
151 : *
152 : * Pattern is given in the database encoding. We internally convert to
153 : * an array of pg_wchar, which is what Spencer's regex package wants.
154 : *
155 : * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
156 : */
157 : static void
158 1392 : test_re_compile(text *text_re, int cflags, Oid collation,
159 : regex_t *result_re)
160 : {
161 1392 : int text_re_len = VARSIZE_ANY_EXHDR(text_re);
162 1392 : char *text_re_val = VARDATA_ANY(text_re);
163 : pg_wchar *pattern;
164 : int pattern_len;
165 : int regcomp_result;
166 : char errMsg[100];
167 :
168 : /* Convert pattern string to wide characters */
169 1392 : pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
170 1392 : pattern_len = pg_mb2wchar_with_len(text_re_val,
171 : pattern,
172 : text_re_len);
173 :
174 1392 : regcomp_result = pg_regcomp(result_re,
175 : pattern,
176 : pattern_len,
177 : cflags,
178 : collation);
179 :
180 1392 : pfree(pattern);
181 :
182 1392 : if (regcomp_result != REG_OKAY)
183 : {
184 : /* re didn't compile (no need for pg_regfree, if so) */
185 212 : pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
186 212 : ereport(ERROR,
187 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
188 : errmsg("invalid regular expression: %s", errMsg)));
189 : }
190 1180 : }
191 :
192 : /*
193 : * test_re_execute - execute a RE on pg_wchar data
194 : *
195 : * Returns true on match, false on no match
196 : * Arguments are as for pg_regexec
197 : */
198 : static bool
199 1180 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
200 : int start_search,
201 : rm_detail_t *details,
202 : int nmatch, regmatch_t *pmatch,
203 : int eflags)
204 : {
205 : int regexec_result;
206 : char errMsg[100];
207 :
208 : /* Initialize match locations in case engine doesn't */
209 1180 : details->rm_extend.rm_so = -1;
210 1180 : details->rm_extend.rm_eo = -1;
211 2932 : for (int i = 0; i < nmatch; i++)
212 : {
213 1752 : pmatch[i].rm_so = -1;
214 1752 : pmatch[i].rm_eo = -1;
215 : }
216 :
217 : /* Perform RE match and return result */
218 1180 : regexec_result = pg_regexec(re,
219 : data,
220 : data_len,
221 : start_search,
222 : details,
223 : nmatch,
224 : pmatch,
225 : eflags);
226 :
227 1180 : if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
228 : {
229 : /* re failed??? */
230 0 : pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
231 0 : ereport(ERROR,
232 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
233 : errmsg("regular expression failed: %s", errMsg)));
234 : }
235 :
236 1180 : return (regexec_result == REG_OKAY);
237 : }
238 :
239 :
240 : /*
241 : * parse_test_flags - parse the flags argument
242 : *
243 : * flags --- output argument, filled with desired options
244 : * opts --- TEXT object, or NULL for defaults
245 : */
246 : static void
247 1392 : parse_test_flags(test_re_flags *flags, text *opts)
248 : {
249 : /* these defaults must match Tcl's */
250 1392 : int cflags = REG_ADVANCED;
251 1392 : int eflags = 0;
252 1392 : long info = 0;
253 :
254 1392 : flags->glob = false;
255 1392 : flags->indices = false;
256 1392 : flags->partial = false;
257 :
258 1392 : if (opts)
259 : {
260 1392 : char *opt_p = VARDATA_ANY(opts);
261 1392 : int opt_len = VARSIZE_ANY_EXHDR(opts);
262 : int i;
263 :
264 3782 : for (i = 0; i < opt_len; i++)
265 : {
266 2390 : switch (opt_p[i])
267 : {
268 156 : case '-':
269 : /* allowed, no-op */
270 156 : break;
271 14 : case '!':
272 14 : flags->partial = true;
273 14 : break;
274 2 : case '*':
275 : /* test requires Unicode --- ignored here */
276 2 : break;
277 106 : case '0':
278 106 : flags->indices = true;
279 106 : break;
280 :
281 : /* These flags correspond to user-exposed RE options: */
282 0 : case 'g': /* global match */
283 0 : flags->glob = true;
284 0 : break;
285 40 : case 'i': /* case insensitive */
286 40 : cflags |= REG_ICASE;
287 40 : break;
288 70 : case 'n': /* \n affects ^ $ . [^ */
289 70 : cflags |= REG_NEWLINE;
290 70 : break;
291 4 : case 'p': /* ~Perl, \n affects . [^ */
292 4 : cflags |= REG_NLSTOP;
293 4 : cflags &= ~REG_NLANCH;
294 4 : break;
295 4 : case 'w': /* weird, \n affects ^ $ only */
296 4 : cflags &= ~REG_NLSTOP;
297 4 : cflags |= REG_NLANCH;
298 4 : break;
299 28 : case 'x': /* expanded syntax */
300 28 : cflags |= REG_EXPANDED;
301 28 : break;
302 :
303 : /* These flags correspond to Tcl's -xflags options: */
304 4 : case 'a':
305 4 : cflags |= REG_ADVF;
306 4 : break;
307 262 : case 'b':
308 262 : cflags &= ~REG_ADVANCED;
309 262 : break;
310 22 : case 'c':
311 :
312 : /*
313 : * Tcl calls this TCL_REG_CANMATCH, but it's really
314 : * REG_EXPECT. In this implementation we must also set
315 : * the partial and indices flags, so that
316 : * setup_test_matches and build_test_match_result will
317 : * emit the desired data. (They'll emit more fields than
318 : * Tcl would, but that's fine.)
319 : */
320 22 : cflags |= REG_EXPECT;
321 22 : flags->partial = true;
322 22 : flags->indices = true;
323 22 : break;
324 20 : case 'e':
325 20 : cflags &= ~REG_ADVANCED;
326 20 : cflags |= REG_EXTENDED;
327 20 : break;
328 12 : case 'q':
329 12 : cflags &= ~REG_ADVANCED;
330 12 : cflags |= REG_QUOTE;
331 12 : break;
332 4 : case 'o': /* o for opaque */
333 4 : cflags |= REG_NOSUB;
334 4 : break;
335 4 : case 's': /* s for start */
336 4 : cflags |= REG_BOSONLY;
337 4 : break;
338 12 : case '+':
339 12 : cflags |= REG_FAKE;
340 12 : break;
341 0 : case ',':
342 0 : cflags |= REG_PROGRESS;
343 0 : break;
344 0 : case '.':
345 0 : cflags |= REG_DUMP;
346 0 : break;
347 0 : case ':':
348 0 : eflags |= REG_MTRACE;
349 0 : break;
350 0 : case ';':
351 0 : eflags |= REG_FTRACE;
352 0 : break;
353 12 : case '^':
354 12 : eflags |= REG_NOTBOL;
355 12 : break;
356 8 : case '$':
357 8 : eflags |= REG_NOTEOL;
358 8 : break;
359 34 : case 't':
360 34 : cflags |= REG_EXPECT;
361 34 : break;
362 10 : case '%':
363 10 : eflags |= REG_SMALL;
364 10 : break;
365 :
366 : /* These flags define expected info bits: */
367 10 : case 'A':
368 10 : info |= REG_UBSALNUM;
369 10 : break;
370 8 : case 'B':
371 8 : info |= REG_UBRACES;
372 8 : break;
373 84 : case 'E':
374 84 : info |= REG_UBBS;
375 84 : break;
376 68 : case 'H':
377 68 : info |= REG_ULOOKAROUND;
378 68 : break;
379 22 : case 'I':
380 22 : info |= REG_UIMPOSSIBLE;
381 22 : break;
382 328 : case 'L':
383 328 : info |= REG_ULOCALE;
384 328 : break;
385 86 : case 'M':
386 86 : info |= REG_UUNPORT;
387 86 : break;
388 94 : case 'N':
389 94 : info |= REG_UEMPTYMATCH;
390 94 : break;
391 614 : case 'P':
392 614 : info |= REG_UNONPOSIX;
393 614 : break;
394 72 : case 'Q':
395 72 : info |= REG_UBOUNDS;
396 72 : break;
397 84 : case 'R':
398 84 : info |= REG_UBACKREF;
399 84 : break;
400 50 : case 'S':
401 50 : info |= REG_UUNSPEC;
402 50 : break;
403 40 : case 'T':
404 40 : info |= REG_USHORTEST;
405 40 : break;
406 2 : case 'U':
407 2 : info |= REG_UPBOTCH;
408 2 : break;
409 :
410 0 : default:
411 0 : ereport(ERROR,
412 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
413 : errmsg("invalid regular expression test option: \"%.*s\"",
414 : pg_mblen_range(opt_p + i, opt_p + opt_len),
415 : opt_p + i)));
416 : break;
417 : }
418 : }
419 : }
420 1392 : flags->cflags = cflags;
421 1392 : flags->eflags = eflags;
422 1392 : flags->info = info;
423 1392 : }
424 :
425 : /*
426 : * setup_test_matches --- do the initial matching
427 : *
428 : * To simplify memory management, we do all the matching in one swoop.
429 : * The returned test_regex_ctx contains the locations of all the substrings
430 : * matching the pattern.
431 : */
432 : static test_regex_ctx *
433 1180 : setup_test_matches(text *orig_str,
434 : regex_t *cpattern, test_re_flags *re_flags,
435 : Oid collation,
436 : bool use_subpatterns)
437 : {
438 1180 : test_regex_ctx *matchctx = palloc0_object(test_regex_ctx);
439 1180 : int eml = pg_database_encoding_max_length();
440 : int orig_len;
441 : pg_wchar *wide_str;
442 : int wide_len;
443 : regmatch_t *pmatch;
444 : int pmatch_len;
445 : int array_len;
446 : int array_idx;
447 : int prev_match_end;
448 : int start_search;
449 1180 : int maxlen = 0; /* largest fetch length in characters */
450 :
451 : /* save flags */
452 1180 : matchctx->re_flags = *re_flags;
453 :
454 : /* save original string --- we'll extract result substrings from it */
455 1180 : matchctx->orig_str = orig_str;
456 :
457 : /* convert string to pg_wchar form for matching */
458 1180 : orig_len = VARSIZE_ANY_EXHDR(orig_str);
459 1180 : wide_str = palloc_array(pg_wchar, orig_len + 1);
460 1180 : wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
461 :
462 : /* do we want to remember subpatterns? */
463 1180 : if (use_subpatterns && cpattern->re_nsub > 0)
464 : {
465 254 : matchctx->npatterns = cpattern->re_nsub + 1;
466 254 : pmatch_len = cpattern->re_nsub + 1;
467 : }
468 : else
469 : {
470 926 : use_subpatterns = false;
471 926 : matchctx->npatterns = 1;
472 926 : pmatch_len = 1;
473 : }
474 :
475 : /* temporary output space for RE package */
476 1180 : pmatch = palloc_array(regmatch_t, pmatch_len);
477 :
478 : /*
479 : * the real output space (grown dynamically if needed)
480 : *
481 : * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
482 : * than at 2^27
483 : */
484 1180 : array_len = re_flags->glob ? 255 : 31;
485 1180 : matchctx->match_locs = palloc_array(int, array_len);
486 1180 : array_idx = 0;
487 :
488 : /* search for the pattern, perhaps repeatedly */
489 1180 : prev_match_end = 0;
490 1180 : start_search = 0;
491 1180 : while (test_re_execute(cpattern, wide_str, wide_len,
492 : start_search,
493 : &matchctx->details,
494 : pmatch_len, pmatch,
495 : re_flags->eflags))
496 : {
497 : /* enlarge output space if needed */
498 926 : while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
499 : {
500 0 : array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
501 0 : if (array_len > MaxAllocSize / sizeof(int))
502 0 : ereport(ERROR,
503 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
504 : errmsg("too many regular expression matches")));
505 0 : matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
506 : sizeof(int) * array_len);
507 : }
508 :
509 : /* save this match's locations */
510 2188 : for (int i = 0; i < matchctx->npatterns; i++)
511 : {
512 1262 : int so = pmatch[i].rm_so;
513 1262 : int eo = pmatch[i].rm_eo;
514 :
515 1262 : matchctx->match_locs[array_idx++] = so;
516 1262 : matchctx->match_locs[array_idx++] = eo;
517 1262 : if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
518 876 : maxlen = (eo - so);
519 : }
520 926 : matchctx->nmatches++;
521 926 : prev_match_end = pmatch[0].rm_eo;
522 :
523 : /* if not glob, stop after one match */
524 926 : if (!re_flags->glob)
525 926 : break;
526 :
527 : /*
528 : * Advance search position. Normally we start the next search at the
529 : * end of the previous match; but if the match was of zero length, we
530 : * have to advance by one character, or we'd just find the same match
531 : * again.
532 : */
533 0 : start_search = prev_match_end;
534 0 : if (pmatch[0].rm_so == pmatch[0].rm_eo)
535 0 : start_search++;
536 0 : if (start_search > wide_len)
537 0 : break;
538 : }
539 :
540 : /*
541 : * If we had no match, but "partial" and "indices" are set, emit the
542 : * details.
543 : */
544 1180 : if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
545 : {
546 : /* enlarge output space if needed */
547 36 : while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
548 : {
549 0 : array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
550 0 : if (array_len > MaxAllocSize / sizeof(int))
551 0 : ereport(ERROR,
552 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
553 : errmsg("too many regular expression matches")));
554 0 : matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
555 : sizeof(int) * array_len);
556 : }
557 :
558 36 : matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
559 36 : matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
560 : /* we don't have pmatch data, so emit -1 */
561 40 : for (int i = 1; i < matchctx->npatterns; i++)
562 : {
563 4 : matchctx->match_locs[array_idx++] = -1;
564 4 : matchctx->match_locs[array_idx++] = -1;
565 : }
566 36 : matchctx->nmatches++;
567 : }
568 :
569 : Assert(array_idx <= array_len);
570 :
571 1180 : if (eml > 1)
572 : {
573 1180 : int64 maxsiz = eml * (int64) maxlen;
574 : int conv_bufsiz;
575 :
576 : /*
577 : * Make the conversion buffer large enough for any substring of
578 : * interest.
579 : *
580 : * Worst case: assume we need the maximum size (maxlen*eml), but take
581 : * advantage of the fact that the original string length in bytes is
582 : * an upper bound on the byte length of any fetched substring (and we
583 : * know that len+1 is safe to allocate because the varlena header is
584 : * longer than 1 byte).
585 : */
586 1180 : if (maxsiz > orig_len)
587 830 : conv_bufsiz = orig_len + 1;
588 : else
589 350 : conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */
590 :
591 1180 : matchctx->conv_buf = palloc(conv_bufsiz);
592 1180 : matchctx->conv_bufsiz = conv_bufsiz;
593 1180 : matchctx->wide_str = wide_str;
594 : }
595 : else
596 : {
597 : /* No need to keep the wide string if we're in a single-byte charset. */
598 0 : pfree(wide_str);
599 0 : matchctx->wide_str = NULL;
600 0 : matchctx->conv_buf = NULL;
601 0 : matchctx->conv_bufsiz = 0;
602 : }
603 :
604 : /* Clean up temp storage */
605 1180 : pfree(pmatch);
606 :
607 1180 : return matchctx;
608 : }
609 :
610 : /*
611 : * build_test_info_result - build output array describing compiled regexp
612 : *
613 : * This borrows some code from Tcl's TclRegAbout().
614 : */
615 : static ArrayType *
616 1180 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
617 : {
618 : /* Translation data for flag bits in regex_t.re_info */
619 : struct infoname
620 : {
621 : int bit;
622 : const char *text;
623 : };
624 : static const struct infoname infonames[] = {
625 : {REG_UBACKREF, "REG_UBACKREF"},
626 : {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
627 : {REG_UBOUNDS, "REG_UBOUNDS"},
628 : {REG_UBRACES, "REG_UBRACES"},
629 : {REG_UBSALNUM, "REG_UBSALNUM"},
630 : {REG_UPBOTCH, "REG_UPBOTCH"},
631 : {REG_UBBS, "REG_UBBS"},
632 : {REG_UNONPOSIX, "REG_UNONPOSIX"},
633 : {REG_UUNSPEC, "REG_UUNSPEC"},
634 : {REG_UUNPORT, "REG_UUNPORT"},
635 : {REG_ULOCALE, "REG_ULOCALE"},
636 : {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
637 : {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
638 : {REG_USHORTEST, "REG_USHORTEST"},
639 : {0, NULL}
640 : };
641 : const struct infoname *inf;
642 : Datum elems[lengthof(infonames) + 1];
643 1180 : int nresults = 0;
644 : char buf[80];
645 : int dims[1];
646 : int lbs[1];
647 :
648 : /* Set up results: first, the number of subexpressions */
649 1180 : snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
650 1180 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
651 :
652 : /* Report individual info bit states */
653 17700 : for (inf = infonames; inf->bit != 0; inf++)
654 : {
655 16520 : if (cpattern->re_info & inf->bit)
656 : {
657 1516 : if (flags->info & inf->bit)
658 1516 : elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
659 : else
660 : {
661 0 : snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
662 0 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
663 : }
664 : }
665 : else
666 : {
667 15004 : if (flags->info & inf->bit)
668 : {
669 0 : snprintf(buf, sizeof(buf), "missing %s!", inf->text);
670 0 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
671 : }
672 : }
673 : }
674 :
675 : /* And form an array */
676 1180 : dims[0] = nresults;
677 1180 : lbs[0] = 1;
678 : /* XXX: this hardcodes assumptions about the text type */
679 1180 : return construct_md_array(elems, NULL, 1, dims, lbs,
680 : TEXTOID, -1, false, TYPALIGN_INT);
681 : }
682 :
683 : /*
684 : * build_test_match_result - build output array for current match
685 : *
686 : * Note that if the indices flag is set, we don't need any strings,
687 : * just the location data.
688 : */
689 : static ArrayType *
690 962 : build_test_match_result(test_regex_ctx *matchctx)
691 : {
692 962 : char *buf = matchctx->conv_buf;
693 962 : Datum *elems = matchctx->elems;
694 962 : bool *nulls = matchctx->nulls;
695 962 : bool indices = matchctx->re_flags.indices;
696 : char bufstr[80];
697 : int dims[1];
698 : int lbs[1];
699 : int loc;
700 : int i;
701 :
702 : /* Extract matching substrings from the original string */
703 962 : loc = matchctx->next_match * matchctx->npatterns * 2;
704 2264 : for (i = 0; i < matchctx->npatterns; i++)
705 : {
706 1302 : int so = matchctx->match_locs[loc++];
707 1302 : int eo = matchctx->match_locs[loc++];
708 :
709 1302 : if (indices)
710 : {
711 : /* Report eo this way for consistency with Tcl */
712 168 : snprintf(bufstr, sizeof(bufstr), "%d %d",
713 : so, so < 0 ? eo : eo - 1);
714 168 : elems[i] = PointerGetDatum(cstring_to_text(bufstr));
715 168 : nulls[i] = false;
716 : }
717 1134 : else if (so < 0 || eo < 0)
718 : {
719 24 : elems[i] = (Datum) 0;
720 24 : nulls[i] = true;
721 : }
722 1110 : else if (buf)
723 : {
724 1110 : int len = pg_wchar2mb_with_len(matchctx->wide_str + so,
725 : buf,
726 : eo - so);
727 :
728 : Assert(len < matchctx->conv_bufsiz);
729 1110 : elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
730 1110 : nulls[i] = false;
731 : }
732 : else
733 : {
734 0 : elems[i] = DirectFunctionCall3(text_substr,
735 : PointerGetDatum(matchctx->orig_str),
736 : Int32GetDatum(so + 1),
737 : Int32GetDatum(eo - so));
738 0 : nulls[i] = false;
739 : }
740 : }
741 :
742 : /* In EXPECT indices mode, also report the "details" */
743 962 : if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
744 : {
745 56 : int so = matchctx->details.rm_extend.rm_so;
746 56 : int eo = matchctx->details.rm_extend.rm_eo;
747 :
748 56 : snprintf(bufstr, sizeof(bufstr), "%d %d",
749 : so, so < 0 ? eo : eo - 1);
750 56 : elems[i] = PointerGetDatum(cstring_to_text(bufstr));
751 56 : nulls[i] = false;
752 56 : i++;
753 : }
754 :
755 : /* And form an array */
756 962 : dims[0] = i;
757 962 : lbs[0] = 1;
758 : /* XXX: this hardcodes assumptions about the text type */
759 962 : return construct_md_array(elems, nulls, 1, dims, lbs,
760 : TEXTOID, -1, false, TYPALIGN_INT);
761 : }
|