Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * test_regex.c
4 : * Test harness for the regular expression package.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/test/modules/test_regex/test_regex.c
11 : *
12 : * -------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "catalog/pg_type_d.h"
18 : #include "funcapi.h"
19 : #include "regex/regex.h"
20 : #include "utils/array.h"
21 : #include "utils/builtins.h"
22 :
/*
 * Module magic block for this loadable test module.
 * NOTE(review): presumably required so the server accepts the shared
 * library at load time -- confirm against fmgr.h.
 */
PG_MODULE_MAGIC;
24 :
25 :
/*
 * test_re_flags - every option that influences the regex test functions,
 * as decoded from the textual "flags" argument by parse_test_flags().
 */
typedef struct test_re_flags
{
	/* compile-time flags passed to Spencer's regex code */
	int			cflags;
	/* execution-time flags passed to Spencer's regex code */
	int			eflags;
	/* re_info bits the test expects the compiled pattern to report */
	long		info;
	/* match globally, i.e. once for each occurrence */
	bool		glob;
	/* report match indices rather than the matched strings */
	bool		indices;
	/* a partial match is expected */
	bool		partial;
} test_re_flags;
36 :
37 : /* cross-call state for test_regex() */
38 : typedef struct test_regex_ctx
39 : {
40 : test_re_flags re_flags; /* flags */
41 : rm_detail_t details; /* "details" from execution */
42 : text *orig_str; /* data string in original TEXT form */
43 : int nmatches; /* number of places where pattern matched */
44 : int npatterns; /* number of capturing subpatterns */
45 : /* We store start char index and end+1 char index for each match */
46 : /* so the number of entries in match_locs is nmatches * npatterns * 2 */
47 : int *match_locs; /* 0-based character indexes */
48 : int next_match; /* 0-based index of next match to process */
49 : /* workspace for build_test_match_result() */
50 : Datum *elems; /* has npatterns+1 elements */
51 : bool *nulls; /* has npatterns+1 elements */
52 : pg_wchar *wide_str; /* wide-char version of original string */
53 : char *conv_buf; /* conversion buffer, if needed */
54 : int conv_bufsiz; /* size thereof */
55 : } test_regex_ctx;
56 :
57 : /* Local functions */
58 : static void test_re_compile(text *text_re, int cflags, Oid collation,
59 : regex_t *result_re);
60 : static void parse_test_flags(test_re_flags *flags, text *opts);
61 : static test_regex_ctx *setup_test_matches(text *orig_str,
62 : regex_t *cpattern,
63 : test_re_flags *re_flags,
64 : Oid collation,
65 : bool use_subpatterns);
66 : static ArrayType *build_test_info_result(regex_t *cpattern,
67 : test_re_flags *flags);
68 : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
69 :
70 :
71 : /*
72 : * test_regex(pattern text, string text, flags text) returns setof text[]
73 : *
74 : * This is largely based on regexp.c's regexp_matches, with additions
75 : * for debugging purposes.
76 : */
77 3 : PG_FUNCTION_INFO_V1(test_regex);
78 :
79 : Datum
80 1767 : test_regex(PG_FUNCTION_ARGS)
81 : {
82 : FuncCallContext *funcctx;
83 : test_regex_ctx *matchctx;
84 : ArrayType *result_ary;
85 :
86 1767 : if (SRF_IS_FIRSTCALL())
87 : {
88 696 : text *pattern = PG_GETARG_TEXT_PP(0);
89 696 : text *flags = PG_GETARG_TEXT_PP(2);
90 696 : Oid collation = PG_GET_COLLATION();
91 : test_re_flags re_flags;
92 : regex_t cpattern;
93 : MemoryContext oldcontext;
94 :
95 696 : funcctx = SRF_FIRSTCALL_INIT();
96 696 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
97 :
98 : /* Determine options */
99 696 : parse_test_flags(&re_flags, flags);
100 :
101 : /* set up the compiled pattern */
102 696 : test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
103 :
104 : /* be sure to copy the input string into the multi-call ctx */
105 590 : matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
106 : &re_flags,
107 : collation,
108 : true);
109 :
110 : /* Pre-create workspace that build_test_match_result needs */
111 590 : matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
112 590 : matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
113 :
114 590 : MemoryContextSwitchTo(oldcontext);
115 590 : funcctx->user_fctx = matchctx;
116 :
117 : /*
118 : * Return the first result row, which is info equivalent to Tcl's
119 : * "regexp -about" output
120 : */
121 590 : result_ary = build_test_info_result(&cpattern, &re_flags);
122 :
123 590 : pg_regfree(&cpattern);
124 :
125 590 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
126 : }
127 : else
128 : {
129 : /* Each subsequent row describes one match */
130 1071 : funcctx = SRF_PERCALL_SETUP();
131 1071 : matchctx = (test_regex_ctx *) funcctx->user_fctx;
132 :
133 1071 : if (matchctx->next_match < matchctx->nmatches)
134 : {
135 481 : result_ary = build_test_match_result(matchctx);
136 481 : matchctx->next_match++;
137 481 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
138 : }
139 : }
140 :
141 590 : SRF_RETURN_DONE(funcctx);
142 : }
143 :
144 :
145 : /*
146 : * test_re_compile - compile a RE
147 : *
148 : * text_re --- the pattern, expressed as a TEXT object
149 : * cflags --- compile options for the pattern
150 : * collation --- collation to use for LC_CTYPE-dependent behavior
151 : * result_re --- output, compiled RE is stored here
152 : *
153 : * Pattern is given in the database encoding. We internally convert to
154 : * an array of pg_wchar, which is what Spencer's regex package wants.
155 : *
156 : * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
157 : */
158 : static void
159 696 : test_re_compile(text *text_re, int cflags, Oid collation,
160 : regex_t *result_re)
161 : {
162 696 : int text_re_len = VARSIZE_ANY_EXHDR(text_re);
163 696 : char *text_re_val = VARDATA_ANY(text_re);
164 : pg_wchar *pattern;
165 : int pattern_len;
166 : int regcomp_result;
167 : char errMsg[100];
168 :
169 : /* Convert pattern string to wide characters */
170 696 : pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
171 696 : pattern_len = pg_mb2wchar_with_len(text_re_val,
172 : pattern,
173 : text_re_len);
174 :
175 696 : regcomp_result = pg_regcomp(result_re,
176 : pattern,
177 : pattern_len,
178 : cflags,
179 : collation);
180 :
181 696 : pfree(pattern);
182 :
183 696 : if (regcomp_result != REG_OKAY)
184 : {
185 : /* re didn't compile (no need for pg_regfree, if so) */
186 106 : pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
187 106 : ereport(ERROR,
188 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
189 : errmsg("invalid regular expression: %s", errMsg)));
190 : }
191 590 : }
192 :
193 : /*
194 : * test_re_execute - execute a RE on pg_wchar data
195 : *
196 : * Returns true on match, false on no match
197 : * Arguments are as for pg_regexec
198 : */
199 : static bool
200 590 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
201 : int start_search,
202 : rm_detail_t *details,
203 : int nmatch, regmatch_t *pmatch,
204 : int eflags)
205 : {
206 : int regexec_result;
207 : char errMsg[100];
208 :
209 : /* Initialize match locations in case engine doesn't */
210 590 : details->rm_extend.rm_so = -1;
211 590 : details->rm_extend.rm_eo = -1;
212 1466 : for (int i = 0; i < nmatch; i++)
213 : {
214 876 : pmatch[i].rm_so = -1;
215 876 : pmatch[i].rm_eo = -1;
216 : }
217 :
218 : /* Perform RE match and return result */
219 590 : regexec_result = pg_regexec(re,
220 : data,
221 : data_len,
222 : start_search,
223 : details,
224 : nmatch,
225 : pmatch,
226 : eflags);
227 :
228 590 : if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
229 : {
230 : /* re failed??? */
231 0 : pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
232 0 : ereport(ERROR,
233 : (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
234 : errmsg("regular expression failed: %s", errMsg)));
235 : }
236 :
237 590 : return (regexec_result == REG_OKAY);
238 : }
239 :
240 :
241 : /*
242 : * parse_test_flags - parse the flags argument
243 : *
244 : * flags --- output argument, filled with desired options
245 : * opts --- TEXT object, or NULL for defaults
246 : */
247 : static void
248 696 : parse_test_flags(test_re_flags *flags, text *opts)
249 : {
250 : /* these defaults must match Tcl's */
251 696 : int cflags = REG_ADVANCED;
252 696 : int eflags = 0;
253 696 : long info = 0;
254 :
255 696 : flags->glob = false;
256 696 : flags->indices = false;
257 696 : flags->partial = false;
258 :
259 696 : if (opts)
260 : {
261 696 : char *opt_p = VARDATA_ANY(opts);
262 696 : int opt_len = VARSIZE_ANY_EXHDR(opts);
263 : int i;
264 :
265 1891 : for (i = 0; i < opt_len; i++)
266 : {
267 1195 : switch (opt_p[i])
268 : {
269 78 : case '-':
270 : /* allowed, no-op */
271 78 : break;
272 7 : case '!':
273 7 : flags->partial = true;
274 7 : break;
275 1 : case '*':
276 : /* test requires Unicode --- ignored here */
277 1 : break;
278 53 : case '0':
279 53 : flags->indices = true;
280 53 : break;
281 :
282 : /* These flags correspond to user-exposed RE options: */
283 0 : case 'g': /* global match */
284 0 : flags->glob = true;
285 0 : break;
286 20 : case 'i': /* case insensitive */
287 20 : cflags |= REG_ICASE;
288 20 : break;
289 35 : case 'n': /* \n affects ^ $ . [^ */
290 35 : cflags |= REG_NEWLINE;
291 35 : break;
292 2 : case 'p': /* ~Perl, \n affects . [^ */
293 2 : cflags |= REG_NLSTOP;
294 2 : cflags &= ~REG_NLANCH;
295 2 : break;
296 2 : case 'w': /* weird, \n affects ^ $ only */
297 2 : cflags &= ~REG_NLSTOP;
298 2 : cflags |= REG_NLANCH;
299 2 : break;
300 14 : case 'x': /* expanded syntax */
301 14 : cflags |= REG_EXPANDED;
302 14 : break;
303 :
304 : /* These flags correspond to Tcl's -xflags options: */
305 2 : case 'a':
306 2 : cflags |= REG_ADVF;
307 2 : break;
308 131 : case 'b':
309 131 : cflags &= ~REG_ADVANCED;
310 131 : break;
311 11 : case 'c':
312 :
313 : /*
314 : * Tcl calls this TCL_REG_CANMATCH, but it's really
315 : * REG_EXPECT. In this implementation we must also set
316 : * the partial and indices flags, so that
317 : * setup_test_matches and build_test_match_result will
318 : * emit the desired data. (They'll emit more fields than
319 : * Tcl would, but that's fine.)
320 : */
321 11 : cflags |= REG_EXPECT;
322 11 : flags->partial = true;
323 11 : flags->indices = true;
324 11 : break;
325 10 : case 'e':
326 10 : cflags &= ~REG_ADVANCED;
327 10 : cflags |= REG_EXTENDED;
328 10 : break;
329 6 : case 'q':
330 6 : cflags &= ~REG_ADVANCED;
331 6 : cflags |= REG_QUOTE;
332 6 : break;
333 2 : case 'o': /* o for opaque */
334 2 : cflags |= REG_NOSUB;
335 2 : break;
336 2 : case 's': /* s for start */
337 2 : cflags |= REG_BOSONLY;
338 2 : break;
339 6 : case '+':
340 6 : cflags |= REG_FAKE;
341 6 : break;
342 0 : case ',':
343 0 : cflags |= REG_PROGRESS;
344 0 : break;
345 0 : case '.':
346 0 : cflags |= REG_DUMP;
347 0 : break;
348 0 : case ':':
349 0 : eflags |= REG_MTRACE;
350 0 : break;
351 0 : case ';':
352 0 : eflags |= REG_FTRACE;
353 0 : break;
354 6 : case '^':
355 6 : eflags |= REG_NOTBOL;
356 6 : break;
357 4 : case '$':
358 4 : eflags |= REG_NOTEOL;
359 4 : break;
360 17 : case 't':
361 17 : cflags |= REG_EXPECT;
362 17 : break;
363 5 : case '%':
364 5 : eflags |= REG_SMALL;
365 5 : break;
366 :
367 : /* These flags define expected info bits: */
368 5 : case 'A':
369 5 : info |= REG_UBSALNUM;
370 5 : break;
371 4 : case 'B':
372 4 : info |= REG_UBRACES;
373 4 : break;
374 42 : case 'E':
375 42 : info |= REG_UBBS;
376 42 : break;
377 34 : case 'H':
378 34 : info |= REG_ULOOKAROUND;
379 34 : break;
380 11 : case 'I':
381 11 : info |= REG_UIMPOSSIBLE;
382 11 : break;
383 164 : case 'L':
384 164 : info |= REG_ULOCALE;
385 164 : break;
386 43 : case 'M':
387 43 : info |= REG_UUNPORT;
388 43 : break;
389 47 : case 'N':
390 47 : info |= REG_UEMPTYMATCH;
391 47 : break;
392 307 : case 'P':
393 307 : info |= REG_UNONPOSIX;
394 307 : break;
395 36 : case 'Q':
396 36 : info |= REG_UBOUNDS;
397 36 : break;
398 42 : case 'R':
399 42 : info |= REG_UBACKREF;
400 42 : break;
401 25 : case 'S':
402 25 : info |= REG_UUNSPEC;
403 25 : break;
404 20 : case 'T':
405 20 : info |= REG_USHORTEST;
406 20 : break;
407 1 : case 'U':
408 1 : info |= REG_UPBOTCH;
409 1 : break;
410 :
411 0 : default:
412 0 : ereport(ERROR,
413 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
414 : errmsg("invalid regular expression test option: \"%.*s\"",
415 : pg_mblen_range(opt_p + i, opt_p + opt_len),
416 : opt_p + i)));
417 : break;
418 : }
419 : }
420 : }
421 696 : flags->cflags = cflags;
422 696 : flags->eflags = eflags;
423 696 : flags->info = info;
424 696 : }
425 :
426 : /*
427 : * setup_test_matches --- do the initial matching
428 : *
429 : * To simplify memory management, we do all the matching in one swoop.
430 : * The returned test_regex_ctx contains the locations of all the substrings
431 : * matching the pattern.
432 : */
433 : static test_regex_ctx *
434 590 : setup_test_matches(text *orig_str,
435 : regex_t *cpattern, test_re_flags *re_flags,
436 : Oid collation,
437 : bool use_subpatterns)
438 : {
439 590 : test_regex_ctx *matchctx = palloc0_object(test_regex_ctx);
440 590 : int eml = pg_database_encoding_max_length();
441 : int orig_len;
442 : pg_wchar *wide_str;
443 : int wide_len;
444 : regmatch_t *pmatch;
445 : int pmatch_len;
446 : int array_len;
447 : int array_idx;
448 : int prev_match_end;
449 : int start_search;
450 590 : int maxlen = 0; /* largest fetch length in characters */
451 :
452 : /* save flags */
453 590 : matchctx->re_flags = *re_flags;
454 :
455 : /* save original string --- we'll extract result substrings from it */
456 590 : matchctx->orig_str = orig_str;
457 :
458 : /* convert string to pg_wchar form for matching */
459 590 : orig_len = VARSIZE_ANY_EXHDR(orig_str);
460 590 : wide_str = palloc_array(pg_wchar, orig_len + 1);
461 590 : wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
462 :
463 : /* do we want to remember subpatterns? */
464 590 : if (use_subpatterns && cpattern->re_nsub > 0)
465 : {
466 127 : matchctx->npatterns = cpattern->re_nsub + 1;
467 127 : pmatch_len = cpattern->re_nsub + 1;
468 : }
469 : else
470 : {
471 463 : use_subpatterns = false;
472 463 : matchctx->npatterns = 1;
473 463 : pmatch_len = 1;
474 : }
475 :
476 : /* temporary output space for RE package */
477 590 : pmatch = palloc_array(regmatch_t, pmatch_len);
478 :
479 : /*
480 : * the real output space (grown dynamically if needed)
481 : *
482 : * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
483 : * than at 2^27
484 : */
485 590 : array_len = re_flags->glob ? 255 : 31;
486 590 : matchctx->match_locs = palloc_array(int, array_len);
487 590 : array_idx = 0;
488 :
489 : /* search for the pattern, perhaps repeatedly */
490 590 : prev_match_end = 0;
491 590 : start_search = 0;
492 590 : while (test_re_execute(cpattern, wide_str, wide_len,
493 : start_search,
494 : &matchctx->details,
495 : pmatch_len, pmatch,
496 : re_flags->eflags))
497 : {
498 : /* enlarge output space if needed */
499 463 : while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
500 : {
501 0 : array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
502 0 : if (array_len > MaxAllocSize / sizeof(int))
503 0 : ereport(ERROR,
504 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
505 : errmsg("too many regular expression matches")));
506 0 : matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
507 : sizeof(int) * array_len);
508 : }
509 :
510 : /* save this match's locations */
511 1094 : for (int i = 0; i < matchctx->npatterns; i++)
512 : {
513 631 : int so = pmatch[i].rm_so;
514 631 : int eo = pmatch[i].rm_eo;
515 :
516 631 : matchctx->match_locs[array_idx++] = so;
517 631 : matchctx->match_locs[array_idx++] = eo;
518 631 : if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
519 438 : maxlen = (eo - so);
520 : }
521 463 : matchctx->nmatches++;
522 463 : prev_match_end = pmatch[0].rm_eo;
523 :
524 : /* if not glob, stop after one match */
525 463 : if (!re_flags->glob)
526 463 : break;
527 :
528 : /*
529 : * Advance search position. Normally we start the next search at the
530 : * end of the previous match; but if the match was of zero length, we
531 : * have to advance by one character, or we'd just find the same match
532 : * again.
533 : */
534 0 : start_search = prev_match_end;
535 0 : if (pmatch[0].rm_so == pmatch[0].rm_eo)
536 0 : start_search++;
537 0 : if (start_search > wide_len)
538 0 : break;
539 : }
540 :
541 : /*
542 : * If we had no match, but "partial" and "indices" are set, emit the
543 : * details.
544 : */
545 590 : if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
546 : {
547 : /* enlarge output space if needed */
548 18 : while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
549 : {
550 0 : array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
551 0 : if (array_len > MaxAllocSize / sizeof(int))
552 0 : ereport(ERROR,
553 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
554 : errmsg("too many regular expression matches")));
555 0 : matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
556 : sizeof(int) * array_len);
557 : }
558 :
559 18 : matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
560 18 : matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
561 : /* we don't have pmatch data, so emit -1 */
562 20 : for (int i = 1; i < matchctx->npatterns; i++)
563 : {
564 2 : matchctx->match_locs[array_idx++] = -1;
565 2 : matchctx->match_locs[array_idx++] = -1;
566 : }
567 18 : matchctx->nmatches++;
568 : }
569 :
570 : Assert(array_idx <= array_len);
571 :
572 590 : if (eml > 1)
573 : {
574 590 : int64 maxsiz = eml * (int64) maxlen;
575 : int conv_bufsiz;
576 :
577 : /*
578 : * Make the conversion buffer large enough for any substring of
579 : * interest.
580 : *
581 : * Worst case: assume we need the maximum size (maxlen*eml), but take
582 : * advantage of the fact that the original string length in bytes is
583 : * an upper bound on the byte length of any fetched substring (and we
584 : * know that len+1 is safe to allocate because the varlena header is
585 : * longer than 1 byte).
586 : */
587 590 : if (maxsiz > orig_len)
588 415 : conv_bufsiz = orig_len + 1;
589 : else
590 175 : conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */
591 :
592 590 : matchctx->conv_buf = palloc(conv_bufsiz);
593 590 : matchctx->conv_bufsiz = conv_bufsiz;
594 590 : matchctx->wide_str = wide_str;
595 : }
596 : else
597 : {
598 : /* No need to keep the wide string if we're in a single-byte charset. */
599 0 : pfree(wide_str);
600 0 : matchctx->wide_str = NULL;
601 0 : matchctx->conv_buf = NULL;
602 0 : matchctx->conv_bufsiz = 0;
603 : }
604 :
605 : /* Clean up temp storage */
606 590 : pfree(pmatch);
607 :
608 590 : return matchctx;
609 : }
610 :
611 : /*
612 : * build_test_info_result - build output array describing compiled regexp
613 : *
614 : * This borrows some code from Tcl's TclRegAbout().
615 : */
616 : static ArrayType *
617 590 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
618 : {
619 : /* Translation data for flag bits in regex_t.re_info */
620 : struct infoname
621 : {
622 : int bit;
623 : const char *text;
624 : };
625 : static const struct infoname infonames[] = {
626 : {REG_UBACKREF, "REG_UBACKREF"},
627 : {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
628 : {REG_UBOUNDS, "REG_UBOUNDS"},
629 : {REG_UBRACES, "REG_UBRACES"},
630 : {REG_UBSALNUM, "REG_UBSALNUM"},
631 : {REG_UPBOTCH, "REG_UPBOTCH"},
632 : {REG_UBBS, "REG_UBBS"},
633 : {REG_UNONPOSIX, "REG_UNONPOSIX"},
634 : {REG_UUNSPEC, "REG_UUNSPEC"},
635 : {REG_UUNPORT, "REG_UUNPORT"},
636 : {REG_ULOCALE, "REG_ULOCALE"},
637 : {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
638 : {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
639 : {REG_USHORTEST, "REG_USHORTEST"},
640 : {0, NULL}
641 : };
642 : const struct infoname *inf;
643 : Datum elems[lengthof(infonames) + 1];
644 590 : int nresults = 0;
645 : char buf[80];
646 : int dims[1];
647 : int lbs[1];
648 :
649 : /* Set up results: first, the number of subexpressions */
650 590 : snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
651 590 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
652 :
653 : /* Report individual info bit states */
654 8850 : for (inf = infonames; inf->bit != 0; inf++)
655 : {
656 8260 : if (cpattern->re_info & inf->bit)
657 : {
658 758 : if (flags->info & inf->bit)
659 758 : elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
660 : else
661 : {
662 0 : snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
663 0 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
664 : }
665 : }
666 : else
667 : {
668 7502 : if (flags->info & inf->bit)
669 : {
670 0 : snprintf(buf, sizeof(buf), "missing %s!", inf->text);
671 0 : elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
672 : }
673 : }
674 : }
675 :
676 : /* And form an array */
677 590 : dims[0] = nresults;
678 590 : lbs[0] = 1;
679 : /* XXX: this hardcodes assumptions about the text type */
680 590 : return construct_md_array(elems, NULL, 1, dims, lbs,
681 : TEXTOID, -1, false, TYPALIGN_INT);
682 : }
683 :
684 : /*
685 : * build_test_match_result - build output array for current match
686 : *
687 : * Note that if the indices flag is set, we don't need any strings,
688 : * just the location data.
689 : */
690 : static ArrayType *
691 481 : build_test_match_result(test_regex_ctx *matchctx)
692 : {
693 481 : char *buf = matchctx->conv_buf;
694 481 : Datum *elems = matchctx->elems;
695 481 : bool *nulls = matchctx->nulls;
696 481 : bool indices = matchctx->re_flags.indices;
697 : char bufstr[80];
698 : int dims[1];
699 : int lbs[1];
700 : int loc;
701 : int i;
702 :
703 : /* Extract matching substrings from the original string */
704 481 : loc = matchctx->next_match * matchctx->npatterns * 2;
705 1132 : for (i = 0; i < matchctx->npatterns; i++)
706 : {
707 651 : int so = matchctx->match_locs[loc++];
708 651 : int eo = matchctx->match_locs[loc++];
709 :
710 651 : if (indices)
711 : {
712 : /* Report eo this way for consistency with Tcl */
713 84 : snprintf(bufstr, sizeof(bufstr), "%d %d",
714 : so, so < 0 ? eo : eo - 1);
715 84 : elems[i] = PointerGetDatum(cstring_to_text(bufstr));
716 84 : nulls[i] = false;
717 : }
718 567 : else if (so < 0 || eo < 0)
719 : {
720 12 : elems[i] = (Datum) 0;
721 12 : nulls[i] = true;
722 : }
723 555 : else if (buf)
724 : {
725 555 : int len = pg_wchar2mb_with_len(matchctx->wide_str + so,
726 : buf,
727 : eo - so);
728 :
729 : Assert(len < matchctx->conv_bufsiz);
730 555 : elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
731 555 : nulls[i] = false;
732 : }
733 : else
734 : {
735 0 : elems[i] = DirectFunctionCall3(text_substr,
736 : PointerGetDatum(matchctx->orig_str),
737 : Int32GetDatum(so + 1),
738 : Int32GetDatum(eo - so));
739 0 : nulls[i] = false;
740 : }
741 : }
742 :
743 : /* In EXPECT indices mode, also report the "details" */
744 481 : if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
745 : {
746 28 : int so = matchctx->details.rm_extend.rm_so;
747 28 : int eo = matchctx->details.rm_extend.rm_eo;
748 :
749 28 : snprintf(bufstr, sizeof(bufstr), "%d %d",
750 : so, so < 0 ? eo : eo - 1);
751 28 : elems[i] = PointerGetDatum(cstring_to_text(bufstr));
752 28 : nulls[i] = false;
753 28 : i++;
754 : }
755 :
756 : /* And form an array */
757 481 : dims[0] = i;
758 481 : lbs[0] = 1;
759 : /* XXX: this hardcodes assumptions about the text type */
760 481 : return construct_md_array(elems, nulls, 1, dims, lbs,
761 : TEXTOID, -1, false, TYPALIGN_INT);
762 : }
|