Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * numutils.c
4 : * utility functions for I/O of built-in numeric types.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/numutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <limits.h>
18 : #include <ctype.h>
19 :
20 : #include "common/int.h"
21 : #include "port/pg_bitutils.h"
22 : #include "utils/builtins.h"
23 :
/*
 * Two-character decimal spellings of 0 through 99, stored back to back:
 * the text for N sits at offsets 2*N and 2*N + 1.  The integer output
 * routines copy digits out of this table one pair at a time.
 *
 * The array is sized to exactly 200 bytes, so the string literal's
 * terminating NUL is not stored.
 */
static const char DIGIT_TABLE[200] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
39 :
40 : /*
41 : * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
42 : */
43 : static inline int
44 14362704 : decimalLength32(const uint32 v)
45 : {
46 : int t;
47 : static const uint32 PowersOfTen[] = {
48 : 1, 10, 100,
49 : 1000, 10000, 100000,
50 : 1000000, 10000000, 100000000,
51 : 1000000000
52 : };
53 :
54 : /*
55 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
56 : * good-enough approximation of the base-2 logarithm of 10
57 : */
58 14362704 : t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
59 14362704 : return t + (v >= PowersOfTen[t]);
60 : }
61 :
62 : static inline int
63 646772 : decimalLength64(const uint64 v)
64 : {
65 : int t;
66 : static const uint64 PowersOfTen[] = {
67 : UINT64CONST(1), UINT64CONST(10),
68 : UINT64CONST(100), UINT64CONST(1000),
69 : UINT64CONST(10000), UINT64CONST(100000),
70 : UINT64CONST(1000000), UINT64CONST(10000000),
71 : UINT64CONST(100000000), UINT64CONST(1000000000),
72 : UINT64CONST(10000000000), UINT64CONST(100000000000),
73 : UINT64CONST(1000000000000), UINT64CONST(10000000000000),
74 : UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
75 : UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
76 : UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
77 : };
78 :
79 : /*
80 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
81 : * good-enough approximation of the base-2 logarithm of 10
82 : */
83 646772 : t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
84 646772 : return t + (v >= PowersOfTen[t]);
85 : }
86 :
87 : static const int8 hexlookup[128] = {
88 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
89 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91 : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
92 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
93 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
96 : };
97 :
98 : /*
99 : * Convert input string to a signed 16 bit integer. Input strings may be
100 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
101 : * can be prefixed by an optional sign character, either '+' (the default) or
102 : * '-' for negative numbers. Hex strings are recognized by the digits being
103 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
104 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
105 : *
106 : * Allows any number of leading or trailing whitespace characters. Digits may
107 : * optionally be separated by a single underscore character. These can only
108 : * come between digits and not before or after the digits. Underscores have
109 : * no effect on the return value and are supported only to assist in improving
110 : * the human readability of the input strings.
111 : *
112 : * pg_strtoint16() will throw ereport() upon bad input format or overflow;
113 : * while pg_strtoint16_safe() instead returns such complaints in *escontext,
114 : * if it's an ErrorSaveContext.
115 : *
116 : * NB: Accumulate input as an unsigned number, to deal with two's complement
117 : * representation of the most negative number, which can't be represented as a
118 : * signed positive number.
119 : */
120 : int16
121 0 : pg_strtoint16(const char *s)
122 : {
123 0 : return pg_strtoint16_safe(s, NULL);
124 : }
125 :
126 : int16
127 974290 : pg_strtoint16_safe(const char *s, Node *escontext)
128 : {
129 974290 : const char *ptr = s;
130 : const char *firstdigit;
131 974290 : uint16 tmp = 0;
132 974290 : bool neg = false;
133 : unsigned char digit;
134 : int16 result;
135 :
136 : /*
137 : * The majority of cases are likely to be base-10 digits without any
138 : * underscore separator characters. We'll first try to parse the string
139 : * with the assumption that's the case and only fallback on a slower
140 : * implementation which handles hex, octal and binary strings and
141 : * underscores if the fastpath version cannot parse the string.
142 : */
143 :
144 : /* leave it up to the slow path to look for leading spaces */
145 :
146 974290 : if (*ptr == '-')
147 : {
148 19634 : ptr++;
149 19634 : neg = true;
150 : }
151 :
152 : /* a leading '+' is uncommon so leave that for the slow path */
153 :
154 : /* process the first digit */
155 974290 : digit = (*ptr - '0');
156 :
157 : /*
158 : * Exploit unsigned arithmetic to save having to check both the upper and
159 : * lower bounds of the digit.
160 : */
161 974290 : if (likely(digit < 10))
162 : {
163 974170 : ptr++;
164 974170 : tmp = digit;
165 : }
166 : else
167 : {
168 : /* we need at least one digit */
169 120 : goto slow;
170 : }
171 :
172 : /* process remaining digits */
173 : for (;;)
174 : {
175 1004362 : digit = (*ptr - '0');
176 :
177 1004362 : if (digit >= 10)
178 974152 : break;
179 :
180 30210 : ptr++;
181 :
182 30210 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
183 18 : goto out_of_range;
184 :
185 30192 : tmp = tmp * 10 + digit;
186 : }
187 :
188 : /* when the string does not end in a digit, let the slow path handle it */
189 974152 : if (unlikely(*ptr != '\0'))
190 182 : goto slow;
191 :
192 973970 : if (neg)
193 : {
194 19592 : if (unlikely(pg_neg_u16_overflow(tmp, &result)))
195 0 : goto out_of_range;
196 19592 : return result;
197 : }
198 :
199 954378 : if (unlikely(tmp > PG_INT16_MAX))
200 0 : goto out_of_range;
201 :
202 954378 : return (int16) tmp;
203 :
204 302 : slow:
205 302 : tmp = 0;
206 302 : ptr = s;
207 : /* no need to reset neg */
208 :
209 : /* skip leading spaces */
210 362 : while (isspace((unsigned char) *ptr))
211 60 : ptr++;
212 :
213 : /* handle sign */
214 302 : if (*ptr == '-')
215 : {
216 48 : ptr++;
217 48 : neg = true;
218 : }
219 254 : else if (*ptr == '+')
220 0 : ptr++;
221 :
222 : /* process digits */
223 302 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
224 : {
225 42 : firstdigit = ptr += 2;
226 :
227 : for (;;)
228 : {
229 180 : if (isxdigit((unsigned char) *ptr))
230 : {
231 132 : if (unlikely(tmp > -(PG_INT16_MIN / 16)))
232 0 : goto out_of_range;
233 :
234 132 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
235 : }
236 48 : else if (*ptr == '_')
237 : {
238 : /* underscore must be followed by more digits */
239 6 : ptr++;
240 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
241 0 : goto invalid_syntax;
242 : }
243 : else
244 42 : break;
245 : }
246 : }
247 260 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
248 : {
249 42 : firstdigit = ptr += 2;
250 :
251 : for (;;)
252 : {
253 222 : if (*ptr >= '0' && *ptr <= '7')
254 : {
255 174 : if (unlikely(tmp > -(PG_INT16_MIN / 8)))
256 0 : goto out_of_range;
257 :
258 174 : tmp = tmp * 8 + (*ptr++ - '0');
259 : }
260 48 : else if (*ptr == '_')
261 : {
262 : /* underscore must be followed by more digits */
263 6 : ptr++;
264 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
265 0 : goto invalid_syntax;
266 : }
267 : else
268 42 : break;
269 : }
270 : }
271 218 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
272 : {
273 42 : firstdigit = ptr += 2;
274 :
275 : for (;;)
276 : {
277 504 : if (*ptr >= '0' && *ptr <= '1')
278 : {
279 450 : if (unlikely(tmp > -(PG_INT16_MIN / 2)))
280 0 : goto out_of_range;
281 :
282 450 : tmp = tmp * 2 + (*ptr++ - '0');
283 : }
284 54 : else if (*ptr == '_')
285 : {
286 : /* underscore must be followed by more digits */
287 12 : ptr++;
288 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
289 0 : goto invalid_syntax;
290 : }
291 : else
292 42 : break;
293 : }
294 : }
295 : else
296 : {
297 176 : firstdigit = ptr;
298 :
299 : for (;;)
300 : {
301 376 : if (*ptr >= '0' && *ptr <= '9')
302 : {
303 182 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
304 0 : goto out_of_range;
305 :
306 182 : tmp = tmp * 10 + (*ptr++ - '0');
307 : }
308 194 : else if (*ptr == '_')
309 : {
310 : /* underscore may not be first */
311 36 : if (unlikely(ptr == firstdigit))
312 6 : goto invalid_syntax;
313 : /* and it must be followed by more digits */
314 30 : ptr++;
315 30 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
316 12 : goto invalid_syntax;
317 : }
318 : else
319 158 : break;
320 : }
321 : }
322 :
323 : /* require at least one digit */
324 284 : if (unlikely(ptr == firstdigit))
325 120 : goto invalid_syntax;
326 :
327 : /* allow trailing whitespace, but not other trailing chars */
328 200 : while (isspace((unsigned char) *ptr))
329 36 : ptr++;
330 :
331 164 : if (unlikely(*ptr != '\0'))
332 26 : goto invalid_syntax;
333 :
334 138 : if (neg)
335 : {
336 42 : if (unlikely(pg_neg_u16_overflow(tmp, &result)))
337 18 : goto out_of_range;
338 24 : return result;
339 : }
340 :
341 96 : if (tmp > PG_INT16_MAX)
342 18 : goto out_of_range;
343 :
344 78 : return (int16) tmp;
345 :
346 54 : out_of_range:
347 54 : ereturn(escontext, 0,
348 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
349 : errmsg("value \"%s\" is out of range for type %s",
350 : s, "smallint")));
351 :
352 164 : invalid_syntax:
353 164 : ereturn(escontext, 0,
354 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
355 : errmsg("invalid input syntax for type %s: \"%s\"",
356 : "smallint", s)));
357 : }
358 :
359 : /*
360 : * Convert input string to a signed 32 bit integer. Input strings may be
361 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
362 : * can be prefixed by an optional sign character, either '+' (the default) or
363 : * '-' for negative numbers. Hex strings are recognized by the digits being
364 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
365 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
366 : *
367 : * Allows any number of leading or trailing whitespace characters. Digits may
368 : * optionally be separated by a single underscore character. These can only
369 : * come between digits and not before or after the digits. Underscores have
370 : * no effect on the return value and are supported only to assist in improving
371 : * the human readability of the input strings.
372 : *
373 : * pg_strtoint32() will throw ereport() upon bad input format or overflow;
374 : * while pg_strtoint32_safe() instead returns such complaints in *escontext,
375 : * if it's an ErrorSaveContext.
376 : *
377 : * NB: Accumulate input as an unsigned number, to deal with two's complement
378 : * representation of the most negative number, which can't be represented as a
379 : * signed positive number.
380 : */
381 : int32
382 11620 : pg_strtoint32(const char *s)
383 : {
384 11620 : return pg_strtoint32_safe(s, NULL);
385 : }
386 :
387 : int32
388 4996672 : pg_strtoint32_safe(const char *s, Node *escontext)
389 : {
390 4996672 : const char *ptr = s;
391 : const char *firstdigit;
392 4996672 : uint32 tmp = 0;
393 4996672 : bool neg = false;
394 : unsigned char digit;
395 : int32 result;
396 :
397 : /*
398 : * The majority of cases are likely to be base-10 digits without any
399 : * underscore separator characters. We'll first try to parse the string
400 : * with the assumption that's the case and only fallback on a slower
401 : * implementation which handles hex, octal and binary strings and
402 : * underscores if the fastpath version cannot parse the string.
403 : */
404 :
405 : /* leave it up to the slow path to look for leading spaces */
406 :
407 4996672 : if (*ptr == '-')
408 : {
409 54842 : ptr++;
410 54842 : neg = true;
411 : }
412 :
413 : /* a leading '+' is uncommon so leave that for the slow path */
414 :
415 : /* process the first digit */
416 4996672 : digit = (*ptr - '0');
417 :
418 : /*
419 : * Exploit unsigned arithmetic to save having to check both the upper and
420 : * lower bounds of the digit.
421 : */
422 4996672 : if (likely(digit < 10))
423 : {
424 4996160 : ptr++;
425 4996160 : tmp = digit;
426 : }
427 : else
428 : {
429 : /* we need at least one digit */
430 512 : goto slow;
431 : }
432 :
433 : /* process remaining digits */
434 : for (;;)
435 : {
436 14158588 : digit = (*ptr - '0');
437 :
438 14158588 : if (digit >= 10)
439 4994818 : break;
440 :
441 9163770 : ptr++;
442 :
443 9163770 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
444 1342 : goto out_of_range;
445 :
446 9162428 : tmp = tmp * 10 + digit;
447 : }
448 :
449 : /* when the string does not end in a digit, let the slow path handle it */
450 4994818 : if (unlikely(*ptr != '\0'))
451 1414 : goto slow;
452 :
453 4993404 : if (neg)
454 : {
455 54788 : if (unlikely(pg_neg_u32_overflow(tmp, &result)))
456 0 : goto out_of_range;
457 54788 : return result;
458 : }
459 :
460 4938616 : if (unlikely(tmp > PG_INT32_MAX))
461 206 : goto out_of_range;
462 :
463 4938410 : return (int32) tmp;
464 :
465 1926 : slow:
466 1926 : tmp = 0;
467 1926 : ptr = s;
468 : /* no need to reset neg */
469 :
470 : /* skip leading spaces */
471 2076 : while (isspace((unsigned char) *ptr))
472 150 : ptr++;
473 :
474 : /* handle sign */
475 1926 : if (*ptr == '-')
476 : {
477 60 : ptr++;
478 60 : neg = true;
479 : }
480 1866 : else if (*ptr == '+')
481 6 : ptr++;
482 :
483 : /* process digits */
484 1926 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
485 : {
486 998 : firstdigit = ptr += 2;
487 :
488 : for (;;)
489 : {
490 4942 : if (isxdigit((unsigned char) *ptr))
491 : {
492 4010 : if (unlikely(tmp > -(PG_INT32_MIN / 16)))
493 78 : goto out_of_range;
494 :
495 3932 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
496 : }
497 932 : else if (*ptr == '_')
498 : {
499 : /* underscore must be followed by more digits */
500 12 : ptr++;
501 12 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
502 0 : goto invalid_syntax;
503 : }
504 : else
505 920 : break;
506 : }
507 : }
508 928 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
509 : {
510 102 : firstdigit = ptr += 2;
511 :
512 : for (;;)
513 : {
514 972 : if (*ptr >= '0' && *ptr <= '7')
515 : {
516 882 : if (unlikely(tmp > -(PG_INT32_MIN / 8)))
517 24 : goto out_of_range;
518 :
519 858 : tmp = tmp * 8 + (*ptr++ - '0');
520 : }
521 90 : else if (*ptr == '_')
522 : {
523 : /* underscore must be followed by more digits */
524 12 : ptr++;
525 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
526 0 : goto invalid_syntax;
527 : }
528 : else
529 78 : break;
530 : }
531 : }
532 826 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
533 : {
534 104 : firstdigit = ptr += 2;
535 :
536 : for (;;)
537 : {
538 2622 : if (*ptr >= '0' && *ptr <= '1')
539 : {
540 2520 : if (unlikely(tmp > -(PG_INT32_MIN / 2)))
541 26 : goto out_of_range;
542 :
543 2494 : tmp = tmp * 2 + (*ptr++ - '0');
544 : }
545 102 : else if (*ptr == '_')
546 : {
547 : /* underscore must be followed by more digits */
548 24 : ptr++;
549 24 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
550 0 : goto invalid_syntax;
551 : }
552 : else
553 78 : break;
554 : }
555 : }
556 : else
557 : {
558 722 : firstdigit = ptr;
559 :
560 : for (;;)
561 : {
562 2116 : if (*ptr >= '0' && *ptr <= '9')
563 : {
564 1192 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
565 28 : goto out_of_range;
566 :
567 1164 : tmp = tmp * 10 + (*ptr++ - '0');
568 : }
569 924 : else if (*ptr == '_')
570 : {
571 : /* underscore may not be first */
572 248 : if (unlikely(ptr == firstdigit))
573 6 : goto invalid_syntax;
574 : /* and it must be followed by more digits */
575 242 : ptr++;
576 242 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
577 12 : goto invalid_syntax;
578 : }
579 : else
580 676 : break;
581 : }
582 : }
583 :
584 : /* require at least one digit */
585 1752 : if (unlikely(ptr == firstdigit))
586 458 : goto invalid_syntax;
587 :
588 : /* allow trailing whitespace, but not other trailing chars */
589 1378 : while (isspace((unsigned char) *ptr))
590 84 : ptr++;
591 :
592 1294 : if (unlikely(*ptr != '\0'))
593 80 : goto invalid_syntax;
594 :
595 1214 : if (neg)
596 : {
597 42 : if (unlikely(pg_neg_u32_overflow(tmp, &result)))
598 18 : goto out_of_range;
599 24 : return result;
600 : }
601 :
602 1172 : if (tmp > PG_INT32_MAX)
603 72 : goto out_of_range;
604 :
605 1100 : return (int32) tmp;
606 :
607 1794 : out_of_range:
608 1794 : ereturn(escontext, 0,
609 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
610 : errmsg("value \"%s\" is out of range for type %s",
611 : s, "integer")));
612 :
613 556 : invalid_syntax:
614 556 : ereturn(escontext, 0,
615 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
616 : errmsg("invalid input syntax for type %s: \"%s\"",
617 : "integer", s)));
618 : }
619 :
620 : /*
621 : * Convert input string to a signed 64 bit integer. Input strings may be
622 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
623 : * can be prefixed by an optional sign character, either '+' (the default) or
624 : * '-' for negative numbers. Hex strings are recognized by the digits being
625 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
626 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
627 : *
628 : * Allows any number of leading or trailing whitespace characters. Digits may
629 : * optionally be separated by a single underscore character. These can only
630 : * come between digits and not before or after the digits. Underscores have
631 : * no effect on the return value and are supported only to assist in improving
632 : * the human readability of the input strings.
633 : *
634 : * pg_strtoint64() will throw ereport() upon bad input format or overflow;
635 : * while pg_strtoint64_safe() instead returns such complaints in *escontext,
636 : * if it's an ErrorSaveContext.
637 : *
638 : * NB: Accumulate input as an unsigned number, to deal with two's complement
639 : * representation of the most negative number, which can't be represented as a
640 : * signed positive number.
641 : */
642 : int64
643 14 : pg_strtoint64(const char *s)
644 : {
645 14 : return pg_strtoint64_safe(s, NULL);
646 : }
647 :
648 : int64
649 157254 : pg_strtoint64_safe(const char *s, Node *escontext)
650 : {
651 157254 : const char *ptr = s;
652 : const char *firstdigit;
653 157254 : uint64 tmp = 0;
654 157254 : bool neg = false;
655 : unsigned char digit;
656 : int64 result;
657 :
658 : /*
659 : * The majority of cases are likely to be base-10 digits without any
660 : * underscore separator characters. We'll first try to parse the string
661 : * with the assumption that's the case and only fallback on a slower
662 : * implementation which handles hex, octal and binary strings and
663 : * underscores if the fastpath version cannot parse the string.
664 : */
665 :
666 : /* leave it up to the slow path to look for leading spaces */
667 :
668 157254 : if (*ptr == '-')
669 : {
670 1652 : ptr++;
671 1652 : neg = true;
672 : }
673 :
674 : /* a leading '+' is uncommon so leave that for the slow path */
675 :
676 : /* process the first digit */
677 157254 : digit = (*ptr - '0');
678 :
679 : /*
680 : * Exploit unsigned arithmetic to save having to check both the upper and
681 : * lower bounds of the digit.
682 : */
683 157254 : if (likely(digit < 10))
684 : {
685 157042 : ptr++;
686 157042 : tmp = digit;
687 : }
688 : else
689 : {
690 : /* we need at least one digit */
691 212 : goto slow;
692 : }
693 :
694 : /* process remaining digits */
695 : for (;;)
696 : {
697 395464 : digit = (*ptr - '0');
698 :
699 395464 : if (digit >= 10)
700 156802 : break;
701 :
702 238662 : ptr++;
703 :
704 238662 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
705 240 : goto out_of_range;
706 :
707 238422 : tmp = tmp * 10 + digit;
708 : }
709 :
710 : /* when the string does not end in a digit, let the slow path handle it */
711 156802 : if (unlikely(*ptr != '\0'))
712 10614 : goto slow;
713 :
714 146188 : if (neg)
715 : {
716 1000 : if (unlikely(pg_neg_u64_overflow(tmp, &result)))
717 18 : goto out_of_range;
718 982 : return result;
719 : }
720 :
721 145188 : if (unlikely(tmp > PG_INT64_MAX))
722 18 : goto out_of_range;
723 :
724 145170 : return (int64) tmp;
725 :
726 10826 : slow:
727 10826 : tmp = 0;
728 10826 : ptr = s;
729 : /* no need to reset neg */
730 :
731 : /* skip leading spaces */
732 10900 : while (isspace((unsigned char) *ptr))
733 74 : ptr++;
734 :
735 : /* handle sign */
736 10826 : if (*ptr == '-')
737 : {
738 646 : ptr++;
739 646 : neg = true;
740 : }
741 10180 : else if (*ptr == '+')
742 48 : ptr++;
743 :
744 : /* process digits */
745 10826 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
746 : {
747 134 : firstdigit = ptr += 2;
748 :
749 : for (;;)
750 : {
751 1804 : if (isxdigit((unsigned char) *ptr))
752 : {
753 1664 : if (unlikely(tmp > -(PG_INT64_MIN / 16)))
754 0 : goto out_of_range;
755 :
756 1664 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
757 : }
758 140 : else if (*ptr == '_')
759 : {
760 : /* underscore must be followed by more digits */
761 6 : ptr++;
762 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
763 0 : goto invalid_syntax;
764 : }
765 : else
766 134 : break;
767 : }
768 : }
769 10692 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
770 : {
771 84 : firstdigit = ptr += 2;
772 :
773 : for (;;)
774 : {
775 1368 : if (*ptr >= '0' && *ptr <= '7')
776 : {
777 1278 : if (unlikely(tmp > -(PG_INT64_MIN / 8)))
778 0 : goto out_of_range;
779 :
780 1278 : tmp = tmp * 8 + (*ptr++ - '0');
781 : }
782 90 : else if (*ptr == '_')
783 : {
784 : /* underscore must be followed by more digits */
785 6 : ptr++;
786 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
787 0 : goto invalid_syntax;
788 : }
789 : else
790 84 : break;
791 : }
792 : }
793 10608 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
794 : {
795 84 : firstdigit = ptr += 2;
796 :
797 : for (;;)
798 : {
799 3804 : if (*ptr >= '0' && *ptr <= '1')
800 : {
801 3708 : if (unlikely(tmp > -(PG_INT64_MIN / 2)))
802 0 : goto out_of_range;
803 :
804 3708 : tmp = tmp * 2 + (*ptr++ - '0');
805 : }
806 96 : else if (*ptr == '_')
807 : {
808 : /* underscore must be followed by more digits */
809 12 : ptr++;
810 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
811 0 : goto invalid_syntax;
812 : }
813 : else
814 84 : break;
815 : }
816 : }
817 : else
818 : {
819 10524 : firstdigit = ptr;
820 :
821 : for (;;)
822 : {
823 26500 : if (*ptr >= '0' && *ptr <= '9')
824 : {
825 15854 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
826 0 : goto out_of_range;
827 :
828 15854 : tmp = tmp * 10 + (*ptr++ - '0');
829 : }
830 10646 : else if (*ptr == '_')
831 : {
832 : /* underscore may not be first */
833 140 : if (unlikely(ptr == firstdigit))
834 6 : goto invalid_syntax;
835 : /* and it must be followed by more digits */
836 134 : ptr++;
837 134 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
838 12 : goto invalid_syntax;
839 : }
840 : else
841 10506 : break;
842 : }
843 : }
844 :
845 : /* require at least one digit */
846 10808 : if (unlikely(ptr == firstdigit))
847 156 : goto invalid_syntax;
848 :
849 : /* allow trailing whitespace, but not other trailing chars */
850 10718 : while (isspace((unsigned char) *ptr))
851 66 : ptr++;
852 :
853 10652 : if (unlikely(*ptr != '\0'))
854 10268 : goto invalid_syntax;
855 :
856 384 : if (neg)
857 : {
858 114 : if (unlikely(pg_neg_u64_overflow(tmp, &result)))
859 36 : goto out_of_range;
860 78 : return result;
861 : }
862 :
863 270 : if (tmp > PG_INT64_MAX)
864 36 : goto out_of_range;
865 :
866 234 : return (int64) tmp;
867 :
868 348 : out_of_range:
869 348 : ereturn(escontext, 0,
870 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
871 : errmsg("value \"%s\" is out of range for type %s",
872 : s, "bigint")));
873 :
874 10442 : invalid_syntax:
875 10442 : ereturn(escontext, 0,
876 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
877 : errmsg("invalid input syntax for type %s: \"%s\"",
878 : "bigint", s)));
879 : }
880 :
881 : /*
882 : * Convert input string to an unsigned 32 bit integer.
883 : *
884 : * Allows any number of leading or trailing whitespace characters.
885 : *
886 : * If endloc isn't NULL, store a pointer to the rest of the string there,
887 : * so that caller can parse the rest. Otherwise, it's an error if anything
888 : * but whitespace follows.
889 : *
890 : * typname is what is reported in error messages.
891 : *
892 : * If escontext points to an ErrorSaveContext node, that is filled instead
893 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
894 : * to detect errors.
895 : */
896 : uint32
897 7188264 : uint32in_subr(const char *s, char **endloc,
898 : const char *typname, Node *escontext)
899 : {
900 : uint32 result;
901 : unsigned long cvt;
902 : char *endptr;
903 :
904 7188264 : errno = 0;
905 7188264 : cvt = strtoul(s, &endptr, 0);
906 :
907 : /*
908 : * strtoul() normally only sets ERANGE. On some systems it may also set
909 : * EINVAL, which simply means it couldn't parse the input string. Be sure
910 : * to report that the same way as the standard error indication (that
911 : * endptr == s).
912 : */
913 7188264 : if ((errno && errno != ERANGE) || endptr == s)
914 60 : ereturn(escontext, 0,
915 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
916 : errmsg("invalid input syntax for type %s: \"%s\"",
917 : typname, s)));
918 :
919 7188204 : if (errno == ERANGE)
920 12 : ereturn(escontext, 0,
921 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
922 : errmsg("value \"%s\" is out of range for type %s",
923 : s, typname)));
924 :
925 7188192 : if (endloc)
926 : {
927 : /* caller wants to deal with rest of string */
928 637392 : *endloc = endptr;
929 : }
930 : else
931 : {
932 : /* allow only whitespace after number */
933 6550914 : while (*endptr && isspace((unsigned char) *endptr))
934 114 : endptr++;
935 6550800 : if (*endptr)
936 36 : ereturn(escontext, 0,
937 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
938 : errmsg("invalid input syntax for type %s: \"%s\"",
939 : typname, s)));
940 : }
941 :
942 7188156 : result = (uint32) cvt;
943 :
944 : /*
945 : * Cope with possibility that unsigned long is wider than uint32, in which
946 : * case strtoul will not raise an error for some values that are out of
947 : * the range of uint32.
948 : *
949 : * For backwards compatibility, we want to accept inputs that are given
950 : * with a minus sign, so allow the input value if it matches after either
951 : * signed or unsigned extension to long.
952 : *
953 : * To ensure consistent results on 32-bit and 64-bit platforms, make sure
954 : * the error message is the same as if strtoul() had returned ERANGE.
955 : */
956 : #if PG_UINT32_MAX != ULONG_MAX
957 7188156 : if (cvt != (unsigned long) result &&
958 42 : cvt != (unsigned long) ((int) result))
959 30 : ereturn(escontext, 0,
960 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
961 : errmsg("value \"%s\" is out of range for type %s",
962 : s, typname)));
963 : #endif
964 :
965 7188126 : return result;
966 : }
967 :
968 : /*
969 : * Convert input string to an unsigned 64 bit integer.
970 : *
971 : * Allows any number of leading or trailing whitespace characters.
972 : *
973 : * If endloc isn't NULL, store a pointer to the rest of the string there,
974 : * so that caller can parse the rest. Otherwise, it's an error if anything
975 : * but whitespace follows.
976 : *
977 : * typname is what is reported in error messages.
978 : *
979 : * If escontext points to an ErrorSaveContext node, that is filled instead
980 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
981 : * to detect errors.
982 : */
983 : uint64
984 4060 : uint64in_subr(const char *s, char **endloc,
985 : const char *typname, Node *escontext)
986 : {
987 : uint64 result;
988 : char *endptr;
989 :
990 4060 : errno = 0;
991 4060 : result = strtou64(s, &endptr, 0);
992 :
993 : /*
994 : * strtoul[l] normally only sets ERANGE. On some systems it may also set
995 : * EINVAL, which simply means it couldn't parse the input string. Be sure
996 : * to report that the same way as the standard error indication (that
997 : * endptr == s).
998 : */
999 4060 : if ((errno && errno != ERANGE) || endptr == s)
1000 42 : ereturn(escontext, 0,
1001 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1002 : errmsg("invalid input syntax for type %s: \"%s\"",
1003 : typname, s)));
1004 :
1005 4018 : if (errno == ERANGE)
1006 36 : ereturn(escontext, 0,
1007 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1008 : errmsg("value \"%s\" is out of range for type %s",
1009 : s, typname)));
1010 :
1011 3982 : if (endloc)
1012 : {
1013 : /* caller wants to deal with rest of string */
1014 0 : *endloc = endptr;
1015 : }
1016 : else
1017 : {
1018 : /* allow only whitespace after number */
1019 4096 : while (*endptr && isspace((unsigned char) *endptr))
1020 114 : endptr++;
1021 3982 : if (*endptr)
1022 36 : ereturn(escontext, 0,
1023 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1024 : errmsg("invalid input syntax for type %s: \"%s\"",
1025 : typname, s)));
1026 : }
1027 :
1028 3946 : return result;
1029 : }
1030 :
1031 : /*
1032 : * pg_itoa: converts a signed 16-bit integer to its string representation
1033 : * and returns strlen(a).
1034 : *
1035 : * Caller must ensure that 'a' points to enough memory to hold the result
1036 : * (at least 7 bytes, counting a leading sign and trailing NUL).
1037 : *
1038 : * It doesn't seem worth implementing this separately.
1039 : */
1040 : int
1041 698246 : pg_itoa(int16 i, char *a)
1042 : {
1043 698246 : return pg_ltoa((int32) i, a);
1044 : }
1045 :
1046 : /*
1047 : * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
1048 : * not NUL-terminated, and returns the length of that string representation
1049 : *
1050 : * Caller must ensure that 'a' points to enough memory to hold the result (at
1051 : * least 10 bytes)
1052 : */
1053 : int
1054 18465916 : pg_ultoa_n(uint32 value, char *a)
1055 : {
1056 : int olength,
1057 18465916 : i = 0;
1058 :
1059 : /* Degenerate case */
1060 18465916 : if (value == 0)
1061 : {
1062 4103212 : *a = '0';
1063 4103212 : return 1;
1064 : }
1065 :
1066 14362704 : olength = decimalLength32(value);
1067 :
1068 : /* Compute the result string. */
1069 16517766 : while (value >= 10000)
1070 : {
1071 2155062 : const uint32 c = value - 10000 * (value / 10000);
1072 2155062 : const uint32 c0 = (c % 100) << 1;
1073 2155062 : const uint32 c1 = (c / 100) << 1;
1074 :
1075 2155062 : char *pos = a + olength - i;
1076 :
1077 2155062 : value /= 10000;
1078 :
1079 2155062 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1080 2155062 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1081 2155062 : i += 4;
1082 : }
1083 14362704 : if (value >= 100)
1084 : {
1085 6340398 : const uint32 c = (value % 100) << 1;
1086 :
1087 6340398 : char *pos = a + olength - i;
1088 :
1089 6340398 : value /= 100;
1090 :
1091 6340398 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1092 6340398 : i += 2;
1093 : }
1094 14362704 : if (value >= 10)
1095 : {
1096 7269418 : const uint32 c = value << 1;
1097 :
1098 7269418 : char *pos = a + olength - i;
1099 :
1100 7269418 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1101 : }
1102 : else
1103 : {
1104 7093286 : *a = (char) ('0' + value);
1105 : }
1106 :
1107 14362704 : return olength;
1108 : }
1109 :
1110 : /*
1111 : * pg_ltoa: converts a signed 32-bit integer to its string representation and
1112 : * returns strlen(a).
1113 : *
1114 : * It is the caller's responsibility to ensure that a is at least 12 bytes long,
1115 : * which is enough room to hold a minus sign, a maximally long int32, and the
1116 : * above terminating NUL.
1117 : */
1118 : int
1119 18330112 : pg_ltoa(int32 value, char *a)
1120 : {
1121 18330112 : uint32 uvalue = (uint32) value;
1122 18330112 : int len = 0;
1123 :
1124 18330112 : if (value < 0)
1125 : {
1126 496386 : uvalue = (uint32) 0 - uvalue;
1127 496386 : a[len++] = '-';
1128 : }
1129 18330112 : len += pg_ultoa_n(uvalue, a + len);
1130 18330112 : a[len] = '\0';
1131 18330112 : return len;
1132 : }
1133 :
1134 : /*
1135 : * Get the decimal representation, not NUL-terminated, and return the length of
1136 : * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
1137 : */
1138 : int
1139 714622 : pg_ulltoa_n(uint64 value, char *a)
1140 : {
1141 : int olength,
1142 714622 : i = 0;
1143 : uint32 value2;
1144 :
1145 : /* Degenerate case */
1146 714622 : if (value == 0)
1147 : {
1148 67850 : *a = '0';
1149 67850 : return 1;
1150 : }
1151 :
1152 646772 : olength = decimalLength64(value);
1153 :
1154 : /* Compute the result string. */
1155 667248 : while (value >= 100000000)
1156 : {
1157 20476 : const uint64 q = value / 100000000;
1158 20476 : uint32 value3 = (uint32) (value - 100000000 * q);
1159 :
1160 20476 : const uint32 c = value3 % 10000;
1161 20476 : const uint32 d = value3 / 10000;
1162 20476 : const uint32 c0 = (c % 100) << 1;
1163 20476 : const uint32 c1 = (c / 100) << 1;
1164 20476 : const uint32 d0 = (d % 100) << 1;
1165 20476 : const uint32 d1 = (d / 100) << 1;
1166 :
1167 20476 : char *pos = a + olength - i;
1168 :
1169 20476 : value = q;
1170 :
1171 20476 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1172 20476 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1173 20476 : memcpy(pos - 6, DIGIT_TABLE + d0, 2);
1174 20476 : memcpy(pos - 8, DIGIT_TABLE + d1, 2);
1175 20476 : i += 8;
1176 : }
1177 :
1178 : /* Switch to 32-bit for speed */
1179 646772 : value2 = (uint32) value;
1180 :
1181 646772 : if (value2 >= 10000)
1182 : {
1183 30278 : const uint32 c = value2 - 10000 * (value2 / 10000);
1184 30278 : const uint32 c0 = (c % 100) << 1;
1185 30278 : const uint32 c1 = (c / 100) << 1;
1186 :
1187 30278 : char *pos = a + olength - i;
1188 :
1189 30278 : value2 /= 10000;
1190 :
1191 30278 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1192 30278 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1193 30278 : i += 4;
1194 : }
1195 646772 : if (value2 >= 100)
1196 : {
1197 224962 : const uint32 c = (value2 % 100) << 1;
1198 224962 : char *pos = a + olength - i;
1199 :
1200 224962 : value2 /= 100;
1201 :
1202 224962 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1203 224962 : i += 2;
1204 : }
1205 646772 : if (value2 >= 10)
1206 : {
1207 140232 : const uint32 c = value2 << 1;
1208 140232 : char *pos = a + olength - i;
1209 :
1210 140232 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1211 : }
1212 : else
1213 506540 : *a = (char) ('0' + value2);
1214 :
1215 646772 : return olength;
1216 : }
1217 :
1218 : /*
1219 : * pg_lltoa: converts a signed 64-bit integer to its string representation and
1220 : * returns strlen(a).
1221 : *
1222 : * Caller must ensure that 'a' points to enough memory to hold the result
1223 : * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
1224 : */
1225 : int
1226 328334 : pg_lltoa(int64 value, char *a)
1227 : {
1228 328334 : uint64 uvalue = value;
1229 328334 : int len = 0;
1230 :
1231 328334 : if (value < 0)
1232 : {
1233 2748 : uvalue = (uint64) 0 - uvalue;
1234 2748 : a[len++] = '-';
1235 : }
1236 :
1237 328334 : len += pg_ulltoa_n(uvalue, a + len);
1238 328334 : a[len] = '\0';
1239 328334 : return len;
1240 : }
1241 :
1242 :
1243 : /*
1244 : * pg_ultostr_zeropad
1245 : * Converts 'value' into a decimal string representation stored at 'str'.
1246 : * 'minwidth' specifies the minimum width of the result; any extra space
1247 : * is filled up by prefixing the number with zeros.
1248 : *
1249 : * Returns the ending address of the string result (the last character written
1250 : * plus 1). Note that no NUL terminator is written.
1251 : *
1252 : * The intended use-case for this function is to build strings that contain
1253 : * multiple individual numbers, for example:
1254 : *
1255 : * str = pg_ultostr_zeropad(str, hours, 2);
1256 : * *str++ = ':';
1257 : * str = pg_ultostr_zeropad(str, mins, 2);
1258 : * *str++ = ':';
1259 : * str = pg_ultostr_zeropad(str, secs, 2);
1260 : * *str = '\0';
1261 : *
1262 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1263 : * result.
1264 : */
1265 : char *
1266 826922 : pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
1267 : {
1268 : int len;
1269 :
1270 : Assert(minwidth > 0);
1271 :
1272 826922 : if (value < 100 && minwidth == 2) /* Short cut for common case */
1273 : {
1274 694512 : memcpy(str, DIGIT_TABLE + value * 2, 2);
1275 694512 : return str + 2;
1276 : }
1277 :
1278 132410 : len = pg_ultoa_n(value, str);
1279 132410 : if (len >= minwidth)
1280 131708 : return str + len;
1281 :
1282 702 : memmove(str + minwidth - len, str, len);
1283 702 : memset(str, '0', minwidth - len);
1284 702 : return str + minwidth;
1285 : }
1286 :
1287 : /*
1288 : * pg_ultostr
1289 : * Converts 'value' into a decimal string representation stored at 'str'.
1290 : *
1291 : * Returns the ending address of the string result (the last character written
1292 : * plus 1). Note that no NUL terminator is written.
1293 : *
1294 : * The intended use-case for this function is to build strings that contain
1295 : * multiple individual numbers, for example:
1296 : *
1297 : * str = pg_ultostr(str, a);
1298 : * *str++ = ' ';
1299 : * str = pg_ultostr(str, b);
1300 : * *str = '\0';
1301 : *
1302 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1303 : * result.
1304 : */
1305 : char *
1306 3318 : pg_ultostr(char *str, uint32 value)
1307 : {
1308 3318 : int len = pg_ultoa_n(value, str);
1309 :
1310 3318 : return str + len;
1311 : }
|