Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * numutils.c
4 : * utility functions for I/O of built-in numeric types.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/numutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 : #include <limits.h>
19 : #include <ctype.h>
20 :
21 : #include "common/int.h"
22 : #include "port/pg_bitutils.h"
23 : #include "utils/builtins.h"
24 :
/*
 * Lookup table holding the two-character decimal spelling of every value
 * from 0 to 99, packed back to back (entry n lives at offset 2 * n).  The
 * integer-to-text routines copy digit pairs out of it rather than producing
 * one digit at a time.
 *
 * Each row below covers one "decade" (00-09, 10-19, ...).  The array is
 * exactly 200 bytes, so there is no terminating NUL.
 */
static const char DIGIT_TABLE[200] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
40 :
41 : /*
42 : * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43 : */
44 : static inline int
45 13616454 : decimalLength32(const uint32 v)
46 : {
47 : int t;
48 : static const uint32 PowersOfTen[] = {
49 : 1, 10, 100,
50 : 1000, 10000, 100000,
51 : 1000000, 10000000, 100000000,
52 : 1000000000
53 : };
54 :
55 : /*
56 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
57 : * good-enough approximation of the base-2 logarithm of 10
58 : */
59 13616454 : t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60 13616454 : return t + (v >= PowersOfTen[t]);
61 : }
62 :
63 : static inline int
64 599044 : decimalLength64(const uint64 v)
65 : {
66 : int t;
67 : static const uint64 PowersOfTen[] = {
68 : UINT64CONST(1), UINT64CONST(10),
69 : UINT64CONST(100), UINT64CONST(1000),
70 : UINT64CONST(10000), UINT64CONST(100000),
71 : UINT64CONST(1000000), UINT64CONST(10000000),
72 : UINT64CONST(100000000), UINT64CONST(1000000000),
73 : UINT64CONST(10000000000), UINT64CONST(100000000000),
74 : UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75 : UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76 : UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77 : UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78 : };
79 :
80 : /*
81 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
82 : * good-enough approximation of the base-2 logarithm of 10
83 : */
84 599044 : t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85 599044 : return t + (v >= PowersOfTen[t]);
86 : }
87 :
88 : static const int8 hexlookup[128] = {
89 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
92 : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
93 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
96 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
97 : };
98 :
99 : /*
100 : * Convert input string to a signed 16 bit integer. Input strings may be
101 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
102 : * can be prefixed by an optional sign character, either '+' (the default) or
103 : * '-' for negative numbers. Hex strings are recognized by the digits being
104 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
105 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
106 : *
107 : * Allows any number of leading or trailing whitespace characters. Digits may
108 : * optionally be separated by a single underscore character. These can only
109 : * come between digits and not before or after the digits. Underscores have
110 : * no effect on the return value and are supported only to assist in improving
111 : * the human readability of the input strings.
112 : *
113 : * pg_strtoint16() will throw ereport() upon bad input format or overflow;
114 : * while pg_strtoint16_safe() instead returns such complaints in *escontext,
115 : * if it's an ErrorSaveContext.
116 : *
117 : * NB: Accumulate input as an unsigned number, to deal with two's complement
118 : * representation of the most negative number, which can't be represented as a
119 : * signed positive number.
120 : */
121 : int16
122 0 : pg_strtoint16(const char *s)
123 : {
124 0 : return pg_strtoint16_safe(s, NULL);
125 : }
126 :
127 : int16
128 842022 : pg_strtoint16_safe(const char *s, Node *escontext)
129 : {
130 842022 : const char *ptr = s;
131 : const char *firstdigit;
132 842022 : uint16 tmp = 0;
133 842022 : bool neg = false;
134 : unsigned char digit;
135 : int16 result;
136 :
137 : /*
138 : * The majority of cases are likely to be base-10 digits without any
139 : * underscore separator characters. We'll first try to parse the string
140 : * with the assumption that's the case and only fallback on a slower
141 : * implementation which handles hex, octal and binary strings and
142 : * underscores if the fastpath version cannot parse the string.
143 : */
144 :
145 : /* leave it up to the slow path to look for leading spaces */
146 :
147 842022 : if (*ptr == '-')
148 : {
149 17000 : ptr++;
150 17000 : neg = true;
151 : }
152 :
153 : /* a leading '+' is uncommon so leave that for the slow path */
154 :
155 : /* process the first digit */
156 842022 : digit = (*ptr - '0');
157 :
158 : /*
159 : * Exploit unsigned arithmetic to save having to check both the upper and
160 : * lower bounds of the digit.
161 : */
162 842022 : if (likely(digit < 10))
163 : {
164 841950 : ptr++;
165 841950 : tmp = digit;
166 : }
167 : else
168 : {
169 : /* we need at least one digit */
170 72 : goto slow;
171 : }
172 :
173 : /* process remaining digits */
174 : for (;;)
175 : {
176 869020 : digit = (*ptr - '0');
177 :
178 869020 : if (digit >= 10)
179 841932 : break;
180 :
181 27088 : ptr++;
182 :
183 27088 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
184 18 : goto out_of_range;
185 :
186 27070 : tmp = tmp * 10 + digit;
187 : }
188 :
189 : /* when the string does not end in a digit, let the slow path handle it */
190 841932 : if (unlikely(*ptr != '\0'))
191 182 : goto slow;
192 :
193 841750 : if (neg)
194 : {
195 16958 : if (unlikely(pg_neg_u16_overflow(tmp, &result)))
196 0 : goto out_of_range;
197 16958 : return result;
198 : }
199 :
200 824792 : if (unlikely(tmp > PG_INT16_MAX))
201 0 : goto out_of_range;
202 :
203 824792 : return (int16) tmp;
204 :
205 254 : slow:
206 254 : tmp = 0;
207 254 : ptr = s;
208 : /* no need to reset neg */
209 :
210 : /* skip leading spaces */
211 314 : while (isspace((unsigned char) *ptr))
212 60 : ptr++;
213 :
214 : /* handle sign */
215 254 : if (*ptr == '-')
216 : {
217 48 : ptr++;
218 48 : neg = true;
219 : }
220 206 : else if (*ptr == '+')
221 0 : ptr++;
222 :
223 : /* process digits */
224 254 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
225 : {
226 42 : firstdigit = ptr += 2;
227 :
228 : for (;;)
229 : {
230 180 : if (isxdigit((unsigned char) *ptr))
231 : {
232 132 : if (unlikely(tmp > -(PG_INT16_MIN / 16)))
233 0 : goto out_of_range;
234 :
235 132 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
236 : }
237 48 : else if (*ptr == '_')
238 : {
239 : /* underscore must be followed by more digits */
240 6 : ptr++;
241 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
242 0 : goto invalid_syntax;
243 : }
244 : else
245 42 : break;
246 : }
247 : }
248 212 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
249 : {
250 42 : firstdigit = ptr += 2;
251 :
252 : for (;;)
253 : {
254 222 : if (*ptr >= '0' && *ptr <= '7')
255 : {
256 174 : if (unlikely(tmp > -(PG_INT16_MIN / 8)))
257 0 : goto out_of_range;
258 :
259 174 : tmp = tmp * 8 + (*ptr++ - '0');
260 : }
261 48 : else if (*ptr == '_')
262 : {
263 : /* underscore must be followed by more digits */
264 6 : ptr++;
265 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
266 0 : goto invalid_syntax;
267 : }
268 : else
269 42 : break;
270 : }
271 : }
272 170 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
273 : {
274 42 : firstdigit = ptr += 2;
275 :
276 : for (;;)
277 : {
278 504 : if (*ptr >= '0' && *ptr <= '1')
279 : {
280 450 : if (unlikely(tmp > -(PG_INT16_MIN / 2)))
281 0 : goto out_of_range;
282 :
283 450 : tmp = tmp * 2 + (*ptr++ - '0');
284 : }
285 54 : else if (*ptr == '_')
286 : {
287 : /* underscore must be followed by more digits */
288 12 : ptr++;
289 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
290 0 : goto invalid_syntax;
291 : }
292 : else
293 42 : break;
294 : }
295 : }
296 : else
297 : {
298 128 : firstdigit = ptr;
299 :
300 : for (;;)
301 : {
302 328 : if (*ptr >= '0' && *ptr <= '9')
303 : {
304 182 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
305 0 : goto out_of_range;
306 :
307 182 : tmp = tmp * 10 + (*ptr++ - '0');
308 : }
309 146 : else if (*ptr == '_')
310 : {
311 : /* underscore may not be first */
312 36 : if (unlikely(ptr == firstdigit))
313 6 : goto invalid_syntax;
314 : /* and it must be followed by more digits */
315 30 : ptr++;
316 30 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
317 12 : goto invalid_syntax;
318 : }
319 : else
320 110 : break;
321 : }
322 : }
323 :
324 : /* require at least one digit */
325 236 : if (unlikely(ptr == firstdigit))
326 72 : goto invalid_syntax;
327 :
328 : /* allow trailing whitespace, but not other trailing chars */
329 200 : while (isspace((unsigned char) *ptr))
330 36 : ptr++;
331 :
332 164 : if (unlikely(*ptr != '\0'))
333 26 : goto invalid_syntax;
334 :
335 138 : if (neg)
336 : {
337 42 : if (unlikely(pg_neg_u16_overflow(tmp, &result)))
338 18 : goto out_of_range;
339 24 : return result;
340 : }
341 :
342 96 : if (tmp > PG_INT16_MAX)
343 18 : goto out_of_range;
344 :
345 78 : return (int16) tmp;
346 :
347 54 : out_of_range:
348 54 : ereturn(escontext, 0,
349 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
350 : errmsg("value \"%s\" is out of range for type %s",
351 : s, "smallint")));
352 :
353 116 : invalid_syntax:
354 116 : ereturn(escontext, 0,
355 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
356 : errmsg("invalid input syntax for type %s: \"%s\"",
357 : "smallint", s)));
358 : }
359 :
360 : /*
361 : * Convert input string to a signed 32 bit integer. Input strings may be
362 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
363 : * can be prefixed by an optional sign character, either '+' (the default) or
364 : * '-' for negative numbers. Hex strings are recognized by the digits being
365 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
366 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
367 : *
368 : * Allows any number of leading or trailing whitespace characters. Digits may
369 : * optionally be separated by a single underscore character. These can only
370 : * come between digits and not before or after the digits. Underscores have
371 : * no effect on the return value and are supported only to assist in improving
372 : * the human readability of the input strings.
373 : *
374 : * pg_strtoint32() will throw ereport() upon bad input format or overflow;
375 : * while pg_strtoint32_safe() instead returns such complaints in *escontext,
376 : * if it's an ErrorSaveContext.
377 : *
378 : * NB: Accumulate input as an unsigned number, to deal with two's complement
379 : * representation of the most negative number, which can't be represented as a
380 : * signed positive number.
381 : */
382 : int32
383 10880 : pg_strtoint32(const char *s)
384 : {
385 10880 : return pg_strtoint32_safe(s, NULL);
386 : }
387 :
388 : int32
389 5005302 : pg_strtoint32_safe(const char *s, Node *escontext)
390 : {
391 5005302 : const char *ptr = s;
392 : const char *firstdigit;
393 5005302 : uint32 tmp = 0;
394 5005302 : bool neg = false;
395 : unsigned char digit;
396 : int32 result;
397 :
398 : /*
399 : * The majority of cases are likely to be base-10 digits without any
400 : * underscore separator characters. We'll first try to parse the string
401 : * with the assumption that's the case and only fallback on a slower
402 : * implementation which handles hex, octal and binary strings and
403 : * underscores if the fastpath version cannot parse the string.
404 : */
405 :
406 : /* leave it up to the slow path to look for leading spaces */
407 :
408 5005302 : if (*ptr == '-')
409 : {
410 49542 : ptr++;
411 49542 : neg = true;
412 : }
413 :
414 : /* a leading '+' is uncommon so leave that for the slow path */
415 :
416 : /* process the first digit */
417 5005302 : digit = (*ptr - '0');
418 :
419 : /*
420 : * Exploit unsigned arithmetic to save having to check both the upper and
421 : * lower bounds of the digit.
422 : */
423 5005302 : if (likely(digit < 10))
424 : {
425 5004828 : ptr++;
426 5004828 : tmp = digit;
427 : }
428 : else
429 : {
430 : /* we need at least one digit */
431 474 : goto slow;
432 : }
433 :
434 : /* process remaining digits */
435 : for (;;)
436 : {
437 14040516 : digit = (*ptr - '0');
438 :
439 14040516 : if (digit >= 10)
440 5003490 : break;
441 :
442 9037026 : ptr++;
443 :
444 9037026 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
445 1338 : goto out_of_range;
446 :
447 9035688 : tmp = tmp * 10 + digit;
448 : }
449 :
450 : /* when the string does not end in a digit, let the slow path handle it */
451 5003490 : if (unlikely(*ptr != '\0'))
452 970 : goto slow;
453 :
454 5002520 : if (neg)
455 : {
456 49488 : if (unlikely(pg_neg_u32_overflow(tmp, &result)))
457 0 : goto out_of_range;
458 49488 : return result;
459 : }
460 :
461 4953032 : if (unlikely(tmp > PG_INT32_MAX))
462 206 : goto out_of_range;
463 :
464 4952826 : return (int32) tmp;
465 :
466 1444 : slow:
467 1444 : tmp = 0;
468 1444 : ptr = s;
469 : /* no need to reset neg */
470 :
471 : /* skip leading spaces */
472 1594 : while (isspace((unsigned char) *ptr))
473 150 : ptr++;
474 :
475 : /* handle sign */
476 1444 : if (*ptr == '-')
477 : {
478 60 : ptr++;
479 60 : neg = true;
480 : }
481 1384 : else if (*ptr == '+')
482 6 : ptr++;
483 :
484 : /* process digits */
485 1444 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
486 : {
487 570 : firstdigit = ptr += 2;
488 :
489 : for (;;)
490 : {
491 3314 : if (isxdigit((unsigned char) *ptr))
492 : {
493 2794 : if (unlikely(tmp > -(PG_INT32_MIN / 16)))
494 62 : goto out_of_range;
495 :
496 2732 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
497 : }
498 520 : else if (*ptr == '_')
499 : {
500 : /* underscore must be followed by more digits */
501 12 : ptr++;
502 12 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
503 0 : goto invalid_syntax;
504 : }
505 : else
506 508 : break;
507 : }
508 : }
509 874 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
510 : {
511 102 : firstdigit = ptr += 2;
512 :
513 : for (;;)
514 : {
515 972 : if (*ptr >= '0' && *ptr <= '7')
516 : {
517 882 : if (unlikely(tmp > -(PG_INT32_MIN / 8)))
518 24 : goto out_of_range;
519 :
520 858 : tmp = tmp * 8 + (*ptr++ - '0');
521 : }
522 90 : else if (*ptr == '_')
523 : {
524 : /* underscore must be followed by more digits */
525 12 : ptr++;
526 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
527 0 : goto invalid_syntax;
528 : }
529 : else
530 78 : break;
531 : }
532 : }
533 772 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
534 : {
535 104 : firstdigit = ptr += 2;
536 :
537 : for (;;)
538 : {
539 2622 : if (*ptr >= '0' && *ptr <= '1')
540 : {
541 2520 : if (unlikely(tmp > -(PG_INT32_MIN / 2)))
542 26 : goto out_of_range;
543 :
544 2494 : tmp = tmp * 2 + (*ptr++ - '0');
545 : }
546 102 : else if (*ptr == '_')
547 : {
548 : /* underscore must be followed by more digits */
549 24 : ptr++;
550 24 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
551 0 : goto invalid_syntax;
552 : }
553 : else
554 78 : break;
555 : }
556 : }
557 : else
558 : {
559 668 : firstdigit = ptr;
560 :
561 : for (;;)
562 : {
563 1968 : if (*ptr >= '0' && *ptr <= '9')
564 : {
565 1122 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
566 28 : goto out_of_range;
567 :
568 1094 : tmp = tmp * 10 + (*ptr++ - '0');
569 : }
570 846 : else if (*ptr == '_')
571 : {
572 : /* underscore may not be first */
573 224 : if (unlikely(ptr == firstdigit))
574 6 : goto invalid_syntax;
575 : /* and it must be followed by more digits */
576 218 : ptr++;
577 218 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
578 12 : goto invalid_syntax;
579 : }
580 : else
581 622 : break;
582 : }
583 : }
584 :
585 : /* require at least one digit */
586 1286 : if (unlikely(ptr == firstdigit))
587 420 : goto invalid_syntax;
588 :
589 : /* allow trailing whitespace, but not other trailing chars */
590 950 : while (isspace((unsigned char) *ptr))
591 84 : ptr++;
592 :
593 866 : if (unlikely(*ptr != '\0'))
594 72 : goto invalid_syntax;
595 :
596 794 : if (neg)
597 : {
598 42 : if (unlikely(pg_neg_u32_overflow(tmp, &result)))
599 18 : goto out_of_range;
600 24 : return result;
601 : }
602 :
603 752 : if (tmp > PG_INT32_MAX)
604 72 : goto out_of_range;
605 :
606 680 : return (int32) tmp;
607 :
608 1774 : out_of_range:
609 1774 : ereturn(escontext, 0,
610 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
611 : errmsg("value \"%s\" is out of range for type %s",
612 : s, "integer")));
613 :
614 510 : invalid_syntax:
615 510 : ereturn(escontext, 0,
616 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
617 : errmsg("invalid input syntax for type %s: \"%s\"",
618 : "integer", s)));
619 : }
620 :
621 : /*
622 : * Convert input string to a signed 64 bit integer. Input strings may be
623 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
624 : * can be prefixed by an optional sign character, either '+' (the default) or
625 : * '-' for negative numbers. Hex strings are recognized by the digits being
626 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
627 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
628 : *
629 : * Allows any number of leading or trailing whitespace characters. Digits may
630 : * optionally be separated by a single underscore character. These can only
631 : * come between digits and not before or after the digits. Underscores have
632 : * no effect on the return value and are supported only to assist in improving
633 : * the human readability of the input strings.
634 : *
635 : * pg_strtoint64() will throw ereport() upon bad input format or overflow;
636 : * while pg_strtoint64_safe() instead returns such complaints in *escontext,
637 : * if it's an ErrorSaveContext.
638 : *
639 : * NB: Accumulate input as an unsigned number, to deal with two's complement
640 : * representation of the most negative number, which can't be represented as a
641 : * signed positive number.
642 : */
643 : int64
644 14 : pg_strtoint64(const char *s)
645 : {
646 14 : return pg_strtoint64_safe(s, NULL);
647 : }
648 :
649 : int64
650 142522 : pg_strtoint64_safe(const char *s, Node *escontext)
651 : {
652 142522 : const char *ptr = s;
653 : const char *firstdigit;
654 142522 : uint64 tmp = 0;
655 142522 : bool neg = false;
656 : unsigned char digit;
657 : int64 result;
658 :
659 : /*
660 : * The majority of cases are likely to be base-10 digits without any
661 : * underscore separator characters. We'll first try to parse the string
662 : * with the assumption that's the case and only fallback on a slower
663 : * implementation which handles hex, octal and binary strings and
664 : * underscores if the fastpath version cannot parse the string.
665 : */
666 :
667 : /* leave it up to the slow path to look for leading spaces */
668 :
669 142522 : if (*ptr == '-')
670 : {
671 1646 : ptr++;
672 1646 : neg = true;
673 : }
674 :
675 : /* a leading '+' is uncommon so leave that for the slow path */
676 :
677 : /* process the first digit */
678 142522 : digit = (*ptr - '0');
679 :
680 : /*
681 : * Exploit unsigned arithmetic to save having to check both the upper and
682 : * lower bounds of the digit.
683 : */
684 142522 : if (likely(digit < 10))
685 : {
686 142310 : ptr++;
687 142310 : tmp = digit;
688 : }
689 : else
690 : {
691 : /* we need at least one digit */
692 212 : goto slow;
693 : }
694 :
695 : /* process remaining digits */
696 : for (;;)
697 : {
698 381356 : digit = (*ptr - '0');
699 :
700 381356 : if (digit >= 10)
701 142070 : break;
702 :
703 239286 : ptr++;
704 :
705 239286 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
706 240 : goto out_of_range;
707 :
708 239046 : tmp = tmp * 10 + digit;
709 : }
710 :
711 : /* when the string does not end in a digit, let the slow path handle it */
712 142070 : if (unlikely(*ptr != '\0'))
713 10618 : goto slow;
714 :
715 131452 : if (neg)
716 : {
717 1000 : if (unlikely(pg_neg_u64_overflow(tmp, &result)))
718 18 : goto out_of_range;
719 982 : return result;
720 : }
721 :
722 130452 : if (unlikely(tmp > PG_INT64_MAX))
723 18 : goto out_of_range;
724 :
725 130434 : return (int64) tmp;
726 :
727 10830 : slow:
728 10830 : tmp = 0;
729 10830 : ptr = s;
730 : /* no need to reset neg */
731 :
732 : /* skip leading spaces */
733 10904 : while (isspace((unsigned char) *ptr))
734 74 : ptr++;
735 :
736 : /* handle sign */
737 10830 : if (*ptr == '-')
738 : {
739 640 : ptr++;
740 640 : neg = true;
741 : }
742 10190 : else if (*ptr == '+')
743 48 : ptr++;
744 :
745 : /* process digits */
746 10830 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
747 : {
748 122 : firstdigit = ptr += 2;
749 :
750 : for (;;)
751 : {
752 1600 : if (isxdigit((unsigned char) *ptr))
753 : {
754 1472 : if (unlikely(tmp > -(PG_INT64_MIN / 16)))
755 0 : goto out_of_range;
756 :
757 1472 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
758 : }
759 128 : else if (*ptr == '_')
760 : {
761 : /* underscore must be followed by more digits */
762 6 : ptr++;
763 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
764 0 : goto invalid_syntax;
765 : }
766 : else
767 122 : break;
768 : }
769 : }
770 10708 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
771 : {
772 84 : firstdigit = ptr += 2;
773 :
774 : for (;;)
775 : {
776 1368 : if (*ptr >= '0' && *ptr <= '7')
777 : {
778 1278 : if (unlikely(tmp > -(PG_INT64_MIN / 8)))
779 0 : goto out_of_range;
780 :
781 1278 : tmp = tmp * 8 + (*ptr++ - '0');
782 : }
783 90 : else if (*ptr == '_')
784 : {
785 : /* underscore must be followed by more digits */
786 6 : ptr++;
787 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
788 0 : goto invalid_syntax;
789 : }
790 : else
791 84 : break;
792 : }
793 : }
794 10624 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
795 : {
796 84 : firstdigit = ptr += 2;
797 :
798 : for (;;)
799 : {
800 3804 : if (*ptr >= '0' && *ptr <= '1')
801 : {
802 3708 : if (unlikely(tmp > -(PG_INT64_MIN / 2)))
803 0 : goto out_of_range;
804 :
805 3708 : tmp = tmp * 2 + (*ptr++ - '0');
806 : }
807 96 : else if (*ptr == '_')
808 : {
809 : /* underscore must be followed by more digits */
810 12 : ptr++;
811 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
812 0 : goto invalid_syntax;
813 : }
814 : else
815 84 : break;
816 : }
817 : }
818 : else
819 : {
820 10540 : firstdigit = ptr;
821 :
822 : for (;;)
823 : {
824 26460 : if (*ptr >= '0' && *ptr <= '9')
825 : {
826 15798 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
827 0 : goto out_of_range;
828 :
829 15798 : tmp = tmp * 10 + (*ptr++ - '0');
830 : }
831 10662 : else if (*ptr == '_')
832 : {
833 : /* underscore may not be first */
834 140 : if (unlikely(ptr == firstdigit))
835 6 : goto invalid_syntax;
836 : /* and it must be followed by more digits */
837 134 : ptr++;
838 134 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
839 12 : goto invalid_syntax;
840 : }
841 : else
842 10522 : break;
843 : }
844 : }
845 :
846 : /* require at least one digit */
847 10812 : if (unlikely(ptr == firstdigit))
848 156 : goto invalid_syntax;
849 :
850 : /* allow trailing whitespace, but not other trailing chars */
851 10722 : while (isspace((unsigned char) *ptr))
852 66 : ptr++;
853 :
854 10656 : if (unlikely(*ptr != '\0'))
855 10284 : goto invalid_syntax;
856 :
857 372 : if (neg)
858 : {
859 108 : if (unlikely(pg_neg_u64_overflow(tmp, &result)))
860 36 : goto out_of_range;
861 72 : return result;
862 : }
863 :
864 264 : if (tmp > PG_INT64_MAX)
865 36 : goto out_of_range;
866 :
867 228 : return (int64) tmp;
868 :
869 348 : out_of_range:
870 348 : ereturn(escontext, 0,
871 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
872 : errmsg("value \"%s\" is out of range for type %s",
873 : s, "bigint")));
874 :
875 10458 : invalid_syntax:
876 10458 : ereturn(escontext, 0,
877 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
878 : errmsg("invalid input syntax for type %s: \"%s\"",
879 : "bigint", s)));
880 : }
881 :
882 : /*
883 : * Convert input string to an unsigned 32 bit integer.
884 : *
885 : * Allows any number of leading or trailing whitespace characters.
886 : *
887 : * If endloc isn't NULL, store a pointer to the rest of the string there,
888 : * so that caller can parse the rest. Otherwise, it's an error if anything
889 : * but whitespace follows.
890 : *
891 : * typname is what is reported in error messages.
892 : *
893 : * If escontext points to an ErrorSaveContext node, that is filled instead
894 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
895 : * to detect errors.
896 : */
897 : uint32
898 5877010 : uint32in_subr(const char *s, char **endloc,
899 : const char *typname, Node *escontext)
900 : {
901 : uint32 result;
902 : unsigned long cvt;
903 : char *endptr;
904 :
905 5877010 : errno = 0;
906 5877010 : cvt = strtoul(s, &endptr, 0);
907 :
908 : /*
909 : * strtoul() normally only sets ERANGE. On some systems it may also set
910 : * EINVAL, which simply means it couldn't parse the input string. Be sure
911 : * to report that the same way as the standard error indication (that
912 : * endptr == s).
913 : */
914 5877010 : if ((errno && errno != ERANGE) || endptr == s)
915 60 : ereturn(escontext, 0,
916 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
917 : errmsg("invalid input syntax for type %s: \"%s\"",
918 : typname, s)));
919 :
920 5876950 : if (errno == ERANGE)
921 12 : ereturn(escontext, 0,
922 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
923 : errmsg("value \"%s\" is out of range for type %s",
924 : s, typname)));
925 :
926 5876938 : if (endloc)
927 : {
928 : /* caller wants to deal with rest of string */
929 555606 : *endloc = endptr;
930 : }
931 : else
932 : {
933 : /* allow only whitespace after number */
934 5321446 : while (*endptr && isspace((unsigned char) *endptr))
935 114 : endptr++;
936 5321332 : if (*endptr)
937 36 : ereturn(escontext, 0,
938 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
939 : errmsg("invalid input syntax for type %s: \"%s\"",
940 : typname, s)));
941 : }
942 :
943 5876902 : result = (uint32) cvt;
944 :
945 : /*
946 : * Cope with possibility that unsigned long is wider than uint32, in which
947 : * case strtoul will not raise an error for some values that are out of
948 : * the range of uint32.
949 : *
950 : * For backwards compatibility, we want to accept inputs that are given
951 : * with a minus sign, so allow the input value if it matches after either
952 : * signed or unsigned extension to long.
953 : *
954 : * To ensure consistent results on 32-bit and 64-bit platforms, make sure
955 : * the error message is the same as if strtoul() had returned ERANGE.
956 : */
957 : #if PG_UINT32_MAX != ULONG_MAX
958 5876902 : if (cvt != (unsigned long) result &&
959 42 : cvt != (unsigned long) ((int) result))
960 30 : ereturn(escontext, 0,
961 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
962 : errmsg("value \"%s\" is out of range for type %s",
963 : s, typname)));
964 : #endif
965 :
966 5876872 : return result;
967 : }
968 :
969 : /*
970 : * Convert input string to an unsigned 64 bit integer.
971 : *
972 : * Allows any number of leading or trailing whitespace characters.
973 : *
974 : * If endloc isn't NULL, store a pointer to the rest of the string there,
975 : * so that caller can parse the rest. Otherwise, it's an error if anything
976 : * but whitespace follows.
977 : *
978 : * typname is what is reported in error messages.
979 : *
980 : * If escontext points to an ErrorSaveContext node, that is filled instead
981 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
982 : * to detect errors.
983 : */
984 : uint64
985 868 : uint64in_subr(const char *s, char **endloc,
986 : const char *typname, Node *escontext)
987 : {
988 : uint64 result;
989 : char *endptr;
990 :
991 868 : errno = 0;
992 868 : result = strtou64(s, &endptr, 0);
993 :
994 : /*
995 : * strtoul[l] normally only sets ERANGE. On some systems it may also set
996 : * EINVAL, which simply means it couldn't parse the input string. Be sure
997 : * to report that the same way as the standard error indication (that
998 : * endptr == s).
999 : */
1000 868 : if ((errno && errno != ERANGE) || endptr == s)
1001 18 : ereturn(escontext, 0,
1002 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1003 : errmsg("invalid input syntax for type %s: \"%s\"",
1004 : typname, s)));
1005 :
1006 850 : if (errno == ERANGE)
1007 6 : ereturn(escontext, 0,
1008 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1009 : errmsg("value \"%s\" is out of range for type %s",
1010 : s, typname)));
1011 :
1012 844 : if (endloc)
1013 : {
1014 : /* caller wants to deal with rest of string */
1015 0 : *endloc = endptr;
1016 : }
1017 : else
1018 : {
1019 : /* allow only whitespace after number */
1020 844 : while (*endptr && isspace((unsigned char) *endptr))
1021 0 : endptr++;
1022 844 : if (*endptr)
1023 0 : ereturn(escontext, 0,
1024 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1025 : errmsg("invalid input syntax for type %s: \"%s\"",
1026 : typname, s)));
1027 : }
1028 :
1029 844 : return result;
1030 : }
1031 :
1032 : /*
1033 : * pg_itoa: converts a signed 16-bit integer to its string representation
1034 : * and returns strlen(a).
1035 : *
1036 : * Caller must ensure that 'a' points to enough memory to hold the result
1037 : * (at least 7 bytes, counting a leading sign and trailing NUL).
1038 : *
1039 : * It doesn't seem worth implementing this separately.
1040 : */
1041 : int
1042 640308 : pg_itoa(int16 i, char *a)
1043 : {
1044 640308 : return pg_ltoa((int32) i, a);
1045 : }
1046 :
1047 : /*
1048 : * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
1049 : * not NUL-terminated, and returns the length of that string representation
1050 : *
1051 : * Caller must ensure that 'a' points to enough memory to hold the result (at
1052 : * least 10 bytes)
1053 : */
1054 : int
1055 17059084 : pg_ultoa_n(uint32 value, char *a)
1056 : {
1057 : int olength,
1058 17059084 : i = 0;
1059 :
1060 : /* Degenerate case */
1061 17059084 : if (value == 0)
1062 : {
1063 3442630 : *a = '0';
1064 3442630 : return 1;
1065 : }
1066 :
1067 13616454 : olength = decimalLength32(value);
1068 :
1069 : /* Compute the result string. */
1070 15656734 : while (value >= 10000)
1071 : {
1072 2040280 : const uint32 c = value - 10000 * (value / 10000);
1073 2040280 : const uint32 c0 = (c % 100) << 1;
1074 2040280 : const uint32 c1 = (c / 100) << 1;
1075 :
1076 2040280 : char *pos = a + olength - i;
1077 :
1078 2040280 : value /= 10000;
1079 :
1080 2040280 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1081 2040280 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1082 2040280 : i += 4;
1083 : }
1084 13616454 : if (value >= 100)
1085 : {
1086 6177706 : const uint32 c = (value % 100) << 1;
1087 :
1088 6177706 : char *pos = a + olength - i;
1089 :
1090 6177706 : value /= 100;
1091 :
1092 6177706 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1093 6177706 : i += 2;
1094 : }
1095 13616454 : if (value >= 10)
1096 : {
1097 7091482 : const uint32 c = value << 1;
1098 :
1099 7091482 : char *pos = a + olength - i;
1100 :
1101 7091482 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1102 : }
1103 : else
1104 : {
1105 6524972 : *a = (char) ('0' + value);
1106 : }
1107 :
1108 13616454 : return olength;
1109 : }
1110 :
1111 : /*
1112 : * pg_ltoa: converts a signed 32-bit integer to its string representation and
1113 : * returns strlen(a).
1114 : *
1115 : * It is the caller's responsibility to ensure that a is at least 12 bytes long,
1116 : * which is enough room to hold a minus sign, a maximally long int32, and the
1117 : * above terminating NUL.
1118 : */
1119 : int
1120 16928982 : pg_ltoa(int32 value, char *a)
1121 : {
1122 16928982 : uint32 uvalue = (uint32) value;
1123 16928982 : int len = 0;
1124 :
1125 16928982 : if (value < 0)
1126 : {
1127 183068 : uvalue = (uint32) 0 - uvalue;
1128 183068 : a[len++] = '-';
1129 : }
1130 16928982 : len += pg_ultoa_n(uvalue, a + len);
1131 16928982 : a[len] = '\0';
1132 16928982 : return len;
1133 : }
1134 :
1135 : /*
1136 : * Get the decimal representation, not NUL-terminated, and return the length of
1137 : * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
1138 : */
1139 : int
1140 656416 : pg_ulltoa_n(uint64 value, char *a)
1141 : {
1142 : int olength,
1143 656416 : i = 0;
1144 : uint32 value2;
1145 :
1146 : /* Degenerate case */
1147 656416 : if (value == 0)
1148 : {
1149 57372 : *a = '0';
1150 57372 : return 1;
1151 : }
1152 :
1153 599044 : olength = decimalLength64(value);
1154 :
1155 : /* Compute the result string. */
1156 617682 : while (value >= 100000000)
1157 : {
1158 18638 : const uint64 q = value / 100000000;
1159 18638 : uint32 value3 = (uint32) (value - 100000000 * q);
1160 :
1161 18638 : const uint32 c = value3 % 10000;
1162 18638 : const uint32 d = value3 / 10000;
1163 18638 : const uint32 c0 = (c % 100) << 1;
1164 18638 : const uint32 c1 = (c / 100) << 1;
1165 18638 : const uint32 d0 = (d % 100) << 1;
1166 18638 : const uint32 d1 = (d / 100) << 1;
1167 :
1168 18638 : char *pos = a + olength - i;
1169 :
1170 18638 : value = q;
1171 :
1172 18638 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1173 18638 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1174 18638 : memcpy(pos - 6, DIGIT_TABLE + d0, 2);
1175 18638 : memcpy(pos - 8, DIGIT_TABLE + d1, 2);
1176 18638 : i += 8;
1177 : }
1178 :
1179 : /* Switch to 32-bit for speed */
1180 599044 : value2 = (uint32) value;
1181 :
1182 599044 : if (value2 >= 10000)
1183 : {
1184 27736 : const uint32 c = value2 - 10000 * (value2 / 10000);
1185 27736 : const uint32 c0 = (c % 100) << 1;
1186 27736 : const uint32 c1 = (c / 100) << 1;
1187 :
1188 27736 : char *pos = a + olength - i;
1189 :
1190 27736 : value2 /= 10000;
1191 :
1192 27736 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1193 27736 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1194 27736 : i += 4;
1195 : }
1196 599044 : if (value2 >= 100)
1197 : {
1198 219872 : const uint32 c = (value2 % 100) << 1;
1199 219872 : char *pos = a + olength - i;
1200 :
1201 219872 : value2 /= 100;
1202 :
1203 219872 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1204 219872 : i += 2;
1205 : }
1206 599044 : if (value2 >= 10)
1207 : {
1208 130374 : const uint32 c = value2 << 1;
1209 130374 : char *pos = a + olength - i;
1210 :
1211 130374 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1212 : }
1213 : else
1214 468670 : *a = (char) ('0' + value2);
1215 :
1216 599044 : return olength;
1217 : }
1218 :
1219 : /*
1220 : * pg_lltoa: converts a signed 64-bit integer to its string representation and
1221 : * returns strlen(a).
1222 : *
1223 : * Caller must ensure that 'a' points to enough memory to hold the result
1224 : * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
1225 : */
1226 : int
1227 314050 : pg_lltoa(int64 value, char *a)
1228 : {
1229 314050 : uint64 uvalue = value;
1230 314050 : int len = 0;
1231 :
1232 314050 : if (value < 0)
1233 : {
1234 2694 : uvalue = (uint64) 0 - uvalue;
1235 2694 : a[len++] = '-';
1236 : }
1237 :
1238 314050 : len += pg_ulltoa_n(uvalue, a + len);
1239 314050 : a[len] = '\0';
1240 314050 : return len;
1241 : }
1242 :
1243 :
1244 : /*
1245 : * pg_ultostr_zeropad
1246 : * Converts 'value' into a decimal string representation stored at 'str'.
1247 : * 'minwidth' specifies the minimum width of the result; any extra space
1248 : * is filled up by prefixing the number with zeros.
1249 : *
1250 : * Returns the ending address of the string result (the last character written
1251 : * plus 1). Note that no NUL terminator is written.
1252 : *
1253 : * The intended use-case for this function is to build strings that contain
1254 : * multiple individual numbers, for example:
1255 : *
1256 : * str = pg_ultostr_zeropad(str, hours, 2);
1257 : * *str++ = ':';
1258 : * str = pg_ultostr_zeropad(str, mins, 2);
1259 : * *str++ = ':';
1260 : * str = pg_ultostr_zeropad(str, secs, 2);
1261 : * *str = '\0';
1262 : *
1263 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1264 : * result.
1265 : */
1266 : char *
1267 795842 : pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
1268 : {
1269 : int len;
1270 :
1271 : Assert(minwidth > 0);
1272 :
1273 795842 : if (value < 100 && minwidth == 2) /* Short cut for common case */
1274 : {
1275 669132 : memcpy(str, DIGIT_TABLE + value * 2, 2);
1276 669132 : return str + 2;
1277 : }
1278 :
1279 126710 : len = pg_ultoa_n(value, str);
1280 126710 : if (len >= minwidth)
1281 126032 : return str + len;
1282 :
1283 678 : memmove(str + minwidth - len, str, len);
1284 678 : memset(str, '0', minwidth - len);
1285 678 : return str + minwidth;
1286 : }
1287 :
1288 : /*
1289 : * pg_ultostr
1290 : * Converts 'value' into a decimal string representation stored at 'str'.
1291 : *
1292 : * Returns the ending address of the string result (the last character written
1293 : * plus 1). Note that no NUL terminator is written.
1294 : *
1295 : * The intended use-case for this function is to build strings that contain
1296 : * multiple individual numbers, for example:
1297 : *
1298 : * str = pg_ultostr(str, a);
1299 : * *str++ = ' ';
1300 : * str = pg_ultostr(str, b);
1301 : * *str = '\0';
1302 : *
1303 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1304 : * result.
1305 : */
1306 : char *
1307 3318 : pg_ultostr(char *str, uint32 value)
1308 : {
1309 3318 : int len = pg_ultoa_n(value, str);
1310 :
1311 3318 : return str + len;
1312 : }
|