Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * numutils.c
4 : * utility functions for I/O of built-in numeric types.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/numutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 : #include <limits.h>
19 : #include <ctype.h>
20 :
21 : #include "port/pg_bitutils.h"
22 : #include "utils/builtins.h"
23 :
/*
 * Lookup table holding the decimal text of every number from 00 to 99, two
 * characters per entry, so that the entry for n starts at offset 2 * n.
 * Digit generation copies pairs of digits out of this table instead of
 * producing one digit at a time.
 *
 * Each row below covers one value of the tens digit.  The array is exactly
 * 200 bytes, so there is no terminating NUL.
 */
static const char DIGIT_TABLE[200] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
39 :
40 : /*
41 : * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
42 : */
43 : static inline int
44 12977054 : decimalLength32(const uint32 v)
45 : {
46 : int t;
47 : static const uint32 PowersOfTen[] = {
48 : 1, 10, 100,
49 : 1000, 10000, 100000,
50 : 1000000, 10000000, 100000000,
51 : 1000000000
52 : };
53 :
54 : /*
55 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
56 : * good-enough approximation of the base-2 logarithm of 10
57 : */
58 12977054 : t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
59 12977054 : return t + (v >= PowersOfTen[t]);
60 : }
61 :
62 : static inline int
63 581602 : decimalLength64(const uint64 v)
64 : {
65 : int t;
66 : static const uint64 PowersOfTen[] = {
67 : UINT64CONST(1), UINT64CONST(10),
68 : UINT64CONST(100), UINT64CONST(1000),
69 : UINT64CONST(10000), UINT64CONST(100000),
70 : UINT64CONST(1000000), UINT64CONST(10000000),
71 : UINT64CONST(100000000), UINT64CONST(1000000000),
72 : UINT64CONST(10000000000), UINT64CONST(100000000000),
73 : UINT64CONST(1000000000000), UINT64CONST(10000000000000),
74 : UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
75 : UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
76 : UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
77 : };
78 :
79 : /*
80 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
81 : * good-enough approximation of the base-2 logarithm of 10
82 : */
83 581602 : t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
84 581602 : return t + (v >= PowersOfTen[t]);
85 : }
86 :
87 : static const int8 hexlookup[128] = {
88 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
89 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91 : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
92 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
93 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
96 : };
97 :
98 : /*
99 : * Convert input string to a signed 16 bit integer. Input strings may be
100 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
101 : * can be prefixed by an optional sign character, either '+' (the default) or
102 : * '-' for negative numbers. Hex strings are recognized by the digits being
103 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
104 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
105 : *
106 : * Allows any number of leading or trailing whitespace characters. Digits may
107 : * optionally be separated by a single underscore character. These can only
108 : * come between digits and not before or after the digits. Underscores have
109 : * no effect on the return value and are supported only to assist in improving
110 : * the human readability of the input strings.
111 : *
112 : * pg_strtoint16() will throw ereport() upon bad input format or overflow;
113 : * while pg_strtoint16_safe() instead returns such complaints in *escontext,
114 : * if it's an ErrorSaveContext.
115 : *
116 : * NB: Accumulate input as an unsigned number, to deal with two's complement
117 : * representation of the most negative number, which can't be represented as a
118 : * signed positive number.
119 : */
120 : int16
121 0 : pg_strtoint16(const char *s)
122 : {
123 0 : return pg_strtoint16_safe(s, NULL);
124 : }
125 :
126 : int16
127 740886 : pg_strtoint16_safe(const char *s, Node *escontext)
128 : {
129 740886 : const char *ptr = s;
130 : const char *firstdigit;
131 740886 : uint16 tmp = 0;
132 740886 : bool neg = false;
133 : unsigned char digit;
134 :
135 : /*
136 : * The majority of cases are likely to be base-10 digits without any
137 : * underscore separator characters. We'll first try to parse the string
138 : * with the assumption that's the case and only fallback on a slower
139 : * implementation which handles hex, octal and binary strings and
140 : * underscores if the fastpath version cannot parse the string.
141 : */
142 :
143 : /* leave it up to the slow path to look for leading spaces */
144 :
145 740886 : if (*ptr == '-')
146 : {
147 15190 : ptr++;
148 15190 : neg = true;
149 : }
150 :
151 : /* a leading '+' is uncommon so leave that for the slow path */
152 :
153 : /* process the first digit */
154 740886 : digit = (*ptr - '0');
155 :
156 : /*
157 : * Exploit unsigned arithmetic to save having to check both the upper and
158 : * lower bounds of the digit.
159 : */
160 740886 : if (likely(digit < 10))
161 : {
162 740826 : ptr++;
163 740826 : tmp = digit;
164 : }
165 : else
166 : {
167 : /* we need at least one digit */
168 60 : goto slow;
169 : }
170 :
171 : /* process remaining digits */
172 : for (;;)
173 : {
174 765366 : digit = (*ptr - '0');
175 :
176 765366 : if (digit >= 10)
177 740808 : break;
178 :
179 24558 : ptr++;
180 :
181 24558 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
182 18 : goto out_of_range;
183 :
184 24540 : tmp = tmp * 10 + digit;
185 : }
186 :
187 : /* when the string does not end in a digit, let the slow path handle it */
188 740808 : if (unlikely(*ptr != '\0'))
189 182 : goto slow;
190 :
191 740626 : if (neg)
192 : {
193 : /* check the negative equivalent will fit without overflowing */
194 15148 : if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
195 0 : goto out_of_range;
196 15148 : return -((int16) tmp);
197 : }
198 :
199 725478 : if (unlikely(tmp > PG_INT16_MAX))
200 0 : goto out_of_range;
201 :
202 725478 : return (int16) tmp;
203 :
204 242 : slow:
205 242 : tmp = 0;
206 242 : ptr = s;
207 : /* no need to reset neg */
208 :
209 : /* skip leading spaces */
210 302 : while (isspace((unsigned char) *ptr))
211 60 : ptr++;
212 :
213 : /* handle sign */
214 242 : if (*ptr == '-')
215 : {
216 48 : ptr++;
217 48 : neg = true;
218 : }
219 194 : else if (*ptr == '+')
220 0 : ptr++;
221 :
222 : /* process digits */
223 242 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
224 : {
225 42 : firstdigit = ptr += 2;
226 :
227 : for (;;)
228 : {
229 180 : if (isxdigit((unsigned char) *ptr))
230 : {
231 132 : if (unlikely(tmp > -(PG_INT16_MIN / 16)))
232 0 : goto out_of_range;
233 :
234 132 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
235 : }
236 48 : else if (*ptr == '_')
237 : {
238 : /* underscore must be followed by more digits */
239 6 : ptr++;
240 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
241 0 : goto invalid_syntax;
242 : }
243 : else
244 42 : break;
245 : }
246 : }
247 200 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
248 : {
249 42 : firstdigit = ptr += 2;
250 :
251 : for (;;)
252 : {
253 222 : if (*ptr >= '0' && *ptr <= '7')
254 : {
255 174 : if (unlikely(tmp > -(PG_INT16_MIN / 8)))
256 0 : goto out_of_range;
257 :
258 174 : tmp = tmp * 8 + (*ptr++ - '0');
259 : }
260 48 : else if (*ptr == '_')
261 : {
262 : /* underscore must be followed by more digits */
263 6 : ptr++;
264 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
265 0 : goto invalid_syntax;
266 : }
267 : else
268 42 : break;
269 : }
270 : }
271 158 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
272 : {
273 42 : firstdigit = ptr += 2;
274 :
275 : for (;;)
276 : {
277 504 : if (*ptr >= '0' && *ptr <= '1')
278 : {
279 450 : if (unlikely(tmp > -(PG_INT16_MIN / 2)))
280 0 : goto out_of_range;
281 :
282 450 : tmp = tmp * 2 + (*ptr++ - '0');
283 : }
284 54 : else if (*ptr == '_')
285 : {
286 : /* underscore must be followed by more digits */
287 12 : ptr++;
288 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
289 0 : goto invalid_syntax;
290 : }
291 : else
292 42 : break;
293 : }
294 : }
295 : else
296 : {
297 116 : firstdigit = ptr;
298 :
299 : for (;;)
300 : {
301 316 : if (*ptr >= '0' && *ptr <= '9')
302 : {
303 182 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
304 0 : goto out_of_range;
305 :
306 182 : tmp = tmp * 10 + (*ptr++ - '0');
307 : }
308 134 : else if (*ptr == '_')
309 : {
310 : /* underscore may not be first */
311 36 : if (unlikely(ptr == firstdigit))
312 6 : goto invalid_syntax;
313 : /* and it must be followed by more digits */
314 30 : ptr++;
315 30 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
316 12 : goto invalid_syntax;
317 : }
318 : else
319 98 : break;
320 : }
321 : }
322 :
323 : /* require at least one digit */
324 224 : if (unlikely(ptr == firstdigit))
325 60 : goto invalid_syntax;
326 :
327 : /* allow trailing whitespace, but not other trailing chars */
328 200 : while (isspace((unsigned char) *ptr))
329 36 : ptr++;
330 :
331 164 : if (unlikely(*ptr != '\0'))
332 26 : goto invalid_syntax;
333 :
334 138 : if (neg)
335 : {
336 : /* check the negative equivalent will fit without overflowing */
337 42 : if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
338 18 : goto out_of_range;
339 24 : return -((int16) tmp);
340 : }
341 :
342 96 : if (tmp > PG_INT16_MAX)
343 18 : goto out_of_range;
344 :
345 78 : return (int16) tmp;
346 :
347 54 : out_of_range:
348 54 : ereturn(escontext, 0,
349 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
350 : errmsg("value \"%s\" is out of range for type %s",
351 : s, "smallint")));
352 :
353 104 : invalid_syntax:
354 104 : ereturn(escontext, 0,
355 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
356 : errmsg("invalid input syntax for type %s: \"%s\"",
357 : "smallint", s)));
358 : }
359 :
360 : /*
361 : * Convert input string to a signed 32 bit integer. Input strings may be
362 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
363 : * can be prefixed by an optional sign character, either '+' (the default) or
364 : * '-' for negative numbers. Hex strings are recognized by the digits being
365 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
366 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
367 : *
368 : * Allows any number of leading or trailing whitespace characters. Digits may
369 : * optionally be separated by a single underscore character. These can only
370 : * come between digits and not before or after the digits. Underscores have
371 : * no effect on the return value and are supported only to assist in improving
372 : * the human readability of the input strings.
373 : *
374 : * pg_strtoint32() will throw ereport() upon bad input format or overflow;
375 : * while pg_strtoint32_safe() instead returns such complaints in *escontext,
376 : * if it's an ErrorSaveContext.
377 : *
378 : * NB: Accumulate input as an unsigned number, to deal with two's complement
379 : * representation of the most negative number, which can't be represented as a
380 : * signed positive number.
381 : */
382 : int32
383 11240 : pg_strtoint32(const char *s)
384 : {
385 11240 : return pg_strtoint32_safe(s, NULL);
386 : }
387 :
388 : int32
389 4654688 : pg_strtoint32_safe(const char *s, Node *escontext)
390 : {
391 4654688 : const char *ptr = s;
392 : const char *firstdigit;
393 4654688 : uint32 tmp = 0;
394 4654688 : bool neg = false;
395 : unsigned char digit;
396 :
397 : /*
398 : * The majority of cases are likely to be base-10 digits without any
399 : * underscore separator characters. We'll first try to parse the string
400 : * with the assumption that's the case and only fallback on a slower
401 : * implementation which handles hex, octal and binary strings and
402 : * underscores if the fastpath version cannot parse the string.
403 : */
404 :
405 : /* leave it up to the slow path to look for leading spaces */
406 :
407 4654688 : if (*ptr == '-')
408 : {
409 57470 : ptr++;
410 57470 : neg = true;
411 : }
412 :
413 : /* a leading '+' is uncommon so leave that for the slow path */
414 :
415 : /* process the first digit */
416 4654688 : digit = (*ptr - '0');
417 :
418 : /*
419 : * Exploit unsigned arithmetic to save having to check both the upper and
420 : * lower bounds of the digit.
421 : */
422 4654688 : if (likely(digit < 10))
423 : {
424 4654268 : ptr++;
425 4654268 : tmp = digit;
426 : }
427 : else
428 : {
429 : /* we need at least one digit */
430 420 : goto slow;
431 : }
432 :
433 : /* process remaining digits */
434 : for (;;)
435 : {
436 13330446 : digit = (*ptr - '0');
437 :
438 13330446 : if (digit >= 10)
439 4652988 : break;
440 :
441 8677458 : ptr++;
442 :
443 8677458 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
444 1280 : goto out_of_range;
445 :
446 8676178 : tmp = tmp * 10 + digit;
447 : }
448 :
449 : /* when the string does not end in a digit, let the slow path handle it */
450 4652988 : if (unlikely(*ptr != '\0'))
451 862 : goto slow;
452 :
453 4652126 : if (neg)
454 : {
455 : /* check the negative equivalent will fit without overflowing */
456 57416 : if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
457 0 : goto out_of_range;
458 57416 : return -((int32) tmp);
459 : }
460 :
461 4594710 : if (unlikely(tmp > PG_INT32_MAX))
462 176 : goto out_of_range;
463 :
464 4594534 : return (int32) tmp;
465 :
466 1282 : slow:
467 1282 : tmp = 0;
468 1282 : ptr = s;
469 : /* no need to reset neg */
470 :
471 : /* skip leading spaces */
472 1432 : while (isspace((unsigned char) *ptr))
473 150 : ptr++;
474 :
475 : /* handle sign */
476 1282 : if (*ptr == '-')
477 : {
478 60 : ptr++;
479 60 : neg = true;
480 : }
481 1222 : else if (*ptr == '+')
482 6 : ptr++;
483 :
484 : /* process digits */
485 1282 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
486 : {
487 520 : firstdigit = ptr += 2;
488 :
489 : for (;;)
490 : {
491 3064 : if (isxdigit((unsigned char) *ptr))
492 : {
493 2594 : if (unlikely(tmp > -(PG_INT32_MIN / 16)))
494 62 : goto out_of_range;
495 :
496 2532 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
497 : }
498 470 : else if (*ptr == '_')
499 : {
500 : /* underscore must be followed by more digits */
501 12 : ptr++;
502 12 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
503 0 : goto invalid_syntax;
504 : }
505 : else
506 458 : break;
507 : }
508 : }
509 762 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
510 : {
511 102 : firstdigit = ptr += 2;
512 :
513 : for (;;)
514 : {
515 972 : if (*ptr >= '0' && *ptr <= '7')
516 : {
517 882 : if (unlikely(tmp > -(PG_INT32_MIN / 8)))
518 24 : goto out_of_range;
519 :
520 858 : tmp = tmp * 8 + (*ptr++ - '0');
521 : }
522 90 : else if (*ptr == '_')
523 : {
524 : /* underscore must be followed by more digits */
525 12 : ptr++;
526 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
527 0 : goto invalid_syntax;
528 : }
529 : else
530 78 : break;
531 : }
532 : }
533 660 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
534 : {
535 104 : firstdigit = ptr += 2;
536 :
537 : for (;;)
538 : {
539 2622 : if (*ptr >= '0' && *ptr <= '1')
540 : {
541 2520 : if (unlikely(tmp > -(PG_INT32_MIN / 2)))
542 26 : goto out_of_range;
543 :
544 2494 : tmp = tmp * 2 + (*ptr++ - '0');
545 : }
546 102 : else if (*ptr == '_')
547 : {
548 : /* underscore must be followed by more digits */
549 24 : ptr++;
550 24 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
551 0 : goto invalid_syntax;
552 : }
553 : else
554 78 : break;
555 : }
556 : }
557 : else
558 : {
559 556 : firstdigit = ptr;
560 :
561 : for (;;)
562 : {
563 1508 : if (*ptr >= '0' && *ptr <= '9')
564 : {
565 842 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
566 28 : goto out_of_range;
567 :
568 814 : tmp = tmp * 10 + (*ptr++ - '0');
569 : }
570 666 : else if (*ptr == '_')
571 : {
572 : /* underscore may not be first */
573 156 : if (unlikely(ptr == firstdigit))
574 6 : goto invalid_syntax;
575 : /* and it must be followed by more digits */
576 150 : ptr++;
577 150 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
578 12 : goto invalid_syntax;
579 : }
580 : else
581 510 : break;
582 : }
583 : }
584 :
585 : /* require at least one digit */
586 1124 : if (unlikely(ptr == firstdigit))
587 366 : goto invalid_syntax;
588 :
589 : /* allow trailing whitespace, but not other trailing chars */
590 842 : while (isspace((unsigned char) *ptr))
591 84 : ptr++;
592 :
593 758 : if (unlikely(*ptr != '\0'))
594 66 : goto invalid_syntax;
595 :
596 692 : if (neg)
597 : {
598 : /* check the negative equivalent will fit without overflowing */
599 42 : if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
600 18 : goto out_of_range;
601 24 : return -((int32) tmp);
602 : }
603 :
604 650 : if (tmp > PG_INT32_MAX)
605 72 : goto out_of_range;
606 :
607 578 : return (int32) tmp;
608 :
609 1686 : out_of_range:
610 1686 : ereturn(escontext, 0,
611 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
612 : errmsg("value \"%s\" is out of range for type %s",
613 : s, "integer")));
614 :
615 450 : invalid_syntax:
616 450 : ereturn(escontext, 0,
617 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
618 : errmsg("invalid input syntax for type %s: \"%s\"",
619 : "integer", s)));
620 : }
621 :
622 : /*
623 : * Convert input string to a signed 64 bit integer. Input strings may be
624 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
625 : * can be prefixed by an optional sign character, either '+' (the default) or
626 : * '-' for negative numbers. Hex strings are recognized by the digits being
627 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
628 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
629 : *
630 : * Allows any number of leading or trailing whitespace characters. Digits may
631 : * optionally be separated by a single underscore character. These can only
632 : * come between digits and not before or after the digits. Underscores have
633 : * no effect on the return value and are supported only to assist in improving
634 : * the human readability of the input strings.
635 : *
636 : * pg_strtoint64() will throw ereport() upon bad input format or overflow;
637 : * while pg_strtoint64_safe() instead returns such complaints in *escontext,
638 : * if it's an ErrorSaveContext.
639 : *
640 : * NB: Accumulate input as an unsigned number, to deal with two's complement
641 : * representation of the most negative number, which can't be represented as a
642 : * signed positive number.
643 : */
644 : int64
645 0 : pg_strtoint64(const char *s)
646 : {
647 0 : return pg_strtoint64_safe(s, NULL);
648 : }
649 :
650 : int64
651 140474 : pg_strtoint64_safe(const char *s, Node *escontext)
652 : {
653 140474 : const char *ptr = s;
654 : const char *firstdigit;
655 140474 : uint64 tmp = 0;
656 140474 : bool neg = false;
657 : unsigned char digit;
658 :
659 : /*
660 : * The majority of cases are likely to be base-10 digits without any
661 : * underscore separator characters. We'll first try to parse the string
662 : * with the assumption that's the case and only fallback on a slower
663 : * implementation which handles hex, octal and binary strings and
664 : * underscores if the fastpath version cannot parse the string.
665 : */
666 :
667 : /* leave it up to the slow path to look for leading spaces */
668 :
669 140474 : if (*ptr == '-')
670 : {
671 1610 : ptr++;
672 1610 : neg = true;
673 : }
674 :
675 : /* a leading '+' is uncommon so leave that for the slow path */
676 :
677 : /* process the first digit */
678 140474 : digit = (*ptr - '0');
679 :
680 : /*
681 : * Exploit unsigned arithmetic to save having to check both the upper and
682 : * lower bounds of the digit.
683 : */
684 140474 : if (likely(digit < 10))
685 : {
686 140276 : ptr++;
687 140276 : tmp = digit;
688 : }
689 : else
690 : {
691 : /* we need at least one digit */
692 198 : goto slow;
693 : }
694 :
695 : /* process remaining digits */
696 : for (;;)
697 : {
698 377868 : digit = (*ptr - '0');
699 :
700 377868 : if (digit >= 10)
701 140042 : break;
702 :
703 237826 : ptr++;
704 :
705 237826 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
706 234 : goto out_of_range;
707 :
708 237592 : tmp = tmp * 10 + digit;
709 : }
710 :
711 : /* when the string does not end in a digit, let the slow path handle it */
712 140042 : if (unlikely(*ptr != '\0'))
713 9504 : goto slow;
714 :
715 130538 : if (neg)
716 : {
717 : /* check the negative equivalent will fit without overflowing */
718 970 : if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
719 18 : goto out_of_range;
720 952 : return -((int64) tmp);
721 : }
722 :
723 129568 : if (unlikely(tmp > PG_INT64_MAX))
724 18 : goto out_of_range;
725 :
726 129550 : return (int64) tmp;
727 :
728 9702 : slow:
729 9702 : tmp = 0;
730 9702 : ptr = s;
731 : /* no need to reset neg */
732 :
733 : /* skip leading spaces */
734 9776 : while (isspace((unsigned char) *ptr))
735 74 : ptr++;
736 :
737 : /* handle sign */
738 9702 : if (*ptr == '-')
739 : {
740 634 : ptr++;
741 634 : neg = true;
742 : }
743 9068 : else if (*ptr == '+')
744 48 : ptr++;
745 :
746 : /* process digits */
747 9702 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
748 : {
749 122 : firstdigit = ptr += 2;
750 :
751 : for (;;)
752 : {
753 1600 : if (isxdigit((unsigned char) *ptr))
754 : {
755 1472 : if (unlikely(tmp > -(PG_INT64_MIN / 16)))
756 0 : goto out_of_range;
757 :
758 1472 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
759 : }
760 128 : else if (*ptr == '_')
761 : {
762 : /* underscore must be followed by more digits */
763 6 : ptr++;
764 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
765 0 : goto invalid_syntax;
766 : }
767 : else
768 122 : break;
769 : }
770 : }
771 9580 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
772 : {
773 84 : firstdigit = ptr += 2;
774 :
775 : for (;;)
776 : {
777 1368 : if (*ptr >= '0' && *ptr <= '7')
778 : {
779 1278 : if (unlikely(tmp > -(PG_INT64_MIN / 8)))
780 0 : goto out_of_range;
781 :
782 1278 : tmp = tmp * 8 + (*ptr++ - '0');
783 : }
784 90 : else if (*ptr == '_')
785 : {
786 : /* underscore must be followed by more digits */
787 6 : ptr++;
788 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
789 0 : goto invalid_syntax;
790 : }
791 : else
792 84 : break;
793 : }
794 : }
795 9496 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
796 : {
797 84 : firstdigit = ptr += 2;
798 :
799 : for (;;)
800 : {
801 3804 : if (*ptr >= '0' && *ptr <= '1')
802 : {
803 3708 : if (unlikely(tmp > -(PG_INT64_MIN / 2)))
804 0 : goto out_of_range;
805 :
806 3708 : tmp = tmp * 2 + (*ptr++ - '0');
807 : }
808 96 : else if (*ptr == '_')
809 : {
810 : /* underscore must be followed by more digits */
811 12 : ptr++;
812 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
813 0 : goto invalid_syntax;
814 : }
815 : else
816 84 : break;
817 : }
818 : }
819 : else
820 : {
821 9412 : firstdigit = ptr;
822 :
823 : for (;;)
824 : {
825 24014 : if (*ptr >= '0' && *ptr <= '9')
826 : {
827 14480 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
828 0 : goto out_of_range;
829 :
830 14480 : tmp = tmp * 10 + (*ptr++ - '0');
831 : }
832 9534 : else if (*ptr == '_')
833 : {
834 : /* underscore may not be first */
835 140 : if (unlikely(ptr == firstdigit))
836 6 : goto invalid_syntax;
837 : /* and it must be followed by more digits */
838 134 : ptr++;
839 134 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
840 12 : goto invalid_syntax;
841 : }
842 : else
843 9394 : break;
844 : }
845 : }
846 :
847 : /* require at least one digit */
848 9684 : if (unlikely(ptr == firstdigit))
849 142 : goto invalid_syntax;
850 :
851 : /* allow trailing whitespace, but not other trailing chars */
852 9608 : while (isspace((unsigned char) *ptr))
853 66 : ptr++;
854 :
855 9542 : if (unlikely(*ptr != '\0'))
856 9170 : goto invalid_syntax;
857 :
858 372 : if (neg)
859 : {
860 : /* check the negative equivalent will fit without overflowing */
861 108 : if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
862 36 : goto out_of_range;
863 72 : return -((int64) tmp);
864 : }
865 :
866 264 : if (tmp > PG_INT64_MAX)
867 36 : goto out_of_range;
868 :
869 228 : return (int64) tmp;
870 :
871 342 : out_of_range:
872 342 : ereturn(escontext, 0,
873 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
874 : errmsg("value \"%s\" is out of range for type %s",
875 : s, "bigint")));
876 :
877 9330 : invalid_syntax:
878 9330 : ereturn(escontext, 0,
879 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
880 : errmsg("invalid input syntax for type %s: \"%s\"",
881 : "bigint", s)));
882 : }
883 :
884 : /*
885 : * Convert input string to an unsigned 32 bit integer.
886 : *
887 : * Allows any number of leading or trailing whitespace characters.
888 : *
889 : * If endloc isn't NULL, store a pointer to the rest of the string there,
890 : * so that caller can parse the rest. Otherwise, it's an error if anything
891 : * but whitespace follows.
892 : *
893 : * typname is what is reported in error messages.
894 : *
895 : * If escontext points to an ErrorSaveContext node, that is filled instead
896 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
897 : * to detect errors.
898 : */
899 : uint32
900 5182248 : uint32in_subr(const char *s, char **endloc,
901 : const char *typname, Node *escontext)
902 : {
903 : uint32 result;
904 : unsigned long cvt;
905 : char *endptr;
906 :
907 5182248 : errno = 0;
908 5182248 : cvt = strtoul(s, &endptr, 0);
909 :
910 : /*
911 : * strtoul() normally only sets ERANGE. On some systems it may also set
912 : * EINVAL, which simply means it couldn't parse the input string. Be sure
913 : * to report that the same way as the standard error indication (that
914 : * endptr == s).
915 : */
916 5182248 : if ((errno && errno != ERANGE) || endptr == s)
917 60 : ereturn(escontext, 0,
918 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
919 : errmsg("invalid input syntax for type %s: \"%s\"",
920 : typname, s)));
921 :
922 5182188 : if (errno == ERANGE)
923 12 : ereturn(escontext, 0,
924 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
925 : errmsg("value \"%s\" is out of range for type %s",
926 : s, typname)));
927 :
928 5182176 : if (endloc)
929 : {
930 : /* caller wants to deal with rest of string */
931 487426 : *endloc = endptr;
932 : }
933 : else
934 : {
935 : /* allow only whitespace after number */
936 4694864 : while (*endptr && isspace((unsigned char) *endptr))
937 114 : endptr++;
938 4694750 : if (*endptr)
939 36 : ereturn(escontext, 0,
940 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
941 : errmsg("invalid input syntax for type %s: \"%s\"",
942 : typname, s)));
943 : }
944 :
945 5182140 : result = (uint32) cvt;
946 :
947 : /*
948 : * Cope with possibility that unsigned long is wider than uint32, in which
949 : * case strtoul will not raise an error for some values that are out of
950 : * the range of uint32.
951 : *
952 : * For backwards compatibility, we want to accept inputs that are given
953 : * with a minus sign, so allow the input value if it matches after either
954 : * signed or unsigned extension to long.
955 : *
956 : * To ensure consistent results on 32-bit and 64-bit platforms, make sure
957 : * the error message is the same as if strtoul() had returned ERANGE.
958 : */
959 : #if PG_UINT32_MAX != ULONG_MAX
960 5182140 : if (cvt != (unsigned long) result &&
961 42 : cvt != (unsigned long) ((int) result))
962 30 : ereturn(escontext, 0,
963 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
964 : errmsg("value \"%s\" is out of range for type %s",
965 : s, typname)));
966 : #endif
967 :
968 5182110 : return result;
969 : }
970 :
971 : /*
972 : * Convert input string to an unsigned 64 bit integer.
973 : *
974 : * Allows any number of leading or trailing whitespace characters.
975 : *
976 : * If endloc isn't NULL, store a pointer to the rest of the string there,
977 : * so that caller can parse the rest. Otherwise, it's an error if anything
978 : * but whitespace follows.
979 : *
980 : * typname is what is reported in error messages.
981 : *
982 : * If escontext points to an ErrorSaveContext node, that is filled instead
983 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
984 : * to detect errors.
985 : */
986 : uint64
987 866 : uint64in_subr(const char *s, char **endloc,
988 : const char *typname, Node *escontext)
989 : {
990 : uint64 result;
991 : char *endptr;
992 :
993 866 : errno = 0;
994 866 : result = strtou64(s, &endptr, 0);
995 :
996 : /*
997 : * strtoul[l] normally only sets ERANGE. On some systems it may also set
998 : * EINVAL, which simply means it couldn't parse the input string. Be sure
999 : * to report that the same way as the standard error indication (that
1000 : * endptr == s).
1001 : */
1002 866 : if ((errno && errno != ERANGE) || endptr == s)
1003 18 : ereturn(escontext, 0,
1004 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1005 : errmsg("invalid input syntax for type %s: \"%s\"",
1006 : typname, s)));
1007 :
1008 848 : if (errno == ERANGE)
1009 6 : ereturn(escontext, 0,
1010 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1011 : errmsg("value \"%s\" is out of range for type %s",
1012 : s, typname)));
1013 :
1014 842 : if (endloc)
1015 : {
1016 : /* caller wants to deal with rest of string */
1017 0 : *endloc = endptr;
1018 : }
1019 : else
1020 : {
1021 : /* allow only whitespace after number */
1022 842 : while (*endptr && isspace((unsigned char) *endptr))
1023 0 : endptr++;
1024 842 : if (*endptr)
1025 0 : ereturn(escontext, 0,
1026 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1027 : errmsg("invalid input syntax for type %s: \"%s\"",
1028 : typname, s)));
1029 : }
1030 :
1031 842 : return result;
1032 : }
1033 :
1034 : /*
1035 : * pg_itoa: converts a signed 16-bit integer to its string representation
1036 : * and returns strlen(a).
1037 : *
1038 : * Caller must ensure that 'a' points to enough memory to hold the result
1039 : * (at least 7 bytes, counting a leading sign and trailing NUL).
1040 : *
1041 : * It doesn't seem worth implementing this separately.
1042 : */
1043 : int
1044 286524 : pg_itoa(int16 i, char *a)
1045 : {
1046 286524 : return pg_ltoa((int32) i, a);
1047 : }
1048 :
1049 : /*
1050 : * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
1051 : * not NUL-terminated, and returns the length of that string representation
1052 : *
1053 : * Caller must ensure that 'a' points to enough memory to hold the result (at
1054 : * least 10 bytes)
1055 : */
1056 : int
1057 16121712 : pg_ultoa_n(uint32 value, char *a)
1058 : {
1059 : int olength,
1060 16121712 : i = 0;
1061 :
1062 : /* Degenerate case */
1063 16121712 : if (value == 0)
1064 : {
1065 3144658 : *a = '0';
1066 3144658 : return 1;
1067 : }
1068 :
1069 12977054 : olength = decimalLength32(value);
1070 :
1071 : /* Compute the result string. */
1072 14912680 : while (value >= 10000)
1073 : {
1074 1935626 : const uint32 c = value - 10000 * (value / 10000);
1075 1935626 : const uint32 c0 = (c % 100) << 1;
1076 1935626 : const uint32 c1 = (c / 100) << 1;
1077 :
1078 1935626 : char *pos = a + olength - i;
1079 :
1080 1935626 : value /= 10000;
1081 :
1082 1935626 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1083 1935626 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1084 1935626 : i += 4;
1085 : }
1086 12977054 : if (value >= 100)
1087 : {
1088 5965152 : const uint32 c = (value % 100) << 1;
1089 :
1090 5965152 : char *pos = a + olength - i;
1091 :
1092 5965152 : value /= 100;
1093 :
1094 5965152 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1095 5965152 : i += 2;
1096 : }
1097 12977054 : if (value >= 10)
1098 : {
1099 6900640 : const uint32 c = value << 1;
1100 :
1101 6900640 : char *pos = a + olength - i;
1102 :
1103 6900640 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1104 : }
1105 : else
1106 : {
1107 6076414 : *a = (char) ('0' + value);
1108 : }
1109 :
1110 12977054 : return olength;
1111 : }
1112 :
1113 : /*
1114 : * pg_ltoa: converts a signed 32-bit integer to its string representation and
1115 : * returns strlen(a).
1116 : *
1117 : * It is the caller's responsibility to ensure that a is at least 12 bytes long,
1118 : * which is enough room to hold a minus sign, a maximally long int32, and the
1119 : * above terminating NUL.
1120 : */
1121 : int
1122 15991900 : pg_ltoa(int32 value, char *a)
1123 : {
1124 15991900 : uint32 uvalue = (uint32) value;
1125 15991900 : int len = 0;
1126 :
1127 15991900 : if (value < 0)
1128 : {
1129 81228 : uvalue = (uint32) 0 - uvalue;
1130 81228 : a[len++] = '-';
1131 : }
1132 15991900 : len += pg_ultoa_n(uvalue, a + len);
1133 15991900 : a[len] = '\0';
1134 15991900 : return len;
1135 : }
1136 :
1137 : /*
1138 : * Get the decimal representation, not NUL-terminated, and return the length of
1139 : * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
1140 : */
1141 : int
1142 635160 : pg_ulltoa_n(uint64 value, char *a)
1143 : {
1144 : int olength,
1145 635160 : i = 0;
1146 : uint32 value2;
1147 :
1148 : /* Degenerate case */
1149 635160 : if (value == 0)
1150 : {
1151 53558 : *a = '0';
1152 53558 : return 1;
1153 : }
1154 :
1155 581602 : olength = decimalLength64(value);
1156 :
1157 : /* Compute the result string. */
1158 598096 : while (value >= 100000000)
1159 : {
1160 16494 : const uint64 q = value / 100000000;
1161 16494 : uint32 value3 = (uint32) (value - 100000000 * q);
1162 :
1163 16494 : const uint32 c = value3 % 10000;
1164 16494 : const uint32 d = value3 / 10000;
1165 16494 : const uint32 c0 = (c % 100) << 1;
1166 16494 : const uint32 c1 = (c / 100) << 1;
1167 16494 : const uint32 d0 = (d % 100) << 1;
1168 16494 : const uint32 d1 = (d / 100) << 1;
1169 :
1170 16494 : char *pos = a + olength - i;
1171 :
1172 16494 : value = q;
1173 :
1174 16494 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1175 16494 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1176 16494 : memcpy(pos - 6, DIGIT_TABLE + d0, 2);
1177 16494 : memcpy(pos - 8, DIGIT_TABLE + d1, 2);
1178 16494 : i += 8;
1179 : }
1180 :
1181 : /* Switch to 32-bit for speed */
1182 581602 : value2 = (uint32) value;
1183 :
1184 581602 : if (value2 >= 10000)
1185 : {
1186 26310 : const uint32 c = value2 - 10000 * (value2 / 10000);
1187 26310 : const uint32 c0 = (c % 100) << 1;
1188 26310 : const uint32 c1 = (c / 100) << 1;
1189 :
1190 26310 : char *pos = a + olength - i;
1191 :
1192 26310 : value2 /= 10000;
1193 :
1194 26310 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1195 26310 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1196 26310 : i += 4;
1197 : }
1198 581602 : if (value2 >= 100)
1199 : {
1200 217234 : const uint32 c = (value2 % 100) << 1;
1201 217234 : char *pos = a + olength - i;
1202 :
1203 217234 : value2 /= 100;
1204 :
1205 217234 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1206 217234 : i += 2;
1207 : }
1208 581602 : if (value2 >= 10)
1209 : {
1210 127808 : const uint32 c = value2 << 1;
1211 127808 : char *pos = a + olength - i;
1212 :
1213 127808 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1214 : }
1215 : else
1216 453794 : *a = (char) ('0' + value2);
1217 :
1218 581602 : return olength;
1219 : }
1220 :
1221 : /*
1222 : * pg_lltoa: converts a signed 64-bit integer to its string representation and
1223 : * returns strlen(a).
1224 : *
1225 : * Caller must ensure that 'a' points to enough memory to hold the result
1226 : * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
1227 : */
1228 : int
1229 306994 : pg_lltoa(int64 value, char *a)
1230 : {
1231 306994 : uint64 uvalue = value;
1232 306994 : int len = 0;
1233 :
1234 306994 : if (value < 0)
1235 : {
1236 2440 : uvalue = (uint64) 0 - uvalue;
1237 2440 : a[len++] = '-';
1238 : }
1239 :
1240 306994 : len += pg_ulltoa_n(uvalue, a + len);
1241 306994 : a[len] = '\0';
1242 306994 : return len;
1243 : }
1244 :
1245 :
1246 : /*
1247 : * pg_ultostr_zeropad
1248 : * Converts 'value' into a decimal string representation stored at 'str'.
1249 : * 'minwidth' specifies the minimum width of the result; any extra space
1250 : * is filled up by prefixing the number with zeros.
1251 : *
1252 : * Returns the ending address of the string result (the last character written
1253 : * plus 1). Note that no NUL terminator is written.
1254 : *
1255 : * The intended use-case for this function is to build strings that contain
1256 : * multiple individual numbers, for example:
1257 : *
1258 : * str = pg_ultostr_zeropad(str, hours, 2);
1259 : * *str++ = ':';
1260 : * str = pg_ultostr_zeropad(str, mins, 2);
1261 : * *str++ = ':';
1262 : * str = pg_ultostr_zeropad(str, secs, 2);
1263 : * *str = '\0';
1264 : *
1265 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1266 : * result.
1267 : */
1268 : char *
1269 792782 : pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
1270 : {
1271 : int len;
1272 :
1273 : Assert(minwidth > 0);
1274 :
1275 792782 : if (value < 100 && minwidth == 2) /* Short cut for common case */
1276 : {
1277 666334 : memcpy(str, DIGIT_TABLE + value * 2, 2);
1278 666334 : return str + 2;
1279 : }
1280 :
1281 126448 : len = pg_ultoa_n(value, str);
1282 126448 : if (len >= minwidth)
1283 125770 : return str + len;
1284 :
1285 678 : memmove(str + minwidth - len, str, len);
1286 678 : memset(str, '0', minwidth - len);
1287 678 : return str + minwidth;
1288 : }
1289 :
1290 : /*
1291 : * pg_ultostr
1292 : * Converts 'value' into a decimal string representation stored at 'str'.
1293 : *
1294 : * Returns the ending address of the string result (the last character written
1295 : * plus 1). Note that no NUL terminator is written.
1296 : *
1297 : * The intended use-case for this function is to build strings that contain
1298 : * multiple individual numbers, for example:
1299 : *
1300 : * str = pg_ultostr(str, a);
1301 : * *str++ = ' ';
1302 : * str = pg_ultostr(str, b);
1303 : * *str = '\0';
1304 : *
1305 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1306 : * result.
1307 : */
1308 : char *
1309 3300 : pg_ultostr(char *str, uint32 value)
1310 : {
1311 3300 : int len = pg_ultoa_n(value, str);
1312 :
1313 3300 : return str + len;
1314 : }
|