Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * numutils.c
4 : * utility functions for I/O of built-in numeric types.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/numutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 : #include <limits.h>
19 : #include <ctype.h>
20 :
21 : #include "common/int.h"
22 : #include "utils/builtins.h"
23 : #include "port/pg_bitutils.h"
24 :
25 : /*
26 : * A table of all two-digit numbers. This is used to speed up decimal digit
27 : * generation by copying pairs of digits into the final output.
28 : */
/*
 * Two-character decimal renderings of 0..99, packed back to back: the pair
 * for value n starts at offset 2 * n.  Each source row below holds one
 * decade.  The array is exactly 200 bytes, so no terminating NUL is stored.
 */
static const char DIGIT_TABLE[200] =
	"00010203040506070809"
	"10111213141516171819"
	"20212223242526272829"
	"30313233343536373839"
	"40414243444546474849"
	"50515253545556575859"
	"60616263646566676869"
	"70717273747576777879"
	"80818283848586878889"
	"90919293949596979899";
40 :
41 : /*
42 : * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43 : */
44 : static inline int
45 12958080 : decimalLength32(const uint32 v)
46 : {
47 : int t;
48 : static const uint32 PowersOfTen[] = {
49 : 1, 10, 100,
50 : 1000, 10000, 100000,
51 : 1000000, 10000000, 100000000,
52 : 1000000000
53 : };
54 :
55 : /*
56 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
57 : * good-enough approximation of the base-2 logarithm of 10
58 : */
59 12958080 : t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60 12958080 : return t + (v >= PowersOfTen[t]);
61 : }
62 :
63 : static inline int
64 554194 : decimalLength64(const uint64 v)
65 : {
66 : int t;
67 : static const uint64 PowersOfTen[] = {
68 : UINT64CONST(1), UINT64CONST(10),
69 : UINT64CONST(100), UINT64CONST(1000),
70 : UINT64CONST(10000), UINT64CONST(100000),
71 : UINT64CONST(1000000), UINT64CONST(10000000),
72 : UINT64CONST(100000000), UINT64CONST(1000000000),
73 : UINT64CONST(10000000000), UINT64CONST(100000000000),
74 : UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75 : UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76 : UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77 : UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78 : };
79 :
80 : /*
81 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
82 : * good-enough approximation of the base-2 logarithm of 10
83 : */
84 554194 : t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85 554194 : return t + (v >= PowersOfTen[t]);
86 : }
87 :
88 : static const int8 hexlookup[128] = {
89 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
92 : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
93 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95 : -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
96 : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
97 : };
98 :
99 : /*
100 : * Convert input string to a signed 16 bit integer. Input strings may be
101 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
102 : * can be prefixed by an optional sign character, either '+' (the default) or
103 : * '-' for negative numbers. Hex strings are recognized by the digits being
104 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
105 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
106 : *
107 : * Allows any number of leading or trailing whitespace characters. Digits may
108 : * optionally be separated by a single underscore character. These can only
109 : * come between digits and not before or after the digits. Underscores have
110 : * no effect on the return value and are supported only to assist in improving
111 : * the human readability of the input strings.
112 : *
113 : * pg_strtoint16() will throw ereport() upon bad input format or overflow;
114 : * while pg_strtoint16_safe() instead returns such complaints in *escontext,
115 : * if it's an ErrorSaveContext.
116 : *
117 : * NB: Accumulate input as an unsigned number, to deal with two's complement
118 : * representation of the most negative number, which can't be represented as a
119 : * signed positive number.
120 : */
121 : int16
122 0 : pg_strtoint16(const char *s)
123 : {
124 0 : return pg_strtoint16_safe(s, NULL);
125 : }
126 :
127 : int16
128 599888 : pg_strtoint16_safe(const char *s, Node *escontext)
129 : {
130 599888 : const char *ptr = s;
131 : const char *firstdigit;
132 599888 : uint16 tmp = 0;
133 599888 : bool neg = false;
134 : unsigned char digit;
135 :
136 : /*
137 : * The majority of cases are likely to be base-10 digits without any
138 : * underscore separator characters. We'll first try to parse the string
139 : * with the assumption that's the case and only fallback on a slower
140 : * implementation which handles hex, octal and binary strings and
141 : * underscores if the fastpath version cannot parse the string.
142 : */
143 :
144 : /* leave it up to the slow path to look for leading spaces */
145 :
146 599888 : if (*ptr == '-')
147 : {
148 20038 : ptr++;
149 20038 : neg = true;
150 : }
151 :
152 : /* a leading '+' is uncommon so leave that for the slow path */
153 :
154 : /* process the first digit */
155 599888 : digit = (*ptr - '0');
156 :
157 : /*
158 : * Exploit unsigned arithmetic to save having to check both the upper and
159 : * lower bounds of the digit.
160 : */
161 599888 : if (likely(digit < 10))
162 : {
163 599834 : ptr++;
164 599834 : tmp = digit;
165 : }
166 : else
167 : {
168 : /* we need at least one digit */
169 54 : goto slow;
170 : }
171 :
172 : /* process remaining digits */
173 : for (;;)
174 : {
175 619978 : digit = (*ptr - '0');
176 :
177 619978 : if (digit >= 10)
178 599816 : break;
179 :
180 20162 : ptr++;
181 :
182 20162 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
183 18 : goto out_of_range;
184 :
185 20144 : tmp = tmp * 10 + digit;
186 : }
187 :
188 : /* when the string does not end in a digit, let the slow path handle it */
189 599816 : if (unlikely(*ptr != '\0'))
190 176 : goto slow;
191 :
192 599640 : if (neg)
193 : {
194 : /* check the negative equivalent will fit without overflowing */
195 19996 : if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
196 0 : goto out_of_range;
197 19996 : return -((int16) tmp);
198 : }
199 :
200 579644 : if (unlikely(tmp > PG_INT16_MAX))
201 0 : goto out_of_range;
202 :
203 579644 : return (int16) tmp;
204 :
205 230 : slow:
206 230 : tmp = 0;
207 230 : ptr = s;
208 : /* no need to reset neg */
209 :
210 : /* skip leading spaces */
211 290 : while (isspace((unsigned char) *ptr))
212 60 : ptr++;
213 :
214 : /* handle sign */
215 230 : if (*ptr == '-')
216 : {
217 48 : ptr++;
218 48 : neg = true;
219 : }
220 182 : else if (*ptr == '+')
221 0 : ptr++;
222 :
223 : /* process digits */
224 230 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
225 : {
226 42 : firstdigit = ptr += 2;
227 :
228 : for (;;)
229 : {
230 180 : if (isxdigit((unsigned char) *ptr))
231 : {
232 132 : if (unlikely(tmp > -(PG_INT16_MIN / 16)))
233 0 : goto out_of_range;
234 :
235 132 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
236 : }
237 48 : else if (*ptr == '_')
238 : {
239 : /* underscore must be followed by more digits */
240 6 : ptr++;
241 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
242 0 : goto invalid_syntax;
243 : }
244 : else
245 42 : break;
246 : }
247 : }
248 188 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
249 : {
250 42 : firstdigit = ptr += 2;
251 :
252 : for (;;)
253 : {
254 222 : if (*ptr >= '0' && *ptr <= '7')
255 : {
256 174 : if (unlikely(tmp > -(PG_INT16_MIN / 8)))
257 0 : goto out_of_range;
258 :
259 174 : tmp = tmp * 8 + (*ptr++ - '0');
260 : }
261 48 : else if (*ptr == '_')
262 : {
263 : /* underscore must be followed by more digits */
264 6 : ptr++;
265 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
266 0 : goto invalid_syntax;
267 : }
268 : else
269 42 : break;
270 : }
271 : }
272 146 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
273 : {
274 42 : firstdigit = ptr += 2;
275 :
276 : for (;;)
277 : {
278 504 : if (*ptr >= '0' && *ptr <= '1')
279 : {
280 450 : if (unlikely(tmp > -(PG_INT16_MIN / 2)))
281 0 : goto out_of_range;
282 :
283 450 : tmp = tmp * 2 + (*ptr++ - '0');
284 : }
285 54 : else if (*ptr == '_')
286 : {
287 : /* underscore must be followed by more digits */
288 12 : ptr++;
289 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
290 0 : goto invalid_syntax;
291 : }
292 : else
293 42 : break;
294 : }
295 : }
296 : else
297 : {
298 104 : firstdigit = ptr;
299 :
300 : for (;;)
301 : {
302 286 : if (*ptr >= '0' && *ptr <= '9')
303 : {
304 164 : if (unlikely(tmp > -(PG_INT16_MIN / 10)))
305 0 : goto out_of_range;
306 :
307 164 : tmp = tmp * 10 + (*ptr++ - '0');
308 : }
309 122 : else if (*ptr == '_')
310 : {
311 : /* underscore may not be first */
312 36 : if (unlikely(ptr == firstdigit))
313 6 : goto invalid_syntax;
314 : /* and it must be followed by more digits */
315 30 : ptr++;
316 30 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
317 12 : goto invalid_syntax;
318 : }
319 : else
320 86 : break;
321 : }
322 : }
323 :
324 : /* require at least one digit */
325 212 : if (unlikely(ptr == firstdigit))
326 54 : goto invalid_syntax;
327 :
328 : /* allow trailing whitespace, but not other trailing chars */
329 194 : while (isspace((unsigned char) *ptr))
330 36 : ptr++;
331 :
332 158 : if (unlikely(*ptr != '\0'))
333 20 : goto invalid_syntax;
334 :
335 138 : if (neg)
336 : {
337 : /* check the negative equivalent will fit without overflowing */
338 42 : if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
339 18 : goto out_of_range;
340 24 : return -((int16) tmp);
341 : }
342 :
343 96 : if (tmp > PG_INT16_MAX)
344 18 : goto out_of_range;
345 :
346 78 : return (int16) tmp;
347 :
348 54 : out_of_range:
349 54 : ereturn(escontext, 0,
350 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
351 : errmsg("value \"%s\" is out of range for type %s",
352 : s, "smallint")));
353 :
354 92 : invalid_syntax:
355 92 : ereturn(escontext, 0,
356 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
357 : errmsg("invalid input syntax for type %s: \"%s\"",
358 : "smallint", s)));
359 : }
360 :
361 : /*
362 : * Convert input string to a signed 32 bit integer. Input strings may be
363 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
364 : * can be prefixed by an optional sign character, either '+' (the default) or
365 : * '-' for negative numbers. Hex strings are recognized by the digits being
366 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
367 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
368 : *
369 : * Allows any number of leading or trailing whitespace characters. Digits may
370 : * optionally be separated by a single underscore character. These can only
371 : * come between digits and not before or after the digits. Underscores have
372 : * no effect on the return value and are supported only to assist in improving
373 : * the human readability of the input strings.
374 : *
375 : * pg_strtoint32() will throw ereport() upon bad input format or overflow;
376 : * while pg_strtoint32_safe() instead returns such complaints in *escontext,
377 : * if it's an ErrorSaveContext.
378 : *
379 : * NB: Accumulate input as an unsigned number, to deal with two's complement
380 : * representation of the most negative number, which can't be represented as a
381 : * signed positive number.
382 : */
383 : int32
384 10082 : pg_strtoint32(const char *s)
385 : {
386 10082 : return pg_strtoint32_safe(s, NULL);
387 : }
388 :
389 : int32
390 4530792 : pg_strtoint32_safe(const char *s, Node *escontext)
391 : {
392 4530792 : const char *ptr = s;
393 : const char *firstdigit;
394 4530792 : uint32 tmp = 0;
395 4530792 : bool neg = false;
396 : unsigned char digit;
397 :
398 : /*
399 : * The majority of cases are likely to be base-10 digits without any
400 : * underscore separator characters. We'll first try to parse the string
401 : * with the assumption that's the case and only fallback on a slower
402 : * implementation which handles hex, octal and binary strings and
403 : * underscores if the fastpath version cannot parse the string.
404 : */
405 :
406 : /* leave it up to the slow path to look for leading spaces */
407 :
408 4530792 : if (*ptr == '-')
409 : {
410 49280 : ptr++;
411 49280 : neg = true;
412 : }
413 :
414 : /* a leading '+' is uncommon so leave that for the slow path */
415 :
416 : /* process the first digit */
417 4530792 : digit = (*ptr - '0');
418 :
419 : /*
420 : * Exploit unsigned arithmetic to save having to check both the upper and
421 : * lower bounds of the digit.
422 : */
423 4530792 : if (likely(digit < 10))
424 : {
425 4530564 : ptr++;
426 4530564 : tmp = digit;
427 : }
428 : else
429 : {
430 : /* we need at least one digit */
431 228 : goto slow;
432 : }
433 :
434 : /* process remaining digits */
435 : for (;;)
436 : {
437 13153774 : digit = (*ptr - '0');
438 :
439 13153774 : if (digit >= 10)
440 4529482 : break;
441 :
442 8624292 : ptr++;
443 :
444 8624292 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
445 1082 : goto out_of_range;
446 :
447 8623210 : tmp = tmp * 10 + digit;
448 : }
449 :
450 : /* when the string does not end in a digit, let the slow path handle it */
451 4529482 : if (unlikely(*ptr != '\0'))
452 734 : goto slow;
453 :
454 4528748 : if (neg)
455 : {
456 : /* check the negative equivalent will fit without overflowing */
457 49238 : if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
458 0 : goto out_of_range;
459 49238 : return -((int32) tmp);
460 : }
461 :
462 4479510 : if (unlikely(tmp > PG_INT32_MAX))
463 154 : goto out_of_range;
464 :
465 4479356 : return (int32) tmp;
466 :
467 962 : slow:
468 962 : tmp = 0;
469 962 : ptr = s;
470 : /* no need to reset neg */
471 :
472 : /* skip leading spaces */
473 1100 : while (isspace((unsigned char) *ptr))
474 138 : ptr++;
475 :
476 : /* handle sign */
477 962 : if (*ptr == '-')
478 : {
479 48 : ptr++;
480 48 : neg = true;
481 : }
482 914 : else if (*ptr == '+')
483 0 : ptr++;
484 :
485 : /* process digits */
486 962 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
487 : {
488 440 : firstdigit = ptr += 2;
489 :
490 : for (;;)
491 : {
492 2664 : if (isxdigit((unsigned char) *ptr))
493 : {
494 2274 : if (unlikely(tmp > -(PG_INT32_MIN / 16)))
495 62 : goto out_of_range;
496 :
497 2212 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
498 : }
499 390 : else if (*ptr == '_')
500 : {
501 : /* underscore must be followed by more digits */
502 12 : ptr++;
503 12 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
504 0 : goto invalid_syntax;
505 : }
506 : else
507 378 : break;
508 : }
509 : }
510 522 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
511 : {
512 102 : firstdigit = ptr += 2;
513 :
514 : for (;;)
515 : {
516 972 : if (*ptr >= '0' && *ptr <= '7')
517 : {
518 882 : if (unlikely(tmp > -(PG_INT32_MIN / 8)))
519 24 : goto out_of_range;
520 :
521 858 : tmp = tmp * 8 + (*ptr++ - '0');
522 : }
523 90 : else if (*ptr == '_')
524 : {
525 : /* underscore must be followed by more digits */
526 12 : ptr++;
527 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
528 0 : goto invalid_syntax;
529 : }
530 : else
531 78 : break;
532 : }
533 : }
534 420 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
535 : {
536 104 : firstdigit = ptr += 2;
537 :
538 : for (;;)
539 : {
540 2622 : if (*ptr >= '0' && *ptr <= '1')
541 : {
542 2520 : if (unlikely(tmp > -(PG_INT32_MIN / 2)))
543 26 : goto out_of_range;
544 :
545 2494 : tmp = tmp * 2 + (*ptr++ - '0');
546 : }
547 102 : else if (*ptr == '_')
548 : {
549 : /* underscore must be followed by more digits */
550 24 : ptr++;
551 24 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
552 0 : goto invalid_syntax;
553 : }
554 : else
555 78 : break;
556 : }
557 : }
558 : else
559 : {
560 316 : firstdigit = ptr;
561 :
562 : for (;;)
563 : {
564 1202 : if (*ptr >= '0' && *ptr <= '9')
565 : {
566 776 : if (unlikely(tmp > -(PG_INT32_MIN / 10)))
567 28 : goto out_of_range;
568 :
569 748 : tmp = tmp * 10 + (*ptr++ - '0');
570 : }
571 426 : else if (*ptr == '_')
572 : {
573 : /* underscore may not be first */
574 156 : if (unlikely(ptr == firstdigit))
575 6 : goto invalid_syntax;
576 : /* and it must be followed by more digits */
577 150 : ptr++;
578 150 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
579 12 : goto invalid_syntax;
580 : }
581 : else
582 270 : break;
583 : }
584 : }
585 :
586 : /* require at least one digit */
587 804 : if (unlikely(ptr == firstdigit))
588 180 : goto invalid_syntax;
589 :
590 : /* allow trailing whitespace, but not other trailing chars */
591 708 : while (isspace((unsigned char) *ptr))
592 84 : ptr++;
593 :
594 624 : if (unlikely(*ptr != '\0'))
595 18 : goto invalid_syntax;
596 :
597 606 : if (neg)
598 : {
599 : /* check the negative equivalent will fit without overflowing */
600 42 : if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
601 18 : goto out_of_range;
602 24 : return -((int32) tmp);
603 : }
604 :
605 564 : if (tmp > PG_INT32_MAX)
606 72 : goto out_of_range;
607 :
608 492 : return (int32) tmp;
609 :
610 1466 : out_of_range:
611 1466 : ereturn(escontext, 0,
612 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
613 : errmsg("value \"%s\" is out of range for type %s",
614 : s, "integer")));
615 :
616 216 : invalid_syntax:
617 216 : ereturn(escontext, 0,
618 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
619 : errmsg("invalid input syntax for type %s: \"%s\"",
620 : "integer", s)));
621 : }
622 :
623 : /*
624 : * Convert input string to a signed 64 bit integer. Input strings may be
625 : * expressed in base-10, hexadecimal, octal, or binary format, all of which
626 : * can be prefixed by an optional sign character, either '+' (the default) or
627 : * '-' for negative numbers. Hex strings are recognized by the digits being
628 : * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
629 : * prefix. The binary representation is recognized by the 0b or 0B prefix.
630 : *
631 : * Allows any number of leading or trailing whitespace characters. Digits may
632 : * optionally be separated by a single underscore character. These can only
633 : * come between digits and not before or after the digits. Underscores have
634 : * no effect on the return value and are supported only to assist in improving
635 : * the human readability of the input strings.
636 : *
637 : * pg_strtoint64() will throw ereport() upon bad input format or overflow;
638 : * while pg_strtoint64_safe() instead returns such complaints in *escontext,
639 : * if it's an ErrorSaveContext.
640 : *
641 : * NB: Accumulate input as an unsigned number, to deal with two's complement
642 : * representation of the most negative number, which can't be represented as a
643 : * signed positive number.
644 : */
645 : int64
646 0 : pg_strtoint64(const char *s)
647 : {
648 0 : return pg_strtoint64_safe(s, NULL);
649 : }
650 :
651 : int64
652 139142 : pg_strtoint64_safe(const char *s, Node *escontext)
653 : {
654 139142 : const char *ptr = s;
655 : const char *firstdigit;
656 139142 : uint64 tmp = 0;
657 139142 : bool neg = false;
658 : unsigned char digit;
659 :
660 : /*
661 : * The majority of cases are likely to be base-10 digits without any
662 : * underscore separator characters. We'll first try to parse the string
663 : * with the assumption that's the case and only fallback on a slower
664 : * implementation which handles hex, octal and binary strings and
665 : * underscores if the fastpath version cannot parse the string.
666 : */
667 :
668 : /* leave it up to the slow path to look for leading spaces */
669 :
670 139142 : if (*ptr == '-')
671 : {
672 1484 : ptr++;
673 1484 : neg = true;
674 : }
675 :
676 : /* a leading '+' is uncommon so leave that for the slow path */
677 :
678 : /* process the first digit */
679 139142 : digit = (*ptr - '0');
680 :
681 : /*
682 : * Exploit unsigned arithmetic to save having to check both the upper and
683 : * lower bounds of the digit.
684 : */
685 139142 : if (likely(digit < 10))
686 : {
687 138992 : ptr++;
688 138992 : tmp = digit;
689 : }
690 : else
691 : {
692 : /* we need at least one digit */
693 150 : goto slow;
694 : }
695 :
696 : /* process remaining digits */
697 : for (;;)
698 : {
699 374322 : digit = (*ptr - '0');
700 :
701 374322 : if (digit >= 10)
702 138764 : break;
703 :
704 235558 : ptr++;
705 :
706 235558 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
707 228 : goto out_of_range;
708 :
709 235330 : tmp = tmp * 10 + digit;
710 : }
711 :
712 : /* when the string does not end in a digit, let the slow path handle it */
713 138764 : if (unlikely(*ptr != '\0'))
714 9280 : goto slow;
715 :
716 129484 : if (neg)
717 : {
718 : /* check the negative equivalent will fit without overflowing */
719 904 : if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
720 18 : goto out_of_range;
721 886 : return -((int64) tmp);
722 : }
723 :
724 128580 : if (unlikely(tmp > PG_INT64_MAX))
725 18 : goto out_of_range;
726 :
727 128562 : return (int64) tmp;
728 :
729 9430 : slow:
730 9430 : tmp = 0;
731 9430 : ptr = s;
732 : /* no need to reset neg */
733 :
734 : /* skip leading spaces */
735 9504 : while (isspace((unsigned char) *ptr))
736 74 : ptr++;
737 :
738 : /* handle sign */
739 9430 : if (*ptr == '-')
740 : {
741 574 : ptr++;
742 574 : neg = true;
743 : }
744 8856 : else if (*ptr == '+')
745 42 : ptr++;
746 :
747 : /* process digits */
748 9430 : if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
749 : {
750 122 : firstdigit = ptr += 2;
751 :
752 : for (;;)
753 : {
754 1600 : if (isxdigit((unsigned char) *ptr))
755 : {
756 1472 : if (unlikely(tmp > -(PG_INT64_MIN / 16)))
757 0 : goto out_of_range;
758 :
759 1472 : tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
760 : }
761 128 : else if (*ptr == '_')
762 : {
763 : /* underscore must be followed by more digits */
764 6 : ptr++;
765 6 : if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
766 0 : goto invalid_syntax;
767 : }
768 : else
769 122 : break;
770 : }
771 : }
772 9308 : else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
773 : {
774 84 : firstdigit = ptr += 2;
775 :
776 : for (;;)
777 : {
778 1368 : if (*ptr >= '0' && *ptr <= '7')
779 : {
780 1278 : if (unlikely(tmp > -(PG_INT64_MIN / 8)))
781 0 : goto out_of_range;
782 :
783 1278 : tmp = tmp * 8 + (*ptr++ - '0');
784 : }
785 90 : else if (*ptr == '_')
786 : {
787 : /* underscore must be followed by more digits */
788 6 : ptr++;
789 6 : if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
790 0 : goto invalid_syntax;
791 : }
792 : else
793 84 : break;
794 : }
795 : }
796 9224 : else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
797 : {
798 84 : firstdigit = ptr += 2;
799 :
800 : for (;;)
801 : {
802 3804 : if (*ptr >= '0' && *ptr <= '1')
803 : {
804 3708 : if (unlikely(tmp > -(PG_INT64_MIN / 2)))
805 0 : goto out_of_range;
806 :
807 3708 : tmp = tmp * 2 + (*ptr++ - '0');
808 : }
809 96 : else if (*ptr == '_')
810 : {
811 : /* underscore must be followed by more digits */
812 12 : ptr++;
813 12 : if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
814 0 : goto invalid_syntax;
815 : }
816 : else
817 84 : break;
818 : }
819 : }
820 : else
821 : {
822 9140 : firstdigit = ptr;
823 :
824 : for (;;)
825 : {
826 23386 : if (*ptr >= '0' && *ptr <= '9')
827 : {
828 14124 : if (unlikely(tmp > -(PG_INT64_MIN / 10)))
829 0 : goto out_of_range;
830 :
831 14124 : tmp = tmp * 10 + (*ptr++ - '0');
832 : }
833 9262 : else if (*ptr == '_')
834 : {
835 : /* underscore may not be first */
836 140 : if (unlikely(ptr == firstdigit))
837 6 : goto invalid_syntax;
838 : /* and it must be followed by more digits */
839 134 : ptr++;
840 134 : if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
841 12 : goto invalid_syntax;
842 : }
843 : else
844 9122 : break;
845 : }
846 : }
847 :
848 : /* require at least one digit */
849 9412 : if (unlikely(ptr == firstdigit))
850 100 : goto invalid_syntax;
851 :
852 : /* allow trailing whitespace, but not other trailing chars */
853 9378 : while (isspace((unsigned char) *ptr))
854 66 : ptr++;
855 :
856 9312 : if (unlikely(*ptr != '\0'))
857 8946 : goto invalid_syntax;
858 :
859 366 : if (neg)
860 : {
861 : /* check the negative equivalent will fit without overflowing */
862 108 : if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
863 36 : goto out_of_range;
864 72 : return -((int64) tmp);
865 : }
866 :
867 258 : if (tmp > PG_INT64_MAX)
868 36 : goto out_of_range;
869 :
870 222 : return (int64) tmp;
871 :
872 336 : out_of_range:
873 336 : ereturn(escontext, 0,
874 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
875 : errmsg("value \"%s\" is out of range for type %s",
876 : s, "bigint")));
877 :
878 9064 : invalid_syntax:
879 9064 : ereturn(escontext, 0,
880 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
881 : errmsg("invalid input syntax for type %s: \"%s\"",
882 : "bigint", s)));
883 : }
884 :
885 : /*
886 : * Convert input string to an unsigned 32 bit integer.
887 : *
888 : * Allows any number of leading or trailing whitespace characters.
889 : *
890 : * If endloc isn't NULL, store a pointer to the rest of the string there,
891 : * so that caller can parse the rest. Otherwise, it's an error if anything
892 : * but whitespace follows.
893 : *
894 : * typname is what is reported in error messages.
895 : *
896 : * If escontext points to an ErrorSaveContext node, that is filled instead
897 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
898 : * to detect errors.
899 : */
900 : uint32
901 4149010 : uint32in_subr(const char *s, char **endloc,
902 : const char *typname, Node *escontext)
903 : {
904 : uint32 result;
905 : unsigned long cvt;
906 : char *endptr;
907 :
908 4149010 : errno = 0;
909 4149010 : cvt = strtoul(s, &endptr, 0);
910 :
911 : /*
912 : * strtoul() normally only sets ERANGE. On some systems it may also set
913 : * EINVAL, which simply means it couldn't parse the input string. Be sure
914 : * to report that the same way as the standard error indication (that
915 : * endptr == s).
916 : */
917 4149010 : if ((errno && errno != ERANGE) || endptr == s)
918 60 : ereturn(escontext, 0,
919 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
920 : errmsg("invalid input syntax for type %s: \"%s\"",
921 : typname, s)));
922 :
923 4148950 : if (errno == ERANGE)
924 12 : ereturn(escontext, 0,
925 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
926 : errmsg("value \"%s\" is out of range for type %s",
927 : s, typname)));
928 :
929 4148938 : if (endloc)
930 : {
931 : /* caller wants to deal with rest of string */
932 388530 : *endloc = endptr;
933 : }
934 : else
935 : {
936 : /* allow only whitespace after number */
937 3760522 : while (*endptr && isspace((unsigned char) *endptr))
938 114 : endptr++;
939 3760408 : if (*endptr)
940 36 : ereturn(escontext, 0,
941 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
942 : errmsg("invalid input syntax for type %s: \"%s\"",
943 : typname, s)));
944 : }
945 :
946 4148902 : result = (uint32) cvt;
947 :
948 : /*
949 : * Cope with possibility that unsigned long is wider than uint32, in which
950 : * case strtoul will not raise an error for some values that are out of
951 : * the range of uint32.
952 : *
953 : * For backwards compatibility, we want to accept inputs that are given
954 : * with a minus sign, so allow the input value if it matches after either
955 : * signed or unsigned extension to long.
956 : *
957 : * To ensure consistent results on 32-bit and 64-bit platforms, make sure
958 : * the error message is the same as if strtoul() had returned ERANGE.
959 : */
960 : #if PG_UINT32_MAX != ULONG_MAX
961 4148902 : if (cvt != (unsigned long) result &&
962 42 : cvt != (unsigned long) ((int) result))
963 30 : ereturn(escontext, 0,
964 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
965 : errmsg("value \"%s\" is out of range for type %s",
966 : s, typname)));
967 : #endif
968 :
969 4148872 : return result;
970 : }
971 :
972 : /*
973 : * Convert input string to an unsigned 64 bit integer.
974 : *
975 : * Allows any number of leading or trailing whitespace characters.
976 : *
977 : * If endloc isn't NULL, store a pointer to the rest of the string there,
978 : * so that caller can parse the rest. Otherwise, it's an error if anything
979 : * but whitespace follows.
980 : *
981 : * typname is what is reported in error messages.
982 : *
983 : * If escontext points to an ErrorSaveContext node, that is filled instead
984 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
985 : * to detect errors.
986 : */
987 : uint64
988 866 : uint64in_subr(const char *s, char **endloc,
989 : const char *typname, Node *escontext)
990 : {
991 : uint64 result;
992 : char *endptr;
993 :
994 866 : errno = 0;
995 866 : result = strtou64(s, &endptr, 0);
996 :
997 : /*
998 : * strtoul[l] normally only sets ERANGE. On some systems it may also set
999 : * EINVAL, which simply means it couldn't parse the input string. Be sure
1000 : * to report that the same way as the standard error indication (that
1001 : * endptr == s).
1002 : */
1003 866 : if ((errno && errno != ERANGE) || endptr == s)
1004 18 : ereturn(escontext, 0,
1005 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1006 : errmsg("invalid input syntax for type %s: \"%s\"",
1007 : typname, s)));
1008 :
1009 848 : if (errno == ERANGE)
1010 6 : ereturn(escontext, 0,
1011 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1012 : errmsg("value \"%s\" is out of range for type %s",
1013 : s, typname)));
1014 :
1015 842 : if (endloc)
1016 : {
1017 : /* caller wants to deal with rest of string */
1018 0 : *endloc = endptr;
1019 : }
1020 : else
1021 : {
1022 : /* allow only whitespace after number */
1023 842 : while (*endptr && isspace((unsigned char) *endptr))
1024 0 : endptr++;
1025 842 : if (*endptr)
1026 0 : ereturn(escontext, 0,
1027 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1028 : errmsg("invalid input syntax for type %s: \"%s\"",
1029 : typname, s)));
1030 : }
1031 :
1032 842 : return result;
1033 : }
1034 :
1035 : /*
1036 : * pg_itoa: converts a signed 16-bit integer to its string representation
1037 : * and returns strlen(a).
1038 : *
1039 : * Caller must ensure that 'a' points to enough memory to hold the result
1040 : * (at least 7 bytes, counting a leading sign and trailing NUL).
1041 : *
1042 : * It doesn't seem worth implementing this separately.
1043 : */
1044 : int
1045 321562 : pg_itoa(int16 i, char *a)
1046 : {
1047 321562 : return pg_ltoa((int32) i, a);
1048 : }
1049 :
1050 : /*
1051 : * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
1052 : * not NUL-terminated, and returns the length of that string representation
1053 : *
1054 : * Caller must ensure that 'a' points to enough memory to hold the result (at
1055 : * least 10 bytes)
1056 : */
1057 : int
1058 15979182 : pg_ultoa_n(uint32 value, char *a)
1059 : {
1060 : int olength,
1061 15979182 : i = 0;
1062 :
1063 : /* Degenerate case */
1064 15979182 : if (value == 0)
1065 : {
1066 3021102 : *a = '0';
1067 3021102 : return 1;
1068 : }
1069 :
1070 12958080 : olength = decimalLength32(value);
1071 :
1072 : /* Compute the result string. */
1073 14893590 : while (value >= 10000)
1074 : {
1075 1935510 : const uint32 c = value - 10000 * (value / 10000);
1076 1935510 : const uint32 c0 = (c % 100) << 1;
1077 1935510 : const uint32 c1 = (c / 100) << 1;
1078 :
1079 1935510 : char *pos = a + olength - i;
1080 :
1081 1935510 : value /= 10000;
1082 :
1083 1935510 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1084 1935510 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1085 1935510 : i += 4;
1086 : }
1087 12958080 : if (value >= 100)
1088 : {
1089 5932252 : const uint32 c = (value % 100) << 1;
1090 :
1091 5932252 : char *pos = a + olength - i;
1092 :
1093 5932252 : value /= 100;
1094 :
1095 5932252 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1096 5932252 : i += 2;
1097 : }
1098 12958080 : if (value >= 10)
1099 : {
1100 6867458 : const uint32 c = value << 1;
1101 :
1102 6867458 : char *pos = a + olength - i;
1103 :
1104 6867458 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1105 : }
1106 : else
1107 : {
1108 6090622 : *a = (char) ('0' + value);
1109 : }
1110 :
1111 12958080 : return olength;
1112 : }
1113 :
1114 : /*
1115 : * pg_ltoa: converts a signed 32-bit integer to its string representation and
1116 : * returns strlen(a).
1117 : *
1118 : * It is the caller's responsibility to ensure that a is at least 12 bytes long,
1119 : * which is enough room to hold a minus sign, a maximally long int32, and the
1120 : * above terminating NUL.
1121 : */
1122 : int
1123 15850974 : pg_ltoa(int32 value, char *a)
1124 : {
1125 15850974 : uint32 uvalue = (uint32) value;
1126 15850974 : int len = 0;
1127 :
1128 15850974 : if (value < 0)
1129 : {
1130 122392 : uvalue = (uint32) 0 - uvalue;
1131 122392 : a[len++] = '-';
1132 : }
1133 15850974 : len += pg_ultoa_n(uvalue, a + len);
1134 15850974 : a[len] = '\0';
1135 15850974 : return len;
1136 : }
1137 :
1138 : /*
1139 : * Get the decimal representation, not NUL-terminated, and return the length of
1140 : * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
1141 : */
1142 : int
1143 604748 : pg_ulltoa_n(uint64 value, char *a)
1144 : {
1145 : int olength,
1146 604748 : i = 0;
1147 : uint32 value2;
1148 :
1149 : /* Degenerate case */
1150 604748 : if (value == 0)
1151 : {
1152 50554 : *a = '0';
1153 50554 : return 1;
1154 : }
1155 :
1156 554194 : olength = decimalLength64(value);
1157 :
1158 : /* Compute the result string. */
1159 570298 : while (value >= 100000000)
1160 : {
1161 16104 : const uint64 q = value / 100000000;
1162 16104 : uint32 value3 = (uint32) (value - 100000000 * q);
1163 :
1164 16104 : const uint32 c = value3 % 10000;
1165 16104 : const uint32 d = value3 / 10000;
1166 16104 : const uint32 c0 = (c % 100) << 1;
1167 16104 : const uint32 c1 = (c / 100) << 1;
1168 16104 : const uint32 d0 = (d % 100) << 1;
1169 16104 : const uint32 d1 = (d / 100) << 1;
1170 :
1171 16104 : char *pos = a + olength - i;
1172 :
1173 16104 : value = q;
1174 :
1175 16104 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1176 16104 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1177 16104 : memcpy(pos - 6, DIGIT_TABLE + d0, 2);
1178 16104 : memcpy(pos - 8, DIGIT_TABLE + d1, 2);
1179 16104 : i += 8;
1180 : }
1181 :
1182 : /* Switch to 32-bit for speed */
1183 554194 : value2 = (uint32) value;
1184 :
1185 554194 : if (value2 >= 10000)
1186 : {
1187 26186 : const uint32 c = value2 - 10000 * (value2 / 10000);
1188 26186 : const uint32 c0 = (c % 100) << 1;
1189 26186 : const uint32 c1 = (c / 100) << 1;
1190 :
1191 26186 : char *pos = a + olength - i;
1192 :
1193 26186 : value2 /= 10000;
1194 :
1195 26186 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1196 26186 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1197 26186 : i += 4;
1198 : }
1199 554194 : if (value2 >= 100)
1200 : {
1201 216582 : const uint32 c = (value2 % 100) << 1;
1202 216582 : char *pos = a + olength - i;
1203 :
1204 216582 : value2 /= 100;
1205 :
1206 216582 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1207 216582 : i += 2;
1208 : }
1209 554194 : if (value2 >= 10)
1210 : {
1211 125198 : const uint32 c = value2 << 1;
1212 125198 : char *pos = a + olength - i;
1213 :
1214 125198 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
1215 : }
1216 : else
1217 428996 : *a = (char) ('0' + value2);
1218 :
1219 554194 : return olength;
1220 : }
1221 :
1222 : /*
1223 : * pg_lltoa: converts a signed 64-bit integer to its string representation and
1224 : * returns strlen(a).
1225 : *
1226 : * Caller must ensure that 'a' points to enough memory to hold the result
1227 : * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
1228 : */
1229 : int
1230 302780 : pg_lltoa(int64 value, char *a)
1231 : {
1232 302780 : uint64 uvalue = value;
1233 302780 : int len = 0;
1234 :
1235 302780 : if (value < 0)
1236 : {
1237 2362 : uvalue = (uint64) 0 - uvalue;
1238 2362 : a[len++] = '-';
1239 : }
1240 :
1241 302780 : len += pg_ulltoa_n(uvalue, a + len);
1242 302780 : a[len] = '\0';
1243 302780 : return len;
1244 : }
1245 :
1246 :
1247 : /*
1248 : * pg_ultostr_zeropad
1249 : * Converts 'value' into a decimal string representation stored at 'str'.
1250 : * 'minwidth' specifies the minimum width of the result; any extra space
1251 : * is filled up by prefixing the number with zeros.
1252 : *
1253 : * Returns the ending address of the string result (the last character written
1254 : * plus 1). Note that no NUL terminator is written.
1255 : *
1256 : * The intended use-case for this function is to build strings that contain
1257 : * multiple individual numbers, for example:
1258 : *
1259 : * str = pg_ultostr_zeropad(str, hours, 2);
1260 : * *str++ = ':';
1261 : * str = pg_ultostr_zeropad(str, mins, 2);
1262 : * *str++ = ':';
1263 : * str = pg_ultostr_zeropad(str, secs, 2);
1264 : * *str = '\0';
1265 : *
1266 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1267 : * result.
1268 : */
1269 : char *
1270 794880 : pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
1271 : {
1272 : int len;
1273 :
1274 : Assert(minwidth > 0);
1275 :
1276 794880 : if (value < 100 && minwidth == 2) /* Short cut for common case */
1277 : {
1278 670034 : memcpy(str, DIGIT_TABLE + value * 2, 2);
1279 670034 : return str + 2;
1280 : }
1281 :
1282 124846 : len = pg_ultoa_n(value, str);
1283 124846 : if (len >= minwidth)
1284 124168 : return str + len;
1285 :
1286 678 : memmove(str + minwidth - len, str, len);
1287 678 : memset(str, '0', minwidth - len);
1288 678 : return str + minwidth;
1289 : }
1290 :
1291 : /*
1292 : * pg_ultostr
1293 : * Converts 'value' into a decimal string representation stored at 'str'.
1294 : *
1295 : * Returns the ending address of the string result (the last character written
1296 : * plus 1). Note that no NUL terminator is written.
1297 : *
1298 : * The intended use-case for this function is to build strings that contain
1299 : * multiple individual numbers, for example:
1300 : *
1301 : * str = pg_ultostr(str, a);
1302 : * *str++ = ' ';
1303 : * str = pg_ultostr(str, b);
1304 : * *str = '\0';
1305 : *
1306 : * Note: Caller must ensure that 'str' points to enough memory to hold the
1307 : * result.
1308 : */
1309 : char *
1310 3300 : pg_ultostr(char *str, uint32 value)
1311 : {
1312 3300 : int len = pg_ultoa_n(value, str);
1313 :
1314 3300 : return str + len;
1315 : }
|