Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * numutils.c
4 : * utility functions for I/O of built-in numeric types.
5 : *
6 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/numutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 : #include <limits.h>
19 : #include <ctype.h>
20 :
21 : #include "common/int.h"
22 : #include "utils/builtins.h"
23 : #include "port/pg_bitutils.h"
24 :
/*
 * Lookup table holding the two-character decimal spelling of every value
 * from 0 to 99, packed back to back: the digits for n live at offsets 2n
 * and 2n + 1.  The integer-to-text routines in this file copy digit pairs
 * out of it with memcpy() instead of producing one digit at a time.
 *
 * The array is sized at exactly 200 bytes, so no terminating NUL is stored;
 * it must not be treated as a C string.
 */
static const char DIGIT_TABLE[200] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
40 :
41 : /*
42 : * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43 : */
44 : static inline int
45 14266006 : decimalLength32(const uint32 v)
46 : {
47 : int t;
48 : static const uint32 PowersOfTen[] = {
49 : 1, 10, 100,
50 : 1000, 10000, 100000,
51 : 1000000, 10000000, 100000000,
52 : 1000000000
53 : };
54 :
55 : /*
56 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
57 : * good-enough approximation of the base-2 logarithm of 10
58 : */
59 14266006 : t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60 14266006 : return t + (v >= PowersOfTen[t]);
61 : }
62 :
63 : static inline int
64 285278 : decimalLength64(const uint64 v)
65 : {
66 : int t;
67 : static const uint64 PowersOfTen[] = {
68 : UINT64CONST(1), UINT64CONST(10),
69 : UINT64CONST(100), UINT64CONST(1000),
70 : UINT64CONST(10000), UINT64CONST(100000),
71 : UINT64CONST(1000000), UINT64CONST(10000000),
72 : UINT64CONST(100000000), UINT64CONST(1000000000),
73 : UINT64CONST(10000000000), UINT64CONST(100000000000),
74 : UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75 : UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76 : UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77 : UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78 : };
79 :
80 : /*
81 : * Compute base-10 logarithm by dividing the base-2 logarithm by a
82 : * good-enough approximation of the base-2 logarithm of 10
83 : */
84 285278 : t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85 285278 : return t + (v >= PowersOfTen[t]);
86 : }
87 :
88 : /*
89 : * Convert input string to a signed 16 bit integer.
90 : *
91 : * Allows any number of leading or trailing whitespace characters. Will throw
92 : * ereport() upon bad input format or overflow.
93 : *
94 : * NB: Accumulate input as a negative number, to deal with two's complement
95 : * representation of the most negative number, which can't be represented as a
96 : * positive number.
97 : */
98 : int16
99 4672826 : pg_strtoint16(const char *s)
100 : {
101 4672826 : const char *ptr = s;
102 4672826 : int16 tmp = 0;
103 4672826 : bool neg = false;
104 :
105 : /* skip leading spaces */
106 4672886 : while (likely(*ptr) && isspace((unsigned char) *ptr))
107 60 : ptr++;
108 :
109 : /* handle sign */
110 4672826 : if (*ptr == '-')
111 : {
112 96972 : ptr++;
113 96972 : neg = true;
114 : }
115 4575854 : else if (*ptr == '+')
116 0 : ptr++;
117 :
118 : /* require at least one digit */
119 4672826 : if (unlikely(!isdigit((unsigned char) *ptr)))
120 30 : goto invalid_syntax;
121 :
122 : /* process digits */
123 9479896 : while (*ptr && isdigit((unsigned char) *ptr))
124 : {
125 4807106 : int8 digit = (*ptr++ - '0');
126 :
127 4807106 : if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
128 4807100 : unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
129 6 : goto out_of_range;
130 : }
131 :
132 : /* allow trailing whitespace, but not other trailing chars */
133 4672826 : while (*ptr != '\0' && isspace((unsigned char) *ptr))
134 36 : ptr++;
135 :
136 4672790 : if (unlikely(*ptr != '\0'))
137 20 : goto invalid_syntax;
138 :
139 4672770 : if (!neg)
140 : {
141 : /* could fail if input is most negative number */
142 4575804 : if (unlikely(tmp == PG_INT16_MIN))
143 0 : goto out_of_range;
144 4575804 : tmp = -tmp;
145 : }
146 :
147 4672770 : return tmp;
148 :
149 6 : out_of_range:
150 6 : ereport(ERROR,
151 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
152 : errmsg("value \"%s\" is out of range for type %s",
153 : s, "smallint")));
154 :
155 50 : invalid_syntax:
156 50 : ereport(ERROR,
157 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
158 : errmsg("invalid input syntax for type %s: \"%s\"",
159 : "smallint", s)));
160 :
161 : return 0; /* keep compiler quiet */
162 : }
163 :
164 : /*
165 : * Convert input string to a signed 32 bit integer.
166 : *
167 : * Allows any number of leading or trailing whitespace characters. Will throw
168 : * ereport() upon bad input format or overflow.
169 : *
170 : * NB: Accumulate input as a negative number, to deal with two's complement
171 : * representation of the most negative number, which can't be represented as a
172 : * positive number.
173 : */
174 : int32
175 6611250 : pg_strtoint32(const char *s)
176 : {
177 6611250 : const char *ptr = s;
178 6611250 : int32 tmp = 0;
179 6611250 : bool neg = false;
180 :
181 : /* skip leading spaces */
182 6611400 : while (likely(*ptr) && isspace((unsigned char) *ptr))
183 150 : ptr++;
184 :
185 : /* handle sign */
186 6611250 : if (*ptr == '-')
187 : {
188 347388 : ptr++;
189 347388 : neg = true;
190 : }
191 6263862 : else if (*ptr == '+')
192 0 : ptr++;
193 :
194 : /* require at least one digit */
195 6611250 : if (unlikely(!isdigit((unsigned char) *ptr)))
196 186 : goto invalid_syntax;
197 :
198 : /* process digits */
199 22064258 : while (*ptr && isdigit((unsigned char) *ptr))
200 : {
201 15453200 : int8 digit = (*ptr++ - '0');
202 :
203 15453200 : if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
204 15453194 : unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
205 6 : goto out_of_range;
206 : }
207 :
208 : /* allow trailing whitespace, but not other trailing chars */
209 6611142 : while (*ptr != '\0' && isspace((unsigned char) *ptr))
210 84 : ptr++;
211 :
212 6611058 : if (unlikely(*ptr != '\0'))
213 24 : goto invalid_syntax;
214 :
215 6611034 : if (!neg)
216 : {
217 : /* could fail if input is most negative number */
218 6263652 : if (unlikely(tmp == PG_INT32_MIN))
219 0 : goto out_of_range;
220 6263652 : tmp = -tmp;
221 : }
222 :
223 6611034 : return tmp;
224 :
225 6 : out_of_range:
226 6 : ereport(ERROR,
227 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
228 : errmsg("value \"%s\" is out of range for type %s",
229 : s, "integer")));
230 :
231 210 : invalid_syntax:
232 210 : ereport(ERROR,
233 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
234 : errmsg("invalid input syntax for type %s: \"%s\"",
235 : "integer", s)));
236 :
237 : return 0; /* keep compiler quiet */
238 : }
239 :
240 : /*
241 : * Convert input string to a signed 64 bit integer.
242 : *
243 : * Allows any number of leading or trailing whitespace characters. Will throw
244 : * ereport() upon bad input format or overflow.
245 : *
246 : * NB: Accumulate input as a negative number, to deal with two's complement
247 : * representation of the most negative number, which can't be represented as a
248 : * positive number.
249 : */
250 : int64
251 127330 : pg_strtoint64(const char *s)
252 : {
253 127330 : const char *ptr = s;
254 127330 : int64 tmp = 0;
255 127330 : bool neg = false;
256 :
257 : /*
258 : * Do our own scan, rather than relying on sscanf which might be broken
259 : * for long long.
260 : *
261 : * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
262 : * value as a negative number.
263 : */
264 :
265 : /* skip leading spaces */
266 127404 : while (*ptr && isspace((unsigned char) *ptr))
267 74 : ptr++;
268 :
269 : /* handle sign */
270 127330 : if (*ptr == '-')
271 : {
272 628 : ptr++;
273 628 : neg = true;
274 : }
275 126702 : else if (*ptr == '+')
276 42 : ptr++;
277 :
278 : /* require at least one digit */
279 127330 : if (unlikely(!isdigit((unsigned char) *ptr)))
280 24 : goto invalid_syntax;
281 :
282 : /* process digits */
283 473138 : while (*ptr && isdigit((unsigned char) *ptr))
284 : {
285 345850 : int8 digit = (*ptr++ - '0');
286 :
287 345850 : if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
288 345838 : unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
289 18 : goto out_of_range;
290 : }
291 :
292 : /* allow trailing whitespace, but not other trailing chars */
293 127354 : while (*ptr != '\0' && isspace((unsigned char) *ptr))
294 66 : ptr++;
295 :
296 127288 : if (unlikely(*ptr != '\0'))
297 6 : goto invalid_syntax;
298 :
299 127282 : if (!neg)
300 : {
301 : /* could fail if input is most negative number */
302 126672 : if (unlikely(tmp == PG_INT64_MIN))
303 6 : goto out_of_range;
304 126666 : tmp = -tmp;
305 : }
306 :
307 127276 : return tmp;
308 :
309 24 : out_of_range:
310 24 : ereport(ERROR,
311 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
312 : errmsg("value \"%s\" is out of range for type %s",
313 : s, "bigint")));
314 :
315 30 : invalid_syntax:
316 30 : ereport(ERROR,
317 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318 : errmsg("invalid input syntax for type %s: \"%s\"",
319 : "bigint", s)));
320 :
321 : return 0; /* keep compiler quiet */
322 : }
323 :
324 : /*
325 : * pg_itoa: converts a signed 16-bit integer to its string representation
326 : * and returns strlen(a).
327 : *
328 : * Caller must ensure that 'a' points to enough memory to hold the result
329 : * (at least 7 bytes, counting a leading sign and trailing NUL).
330 : *
331 : * It doesn't seem worth implementing this separately.
332 : */
333 : int
334 196870 : pg_itoa(int16 i, char *a)
335 : {
336 196870 : return pg_ltoa((int32) i, a);
337 : }
338 :
339 : /*
340 : * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
341 : * not NUL-terminated, and returns the length of that string representation
342 : *
343 : * Caller must ensure that 'a' points to enough memory to hold the result (at
344 : * least 10 bytes)
345 : */
346 : int
347 17938096 : pg_ultoa_n(uint32 value, char *a)
348 : {
349 : int olength,
350 17938096 : i = 0;
351 :
352 : /* Degenerate case */
353 17938096 : if (value == 0)
354 : {
355 3672090 : *a = '0';
356 3672090 : return 1;
357 : }
358 :
359 14266006 : olength = decimalLength32(value);
360 :
361 : /* Compute the result string. */
362 16734754 : while (value >= 10000)
363 : {
364 2468748 : const uint32 c = value - 10000 * (value / 10000);
365 2468748 : const uint32 c0 = (c % 100) << 1;
366 2468748 : const uint32 c1 = (c / 100) << 1;
367 :
368 2468748 : char *pos = a + olength - i;
369 :
370 2468748 : value /= 10000;
371 :
372 2468748 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
373 2468748 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
374 2468748 : i += 4;
375 : }
376 14266006 : if (value >= 100)
377 : {
378 5933204 : const uint32 c = (value % 100) << 1;
379 :
380 5933204 : char *pos = a + olength - i;
381 :
382 5933204 : value /= 100;
383 :
384 5933204 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
385 5933204 : i += 2;
386 : }
387 14266006 : if (value >= 10)
388 : {
389 6847796 : const uint32 c = value << 1;
390 :
391 6847796 : char *pos = a + olength - i;
392 :
393 6847796 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
394 : }
395 : else
396 : {
397 7418210 : *a = (char) ('0' + value);
398 : }
399 :
400 14266006 : return olength;
401 : }
402 :
403 : /*
404 : * pg_ltoa: converts a signed 32-bit integer to its string representation and
405 : * returns strlen(a).
406 : *
407 : * It is the caller's responsibility to ensure that a is at least 12 bytes long,
408 : * which is enough room to hold a minus sign, a maximally long int32, and the
409 : * above terminating NUL.
410 : */
411 : int
412 17811856 : pg_ltoa(int32 value, char *a)
413 : {
414 17811856 : uint32 uvalue = (uint32) value;
415 17811856 : int len = 0;
416 :
417 17811856 : if (value < 0)
418 : {
419 121698 : uvalue = (uint32) 0 - uvalue;
420 121698 : a[len++] = '-';
421 : }
422 17811856 : len += pg_ultoa_n(uvalue, a + len);
423 17811856 : a[len] = '\0';
424 17811856 : return len;
425 : }
426 :
427 : /*
428 : * Get the decimal representation, not NUL-terminated, and return the length of
429 : * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
430 : */
431 : int
432 297946 : pg_ulltoa_n(uint64 value, char *a)
433 : {
434 : int olength,
435 297946 : i = 0;
436 : uint32 value2;
437 :
438 : /* Degenerate case */
439 297946 : if (value == 0)
440 : {
441 12668 : *a = '0';
442 12668 : return 1;
443 : }
444 :
445 285278 : olength = decimalLength64(value);
446 :
447 : /* Compute the result string. */
448 300668 : while (value >= 100000000)
449 : {
450 15390 : const uint64 q = value / 100000000;
451 15390 : uint32 value2 = (uint32) (value - 100000000 * q);
452 :
453 15390 : const uint32 c = value2 % 10000;
454 15390 : const uint32 d = value2 / 10000;
455 15390 : const uint32 c0 = (c % 100) << 1;
456 15390 : const uint32 c1 = (c / 100) << 1;
457 15390 : const uint32 d0 = (d % 100) << 1;
458 15390 : const uint32 d1 = (d / 100) << 1;
459 :
460 15390 : char *pos = a + olength - i;
461 :
462 15390 : value = q;
463 :
464 15390 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
465 15390 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
466 15390 : memcpy(pos - 6, DIGIT_TABLE + d0, 2);
467 15390 : memcpy(pos - 8, DIGIT_TABLE + d1, 2);
468 15390 : i += 8;
469 : }
470 :
471 : /* Switch to 32-bit for speed */
472 285278 : value2 = (uint32) value;
473 :
474 285278 : if (value2 >= 10000)
475 : {
476 24392 : const uint32 c = value2 - 10000 * (value2 / 10000);
477 24392 : const uint32 c0 = (c % 100) << 1;
478 24392 : const uint32 c1 = (c / 100) << 1;
479 :
480 24392 : char *pos = a + olength - i;
481 :
482 24392 : value2 /= 10000;
483 :
484 24392 : memcpy(pos - 2, DIGIT_TABLE + c0, 2);
485 24392 : memcpy(pos - 4, DIGIT_TABLE + c1, 2);
486 24392 : i += 4;
487 : }
488 285278 : if (value2 >= 100)
489 : {
490 206250 : const uint32 c = (value2 % 100) << 1;
491 206250 : char *pos = a + olength - i;
492 :
493 206250 : value2 /= 100;
494 :
495 206250 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
496 206250 : i += 2;
497 : }
498 285278 : if (value2 >= 10)
499 : {
500 109088 : const uint32 c = value2 << 1;
501 109088 : char *pos = a + olength - i;
502 :
503 109088 : memcpy(pos - 2, DIGIT_TABLE + c, 2);
504 : }
505 : else
506 176190 : *a = (char) ('0' + value2);
507 :
508 285278 : return olength;
509 : }
510 :
511 : /*
512 : * pg_lltoa: converts a signed 64-bit integer to its string representation and
513 : * returns strlen(a).
514 : *
515 : * Caller must ensure that 'a' points to enough memory to hold the result
516 : * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
517 : */
518 : int
519 297946 : pg_lltoa(int64 value, char *a)
520 : {
521 297946 : uint64 uvalue = value;
522 297946 : int len = 0;
523 :
524 297946 : if (value < 0)
525 : {
526 2284 : uvalue = (uint64) 0 - uvalue;
527 2284 : a[len++] = '-';
528 : }
529 :
530 297946 : len += pg_ulltoa_n(uvalue, a + len);
531 297946 : a[len] = '\0';
532 297946 : return len;
533 : }
534 :
535 :
536 : /*
537 : * pg_ultostr_zeropad
538 : * Converts 'value' into a decimal string representation stored at 'str'.
539 : * 'minwidth' specifies the minimum width of the result; any extra space
540 : * is filled up by prefixing the number with zeros.
541 : *
542 : * Returns the ending address of the string result (the last character written
543 : * plus 1). Note that no NUL terminator is written.
544 : *
545 : * The intended use-case for this function is to build strings that contain
546 : * multiple individual numbers, for example:
547 : *
548 : * str = pg_ultostr_zeropad(str, hours, 2);
549 : * *str++ = ':';
550 : * str = pg_ultostr_zeropad(str, mins, 2);
551 : * *str++ = ':';
552 : * str = pg_ultostr_zeropad(str, secs, 2);
553 : * *str = '\0';
554 : *
555 : * Note: Caller must ensure that 'str' points to enough memory to hold the
556 : * result.
557 : */
558 : char *
559 782144 : pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
560 : {
561 : int len;
562 :
563 : Assert(minwidth > 0);
564 :
565 782144 : if (value < 100 && minwidth == 2) /* Short cut for common case */
566 : {
567 659168 : memcpy(str, DIGIT_TABLE + value * 2, 2);
568 659168 : return str + 2;
569 : }
570 :
571 122976 : len = pg_ultoa_n(value, str);
572 122976 : if (len >= minwidth)
573 122298 : return str + len;
574 :
575 678 : memmove(str + minwidth - len, str, len);
576 678 : memset(str, '0', minwidth - len);
577 678 : return str + minwidth;
578 : }
579 :
580 : /*
581 : * pg_ultostr
582 : * Converts 'value' into a decimal string representation stored at 'str'.
583 : *
584 : * Returns the ending address of the string result (the last character written
585 : * plus 1). Note that no NUL terminator is written.
586 : *
587 : * The intended use-case for this function is to build strings that contain
588 : * multiple individual numbers, for example:
589 : *
590 : * str = pg_ultostr(str, a);
591 : * *str++ = ' ';
592 : * str = pg_ultostr(str, b);
593 : * *str = '\0';
594 : *
595 : * Note: Caller must ensure that 'str' points to enough memory to hold the
596 : * result.
597 : */
598 : char *
599 3264 : pg_ultostr(char *str, uint32 value)
600 : {
601 3264 : int len = pg_ultoa_n(value, str);
602 :
603 3264 : return str + len;
604 : }
|