Line data Source code
1 : /*-----------------------------------------------------------------------
2 : * ascii.h
3 : *
4 : * Portions Copyright (c) 1999-2026, PostgreSQL Global Development Group
5 : *
6 : * src/include/utils/ascii.h
7 : *
8 : *-----------------------------------------------------------------------
9 : */
10 :
11 : #ifndef _ASCII_H_
12 : #define _ASCII_H_
13 :
14 : #include "port/simd.h"
15 :
16 : extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);
17 :
18 : /*
19 : * Verify a chunk of bytes for valid ASCII.
20 : *
21 : * Returns false if the input contains any zero bytes or bytes with the
22 : * high-bit set. Input len must be a multiple of the chunk size (8 or 16).
23 : */
24 : static inline bool
25 1825682 : is_valid_ascii(const unsigned char *s, int len)
26 : {
27 1825682 : const unsigned char *const s_end = s + len;
28 : Vector8 chunk;
29 1825682 : Vector8 highbit_cum = vector8_broadcast(0);
30 : #ifdef USE_NO_SIMD
31 : Vector8 zero_cum = vector8_broadcast(0x80);
32 : #endif
33 :
34 : Assert(len % sizeof(chunk) == 0);
35 :
36 5477046 : while (s < s_end)
37 : {
38 3651364 : vector8_load(&chunk, s);
39 :
40 : /* Capture any zero bytes in this chunk. */
41 : #ifdef USE_NO_SIMD
42 :
43 : /*
44 : * First, add 0x7f to each byte. This sets the high bit in each byte,
45 : * unless it was a zero. If any resulting high bits are zero, the
46 : * corresponding high bits in the zero accumulator will be cleared.
47 : *
48 : * If none of the bytes in the chunk had the high bit set, the max
49 : * value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
50 : * and we don't need to worry about carrying over to the next byte. If
51 : * any input bytes did have the high bit set, it doesn't matter
52 : * because we check for those separately.
53 : */
54 : zero_cum &= (chunk + vector8_broadcast(0x7F));
55 : #else
56 :
57 : /*
58 : * Set all bits in each lane of the highbit accumulator where input
59 : * bytes are zero.
60 : */
61 3651364 : highbit_cum = vector8_or(highbit_cum,
62 : vector8_eq(chunk, vector8_broadcast(0)));
63 : #endif
64 :
65 : /* Capture all set bits in this chunk. */
66 3651364 : highbit_cum = vector8_or(highbit_cum, chunk);
67 :
68 3651364 : s += sizeof(chunk);
69 : }
70 :
71 : /* Check if any high bits in the high bit accumulator got set. */
72 1825682 : if (vector8_is_highbit_set(highbit_cum))
73 588 : return false;
74 :
75 : #ifdef USE_NO_SIMD
76 : /* Check if any high bits in the zero accumulator got cleared. */
77 : if (zero_cum != vector8_broadcast(0x80))
78 : return false;
79 : #endif
80 :
81 1825094 : return true;
82 : }
83 :
84 : #endif /* _ASCII_H_ */
|