Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_lfind.h
4 : * Optimized linear search routines using SIMD intrinsics where
5 : * available.
6 : *
7 : * Copyright (c) 2022-2024, PostgreSQL Global Development Group
8 : *
9 : * IDENTIFICATION
10 : * src/include/port/pg_lfind.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef PG_LFIND_H
15 : #define PG_LFIND_H
16 :
17 : #include "port/simd.h"
18 :
19 : /*
20 : * pg_lfind8
21 : *
22 : * Return true if there is an element in 'base' that equals 'key', otherwise
23 : * return false.
24 : */
25 : static inline bool
26 5902102 : pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
27 : {
28 : uint32 i;
29 :
30 : /* round down to multiple of vector length */
31 5902102 : uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
32 : Vector8 chunk;
33 :
34 9501494 : for (i = 0; i < tail_idx; i += sizeof(Vector8))
35 : {
36 5902182 : vector8_load(&chunk, &base[i]);
37 5902182 : if (vector8_has(chunk, key))
38 2302790 : return true;
39 : }
40 :
41 : /* Process the remaining elements one at a time. */
42 3599418 : for (; i < nelem; i++)
43 : {
44 120 : if (key == base[i])
45 14 : return true;
46 : }
47 :
48 3599298 : return false;
49 : }
50 :
51 : /*
52 : * pg_lfind8_le
53 : *
54 : * Return true if there is an element in 'base' that is less than or equal to
55 : * 'key', otherwise return false.
56 : */
57 : static inline bool
58 649362 : pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
59 : {
60 : uint32 i;
61 :
62 : /* round down to multiple of vector length */
63 649362 : uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
64 : Vector8 chunk;
65 :
66 1298778 : for (i = 0; i < tail_idx; i += sizeof(Vector8))
67 : {
68 649442 : vector8_load(&chunk, &base[i]);
69 649442 : if (vector8_has_le(chunk, key))
70 26 : return true;
71 : }
72 :
73 : /* Process the remaining elements one at a time. */
74 649430 : for (; i < nelem; i++)
75 : {
76 120 : if (base[i] <= key)
77 26 : return true;
78 : }
79 :
80 649310 : return false;
81 : }
82 :
83 : /*
84 : * pg_lfind32_one_by_one_helper
85 : *
86 : * Searches the array of integers one-by-one. The caller is responsible for
87 : * ensuring that there are at least "nelem" integers in the array.
88 : */
89 : static inline bool
90 11043224 : pg_lfind32_one_by_one_helper(uint32 key, const uint32 *base, uint32 nelem)
91 : {
92 27252716 : for (uint32 i = 0; i < nelem; i++)
93 : {
94 16223650 : if (key == base[i])
95 14158 : return true;
96 : }
97 :
98 11029066 : return false;
99 : }
100 :
101 : #ifndef USE_NO_SIMD
102 : /*
103 : * pg_lfind32_simd_helper
104 : *
105 : * Searches one 4-register-block of integers. The caller is responsible for
106 : * ensuring that there are at least 4-registers-worth of integers remaining.
107 : */
108 : static inline bool
109 84 : pg_lfind32_simd_helper(const Vector32 keys, const uint32 *base)
110 : {
111 84 : const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
112 : Vector32 vals1,
113 : vals2,
114 : vals3,
115 : vals4,
116 : result1,
117 : result2,
118 : result3,
119 : result4,
120 : tmp1,
121 : tmp2,
122 : result;
123 :
124 : /* load the next block into 4 registers */
125 84 : vector32_load(&vals1, base);
126 84 : vector32_load(&vals2, &base[nelem_per_vector]);
127 84 : vector32_load(&vals3, &base[nelem_per_vector * 2]);
128 84 : vector32_load(&vals4, &base[nelem_per_vector * 3]);
129 :
130 : /* compare each value to the key */
131 84 : result1 = vector32_eq(keys, vals1);
132 84 : result2 = vector32_eq(keys, vals2);
133 84 : result3 = vector32_eq(keys, vals3);
134 84 : result4 = vector32_eq(keys, vals4);
135 :
136 : /* combine the results into a single variable */
137 84 : tmp1 = vector32_or(result1, result2);
138 84 : tmp2 = vector32_or(result3, result4);
139 84 : result = vector32_or(tmp1, tmp2);
140 :
141 : /* return whether there was a match */
142 84 : return vector32_is_highbit_set(result);
143 : }
144 : #endif /* ! USE_NO_SIMD */
145 :
146 : /*
147 : * pg_lfind32
148 : *
149 : * Return true if there is an element in 'base' that equals 'key', otherwise
150 : * return false.
151 : */
152 : static inline bool
153 11043244 : pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
154 : {
155 : #ifndef USE_NO_SIMD
156 11043244 : uint32 i = 0;
157 :
158 : /*
159 : * For better instruction-level parallelism, each loop iteration operates
160 : * on a block of four registers.
161 : */
162 11043244 : const Vector32 keys = vector32_broadcast(key); /* load copies of key */
163 11043244 : const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
164 11043244 : const uint32 nelem_per_iteration = 4 * nelem_per_vector;
165 :
166 : /* round down to multiple of elements per iteration */
167 11043244 : const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
168 :
169 : #if defined(USE_ASSERT_CHECKING)
170 : bool assert_result = pg_lfind32_one_by_one_helper(key, base, nelem);
171 : #endif
172 :
173 : /*
174 : * If there aren't enough elements for the SIMD code, use the standard
175 : * one-by-one linear search code.
176 : */
177 11043244 : if (nelem < nelem_per_iteration)
178 11043224 : return pg_lfind32_one_by_one_helper(key, base, nelem);
179 :
180 : /*
181 : * Process as many elements as possible with a block of 4 registers.
182 : */
183 : do
184 : {
185 68 : if (pg_lfind32_simd_helper(keys, &base[i]))
186 : {
187 : Assert(assert_result == true);
188 4 : return true;
189 : }
190 :
191 64 : i += nelem_per_iteration;
192 :
193 64 : } while (i < tail_idx);
194 :
195 : /*
196 : * Process the last 'nelem_per_iteration' elements in the array with a
197 : * 4-register block. This will cause us to check a subset of the elements
198 : * more than once, but that won't affect correctness, and testing has
199 : * demonstrated that this helps more cases than it harms.
200 : */
201 : Assert(assert_result == pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]));
202 16 : return pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]);
203 : #else
204 : /* Process the elements one at a time. */
205 : return pg_lfind32_one_by_one_helper(key, base, nelem);
206 : #endif
207 : }
208 :
209 : #endif /* PG_LFIND_H */
|