Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * network_gist.c
4 : * GiST support for network types.
5 : *
6 : * The key thing to understand about this code is the definition of the
7 : * "union" of a set of INET/CIDR values. It works like this:
8 : * 1. If the values are not all of the same IP address family, the "union"
9 : * is a dummy value with family number zero, minbits zero, commonbits zero,
10 : * address all zeroes. Otherwise:
11 : * 2. The union has the common IP address family number.
12 : * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 : * of all the input values.
14 : * 4. Let C be the number of leading address bits that are in common among
15 : * all the input values (C ranges from 0 to ip_maxbits for the family).
16 : * 5. The union's commonbits value is C.
17 : * 6. The union's address value is the same as the common prefix for its
18 : * first C bits, and is zeroes to the right of that. The physical width
19 : * of the address value is ip_maxbits for the address family.
20 : *
21 : * In a leaf index entry (representing a single key), commonbits is equal to
22 : * ip_maxbits for the address family, minbits is the same as the represented
23 : * value's ip_bits, and the address is equal to the represented address.
24 : * Although it may appear that we're wasting a byte by storing the union
25 : * format and not just the represented INET/CIDR value in leaf keys, the
26 : * extra byte is actually "free" because of alignment considerations.
27 : *
28 : * Note that this design tracks minbits and commonbits independently; in any
29 : * given union value, either might be smaller than the other. This does not
30 : * help us much when descending the tree, because of the way inet comparison
31 : * is defined: at non-leaf nodes we can't compare more than minbits bits
32 : * even if we know them. However, it greatly improves the quality of split
33 : * decisions. Preliminary testing suggests that searches are as much as
34 : * twice as fast as for a simpler design in which a single field doubles as
35 : * the common prefix length and the minimum ip_bits value.
36 : *
37 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
38 : * Portions Copyright (c) 1994, Regents of the University of California
39 : *
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/adt/network_gist.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/socket.h>
49 :
50 : #include "access/gist.h"
51 : #include "access/stratnum.h"
52 : #include "utils/fmgrprotos.h"
53 : #include "utils/inet.h"
54 : #include "varatt.h"
55 :
56 : /*
57 : * Operator strategy numbers used in the GiST inet_ops opclass
58 : */
59 : #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
60 : #define INETSTRAT_EQ RTEqualStrategyNumber
61 : #define INETSTRAT_NE RTNotEqualStrategyNumber
62 : #define INETSTRAT_LT RTLessStrategyNumber
63 : #define INETSTRAT_LE RTLessEqualStrategyNumber
64 : #define INETSTRAT_GT RTGreaterStrategyNumber
65 : #define INETSTRAT_GE RTGreaterEqualStrategyNumber
66 : #define INETSTRAT_SUB RTSubStrategyNumber
67 : #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
68 : #define INETSTRAT_SUP RTSuperStrategyNumber
69 : #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
70 :
71 :
72 : /*
73 : * Representation of a GiST INET/CIDR index key. This is not identical to
74 : * INET/CIDR because we need to keep track of the length of the common address
75 : * prefix as well as the minimum netmask length. However, as long as it
76 : * follows varlena header rules, the core GiST code won't know the difference.
77 : * For simplicity we always use 1-byte-header varlena format.
78 : */
79 : typedef struct GistInetKey
80 : {
81 : uint8 va_header; /* varlena header --- don't touch directly */
82 : unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
83 : unsigned char minbits; /* minimum number of bits in netmask */
84 : unsigned char commonbits; /* number of common prefix bits in addresses */
85 : unsigned char ipaddr[16]; /* up to 128 bits of common address */
86 : } GistInetKey;
87 :
88 : #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
89 : #define InetKeyPGetDatum(X) PointerGetDatum(X)
90 :
91 : /*
92 : * Access macros; not really exciting, but we use these for notational
93 : * consistency with access to INET/CIDR values. Note that family-zero values
94 : * are stored with 4 bytes of address, not 16.
95 : */
96 : #define gk_ip_family(gkptr) ((gkptr)->family)
97 : #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
98 : #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
99 : #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
100 : #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
101 :
102 : /* These require that the family field has been set: */
103 : #define gk_ip_addrsize(gkptr) \
104 : (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
105 : #define gk_ip_maxbits(gkptr) \
106 : ip_family_maxbits(gk_ip_family(gkptr))
107 : #define SET_GK_VARSIZE(dst) \
108 : SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
109 :
110 :
111 : /*
112 : * The GiST query consistency check
113 : */
114 : Datum
115 1224 : inet_gist_consistent(PG_FUNCTION_ARGS)
116 : {
117 1224 : GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
118 1224 : inet *query = PG_GETARG_INET_PP(1);
119 1224 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
120 :
121 : /* Oid subtype = PG_GETARG_OID(3); */
122 1224 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
123 1224 : GistInetKey *key = DatumGetInetKeyP(ent->key);
124 : int minbits,
125 : order;
126 :
127 : /* All operators served by this function are exact. */
128 1224 : *recheck = false;
129 :
130 : /*
131 : * Check 0: different families
132 : *
133 : * If key represents multiple address families, its children could match
134 : * anything. This can only happen on an inner index page.
135 : */
136 1224 : if (gk_ip_family(key) == 0)
137 : {
138 : Assert(!GIST_LEAF(ent));
139 0 : PG_RETURN_BOOL(true);
140 : }
141 :
142 : /*
143 : * Check 1: different families
144 : *
145 : * Matching families do not help any of the strategies.
146 : */
147 1224 : if (gk_ip_family(key) != ip_family(query))
148 : {
149 216 : switch (strategy)
150 : {
151 36 : case INETSTRAT_LT:
152 : case INETSTRAT_LE:
153 36 : if (gk_ip_family(key) < ip_family(query))
154 0 : PG_RETURN_BOOL(true);
155 36 : break;
156 :
157 36 : case INETSTRAT_GE:
158 : case INETSTRAT_GT:
159 36 : if (gk_ip_family(key) > ip_family(query))
160 36 : PG_RETURN_BOOL(true);
161 0 : break;
162 :
163 18 : case INETSTRAT_NE:
164 18 : PG_RETURN_BOOL(true);
165 : }
166 : /* For all other cases, we can be sure there is no match */
167 162 : PG_RETURN_BOOL(false);
168 : }
169 :
170 : /*
171 : * Check 2: network bit count
172 : *
173 : * Network bit count (ip_bits) helps to check leaves for sub network and
174 : * sup network operators. At non-leaf nodes, we know every child value
175 : * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
176 : * cases too.
177 : */
178 1008 : switch (strategy)
179 : {
180 168 : case INETSTRAT_SUB:
181 168 : if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
182 120 : PG_RETURN_BOOL(false);
183 48 : break;
184 :
185 84 : case INETSTRAT_SUBEQ:
186 84 : if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
187 36 : PG_RETURN_BOOL(false);
188 48 : break;
189 :
190 168 : case INETSTRAT_SUPEQ:
191 : case INETSTRAT_EQ:
192 168 : if (gk_ip_minbits(key) > ip_bits(query))
193 48 : PG_RETURN_BOOL(false);
194 120 : break;
195 :
196 84 : case INETSTRAT_SUP:
197 84 : if (gk_ip_minbits(key) >= ip_bits(query))
198 48 : PG_RETURN_BOOL(false);
199 36 : break;
200 : }
201 :
202 : /*
203 : * Check 3: common network bits
204 : *
205 : * Compare available common prefix bits to the query, but not beyond
206 : * either the query's netmask or the minimum netmask among the represented
207 : * values. If these bits don't match the query, we have our answer (and
208 : * may or may not need to descend, depending on the operator). If they do
209 : * match, and we are not at a leaf, we descend in all cases.
210 : *
211 : * Note this is the final check for operators that only consider the
212 : * network part of the address.
213 : */
214 756 : minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
215 756 : minbits = Min(minbits, ip_bits(query));
216 :
217 756 : order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
218 :
219 756 : switch (strategy)
220 : {
221 276 : case INETSTRAT_SUB:
222 : case INETSTRAT_SUBEQ:
223 : case INETSTRAT_OVERLAPS:
224 : case INETSTRAT_SUPEQ:
225 : case INETSTRAT_SUP:
226 276 : PG_RETURN_BOOL(order == 0);
227 :
228 168 : case INETSTRAT_LT:
229 : case INETSTRAT_LE:
230 168 : if (order > 0)
231 0 : PG_RETURN_BOOL(false);
232 168 : if (order < 0 || !GIST_LEAF(ent))
233 96 : PG_RETURN_BOOL(true);
234 72 : break;
235 :
236 60 : case INETSTRAT_EQ:
237 60 : if (order != 0)
238 42 : PG_RETURN_BOOL(false);
239 18 : if (!GIST_LEAF(ent))
240 0 : PG_RETURN_BOOL(true);
241 18 : break;
242 :
243 168 : case INETSTRAT_GE:
244 : case INETSTRAT_GT:
245 168 : if (order < 0)
246 96 : PG_RETURN_BOOL(false);
247 72 : if (order > 0 || !GIST_LEAF(ent))
248 0 : PG_RETURN_BOOL(true);
249 72 : break;
250 :
251 84 : case INETSTRAT_NE:
252 84 : if (order != 0 || !GIST_LEAF(ent))
253 48 : PG_RETURN_BOOL(true);
254 36 : break;
255 : }
256 :
257 : /*
258 : * Remaining checks are only for leaves and basic comparison strategies.
259 : * See network_cmp_internal() in network.c for the implementation we need
260 : * to match. Note that in a leaf key, commonbits should equal the address
261 : * length, so we compared the whole network parts above.
262 : */
263 198 : Assert(GIST_LEAF(ent));
264 :
265 : /*
266 : * Check 4: network bit count
267 : *
268 : * Next step is to compare netmask widths.
269 : */
270 198 : switch (strategy)
271 : {
272 72 : case INETSTRAT_LT:
273 : case INETSTRAT_LE:
274 72 : if (gk_ip_minbits(key) < ip_bits(query))
275 0 : PG_RETURN_BOOL(true);
276 72 : if (gk_ip_minbits(key) > ip_bits(query))
277 36 : PG_RETURN_BOOL(false);
278 36 : break;
279 :
280 18 : case INETSTRAT_EQ:
281 18 : if (gk_ip_minbits(key) != ip_bits(query))
282 0 : PG_RETURN_BOOL(false);
283 18 : break;
284 :
285 72 : case INETSTRAT_GE:
286 : case INETSTRAT_GT:
287 72 : if (gk_ip_minbits(key) > ip_bits(query))
288 36 : PG_RETURN_BOOL(true);
289 36 : if (gk_ip_minbits(key) < ip_bits(query))
290 0 : PG_RETURN_BOOL(false);
291 36 : break;
292 :
293 36 : case INETSTRAT_NE:
294 36 : if (gk_ip_minbits(key) != ip_bits(query))
295 18 : PG_RETURN_BOOL(true);
296 18 : break;
297 : }
298 :
299 : /*
300 : * Check 5: whole address
301 : *
302 : * Netmask bit counts are the same, so check all the address bits.
303 : */
304 108 : order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
305 :
306 108 : switch (strategy)
307 : {
308 18 : case INETSTRAT_LT:
309 18 : PG_RETURN_BOOL(order < 0);
310 :
311 18 : case INETSTRAT_LE:
312 18 : PG_RETURN_BOOL(order <= 0);
313 :
314 18 : case INETSTRAT_EQ:
315 18 : PG_RETURN_BOOL(order == 0);
316 :
317 18 : case INETSTRAT_GE:
318 18 : PG_RETURN_BOOL(order >= 0);
319 :
320 18 : case INETSTRAT_GT:
321 18 : PG_RETURN_BOOL(order > 0);
322 :
323 18 : case INETSTRAT_NE:
324 18 : PG_RETURN_BOOL(order != 0);
325 : }
326 :
327 0 : elog(ERROR, "unknown strategy for inet GiST");
328 : PG_RETURN_BOOL(false); /* keep compiler quiet */
329 : }
330 :
331 : /*
332 : * Calculate parameters of the union of some GistInetKeys.
333 : *
334 : * Examine the keys in elements m..n inclusive of the GISTENTRY array,
335 : * and compute these output parameters:
336 : * *minfamily_p = minimum IP address family number
337 : * *maxfamily_p = maximum IP address family number
338 : * *minbits_p = minimum netmask width
339 : * *commonbits_p = number of leading bits in common among the addresses
340 : *
341 : * minbits and commonbits are forced to zero if there's more than one
342 : * address family.
343 : */
344 : static void
345 830 : calc_inet_union_params(GISTENTRY *ent,
346 : int m, int n,
347 : int *minfamily_p,
348 : int *maxfamily_p,
349 : int *minbits_p,
350 : int *commonbits_p)
351 : {
352 : int minfamily,
353 : maxfamily,
354 : minbits,
355 : commonbits;
356 : unsigned char *addr;
357 : GistInetKey *tmp;
358 : int i;
359 :
360 : /* Must be at least one key. */
361 : Assert(m <= n);
362 :
363 : /* Initialize variables using the first key. */
364 830 : tmp = DatumGetInetKeyP(ent[m].key);
365 830 : minfamily = maxfamily = gk_ip_family(tmp);
366 830 : minbits = gk_ip_minbits(tmp);
367 830 : commonbits = gk_ip_commonbits(tmp);
368 830 : addr = gk_ip_addr(tmp);
369 :
370 : /* Scan remaining keys. */
371 3240 : for (i = m + 1; i <= n; i++)
372 : {
373 2410 : tmp = DatumGetInetKeyP(ent[i].key);
374 :
375 : /* Determine range of family numbers */
376 2410 : if (minfamily > gk_ip_family(tmp))
377 0 : minfamily = gk_ip_family(tmp);
378 2410 : if (maxfamily < gk_ip_family(tmp))
379 0 : maxfamily = gk_ip_family(tmp);
380 :
381 : /* Find minimum minbits */
382 2410 : if (minbits > gk_ip_minbits(tmp))
383 0 : minbits = gk_ip_minbits(tmp);
384 :
385 : /* Find minimum number of bits in common */
386 2410 : if (commonbits > gk_ip_commonbits(tmp))
387 0 : commonbits = gk_ip_commonbits(tmp);
388 2410 : if (commonbits > 0)
389 1646 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
390 : }
391 :
392 : /* Force minbits/commonbits to zero if more than one family. */
393 830 : if (minfamily != maxfamily)
394 0 : minbits = commonbits = 0;
395 :
396 830 : *minfamily_p = minfamily;
397 830 : *maxfamily_p = maxfamily;
398 830 : *minbits_p = minbits;
399 830 : *commonbits_p = commonbits;
400 830 : }
401 :
402 : /*
403 : * Same as above, but the GISTENTRY elements to examine are those with
404 : * indices listed in the offsets[] array.
405 : */
406 : static void
407 0 : calc_inet_union_params_indexed(GISTENTRY *ent,
408 : OffsetNumber *offsets, int noffsets,
409 : int *minfamily_p,
410 : int *maxfamily_p,
411 : int *minbits_p,
412 : int *commonbits_p)
413 : {
414 : int minfamily,
415 : maxfamily,
416 : minbits,
417 : commonbits;
418 : unsigned char *addr;
419 : GistInetKey *tmp;
420 : int i;
421 :
422 : /* Must be at least one key. */
423 : Assert(noffsets > 0);
424 :
425 : /* Initialize variables using the first key. */
426 0 : tmp = DatumGetInetKeyP(ent[offsets[0]].key);
427 0 : minfamily = maxfamily = gk_ip_family(tmp);
428 0 : minbits = gk_ip_minbits(tmp);
429 0 : commonbits = gk_ip_commonbits(tmp);
430 0 : addr = gk_ip_addr(tmp);
431 :
432 : /* Scan remaining keys. */
433 0 : for (i = 1; i < noffsets; i++)
434 : {
435 0 : tmp = DatumGetInetKeyP(ent[offsets[i]].key);
436 :
437 : /* Determine range of family numbers */
438 0 : if (minfamily > gk_ip_family(tmp))
439 0 : minfamily = gk_ip_family(tmp);
440 0 : if (maxfamily < gk_ip_family(tmp))
441 0 : maxfamily = gk_ip_family(tmp);
442 :
443 : /* Find minimum minbits */
444 0 : if (minbits > gk_ip_minbits(tmp))
445 0 : minbits = gk_ip_minbits(tmp);
446 :
447 : /* Find minimum number of bits in common */
448 0 : if (commonbits > gk_ip_commonbits(tmp))
449 0 : commonbits = gk_ip_commonbits(tmp);
450 0 : if (commonbits > 0)
451 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
452 : }
453 :
454 : /* Force minbits/commonbits to zero if more than one family. */
455 0 : if (minfamily != maxfamily)
456 0 : minbits = commonbits = 0;
457 :
458 0 : *minfamily_p = minfamily;
459 0 : *maxfamily_p = maxfamily;
460 0 : *minbits_p = minbits;
461 0 : *commonbits_p = commonbits;
462 0 : }
463 :
464 : /*
465 : * Construct a GistInetKey representing a union value.
466 : *
467 : * Inputs are the family/minbits/commonbits values to use, plus a pointer to
468 : * the address field of one of the union inputs. (Since we're going to copy
469 : * just the bits-in-common, it doesn't matter which one.)
470 : */
471 : static GistInetKey *
472 830 : build_inet_union_key(int family, int minbits, int commonbits,
473 : unsigned char *addr)
474 : {
475 : GistInetKey *result;
476 :
477 : /* Make sure any unused bits are zeroed. */
478 830 : result = (GistInetKey *) palloc0(sizeof(GistInetKey));
479 :
480 830 : gk_ip_family(result) = family;
481 830 : gk_ip_minbits(result) = minbits;
482 830 : gk_ip_commonbits(result) = commonbits;
483 :
484 : /* Clone appropriate bytes of the address. */
485 830 : if (commonbits > 0)
486 488 : memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
487 :
488 : /* Clean any unwanted bits in the last partial byte. */
489 830 : if (commonbits % 8 != 0)
490 488 : gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
491 :
492 : /* Set varlena header correctly. */
493 830 : SET_GK_VARSIZE(result);
494 :
495 830 : return result;
496 : }
497 :
498 :
499 : /*
500 : * The GiST union function
501 : *
502 : * See comments at head of file for the definition of the union.
503 : */
504 : Datum
505 830 : inet_gist_union(PG_FUNCTION_ARGS)
506 : {
507 830 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
508 830 : GISTENTRY *ent = entryvec->vector;
509 : int minfamily,
510 : maxfamily,
511 : minbits,
512 : commonbits;
513 : unsigned char *addr;
514 : GistInetKey *tmp,
515 : *result;
516 :
517 : /* Determine parameters of the union. */
518 830 : calc_inet_union_params(ent, 0, entryvec->n - 1,
519 : &minfamily, &maxfamily,
520 : &minbits, &commonbits);
521 :
522 : /* If more than one family, emit family number zero. */
523 830 : if (minfamily != maxfamily)
524 0 : minfamily = 0;
525 :
526 : /* Initialize address using the first key. */
527 830 : tmp = DatumGetInetKeyP(ent[0].key);
528 830 : addr = gk_ip_addr(tmp);
529 :
530 : /* Construct the union value. */
531 830 : result = build_inet_union_key(minfamily, minbits, commonbits, addr);
532 :
533 830 : PG_RETURN_POINTER(result);
534 : }
535 :
536 : /*
537 : * The GiST compress function
538 : *
539 : * Convert an inet value to GistInetKey.
540 : */
541 : Datum
542 1324 : inet_gist_compress(PG_FUNCTION_ARGS)
543 : {
544 1324 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
545 : GISTENTRY *retval;
546 :
547 1324 : if (entry->leafkey)
548 : {
549 1302 : retval = palloc(sizeof(GISTENTRY));
550 1302 : if (DatumGetPointer(entry->key) != NULL)
551 : {
552 1302 : inet *in = DatumGetInetPP(entry->key);
553 : GistInetKey *r;
554 :
555 1302 : r = (GistInetKey *) palloc0(sizeof(GistInetKey));
556 :
557 1302 : gk_ip_family(r) = ip_family(in);
558 1302 : gk_ip_minbits(r) = ip_bits(in);
559 1302 : gk_ip_commonbits(r) = gk_ip_maxbits(r);
560 1302 : memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
561 1302 : SET_GK_VARSIZE(r);
562 :
563 1302 : gistentryinit(*retval, PointerGetDatum(r),
564 : entry->rel, entry->page,
565 : entry->offset, false);
566 : }
567 : else
568 : {
569 0 : gistentryinit(*retval, (Datum) 0,
570 : entry->rel, entry->page,
571 : entry->offset, false);
572 : }
573 : }
574 : else
575 22 : retval = entry;
576 1324 : PG_RETURN_POINTER(retval);
577 : }
578 :
579 : /*
580 : * We do not need a decompress function, because the other GiST inet
581 : * support functions work with the GistInetKey representation.
582 : */
583 :
584 : /*
585 : * The GiST fetch function
586 : *
587 : * Reconstruct the original inet datum from a GistInetKey.
588 : */
589 : Datum
590 20 : inet_gist_fetch(PG_FUNCTION_ARGS)
591 : {
592 20 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
593 20 : GistInetKey *key = DatumGetInetKeyP(entry->key);
594 : GISTENTRY *retval;
595 : inet *dst;
596 :
597 20 : dst = (inet *) palloc0(sizeof(inet));
598 :
599 20 : ip_family(dst) = gk_ip_family(key);
600 20 : ip_bits(dst) = gk_ip_minbits(key);
601 20 : memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
602 20 : SET_INET_VARSIZE(dst);
603 :
604 20 : retval = palloc(sizeof(GISTENTRY));
605 20 : gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
606 : entry->offset, false);
607 :
608 20 : PG_RETURN_POINTER(retval);
609 : }
610 :
611 : /*
612 : * The GiST page split penalty function
613 : *
614 : * Charge a large penalty if address family doesn't match, or a somewhat
615 : * smaller one if the new value would degrade the union's minbits
616 : * (minimum netmask width). Otherwise, penalty is inverse of the
617 : * new number of common address bits.
618 : */
619 : Datum
620 1446 : inet_gist_penalty(PG_FUNCTION_ARGS)
621 : {
622 1446 : GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
623 1446 : GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
624 1446 : float *penalty = (float *) PG_GETARG_POINTER(2);
625 1446 : GistInetKey *orig = DatumGetInetKeyP(origent->key),
626 1446 : *new = DatumGetInetKeyP(newent->key);
627 : int commonbits;
628 :
629 1446 : if (gk_ip_family(orig) == gk_ip_family(new))
630 : {
631 1446 : if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
632 : {
633 1446 : commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
634 1446 : Min(gk_ip_commonbits(orig),
635 : gk_ip_commonbits(new)));
636 1446 : if (commonbits > 0)
637 612 : *penalty = 1.0f / commonbits;
638 : else
639 834 : *penalty = 2;
640 : }
641 : else
642 0 : *penalty = 3;
643 : }
644 : else
645 0 : *penalty = 4;
646 :
647 1446 : PG_RETURN_POINTER(penalty);
648 : }
649 :
650 : /*
651 : * The GiST PickSplit method
652 : *
653 : * There are two ways to split. First one is to split by address families,
654 : * if there are multiple families appearing in the input.
655 : *
656 : * The second and more common way is to split by addresses. To achieve this,
657 : * determine the number of leading bits shared by all the keys, then split on
658 : * the next bit. (We don't currently consider the netmask widths while doing
659 : * this; should we?) If we fail to get a nontrivial split that way, split
660 : * 50-50.
661 : */
662 : Datum
663 0 : inet_gist_picksplit(PG_FUNCTION_ARGS)
664 : {
665 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
666 0 : GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
667 0 : GISTENTRY *ent = entryvec->vector;
668 : int minfamily,
669 : maxfamily,
670 : minbits,
671 : commonbits;
672 : unsigned char *addr;
673 : GistInetKey *tmp,
674 : *left_union,
675 : *right_union;
676 : int maxoff,
677 : nbytes;
678 : OffsetNumber i,
679 : *left,
680 : *right;
681 :
682 0 : maxoff = entryvec->n - 1;
683 0 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
684 :
685 0 : left = (OffsetNumber *) palloc(nbytes);
686 0 : right = (OffsetNumber *) palloc(nbytes);
687 :
688 0 : splitvec->spl_left = left;
689 0 : splitvec->spl_right = right;
690 :
691 0 : splitvec->spl_nleft = 0;
692 0 : splitvec->spl_nright = 0;
693 :
694 : /* Determine parameters of the union of all the inputs. */
695 0 : calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
696 : &minfamily, &maxfamily,
697 : &minbits, &commonbits);
698 :
699 0 : if (minfamily != maxfamily)
700 : {
701 : /* Multiple families, so split by family. */
702 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
703 : {
704 : /*
705 : * If there's more than 2 families, all but maxfamily go into the
706 : * left union. This could only happen if the inputs include some
707 : * IPv4, some IPv6, and some already-multiple-family unions.
708 : */
709 0 : tmp = DatumGetInetKeyP(ent[i].key);
710 0 : if (gk_ip_family(tmp) != maxfamily)
711 0 : left[splitvec->spl_nleft++] = i;
712 : else
713 0 : right[splitvec->spl_nright++] = i;
714 : }
715 : }
716 : else
717 : {
718 : /*
719 : * Split on the next bit after the common bits. If that yields a
720 : * trivial split, try the next bit position to the right. Repeat till
721 : * success; or if we run out of bits, do an arbitrary 50-50 split.
722 : */
723 0 : int maxbits = ip_family_maxbits(minfamily);
724 :
725 0 : while (commonbits < maxbits)
726 : {
727 : /* Split using the commonbits'th bit position. */
728 0 : int bitbyte = commonbits / 8;
729 0 : int bitmask = 0x80 >> (commonbits % 8);
730 :
731 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
732 :
733 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
734 : {
735 0 : tmp = DatumGetInetKeyP(ent[i].key);
736 0 : addr = gk_ip_addr(tmp);
737 0 : if ((addr[bitbyte] & bitmask) == 0)
738 0 : left[splitvec->spl_nleft++] = i;
739 : else
740 0 : right[splitvec->spl_nright++] = i;
741 : }
742 :
743 0 : if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
744 0 : break; /* success */
745 0 : commonbits++;
746 : }
747 :
748 0 : if (commonbits >= maxbits)
749 : {
750 : /* Failed ... do a 50-50 split. */
751 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
752 :
753 0 : for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
754 : {
755 0 : left[splitvec->spl_nleft++] = i;
756 : }
757 0 : for (; i <= maxoff; i = OffsetNumberNext(i))
758 : {
759 0 : right[splitvec->spl_nright++] = i;
760 : }
761 : }
762 : }
763 :
764 : /*
765 : * Compute the union value for each side from scratch. In most cases we
766 : * could approximate the union values with what we already know, but this
767 : * ensures that each side has minbits and commonbits set as high as
768 : * possible.
769 : */
770 0 : calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
771 : &minfamily, &maxfamily,
772 : &minbits, &commonbits);
773 0 : if (minfamily != maxfamily)
774 0 : minfamily = 0;
775 0 : tmp = DatumGetInetKeyP(ent[left[0]].key);
776 0 : addr = gk_ip_addr(tmp);
777 0 : left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
778 0 : splitvec->spl_ldatum = PointerGetDatum(left_union);
779 :
780 0 : calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
781 : &minfamily, &maxfamily,
782 : &minbits, &commonbits);
783 0 : if (minfamily != maxfamily)
784 0 : minfamily = 0;
785 0 : tmp = DatumGetInetKeyP(ent[right[0]].key);
786 0 : addr = gk_ip_addr(tmp);
787 0 : right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
788 0 : splitvec->spl_rdatum = PointerGetDatum(right_union);
789 :
790 0 : PG_RETURN_POINTER(splitvec);
791 : }
792 :
793 : /*
794 : * The GiST equality function
795 : */
796 : Datum
797 808 : inet_gist_same(PG_FUNCTION_ARGS)
798 : {
799 808 : GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
800 808 : GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
801 808 : bool *result = (bool *) PG_GETARG_POINTER(2);
802 :
803 2424 : *result = (gk_ip_family(left) == gk_ip_family(right) &&
804 808 : gk_ip_minbits(left) == gk_ip_minbits(right) &&
805 2424 : gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
806 808 : memcmp(gk_ip_addr(left), gk_ip_addr(right),
807 808 : gk_ip_addrsize(left)) == 0);
808 :
809 808 : PG_RETURN_POINTER(result);
810 : }
|