Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * network_gist.c
4 : * GiST support for network types.
5 : *
6 : * The key thing to understand about this code is the definition of the
7 : * "union" of a set of INET/CIDR values. It works like this:
8 : * 1. If the values are not all of the same IP address family, the "union"
9 : * is a dummy value with family number zero, minbits zero, commonbits zero,
10 : * address all zeroes. Otherwise:
11 : * 2. The union has the common IP address family number.
12 : * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 : * of all the input values.
14 : * 4. Let C be the number of leading address bits that are in common among
15 : * all the input values (C ranges from 0 to ip_maxbits for the family).
16 : * 5. The union's commonbits value is C.
17 : * 6. The union's address value is the same as the common prefix for its
18 : * first C bits, and is zeroes to the right of that. The physical width
19 : * of the address value is ip_maxbits for the address family.
20 : *
21 : * In a leaf index entry (representing a single key), commonbits is equal to
22 : * ip_maxbits for the address family, minbits is the same as the represented
23 : * value's ip_bits, and the address is equal to the represented address.
24 : * Although it may appear that we're wasting a byte by storing the union
25 : * format and not just the represented INET/CIDR value in leaf keys, the
26 : * extra byte is actually "free" because of alignment considerations.
27 : *
28 : * Note that this design tracks minbits and commonbits independently; in any
29 : * given union value, either might be smaller than the other. This does not
30 : * help us much when descending the tree, because of the way inet comparison
31 : * is defined: at non-leaf nodes we can't compare more than minbits bits
32 : * even if we know them. However, it greatly improves the quality of split
33 : * decisions. Preliminary testing suggests that searches are as much as
34 : * twice as fast as for a simpler design in which a single field doubles as
35 : * the common prefix length and the minimum ip_bits value.
36 : *
37 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
38 : * Portions Copyright (c) 1994, Regents of the University of California
39 : *
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/adt/network_gist.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/socket.h>
49 :
50 : #include "access/gist.h"
51 : #include "access/stratnum.h"
52 : #include "utils/fmgrprotos.h"
53 : #include "utils/inet.h"
54 : #include "varatt.h"
55 :
56 : /*
57 : * Operator strategy numbers used in the GiST inet_ops opclass
58 : */
59 : #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
60 : #define INETSTRAT_EQ RTEqualStrategyNumber
61 : #define INETSTRAT_NE RTNotEqualStrategyNumber
62 : #define INETSTRAT_LT RTLessStrategyNumber
63 : #define INETSTRAT_LE RTLessEqualStrategyNumber
64 : #define INETSTRAT_GT RTGreaterStrategyNumber
65 : #define INETSTRAT_GE RTGreaterEqualStrategyNumber
66 : #define INETSTRAT_SUB RTSubStrategyNumber
67 : #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
68 : #define INETSTRAT_SUP RTSuperStrategyNumber
69 : #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
70 :
71 :
72 : /*
73 : * Representation of a GiST INET/CIDR index key. This is not identical to
74 : * INET/CIDR because we need to keep track of the length of the common address
75 : * prefix as well as the minimum netmask length. However, as long as it
76 : * follows varlena header rules, the core GiST code won't know the difference.
77 : * For simplicity we always use 1-byte-header varlena format.
78 : */
79 : typedef struct GistInetKey
80 : {
81 : uint8 va_header; /* varlena header --- don't touch directly */
82 : unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
83 : unsigned char minbits; /* minimum number of bits in netmask */
84 : unsigned char commonbits; /* number of common prefix bits in addresses */
85 : unsigned char ipaddr[16]; /* up to 128 bits of common address */
86 : } GistInetKey;
87 :
88 : #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
89 : #define InetKeyPGetDatum(X) PointerGetDatum(X)
90 :
91 : /*
92 : * Access macros; not really exciting, but we use these for notational
93 : * consistency with access to INET/CIDR values. Note that family-zero values
94 : * are stored with 4 bytes of address, not 16.
95 : */
96 : #define gk_ip_family(gkptr) ((gkptr)->family)
97 : #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
98 : #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
99 : #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
100 : #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
101 :
102 : /* These require that the family field has been set: */
103 : #define gk_ip_addrsize(gkptr) \
104 : (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
105 : #define gk_ip_maxbits(gkptr) \
106 : ip_family_maxbits(gk_ip_family(gkptr))
107 : #define SET_GK_VARSIZE(dst) \
108 : SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
109 :
110 :
111 : /*
112 : * The GiST query consistency check
113 : */
114 : Datum
115 1224 : inet_gist_consistent(PG_FUNCTION_ARGS)
116 : {
117 1224 : GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
118 1224 : inet *query = PG_GETARG_INET_PP(1);
119 1224 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
120 : #ifdef NOT_USED
121 : Oid subtype = PG_GETARG_OID(3);
122 : #endif
123 1224 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
124 1224 : GistInetKey *key = DatumGetInetKeyP(ent->key);
125 : int minbits,
126 : order;
127 :
128 : /* All operators served by this function are exact. */
129 1224 : *recheck = false;
130 :
131 : /*
132 : * Check 0: different families
133 : *
134 : * If key represents multiple address families, its children could match
135 : * anything. This can only happen on an inner index page.
136 : */
137 1224 : if (gk_ip_family(key) == 0)
138 : {
139 : Assert(!GIST_LEAF(ent));
140 0 : PG_RETURN_BOOL(true);
141 : }
142 :
143 : /*
144 : * Check 1: different families
145 : *
146 : * Matching families do not help any of the strategies.
147 : */
148 1224 : if (gk_ip_family(key) != ip_family(query))
149 : {
150 216 : switch (strategy)
151 : {
152 36 : case INETSTRAT_LT:
153 : case INETSTRAT_LE:
154 36 : if (gk_ip_family(key) < ip_family(query))
155 0 : PG_RETURN_BOOL(true);
156 36 : break;
157 :
158 36 : case INETSTRAT_GE:
159 : case INETSTRAT_GT:
160 36 : if (gk_ip_family(key) > ip_family(query))
161 36 : PG_RETURN_BOOL(true);
162 0 : break;
163 :
164 18 : case INETSTRAT_NE:
165 18 : PG_RETURN_BOOL(true);
166 : }
167 : /* For all other cases, we can be sure there is no match */
168 162 : PG_RETURN_BOOL(false);
169 : }
170 :
171 : /*
172 : * Check 2: network bit count
173 : *
174 : * Network bit count (ip_bits) helps to check leaves for sub network and
175 : * sup network operators. At non-leaf nodes, we know every child value
176 : * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
177 : * cases too.
178 : */
179 1008 : switch (strategy)
180 : {
181 168 : case INETSTRAT_SUB:
182 168 : if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
183 120 : PG_RETURN_BOOL(false);
184 48 : break;
185 :
186 84 : case INETSTRAT_SUBEQ:
187 84 : if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
188 36 : PG_RETURN_BOOL(false);
189 48 : break;
190 :
191 168 : case INETSTRAT_SUPEQ:
192 : case INETSTRAT_EQ:
193 168 : if (gk_ip_minbits(key) > ip_bits(query))
194 48 : PG_RETURN_BOOL(false);
195 120 : break;
196 :
197 84 : case INETSTRAT_SUP:
198 84 : if (gk_ip_minbits(key) >= ip_bits(query))
199 48 : PG_RETURN_BOOL(false);
200 36 : break;
201 : }
202 :
203 : /*
204 : * Check 3: common network bits
205 : *
206 : * Compare available common prefix bits to the query, but not beyond
207 : * either the query's netmask or the minimum netmask among the represented
208 : * values. If these bits don't match the query, we have our answer (and
209 : * may or may not need to descend, depending on the operator). If they do
210 : * match, and we are not at a leaf, we descend in all cases.
211 : *
212 : * Note this is the final check for operators that only consider the
213 : * network part of the address.
214 : */
215 756 : minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
216 756 : minbits = Min(minbits, ip_bits(query));
217 :
218 756 : order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
219 :
220 756 : switch (strategy)
221 : {
222 276 : case INETSTRAT_SUB:
223 : case INETSTRAT_SUBEQ:
224 : case INETSTRAT_OVERLAPS:
225 : case INETSTRAT_SUPEQ:
226 : case INETSTRAT_SUP:
227 276 : PG_RETURN_BOOL(order == 0);
228 :
229 168 : case INETSTRAT_LT:
230 : case INETSTRAT_LE:
231 168 : if (order > 0)
232 0 : PG_RETURN_BOOL(false);
233 168 : if (order < 0 || !GIST_LEAF(ent))
234 96 : PG_RETURN_BOOL(true);
235 72 : break;
236 :
237 60 : case INETSTRAT_EQ:
238 60 : if (order != 0)
239 42 : PG_RETURN_BOOL(false);
240 18 : if (!GIST_LEAF(ent))
241 0 : PG_RETURN_BOOL(true);
242 18 : break;
243 :
244 168 : case INETSTRAT_GE:
245 : case INETSTRAT_GT:
246 168 : if (order < 0)
247 96 : PG_RETURN_BOOL(false);
248 72 : if (order > 0 || !GIST_LEAF(ent))
249 0 : PG_RETURN_BOOL(true);
250 72 : break;
251 :
252 84 : case INETSTRAT_NE:
253 84 : if (order != 0 || !GIST_LEAF(ent))
254 48 : PG_RETURN_BOOL(true);
255 36 : break;
256 : }
257 :
258 : /*
259 : * Remaining checks are only for leaves and basic comparison strategies.
260 : * See network_cmp_internal() in network.c for the implementation we need
261 : * to match. Note that in a leaf key, commonbits should equal the address
262 : * length, so we compared the whole network parts above.
263 : */
264 : Assert(GIST_LEAF(ent));
265 :
266 : /*
267 : * Check 4: network bit count
268 : *
269 : * Next step is to compare netmask widths.
270 : */
271 198 : switch (strategy)
272 : {
273 72 : case INETSTRAT_LT:
274 : case INETSTRAT_LE:
275 72 : if (gk_ip_minbits(key) < ip_bits(query))
276 0 : PG_RETURN_BOOL(true);
277 72 : if (gk_ip_minbits(key) > ip_bits(query))
278 36 : PG_RETURN_BOOL(false);
279 36 : break;
280 :
281 18 : case INETSTRAT_EQ:
282 18 : if (gk_ip_minbits(key) != ip_bits(query))
283 0 : PG_RETURN_BOOL(false);
284 18 : break;
285 :
286 72 : case INETSTRAT_GE:
287 : case INETSTRAT_GT:
288 72 : if (gk_ip_minbits(key) > ip_bits(query))
289 36 : PG_RETURN_BOOL(true);
290 36 : if (gk_ip_minbits(key) < ip_bits(query))
291 0 : PG_RETURN_BOOL(false);
292 36 : break;
293 :
294 36 : case INETSTRAT_NE:
295 36 : if (gk_ip_minbits(key) != ip_bits(query))
296 18 : PG_RETURN_BOOL(true);
297 18 : break;
298 : }
299 :
300 : /*
301 : * Check 5: whole address
302 : *
303 : * Netmask bit counts are the same, so check all the address bits.
304 : */
305 108 : order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
306 :
307 108 : switch (strategy)
308 : {
309 18 : case INETSTRAT_LT:
310 18 : PG_RETURN_BOOL(order < 0);
311 :
312 18 : case INETSTRAT_LE:
313 18 : PG_RETURN_BOOL(order <= 0);
314 :
315 18 : case INETSTRAT_EQ:
316 18 : PG_RETURN_BOOL(order == 0);
317 :
318 18 : case INETSTRAT_GE:
319 18 : PG_RETURN_BOOL(order >= 0);
320 :
321 18 : case INETSTRAT_GT:
322 18 : PG_RETURN_BOOL(order > 0);
323 :
324 18 : case INETSTRAT_NE:
325 18 : PG_RETURN_BOOL(order != 0);
326 : }
327 :
328 0 : elog(ERROR, "unknown strategy for inet GiST");
329 : PG_RETURN_BOOL(false); /* keep compiler quiet */
330 : }
331 :
332 : /*
333 : * Calculate parameters of the union of some GistInetKeys.
334 : *
335 : * Examine the keys in elements m..n inclusive of the GISTENTRY array,
336 : * and compute these output parameters:
337 : * *minfamily_p = minimum IP address family number
338 : * *maxfamily_p = maximum IP address family number
339 : * *minbits_p = minimum netmask width
340 : * *commonbits_p = number of leading bits in common among the addresses
341 : *
342 : * minbits and commonbits are forced to zero if there's more than one
343 : * address family.
344 : */
345 : static void
346 830 : calc_inet_union_params(GISTENTRY *ent,
347 : int m, int n,
348 : int *minfamily_p,
349 : int *maxfamily_p,
350 : int *minbits_p,
351 : int *commonbits_p)
352 : {
353 : int minfamily,
354 : maxfamily,
355 : minbits,
356 : commonbits;
357 : unsigned char *addr;
358 : GistInetKey *tmp;
359 : int i;
360 :
361 : /* Must be at least one key. */
362 : Assert(m <= n);
363 :
364 : /* Initialize variables using the first key. */
365 830 : tmp = DatumGetInetKeyP(ent[m].key);
366 830 : minfamily = maxfamily = gk_ip_family(tmp);
367 830 : minbits = gk_ip_minbits(tmp);
368 830 : commonbits = gk_ip_commonbits(tmp);
369 830 : addr = gk_ip_addr(tmp);
370 :
371 : /* Scan remaining keys. */
372 3240 : for (i = m + 1; i <= n; i++)
373 : {
374 2410 : tmp = DatumGetInetKeyP(ent[i].key);
375 :
376 : /* Determine range of family numbers */
377 2410 : if (minfamily > gk_ip_family(tmp))
378 0 : minfamily = gk_ip_family(tmp);
379 2410 : if (maxfamily < gk_ip_family(tmp))
380 0 : maxfamily = gk_ip_family(tmp);
381 :
382 : /* Find minimum minbits */
383 2410 : if (minbits > gk_ip_minbits(tmp))
384 0 : minbits = gk_ip_minbits(tmp);
385 :
386 : /* Find minimum number of bits in common */
387 2410 : if (commonbits > gk_ip_commonbits(tmp))
388 0 : commonbits = gk_ip_commonbits(tmp);
389 2410 : if (commonbits > 0)
390 1646 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
391 : }
392 :
393 : /* Force minbits/commonbits to zero if more than one family. */
394 830 : if (minfamily != maxfamily)
395 0 : minbits = commonbits = 0;
396 :
397 830 : *minfamily_p = minfamily;
398 830 : *maxfamily_p = maxfamily;
399 830 : *minbits_p = minbits;
400 830 : *commonbits_p = commonbits;
401 830 : }
402 :
403 : /*
404 : * Same as above, but the GISTENTRY elements to examine are those with
405 : * indices listed in the offsets[] array.
406 : */
407 : static void
408 0 : calc_inet_union_params_indexed(GISTENTRY *ent,
409 : OffsetNumber *offsets, int noffsets,
410 : int *minfamily_p,
411 : int *maxfamily_p,
412 : int *minbits_p,
413 : int *commonbits_p)
414 : {
415 : int minfamily,
416 : maxfamily,
417 : minbits,
418 : commonbits;
419 : unsigned char *addr;
420 : GistInetKey *tmp;
421 : int i;
422 :
423 : /* Must be at least one key. */
424 : Assert(noffsets > 0);
425 :
426 : /* Initialize variables using the first key. */
427 0 : tmp = DatumGetInetKeyP(ent[offsets[0]].key);
428 0 : minfamily = maxfamily = gk_ip_family(tmp);
429 0 : minbits = gk_ip_minbits(tmp);
430 0 : commonbits = gk_ip_commonbits(tmp);
431 0 : addr = gk_ip_addr(tmp);
432 :
433 : /* Scan remaining keys. */
434 0 : for (i = 1; i < noffsets; i++)
435 : {
436 0 : tmp = DatumGetInetKeyP(ent[offsets[i]].key);
437 :
438 : /* Determine range of family numbers */
439 0 : if (minfamily > gk_ip_family(tmp))
440 0 : minfamily = gk_ip_family(tmp);
441 0 : if (maxfamily < gk_ip_family(tmp))
442 0 : maxfamily = gk_ip_family(tmp);
443 :
444 : /* Find minimum minbits */
445 0 : if (minbits > gk_ip_minbits(tmp))
446 0 : minbits = gk_ip_minbits(tmp);
447 :
448 : /* Find minimum number of bits in common */
449 0 : if (commonbits > gk_ip_commonbits(tmp))
450 0 : commonbits = gk_ip_commonbits(tmp);
451 0 : if (commonbits > 0)
452 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
453 : }
454 :
455 : /* Force minbits/commonbits to zero if more than one family. */
456 0 : if (minfamily != maxfamily)
457 0 : minbits = commonbits = 0;
458 :
459 0 : *minfamily_p = minfamily;
460 0 : *maxfamily_p = maxfamily;
461 0 : *minbits_p = minbits;
462 0 : *commonbits_p = commonbits;
463 0 : }
464 :
465 : /*
466 : * Construct a GistInetKey representing a union value.
467 : *
468 : * Inputs are the family/minbits/commonbits values to use, plus a pointer to
469 : * the address field of one of the union inputs. (Since we're going to copy
470 : * just the bits-in-common, it doesn't matter which one.)
471 : */
472 : static GistInetKey *
473 830 : build_inet_union_key(int family, int minbits, int commonbits,
474 : unsigned char *addr)
475 : {
476 : GistInetKey *result;
477 :
478 : /* Make sure any unused bits are zeroed. */
479 830 : result = palloc0_object(GistInetKey);
480 :
481 830 : gk_ip_family(result) = family;
482 830 : gk_ip_minbits(result) = minbits;
483 830 : gk_ip_commonbits(result) = commonbits;
484 :
485 : /* Clone appropriate bytes of the address. */
486 830 : if (commonbits > 0)
487 488 : memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
488 :
489 : /* Clean any unwanted bits in the last partial byte. */
490 830 : if (commonbits % 8 != 0)
491 488 : gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
492 :
493 : /* Set varlena header correctly. */
494 830 : SET_GK_VARSIZE(result);
495 :
496 830 : return result;
497 : }
498 :
499 :
500 : /*
501 : * The GiST union function
502 : *
503 : * See comments at head of file for the definition of the union.
504 : */
505 : Datum
506 830 : inet_gist_union(PG_FUNCTION_ARGS)
507 : {
508 830 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
509 830 : GISTENTRY *ent = entryvec->vector;
510 : int minfamily,
511 : maxfamily,
512 : minbits,
513 : commonbits;
514 : unsigned char *addr;
515 : GistInetKey *tmp,
516 : *result;
517 :
518 : /* Determine parameters of the union. */
519 830 : calc_inet_union_params(ent, 0, entryvec->n - 1,
520 : &minfamily, &maxfamily,
521 : &minbits, &commonbits);
522 :
523 : /* If more than one family, emit family number zero. */
524 830 : if (minfamily != maxfamily)
525 0 : minfamily = 0;
526 :
527 : /* Initialize address using the first key. */
528 830 : tmp = DatumGetInetKeyP(ent[0].key);
529 830 : addr = gk_ip_addr(tmp);
530 :
531 : /* Construct the union value. */
532 830 : result = build_inet_union_key(minfamily, minbits, commonbits, addr);
533 :
534 830 : PG_RETURN_POINTER(result);
535 : }
536 :
537 : /*
538 : * The GiST compress function
539 : *
540 : * Convert an inet value to GistInetKey.
541 : */
542 : Datum
543 1324 : inet_gist_compress(PG_FUNCTION_ARGS)
544 : {
545 1324 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
546 : GISTENTRY *retval;
547 :
548 1324 : if (entry->leafkey)
549 : {
550 1302 : retval = palloc_object(GISTENTRY);
551 1302 : if (DatumGetPointer(entry->key) != NULL)
552 : {
553 1302 : inet *in = DatumGetInetPP(entry->key);
554 : GistInetKey *r;
555 :
556 1302 : r = palloc0_object(GistInetKey);
557 :
558 1302 : gk_ip_family(r) = ip_family(in);
559 1302 : gk_ip_minbits(r) = ip_bits(in);
560 1302 : gk_ip_commonbits(r) = gk_ip_maxbits(r);
561 1302 : memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
562 1302 : SET_GK_VARSIZE(r);
563 :
564 1302 : gistentryinit(*retval, PointerGetDatum(r),
565 : entry->rel, entry->page,
566 : entry->offset, false);
567 : }
568 : else
569 : {
570 0 : gistentryinit(*retval, (Datum) 0,
571 : entry->rel, entry->page,
572 : entry->offset, false);
573 : }
574 : }
575 : else
576 22 : retval = entry;
577 1324 : PG_RETURN_POINTER(retval);
578 : }
579 :
580 : /*
581 : * We do not need a decompress function, because the other GiST inet
582 : * support functions work with the GistInetKey representation.
583 : */
584 :
585 : /*
586 : * The GiST fetch function
587 : *
588 : * Reconstruct the original inet datum from a GistInetKey.
589 : */
590 : Datum
591 20 : inet_gist_fetch(PG_FUNCTION_ARGS)
592 : {
593 20 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
594 20 : GistInetKey *key = DatumGetInetKeyP(entry->key);
595 : GISTENTRY *retval;
596 : inet *dst;
597 :
598 20 : dst = palloc0_object(inet);
599 :
600 20 : ip_family(dst) = gk_ip_family(key);
601 20 : ip_bits(dst) = gk_ip_minbits(key);
602 20 : memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
603 20 : SET_INET_VARSIZE(dst);
604 :
605 20 : retval = palloc_object(GISTENTRY);
606 20 : gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
607 : entry->offset, false);
608 :
609 20 : PG_RETURN_POINTER(retval);
610 : }
611 :
612 : /*
613 : * The GiST page split penalty function
614 : *
615 : * Charge a large penalty if address family doesn't match, or a somewhat
616 : * smaller one if the new value would degrade the union's minbits
617 : * (minimum netmask width). Otherwise, penalty is inverse of the
618 : * new number of common address bits.
619 : */
620 : Datum
621 1446 : inet_gist_penalty(PG_FUNCTION_ARGS)
622 : {
623 1446 : GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
624 1446 : GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
625 1446 : float *penalty = (float *) PG_GETARG_POINTER(2);
626 1446 : GistInetKey *orig = DatumGetInetKeyP(origent->key),
627 1446 : *new = DatumGetInetKeyP(newent->key);
628 : int commonbits;
629 :
630 1446 : if (gk_ip_family(orig) == gk_ip_family(new))
631 : {
632 1446 : if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
633 : {
634 1446 : commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
635 1446 : Min(gk_ip_commonbits(orig),
636 : gk_ip_commonbits(new)));
637 1446 : if (commonbits > 0)
638 612 : *penalty = 1.0f / commonbits;
639 : else
640 834 : *penalty = 2;
641 : }
642 : else
643 0 : *penalty = 3;
644 : }
645 : else
646 0 : *penalty = 4;
647 :
648 1446 : PG_RETURN_POINTER(penalty);
649 : }
650 :
651 : /*
652 : * The GiST PickSplit method
653 : *
654 : * There are two ways to split. First one is to split by address families,
655 : * if there are multiple families appearing in the input.
656 : *
657 : * The second and more common way is to split by addresses. To achieve this,
658 : * determine the number of leading bits shared by all the keys, then split on
659 : * the next bit. (We don't currently consider the netmask widths while doing
660 : * this; should we?) If we fail to get a nontrivial split that way, split
661 : * 50-50.
662 : */
663 : Datum
664 0 : inet_gist_picksplit(PG_FUNCTION_ARGS)
665 : {
666 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
667 0 : GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
668 0 : GISTENTRY *ent = entryvec->vector;
669 : int minfamily,
670 : maxfamily,
671 : minbits,
672 : commonbits;
673 : unsigned char *addr;
674 : GistInetKey *tmp,
675 : *left_union,
676 : *right_union;
677 : int maxoff,
678 : nbytes;
679 : OffsetNumber i,
680 : *left,
681 : *right;
682 :
683 0 : maxoff = entryvec->n - 1;
684 0 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
685 :
686 0 : left = (OffsetNumber *) palloc(nbytes);
687 0 : right = (OffsetNumber *) palloc(nbytes);
688 :
689 0 : splitvec->spl_left = left;
690 0 : splitvec->spl_right = right;
691 :
692 0 : splitvec->spl_nleft = 0;
693 0 : splitvec->spl_nright = 0;
694 :
695 : /* Determine parameters of the union of all the inputs. */
696 0 : calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
697 : &minfamily, &maxfamily,
698 : &minbits, &commonbits);
699 :
700 0 : if (minfamily != maxfamily)
701 : {
702 : /* Multiple families, so split by family. */
703 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
704 : {
705 : /*
706 : * If there's more than 2 families, all but maxfamily go into the
707 : * left union. This could only happen if the inputs include some
708 : * IPv4, some IPv6, and some already-multiple-family unions.
709 : */
710 0 : tmp = DatumGetInetKeyP(ent[i].key);
711 0 : if (gk_ip_family(tmp) != maxfamily)
712 0 : left[splitvec->spl_nleft++] = i;
713 : else
714 0 : right[splitvec->spl_nright++] = i;
715 : }
716 : }
717 : else
718 : {
719 : /*
720 : * Split on the next bit after the common bits. If that yields a
721 : * trivial split, try the next bit position to the right. Repeat till
722 : * success; or if we run out of bits, do an arbitrary 50-50 split.
723 : */
724 0 : int maxbits = ip_family_maxbits(minfamily);
725 :
726 0 : while (commonbits < maxbits)
727 : {
728 : /* Split using the commonbits'th bit position. */
729 0 : int bitbyte = commonbits / 8;
730 0 : int bitmask = 0x80 >> (commonbits % 8);
731 :
732 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
733 :
734 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
735 : {
736 0 : tmp = DatumGetInetKeyP(ent[i].key);
737 0 : addr = gk_ip_addr(tmp);
738 0 : if ((addr[bitbyte] & bitmask) == 0)
739 0 : left[splitvec->spl_nleft++] = i;
740 : else
741 0 : right[splitvec->spl_nright++] = i;
742 : }
743 :
744 0 : if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
745 0 : break; /* success */
746 0 : commonbits++;
747 : }
748 :
749 0 : if (commonbits >= maxbits)
750 : {
751 : /* Failed ... do a 50-50 split. */
752 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
753 :
754 0 : for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
755 : {
756 0 : left[splitvec->spl_nleft++] = i;
757 : }
758 0 : for (; i <= maxoff; i = OffsetNumberNext(i))
759 : {
760 0 : right[splitvec->spl_nright++] = i;
761 : }
762 : }
763 : }
764 :
765 : /*
766 : * Compute the union value for each side from scratch. In most cases we
767 : * could approximate the union values with what we already know, but this
768 : * ensures that each side has minbits and commonbits set as high as
769 : * possible.
770 : */
771 0 : calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
772 : &minfamily, &maxfamily,
773 : &minbits, &commonbits);
774 0 : if (minfamily != maxfamily)
775 0 : minfamily = 0;
776 0 : tmp = DatumGetInetKeyP(ent[left[0]].key);
777 0 : addr = gk_ip_addr(tmp);
778 0 : left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
779 0 : splitvec->spl_ldatum = PointerGetDatum(left_union);
780 :
781 0 : calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
782 : &minfamily, &maxfamily,
783 : &minbits, &commonbits);
784 0 : if (minfamily != maxfamily)
785 0 : minfamily = 0;
786 0 : tmp = DatumGetInetKeyP(ent[right[0]].key);
787 0 : addr = gk_ip_addr(tmp);
788 0 : right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
789 0 : splitvec->spl_rdatum = PointerGetDatum(right_union);
790 :
791 0 : PG_RETURN_POINTER(splitvec);
792 : }
793 :
794 : /*
795 : * The GiST equality function
796 : */
797 : Datum
798 808 : inet_gist_same(PG_FUNCTION_ARGS)
799 : {
800 808 : GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
801 808 : GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
802 808 : bool *result = (bool *) PG_GETARG_POINTER(2);
803 :
804 2424 : *result = (gk_ip_family(left) == gk_ip_family(right) &&
805 808 : gk_ip_minbits(left) == gk_ip_minbits(right) &&
806 2424 : gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
807 808 : memcmp(gk_ip_addr(left), gk_ip_addr(right),
808 808 : gk_ip_addrsize(left)) == 0);
809 :
810 808 : PG_RETURN_POINTER(result);
811 : }
|