LCOV - code coverage report
Current view: top level - src/backend/utils/adt - mac.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 157 191 82.2 %
Date: 2024-04-25 06:13:26 Functions: 20 22 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mac.c
       4             :  *    PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
       5             :  *
       6             :  * Portions Copyright (c) 1998-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *        src/backend/utils/adt/mac.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : 
      16             : #include "common/hashfn.h"
      17             : #include "lib/hyperloglog.h"
      18             : #include "libpq/pqformat.h"
      19             : #include "port/pg_bswap.h"
      20             : #include "utils/fmgrprotos.h"
      21             : #include "utils/guc.h"
      22             : #include "utils/inet.h"
      23             : #include "utils/sortsupport.h"
      24             : 
      25             : 
      26             : /*
      27             :  *  Utility macros used for sorting and comparing:
      28             :  */
      29             : 
      30             : #define hibits(addr) \
      31             :   ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c)))
      32             : 
      33             : #define lobits(addr) \
      34             :   ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
      35             : 
/*
 * sortsupport for macaddr
 *
 * Private working state for abbreviated-key sorting.  An instance is
 * allocated by macaddr_sortsupport() and stored in ssup->ssup_extra; it is
 * updated by macaddr_abbrev_convert() and consulted by
 * macaddr_abbrev_abort().
 */
typedef struct
{
    int64       input_count;    /* number of non-null values seen */
    bool        estimating;     /* true if estimating cardinality */

    hyperLogLogState abbr_card; /* cardinality estimator */
} macaddr_sortsupport_state;
      44             : 
/* Forward declarations of the file-local comparison and sortsupport helpers */
static int  macaddr_cmp_internal(macaddr *a1, macaddr *a2);
static int  macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
      49             : 
      50             : /*
      51             :  *  MAC address reader.  Accepts several common notations.
      52             :  */
      53             : 
      54             : Datum
      55        3728 : macaddr_in(PG_FUNCTION_ARGS)
      56             : {
      57        3728 :     char       *str = PG_GETARG_CSTRING(0);
      58        3728 :     Node       *escontext = fcinfo->context;
      59             :     macaddr    *result;
      60             :     int         a,
      61             :                 b,
      62             :                 c,
      63             :                 d,
      64             :                 e,
      65             :                 f;
      66             :     char        junk[2];
      67             :     int         count;
      68             : 
      69             :     /* %1s matches iff there is trailing non-whitespace garbage */
      70             : 
      71        3728 :     count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
      72             :                    &a, &b, &c, &d, &e, &f, junk);
      73        3728 :     if (count != 6)
      74          72 :         count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
      75             :                        &a, &b, &c, &d, &e, &f, junk);
      76        3728 :     if (count != 6)
      77          66 :         count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
      78             :                        &a, &b, &c, &d, &e, &f, junk);
      79        3728 :     if (count != 6)
      80          60 :         count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
      81             :                        &a, &b, &c, &d, &e, &f, junk);
      82        3728 :     if (count != 6)
      83          54 :         count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
      84             :                        &a, &b, &c, &d, &e, &f, junk);
      85        3728 :     if (count != 6)
      86          48 :         count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
      87             :                        &a, &b, &c, &d, &e, &f, junk);
      88        3728 :     if (count != 6)
      89          42 :         count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
      90             :                        &a, &b, &c, &d, &e, &f, junk);
      91        3728 :     if (count != 6)
      92          36 :         ereturn(escontext, (Datum) 0,
      93             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
      94             :                  errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
      95             :                         str)));
      96             : 
      97        3692 :     if ((a < 0) || (a > 255) || (b < 0) || (b > 255) ||
      98        3692 :         (c < 0) || (c > 255) || (d < 0) || (d > 255) ||
      99        3692 :         (e < 0) || (e > 255) || (f < 0) || (f > 255))
     100           0 :         ereturn(escontext, (Datum) 0,
     101             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     102             :                  errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));
     103             : 
     104        3692 :     result = (macaddr *) palloc(sizeof(macaddr));
     105             : 
     106        3692 :     result->a = a;
     107        3692 :     result->b = b;
     108        3692 :     result->c = c;
     109        3692 :     result->d = d;
     110        3692 :     result->e = e;
     111        3692 :     result->f = f;
     112             : 
     113        3692 :     PG_RETURN_MACADDR_P(result);
     114             : }
     115             : 
     116             : /*
     117             :  *  MAC address output function.  Fixed format.
     118             :  */
     119             : 
     120             : Datum
     121        3208 : macaddr_out(PG_FUNCTION_ARGS)
     122             : {
     123        3208 :     macaddr    *addr = PG_GETARG_MACADDR_P(0);
     124             :     char       *result;
     125             : 
     126        3208 :     result = (char *) palloc(32);
     127             : 
     128        3208 :     snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
     129        3208 :              addr->a, addr->b, addr->c, addr->d, addr->e, addr->f);
     130             : 
     131        3208 :     PG_RETURN_CSTRING(result);
     132             : }
     133             : 
     134             : /*
     135             :  *      macaddr_recv            - converts external binary format to macaddr
     136             :  *
     137             :  * The external representation is just the six bytes, MSB first.
     138             :  */
     139             : Datum
     140           0 : macaddr_recv(PG_FUNCTION_ARGS)
     141             : {
     142           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     143             :     macaddr    *addr;
     144             : 
     145           0 :     addr = (macaddr *) palloc(sizeof(macaddr));
     146             : 
     147           0 :     addr->a = pq_getmsgbyte(buf);
     148           0 :     addr->b = pq_getmsgbyte(buf);
     149           0 :     addr->c = pq_getmsgbyte(buf);
     150           0 :     addr->d = pq_getmsgbyte(buf);
     151           0 :     addr->e = pq_getmsgbyte(buf);
     152           0 :     addr->f = pq_getmsgbyte(buf);
     153             : 
     154           0 :     PG_RETURN_MACADDR_P(addr);
     155             : }
     156             : 
     157             : /*
     158             :  *      macaddr_send            - converts macaddr to binary format
     159             :  */
     160             : Datum
     161           0 : macaddr_send(PG_FUNCTION_ARGS)
     162             : {
     163           0 :     macaddr    *addr = PG_GETARG_MACADDR_P(0);
     164             :     StringInfoData buf;
     165             : 
     166           0 :     pq_begintypsend(&buf);
     167           0 :     pq_sendbyte(&buf, addr->a);
     168           0 :     pq_sendbyte(&buf, addr->b);
     169           0 :     pq_sendbyte(&buf, addr->c);
     170           0 :     pq_sendbyte(&buf, addr->d);
     171           0 :     pq_sendbyte(&buf, addr->e);
     172           0 :     pq_sendbyte(&buf, addr->f);
     173           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     174             : }
     175             : 
     176             : 
     177             : /*
     178             :  *  Comparison function for sorting:
     179             :  */
     180             : 
     181             : static int
     182       61476 : macaddr_cmp_internal(macaddr *a1, macaddr *a2)
     183             : {
     184       61476 :     if (hibits(a1) < hibits(a2))
     185       25110 :         return -1;
     186       36366 :     else if (hibits(a1) > hibits(a2))
     187       24824 :         return 1;
     188       11542 :     else if (lobits(a1) < lobits(a2))
     189          58 :         return -1;
     190       11484 :     else if (lobits(a1) > lobits(a2))
     191          50 :         return 1;
     192             :     else
     193       11434 :         return 0;
     194             : }
     195             : 
     196             : Datum
     197       13278 : macaddr_cmp(PG_FUNCTION_ARGS)
     198             : {
     199       13278 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     200       13278 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     201             : 
     202       13278 :     PG_RETURN_INT32(macaddr_cmp_internal(a1, a2));
     203             : }
     204             : 
     205             : /*
     206             :  *  Boolean comparisons.
     207             :  */
     208             : 
     209             : Datum
     210       20014 : macaddr_lt(PG_FUNCTION_ARGS)
     211             : {
     212       20014 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     213       20014 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     214             : 
     215       20014 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0);
     216             : }
     217             : 
     218             : Datum
     219        6236 : macaddr_le(PG_FUNCTION_ARGS)
     220             : {
     221        6236 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     222        6236 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     223             : 
     224        6236 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0);
     225             : }
     226             : 
     227             : Datum
     228        7658 : macaddr_eq(PG_FUNCTION_ARGS)
     229             : {
     230        7658 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     231        7658 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     232             : 
     233        7658 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0);
     234             : }
     235             : 
     236             : Datum
     237        4914 : macaddr_ge(PG_FUNCTION_ARGS)
     238             : {
     239        4914 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     240        4914 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     241             : 
     242        4914 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0);
     243             : }
     244             : 
     245             : Datum
     246        9028 : macaddr_gt(PG_FUNCTION_ARGS)
     247             : {
     248        9028 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     249        9028 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     250             : 
     251        9028 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0);
     252             : }
     253             : 
     254             : Datum
     255          24 : macaddr_ne(PG_FUNCTION_ARGS)
     256             : {
     257          24 :     macaddr    *a1 = PG_GETARG_MACADDR_P(0);
     258          24 :     macaddr    *a2 = PG_GETARG_MACADDR_P(1);
     259             : 
     260          24 :     PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0);
     261             : }
     262             : 
     263             : /*
     264             :  * Support function for hash indexes on macaddr.
     265             :  */
     266             : Datum
     267        2334 : hashmacaddr(PG_FUNCTION_ARGS)
     268             : {
     269        2334 :     macaddr    *key = PG_GETARG_MACADDR_P(0);
     270             : 
     271        2334 :     return hash_any((unsigned char *) key, sizeof(macaddr));
     272             : }
     273             : 
     274             : Datum
     275          60 : hashmacaddrextended(PG_FUNCTION_ARGS)
     276             : {
     277          60 :     macaddr    *key = PG_GETARG_MACADDR_P(0);
     278             : 
     279          60 :     return hash_any_extended((unsigned char *) key, sizeof(macaddr),
     280          60 :                              PG_GETARG_INT64(1));
     281             : }
     282             : 
     283             : /*
     284             :  * Arithmetic functions: bitwise NOT, AND, OR.
     285             :  */
     286             : Datum
     287          72 : macaddr_not(PG_FUNCTION_ARGS)
     288             : {
     289          72 :     macaddr    *addr = PG_GETARG_MACADDR_P(0);
     290             :     macaddr    *result;
     291             : 
     292          72 :     result = (macaddr *) palloc(sizeof(macaddr));
     293          72 :     result->a = ~addr->a;
     294          72 :     result->b = ~addr->b;
     295          72 :     result->c = ~addr->c;
     296          72 :     result->d = ~addr->d;
     297          72 :     result->e = ~addr->e;
     298          72 :     result->f = ~addr->f;
     299          72 :     PG_RETURN_MACADDR_P(result);
     300             : }
     301             : 
     302             : Datum
     303          72 : macaddr_and(PG_FUNCTION_ARGS)
     304             : {
     305          72 :     macaddr    *addr1 = PG_GETARG_MACADDR_P(0);
     306          72 :     macaddr    *addr2 = PG_GETARG_MACADDR_P(1);
     307             :     macaddr    *result;
     308             : 
     309          72 :     result = (macaddr *) palloc(sizeof(macaddr));
     310          72 :     result->a = addr1->a & addr2->a;
     311          72 :     result->b = addr1->b & addr2->b;
     312          72 :     result->c = addr1->c & addr2->c;
     313          72 :     result->d = addr1->d & addr2->d;
     314          72 :     result->e = addr1->e & addr2->e;
     315          72 :     result->f = addr1->f & addr2->f;
     316          72 :     PG_RETURN_MACADDR_P(result);
     317             : }
     318             : 
     319             : Datum
     320          72 : macaddr_or(PG_FUNCTION_ARGS)
     321             : {
     322          72 :     macaddr    *addr1 = PG_GETARG_MACADDR_P(0);
     323          72 :     macaddr    *addr2 = PG_GETARG_MACADDR_P(1);
     324             :     macaddr    *result;
     325             : 
     326          72 :     result = (macaddr *) palloc(sizeof(macaddr));
     327          72 :     result->a = addr1->a | addr2->a;
     328          72 :     result->b = addr1->b | addr2->b;
     329          72 :     result->c = addr1->c | addr2->c;
     330          72 :     result->d = addr1->d | addr2->d;
     331          72 :     result->e = addr1->e | addr2->e;
     332          72 :     result->f = addr1->f | addr2->f;
     333          72 :     PG_RETURN_MACADDR_P(result);
     334             : }
     335             : 
     336             : /*
     337             :  *  Truncation function to allow comparing mac manufacturers.
     338             :  *  From suggestion by Alex Pilosov <alex@pilosoft.com>
     339             :  */
     340             : Datum
     341          72 : macaddr_trunc(PG_FUNCTION_ARGS)
     342             : {
     343          72 :     macaddr    *addr = PG_GETARG_MACADDR_P(0);
     344             :     macaddr    *result;
     345             : 
     346          72 :     result = (macaddr *) palloc(sizeof(macaddr));
     347             : 
     348          72 :     result->a = addr->a;
     349          72 :     result->b = addr->b;
     350          72 :     result->c = addr->c;
     351          72 :     result->d = 0;
     352          72 :     result->e = 0;
     353          72 :     result->f = 0;
     354             : 
     355          72 :     PG_RETURN_MACADDR_P(result);
     356             : }
     357             : 
     358             : /*
     359             :  * SortSupport strategy function. Populates a SortSupport struct with the
     360             :  * information necessary to use comparison by abbreviated keys.
     361             :  */
     362             : Datum
     363          20 : macaddr_sortsupport(PG_FUNCTION_ARGS)
     364             : {
     365          20 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
     366             : 
     367          20 :     ssup->comparator = macaddr_fast_cmp;
     368          20 :     ssup->ssup_extra = NULL;
     369             : 
     370          20 :     if (ssup->abbreviate)
     371             :     {
     372             :         macaddr_sortsupport_state *uss;
     373             :         MemoryContext oldcontext;
     374             : 
     375          20 :         oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
     376             : 
     377          20 :         uss = palloc(sizeof(macaddr_sortsupport_state));
     378          20 :         uss->input_count = 0;
     379          20 :         uss->estimating = true;
     380          20 :         initHyperLogLog(&uss->abbr_card, 10);
     381             : 
     382          20 :         ssup->ssup_extra = uss;
     383             : 
     384          20 :         ssup->comparator = ssup_datum_unsigned_cmp;
     385          20 :         ssup->abbrev_converter = macaddr_abbrev_convert;
     386          20 :         ssup->abbrev_abort = macaddr_abbrev_abort;
     387          20 :         ssup->abbrev_full_comparator = macaddr_fast_cmp;
     388             : 
     389          20 :         MemoryContextSwitchTo(oldcontext);
     390             :     }
     391             : 
     392          20 :     PG_RETURN_VOID();
     393             : }
     394             : 
     395             : /*
     396             :  * SortSupport "traditional" comparison function. Pulls two MAC addresses from
     397             :  * the heap and runs a standard comparison on them.
     398             :  */
     399             : static int
     400         324 : macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
     401             : {
     402         324 :     macaddr    *arg1 = DatumGetMacaddrP(x);
     403         324 :     macaddr    *arg2 = DatumGetMacaddrP(y);
     404             : 
     405         324 :     return macaddr_cmp_internal(arg1, arg2);
     406             : }
     407             : 
     408             : /*
     409             :  * Callback for estimating effectiveness of abbreviated key optimization.
     410             :  *
     411             :  * We pay no attention to the cardinality of the non-abbreviated data, because
     412             :  * there is no equality fast-path within authoritative macaddr comparator.
     413             :  */
     414             : static bool
     415          12 : macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
     416             : {
     417          12 :     macaddr_sortsupport_state *uss = ssup->ssup_extra;
     418             :     double      abbr_card;
     419             : 
     420          12 :     if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
     421          12 :         return false;
     422             : 
     423           0 :     abbr_card = estimateHyperLogLog(&uss->abbr_card);
     424             : 
     425             :     /*
     426             :      * If we have >100k distinct values, then even if we were sorting many
     427             :      * billion rows we'd likely still break even, and the penalty of undoing
     428             :      * that many rows of abbrevs would probably not be worth it. At this point
     429             :      * we stop counting because we know that we're now fully committed.
     430             :      */
     431           0 :     if (abbr_card > 100000.0)
     432             :     {
     433             : #ifdef TRACE_SORT
     434           0 :         if (trace_sort)
     435           0 :             elog(LOG,
     436             :                  "macaddr_abbrev: estimation ends at cardinality %f"
     437             :                  " after " INT64_FORMAT " values (%d rows)",
     438             :                  abbr_card, uss->input_count, memtupcount);
     439             : #endif
     440           0 :         uss->estimating = false;
     441           0 :         return false;
     442             :     }
     443             : 
     444             :     /*
     445             :      * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
     446             :      * fudge factor allows us to abort earlier on genuinely pathological data
     447             :      * where we've had exactly one abbreviated value in the first 2k
     448             :      * (non-null) rows.
     449             :      */
     450           0 :     if (abbr_card < uss->input_count / 2000.0 + 0.5)
     451             :     {
     452             : #ifdef TRACE_SORT
     453           0 :         if (trace_sort)
     454           0 :             elog(LOG,
     455             :                  "macaddr_abbrev: aborting abbreviation at cardinality %f"
     456             :                  " below threshold %f after " INT64_FORMAT " values (%d rows)",
     457             :                  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
     458             :                  memtupcount);
     459             : #endif
     460           0 :         return true;
     461             :     }
     462             : 
     463             : #ifdef TRACE_SORT
     464           0 :     if (trace_sort)
     465           0 :         elog(LOG,
     466             :              "macaddr_abbrev: cardinality %f after " INT64_FORMAT
     467             :              " values (%d rows)", abbr_card, uss->input_count, memtupcount);
     468             : #endif
     469             : 
     470           0 :     return false;
     471             : }
     472             : 
/*
 * SortSupport conversion routine. Converts original macaddr representation
 * to abbreviated key representation.
 *
 * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
 * unsigned integer for purposes of comparison. On a 64-bit machine, there
 * will be two zeroed bytes of padding. The integer is converted to native
 * endianness to facilitate easy comparison.
 *
 * original: Datum pointing at the authoritative macaddr value.
 * ssup: sort state; ssup->ssup_extra must hold the
 *       macaddr_sortsupport_state set up by macaddr_sortsupport().
 *
 * Besides building the key, this increments the state's input_count and,
 * while estimation is still active, feeds a hash of the key into the
 * HyperLogLog cardinality estimator.
 *
 * Returns the abbreviated key.
 */
static Datum
macaddr_abbrev_convert(Datum original, SortSupport ssup)
{
    macaddr_sortsupport_state *uss = ssup->ssup_extra;
    macaddr    *authoritative = DatumGetMacaddrP(original);
    Datum       res;

    /*
     * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
     * the MAC address in. There will be two bytes of zero padding on the end
     * of the least significant bits.
     *
     * When the Datum is not 8 bytes wide, only the first SIZEOF_DATUM bytes
     * of the address are copied, so the key holds just a prefix of it.
     */
#if SIZEOF_DATUM == 8
    memset(&res, 0, SIZEOF_DATUM);
    memcpy(&res, authoritative, sizeof(macaddr));
#else                           /* SIZEOF_DATUM != 8 */
    memcpy(&res, authoritative, SIZEOF_DATUM);
#endif
    /* Counted for every converted value, whether or not we are estimating */
    uss->input_count += 1;

    /*
     * Cardinality estimation. The estimate uses uint32, so on a 64-bit
     * architecture, XOR the two 32-bit halves together to produce slightly
     * more entropy. The two zeroed bytes won't have any practical impact on
     * this operation.
     */
    if (uss->estimating)
    {
        uint32      tmp;

#if SIZEOF_DATUM == 8
        tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
#else                           /* SIZEOF_DATUM != 8 */
        tmp = (uint32) res;
#endif

        addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
    }

    /*
     * Byteswap on little-endian machines.
     *
     * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
     * 3-way comparator) works correctly on all platforms. Without this, the
     * comparator would have to call memcmp() with a pair of pointers to the
     * first byte of each abbreviated key, which is slower.
     */
    res = DatumBigEndianToNative(res);

    return res;
}

Generated by: LCOV version 1.14