LCOV - code coverage report
Current view: top level - src/include/port - pg_lfind.h (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 52 52 100.0 %
Date: 2024-11-21 08:14:44 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_lfind.h
       4             :  *    Optimized linear search routines using SIMD intrinsics where
       5             :  *    available.
       6             :  *
       7             :  * Copyright (c) 2022-2024, PostgreSQL Global Development Group
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/include/port/pg_lfind.h
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #ifndef PG_LFIND_H
      15             : #define PG_LFIND_H
      16             : 
      17             : #include "port/simd.h"
      18             : 
      19             : /*
      20             :  * pg_lfind8
      21             :  *
      22             :  * Return true if there is an element in 'base' that equals 'key', otherwise
      23             :  * return false.
      24             :  */
      25             : static inline bool
      26     5902102 : pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
      27             : {
      28             :     uint32      i;
      29             : 
      30             :     /* round down to multiple of vector length */
      31     5902102 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
      32             :     Vector8     chunk;
      33             : 
      34     9501494 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
      35             :     {
      36     5902182 :         vector8_load(&chunk, &base[i]);
      37     5902182 :         if (vector8_has(chunk, key))
      38     2302790 :             return true;
      39             :     }
      40             : 
      41             :     /* Process the remaining elements one at a time. */
      42     3599418 :     for (; i < nelem; i++)
      43             :     {
      44         120 :         if (key == base[i])
      45          14 :             return true;
      46             :     }
      47             : 
      48     3599298 :     return false;
      49             : }
      50             : 
      51             : /*
      52             :  * pg_lfind8_le
      53             :  *
      54             :  * Return true if there is an element in 'base' that is less than or equal to
      55             :  * 'key', otherwise return false.
      56             :  */
      57             : static inline bool
      58      649362 : pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
      59             : {
      60             :     uint32      i;
      61             : 
      62             :     /* round down to multiple of vector length */
      63      649362 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
      64             :     Vector8     chunk;
      65             : 
      66     1298778 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
      67             :     {
      68      649442 :         vector8_load(&chunk, &base[i]);
      69      649442 :         if (vector8_has_le(chunk, key))
      70          26 :             return true;
      71             :     }
      72             : 
      73             :     /* Process the remaining elements one at a time. */
      74      649430 :     for (; i < nelem; i++)
      75             :     {
      76         120 :         if (base[i] <= key)
      77          26 :             return true;
      78             :     }
      79             : 
      80      649310 :     return false;
      81             : }
      82             : 
      83             : /*
      84             :  * pg_lfind32_one_by_one_helper
      85             :  *
      86             :  * Searches the array of integers one-by-one.  The caller is responsible for
      87             :  * ensuring that there are at least "nelem" integers in the array.
      88             :  */
      89             : static inline bool
      90    11043224 : pg_lfind32_one_by_one_helper(uint32 key, const uint32 *base, uint32 nelem)
      91             : {
      92    27252716 :     for (uint32 i = 0; i < nelem; i++)
      93             :     {
      94    16223650 :         if (key == base[i])
      95       14158 :             return true;
      96             :     }
      97             : 
      98    11029066 :     return false;
      99             : }
     100             : 
     101             : #ifndef USE_NO_SIMD
     102             : /*
     103             :  * pg_lfind32_simd_helper
     104             :  *
     105             :  * Searches one 4-register-block of integers.  The caller is responsible for
     106             :  * ensuring that there are at least 4-registers-worth of integers remaining.
     107             :  */
     108             : static inline bool
     109          84 : pg_lfind32_simd_helper(const Vector32 keys, const uint32 *base)
     110             : {
     111          84 :     const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
     112             :     Vector32    vals1,
     113             :                 vals2,
     114             :                 vals3,
     115             :                 vals4,
     116             :                 result1,
     117             :                 result2,
     118             :                 result3,
     119             :                 result4,
     120             :                 tmp1,
     121             :                 tmp2,
     122             :                 result;
     123             : 
     124             :     /* load the next block into 4 registers */
     125          84 :     vector32_load(&vals1, base);
     126          84 :     vector32_load(&vals2, &base[nelem_per_vector]);
     127          84 :     vector32_load(&vals3, &base[nelem_per_vector * 2]);
     128          84 :     vector32_load(&vals4, &base[nelem_per_vector * 3]);
     129             : 
     130             :     /* compare each value to the key */
     131          84 :     result1 = vector32_eq(keys, vals1);
     132          84 :     result2 = vector32_eq(keys, vals2);
     133          84 :     result3 = vector32_eq(keys, vals3);
     134          84 :     result4 = vector32_eq(keys, vals4);
     135             : 
     136             :     /* combine the results into a single variable */
     137          84 :     tmp1 = vector32_or(result1, result2);
     138          84 :     tmp2 = vector32_or(result3, result4);
     139          84 :     result = vector32_or(tmp1, tmp2);
     140             : 
     141             :     /* return whether there was a match */
     142          84 :     return vector32_is_highbit_set(result);
     143             : }
     144             : #endif                          /* ! USE_NO_SIMD */
     145             : 
     146             : /*
     147             :  * pg_lfind32
     148             :  *
     149             :  * Return true if there is an element in 'base' that equals 'key', otherwise
     150             :  * return false.
     151             :  */
     152             : static inline bool
     153    11043244 : pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
     154             : {
     155             : #ifndef USE_NO_SIMD
     156    11043244 :     uint32      i = 0;
     157             : 
     158             :     /*
     159             :      * For better instruction-level parallelism, each loop iteration operates
     160             :      * on a block of four registers.
     161             :      */
     162    11043244 :     const Vector32 keys = vector32_broadcast(key);  /* load copies of key */
     163    11043244 :     const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
     164    11043244 :     const uint32 nelem_per_iteration = 4 * nelem_per_vector;
     165             : 
     166             :     /* round down to multiple of elements per iteration */
     167    11043244 :     const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
     168             : 
     169             : #if defined(USE_ASSERT_CHECKING)
     170             :     bool        assert_result = pg_lfind32_one_by_one_helper(key, base, nelem);
     171             : #endif
     172             : 
     173             :     /*
     174             :      * If there aren't enough elements for the SIMD code, use the standard
     175             :      * one-by-one linear search code.
     176             :      */
     177    11043244 :     if (nelem < nelem_per_iteration)
     178    11043224 :         return pg_lfind32_one_by_one_helper(key, base, nelem);
     179             : 
     180             :     /*
     181             :      * Process as many elements as possible with a block of 4 registers.
     182             :      */
     183             :     do
     184             :     {
     185          68 :         if (pg_lfind32_simd_helper(keys, &base[i]))
     186             :         {
     187             :             Assert(assert_result == true);
     188           4 :             return true;
     189             :         }
     190             : 
     191          64 :         i += nelem_per_iteration;
     192             : 
     193          64 :     } while (i < tail_idx);
     194             : 
     195             :     /*
     196             :      * Process the last 'nelem_per_iteration' elements in the array with a
     197             :      * 4-register block.  This will cause us to check a subset of the elements
     198             :      * more than once, but that won't affect correctness, and testing has
     199             :      * demonstrated that this helps more cases than it harms.
     200             :      */
     201             :     Assert(assert_result == pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]));
     202          16 :     return pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]);
     203             : #else
     204             :     /* Process the elements one at a time. */
     205             :     return pg_lfind32_one_by_one_helper(key, base, nelem);
     206             : #endif
     207             : }
     208             : 
     209             : #endif                          /* PG_LFIND_H */

Generated by: LCOV version 1.14