LCOV - PostgreSQL 19devel - usr/lib/x86_64-linux-gnu/perl/5.40/CORE/inline.h

LCOV - code coverage report

Current view:	top level - /usr/lib/x86_64-linux-gnu/perl/5.40/CORE - inline.h (source / functions)		Coverage	Total	Hit
Test:	PostgreSQL 19devel	Lines:	100.0 %	2	2
Test Date:	2026-02-27 04:14:43	Functions:	100.0 %	1	1
Legend:	Lines: hit not hit

            Line data    Source code

       1              : /*    inline.h
       2              :  *
       3              :  *    Copyright (C) 2012 by Larry Wall and others
       4              :  *
       5              :  *    You may distribute under the terms of either the GNU General Public
       6              :  *    License or the Artistic License, as specified in the README file.
       7              :  *
       8              :  *    This file contains tables and code adapted from
       9              :  *    https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this
      10              :  *    copyright notice:
      11              : 
      12              : Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
      13              : 
      14              : Permission is hereby granted, free of charge, to any person obtaining a copy of
      15              : this software and associated documentation files (the "Software"), to deal in
      16              : the Software without restriction, including without limitation the rights to
      17              : use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
      18              : of the Software, and to permit persons to whom the Software is furnished to do
      19              : so, subject to the following conditions:
      20              : 
      21              : The above copyright notice and this permission notice shall be included in all
      22              : copies or substantial portions of the Software.
      23              : 
      24              : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      25              : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      26              : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      27              : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      28              : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      29              : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      30              : SOFTWARE.
      31              : 
      32              :  *
      33              :  * This file is a home for static inline functions that cannot go in other
      34              :  * header files, because they depend on proto.h (included after most other
      35              :  * headers) or struct definitions.
      36              :  *
      37              :  * Note also perlstatic.h for functions that can't or shouldn't be inlined, but
      38              :  * whose details should be exposed to the compiler, for such things as tail
      39              :  * call optimization.
      40              :  *
      41              :  * Each section names the header file that the functions "belong" to.
      42              :  */
      43              : 
      44              : /* ------------------------------- av.h ------------------------------- */
      45              : 
      46              : /*
      47              : =for apidoc_section $AV
      48              : =for apidoc av_count
      49              : Returns the number of elements in the array C<av>.  This is the true length of
      50              : the array, including any undefined elements.  It is always the same as
      51              : S<C<av_top_index(av) + 1>>.
      52              : 
      53              : =cut
      54              : */
      55              : PERL_STATIC_INLINE Size_t
      56              : Perl_av_count(pTHX_ AV *av)
      57              : {
      58              :     PERL_ARGS_ASSERT_AV_COUNT;
      59              :     assert(SvTYPE(av) == SVt_PVAV);
      60              : 
      61              :     return AvFILL(av) + 1;
      62              : }
      63              : 
      64              : /* ------------------------------- av.c ------------------------------- */
      65              : 
      66              : /*
      67              : =for apidoc av_store_simple
      68              : 
      69              : This is a cut-down version of av_store that assumes that the array is
      70              : very straightforward - no magic, not readonly, and AvREAL - and that
      71              : C<key> is not negative. This function MUST NOT be used in situations
      72              : where any of those assumptions may not hold.
      73              : 
      74              : Stores an SV in an array.  The array index is specified as C<key>. It
      75              : can be dereferenced to get the C<SV*> that was stored there (= C<val>)).
      76              : 
      77              : Note that the caller is responsible for suitably incrementing the reference
      78              : count of C<val> before the call.
      79              : 
      80              : Approximate Perl equivalent: C<splice(@myarray, $key, 1, $val)>.
      81              : 
      82              : =cut
      83              : */
      84              : 
      85              : PERL_STATIC_INLINE SV**
      86              : Perl_av_store_simple(pTHX_ AV *av, SSize_t key, SV *val)
      87              : {
      88              :     SV** ary;
      89              : 
      90              :     PERL_ARGS_ASSERT_AV_STORE_SIMPLE;
      91              :     assert(SvTYPE(av) == SVt_PVAV);
      92              :     assert(!SvMAGICAL(av));
      93              :     assert(!SvREADONLY(av));
      94              :     assert(AvREAL(av));
      95              :     assert(key > -1);
      96              : 
      97              :     ary = AvARRAY(av);
      98              : 
      99              :     if (AvFILLp(av) < key) {
     100              :         if (key > AvMAX(av)) {
     101              :             av_extend(av,key);
     102              :             ary = AvARRAY(av);
     103              :         }
     104              :         AvFILLp(av) = key;
     105              :     } else
     106              :         SvREFCNT_dec(ary[key]);
     107              : 
     108              :     ary[key] = val;
     109              :     return &ary[key];
     110              : }
     111              : 
     112              : /*
     113              : =for apidoc av_fetch_simple
     114              : 
     115              : This is a cut-down version of av_fetch that assumes that the array is
     116              : very straightforward - no magic, not readonly, and AvREAL - and that
     117              : C<key> is not negative. This function MUST NOT be used in situations
     118              : where any of those assumptions may not hold.
     119              : 
     120              : Returns the SV at the specified index in the array.  The C<key> is the
     121              : index.  If lval is true, you are guaranteed to get a real SV back (in case
     122              : it wasn't real before), which you can then modify.  Check that the return
     123              : value is non-null before dereferencing it to a C<SV*>.
     124              : 
     125              : The rough perl equivalent is C<$myarray[$key]>.
     126              : 
     127              : =cut
     128              : */
     129              : 
     130              : PERL_STATIC_INLINE SV**
     131              : Perl_av_fetch_simple(pTHX_ AV *av, SSize_t key, I32 lval)
     132              : {
     133              :     PERL_ARGS_ASSERT_AV_FETCH_SIMPLE;
     134              :     assert(SvTYPE(av) == SVt_PVAV);
     135              :     assert(!SvMAGICAL(av));
     136              :     assert(!SvREADONLY(av));
     137              :     assert(AvREAL(av));
     138              :     assert(key > -1);
     139              : 
     140              :     if ( (key > AvFILLp(av)) || !AvARRAY(av)[key]) {
     141              :         return lval ? av_store_simple(av,key,newSV_type(SVt_NULL)) : NULL;
     142              :     } else {
     143              :         return &AvARRAY(av)[key];
     144              :     }
     145              : }
     146              : 
     147              : /*
     148              : =for apidoc av_push_simple
     149              : 
     150              : This is a cut-down version of av_push that assumes that the array is very
     151              : straightforward - no magic, not readonly, and AvREAL - and that C<key> is
     152              : not less than -1. This function MUST NOT be used in situations where any
     153              : of those assumptions may not hold.
     154              : 
     155              : Pushes an SV (transferring control of one reference count) onto the end of the
     156              : array.  The array will grow automatically to accommodate the addition.
     157              : 
     158              : Perl equivalent: C<push @myarray, $val;>.
     159              : 
     160              : =cut
     161              : */
     162              : 
     163              : PERL_STATIC_INLINE void
     164              : Perl_av_push_simple(pTHX_ AV *av, SV *val)
     165              : {
     166              :     PERL_ARGS_ASSERT_AV_PUSH_SIMPLE;
     167              :     assert(SvTYPE(av) == SVt_PVAV);
     168              :     assert(!SvMAGICAL(av));
     169              :     assert(!SvREADONLY(av));
     170              :     assert(AvREAL(av));
     171              :     assert(AvFILLp(av) > -2);
     172              : 
     173              :     (void)av_store_simple(av,AvFILLp(av)+1,val);
     174              : }
     175              : 
     176              : /*
     177              : =for apidoc av_new_alloc
     178              : 
     179              : This implements L<perlapi/C<newAV_alloc_x>>
     180              : and L<perlapi/C<newAV_alloc_xz>>, which are the public API for this
     181              : functionality.
     182              : 
     183              : Creates a new AV and allocates its SV* array.
     184              : 
     185              : This is similar to, but more efficient than doing:
     186              : 
     187              :     AV *av = newAV();
     188              :     av_extend(av, key);
     189              : 
     190              : The size parameter is used to pre-allocate a SV* array large enough to
     191              : hold at least elements C<0..(size-1)>.  C<size> must be at least 1.
     192              : 
     193              : The C<zeroflag> parameter controls whether or not the array is NULL
     194              : initialized.
     195              : 
     196              : =cut
     197              : */
     198              : 
     199              : PERL_STATIC_INLINE AV *
     200              : Perl_av_new_alloc(pTHX_ SSize_t size, bool zeroflag)
     201              : {
     202              :     AV * const av = newAV();
     203              :     SV** ary;
     204              :     PERL_ARGS_ASSERT_AV_NEW_ALLOC;
     205              :     assert(size > 0);
     206              : 
     207              :     Newx(ary, size, SV*); /* Newx performs the memwrap check */
     208              :     AvALLOC(av) = ary;
     209              :     AvARRAY(av) = ary;
     210              :     AvMAX(av) = size - 1;
     211              : 
     212              :     if (zeroflag)
     213              :         Zero(ary, size, SV*);
     214              : 
     215              :     return av;
     216              : }
     217              : 
     218              : 
     219              : /* remove (AvARRAY(av) - AvALLOC(av)) offset from empty array */
     220              : 
     221              : PERL_STATIC_INLINE void
     222              : Perl_av_remove_offset(pTHX_ AV *av)
     223              : {
     224              :     PERL_ARGS_ASSERT_AV_REMOVE_OFFSET;
     225              :     assert(AvFILLp(av) == -1);
     226              :     SSize_t i = AvARRAY(av) - AvALLOC(av);
     227              :     if (i) {
     228              :         AvARRAY(av) = AvALLOC(av);
     229              :         AvMAX(av)   += i;
     230              : #ifdef PERL_RC_STACK
     231              :         Zero(AvALLOC(av), i, SV*);
     232              : #endif
     233              :     }
     234              : }
     235              : 
     236              : 
     237              : /* ------------------------------- cv.h ------------------------------- */
     238              : 
     239              : /*
     240              : =for apidoc_section $CV
     241              : =for apidoc CvGV
     242              : Returns the GV associated with the CV C<sv>, reifying it if necessary.
     243              : 
     244              : =cut
     245              : */
     246              : PERL_STATIC_INLINE GV *
     247              : Perl_CvGV(pTHX_ CV *sv)
     248              : {
     249              :     PERL_ARGS_ASSERT_CVGV;
     250              : 
     251              :     return CvNAMED(sv)
     252              :         ? Perl_cvgv_from_hek(aTHX_ sv)
     253              :         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
     254              : }
     255              : 
     256              : /*
     257              : =for apidoc CvDEPTH
     258              : Returns the recursion level of the CV C<sv>.  Hence >= 2 indicates we are in a
     259              : recursive call.
     260              : 
     261              : =cut
     262              : */
     263              : PERL_STATIC_INLINE I32 *
     264              : Perl_CvDEPTH(const CV * const sv)
     265              : {
     266              :     PERL_ARGS_ASSERT_CVDEPTH;
     267              :     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
     268              : 
     269              :     return &((XPVCV*)SvANY(sv))->xcv_depth;
     270              : }
     271              : 
     272              : /*
     273              :  CvPROTO returns the prototype as stored, which is not necessarily what
     274              :  the interpreter should be using. Specifically, the interpreter assumes
     275              :  that spaces have been stripped, which has been the case if the prototype
     276              :  was added by toke.c, but is generally not the case if it was added elsewhere.
     277              :  Since we can't enforce the spacelessness at assignment time, this routine
     278              :  provides a temporary copy at parse time with spaces removed.
     279              :  I<orig> is the start of the original buffer, I<len> is the length of the
     280              :  prototype and will be updated when this returns.
     281              :  */
     282              : 
     283              : #ifdef PERL_CORE
     284              : PERL_STATIC_INLINE char *
     285              : S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
     286              : {
     287              :     SV * tmpsv;
     288              :     char * tmps;
     289              :     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
     290              :     tmps = SvPVX(tmpsv);
     291              :     while ((*len)--) {
     292              :         if (!isSPACE(*orig))
     293              :             *tmps++ = *orig;
     294              :         orig++;
     295              :     }
     296              :     *tmps = '\0';
     297              :     *len = tmps - SvPVX(tmpsv);
     298              :                 return SvPVX(tmpsv);
     299              : }
     300              : #endif
     301              : 
     302              : /* ------------------------------- iperlsys.h ------------------------------- */
     303              : #if ! defined(PERL_IMPLICIT_SYS) && defined(USE_ITHREADS)
     304              : 
     305              : /* Otherwise this function is implemented as macros in iperlsys.h */
     306              : 
     307              : PERL_STATIC_INLINE bool
     308              : S_PerlEnv_putenv(pTHX_ char * str)
     309              : {
     310              :     PERL_ARGS_ASSERT_PERLENV_PUTENV;
     311              : 
     312              :     ENV_LOCK;
     313              :     bool retval = putenv(str);
     314              :     ENV_UNLOCK;
     315              : 
     316              :     return retval;
     317              : }
     318              : 
     319              : #endif
     320              : 
     321              : /* ------------------------------- mg.h ------------------------------- */
     322              : 
     323              : #if defined(PERL_CORE) || defined(PERL_EXT)
     324              : /* assumes get-magic and stringification have already occurred */
     325              : PERL_STATIC_INLINE STRLEN
     326              : S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
     327              : {
     328              :     assert(mg->mg_type == PERL_MAGIC_regex_global);
     329              :     assert(mg->mg_len != -1);
     330              :     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
     331              :         return (STRLEN)mg->mg_len;
     332              :     else {
     333              :         const STRLEN pos = (STRLEN)mg->mg_len;
     334              :         /* Without this check, we may read past the end of the buffer: */
     335              :         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
     336              :         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
     337              :     }
     338              : }
     339              : #endif
     340              : 
     341              : /* ------------------------------- pad.h ------------------------------ */
     342              : 
     343              : #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
     344              : PERL_STATIC_INLINE bool
     345              : S_PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
     346              : {
     347              :     PERL_ARGS_ASSERT_PADNAMEIN_SCOPE;
     348              : 
     349              :     /* is seq within the range _LOW to _HIGH ?
     350              :      * This is complicated by the fact that PL_cop_seqmax
     351              :      * may have wrapped around at some point */
     352              :     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
     353              :         return FALSE; /* not yet introduced */
     354              : 
     355              :     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
     356              :     /* in compiling scope */
     357              :         if (
     358              :             (seq >  COP_SEQ_RANGE_LOW(pn))
     359              :             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
     360              :             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
     361              :         )
     362              :             return TRUE;
     363              :     }
     364              :     else if (
     365              :         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
     366              :         ?
     367              :             (  seq >  COP_SEQ_RANGE_LOW(pn)
     368              :             || seq <= COP_SEQ_RANGE_HIGH(pn))
     369              : 
     370              :         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
     371              :              && seq <= COP_SEQ_RANGE_HIGH(pn))
     372              :     )
     373              :         return TRUE;
     374              :     return FALSE;
     375              : }
     376              : #endif
     377              : 
     378              : /* ------------------------------- pp.h ------------------------------- */
     379              : 
     380              : PERL_STATIC_INLINE Stack_off_t
     381              : Perl_TOPMARK(pTHX)
     382              : {
     383              :     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
     384              :                                  "MARK top  %p %" IVdf "\n",
     385              :                                   PL_markstack_ptr,
     386              :                                   (IV)*PL_markstack_ptr)));
     387              :     return *PL_markstack_ptr;
     388              : }
     389              : 
     390              : PERL_STATIC_INLINE Stack_off_t
     391          706 : Perl_POPMARK(pTHX)
     392              : {
     393              :     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
     394              :                                  "MARK pop  %p %" IVdf "\n",
     395              :                                   (PL_markstack_ptr-1),
     396              :                                   (IV)*(PL_markstack_ptr-1))));
     397              :     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
     398          706 :     return *PL_markstack_ptr--;
     399              : }
     400              : 
     401              : /*
     402              : =for apidoc_section $rpp
     403              : 
     404              : =for apidoc rpp_extend
     405              : Ensures that there is space on the stack to push C<n> items, extending it
     406              : if necessary.
     407              : 
     408              : =cut
     409              : */
     410              : 
     411              : PERL_STATIC_INLINE void
     412              : Perl_rpp_extend(pTHX_ SSize_t n)
     413              : {
     414              :     PERL_ARGS_ASSERT_RPP_EXTEND;
     415              : 
     416              :     EXTEND_HWM_SET(PL_stack_sp, n);
     417              : #ifndef STRESS_REALLOC
     418              :     if (UNLIKELY(_EXTEND_NEEDS_GROW(PL_stack_sp, n)))
     419              : #endif
     420              :     {
     421              :         (void)stack_grow(PL_stack_sp, PL_stack_sp, n);
     422              :     }
     423              : }
     424              : 
     425              : 
     426              : /*
     427              : =for apidoc rpp_popfree_to
     428              : 
     429              : Pop and free all items on the argument stack above C<sp>. On return,
     430              : C<PL_stack_sp> will be equal to C<sp>.
     431              : 
     432              : =cut
     433              : */
     434              : 
     435              : PERL_STATIC_INLINE void
     436              : Perl_rpp_popfree_to(pTHX_ SV **sp)
     437              : {
     438              :     PERL_ARGS_ASSERT_RPP_POPFREE_TO;
     439              : 
     440              :     assert(sp <= PL_stack_sp);
     441              : #ifdef PERL_RC_STACK
     442              :     assert(rpp_stack_is_rc());
     443              :     while (PL_stack_sp > sp) {
     444              :         SV *sv = *PL_stack_sp--;
     445              :         SvREFCNT_dec(sv);
     446              :     }
     447              : #else
     448              :     PL_stack_sp = sp;
     449              : #endif
     450              : }
     451              : 
     452              : 
     453              : /*
     454              : =for apidoc rpp_popfree_to_NN
     455              : 
     456              : A variant of rpp_popfree_to() which assumes that all the pointers being
     457              : popped off the stack are non-NULL.
     458              : 
     459              : =cut
     460              : */
     461              : 
     462              : PERL_STATIC_INLINE void
     463              : Perl_rpp_popfree_to_NN(pTHX_ SV **sp)
     464              : {
     465              :     PERL_ARGS_ASSERT_RPP_POPFREE_TO_NN;
     466              : 
     467              :     assert(sp <= PL_stack_sp);
     468              : #ifdef PERL_RC_STACK
     469              :     assert(rpp_stack_is_rc());
     470              :     while (PL_stack_sp > sp) {
     471              :         SV *sv = *PL_stack_sp--;
     472              :         assert(sv);
     473              :         SvREFCNT_dec_NN(sv);
     474              :     }
     475              : #else
     476              :     PL_stack_sp = sp;
     477              : #endif
     478              : }
     479              : 
     480              : 
     481              : /*
     482              : =for apidoc rpp_popfree_1
     483              : 
     484              : Pop and free the top item on the argument stack and update C<PL_stack_sp>.
     485              : 
     486              : =cut
     487              : */
     488              : 
     489              : PERL_STATIC_INLINE void
     490              : Perl_rpp_popfree_1(pTHX)
     491              : {
     492              :     PERL_ARGS_ASSERT_RPP_POPFREE_1;
     493              : 
     494              : #ifdef PERL_RC_STACK
     495              :     assert(rpp_stack_is_rc());
     496              :     SV *sv = *PL_stack_sp--;
     497              :     SvREFCNT_dec(sv);
     498              : #else
     499              :     PL_stack_sp--;
     500              : #endif
     501              : }
     502              : 
     503              : 
     504              : /*
     505              : =for apidoc rpp_popfree_1_NN
     506              : 
     507              : A variant of rpp_popfree_1() which assumes that the pointer being popped
     508              : off the stack is non-NULL.
     509              : 
     510              : =cut
     511              : */
     512              : 
     513              : PERL_STATIC_INLINE void
     514              : Perl_rpp_popfree_1_NN(pTHX)
     515              : {
     516              :     PERL_ARGS_ASSERT_RPP_POPFREE_1_NN;
     517              : 
     518              :     assert(*PL_stack_sp);
     519              : #ifdef PERL_RC_STACK
     520              :     assert(rpp_stack_is_rc());
     521              :     SV *sv = *PL_stack_sp--;
     522              :     SvREFCNT_dec_NN(sv);
     523              : #else
     524              :     PL_stack_sp--;
     525              : #endif
     526              : }
     527              : 
     528              : 
     529              : /*
     530              : =for apidoc rpp_popfree_2
     531              : 
     532              : Pop and free the top two items on the argument stack and update
     533              : C<PL_stack_sp>.
     534              : 
     535              : =cut
     536              : */
     537              : 
     538              : 
     539              : PERL_STATIC_INLINE void
     540              : Perl_rpp_popfree_2(pTHX)
     541              : {
     542              :     PERL_ARGS_ASSERT_RPP_POPFREE_2;
     543              : 
     544              : #ifdef PERL_RC_STACK
     545              :     assert(rpp_stack_is_rc());
     546              :     for (int i = 0; i < 2; i++) {
     547              :         SV *sv = *PL_stack_sp--;
     548              :         SvREFCNT_dec(sv);
     549              :     }
     550              : #else
     551              :     PL_stack_sp -= 2;
     552              : #endif
     553              : }
     554              : 
     555              : 
     556              : /*
     557              : =for apidoc rpp_popfree_2_NN
     558              : 
     559              : A variant of rpp_popfree_2() which assumes that the two pointers being
     560              : popped off the stack are non-NULL.
     561              : 
     562              : =cut
     563              : */
     564              : 
     565              : 
     566              : PERL_STATIC_INLINE void
     567              : Perl_rpp_popfree_2_NN(pTHX)
     568              : {
     569              :     PERL_ARGS_ASSERT_RPP_POPFREE_2_NN;
     570              : #ifdef PERL_RC_STACK
     571              :     SV *sv2 = *PL_stack_sp--;
     572              :     assert(sv2);
     573              :     SV *sv1 = *PL_stack_sp;
     574              :     assert(sv1);
     575              : 
     576              :     assert(rpp_stack_is_rc());
     577              :     U32 rc1 = SvREFCNT(sv1);
     578              :     U32 rc2 = SvREFCNT(sv2);
     579              :     /* This expression is intended to be true if either of rc1 or rc2 has
     580              :      * the value 0 or 1, but using only a single branch test, rather
     581              :      * than the two branches that a compiler would plant for a boolean
     582              :      * expression. We are working on the assumption that, most of the
     583              :      * time, neither of the args to a binary function will need to be
     584              :      * freed - they're likely to lex vars, or PADTMPs or whatever.
     585              :      * So give the CPU a single branch that is rarely taken. */
     586              :     if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
     587              :         /* at least one of the old SVs needs freeing. Do it the long way */
     588              :         Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
     589              :     else {
     590              :         SvREFCNT(sv1) = rc1 - 1;
     591              :         SvREFCNT(sv2) = rc2 - 1;
     592              :     }
     593              :     PL_stack_sp--;
     594              : #else
     595              :     PL_stack_sp -= 2;
     596              : #endif
     597              : }
     598              : 
     599              : 
     600              : /*
     601              : =for apidoc rpp_pop_1_norc
     602              : 
     603              : Pop and return the top item off the argument stack and update
     604              : C<PL_stack_sp>. It's similar to rpp_popfree_1(), except that it actually
     605              : returns a value, and it I<doesn't> decrement the SV's reference count.
     606              : On non-C<PERL_RC_STACK> builds it actually increments the SV's reference
     607              : count.
     608              : 
     609              : This is useful in cases where the popped value is immediately embedded
     610              : somewhere e.g. via av_store(), allowing you skip decrementing and then
     611              : immediately incrementing the reference count again (and risk prematurely
     612              : freeing the SV if it had a RC of 1). On non-RC builds, the reference count
     613              : bookkeeping still works too, which is why it should be used rather than
     614              : a simple C<*PL_stack_sp-->.
     615              : 
     616              : =cut
     617              : */
     618              : 
     619              : PERL_STATIC_INLINE SV*
     620              : Perl_rpp_pop_1_norc(pTHX)
     621              : {
     622              :     PERL_ARGS_ASSERT_RPP_POP_1_NORC
     623              : 
     624              :     SV *sv = *PL_stack_sp--;
     625              : 
     626              : #ifndef PERL_RC_STACK
     627              :     SvREFCNT_inc(sv);
     628              : #else
     629              :     assert(rpp_stack_is_rc());
     630              : #endif
     631              :     return sv;
     632              : }
     633              : 
     634              : 
     635              : 
     636              : /*
     637              : =for apidoc      rpp_push_1
     638              : =for apidoc_item rpp_push_IMM
     639              : =for apidoc_item rpp_push_2
     640              : =for apidoc_item rpp_xpush_1
     641              : =for apidoc_item rpp_xpush_IMM
     642              : =for apidoc_item rpp_xpush_2
     643              : 
     644              : Push one or two SVs onto the stack, incrementing their reference counts
     645              : and updating C<PL_stack_sp>. With the C<x> variants, it extends the stack
     646              : first. The C<IMM> variants assume that the single argument is an immortal
     647              : such as <&PL_sv_undef> and, for efficiency, will skip incrementing its
     648              : reference count.
     649              : 
     650              : =cut
     651              : */
     652              : 
     653              : PERL_STATIC_INLINE void
     654              : Perl_rpp_push_1(pTHX_ SV *sv)
     655              : {
     656              :     PERL_ARGS_ASSERT_RPP_PUSH_1;
     657              : 
     658              :     *++PL_stack_sp = sv;
     659              : #ifdef PERL_RC_STACK
     660              :     assert(rpp_stack_is_rc());
     661              :     SvREFCNT_inc_simple_void_NN(sv);
     662              : #endif
     663              : }
     664              : 
     665              : PERL_STATIC_INLINE void
     666              : Perl_rpp_push_IMM(pTHX_ SV *sv)
     667              : {
     668              :     PERL_ARGS_ASSERT_RPP_PUSH_IMM;
     669              : 
     670              :     assert(SvIMMORTAL(sv));
     671              :     *++PL_stack_sp = sv;
     672              : #ifdef PERL_RC_STACK
     673              :     assert(rpp_stack_is_rc());
     674              : #endif
     675              : }
     676              : 
     677              : PERL_STATIC_INLINE void
     678              : Perl_rpp_push_2(pTHX_ SV *sv1, SV *sv2)
     679              : {
     680              :     PERL_ARGS_ASSERT_RPP_PUSH_2;
     681              : 
     682              :     *++PL_stack_sp = sv1;
     683              :     *++PL_stack_sp = sv2;
     684              : #ifdef PERL_RC_STACK
     685              :     assert(rpp_stack_is_rc());
     686              :     SvREFCNT_inc_simple_void_NN(sv1);
     687              :     SvREFCNT_inc_simple_void_NN(sv2);
     688              : #endif
     689              : }
     690              : 
     691              : PERL_STATIC_INLINE void
     692              : Perl_rpp_xpush_1(pTHX_ SV *sv)
     693              : {
     694              :     PERL_ARGS_ASSERT_RPP_XPUSH_1;
     695              : 
     696              :     rpp_extend(1);
     697              :     rpp_push_1(sv);
     698              : }
     699              : 
     700              : PERL_STATIC_INLINE void
     701              : Perl_rpp_xpush_IMM(pTHX_ SV *sv)
     702              : {
     703              :     PERL_ARGS_ASSERT_RPP_XPUSH_IMM;
     704              : 
     705              :     rpp_extend(1);
     706              :     rpp_push_IMM(sv);
     707              : }
     708              : 
     709              : PERL_STATIC_INLINE void
     710              : Perl_rpp_xpush_2(pTHX_ SV *sv1, SV *sv2)
     711              : {
     712              :     PERL_ARGS_ASSERT_RPP_XPUSH_2;
     713              : 
     714              :     rpp_extend(2);
     715              :     rpp_push_2(sv1, sv2);
     716              : }
     717              : 
     718              : 
     719              : /*
     720              : =for apidoc rpp_push_1_norc
     721              : 
     722              : Push C<sv> onto the stack without incrementing its reference count, and
     723              : update C<PL_stack_sp>. On non-PERL_RC_STACK builds, mortalise too.
     724              : 
     725              : This is most useful where an SV has just been created and already has a
     726              : reference count of 1, but has not yet been anchored anywhere.
     727              : 
     728              : =cut
     729              : */
     730              : 
     731              : PERL_STATIC_INLINE void
     732              : Perl_rpp_push_1_norc(pTHX_ SV *sv)
     733              : {
     734              :     PERL_ARGS_ASSERT_RPP_PUSH_1;
     735              : 
     736              :     *++PL_stack_sp = sv;
     737              : #ifdef PERL_RC_STACK
     738              :     assert(rpp_stack_is_rc());
     739              : #else
     740              :     sv_2mortal(sv);
     741              : #endif
     742              : }
     743              : 
     744              : 
     745              : /*
     746              : =for apidoc      rpp_replace_1_1
     747              : =for apidoc_item rpp_replace_1_1_NN
     748              : =for apidoc_item rpp_replace_1_IMM_NN
     749              : 
     750              : Replace the current top stack item with C<sv>, while suitably adjusting
     751              : reference counts. Equivalent to rpp_popfree_1(); rpp_push_1(sv), but
     752              : is more efficient and handles both SVs being the same.
     753              : 
     754              : The C<_NN> variant assumes that the pointer on the stack to the SV being
     755              : freed is non-NULL.
     756              : 
     757              : The C<IMM_NN> variant is like the C<_NN> variant, but in addition, assumes
     758              : that the single argument is an immortal such as <&PL_sv_undef> and, for
     759              : efficiency, will skip incrementing its reference count.
     760              : 
     761              : =cut
     762              : */
     763              : 
     764              : PERL_STATIC_INLINE void
     765              : Perl_rpp_replace_1_1(pTHX_ SV *sv)
     766              : {
     767              :     PERL_ARGS_ASSERT_RPP_REPLACE_1_1;
     768              : 
     769              :     assert(sv);
     770              : #ifdef PERL_RC_STACK
     771              :     assert(rpp_stack_is_rc());
     772              :     SV *oldsv = *PL_stack_sp;
     773              :     *PL_stack_sp = sv;
     774              :     SvREFCNT_inc_simple_void_NN(sv);
     775              :     SvREFCNT_dec(oldsv);
     776              : #else
     777              :     *PL_stack_sp = sv;
     778              : #endif
     779              : }
     780              : 
     781              : 
     782              : PERL_STATIC_INLINE void
     783              : Perl_rpp_replace_1_1_NN(pTHX_ SV *sv)
     784              : {
     785              :     PERL_ARGS_ASSERT_RPP_REPLACE_1_1_NN;
     786              : 
     787              :     assert(sv);
     788              :     assert(*PL_stack_sp);
     789              : #ifdef PERL_RC_STACK
     790              :     assert(rpp_stack_is_rc());
     791              :     SV *oldsv = *PL_stack_sp;
     792              :     *PL_stack_sp = sv;
     793              :     SvREFCNT_inc_simple_void_NN(sv);
     794              :     SvREFCNT_dec_NN(oldsv);
     795              : #else
     796              :     *PL_stack_sp = sv;
     797              : #endif
     798              : }
     799              : 
     800              : 
     801              : PERL_STATIC_INLINE void
     802              : Perl_rpp_replace_1_IMM_NN(pTHX_ SV *sv)
     803              : {
     804              :     PERL_ARGS_ASSERT_RPP_REPLACE_1_IMM_NN;
     805              : 
     806              :     assert(sv);
     807              :     assert(SvIMMORTAL(sv));
     808              :     assert(*PL_stack_sp);
     809              : #ifdef PERL_RC_STACK
     810              :     assert(rpp_stack_is_rc());
     811              :     SV *oldsv = *PL_stack_sp;
     812              :     *PL_stack_sp = sv;
     813              :     SvREFCNT_dec_NN(oldsv);
     814              : #else
     815              :     *PL_stack_sp = sv;
     816              : #endif
     817              : }
     818              : 
     819              : 
     820              : /*
     821              : =for apidoc      rpp_replace_2_1
     822              : =for apidoc_item rpp_replace_2_1_NN
     823              : =for apidoc_item rpp_replace_2_IMM_NN
     824              : 
     825              : Replace the current top to stacks item with C<sv>, while suitably
     826              : adjusting reference counts. Equivalent to rpp_popfree_2(); rpp_push_1(sv),
     827              : but is more efficient and handles SVs being the same.
     828              : 
     829              : The C<_NN> variant assumes that the pointers on the stack to the SVs being
     830              : freed are non-NULL.
     831              : 
     832              : The C<IMM_NN> variant is like the C<_NN> variant, but in addition, assumes
     833              : that the single argument is an immortal such as <&PL_sv_undef> and, for
     834              : efficiency, will skip incrementing its reference count.
     835              : =cut
     836              : */
     837              : 
     838              : PERL_STATIC_INLINE void
     839              : Perl_rpp_replace_2_1(pTHX_ SV *sv)
     840              : {
     841              :     PERL_ARGS_ASSERT_RPP_REPLACE_2_1;
     842              : 
     843              : #ifdef PERL_RC_STACK
     844              :     assert(rpp_stack_is_rc());
     845              :     /* replace PL_stack_sp[-1] first; leave PL_stack_sp[0] in place while
     846              :      * we free [-1], so if an exception occurs, [0] will still be freed.
     847              :      */
     848              :     SV *oldsv = PL_stack_sp[-1];
     849              :     PL_stack_sp[-1] = sv;
     850              :     SvREFCNT_inc_simple_void_NN(sv);
     851              :     SvREFCNT_dec(oldsv);
     852              :     oldsv = *PL_stack_sp--;
     853              :     SvREFCNT_dec(oldsv);
     854              : #else
     855              :     *--PL_stack_sp = sv;
     856              : #endif
     857              : }
     858              : 
     859              : 
     860              : /* Private helper function for _NN and _IMM_NN variants.
     861              :  * Assumes sv has already had its ref count incremented,
     862              :  * ready for being put on the stack.
     863              :  * Intended to be small and fast, since it's inlined into many hot parts of
     864              :  * code.
     865              :  */
     866              : 
     867              : PERL_STATIC_INLINE void
     868              : Perl_rpp_replace_2_1_COMMON(pTHX_ SV *sv)
     869              : {
     870              : 
     871              :     assert(sv);
     872              : #ifdef PERL_RC_STACK
     873              :     SV *sv2 = *PL_stack_sp--;
     874              :     assert(sv2);
     875              :     SV *sv1 = *PL_stack_sp;
     876              :     assert(sv1);
     877              : 
     878              :     *PL_stack_sp = sv;
     879              :     assert(rpp_stack_is_rc());
     880              :     U32 rc1 = SvREFCNT(sv1);
     881              :     U32 rc2 = SvREFCNT(sv2);
     882              :     /* This expression is intended to be true if either of rc1 or rc2 has
     883              :      * the value 0 or 1, but using only a single branch test, rather
     884              :      * than the two branches that a compiler would plant for a boolean
     885              :      * expression. We are working on the assumption that, most of the
     886              :      * time, neither of the args to a binary function will need to be
     887              :      * freed - they're likely to lex vars, or PADTMPs or whatever.
     888              :      * So give the CPU a single branch that is rarely taken. */
     889              :     if (UNLIKELY( !(rc1>>1) + !(rc2>>1) ))
     890              :         /* at least one of the old SVs needs freeing. Do it the long way */
     891              :         Perl_rpp_free_2_(aTHX_ sv1, sv2, rc1, rc2);
     892              :     else {
     893              :         SvREFCNT(sv1) = rc1 - 1;
     894              :         SvREFCNT(sv2) = rc2 - 1;
     895              :     }
     896              : #else
     897              :     *--PL_stack_sp = sv;
     898              : #endif
     899              : }
     900              : 
     901              : 
     902              : PERL_STATIC_INLINE void
     903              : Perl_rpp_replace_2_1_NN(pTHX_ SV *sv)
     904              : {
     905              :     PERL_ARGS_ASSERT_RPP_REPLACE_2_1_NN;
     906              : 
     907              :     assert(sv);
     908              : #ifdef PERL_RC_STACK
     909              :     SvREFCNT_inc_simple_void_NN(sv);
     910              : #endif
     911              :     rpp_replace_2_1_COMMON(sv);
     912              : }
     913              : 
     914              : 
     915              : PERL_STATIC_INLINE void
     916              : Perl_rpp_replace_2_IMM_NN(pTHX_ SV *sv)
     917              : {
     918              :     PERL_ARGS_ASSERT_RPP_REPLACE_2_IMM_NN;
     919              : 
     920              :     assert(sv);
     921              :     assert(SvIMMORTAL(sv));
     922              :     rpp_replace_2_1_COMMON(sv);
     923              : }
     924              : 
     925              : 
     926              : /*
     927              : =for apidoc rpp_replace_at
     928              : 
     929              : Replace the SV at address sp within the stack with C<sv>, while suitably
     930              : adjusting reference counts. Equivalent to C<*sp = sv>, except with proper
     931              : reference count handling.
     932              : 
     933              : =cut
     934              : */
     935              : 
     936              : PERL_STATIC_INLINE void
     937              : Perl_rpp_replace_at(pTHX_ SV **sp, SV *sv)
     938              : {
     939              :     PERL_ARGS_ASSERT_RPP_REPLACE_AT;
     940              : 
     941              : #ifdef PERL_RC_STACK
     942              :     assert(rpp_stack_is_rc());
     943              :     SV *oldsv = *sp;
     944              :     *sp = sv;
     945              :     SvREFCNT_inc_simple_void_NN(sv);
     946              :     SvREFCNT_dec(oldsv);
     947              : #else
     948              :     *sp = sv;
     949              : #endif
     950              : }
     951              : 
     952              : 
     953              : /*
     954              : =for apidoc rpp_replace_at_NN
     955              : 
     956              : A variant of rpp_replace_at() which assumes that the SV pointer on the
     957              : stack is non-NULL.
     958              : 
     959              : =cut
     960              : */
     961              : 
     962              : PERL_STATIC_INLINE void
     963              : Perl_rpp_replace_at_NN(pTHX_ SV **sp, SV *sv)
     964              : {
     965              :     PERL_ARGS_ASSERT_RPP_REPLACE_AT_NN;
     966              : 
     967              :     assert(sv);
     968              :     assert(*sp);
     969              : #ifdef PERL_RC_STACK
     970              :     assert(rpp_stack_is_rc());
     971              :     SV *oldsv = *sp;
     972              :     *sp = sv;
     973              :     SvREFCNT_inc_simple_void_NN(sv);
     974              :     SvREFCNT_dec_NN(oldsv);
     975              : #else
     976              :     *sp = sv;
     977              : #endif
     978              : }
     979              : 
     980              : 
     981              : /*
     982              : =for apidoc rpp_replace_at_norc
     983              : 
     984              : Replace the SV at address sp within the stack with C<sv>, while suitably
     985              : adjusting the reference count of the old SV. Equivalent to C<*sp = sv>,
     986              : except with proper reference count handling.
     987              : 
     988              : C<sv>'s reference count doesn't get incremented. On non-C<PERL_RC_STACK>
     989              : builds, it gets mortalised too.
     990              : 
     991              : This is most useful where an SV has just been created and already has a
     992              : reference count of 1, but has not yet been anchored anywhere.
     993              : 
     994              : =cut
     995              : */
     996              : 
     997              : PERL_STATIC_INLINE void
     998              : Perl_rpp_replace_at_norc(pTHX_ SV **sp, SV *sv)
     999              : {
    1000              :     PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC;
    1001              : 
    1002              : #ifdef PERL_RC_STACK
    1003              :     assert(rpp_stack_is_rc());
    1004              :     SV *oldsv = *sp;
    1005              :     *sp = sv;
    1006              :     SvREFCNT_dec(oldsv);
    1007              : #else
    1008              :     *sp = sv;
    1009              :     sv_2mortal(sv);
    1010              : #endif
    1011              : }
    1012              : 
    1013              : 
    1014              : /*
    1015              : =for apidoc rpp_replace_at_norc_NN
    1016              : 
    1017              : A variant of rpp_replace_at_norc() which assumes that the SV pointer on the
    1018              : stack is non-NULL.
    1019              : 
    1020              : =cut
    1021              : */
    1022              : 
    1023              : PERL_STATIC_INLINE void
    1024              : Perl_rpp_replace_at_norc_NN(pTHX_ SV **sp, SV *sv)
    1025              : {
    1026              :     PERL_ARGS_ASSERT_RPP_REPLACE_AT_NORC_NN;
    1027              : 
    1028              :     assert(*sp);
    1029              : #ifdef PERL_RC_STACK
    1030              :     assert(rpp_stack_is_rc());
    1031              :     SV *oldsv = *sp;
    1032              :     *sp = sv;
    1033              :     SvREFCNT_dec_NN(oldsv);
    1034              : #else
    1035              :     *sp = sv;
    1036              :     sv_2mortal(sv);
    1037              : #endif
    1038              : }
    1039              : 
    1040              : 
    1041              : /*
    1042              : =for apidoc rpp_context
    1043              : 
    1044              : Impose void, scalar or list context on the stack.
    1045              : First, pop C<extra> items off the stack, then when C<gimme> is:
    1046              : C<G_LIST>:   return as-is.
    1047              : C<G_VOID>:   pop everything back to C<mark>
    1048              : C<G_SCALAR>: move the top stack item (or C<&PL_sv_undef> if none) to
    1049              : C<mark+1> and free everything above it.
    1050              : 
    1051              : =cut
    1052              : */
    1053              : 
    1054              : PERL_STATIC_INLINE void
    1055              : Perl_rpp_context(pTHX_ SV **mark, U8 gimme, SSize_t extra)
    1056              : {
    1057              :     PERL_ARGS_ASSERT_RPP_CONTEXT;
    1058              :     assert(extra >= 0);
    1059              :     assert(mark <= PL_stack_sp - extra);
    1060              : 
    1061              :     if (gimme == G_LIST)
    1062              :         mark = PL_stack_sp - extra;
    1063              :     else if (gimme == G_SCALAR) {
    1064              :         SV **svp = PL_stack_sp - extra;
    1065              :         mark++;
    1066              :         if (mark > svp) {
    1067              :             /* empty list (plus extra) */
    1068              :             rpp_popfree_to(svp);
    1069              :             rpp_extend(1);
    1070              :             *++PL_stack_sp = &PL_sv_undef;
    1071              :             return;
    1072              :         }
    1073              :         /* swap top and bottom list items */
    1074              :         SV *top = *svp;
    1075              :         *svp = *mark;
    1076              :         *mark = top;
    1077              :      }
    1078              :     rpp_popfree_to(mark);
    1079              : }
    1080              : 
    1081              : 
    1082              : 
    1083              : 
    1084              : /*
    1085              : =for apidoc      rpp_try_AMAGIC_1
    1086              : =for apidoc_item rpp_try_AMAGIC_2
    1087              : 
    1088              : Check whether either of the one or two SVs at the top of the stack is
    1089              : magical or a ref, and in either case handle it specially: invoke get
    1090              : magic, call an overload method, or replace a ref with a temporary numeric
    1091              : value, as appropriate. If this function returns true, it indicates that
    1092              : the correct return value is already on the stack. Intended to be used at
    1093              : the beginning of the PP function for unary or binary ops.
    1094              : 
    1095              : =cut
    1096              : */
    1097              : 
    1098              : PERL_STATIC_INLINE bool
    1099              : Perl_rpp_try_AMAGIC_1(pTHX_ int method, int flags)
    1100              : {
    1101              :     return    UNLIKELY((SvFLAGS(*PL_stack_sp) & (SVf_ROK|SVs_GMG)))
    1102              :            && Perl_try_amagic_un(aTHX_ method, flags);
    1103              : }
    1104              : 
    1105              : PERL_STATIC_INLINE bool
    1106              : Perl_rpp_try_AMAGIC_2(pTHX_ int method, int flags)
    1107              : {
    1108              :     return    UNLIKELY(((SvFLAGS(PL_stack_sp[-1])|SvFLAGS(PL_stack_sp[0]))
    1109              :                      & (SVf_ROK|SVs_GMG)))
    1110              :            && Perl_try_amagic_bin(aTHX_ method, flags);
    1111              : }
    1112              : 
    1113              : 
    1114              : /*
    1115              : =for apidoc rpp_stack_is_rc
    1116              : 
    1117              : Returns a boolean value indicating whether the stack is currently
    1118              : reference-counted. Note that if the stack is split (bottom half RC, top
    1119              : half non-RC), this function returns false, even if the top half currently
    1120              : contains zero items.
    1121              : 
    1122              : =cut
    1123              : */
    1124              : 
    1125              : PERL_STATIC_INLINE bool
    1126              : Perl_rpp_stack_is_rc(pTHX)
    1127              : {
    1128              : #ifdef PERL_RC_STACK
    1129              :     return AvREAL(PL_curstack) && !PL_curstackinfo->si_stack_nonrc_base;
    1130              : #else
    1131              :     return 0;
    1132              : #endif
    1133              : 
    1134              : }
    1135              : 
    1136              : 
    1137              : /*
    1138              : =for apidoc rpp_is_lone
    1139              : 
    1140              : Indicates whether the stacked SV C<sv> (assumed to be not yet popped off
    1141              : the stack) is only kept alive due to a single reference from the argument
    1142              : stack and/or and the temps stack.
    1143              : 
    1144              : This can used for example to decide whether the copying of return values
    1145              : in rvalue context can be skipped, or whether it shouldn't be assigned to
    1146              : in lvalue context.
    1147              : 
    1148              : =cut
    1149              : */
    1150              : 
    1151              : 
    1152              : PERL_STATIC_INLINE bool
    1153              : Perl_rpp_is_lone(pTHX_ SV *sv)
    1154              : {
    1155              : #ifdef PERL_RC_STACK
    1156              :     /* note that rpp_is_lone() can be used in wrapped pp functions,
    1157              :      * where technically the stack is no longer ref-counted; but because
    1158              :      * the args are non-RC copies of RC args further down the stack, we
    1159              :      * can't be in a *completely* non-ref stack.
    1160              :      */
    1161              :     assert(AvREAL(PL_curstack));
    1162              : #endif
    1163              : 
    1164              :     return SvREFCNT(sv) <= cBOOL(SvTEMP(sv))
    1165              : #ifdef PERL_RC_STACK
    1166              :                          + 1
    1167              :             && !SvIMMORTAL(sv) /* PL_sv_undef etc are never stealable */
    1168              : #endif
    1169              :     ;
    1170              : }
    1171              : 
    1172              : 
    1173              : /*
    1174              : =for apidoc rpp_invoke_xs
    1175              : 
    1176              : Call the XS function associated with C<cv>. Wraps the call if necessary to
    1177              : handle XS functions which are not aware of reference-counted stacks.
    1178              : 
    1179              : =cut
    1180              : */
    1181              : 
    1182              : 
    1183              : PERL_STATIC_INLINE void
    1184              : Perl_rpp_invoke_xs(pTHX_ CV *cv)
    1185              : {
    1186              :     PERL_ARGS_ASSERT_RPP_INVOKE_XS;
    1187              : 
    1188              : #ifdef PERL_RC_STACK
    1189              :     if (!CvXS_RCSTACK(cv))
    1190              :         Perl_xs_wrap(aTHX_ CvXSUB(cv), cv);
    1191              :     else
    1192              : #endif
    1193              :         CvXSUB(cv)(aTHX_ cv);
    1194              : }
    1195              : 
    1196              : 
    1197              : 
    1198              : 
    1199              : /* ----------------------------- regexp.h ----------------------------- */
    1200              : 
    1201              : /* PVLVs need to act as a superset of all scalar types - they are basically
    1202              :  * PVMGs with a few extra fields.
    1203              :  * REGEXPs are first class scalars, but have many fields that can't be copied
    1204              :  * into a PVLV body.
    1205              :  *
    1206              :  * Hence we take a different approach - instead of a copy, PVLVs store a pointer
    1207              :  * back to the original body. To avoid increasing the size of PVLVs just for the
    1208              :  * rare case of REGEXP assignment, this pointer is stored in the memory usually
    1209              :  * used for SvLEN(). Hence the check for SVt_PVLV below, and the ? : ternary to
    1210              :  * read the pointer from the two possible locations. The macro SvLEN() wraps the
    1211              :  * access to the union's member xpvlenu_len, but there is no equivalent macro
    1212              :  * for wrapping the union's member xpvlenu_rx, hence the direct reference here.
    1213              :  *
    1214              :  * See commit df6b4bd56551f2d3 for more details. */
    1215              : 
    1216              : PERL_STATIC_INLINE struct regexp *
    1217              : Perl_ReANY(const REGEXP * const re)
    1218              : {
    1219              :     XPV* const p = (XPV*)SvANY(re);
    1220              : 
    1221              :     PERL_ARGS_ASSERT_REANY;
    1222              :     assert(isREGEXP(re));
    1223              : 
    1224              :     return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
    1225              :                                    : (struct regexp *)p;
    1226              : }
    1227              : 
    1228              : /* ------------------------------- utf8.h ------------------------------- */
    1229              : 
    1230              : /*
    1231              : =for apidoc_section $unicode
    1232              : */
    1233              : 
    1234              : PERL_STATIC_INLINE void
    1235              : Perl_append_utf8_from_native_byte(const U8 byte, U8** dest)
    1236              : {
    1237              :     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
    1238              :      * encoded string at '*dest', updating '*dest' to include it */
    1239              : 
    1240              :     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
    1241              : 
    1242              :     if (NATIVE_BYTE_IS_INVARIANT(byte))
    1243              :         *((*dest)++) = byte;
    1244              :     else {
    1245              :         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
    1246              :         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
    1247              :     }
    1248              : }
    1249              : 
    1250              : /*
    1251              : =for apidoc valid_utf8_to_uvchr
    1252              : Like C<L<perlapi/utf8_to_uvchr_buf>>, but should only be called when it is
    1253              : known that the next character in the input UTF-8 string C<s> is well-formed
    1254              : (I<e.g.>, it passes C<L<perlapi/isUTF8_CHAR>>.  Surrogates, non-character code
    1255              : points, and non-Unicode code points are allowed.
    1256              : 
    1257              : =cut
    1258              : 
    1259              :  */
    1260              : 
    1261              : PERL_STATIC_INLINE UV
    1262              : Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
    1263              : {
    1264              :     const UV expectlen = UTF8SKIP(s);
    1265              :     const U8* send = s + expectlen;
    1266              :     UV uv = *s;
    1267              : 
    1268              :     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
    1269              : 
    1270              :     if (retlen) {
    1271              :         *retlen = expectlen;
    1272              :     }
    1273              : 
    1274              :     /* An invariant is trivially returned */
    1275              :     if (expectlen == 1) {
    1276              :         return uv;
    1277              :     }
    1278              : 
    1279              :     /* Remove the leading bits that indicate the number of bytes, leaving just
    1280              :      * the bits that are part of the value */
    1281              :     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
    1282              : 
    1283              :     /* Now, loop through the remaining bytes, accumulating each into the
    1284              :      * working total as we go.  (I khw tried unrolling the loop for up to 4
    1285              :      * bytes, but there was no performance improvement) */
    1286              :     for (++s; s < send; s++) {
    1287              :         uv = UTF8_ACCUMULATE(uv, *s);
    1288              :     }
    1289              : 
    1290              :     return UNI_TO_NATIVE(uv);
    1291              : 
    1292              : }
    1293              : 
    1294              : /*
    1295              : =for apidoc is_utf8_invariant_string
    1296              : 
    1297              : Returns TRUE if the first C<len> bytes of the string C<s> are the same
    1298              : regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
    1299              : EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
    1300              : are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
    1301              : the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
    1302              : characters are invariant, but so also are the C1 controls.
    1303              : 
    1304              : If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
    1305              : use this option, that C<s> can't have embedded C<NUL> characters and has to
    1306              : have a terminating C<NUL> byte).
    1307              : 
    1308              : See also
    1309              : C<L</is_utf8_string>>,
    1310              : C<L</is_utf8_string_flags>>,
    1311              : C<L</is_utf8_string_loc>>,
    1312              : C<L</is_utf8_string_loc_flags>>,
    1313              : C<L</is_utf8_string_loclen>>,
    1314              : C<L</is_utf8_string_loclen_flags>>,
    1315              : C<L</is_utf8_fixed_width_buf_flags>>,
    1316              : C<L</is_utf8_fixed_width_buf_loc_flags>>,
    1317              : C<L</is_utf8_fixed_width_buf_loclen_flags>>,
    1318              : C<L</is_strict_utf8_string>>,
    1319              : C<L</is_strict_utf8_string_loc>>,
    1320              : C<L</is_strict_utf8_string_loclen>>,
    1321              : C<L</is_c9strict_utf8_string>>,
    1322              : C<L</is_c9strict_utf8_string_loc>>,
    1323              : and
    1324              : C<L</is_c9strict_utf8_string_loclen>>.
    1325              : 
    1326              : =cut
    1327              : 
    1328              : */
    1329              : 
    1330              : #define is_utf8_invariant_string(s, len)                                    \
    1331              :                                 is_utf8_invariant_string_loc(s, len, NULL)
    1332              : 
    1333              : /*
    1334              : =for apidoc is_utf8_invariant_string_loc
    1335              : 
    1336              : Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
    1337              : the first UTF-8 variant character in the C<ep> pointer; if all characters are
    1338              : UTF-8 invariant, this function does not change the contents of C<*ep>.
    1339              : 
    1340              : =cut
    1341              : 
    1342              : */
    1343              : 
    1344              : PERL_STATIC_INLINE bool
    1345              : Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
    1346              : {
    1347              :     const U8* send;
    1348              :     const U8* x = s;
    1349              : 
    1350              :     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
    1351              : 
    1352              :     if (len == 0) {
    1353              :         len = strlen((const char *)s);
    1354              :     }
    1355              : 
    1356              :     send = s + len;
    1357              : 
    1358              : /* This looks like 0x010101... */
    1359              : #  define PERL_COUNT_MULTIPLIER   (~ (UINTMAX_C(0)) / 0xFF)
    1360              : 
    1361              : /* This looks like 0x808080... */
    1362              : #  define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
    1363              : #  define PERL_WORDSIZE            sizeof(PERL_UINTMAX_T)
    1364              : #  define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
    1365              : 
    1366              : /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
    1367              :  * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
    1368              :  * optimized out completely on a 32-bit system, and its mask gets optimized out
    1369              :  * on a 64-bit system */
    1370              : #  define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                     \
    1371              :                                       |   (  PTR2nat(x) >> 1)                 \
    1372              :                                       | ( ( (PTR2nat(x)                       \
    1373              :                                            & PERL_WORD_BOUNDARY_MASK) >> 2))))
    1374              : 
    1375              : #ifndef EBCDIC
    1376              : 
    1377              :     /* Do the word-at-a-time iff there is at least one usable full word.  That
    1378              :      * means that after advancing to a word boundary, there still is at least a
    1379              :      * full word left.  The number of bytes needed to advance is 'wordsize -
    1380              :      * offset' unless offset is 0. */
    1381              :     if ((STRLEN) (send - x) >= PERL_WORDSIZE
    1382              : 
    1383              :                             /* This term is wordsize if subword; 0 if not */
    1384              :                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
    1385              : 
    1386              :                             /* 'offset' */
    1387              :                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
    1388              :     {
    1389              : 
    1390              :         /* Process per-byte until reach word boundary.  XXX This loop could be
    1391              :          * eliminated if we knew that this platform had fast unaligned reads */
    1392              :         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
    1393              :             if (! UTF8_IS_INVARIANT(*x)) {
    1394              :                 if (ep) {
    1395              :                     *ep = x;
    1396              :                 }
    1397              : 
    1398              :                 return FALSE;
    1399              :             }
    1400              :             x++;
    1401              :         }
    1402              : 
    1403              :         /* Here, we know we have at least one full word to process.  Process
    1404              :          * per-word as long as we have at least a full word left */
    1405              :         do {
    1406              :             if ((* (const PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK)  {
    1407              : 
    1408              :                 /* Found a variant.  Just return if caller doesn't want its
    1409              :                  * exact position */
    1410              :                 if (! ep) {
    1411              :                     return FALSE;
    1412              :                 }
    1413              : 
    1414              : #  if   BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    \
    1415              :      || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
    1416              : 
    1417              :                 *ep = x + variant_byte_number(* (const PERL_UINTMAX_T *) x);
    1418              :                 assert(*ep >= s && *ep < send);
    1419              : 
    1420              :                 return FALSE;
    1421              : 
    1422              : #  else   /* If weird byte order, drop into next loop to do byte-at-a-time
    1423              :            checks. */
    1424              : 
    1425              :                 break;
    1426              : #  endif
    1427              :             }
    1428              : 
    1429              :             x += PERL_WORDSIZE;
    1430              : 
    1431              :         } while (x + PERL_WORDSIZE <= send);
    1432              :     }
    1433              : 
    1434              : #endif      /* End of ! EBCDIC */
    1435              : 
    1436              :     /* Process per-byte */
    1437              :     while (x < send) {
    1438              :         if (! UTF8_IS_INVARIANT(*x)) {
    1439              :             if (ep) {
    1440              :                 *ep = x;
    1441              :             }
    1442              : 
    1443              :             return FALSE;
    1444              :         }
    1445              : 
    1446              :         x++;
    1447              :     }
    1448              : 
    1449              :     return TRUE;
    1450              : }
    1451              : 
    1452              : /* See if the platform has builtins for finding the most/least significant bit,
    1453              :  * and which one is right for using on 32 and 64 bit operands */
    1454              : #if (__has_builtin(__builtin_clz) || PERL_GCC_VERSION_GE(3,4,0))
    1455              : #  if U32SIZE == INTSIZE
    1456              : #    define PERL_CLZ_32 __builtin_clz
    1457              : #  endif
    1458              : #  if defined(U64TYPE) && U64SIZE == INTSIZE
    1459              : #    define PERL_CLZ_64 __builtin_clz
    1460              : #  endif
    1461              : #endif
    1462              : #if (__has_builtin(__builtin_ctz) || PERL_GCC_VERSION_GE(3,4,0))
    1463              : #  if U32SIZE == INTSIZE
    1464              : #    define PERL_CTZ_32 __builtin_ctz
    1465              : #  endif
    1466              : #  if defined(U64TYPE) && U64SIZE == INTSIZE
    1467              : #    define PERL_CTZ_64 __builtin_ctz
    1468              : #  endif
    1469              : #endif
    1470              : 
    1471              : #if (__has_builtin(__builtin_clzl) || PERL_GCC_VERSION_GE(3,4,0))
    1472              : #  if U32SIZE == LONGSIZE && ! defined(PERL_CLZ_32)
    1473              : #    define PERL_CLZ_32 __builtin_clzl
    1474              : #  endif
    1475              : #  if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CLZ_64)
    1476              : #    define PERL_CLZ_64 __builtin_clzl
    1477              : #  endif
    1478              : #endif
    1479              : #if (__has_builtin(__builtin_ctzl) || PERL_GCC_VERSION_GE(3,4,0))
    1480              : #  if U32SIZE == LONGSIZE && ! defined(PERL_CTZ_32)
    1481              : #    define PERL_CTZ_32 __builtin_ctzl
    1482              : #  endif
    1483              : #  if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CTZ_64)
    1484              : #    define PERL_CTZ_64 __builtin_ctzl
    1485              : #  endif
    1486              : #endif
    1487              : 
    1488              : #if (__has_builtin(__builtin_clzll) || PERL_GCC_VERSION_GE(3,4,0))
    1489              : #  if U32SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_32)
    1490              : #    define PERL_CLZ_32 __builtin_clzll
    1491              : #  endif
    1492              : #  if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_64)
    1493              : #    define PERL_CLZ_64 __builtin_clzll
    1494              : #  endif
    1495              : #endif
    1496              : #if (__has_builtin(__builtin_ctzll) || PERL_GCC_VERSION_GE(3,4,0))
    1497              : #  if U32SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_32)
    1498              : #    define PERL_CTZ_32 __builtin_ctzll
    1499              : #  endif
    1500              : #  if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_64)
    1501              : #    define PERL_CTZ_64 __builtin_ctzll
    1502              : #  endif
    1503              : #endif
    1504              : 
    1505              : #if defined(WIN32)
    1506              : #  include <intrin.h>
    1507              :    /* MinGW warns that it ignores "pragma intrinsic". */
    1508              : #  if defined(_MSC_VER)
    1509              : #    pragma intrinsic(_BitScanForward)
    1510              : #    pragma intrinsic(_BitScanReverse)
    1511              : #    if defined(_WIN64)
    1512              : #      pragma intrinsic(_BitScanForward64)
    1513              : #      pragma intrinsic(_BitScanReverse64)
    1514              : #    endif
    1515              : #  endif
    1516              : #endif
    1517              : 
    1518              : /* The reason there are not checks to see if ffs() and ffsl() are available for
    1519              :  * determining the lsb, is because these don't improve on the deBruijn method
    1520              :  * fallback, which is just a branchless integer multiply, array element
    1521              :  * retrieval, and shift.  The others, even if the function call overhead is
    1522              :  * optimized out, have to cope with the possibility of the input being all
    1523              :  * zeroes, and almost certainly will have conditionals for this eventuality.
    1524              :  * khw, at the time of this commit, looked at the source for both gcc and clang
    1525              :  * to verify this.  (gcc used a method inferior to deBruijn.) */
    1526              : 
    1527              : /* Below are functions to find the first, last, or only set bit in a word.  On
    1528              :  * platforms with 64-bit capability, there is a pair for each operation; the
    1529              :  * first taking a 64 bit operand, and the second a 32 bit one.  The logic is
    1530              :  * the same in each pair, so the second is stripped of most comments. */
    1531              : 
    1532              : #ifdef U64TYPE  /* HAS_QUAD not usable outside the core */
    1533              : 
    1534              : PERL_STATIC_INLINE unsigned
    1535              : Perl_lsbit_pos64(U64 word)
    1536              : {
    1537              :     /* Find the position (0..63) of the least significant set bit in the input
    1538              :      * word */
    1539              : 
    1540              :     ASSUME(word != 0);
    1541              : 
    1542              :     /* If we can determine that the platform has a usable fast method to get
    1543              :      * this info, use that */
    1544              : 
    1545              : #  if defined(PERL_CTZ_64)
    1546              : #    define PERL_HAS_FAST_GET_LSB_POS64
    1547              : 
    1548              :     return (unsigned) PERL_CTZ_64(word);
    1549              : 
    1550              : #  elif U64SIZE == 8 && defined(_WIN64)
    1551              : #    define PERL_HAS_FAST_GET_LSB_POS64
    1552              : 
    1553              :     {
    1554              :         unsigned long index;
    1555              :         _BitScanForward64(&index, word);
    1556              :         return (unsigned)index;
    1557              :     }
    1558              : 
    1559              : #  else
    1560              : 
    1561              :     /* Here, we didn't find a fast method for finding the lsb.  Fall back to
    1562              :      * making the lsb the only set bit in the word, and use our function that
    1563              :      * works on words with a single bit set.
    1564              :      *
    1565              :      * Isolate the lsb;
    1566              :      * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
    1567              :      *
    1568              :      * The word will look like this, with a rightmost set bit in position 's':
    1569              :      * ('x's are don't cares, and 'y's are their complements)
    1570              :      *      s
    1571              :      *  x..x100..00
    1572              :      *  y..y011..11      Complement
    1573              :      *  y..y100..00      Add 1
    1574              :      *  0..0100..00      And with the original
    1575              :      *
    1576              :      *  (Yes, complementing and adding 1 is just taking the negative on 2's
    1577              :      *  complement machines, but not on 1's complement ones, and some compilers
    1578              :      *  complain about negating an unsigned.)
    1579              :      */
    1580              :     return single_1bit_pos64(word & (~word + 1));
    1581              : 
    1582              : #  endif
    1583              : 
    1584              : }
    1585              : 
    1586              : #  define lsbit_pos_uintmax_(word) lsbit_pos64(word)
    1587              : #else   /* ! QUAD */
    1588              : #  define lsbit_pos_uintmax_(word) lsbit_pos32(word)
    1589              : #endif
    1590              : 
    1591              : PERL_STATIC_INLINE unsigned     /* Like above for 32 bit word */
    1592              : Perl_lsbit_pos32(U32 word)
    1593              : {
    1594              :     /* Find the position (0..31) of the least significant set bit in the input
    1595              :      * word */
    1596              : 
    1597              :     ASSUME(word != 0);
    1598              : 
    1599              : #if defined(PERL_CTZ_32)
    1600              : #  define PERL_HAS_FAST_GET_LSB_POS32
    1601              : 
    1602              :     return (unsigned) PERL_CTZ_32(word);
    1603              : 
    1604              : #elif U32SIZE == 4 && defined(WIN32)
    1605              : #  define PERL_HAS_FAST_GET_LSB_POS32
    1606              : 
    1607              :     {
    1608              :         unsigned long index;
    1609              :         _BitScanForward(&index, word);
    1610              :         return (unsigned)index;
    1611              :     }
    1612              : 
    1613              : #elif defined(PERL_HAS_FAST_GET_LSB_POS64)
    1614              : #  define PERL_HAS_FAST_GET_LSB_POS32
    1615              : 
    1616              :     /* Unlikely, but possible for the platform to have a wider fast operation
    1617              :      * but not a narrower one.  But easy enough to handle the case by widening
    1618              :      * the parameter size. */
    1619              :     return lsbit_pos64(word);
    1620              : 
    1621              : #else
    1622              : 
    1623              :     return single_1bit_pos32(word & (~word + 1));
    1624              : 
    1625              : #endif
    1626              : 
    1627              : }
    1628              : 
    1629              : 
    1630              : /* Convert the leading zeros count to the bit position of the first set bit.
    1631              :  * This just subtracts from the highest position, 31 or 63.  But some compilers
    1632              :  * don't optimize this optimally, and so a bit of bit twiddling encourages them
    1633              :  * to do the right thing.  It turns out that subtracting a smaller non-negative
    1634              :  * number 'x' from 2**n-1 for any n is the same as taking the exclusive-or of
    1635              :  * the two numbers.  To see why, first note that the sum of any number, x, and
    1636              :  * its complement, x', is all ones.  So all ones minus x is x'.  Then note that
    1637              :  * the xor of x and all ones is x'. */
    1638              : #define LZC_TO_MSBIT_POS_(size, lzc)  ((size##SIZE * CHARBITS - 1) ^ (lzc))
    1639              : 
    1640              : #ifdef U64TYPE  /* HAS_QUAD not usable outside the core */
    1641              : 
    1642              : PERL_STATIC_INLINE unsigned
    1643              : Perl_msbit_pos64(U64 word)
    1644              : {
    1645              :     /* Find the position (0..63) of the most significant set bit in the input
    1646              :      * word */
    1647              : 
    1648              :     ASSUME(word != 0);
    1649              : 
    1650              :     /* If we can determine that the platform has a usable fast method to get
    1651              :      * this, use that */
    1652              : 
    1653              : #  if defined(PERL_CLZ_64)
    1654              : #    define PERL_HAS_FAST_GET_MSB_POS64
    1655              : 
    1656              :     return (unsigned) LZC_TO_MSBIT_POS_(U64, PERL_CLZ_64(word));
    1657              : 
    1658              : #  elif U64SIZE == 8 && defined(_WIN64)
    1659              : #    define PERL_HAS_FAST_GET_MSB_POS64
    1660              : 
    1661              :     {
    1662              :         unsigned long index;
    1663              :         _BitScanReverse64(&index, word);
    1664              :         return (unsigned)index;
    1665              :     }
    1666              : 
    1667              : #  else
    1668              : 
    1669              :     /* Here, we didn't find a fast method for finding the msb.  Fall back to
    1670              :      * making the msb the only set bit in the word, and use our function that
    1671              :      * works on words with a single bit set.
    1672              :      *
    1673              :      * Isolate the msb; http://codeforces.com/blog/entry/10330
    1674              :      *
    1675              :      * Only the most significant set bit matters.  Or'ing word with its right
    1676              :      * shift of 1 makes that bit and the next one to its right both 1.
    1677              :      * Repeating that with the right shift of 2 makes for 4 1-bits in a row.
    1678              :      * ...  We end with the msb and all to the right being 1. */
    1679              :     word |= (word >>  1);
    1680              :     word |= (word >>  2);
    1681              :     word |= (word >>  4);
    1682              :     word |= (word >>  8);
    1683              :     word |= (word >> 16);
    1684              :     word |= (word >> 32);
    1685              : 
    1686              :     /* Then subtracting the right shift by 1 clears all but the left-most of
    1687              :      * the 1 bits, which is our desired result */
    1688              :     word -= (word >> 1);
    1689              : 
    1690              :     /* Now we have a single bit set */
    1691              :     return single_1bit_pos64(word);
    1692              : 
    1693              : #  endif
    1694              : 
    1695              : }
    1696              : 
    1697              : #  define msbit_pos_uintmax_(word) msbit_pos64(word)
    1698              : #else   /* ! QUAD */
    1699              : #  define msbit_pos_uintmax_(word) msbit_pos32(word)
    1700              : #endif
    1701              : 
    1702              : PERL_STATIC_INLINE unsigned
    1703              : Perl_msbit_pos32(U32 word)
    1704              : {
    1705              :     /* Find the position (0..31) of the most significant set bit in the input
    1706              :      * word */
    1707              : 
    1708              :     ASSUME(word != 0);
    1709              : 
    1710              : #if defined(PERL_CLZ_32)
    1711              : #  define PERL_HAS_FAST_GET_MSB_POS32
    1712              : 
    1713              :     return (unsigned) LZC_TO_MSBIT_POS_(U32, PERL_CLZ_32(word));
    1714              : #elif U32SIZE == 4 && defined(WIN32)
    1715              : #  define PERL_HAS_FAST_GET_MSB_POS32
    1716              : 
    1717              :     {
    1718              :         unsigned long index;
    1719              :         _BitScanReverse(&index, word);
    1720              :         return (unsigned)index;
    1721              :     }
    1722              : 
    1723              : #elif defined(PERL_HAS_FAST_GET_MSB_POS64)
    1724              : #  define PERL_HAS_FAST_GET_MSB_POS32
    1725              : 
    1726              :     return msbit_pos64(word);   /* Let compiler widen parameter */
    1727              : 
    1728              : #else
    1729              : 
    1730              :     word |= (word >>  1);
    1731              :     word |= (word >>  2);
    1732              :     word |= (word >>  4);
    1733              :     word |= (word >>  8);
    1734              :     word |= (word >> 16);
    1735              :     word -= (word >> 1);
    1736              :     return single_1bit_pos32(word);
    1737              : 
    1738              : #endif
    1739              : 
    1740              : }
    1741              : 
    1742              : /* Note that if you are working through all the 1 bits in a word, and don't
    1743              :  * care which order you process them in, it is better to use lsbit_pos.  This
    1744              :  * is because some platforms have a fast way to find the msb but not the lsb,
    1745              :  * and others vice versa.  The code above falls back to use the single
    1746              :  * available fast method when the desired one is missing, and it is cheaper to
    1747              :  * fall back from lsb to msb than the other way around */
    1748              : 
    1749              : #if UVSIZE == U64SIZE
    1750              : #  define msbit_pos(word)  msbit_pos64(word)
    1751              : #  define lsbit_pos(word)  lsbit_pos64(word)
    1752              : #elif UVSIZE == U32SIZE
    1753              : #  define msbit_pos(word)  msbit_pos32(word)
    1754              : #  define lsbit_pos(word)  lsbit_pos32(word)
    1755              : #endif
    1756              : 
    1757              : #ifdef U64TYPE  /* HAS_QUAD not usable outside the core */
    1758              : 
    1759              : PERL_STATIC_INLINE unsigned
    1760              : Perl_single_1bit_pos64(U64 word)
    1761              : {
    1762              :     /* Given a 64-bit word known to contain all zero bits except one 1 bit,
    1763              :      * find and return the 1's position: 0..63 */
    1764              : 
    1765              : #  ifdef PERL_CORE    /* macro not exported */
    1766              :     ASSUME(isPOWER_OF_2(word));
    1767              : #  else
    1768              :     ASSUME(word && (word & (word-1)) == 0);
    1769              : #  endif
    1770              : 
    1771              :     /* The only set bit is both the most and least significant bit.  If we have
    1772              :      * a fast way of finding either one, use that.
    1773              :      *
    1774              :      * It may appear at first glance that those functions call this one, but
    1775              :      * they don't if the corresponding #define is set */
    1776              : 
    1777              : #  ifdef PERL_HAS_FAST_GET_MSB_POS64
    1778              : 
    1779              :     return msbit_pos64(word);
    1780              : 
    1781              : #  elif defined(PERL_HAS_FAST_GET_LSB_POS64)
    1782              : 
    1783              :     return lsbit_pos64(word);
    1784              : 
    1785              : #  else
    1786              : 
    1787              :     /* The position of the only set bit in a word can be quickly calculated
    1788              :      * using deBruijn sequences.  See for example
    1789              :      * https://en.wikipedia.org/wiki/De_Bruijn_sequence */
    1790              :     return PL_deBruijn_bitpos_tab64[(word * PERL_deBruijnMagic64_)
    1791              :                                                     >> PERL_deBruijnShift64_];
    1792              : #  endif
    1793              : 
    1794              : }
    1795              : 
    1796              : #endif
    1797              : 
    1798              : PERL_STATIC_INLINE unsigned
    1799              : Perl_single_1bit_pos32(U32 word)
    1800              : {
    1801              :     /* Given a 32-bit word known to contain all zero bits except one 1 bit,
    1802              :      * find and return the 1's position: 0..31 */
    1803              : 
    1804              : #ifdef PERL_CORE    /* macro not exported */
    1805              :     ASSUME(isPOWER_OF_2(word));
    1806              : #else
    1807              :     ASSUME(word && (word & (word-1)) == 0);
    1808              : #endif
    1809              : #ifdef PERL_HAS_FAST_GET_MSB_POS32
    1810              : 
    1811              :     return msbit_pos32(word);
    1812              : 
    1813              : #elif defined(PERL_HAS_FAST_GET_LSB_POS32)
    1814              : 
    1815              :     return lsbit_pos32(word);
    1816              : 
    1817              : #else
    1818              : 
    1819              :     return PL_deBruijn_bitpos_tab32[(word * PERL_deBruijnMagic32_)
    1820              :                                                     >> PERL_deBruijnShift32_];
    1821              : #endif
    1822              : 
    1823              : }
    1824              : 
    1825              : #ifndef EBCDIC
    1826              : 
    1827              : PERL_STATIC_INLINE unsigned int
    1828              : Perl_variant_byte_number(PERL_UINTMAX_T word)
    1829              : {
    1830              :     /* This returns the position in a word (0..7) of the first variant byte in
    1831              :      * it.  This is a helper function.  Note that there are no branches */
    1832              : 
    1833              :     /* Get just the msb bits of each byte */
    1834              :     word &= PERL_VARIANTS_WORD_MASK;
    1835              : 
    1836              :     /* This should only be called if we know there is a variant byte in the
    1837              :      * word */
    1838              :     assert(word);
    1839              : 
    1840              : #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
    1841              : 
    1842              :     /* Bytes are stored like
    1843              :      *  Byte8 ... Byte2 Byte1
    1844              :      *  63..56...15...8 7...0
    1845              :      * so getting the lsb of the whole modified word is getting the msb of the
    1846              :      * first byte that has its msb set */
    1847              :     word = lsbit_pos_uintmax_(word);
    1848              : 
    1849              :     /* Here, word contains the position 7,15,23,...55,63 of that bit.  Convert
    1850              :      * to 0..7 */
    1851              :     return (unsigned int) ((word + 1) >> 3) - 1;
    1852              : 
    1853              : #  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
    1854              : 
    1855              :     /* Bytes are stored like
    1856              :      *  Byte1 Byte2  ... Byte8
    1857              :      * 63..56 55..47 ... 7...0
    1858              :      * so getting the msb of the whole modified word is getting the msb of the
    1859              :      * first byte that has its msb set */
    1860              :     word = msbit_pos_uintmax_(word);
    1861              : 
    1862              :     /* Here, word contains the position 63,55,...,23,15,7 of that bit.  Convert
    1863              :      * to 0..7 */
    1864              :     word = ((word + 1) >> 3) - 1;
    1865              : 
    1866              :     /* And invert the result because of the reversed byte order on this
    1867              :      * platform */
    1868              :     word = CHARBITS - word - 1;
    1869              : 
    1870              :     return (unsigned int) word;
    1871              : 
    1872              : #  else
    1873              : #    error Unexpected byte order
    1874              : #  endif
    1875              : 
    1876              : }
    1877              : 
    1878              : #endif
    1879              : #if defined(PERL_CORE) || defined(PERL_EXT)
    1880              : 
    1881              : /*
    1882              : =for apidoc variant_under_utf8_count
    1883              : 
    1884              : This function looks at the sequence of bytes between C<s> and C<e>, which are
    1885              : assumed to be encoded in ASCII/Latin1, and returns how many of them would
    1886              : change should the string be translated into UTF-8.  Due to the nature of UTF-8,
    1887              : each of these would occupy two bytes instead of the single one in the input
    1888              : string.  Thus, this function returns the precise number of bytes the string
    1889              : would expand by when translated to UTF-8.
    1890              : 
    1891              : Unlike most of the other functions that have C<utf8> in their name, the input
    1892              : to this function is NOT a UTF-8-encoded string.  The function name is slightly
    1893              : I<odd> to emphasize this.
    1894              : 
    1895              : This function is internal to Perl because khw thinks that any XS code that
    1896              : would want this is probably operating too close to the internals.  Presenting a
    1897              : valid use case could change that.
    1898              : 
    1899              : See also
    1900              : C<L<perlapi/is_utf8_invariant_string>>
    1901              : and
    1902              : C<L<perlapi/is_utf8_invariant_string_loc>>,
    1903              : 
    1904              : =cut
    1905              : 
    1906              : */
    1907              : 
    1908              : PERL_STATIC_INLINE Size_t
    1909              : S_variant_under_utf8_count(const U8* const s, const U8* const e)
    1910              : {
    1911              :     const U8* x = s;
    1912              :     Size_t count = 0;
    1913              : 
    1914              :     PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
    1915              : 
    1916              : #  ifndef EBCDIC
    1917              : 
    1918              :     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
    1919              :      * same as for is_utf8_invariant_string()) */
    1920              :     if ((STRLEN) (e - x) >= PERL_WORDSIZE
    1921              :                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
    1922              :                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
    1923              :     {
    1924              : 
    1925              :         /* Process per-byte until reach word boundary.  XXX This loop could be
    1926              :          * eliminated if we knew that this platform had fast unaligned reads */
    1927              :         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
    1928              :             count += ! UTF8_IS_INVARIANT(*x++);
    1929              :         }
    1930              : 
    1931              :         /* Process per-word as long as we have at least a full word left */
    1932              :         do {    /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
    1933              :                    explanation of how this works */
    1934              :             PERL_UINTMAX_T increment
    1935              :                 = ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
    1936              :                       * PERL_COUNT_MULTIPLIER)
    1937              :                     >> ((PERL_WORDSIZE - 1) * CHARBITS);
    1938              :             count += (Size_t) increment;
    1939              :             x += PERL_WORDSIZE;
    1940              :         } while (x + PERL_WORDSIZE <= e);
    1941              :     }
    1942              : 
    1943              : #  endif
    1944              : 
    1945              :     /* Process per-byte */
    1946              :     while (x < e) {
    1947              :         if (! UTF8_IS_INVARIANT(*x)) {
    1948              :             count++;
    1949              :         }
    1950              : 
    1951              :         x++;
    1952              :     }
    1953              : 
    1954              :     return count;
    1955              : }
    1956              : 
    1957              : #endif
    1958              : 
    1959              :    /* Keep  these around for these files */
    1960              : #if ! defined(PERL_IN_REGEXEC_C) && ! defined(PERL_IN_UTF8_C)
    1961              : #  undef PERL_WORDSIZE
    1962              : #  undef PERL_COUNT_MULTIPLIER
    1963              : #  undef PERL_WORD_BOUNDARY_MASK
    1964              : #  undef PERL_VARIANTS_WORD_MASK
    1965              : #endif
    1966              : 
    1967              : /*
    1968              : =for apidoc is_utf8_string
    1969              : 
    1970              : Returns TRUE if the first C<len> bytes of string C<s> form a valid
    1971              : Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
    1972              : be calculated using C<strlen(s)> (which means if you use this option, that C<s>
    1973              : can't have embedded C<NUL> characters and has to have a terminating C<NUL>
    1974              : byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
    1975              : 
    1976              : This function considers Perl's extended UTF-8 to be valid.  That means that
    1977              : code points above Unicode, surrogates, and non-character code points are
    1978              : considered valid by this function.  Use C<L</is_strict_utf8_string>>,
    1979              : C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
    1980              : code points are considered valid.
    1981              : 
    1982              : See also
    1983              : C<L</is_utf8_invariant_string>>,
    1984              : C<L</is_utf8_invariant_string_loc>>,
    1985              : C<L</is_utf8_string_loc>>,
    1986              : C<L</is_utf8_string_loclen>>,
    1987              : C<L</is_utf8_fixed_width_buf_flags>>,
    1988              : C<L</is_utf8_fixed_width_buf_loc_flags>>,
    1989              : C<L</is_utf8_fixed_width_buf_loclen_flags>>,
    1990              : 
    1991              : =cut
    1992              : */
    1993              : 
    1994              : #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
    1995              : 
    1996              : #if defined(PERL_CORE) || defined (PERL_EXT)
    1997              : 
    1998              : /*
    1999              : =for apidoc is_utf8_non_invariant_string
    2000              : 
    2001              : Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
    2002              : C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
    2003              : UTF-8; otherwise returns FALSE.
    2004              : 
    2005              : A TRUE return means that at least one code point represented by the sequence
    2006              : either is a wide character not representable as a single byte, or the
    2007              : representation differs depending on whether the sequence is encoded in UTF-8 or
    2008              : not.
    2009              : 
    2010              : See also
    2011              : C<L<perlapi/is_utf8_invariant_string>>,
    2012              : C<L<perlapi/is_utf8_string>>
    2013              : 
    2014              : =cut
    2015              : 
    2016              : This is commonly used to determine if a SV's UTF-8 flag should be turned on.
    2017              : It generally needn't be if its string is entirely UTF-8 invariant, and it
    2018              : shouldn't be if it otherwise contains invalid UTF-8.
    2019              : 
    2020              : It is an internal function because khw thinks that XS code shouldn't be working
    2021              : at this low a level.  A valid use case could change that.
    2022              : 
    2023              : */
    2024              : 
    2025              : PERL_STATIC_INLINE bool
    2026              : Perl_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
    2027              : {
    2028              :     const U8 * first_variant;
    2029              : 
    2030              :     PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
    2031              : 
    2032              :     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2033              :         return FALSE;
    2034              :     }
    2035              : 
    2036              :     return is_utf8_string(first_variant, len - (first_variant - s));
    2037              : }
    2038              : 
    2039              : #endif
    2040              : 
    2041              : /*
    2042              : =for apidoc is_strict_utf8_string
    2043              : 
    2044              : Returns TRUE if the first C<len> bytes of string C<s> form a valid
    2045              : UTF-8-encoded string that is fully interchangeable by any application using
    2046              : Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
    2047              : calculated using C<strlen(s)> (which means if you use this option, that C<s>
    2048              : can't have embedded C<NUL> characters and has to have a terminating C<NUL>
    2049              : byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
    2050              : 
    2051              : This function returns FALSE for strings containing any
    2052              : code points above the Unicode max of 0x10FFFF, surrogate code points, or
    2053              : non-character code points.
    2054              : 
    2055              : See also
    2056              : C<L</is_utf8_invariant_string>>,
    2057              : C<L</is_utf8_invariant_string_loc>>,
    2058              : C<L</is_utf8_string>>,
    2059              : C<L</is_utf8_string_flags>>,
    2060              : C<L</is_utf8_string_loc>>,
    2061              : C<L</is_utf8_string_loc_flags>>,
    2062              : C<L</is_utf8_string_loclen>>,
    2063              : C<L</is_utf8_string_loclen_flags>>,
    2064              : C<L</is_utf8_fixed_width_buf_flags>>,
    2065              : C<L</is_utf8_fixed_width_buf_loc_flags>>,
    2066              : C<L</is_utf8_fixed_width_buf_loclen_flags>>,
    2067              : C<L</is_strict_utf8_string_loc>>,
    2068              : C<L</is_strict_utf8_string_loclen>>,
    2069              : C<L</is_c9strict_utf8_string>>,
    2070              : C<L</is_c9strict_utf8_string_loc>>,
    2071              : and
    2072              : C<L</is_c9strict_utf8_string_loclen>>.
    2073              : 
    2074              : =cut
    2075              : */
    2076              : 
    2077              : #define is_strict_utf8_string(s, len)  is_strict_utf8_string_loclen(s, len, NULL, NULL)
    2078              : 
    2079              : /*
    2080              : =for apidoc is_c9strict_utf8_string
    2081              : 
    2082              : Returns TRUE if the first C<len> bytes of string C<s> form a valid
    2083              : UTF-8-encoded string that conforms to
    2084              : L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
    2085              : otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
    2086              : C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
    2087              : C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
    2088              : characters being ASCII constitute 'a valid UTF-8 string'.
    2089              : 
    2090              : This function returns FALSE for strings containing any code points above the
    2091              : Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
    2092              : code points per
    2093              : L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
    2094              : 
    2095              : See also
    2096              : C<L</is_utf8_invariant_string>>,
    2097              : C<L</is_utf8_invariant_string_loc>>,
    2098              : C<L</is_utf8_string>>,
    2099              : C<L</is_utf8_string_flags>>,
    2100              : C<L</is_utf8_string_loc>>,
    2101              : C<L</is_utf8_string_loc_flags>>,
    2102              : C<L</is_utf8_string_loclen>>,
    2103              : C<L</is_utf8_string_loclen_flags>>,
    2104              : C<L</is_utf8_fixed_width_buf_flags>>,
    2105              : C<L</is_utf8_fixed_width_buf_loc_flags>>,
    2106              : C<L</is_utf8_fixed_width_buf_loclen_flags>>,
    2107              : C<L</is_strict_utf8_string>>,
    2108              : C<L</is_strict_utf8_string_loc>>,
    2109              : C<L</is_strict_utf8_string_loclen>>,
    2110              : C<L</is_c9strict_utf8_string_loc>>,
    2111              : and
    2112              : C<L</is_c9strict_utf8_string_loclen>>.
    2113              : 
    2114              : =cut
    2115              : */
    2116              : 
    2117              : #define is_c9strict_utf8_string(s, len)  is_c9strict_utf8_string_loclen(s, len, NULL, 0)
    2118              : 
    2119              : /*
    2120              : =for apidoc is_utf8_string_flags
    2121              : 
    2122              : Returns TRUE if the first C<len> bytes of string C<s> form a valid
    2123              : UTF-8 string, subject to the restrictions imposed by C<flags>;
    2124              : returns FALSE otherwise.  If C<len> is 0, it will be calculated
    2125              : using C<strlen(s)> (which means if you use this option, that C<s> can't have
    2126              : embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
    2127              : that all characters being ASCII constitute 'a valid UTF-8 string'.
    2128              : 
    2129              : If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
    2130              : C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
    2131              : as C<L</is_strict_utf8_string>>; and if C<flags> is
    2132              : C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
    2133              : C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
    2134              : combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
    2135              : C<L</utf8n_to_uvchr>>, with the same meanings.
    2136              : 
    2137              : See also
    2138              : C<L</is_utf8_invariant_string>>,
    2139              : C<L</is_utf8_invariant_string_loc>>,
    2140              : C<L</is_utf8_string>>,
    2141              : C<L</is_utf8_string_loc>>,
    2142              : C<L</is_utf8_string_loc_flags>>,
    2143              : C<L</is_utf8_string_loclen>>,
    2144              : C<L</is_utf8_string_loclen_flags>>,
    2145              : C<L</is_utf8_fixed_width_buf_flags>>,
    2146              : C<L</is_utf8_fixed_width_buf_loc_flags>>,
    2147              : C<L</is_utf8_fixed_width_buf_loclen_flags>>,
    2148              : C<L</is_strict_utf8_string>>,
    2149              : C<L</is_strict_utf8_string_loc>>,
    2150              : C<L</is_strict_utf8_string_loclen>>,
    2151              : C<L</is_c9strict_utf8_string>>,
    2152              : C<L</is_c9strict_utf8_string_loc>>,
    2153              : and
    2154              : C<L</is_c9strict_utf8_string_loclen>>.
    2155              : 
    2156              : =cut
    2157              : */
    2158              : 
    2159              : PERL_STATIC_INLINE bool
    2160              : Perl_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
    2161              : {
    2162              :     const U8 * first_variant;
    2163              : 
    2164              :     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
    2165              :     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
    2166              :                           |UTF8_DISALLOW_PERL_EXTENDED)));
    2167              : 
    2168              :     if (len == 0) {
    2169              :         len = strlen((const char *)s);
    2170              :     }
    2171              : 
    2172              :     if (flags == 0) {
    2173              :         return is_utf8_string(s, len);
    2174              :     }
    2175              : 
    2176              :     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
    2177              :                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
    2178              :     {
    2179              :         return is_strict_utf8_string(s, len);
    2180              :     }
    2181              : 
    2182              :     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
    2183              :                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
    2184              :     {
    2185              :         return is_c9strict_utf8_string(s, len);
    2186              :     }
    2187              : 
    2188              :     if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2189              :         const U8* const send = s + len;
    2190              :         const U8* x = first_variant;
    2191              : 
    2192              :         while (x < send) {
    2193              :             STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
    2194              :             if (UNLIKELY(! cur_len)) {
    2195              :                 return FALSE;
    2196              :             }
    2197              :             x += cur_len;
    2198              :         }
    2199              :     }
    2200              : 
    2201              :     return TRUE;
    2202              : }
    2203              : 
    2204              : /*
    2205              : 
    2206              : =for apidoc is_utf8_string_loc
    2207              : 
    2208              : Like C<L</is_utf8_string>> but stores the location of the failure (in the
    2209              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2210              : "utf8ness success") in the C<ep> pointer.
    2211              : 
    2212              : See also C<L</is_utf8_string_loclen>>.
    2213              : 
    2214              : =cut
    2215              : */
    2216              : 
    2217              : #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
    2218              : 
    2219              : /*
    2220              : 
    2221              : =for apidoc is_utf8_string_loclen
    2222              : 
    2223              : Like C<L</is_utf8_string>> but stores the location of the failure (in the
    2224              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2225              : "utf8ness success") in the C<ep> pointer, and the number of UTF-8
    2226              : encoded characters in the C<el> pointer.
    2227              : 
    2228              : See also C<L</is_utf8_string_loc>>.
    2229              : 
    2230              : =cut
    2231              : */
    2232              : 
    2233              : PERL_STATIC_INLINE bool
    2234              : Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
    2235              : {
    2236              :     const U8 * first_variant;
    2237              : 
    2238              :     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
    2239              : 
    2240              :     if (len == 0) {
    2241              :         len = strlen((const char *) s);
    2242              :     }
    2243              : 
    2244              :     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2245              :         if (el)
    2246              :             *el = len;
    2247              : 
    2248              :         if (ep) {
    2249              :             *ep = s + len;
    2250              :         }
    2251              : 
    2252              :         return TRUE;
    2253              :     }
    2254              : 
    2255              :     {
    2256              :         const U8* const send = s + len;
    2257              :         const U8* x = first_variant;
    2258              :         STRLEN outlen = first_variant - s;
    2259              : 
    2260              :         while (x < send) {
    2261              :             const STRLEN cur_len = isUTF8_CHAR(x, send);
    2262              :             if (UNLIKELY(! cur_len)) {
    2263              :                 break;
    2264              :             }
    2265              :             x += cur_len;
    2266              :             outlen++;
    2267              :         }
    2268              : 
    2269              :         if (el)
    2270              :             *el = outlen;
    2271              : 
    2272              :         if (ep) {
    2273              :             *ep = x;
    2274              :         }
    2275              : 
    2276              :         return (x == send);
    2277              :     }
    2278              : }
    2279              : 
    2280              : /* The perl core arranges to never call the DFA below without there being at
    2281              :  * least one byte available to look at.  This allows the DFA to use a do {}
    2282              :  * while loop which means that calling it with a UTF-8 invariant has a single
    2283              :  * conditional, same as the calling code checking for invariance ahead of time.
    2284              :  * And having the calling code remove that conditional speeds up by that
    2285              :  * conditional, the case where it wasn't invariant.  So there's no reason to
    2286              :  * check before caling this.
    2287              :  *
    2288              :  * But we don't know this for non-core calls, so have to retain the check for
    2289              :  * them. */
    2290              : #ifdef PERL_CORE
    2291              : #  define PERL_NON_CORE_CHECK_EMPTY(s,e)  assert((e) > (s))
    2292              : #else
    2293              : #  define PERL_NON_CORE_CHECK_EMPTY(s,e)  if ((e) <= (s)) return FALSE
    2294              : #endif
    2295              : 
    2296              : /*
    2297              :  * DFA for checking input is valid UTF-8 syntax.
    2298              :  *
    2299              :  * This uses adaptations of the table and algorithm given in
    2300              :  * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
    2301              :  * documentation of the original version.  A copyright notice for the original
    2302              :  * version is given at the beginning of this file.  The Perl adaptations are
    2303              :  * documented at the definition of PL_extended_utf8_dfa_tab[].
    2304              :  *
    2305              :  * This dfa is fast.  There are three exit conditions:
    2306              :  *  1) a well-formed code point, acceptable to the table
    2307              :  *  2) the beginning bytes of an incomplete character, whose completion might
    2308              :  *     or might not be acceptable
    2309              :  *  3) unacceptable to the table.  Some of the adaptations have certain,
    2310              :  *     hopefully less likely to occur, legal inputs be unacceptable to the
    2311              :  *     table, so these must be sorted out afterwards.
    2312              :  *
    2313              :  * This macro is a complete implementation of the code executing the DFA.  It
    2314              :  * is passed the input sequence bounds and the table to use, and what to do
    2315              :  * for each of the exit conditions.  There are three canned actions, likely to
    2316              :  * be the ones you want:
    2317              :  *      DFA_RETURN_SUCCESS_
    2318              :  *      DFA_RETURN_FAILURE_
    2319              :  *      DFA_GOTO_TEASE_APART_FF_
    2320              :  *
    2321              :  * You pass a parameter giving the action to take for each of the three
    2322              :  * possible exit conditions:
    2323              :  *
    2324              :  * 'accept_action'  This is executed when the DFA accepts the input.
    2325              :  *                  DFA_RETURN_SUCCESS_ is the most likely candidate.
    2326              :  * 'reject_action'  This is executed when the DFA rejects the input.
    2327              :  *                  DFA_RETURN_FAILURE_ is a candidate, or 'goto label' where
    2328              :  *                  you have written code to distinguish the rejecting state
    2329              :  *                  results.  Because it happens in several places, and
    2330              :  *                  involves #ifdefs, the special action
    2331              :  *                  DFA_GOTO_TEASE_APART_FF_ is what you want with
    2332              :  *                  PL_extended_utf8_dfa_tab.  On platforms without
    2333              :  *                  EXTRA_LONG_UTF8, there is no need to tease anything apart,
    2334              :  *                  so this evaluates to DFA_RETURN_FAILURE_; otherwise you
    2335              :  *                  need to have a label 'tease_apart_FF' that it will transfer
    2336              :  *                  to.
    2337              :  * 'incomplete_char_action'  This is executed when the DFA ran off the end
    2338              :  *                  before accepting or rejecting the input.
    2339              :  *                  DFA_RETURN_FAILURE_ is the likely action, but you could
    2340              :  *                  have a 'goto', or NOOP.  In the latter case the DFA drops
    2341              :  *                  off the end, and you place your code to handle this case
    2342              :  *                  immediately after it.
    2343              :  */
    2344              : 
    2345              : #define DFA_RETURN_SUCCESS_      return s - s0
    2346              : #define DFA_RETURN_FAILURE_      return 0
    2347              : #ifdef HAS_EXTRA_LONG_UTF8
    2348              : #  define DFA_TEASE_APART_FF_  goto tease_apart_FF
    2349              : #else
    2350              : #  define DFA_TEASE_APART_FF_  DFA_RETURN_FAILURE_
    2351              : #endif
    2352              : 
    2353              : #define PERL_IS_UTF8_CHAR_DFA(s0, e, dfa_tab,                               \
    2354              :                               accept_action,                                \
    2355              :                               reject_action,                                \
    2356              :                               incomplete_char_action)                       \
    2357              :     STMT_START {                                                            \
    2358              :         const U8 * s = s0;                                                  \
    2359              :         const U8 * e_ = e;                                                  \
    2360              :         UV state = 0;                                                       \
    2361              :                                                                             \
    2362              :         PERL_NON_CORE_CHECK_EMPTY(s, e_);                                   \
    2363              :                                                                             \
    2364              :         do {                                                                \
    2365              :             state = dfa_tab[256 + state + dfa_tab[*s]];                     \
    2366              :             s++;                                                            \
    2367              :                                                                             \
    2368              :             if (state == 0) {   /* Accepting state */                       \
    2369              :                 accept_action;                                              \
    2370              :             }                                                               \
    2371              :                                                                             \
    2372              :             if (UNLIKELY(state == 1)) { /* Rejecting state */               \
    2373              :                 reject_action;                                              \
    2374              :             }                                                               \
    2375              :         } while (s < e_);                                                   \
    2376              :                                                                             \
    2377              :         /* Here, dropped out of loop before end-of-char */                  \
    2378              :         incomplete_char_action;                                             \
    2379              :     } STMT_END
    2380              : 
    2381              : 
    2382              : /*
    2383              : 
    2384              : =for apidoc isUTF8_CHAR
    2385              : 
    2386              : Evaluates to non-zero if the first few bytes of the string starting at C<s> and
    2387              : looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl,
    2388              : that represents some code point; otherwise it evaluates to 0.  If non-zero, the
    2389              : value gives how many bytes starting at C<s> comprise the code point's
    2390              : representation.  Any bytes remaining before C<e>, but beyond the ones needed to
    2391              : form the first code point in C<s>, are not examined.
    2392              : 
    2393              : The code point can be any that will fit in an IV on this machine, using Perl's
    2394              : extension to official UTF-8 to represent those higher than the Unicode maximum
    2395              : of 0x10FFFF.  That means that this macro is used to efficiently decide if the
    2396              : next few bytes in C<s> is legal UTF-8 for a single character.
    2397              : 
    2398              : Use C<L</isSTRICT_UTF8_CHAR>> to restrict the acceptable code points to those
    2399              : defined by Unicode to be fully interchangeable across applications;
    2400              : C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
    2401              : #9|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
    2402              : code points; and C<L</isUTF8_CHAR_flags>> for a more customized definition.
    2403              : 
    2404              : Use C<L</is_utf8_string>>, C<L</is_utf8_string_loc>>, and
    2405              : C<L</is_utf8_string_loclen>> to check entire strings.
    2406              : 
    2407              : Note also that a UTF-8 "invariant" character (i.e. ASCII on non-EBCDIC
    2408              : machines) is a valid UTF-8 character.
    2409              : 
    2410              : =cut
    2411              : 
    2412              : This uses an adaptation of the table and algorithm given in
    2413              : https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
    2414              : documentation of the original version.  A copyright notice for the original
    2415              : version is given at the beginning of this file.  The Perl adaptation is
    2416              : documented at the definition of PL_extended_utf8_dfa_tab[].
    2417              : */
    2418              : 
    2419              : PERL_STATIC_INLINE Size_t
    2420              : Perl_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
    2421              : {
    2422              :     PERL_ARGS_ASSERT_ISUTF8_CHAR;
    2423              : 
    2424              :     PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
    2425              :                           DFA_RETURN_SUCCESS_,
    2426              :                           DFA_TEASE_APART_FF_,
    2427              :                           DFA_RETURN_FAILURE_);
    2428              : 
    2429              :     /* Here, we didn't return success, but dropped out of the loop.  In the
    2430              :      * case of PL_extended_utf8_dfa_tab, this means the input is either
    2431              :      * malformed, or the start byte was FF on a platform that the dfa doesn't
    2432              :      * handle FF's.  Call a helper function. */
    2433              : 
    2434              : #ifdef HAS_EXTRA_LONG_UTF8
    2435              : 
    2436              :   tease_apart_FF:
    2437              : 
    2438              :     /* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
    2439              :      * either malformed, or was for the largest possible start byte, which we
    2440              :      * now check, not inline */
    2441              :     if (*s0 != I8_TO_NATIVE_UTF8(0xFF)) {
    2442              :         return 0;
    2443              :     }
    2444              : 
    2445              :     return is_utf8_FF_helper_(s0, e,
    2446              :                               FALSE /* require full, not partial char */
    2447              :                              );
    2448              : #endif
    2449              : 
    2450              : }
    2451              : 
    2452              : /*
    2453              : 
    2454              : =for apidoc isSTRICT_UTF8_CHAR
    2455              : 
    2456              : Evaluates to non-zero if the first few bytes of the string starting at C<s> and
    2457              : looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
    2458              : Unicode code point completely acceptable for open interchange between all
    2459              : applications; otherwise it evaluates to 0.  If non-zero, the value gives how
    2460              : many bytes starting at C<s> comprise the code point's representation.  Any
    2461              : bytes remaining before C<e>, but beyond the ones needed to form the first code
    2462              : point in C<s>, are not examined.
    2463              : 
    2464              : The largest acceptable code point is the Unicode maximum 0x10FFFF, and must not
    2465              : be a surrogate nor a non-character code point.  Thus this excludes any code
    2466              : point from Perl's extended UTF-8.
    2467              : 
    2468              : This is used to efficiently decide if the next few bytes in C<s> is
    2469              : legal Unicode-acceptable UTF-8 for a single character.
    2470              : 
    2471              : Use C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
    2472              : #9|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
    2473              : code points; C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8;
    2474              : and C<L</isUTF8_CHAR_flags>> for a more customized definition.
    2475              : 
    2476              : Use C<L</is_strict_utf8_string>>, C<L</is_strict_utf8_string_loc>>, and
    2477              : C<L</is_strict_utf8_string_loclen>> to check entire strings.
    2478              : 
    2479              : =cut
    2480              : 
    2481              : This uses an adaptation of the tables and algorithm given in
    2482              : https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
    2483              : documentation of the original version.  A copyright notice for the original
    2484              : version is given at the beginning of this file.  The Perl adaptation is
    2485              : documented at the definition of strict_extended_utf8_dfa_tab[].
    2486              : 
    2487              : */
    2488              : 
    2489              : PERL_STATIC_INLINE Size_t
    2490              : Perl_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
    2491              : {
    2492              :     PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR;
    2493              : 
    2494              :     PERL_IS_UTF8_CHAR_DFA(s0, e, PL_strict_utf8_dfa_tab,
    2495              :                           DFA_RETURN_SUCCESS_,
    2496              :                           goto check_hanguls,
    2497              :                           DFA_RETURN_FAILURE_);
    2498              :   check_hanguls:
    2499              : 
    2500              :     /* Here, we didn't return success, but dropped out of the loop.  In the
    2501              :      * case of PL_strict_utf8_dfa_tab, this means the input is either
    2502              :      * malformed, or was for certain Hanguls; handle them specially */
    2503              : 
    2504              :     /* The dfa above drops out for incomplete or illegal inputs, and certain
    2505              :      * legal Hanguls; check and return accordingly */
    2506              :     return is_HANGUL_ED_utf8_safe(s0, e);
    2507              : }
    2508              : 
    2509              : /*
    2510              : 
    2511              : =for apidoc isC9_STRICT_UTF8_CHAR
    2512              : 
    2513              : Evaluates to non-zero if the first few bytes of the string starting at C<s> and
    2514              : looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
    2515              : Unicode non-surrogate code point; otherwise it evaluates to 0.  If non-zero,
    2516              : the value gives how many bytes starting at C<s> comprise the code point's
    2517              : representation.  Any bytes remaining before C<e>, but beyond the ones needed to
    2518              : form the first code point in C<s>, are not examined.
    2519              : 
    2520              : The largest acceptable code point is the Unicode maximum 0x10FFFF.  This
    2521              : differs from C<L</isSTRICT_UTF8_CHAR>> only in that it accepts non-character
    2522              : code points.  This corresponds to
    2523              : L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
    2524              : which said that non-character code points are merely discouraged rather than
    2525              : completely forbidden in open interchange.  See
    2526              : L<perlunicode/Noncharacter code points>.
    2527              : 
    2528              : Use C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8; and
    2529              : C<L</isUTF8_CHAR_flags>> for a more customized definition.
    2530              : 
    2531              : Use C<L</is_c9strict_utf8_string>>, C<L</is_c9strict_utf8_string_loc>>, and
    2532              : C<L</is_c9strict_utf8_string_loclen>> to check entire strings.
    2533              : 
    2534              : =cut
    2535              : 
    2536              : This uses an adaptation of the tables and algorithm given in
    2537              : https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
    2538              : documentation of the original version.  A copyright notice for the original
    2539              : version is given at the beginning of this file.  The Perl adaptation is
    2540              : documented at the definition of PL_c9_utf8_dfa_tab[].
    2541              : 
    2542              : */
    2543              : 
    2544              : PERL_STATIC_INLINE Size_t
    2545              : Perl_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
    2546              : {
    2547              :     PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR;
    2548              : 
    2549              :     PERL_IS_UTF8_CHAR_DFA(s0, e, PL_c9_utf8_dfa_tab,
    2550              :                           DFA_RETURN_SUCCESS_,
    2551              :                           DFA_RETURN_FAILURE_,
    2552              :                           DFA_RETURN_FAILURE_);
    2553              : }
    2554              : 
    2555              : /*
    2556              : 
    2557              : =for apidoc is_strict_utf8_string_loc
    2558              : 
    2559              : Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
    2560              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2561              : "utf8ness success") in the C<ep> pointer.
    2562              : 
    2563              : See also C<L</is_strict_utf8_string_loclen>>.
    2564              : 
    2565              : =cut
    2566              : */
    2567              : 
    2568              : #define is_strict_utf8_string_loc(s, len, ep)                               \
    2569              :                                 is_strict_utf8_string_loclen(s, len, ep, 0)
    2570              : 
    2571              : /*
    2572              : 
    2573              : =for apidoc is_strict_utf8_string_loclen
    2574              : 
    2575              : Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
    2576              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2577              : "utf8ness success") in the C<ep> pointer, and the number of UTF-8
    2578              : encoded characters in the C<el> pointer.
    2579              : 
    2580              : See also C<L</is_strict_utf8_string_loc>>.
    2581              : 
    2582              : =cut
    2583              : */
    2584              : 
    2585              : PERL_STATIC_INLINE bool
    2586              : Perl_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
    2587              : {
    2588              :     const U8 * first_variant;
    2589              : 
    2590              :     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
    2591              : 
    2592              :     if (len == 0) {
    2593              :         len = strlen((const char *) s);
    2594              :     }
    2595              : 
    2596              :     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2597              :         if (el)
    2598              :             *el = len;
    2599              : 
    2600              :         if (ep) {
    2601              :             *ep = s + len;
    2602              :         }
    2603              : 
    2604              :         return TRUE;
    2605              :     }
    2606              : 
    2607              :     {
    2608              :         const U8* const send = s + len;
    2609              :         const U8* x = first_variant;
    2610              :         STRLEN outlen = first_variant - s;
    2611              : 
    2612              :         while (x < send) {
    2613              :             const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
    2614              :             if (UNLIKELY(! cur_len)) {
    2615              :                 break;
    2616              :             }
    2617              :             x += cur_len;
    2618              :             outlen++;
    2619              :         }
    2620              : 
    2621              :         if (el)
    2622              :             *el = outlen;
    2623              : 
    2624              :         if (ep) {
    2625              :             *ep = x;
    2626              :         }
    2627              : 
    2628              :         return (x == send);
    2629              :     }
    2630              : }
    2631              : 
    2632              : /*
    2633              : 
    2634              : =for apidoc is_c9strict_utf8_string_loc
    2635              : 
    2636              : Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
    2637              : the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2638              : "utf8ness success") in the C<ep> pointer.
    2639              : 
    2640              : See also C<L</is_c9strict_utf8_string_loclen>>.
    2641              : 
    2642              : =cut
    2643              : */
    2644              : 
    2645              : #define is_c9strict_utf8_string_loc(s, len, ep)                             \
    2646              :                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
    2647              : 
    2648              : /*
    2649              : 
    2650              : =for apidoc is_c9strict_utf8_string_loclen
    2651              : 
    2652              : Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
    2653              : the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2654              : "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
    2655              : characters in the C<el> pointer.
    2656              : 
    2657              : See also C<L</is_c9strict_utf8_string_loc>>.
    2658              : 
    2659              : =cut
    2660              : */
    2661              : 
    2662              : PERL_STATIC_INLINE bool
    2663              : Perl_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
    2664              : {
    2665              :     const U8 * first_variant;
    2666              : 
    2667              :     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
    2668              : 
    2669              :     if (len == 0) {
    2670              :         len = strlen((const char *) s);
    2671              :     }
    2672              : 
    2673              :     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2674              :         if (el)
    2675              :             *el = len;
    2676              : 
    2677              :         if (ep) {
    2678              :             *ep = s + len;
    2679              :         }
    2680              : 
    2681              :         return TRUE;
    2682              :     }
    2683              : 
    2684              :     {
    2685              :         const U8* const send = s + len;
    2686              :         const U8* x = first_variant;
    2687              :         STRLEN outlen = first_variant - s;
    2688              : 
    2689              :         while (x < send) {
    2690              :             const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
    2691              :             if (UNLIKELY(! cur_len)) {
    2692              :                 break;
    2693              :             }
    2694              :             x += cur_len;
    2695              :             outlen++;
    2696              :         }
    2697              : 
    2698              :         if (el)
    2699              :             *el = outlen;
    2700              : 
    2701              :         if (ep) {
    2702              :             *ep = x;
    2703              :         }
    2704              : 
    2705              :         return (x == send);
    2706              :     }
    2707              : }
    2708              : 
    2709              : /*
    2710              : 
    2711              : =for apidoc is_utf8_string_loc_flags
    2712              : 
    2713              : Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
    2714              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2715              : "utf8ness success") in the C<ep> pointer.
    2716              : 
    2717              : See also C<L</is_utf8_string_loclen_flags>>.
    2718              : 
    2719              : =cut
    2720              : */
    2721              : 
    2722              : #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
    2723              :                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
    2724              : 
    2725              : 
    2726              : /* The above 3 actual functions could have been moved into the more general one
    2727              :  * just below, and made #defines that call it with the right 'flags'.  They are
    2728              :  * currently kept separate to increase their chances of getting inlined */
    2729              : 
    2730              : /*
    2731              : 
    2732              : =for apidoc is_utf8_string_loclen_flags
    2733              : 
    2734              : Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
    2735              : case of "utf8ness failure") or the location C<s>+C<len> (in the case of
    2736              : "utf8ness success") in the C<ep> pointer, and the number of UTF-8
    2737              : encoded characters in the C<el> pointer.
    2738              : 
    2739              : See also C<L</is_utf8_string_loc_flags>>.
    2740              : 
    2741              : =cut
    2742              : */
    2743              : 
    2744              : PERL_STATIC_INLINE bool
    2745              : Perl_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
    2746              : {
    2747              :     const U8 * first_variant;
    2748              : 
    2749              :     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
    2750              :     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
    2751              :                           |UTF8_DISALLOW_PERL_EXTENDED)));
    2752              : 
    2753              :     if (flags == 0) {
    2754              :         return is_utf8_string_loclen(s, len, ep, el);
    2755              :     }
    2756              : 
    2757              :     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
    2758              :                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
    2759              :     {
    2760              :         return is_strict_utf8_string_loclen(s, len, ep, el);
    2761              :     }
    2762              : 
    2763              :     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
    2764              :                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
    2765              :     {
    2766              :         return is_c9strict_utf8_string_loclen(s, len, ep, el);
    2767              :     }
    2768              : 
    2769              :     if (len == 0) {
    2770              :         len = strlen((const char *) s);
    2771              :     }
    2772              : 
    2773              :     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
    2774              :         if (el)
    2775              :             *el = len;
    2776              : 
    2777              :         if (ep) {
    2778              :             *ep = s + len;
    2779              :         }
    2780              : 
    2781              :         return TRUE;
    2782              :     }
    2783              : 
    2784              :     {
    2785              :         const U8* send = s + len;
    2786              :         const U8* x = first_variant;
    2787              :         STRLEN outlen = first_variant - s;
    2788              : 
    2789              :         while (x < send) {
    2790              :             const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
    2791              :             if (UNLIKELY(! cur_len)) {
    2792              :                 break;
    2793              :             }
    2794              :             x += cur_len;
    2795              :             outlen++;
    2796              :         }
    2797              : 
    2798              :         if (el)
    2799              :             *el = outlen;
    2800              : 
    2801              :         if (ep) {
    2802              :             *ep = x;
    2803              :         }
    2804              : 
    2805              :         return (x == send);
    2806              :     }
    2807              : }
    2808              : 
    2809              : /*
    2810              : =for apidoc utf8_distance
    2811              : 
    2812              : Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
    2813              : and C<b>.
    2814              : 
    2815              : WARNING: use only if you *know* that the pointers point inside the
    2816              : same UTF-8 buffer.
    2817              : 
    2818              : =cut
    2819              : */
    2820              : 
    2821              : PERL_STATIC_INLINE IV
    2822              : Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
    2823              : {
    2824              :     PERL_ARGS_ASSERT_UTF8_DISTANCE;
    2825              : 
    2826              :     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
    2827              : }
    2828              : 
    2829              : /*
    2830              : =for apidoc utf8_hop
    2831              : 
    2832              : Return the UTF-8 pointer C<s> displaced by C<off> characters, either
    2833              : forward (if C<off> is positive) or backward (if negative).  C<s> does not need
    2834              : to be pointing to the starting byte of a character.  If it isn't, one count of
    2835              : C<off> will be used up to get to the start of the next character for forward
    2836              : hops, and to the start of the current character for negative ones.
    2837              : 
    2838              : WARNING: Prefer L</utf8_hop_safe> to this one.
    2839              : 
    2840              : Do NOT use this function unless you B<know> C<off> is within
    2841              : the UTF-8 data pointed to by C<s> B<and> that on entry C<s> is aligned
    2842              : on the first byte of a character or just after the last byte of a character.
    2843              : 
    2844              : =cut
    2845              : */
    2846              : 
    2847              : PERL_STATIC_INLINE U8 *
    2848              : Perl_utf8_hop(const U8 *s, SSize_t off)
    2849              : {
    2850              :     PERL_ARGS_ASSERT_UTF8_HOP;
    2851              : 
    2852              :     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
    2853              :      * the XXX bitops (especially ~) can create illegal UTF-8.
    2854              :      * In other words: in Perl UTF-8 is not just for Unicode. */
    2855              : 
    2856              :     if (off > 0) {
    2857              : 
    2858              :         /* Get to next non-continuation byte */
    2859              :         if (UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
    2860              :             do {
    2861              :                 s++;
    2862              :             }
    2863              :             while (UTF8_IS_CONTINUATION(*s));
    2864              :             off--;
    2865              :         }
    2866              : 
    2867              :         while (off--)
    2868              :             s += UTF8SKIP(s);
    2869              :     }
    2870              :     else {
    2871              :         while (off++) {
    2872              :             s--;
    2873              :             while (UTF8_IS_CONTINUATION(*s))
    2874              :                 s--;
    2875              :         }
    2876              :     }
    2877              : 
    2878              :     GCC_DIAG_IGNORE(-Wcast-qual)
    2879              :     return (U8 *)s;
    2880              :     GCC_DIAG_RESTORE
    2881              : }
    2882              : 
    2883              : /*
    2884              : =for apidoc utf8_hop_forward
    2885              : 
    2886              : Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
    2887              : forward.  C<s> does not need to be pointing to the starting byte of a
    2888              : character.  If it isn't, one count of C<off> will be used up to get to the
    2889              : start of the next character.
    2890              : 
    2891              : C<off> must be non-negative.
    2892              : 
    2893              : C<s> must be before or equal to C<end>.
    2894              : 
    2895              : When moving forward it will not move beyond C<end>.
    2896              : 
    2897              : Will not exceed this limit even if the string is not valid "UTF-8".
    2898              : 
    2899              : =cut
    2900              : */
    2901              : 
    2902              : PERL_STATIC_INLINE U8 *
    2903              : Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
    2904              : {
    2905              :     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
    2906              : 
    2907              :     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
    2908              :      * the bitops (especially ~) can create illegal UTF-8.
    2909              :      * In other words: in Perl UTF-8 is not just for Unicode. */
    2910              : 
    2911              :     assert(s <= end);
    2912              :     assert(off >= 0);
    2913              : 
    2914              :     if (off && UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
    2915              :         /* Get to next non-continuation byte */
    2916              :         do {
    2917              :             s++;
    2918              :         }
    2919              :         while (UTF8_IS_CONTINUATION(*s));
    2920              :         off--;
    2921              :     }
    2922              : 
    2923              :     while (off--) {
    2924              :         STRLEN skip = UTF8SKIP(s);
    2925              :         if ((STRLEN)(end - s) <= skip) {
    2926              :             GCC_DIAG_IGNORE(-Wcast-qual)
    2927              :             return (U8 *)end;
    2928              :             GCC_DIAG_RESTORE
    2929              :         }
    2930              :         s += skip;
    2931              :     }
    2932              : 
    2933              :     GCC_DIAG_IGNORE(-Wcast-qual)
    2934              :     return (U8 *)s;
    2935              :     GCC_DIAG_RESTORE
    2936              : }
    2937              : 
    2938              : /*
    2939              : =for apidoc utf8_hop_back
    2940              : 
    2941              : Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
    2942              : backward.  C<s> does not need to be pointing to the starting byte of a
    2943              : character.  If it isn't, one count of C<off> will be used up to get to that
    2944              : start.
    2945              : 
    2946              : C<off> must be non-positive.
    2947              : 
    2948              : C<s> must be after or equal to C<start>.
    2949              : 
    2950              : When moving backward it will not move before C<start>.
    2951              : 
    2952              : Will not exceed this limit even if the string is not valid "UTF-8".
    2953              : 
    2954              : =cut
    2955              : */
    2956              : 
    2957              : PERL_STATIC_INLINE U8 *
    2958              : Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
    2959              : {
    2960              :     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
    2961              : 
    2962              :     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
    2963              :      * the bitops (especially ~) can create illegal UTF-8.
    2964              :      * In other words: in Perl UTF-8 is not just for Unicode. */
    2965              : 
    2966              :     assert(start <= s);
    2967              :     assert(off <= 0);
    2968              : 
    2969              :     /* Note: if we know that the input is well-formed, we can do per-word
    2970              :      * hop-back.  Commit d6ad3b72778369a84a215b498d8d60d5b03aa1af implemented
    2971              :      * that.  But it was reverted because doing per-word has some
    2972              :      * start-up/tear-down overhead, so only makes sense if the distance to be
    2973              :      * moved is large, and core perl doesn't currently move more than a few
    2974              :      * characters at a time.  You can reinstate it if it does become
    2975              :      * advantageous. */
    2976              :     while (off++ && s > start) {
    2977              :         do {
    2978              :             s--;
    2979              :         } while (s > start && UTF8_IS_CONTINUATION(*s));
    2980              :     }
    2981              : 
    2982              :     GCC_DIAG_IGNORE(-Wcast-qual)
    2983              :     return (U8 *)s;
    2984              :     GCC_DIAG_RESTORE
    2985              : }
    2986              : 
    2987              : /*
    2988              : =for apidoc utf8_hop_safe
    2989              : 
    2990              : Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
    2991              : either forward or backward.  C<s> does not need to be pointing to the starting
    2992              : byte of a character.  If it isn't, one count of C<off> will be used up to get
    2993              : to the start of the next character for forward hops, and to the start of the
    2994              : current character for negative ones.
    2995              : 
    2996              : When moving backward it will not move before C<start>.
    2997              : 
    2998              : When moving forward it will not move beyond C<end>.
    2999              : 
    3000              : Will not exceed those limits even if the string is not valid "UTF-8".
    3001              : 
    3002              : =cut
    3003              : */
    3004              : 
    3005              : PERL_STATIC_INLINE U8 *
    3006              : Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
    3007              : {
    3008              :     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
    3009              : 
    3010              :     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
    3011              :      * the bitops (especially ~) can create illegal UTF-8.
    3012              :      * In other words: in Perl UTF-8 is not just for Unicode. */
    3013              : 
    3014              :     assert(start <= s && s <= end);
    3015              : 
    3016              :     if (off >= 0) {
    3017              :         return utf8_hop_forward(s, off, end);
    3018              :     }
    3019              :     else {
    3020              :         return utf8_hop_back(s, off, start);
    3021              :     }
    3022              : }
    3023              : 
    3024              : /*
    3025              : 
    3026              : =for apidoc isUTF8_CHAR_flags
    3027              : 
    3028              : Evaluates to non-zero if the first few bytes of the string starting at C<s> and
    3029              : looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl,
    3030              : that represents some code point, subject to the restrictions given by C<flags>;
    3031              : otherwise it evaluates to 0.  If non-zero, the value gives how many bytes
    3032              : starting at C<s> comprise the code point's representation.  Any bytes remaining
    3033              : before C<e>, but beyond the ones needed to form the first code point in C<s>,
    3034              : are not examined.
    3035              : 
    3036              : If C<flags> is 0, this gives the same results as C<L</isUTF8_CHAR>>;
    3037              : if C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
    3038              : as C<L</isSTRICT_UTF8_CHAR>>;
    3039              : and if C<flags> is C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives
    3040              : the same results as C<L</isC9_STRICT_UTF8_CHAR>>.
    3041              : Otherwise C<flags> may be any combination of the C<UTF8_DISALLOW_I<foo>> flags
    3042              : understood by C<L</utf8n_to_uvchr>>, with the same meanings.
    3043              : 
    3044              : The three alternative macros are for the most commonly needed validations; they
    3045              : are likely to run somewhat faster than this more general one, as they can be
    3046              : inlined into your code.
    3047              : 
    3048              : Use L</is_utf8_string_flags>, L</is_utf8_string_loc_flags>, and
    3049              : L</is_utf8_string_loclen_flags> to check entire strings.
    3050              : 
    3051              : =cut
    3052              : */
    3053              : 
    3054              : PERL_STATIC_INLINE STRLEN
    3055              : Perl_isUTF8_CHAR_flags(const U8 * const s0, const U8 * const e, const U32 flags)
    3056              : {
    3057              :     PERL_ARGS_ASSERT_ISUTF8_CHAR_FLAGS;
    3058              :     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
    3059              :                           |UTF8_DISALLOW_PERL_EXTENDED)));
    3060              : 
    3061              :     PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
    3062              :                           goto check_success,
    3063              :                           DFA_TEASE_APART_FF_,
    3064              :                           DFA_RETURN_FAILURE_);
    3065              : 
    3066              :   check_success:
    3067              : 
    3068              :     return is_utf8_char_helper_(s0, e, flags);
    3069              : 
    3070              : #ifdef HAS_EXTRA_LONG_UTF8
    3071              : 
    3072              :   tease_apart_FF:
    3073              : 
    3074              :     /* In the case of PL_extended_utf8_dfa_tab, getting here means the input is
    3075              :      * either malformed, or was for the largest possible start byte, which
    3076              :      * indicates perl extended UTF-8, well above the Unicode maximum */
    3077              :     if (   *s0 != I8_TO_NATIVE_UTF8(0xFF)
    3078              :         || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED)))
    3079              :     {
    3080              :         return 0;
    3081              :     }
    3082              : 
    3083              :     /* Otherwise examine the sequence not inline */
    3084              :     return is_utf8_FF_helper_(s0, e,
    3085              :                               FALSE /* require full, not partial char */
    3086              :                              );
    3087              : #endif
    3088              : 
    3089              : }
    3090              : 
    3091              : /*
    3092              : 
    3093              : =for apidoc is_utf8_valid_partial_char
    3094              : 
    3095              : Returns 0 if the sequence of bytes starting at C<s> and looking no further than
    3096              : S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
    3097              : points.  Otherwise, it returns 1 if there exists at least one non-empty
    3098              : sequence of bytes that when appended to sequence C<s>, starting at position
    3099              : C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
    3100              : otherwise returns 0.
    3101              : 
    3102              : In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
    3103              : point.
    3104              : 
    3105              : This is useful when a fixed-length buffer is being tested for being well-formed
    3106              : UTF-8, but the final few bytes in it don't comprise a full character; that is,
    3107              : it is split somewhere in the middle of the final code point's UTF-8
    3108              : representation.  (Presumably when the buffer is refreshed with the next chunk
    3109              : of data, the new first bytes will complete the partial code point.)   This
    3110              : function is used to verify that the final bytes in the current buffer are in
    3111              : fact the legal beginning of some code point, so that if they aren't, the
    3112              : failure can be signalled without having to wait for the next read.
    3113              : 
    3114              : =cut
    3115              : */
    3116              : #define is_utf8_valid_partial_char(s, e)                                    \
    3117              :                                 is_utf8_valid_partial_char_flags(s, e, 0)
    3118              : 
    3119              : /*
    3120              : 
    3121              : =for apidoc is_utf8_valid_partial_char_flags
    3122              : 
    3123              : Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
    3124              : or not the input is a valid UTF-8 encoded partial character, but it takes an
    3125              : extra parameter, C<flags>, which can further restrict which code points are
    3126              : considered valid.
    3127              : 
    3128              : If C<flags> is 0, this behaves identically to
    3129              : C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
    3130              : of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
    3131              : there is any sequence of bytes that can complete the input partial character in
    3132              : such a way that a non-prohibited character is formed, the function returns
    3133              : TRUE; otherwise FALSE.  Non character code points cannot be determined based on
    3134              : partial character input.  But many  of the other possible excluded types can be
    3135              : determined from just the first one or two bytes.
    3136              : 
    3137              : =cut
    3138              :  */
    3139              : 
    3140              : PERL_STATIC_INLINE bool
    3141              : Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
    3142              : {
    3143              :     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
    3144              :     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
    3145              :                           |UTF8_DISALLOW_PERL_EXTENDED)));
    3146              : 
    3147              :     PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
    3148              :                           DFA_RETURN_FAILURE_,
    3149              :                           DFA_TEASE_APART_FF_,
    3150              :                           NOOP);
    3151              : 
    3152              :     /* The NOOP above causes the DFA to drop down here iff the input was a
    3153              :      * partial character.  flags=0 => can return TRUE immediately; otherwise we
    3154              :      * need to check (not inline) if the partial character is the beginning of
    3155              :      * a disallowed one */
    3156              :     if (flags == 0) {
    3157              :         return TRUE;
    3158              :     }
    3159              : 
    3160              :     return cBOOL(is_utf8_char_helper_(s0, e, flags));
    3161              : 
    3162              : #ifdef HAS_EXTRA_LONG_UTF8
    3163              : 
    3164              :   tease_apart_FF:
    3165              : 
    3166              :     /* Getting here means the input is either malformed, or, in the case of
    3167              :      * PL_extended_utf8_dfa_tab, was for the largest possible start byte.  The
    3168              :      * latter case has to be extended UTF-8, so can fail immediately if that is
    3169              :      * forbidden */
    3170              : 
    3171              :     if (   *s0 != I8_TO_NATIVE_UTF8(0xFF)
    3172              :         || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED)))
    3173              :     {
    3174              :         return 0;
    3175              :     }
    3176              : 
    3177              :     return is_utf8_FF_helper_(s0, e,
    3178              :                               TRUE /* Require to be a partial character */
    3179              :                              );
    3180              : #endif
    3181              : 
    3182              : }
    3183              : 
    3184              : /*
    3185              : 
    3186              : =for apidoc is_utf8_fixed_width_buf_flags
    3187              : 
    3188              : Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
    3189              : is entirely valid UTF-8, subject to the restrictions given by C<flags>;
    3190              : otherwise it returns FALSE.
    3191              : 
    3192              : If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
    3193              : without restriction.  If the final few bytes of the buffer do not form a
    3194              : complete code point, this will return TRUE anyway, provided that
    3195              : C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
    3196              : 
    3197              : If C<flags> in non-zero, it can be any combination of the
    3198              : C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
    3199              : same meanings.
    3200              : 
    3201              : This function differs from C<L</is_utf8_string_flags>> only in that the latter
    3202              : returns FALSE if the final few bytes of the string don't form a complete code
    3203              : point.
    3204              : 
    3205              : =cut
    3206              :  */
    3207              : #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
    3208              :                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
    3209              : 
    3210              : /*
    3211              : 
    3212              : =for apidoc is_utf8_fixed_width_buf_loc_flags
    3213              : 
    3214              : Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
    3215              : failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
    3216              : to the beginning of any partial character at the end of the buffer; if there is
    3217              : no partial character C<*ep> will contain C<s>+C<len>.
    3218              : 
    3219              : See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
    3220              : 
    3221              : =cut
    3222              : */
    3223              : 
    3224              : #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
    3225              :                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
    3226              : 
    3227              : /*
    3228              : 
    3229              : =for apidoc is_utf8_fixed_width_buf_loclen_flags
    3230              : 
    3231              : Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
    3232              : complete, valid characters found in the C<el> pointer.
    3233              : 
    3234              : =cut
    3235              : */
    3236              : 
    3237              : PERL_STATIC_INLINE bool
    3238              : Perl_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
    3239              :                                        STRLEN len,
    3240              :                                        const U8 **ep,
    3241              :                                        STRLEN *el,
    3242              :                                        const U32 flags)
    3243              : {
    3244              :     const U8 * maybe_partial;
    3245              : 
    3246              :     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
    3247              : 
    3248              :     if (! ep) {
    3249              :         ep  = &maybe_partial;
    3250              :     }
    3251              : 
    3252              :     /* If it's entirely valid, return that; otherwise see if the only error is
    3253              :      * that the final few bytes are for a partial character */
    3254              :     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
    3255              :            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
    3256              : }
    3257              : 
    3258              : PERL_STATIC_INLINE UV
    3259              : Perl_utf8n_to_uvchr_msgs(const U8 *s,
    3260              :                          STRLEN curlen,
    3261              :                          STRLEN *retlen,
    3262              :                          const U32 flags,
    3263              :                          U32 * errors,
    3264              :                          AV ** msgs)
    3265              : {
    3266              :     /* This is the inlined portion of utf8n_to_uvchr_msgs.  It handles the
    3267              :      * simple cases, and, if necessary calls a helper function to deal with the
    3268              :      * more complex ones.  Almost all well-formed non-problematic code points
    3269              :      * are considered simple, so that it's unlikely that the helper function
    3270              :      * will need to be called.
    3271              :      *
    3272              :      * This is an adaptation of the tables and algorithm given in
    3273              :      * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
    3274              :      * comprehensive documentation of the original version.  A copyright notice
    3275              :      * for the original version is given at the beginning of this file.  The
    3276              :      * Perl adaptation is documented at the definition of PL_strict_utf8_dfa_tab[].
    3277              :      */
    3278              : 
    3279              :     const U8 * const s0 = s;
    3280              :     const U8 * send = s0 + curlen;
    3281              :     UV type;
    3282              :     UV uv;
    3283              : 
    3284              :     PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
    3285              : 
    3286              :     /* This dfa is fast.  If it accepts the input, it was for a well-formed,
    3287              :      * non-problematic code point, which can be returned immediately.
    3288              :      * Otherwise we call a helper function to figure out the more complicated
    3289              :      * cases. */
    3290              : 
    3291              :     /* No calls from core pass in an empty string; non-core need a check */
    3292              : #ifdef PERL_CORE
    3293              :     assert(curlen > 0);
    3294              : #else
    3295              :     if (curlen == 0) return _utf8n_to_uvchr_msgs_helper(s0, 0, retlen,
    3296              :                                                         flags, errors, msgs);
    3297              : #endif
    3298              : 
    3299              :     type = PL_strict_utf8_dfa_tab[*s];
    3300              : 
    3301              :     /* The table is structured so that 'type' is 0 iff the input byte is
    3302              :      * represented identically regardless of the UTF-8ness of the string */
    3303              :     if (type == 0) {   /* UTF-8 invariants are returned unchanged */
    3304              :         uv = *s;
    3305              :     }
    3306              :     else {
    3307              :         UV state = PL_strict_utf8_dfa_tab[256 + type];
    3308              :         uv = (0xff >> type) & NATIVE_UTF8_TO_I8(*s);
    3309              : 
    3310              :         while (LIKELY(state != 1) && ++s < send) {
    3311              :             type  = PL_strict_utf8_dfa_tab[*s];
    3312              :             state = PL_strict_utf8_dfa_tab[256 + state + type];
    3313              : 
    3314              :             uv = UTF8_ACCUMULATE(uv, *s);
    3315              : 
    3316              :             if (state == 0) {
    3317              : #ifdef EBCDIC
    3318              :                 uv = UNI_TO_NATIVE(uv);
    3319              : #endif
    3320              :                 goto success;
    3321              :             }
    3322              :         }
    3323              : 
    3324              :         /* Here is potentially problematic.  Use the full mechanism */
    3325              :         return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags,
    3326              :                                            errors, msgs);
    3327              :     }
    3328              : 
    3329              :   success:
    3330              :     if (retlen) {
    3331              :         *retlen = s - s0 + 1;
    3332              :     }
    3333              :     if (errors) {
    3334              :         *errors = 0;
    3335              :     }
    3336              :     if (msgs) {
    3337              :         *msgs = NULL;
    3338              :     }
    3339              : 
    3340              :     return uv;
    3341              : }
    3342              : 
    3343              : PERL_STATIC_INLINE UV
    3344              : Perl_utf8_to_uvchr_buf_helper(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
    3345              : {
    3346              :     PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF_HELPER;
    3347              : 
    3348              :     assert(s < send);
    3349              : 
    3350              :     if (! ckWARN_d(WARN_UTF8)) {
    3351              : 
    3352              :         /* EMPTY is not really allowed, and asserts on debugging builds.  But
    3353              :          * on non-debugging we have to deal with it, and this causes it to
    3354              :          * return the REPLACEMENT CHARACTER, as the documentation indicates */
    3355              :         return utf8n_to_uvchr(s, send - s, retlen,
    3356              :                               (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY));
    3357              :     }
    3358              :     else {
    3359              :         UV ret = utf8n_to_uvchr(s, send - s, retlen, 0);
    3360              :         if (retlen && ret == 0 && (send <= s || *s != '\0')) {
    3361              :             *retlen = (STRLEN) -1;
    3362              :         }
    3363              : 
    3364              :         return ret;
    3365              :     }
    3366              : }
    3367              : 
    3368              : /* ------------------------------- perl.h ----------------------------- */
    3369              : 
    3370              : /*
    3371              : =for apidoc_section $utility
    3372              : 
    3373              : =for apidoc is_safe_syscall
    3374              : 
    3375              : Test that the given C<pv> (with length C<len>) doesn't contain any internal
    3376              : C<NUL> characters.
    3377              : If it does, set C<errno> to C<ENOENT>, optionally warn using the C<syscalls>
    3378              : category, and return FALSE.
    3379              : 
    3380              : Return TRUE if the name is safe.
    3381              : 
    3382              : C<what> and C<op_name> are used in any warning.
    3383              : 
    3384              : Used by the C<IS_SAFE_SYSCALL()> macro.
    3385              : 
    3386              : =cut
    3387              : */
    3388              : 
    3389              : PERL_STATIC_INLINE bool
    3390              : Perl_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name)
    3391              : {
    3392              :     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
    3393              :      * perl itself uses xce*() functions which accept 8-bit strings.
    3394              :      */
    3395              : 
    3396              :     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
    3397              : 
    3398              :     if (len > 1) {
    3399              :         char *null_at;
    3400              :         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
    3401              :                 SETERRNO(ENOENT, LIB_INVARG);
    3402              :                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
    3403              :                                    "Invalid \\0 character in %s for %s: %s\\0%s",
    3404              :                                    what, op_name, pv, null_at+1);
    3405              :                 return FALSE;
    3406              :         }
    3407              :     }
    3408              : 
    3409              :     return TRUE;
    3410              : }
    3411              : 
    3412              : /*
    3413              : 
    3414              : Return true if the supplied filename has a newline character
    3415              : immediately before the first (hopefully only) NUL.
    3416              : 
    3417              : My original look at this incorrectly used the len from SvPV(), but
    3418              : that's incorrect, since we allow for a NUL in pv[len-1].
    3419              : 
    3420              : So instead, strlen() and work from there.
    3421              : 
    3422              : This allow for the user reading a filename, forgetting to chomp it,
    3423              : then calling:
    3424              : 
    3425              :   open my $foo, "$file\0";
    3426              : 
    3427              : */
    3428              : 
    3429              : #ifdef PERL_CORE
    3430              : 
    3431              : PERL_STATIC_INLINE bool
    3432              : S_should_warn_nl(const char *pv)
    3433              : {
    3434              :     STRLEN len;
    3435              : 
    3436              :     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
    3437              : 
    3438              :     len = strlen(pv);
    3439              : 
    3440              :     return len > 0 && pv[len-1] == '\n';
    3441              : }
    3442              : 
    3443              : #endif
    3444              : 
    3445              : #if defined(PERL_IN_PP_C) || defined(PERL_IN_PP_HOT_C)
    3446              : 
    3447              : PERL_STATIC_INLINE bool
    3448              : S_lossless_NV_to_IV(const NV nv, IV *ivp)
    3449              : {
    3450              :     /* This function determines if the input NV 'nv' may be converted without
    3451              :      * loss of data to an IV.  If not, it returns FALSE taking no other action.
    3452              :      * But if it is possible, it does the conversion, returning TRUE, and
    3453              :      * storing the converted result in '*ivp' */
    3454              : 
    3455              :     PERL_ARGS_ASSERT_LOSSLESS_NV_TO_IV;
    3456              : 
    3457              : #  if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
    3458              :     /* Normally any comparison with a NaN returns false; if we can't rely
    3459              :      * on that behaviour, check explicitly */
    3460              :     if (UNLIKELY(Perl_isnan(nv))) {
    3461              :         return FALSE;
    3462              :     }
    3463              : #  endif
    3464              : 
    3465              : #  ifndef NV_PRESERVES_UV
    3466              :     STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) - 1 <= (UV)IV_MAX);
    3467              : #  endif
    3468              : 
    3469              :     /* Written this way so that with an always-false NaN comparison we
    3470              :      * return false */
    3471              :     if (
    3472              : #  ifdef NV_PRESERVES_UV
    3473              :         LIKELY(nv >= (NV) IV_MIN) && LIKELY(nv < IV_MAX_P1) &&
    3474              : #  else
    3475              :         /* If the condition below is not satisfied, lower bits of nv's
    3476              :          * integral part is already lost and accurate conversion to integer
    3477              :          * is impossible.
    3478              :          * Note this should be consistent with S_sv_2iuv_common in sv.c. */
    3479              :         Perl_fabs(nv) < (NV) ((UV)1 << NV_PRESERVES_UV_BITS) &&
    3480              : #  endif
    3481              :         (IV) nv == nv) {
    3482              :         *ivp = (IV) nv;
    3483              :         return TRUE;
    3484              :     }
    3485              :     return FALSE;
    3486              : }
    3487              : 
    3488              : #endif
    3489              : 
    3490              : /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
    3491              : 
    3492              : #if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
    3493              : 
    3494              : #define MAX_CHARSET_NAME_LENGTH 2
    3495              : 
    3496              : PERL_STATIC_INLINE const char *
    3497              : S_get_regex_charset_name(const U32 flags, STRLEN* const lenp)
    3498              : {
    3499              :     PERL_ARGS_ASSERT_GET_REGEX_CHARSET_NAME;
    3500              : 
    3501              :     /* Returns a string that corresponds to the name of the regex character set
    3502              :      * given by 'flags', and *lenp is set the length of that string, which
    3503              :      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
    3504              : 
    3505              :     *lenp = 1;
    3506              :     switch (get_regex_charset(flags)) {
    3507              :         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
    3508              :         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
    3509              :         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
    3510              :         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
    3511              :         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
    3512              :             *lenp = 2;
    3513              :             return ASCII_MORE_RESTRICT_PAT_MODS;
    3514              :     }
    3515              :     /* The NOT_REACHED; hides an assert() which has a rather complex
    3516              :      * definition in perl.h. */
    3517              :     NOT_REACHED; /* NOTREACHED */
    3518              :     return "?";       /* Unknown */
    3519              : }
    3520              : 
    3521              : #endif
    3522              : 
    3523              : /*
    3524              : 
    3525              : Return false if any get magic is on the SV other than taint magic.
    3526              : 
    3527              : */
    3528              : 
    3529              : PERL_STATIC_INLINE bool
    3530              : Perl_sv_only_taint_gmagic(SV *sv)
    3531              : {
    3532              :     MAGIC *mg = SvMAGIC(sv);
    3533              : 
    3534              :     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
    3535              : 
    3536              :     while (mg) {
    3537              :         if (mg->mg_type != PERL_MAGIC_taint
    3538              :             && !(mg->mg_flags & MGf_GSKIP)
    3539              :             && mg->mg_virtual->svt_get) {
    3540              :             return FALSE;
    3541              :         }
    3542              :         mg = mg->mg_moremagic;
    3543              :     }
    3544              : 
    3545              :     return TRUE;
    3546              : }
    3547              : 
    3548              : /* ------------------ cop.h ------------------------------------------- */
    3549              : 
    3550              : /* implement GIMME_V() macro */
    3551              : 
    3552              : PERL_STATIC_INLINE U8
    3553              : Perl_gimme_V(pTHX)
    3554              : {
    3555              :     I32 cxix;
    3556              :     U8  gimme = (PL_op->op_flags & OPf_WANT);
    3557              : 
    3558              :     if (gimme)
    3559              :         return gimme;
    3560              :     cxix = PL_curstackinfo->si_cxsubix;
    3561              :     if (cxix < 0)
    3562              :         return PL_curstackinfo->si_type == PERLSI_SORT ? G_SCALAR: G_VOID;
    3563              :     assert(cxstack[cxix].blk_gimme & G_WANT);
    3564              :     return (cxstack[cxix].blk_gimme & G_WANT);
    3565              : }
    3566              : 
    3567              : 
    3568              : /* Enter a block. Push a new base context and return its address. */
    3569              : 
    3570              : PERL_STATIC_INLINE PERL_CONTEXT *
    3571              : Perl_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
    3572              : {
    3573              :     PERL_CONTEXT * cx;
    3574              : 
    3575              :     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
    3576              : 
    3577              :     CXINC;
    3578              :     cx = CX_CUR();
    3579              :     cx->cx_type        = type;
    3580              :     cx->blk_gimme      = gimme;
    3581              :     cx->blk_oldsaveix  = saveix;
    3582              :     cx->blk_oldsp      = (Stack_off_t)(sp - PL_stack_base);
    3583              :     assert(cxstack_ix <= 0
    3584              :             || CxTYPE(cx-1) == CXt_SUBST
    3585              :             || cx->blk_oldsp >= (cx-1)->blk_oldsp);
    3586              :     cx->blk_oldcop     = PL_curcop;
    3587              :     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
    3588              :     cx->blk_oldscopesp = PL_scopestack_ix;
    3589              :     cx->blk_oldpm      = PL_curpm;
    3590              :     cx->blk_old_tmpsfloor = PL_tmps_floor;
    3591              : 
    3592              :     PL_tmps_floor        = PL_tmps_ix;
    3593              :     CX_DEBUG(cx, "PUSH");
    3594              :     return cx;
    3595              : }
    3596              : 
    3597              : 
    3598              : /* Exit a block (RETURN and LAST). */
    3599              : 
    3600              : PERL_STATIC_INLINE void
    3601              : Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx)
    3602              : {
    3603              :     PERL_ARGS_ASSERT_CX_POPBLOCK;
    3604              : 
    3605              :     CX_DEBUG(cx, "POP");
    3606              :     /* these 3 are common to cx_popblock and cx_topblock */
    3607              :     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
    3608              :     PL_scopestack_ix = cx->blk_oldscopesp;
    3609              :     PL_curpm         = cx->blk_oldpm;
    3610              : 
    3611              :     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
    3612              :      * and leaves a CX entry lying around for repeated use, so
    3613              :      * skip for multicall */                  \
    3614              :     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
    3615              :             || PL_savestack_ix == cx->blk_oldsaveix);
    3616              :     PL_curcop     = cx->blk_oldcop;
    3617              :     PL_tmps_floor = cx->blk_old_tmpsfloor;
    3618              : }
    3619              : 
    3620              : /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
    3621              :  * Whereas cx_popblock() restores the state to the point just before
    3622              :  * cx_pushblock() was called,  cx_topblock() restores it to the point just
    3623              :  * *after* cx_pushblock() was called. */
    3624              : 
    3625              : PERL_STATIC_INLINE void
    3626              : Perl_cx_topblock(pTHX_ PERL_CONTEXT *cx)
    3627              : {
    3628              :     PERL_ARGS_ASSERT_CX_TOPBLOCK;
    3629              : 
    3630              :     CX_DEBUG(cx, "TOP");
    3631              :     /* these 3 are common to cx_popblock and cx_topblock */
    3632              :     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
    3633              :     PL_scopestack_ix = cx->blk_oldscopesp;
    3634              :     PL_curpm         = cx->blk_oldpm;
    3635              :     Perl_rpp_popfree_to(aTHX_ PL_stack_base + cx->blk_oldsp);
    3636              : }
    3637              : 
    3638              : 
    3639              : PERL_STATIC_INLINE void
    3640              : Perl_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
    3641              : {
    3642              :     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
    3643              : 
    3644              :     PERL_ARGS_ASSERT_CX_PUSHSUB;
    3645              : 
    3646              :     PERL_DTRACE_PROBE_ENTRY(cv);
    3647              :     cx->blk_sub.old_cxsubix     = PL_curstackinfo->si_cxsubix;
    3648              :     PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
    3649              :     cx->blk_sub.cv = cv;
    3650              :     cx->blk_sub.olddepth = CvDEPTH(cv);
    3651              :     cx->blk_sub.prevcomppad = PL_comppad;
    3652              :     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
    3653              :     cx->blk_sub.retop = retop;
    3654              :     SvREFCNT_inc_simple_void_NN(cv);
    3655              :     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
    3656              : }
    3657              : 
    3658              : 
    3659              : /* subsets of cx_popsub() */
    3660              : 
    3661              : PERL_STATIC_INLINE void
    3662              : Perl_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
    3663              : {
    3664              :     CV *cv;
    3665              : 
    3666              :     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
    3667              :     assert(CxTYPE(cx) == CXt_SUB);
    3668              : 
    3669              :     PL_comppad = cx->blk_sub.prevcomppad;
    3670              :     PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
    3671              :     cv = cx->blk_sub.cv;
    3672              :     CvDEPTH(cv) = cx->blk_sub.olddepth;
    3673              :     cx->blk_sub.cv = NULL;
    3674              :     SvREFCNT_dec(cv);
    3675              :     PL_curstackinfo->si_cxsubix = cx->blk_sub.old_cxsubix;
    3676              : }
    3677              : 
    3678              : 
    3679              : /* handle the @_ part of leaving a sub */
    3680              : 
    3681              : PERL_STATIC_INLINE void
    3682              : Perl_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
    3683              : {
    3684              :     AV *av;
    3685              : 
    3686              :     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
    3687              :     assert(CxTYPE(cx) == CXt_SUB);
    3688              :     assert(AvARRAY(MUTABLE_AV(
    3689              :         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
    3690              :                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
    3691              : 
    3692              :     CX_POP_SAVEARRAY(cx);
    3693              :     av = MUTABLE_AV(PAD_SVl(0));
    3694              :     if (!SvMAGICAL(av) && SvREFCNT(av) == 1
    3695              : #ifndef PERL_RC_STACK
    3696              :         && !AvREAL(av)
    3697              : #endif
    3698              :     )
    3699              :         clear_defarray_simple(av);
    3700              :     else
    3701              :         /* abandon @_ if it got reified */
    3702              :         clear_defarray(av, 0);
    3703              : }
    3704              : 
    3705              : 
    3706              : PERL_STATIC_INLINE void
    3707              : Perl_cx_popsub(pTHX_ PERL_CONTEXT *cx)
    3708              : {
    3709              :     PERL_ARGS_ASSERT_CX_POPSUB;
    3710              :     assert(CxTYPE(cx) == CXt_SUB);
    3711              : 
    3712              :     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
    3713              : 
    3714              :     if (CxHASARGS(cx))
    3715              :         cx_popsub_args(cx);
    3716              :     cx_popsub_common(cx);
    3717              : }
    3718              : 
    3719              : 
    3720              : PERL_STATIC_INLINE void
    3721              : Perl_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
    3722              : {
    3723              :     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
    3724              : 
    3725              :     cx->blk_format.old_cxsubix = PL_curstackinfo->si_cxsubix;
    3726              :     PL_curstackinfo->si_cxsubix= (I32)(cx - PL_curstackinfo->si_cxstack);
    3727              :     cx->blk_format.cv          = cv;
    3728              :     cx->blk_format.retop       = retop;
    3729              :     cx->blk_format.gv          = gv;
    3730              :     cx->blk_format.dfoutgv     = PL_defoutgv;
    3731              :     cx->blk_format.prevcomppad = PL_comppad;
    3732              :     cx->blk_u16                = 0;
    3733              : 
    3734              :     SvREFCNT_inc_simple_void_NN(cv);
    3735              :     CvDEPTH(cv)++;
    3736              :     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
    3737              : }
    3738              : 
    3739              : 
    3740              : PERL_STATIC_INLINE void
    3741              : Perl_cx_popformat(pTHX_ PERL_CONTEXT *cx)
    3742              : {
    3743              :     CV *cv;
    3744              :     GV *dfout;
    3745              : 
    3746              :     PERL_ARGS_ASSERT_CX_POPFORMAT;
    3747              :     assert(CxTYPE(cx) == CXt_FORMAT);
    3748              : 
    3749              :     dfout = cx->blk_format.dfoutgv;
    3750              :     setdefout(dfout);
    3751              :     cx->blk_format.dfoutgv = NULL;
    3752              :     SvREFCNT_dec_NN(dfout);
    3753              : 
    3754              :     PL_comppad = cx->blk_format.prevcomppad;
    3755              :     PL_curpad = LIKELY(PL_comppad != NULL) ? AvARRAY(PL_comppad) : NULL;
    3756              :     cv = cx->blk_format.cv;
    3757              :     cx->blk_format.cv = NULL;
    3758              :     --CvDEPTH(cv);
    3759              :     SvREFCNT_dec_NN(cv);
    3760              :     PL_curstackinfo->si_cxsubix = cx->blk_format.old_cxsubix;
    3761              : }
    3762              : 
    3763              : 
    3764              : PERL_STATIC_INLINE void
    3765              : Perl_push_evalortry_common(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
    3766              : {
    3767              :     cx->blk_eval.retop         = retop;
    3768              :     cx->blk_eval.old_namesv    = namesv;
    3769              :     cx->blk_eval.old_eval_root = PL_eval_root;
    3770              :     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
    3771              :     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
    3772              :     cx->blk_eval.cur_top_env   = PL_top_env;
    3773              : 
    3774              :     assert(!(PL_in_eval     & ~ 0x3F));
    3775              :     assert(!(PL_op->op_type & ~0x1FF));
    3776              :     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
    3777              : }
    3778              : 
    3779              : PERL_STATIC_INLINE void
    3780              : Perl_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
    3781              : {
    3782              :     PERL_ARGS_ASSERT_CX_PUSHEVAL;
    3783              : 
    3784              :     Perl_push_evalortry_common(aTHX_ cx, retop, namesv);
    3785              : 
    3786              :     cx->blk_eval.old_cxsubix    = PL_curstackinfo->si_cxsubix;
    3787              :     PL_curstackinfo->si_cxsubix = (I32)(cx - PL_curstackinfo->si_cxstack);
    3788              : }
    3789              : 
    3790              : PERL_STATIC_INLINE void
    3791              : Perl_cx_pushtry(pTHX_ PERL_CONTEXT *cx, OP *retop)
    3792              : {
    3793              :     PERL_ARGS_ASSERT_CX_PUSHTRY;
    3794              : 
    3795              :     Perl_push_evalortry_common(aTHX_ cx, retop, NULL);
    3796              : 
    3797              :     /* Don't actually change it, just store the current value so it's restored
    3798              :      * by the common popeval */
    3799              :     cx->blk_eval.old_cxsubix = PL_curstackinfo->si_cxsubix;
    3800              : }
    3801              : 
    3802              : 
    3803              : PERL_STATIC_INLINE void
    3804              : Perl_cx_popeval(pTHX_ PERL_CONTEXT *cx)
    3805              : {
    3806              :     SV *sv;
    3807              : 
    3808              :     PERL_ARGS_ASSERT_CX_POPEVAL;
    3809              :     assert(CxTYPE(cx) == CXt_EVAL);
    3810              : 
    3811              :     PL_in_eval = CxOLD_IN_EVAL(cx);
    3812              :     assert(!(PL_in_eval & 0xc0));
    3813              :     PL_eval_root = cx->blk_eval.old_eval_root;
    3814              :     sv = cx->blk_eval.cur_text;
    3815              :     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
    3816              :         cx->blk_eval.cur_text = NULL;
    3817              :         SvREFCNT_dec_NN(sv);
    3818              :     }
    3819              : 
    3820              :     sv = cx->blk_eval.old_namesv;
    3821              :     if (sv) {
    3822              :         cx->blk_eval.old_namesv = NULL;
    3823              :         SvREFCNT_dec_NN(sv);
    3824              :     }
    3825              :     PL_curstackinfo->si_cxsubix = cx->blk_eval.old_cxsubix;
    3826              : }
    3827              : 
    3828              : 
    3829              : /* push a plain loop, i.e.
    3830              :  *     { block }
    3831              :  *     while (cond) { block }
    3832              :  *     for (init;cond;continue) { block }
    3833              :  * This loop can be last/redo'ed etc.
    3834              :  */
    3835              : 
    3836              : PERL_STATIC_INLINE void
    3837              : Perl_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
    3838              : {
    3839              :     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
    3840              :     cx->blk_loop.my_op = cLOOP;
    3841              : }
    3842              : 
    3843              : 
    3844              : /* push a true for loop, i.e.
    3845              :  *     for var (list) { block }
    3846              :  */
    3847              : 
    3848              : PERL_STATIC_INLINE void
    3849              : Perl_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
    3850              : {
    3851              :     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
    3852              : 
    3853              :     /* this one line is common with cx_pushloop_plain */
    3854              :     cx->blk_loop.my_op = cLOOP;
    3855              : 
    3856              :     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
    3857              :     cx->blk_loop.itersave      = itersave;
    3858              : #ifdef USE_ITHREADS
    3859              :     cx->blk_loop.oldcomppad = PL_comppad;
    3860              : #endif
    3861              : }
    3862              : 
    3863              : 
    3864              : /* pop all loop types, including plain */
    3865              : 
    3866              : PERL_STATIC_INLINE void
    3867              : Perl_cx_poploop(pTHX_ PERL_CONTEXT *cx)
    3868              : {
    3869              :     PERL_ARGS_ASSERT_CX_POPLOOP;
    3870              : 
    3871              :     assert(CxTYPE_is_LOOP(cx));
    3872              :     if (  CxTYPE(cx) == CXt_LOOP_ARY
    3873              :        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
    3874              :     {
    3875              :         /* Free ary or cur. This assumes that state_u.ary.ary
    3876              :          * aligns with state_u.lazysv.cur. See cx_dup() */
    3877              :         SV *sv = cx->blk_loop.state_u.lazysv.cur;
    3878              :         cx->blk_loop.state_u.lazysv.cur = NULL;
    3879              :         SvREFCNT_dec_NN(sv);
    3880              :         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
    3881              :             sv = cx->blk_loop.state_u.lazysv.end;
    3882              :             cx->blk_loop.state_u.lazysv.end = NULL;
    3883              :             SvREFCNT_dec_NN(sv);
    3884              :         }
    3885              :     }
    3886              :     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
    3887              :         SV *cursv;
    3888              :         SV **svp = (cx)->blk_loop.itervar_u.svp;
    3889              :         if ((cx->cx_type & CXp_FOR_GV))
    3890              :             svp = &GvSV((GV*)svp);
    3891              :         cursv = *svp;
    3892              :         *svp = cx->blk_loop.itersave;
    3893              :         cx->blk_loop.itersave = NULL;
    3894              :         SvREFCNT_dec(cursv);
    3895              :     }
    3896              :     if (cx->cx_type & (CXp_FOR_GV|CXp_FOR_LVREF))
    3897              :         SvREFCNT_dec(cx->blk_loop.itervar_u.svp);
    3898              : }
    3899              : 
    3900              : 
    3901              : PERL_STATIC_INLINE void
    3902              : Perl_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
    3903              : {
    3904              :     PERL_ARGS_ASSERT_CX_PUSHWHEN;
    3905              : 
    3906              :     cx->blk_givwhen.leave_op = cLOGOP->op_other;
    3907              : }
    3908              : 
    3909              : 
    3910              : PERL_STATIC_INLINE void
    3911              : Perl_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
    3912              : {
    3913              :     PERL_ARGS_ASSERT_CX_POPWHEN;
    3914              :     assert(CxTYPE(cx) == CXt_WHEN);
    3915              : 
    3916              :     PERL_UNUSED_ARG(cx);
    3917              :     PERL_UNUSED_CONTEXT;
    3918              :     /* currently NOOP */
    3919              : }
    3920              : 
    3921              : 
    3922              : PERL_STATIC_INLINE void
    3923              : Perl_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
    3924              : {
    3925              :     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
    3926              : 
    3927              :     cx->blk_givwhen.leave_op = cLOGOP->op_other;
    3928              :     cx->blk_givwhen.defsv_save = orig_defsv;
    3929              : }
    3930              : 
    3931              : 
    3932              : PERL_STATIC_INLINE void
    3933              : Perl_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
    3934              : {
    3935              :     SV *sv;
    3936              : 
    3937              :     PERL_ARGS_ASSERT_CX_POPGIVEN;
    3938              :     assert(CxTYPE(cx) == CXt_GIVEN);
    3939              : 
    3940              :     sv = GvSV(PL_defgv);
    3941              :     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
    3942              :     cx->blk_givwhen.defsv_save = NULL;
    3943              :     SvREFCNT_dec(sv);
    3944              : }
    3945              : 
    3946              : 
    3947              : /* Make @_ empty in-place in simple cases: a cheap av_clear().
    3948              :  * See Perl_clear_defarray() for non-simple cases */
    3949              : 
    3950              : 
    3951              : PERL_STATIC_INLINE void
    3952              : Perl_clear_defarray_simple(pTHX_ AV *av)
    3953              : {
    3954              :     PERL_ARGS_ASSERT_CLEAR_DEFARRAY_SIMPLE;
    3955              : 
    3956              :     assert(SvTYPE(av) == SVt_PVAV);
    3957              :     assert(!SvREADONLY(av));
    3958              :     assert(!SvMAGICAL(av));
    3959              :     assert(SvREFCNT(av) == 1);
    3960              : 
    3961              : #ifdef PERL_RC_STACK
    3962              :     assert(AvREAL(av));
    3963              :     /* this code assumes that destructors called here can't free av
    3964              :      * itself, because pad[0] and/or CX pointers will keep it alive */
    3965              :     SSize_t i = AvFILLp(av);
    3966              :     while (i >= 0) {
    3967              :         SV *sv = AvARRAY(av)[i];
    3968              :         AvARRAY(av)[i--] = NULL;
    3969              :         SvREFCNT_dec(sv);
    3970              :     }
    3971              : #else
    3972              :     assert(!AvREAL(av));
    3973              : #endif
    3974              :     AvFILLp(av) = -1;
    3975              :     Perl_av_remove_offset(aTHX_ av);
    3976              : }
    3977              : 
    3978              : /* Switch to a different argument stack.
    3979              :  *
    3980              :  * Note that it doesn't update PL_curstackinfo->si_stack_nonrc_base,
    3981              :  * so this should only be used as part of a general switching between
    3982              :  * stackinfos.
    3983              :  */
    3984              : 
    3985              : PERL_STATIC_INLINE void
    3986              : Perl_switch_argstack(pTHX_ AV *to)
    3987              : {
    3988              :     PERL_ARGS_ASSERT_SWITCH_ARGSTACK;
    3989              : 
    3990              :     AvFILLp(PL_curstack) = PL_stack_sp - PL_stack_base;
    3991              :     PL_stack_base = AvARRAY(to);
    3992              :     PL_stack_max  = PL_stack_base + AvMAX(to);
    3993              :     PL_stack_sp   = PL_stack_base + AvFILLp(to);
    3994              :     PL_curstack   = to;
    3995              : }
    3996              : 
    3997              : 
    3998              : /* Push, and switch to a new stackinfo, allocating one if none are spare,
    3999              :  * to get a fresh set of stacks.
    4000              :  * Update all the interpreter variables like PL_curstackinfo,
    4001              :  * PL_stack_sp, etc.
    4002              :  * current flag meanings:
    4003              :  *   1 make the new arg stack AvREAL
    4004              :  */
    4005              : 
    4006              : 
    4007              : PERL_STATIC_INLINE void
    4008              : Perl_push_stackinfo(pTHX_ I32 type, UV flags)
    4009              : {
    4010              :     PERL_ARGS_ASSERT_PUSH_STACKINFO;
    4011              : 
    4012              :     PERL_SI *next = PL_curstackinfo->si_next;
    4013              :     DEBUG_l({
    4014              :         int i = 0; PERL_SI *p = PL_curstackinfo;
    4015              :         while (p) { i++; p = p->si_prev; }
    4016              :         Perl_deb(aTHX_ "push STACKINFO %d in %s at %s:%d\n",
    4017              :                      i, SAFE_FUNCTION__, __FILE__, __LINE__);
    4018              :     })
    4019              : 
    4020              :     if (!next) {
    4021              :         next = new_stackinfo_flags(32, 2048/sizeof(PERL_CONTEXT) - 1, flags);
    4022              :         next->si_prev = PL_curstackinfo;
    4023              :         PL_curstackinfo->si_next = next;
    4024              :     }
    4025              :     next->si_type = type;
    4026              :     next->si_cxix = -1;
    4027              :     next->si_cxsubix = -1;
    4028              :     PUSHSTACK_INIT_HWM(next);
    4029              : #ifdef PERL_RC_STACK
    4030              :     next->si_stack_nonrc_base = 0;
    4031              : #endif
    4032              :     if (flags & 1)
    4033              :         AvREAL_on(next->si_stack);
    4034              :     else
    4035              :         AvREAL_off(next->si_stack);
    4036              :     AvFILLp(next->si_stack) = 0;
    4037              :     switch_argstack(next->si_stack);
    4038              :     PL_curstackinfo = next;
    4039              :     SET_MARK_OFFSET;
    4040              : }
    4041              : 
    4042              : 
    4043              : /* Pop, then switch to the previous stackinfo and set of stacks.
    4044              :  * Update all the interpreter variables like PL_curstackinfo,
    4045              :  * PL_stack_sp, etc. */
    4046              : 
    4047              : PERL_STATIC_INLINE void
    4048              : Perl_pop_stackinfo(pTHX)
    4049              : {
    4050              :     PERL_ARGS_ASSERT_POP_STACKINFO;
    4051              : 
    4052              :     PERL_SI * const prev = PL_curstackinfo->si_prev;
    4053              :     DEBUG_l({
    4054              :         int i = -1; PERL_SI *p = PL_curstackinfo;
    4055              :         while (p) { i++; p = p->si_prev; }
    4056              :         Perl_deb(aTHX_ "pop  STACKINFO %d in %s at %s:%d\n",
    4057              :                      i, SAFE_FUNCTION__, __FILE__, __LINE__);})
    4058              :     if (!prev) {
    4059              :         Perl_croak_popstack();
    4060              :     }
    4061              : 
    4062              :     switch_argstack(prev->si_stack);
    4063              :     /* don't free prev here, free them all at the END{} */
    4064              :     PL_curstackinfo = prev;
    4065              : }
    4066              : 
    4067              : 
    4068              : 
    4069              : /*
    4070              : =for apidoc newPADxVOP
    4071              : 
    4072              : Constructs, checks and returns an op containing a pad offset.  C<type> is
    4073              : the opcode, which should be one of C<OP_PADSV>, C<OP_PADAV>, C<OP_PADHV>
    4074              : or C<OP_PADCV>.  The returned op will have the C<op_targ> field set by
    4075              : the C<padix> argument.
    4076              : 
    4077              : This is convenient when constructing a large optree in nested function
    4078              : calls, as it avoids needing to store the pad op directly to set the
    4079              : C<op_targ> field as a side-effect. For example
    4080              : 
    4081              :     o = op_append_elem(OP_LINESEQ, o,
    4082              :         newPADxVOP(OP_PADSV, 0, padix));
    4083              : 
    4084              : =cut
    4085              : */
    4086              : 
    4087              : PERL_STATIC_INLINE OP *
    4088              : Perl_newPADxVOP(pTHX_ I32 type, I32 flags, PADOFFSET padix)
    4089              : {
    4090              :     PERL_ARGS_ASSERT_NEWPADXVOP;
    4091              : 
    4092              :     assert(type == OP_PADSV || type == OP_PADAV || type == OP_PADHV
    4093              :             || type == OP_PADCV);
    4094              :     OP *o = newOP(type, flags);
    4095              :     o->op_targ = padix;
    4096              :     return o;
    4097              : }
    4098              : 
    4099              : /* ------------------ util.h ------------------------------------------- */
    4100              : 
    4101              : /*
    4102              : =for apidoc_section $string
    4103              : 
    4104              : =for apidoc foldEQ
    4105              : 
    4106              : Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
    4107              : same
    4108              : case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
    4109              : match themselves and their opposite case counterparts.  Non-cased and non-ASCII
    4110              : range bytes match only themselves.
    4111              : 
    4112              : =cut
    4113              : */
    4114              : 
    4115              : PERL_STATIC_INLINE I32
    4116              : Perl_foldEQ(pTHX_ const char *s1, const char *s2, I32 len)
    4117              : {
    4118              :     PERL_UNUSED_CONTEXT;
    4119              : 
    4120              :     const U8 *a = (const U8 *)s1;
    4121              :     const U8 *b = (const U8 *)s2;
    4122              : 
    4123              :     PERL_ARGS_ASSERT_FOLDEQ;
    4124              : 
    4125              :     assert(len >= 0);
    4126              : 
    4127              :     while (len--) {
    4128              :         if (*a != *b && *a != PL_fold[*b])
    4129              :             return 0;
    4130              :         a++,b++;
    4131              :     }
    4132              :     return 1;
    4133              : }
    4134              : 
    4135              : PERL_STATIC_INLINE I32
    4136              : Perl_foldEQ_latin1(pTHX_ const char *s1, const char *s2, I32 len)
    4137              : {
    4138              :     /* Compare non-UTF-8 using Unicode (Latin1) semantics.  Works on all folds
    4139              :      * representable without UTF-8, except for LATIN_SMALL_LETTER_SHARP_S, and
    4140              :      * does not check for this.  Nor does it check that the strings each have
    4141              :      * at least 'len' characters. */
    4142              : 
    4143              :     PERL_UNUSED_CONTEXT;
    4144              : 
    4145              :     const U8 *a = (const U8 *)s1;
    4146              :     const U8 *b = (const U8 *)s2;
    4147              : 
    4148              :     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
    4149              : 
    4150              :     assert(len >= 0);
    4151              : 
    4152              :     while (len--) {
    4153              :         if (*a != *b && *a != PL_fold_latin1[*b]) {
    4154              :             return 0;
    4155              :         }
    4156              :         a++, b++;
    4157              :     }
    4158              :     return 1;
    4159              : }
    4160              : 
    4161              : /*
    4162              : =for apidoc_section $locale
    4163              : =for apidoc foldEQ_locale
    4164              : 
    4165              : Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
    4166              : same case-insensitively in the current locale; false otherwise.
    4167              : 
    4168              : =cut
    4169              : */
    4170              : 
    4171              : PERL_STATIC_INLINE I32
    4172              : Perl_foldEQ_locale(pTHX_ const char *s1, const char *s2, I32 len)
    4173              : {
    4174              :     const U8 *a = (const U8 *)s1;
    4175              :     const U8 *b = (const U8 *)s2;
    4176              : 
    4177              :     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
    4178              : 
    4179              :     assert(len >= 0);
    4180              : 
    4181              :     while (len--) {
    4182              :         if (*a != *b && *a != PL_fold_locale[*b]) {
    4183              :             DEBUG_Lv(PerlIO_printf(Perl_debug_log,
    4184              :                      "%s:%d: Our records indicate %02x is not a fold of %02x"
    4185              :                      " or its mate %02x\n",
    4186              :                      __FILE__, __LINE__, *a, *b, PL_fold_locale[*b]));
    4187              : 
    4188              :             return 0;
    4189              :         }
    4190              :         a++,b++;
    4191              :     }
    4192              :     return 1;
    4193              : }
    4194              : 
    4195              : /*
    4196              : =for apidoc_section $string
    4197              : =for apidoc my_strnlen
    4198              : 
    4199              : The C library C<strnlen> if available, or a Perl implementation of it.
    4200              : 
    4201              : C<my_strnlen()> computes the length of the string, up to C<maxlen>
    4202              : bytes.  It will never attempt to address more than C<maxlen>
    4203              : bytes, making it suitable for use with strings that are not
    4204              : guaranteed to be NUL-terminated.
    4205              : 
    4206              : =cut
    4207              : 
    4208              : Description stolen from http://man.openbsd.org/strnlen.3,
    4209              : implementation stolen from PostgreSQL.
    4210              : */
    4211              : #ifndef HAS_STRNLEN
    4212              : 
    4213              : PERL_STATIC_INLINE Size_t
    4214              : Perl_my_strnlen(const char *str, Size_t maxlen)
    4215              : {
    4216              :     const char *end = (char *) memchr(str, '\0', maxlen);
    4217              : 
    4218              :     PERL_ARGS_ASSERT_MY_STRNLEN;
    4219              : 
    4220              :     if (end == NULL) return maxlen;
    4221              :     return end - str;
    4222              : }
    4223              : 
    4224              : #endif
    4225              : 
    4226              : #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
    4227              : 
    4228              : PERL_STATIC_INLINE void *
    4229              : S_my_memrchr(const char * s, const char c, const STRLEN len)
    4230              : {
    4231              :     /* memrchr(), since many platforms lack it */
    4232              : 
    4233              :     const char * t = s + len - 1;
    4234              : 
    4235              :     PERL_ARGS_ASSERT_MY_MEMRCHR;
    4236              : 
    4237              :     while (t >= s) {
    4238              :         if (*t == c) {
    4239              :             return (void *) t;
    4240              :         }
    4241              :         t--;
    4242              :     }
    4243              : 
    4244              :     return NULL;
    4245              : }
    4246              : 
    4247              : #endif
    4248              : 
    4249              : PERL_STATIC_INLINE char *
    4250              : Perl_mortal_getenv(const char * str)
    4251              : {
    4252              :     /* This implements a (mostly) thread-safe, sequential-call-safe getenv().
    4253              :      *
    4254              :      * It's (mostly) thread-safe because it uses a mutex to prevent other
    4255              :      * threads (that look at this mutex) from destroying the result before this
    4256              :      * routine has a chance to copy the result to a place that won't be
    4257              :      * destroyed before the caller gets a chance to handle it.  That place is a
    4258              :      * mortal SV.  khw chose this over SAVEFREEPV because he is under the
    4259              :      * impression that the SV will hang around longer under more circumstances
    4260              :      *
    4261              :      * The reason it isn't completely thread-safe is that other code could
    4262              :      * simply not pay attention to the mutex.  All of the Perl core uses the
    4263              :      * mutex, but it is possible for code from, say XS, to not use this mutex,
    4264              :      * defeating the safety.
    4265              :      *
    4266              :      * getenv() returns, in some implementations, a pointer to a spot in the
    4267              :      * **environ array, which could be invalidated at any time by this or
    4268              :      * another thread changing the environment.  Other implementations copy the
    4269              :      * **environ value to a static buffer, returning a pointer to that.  That
    4270              :      * buffer might or might not be invalidated by a getenv() call in another
    4271              :      * thread.  If it does get zapped, we need an exclusive lock.  Otherwise,
    4272              :      * many getenv() calls can safely be running simultaneously, so a
    4273              :      * many-reader (but no simultaneous writers) lock is ok.  There is a
    4274              :      * Configure probe to see if another thread destroys the buffer, and the
    4275              :      * mutex is defined accordingly.
    4276              :      *
    4277              :      * But in all cases, using the mutex prevents these problems, as long as
    4278              :      * all code uses the same mutex.
    4279              :      *
    4280              :      * A complication is that this can be called during phases where the
    4281              :      * mortalization process isn't available.  These are in interpreter
    4282              :      * destruction or early in construction.  khw believes that at these times
    4283              :      * there shouldn't be anything else going on, so plain getenv is safe AS
    4284              :      * LONG AS the caller acts on the return before calling it again. */
    4285              : 
    4286              :     char * ret;
    4287              :     dTHX;
    4288              : 
    4289              :     PERL_ARGS_ASSERT_MORTAL_GETENV;
    4290              : 
    4291              :     /* Can't mortalize without stacks.  khw believes that no other threads
    4292              :      * should be running, so no need to lock things, and this may be during a
    4293              :      * phase when locking isn't even available */
    4294              :     if (UNLIKELY(PL_scopestack_ix == 0)) {
    4295              :         return getenv(str);
    4296              :     }
    4297              : 
    4298              : #ifdef PERL_MEM_LOG
    4299              : 
    4300              :     /* A major complication arises under PERL_MEM_LOG.  When that is active,
    4301              :      * every memory allocation may result in logging, depending on the value of
    4302              :      * ENV{PERL_MEM_LOG} at the moment.  That means, as we create the SV for
    4303              :      * saving ENV{foo}'s value (but before saving it), the logging code will
    4304              :      * call us recursively to find out what ENV{PERL_MEM_LOG} is.  Without some
    4305              :      * care that could lead to: 1) infinite recursion; or 2) deadlock (trying to
    4306              :      * lock a boolean mutex recursively); 3) destroying the getenv() static
    4307              :      * buffer; or 4) destroying the temporary created by this for the copy
    4308              :      * causes a log entry to be made which could cause a new temporary to be
    4309              :      * created, which will need to be destroyed at some point, leading to an
    4310              :      * infinite loop.
    4311              :      *
    4312              :      * The solution adopted here (after some gnashing of teeth) is to detect
    4313              :      * the recursive calls and calls from the logger, and treat them specially.
    4314              :      * Let's say we want to do getenv("foo").  We first find
    4315              :      * getenv(PERL_MEM_LOG) and save it to a fixed-length per-interpreter
    4316              :      * variable, so no temporary is required.  Then we do getenv(foo), and in
    4317              :      * the process of creating a temporary to save it, this function will be
    4318              :      * called recursively to do a getenv(PERL_MEM_LOG).  On the recursed call,
    4319              :      * we detect that it is such a call and return our saved value instead of
    4320              :      * locking and doing a new getenv().  This solves all of problems 1), 2),
    4321              :      * and 3).  Because all the getenv()s are done while the mutex is locked,
    4322              :      * the state cannot have changed.  To solve 4), we don't create a temporary
    4323              :      * when this is called from the logging code.  That code disposes of the
    4324              :      * return value while the mutex is still locked.
    4325              :      *
    4326              :      * The value of getenv(PERL_MEM_LOG) can be anything, but only initial
    4327              :      * digits and 3 particular letters are significant; the rest are ignored by
    4328              :      * the memory logging code.  Thus the per-interpreter variable only needs
    4329              :      * to be large enough to save the significant information, the size of
    4330              :      * which is known at compile time.  The first byte is extra, reserved for
    4331              :      * flags for our use.  To protect against overflowing, only the reserved
    4332              :      * byte, as many digits as don't overflow, and the three letters are
    4333              :      * stored.
    4334              :      *
    4335              :      * The reserved byte has two bits:
    4336              :      *      0x1 if set indicates that if we get here, it is a recursive call of
    4337              :      *          getenv()
    4338              :      *      0x2 if set indicates that the call is from the logging code.
    4339              :      *
    4340              :      * If the flag indicates this is a recursive call, just return the stored
    4341              :      * value of PL_mem_log;  An empty value gets turned into NULL. */
    4342              :     if (strEQ(str, "PERL_MEM_LOG") && PL_mem_log[0] & 0x1) {
    4343              :         if (PL_mem_log[1] == '\0') {
    4344              :             return NULL;
    4345              :         } else {
    4346              :             return PL_mem_log + 1;
    4347              :         }
    4348              :     }
    4349              : 
    4350              : #endif
    4351              : 
    4352              :     GETENV_LOCK;
    4353              : 
    4354              : #ifdef PERL_MEM_LOG
    4355              : 
    4356              :     /* Here we are in a critical section.  As explained above, we do our own
    4357              :      * getenv(PERL_MEM_LOG), saving the result safely. */
    4358              :     ret = getenv("PERL_MEM_LOG");
    4359              :     if (ret == NULL) {  /* No logging active */
    4360              : 
    4361              :         /* Return that immediately if called from the logging code */
    4362              :         if (PL_mem_log[0] & 0x2) {
    4363              :             GETENV_UNLOCK;
    4364              :             return NULL;
    4365              :         }
    4366              : 
    4367              :         PL_mem_log[1] = '\0';
    4368              :     }
    4369              :     else {
    4370              :         char *mem_log_meat = PL_mem_log + 1;    /* first byte reserved */
    4371              : 
    4372              :         /* There is nothing to prevent the value of PERL_MEM_LOG from being an
    4373              :          * extremely long string.  But we want only a few characters from it.
    4374              :          * PL_mem_log has been made large enough to hold just the ones we need.
    4375              :          * First the file descriptor. */
    4376              :         if (isDIGIT(*ret)) {
    4377              :             const char * s = ret;
    4378              :             if (UNLIKELY(*s == '0')) {
    4379              : 
    4380              :                 /* Reduce multiple leading zeros to a single one.  This is to
    4381              :                  * allow the caller to change what to do with leading zeros. */
    4382              :                 *mem_log_meat++ = '0';
    4383              :                 s++;
    4384              :                 while (*s == '0') {
    4385              :                     s++;
    4386              :                 }
    4387              :             }
    4388              : 
    4389              :             /* If the input overflows, copy just enough for the result to also
    4390              :              * overflow, plus 1 to make sure */
    4391              :             while (isDIGIT(*s) && s < ret + TYPE_DIGITS(UV) + 1) {
    4392              :                 *mem_log_meat++ = *s++;
    4393              :             }
    4394              :         }
    4395              : 
    4396              :         /* Then each of the four significant characters */
    4397              :         if (strchr(ret, 'm')) {
    4398              :             *mem_log_meat++ = 'm';
    4399              :         }
    4400              :         if (strchr(ret, 's')) {
    4401              :             *mem_log_meat++ = 's';
    4402              :         }
    4403              :         if (strchr(ret, 't')) {
    4404              :             *mem_log_meat++ = 't';
    4405              :         }
    4406              :         if (strchr(ret, 'c')) {
    4407              :             *mem_log_meat++ = 'c';
    4408              :         }
    4409              :         *mem_log_meat = '\0';
    4410              : 
    4411              :         assert(mem_log_meat < PL_mem_log + sizeof(PL_mem_log));
    4412              :     }
    4413              : 
    4414              :     /* If we are being called from the logger, it only needs the significant
    4415              :      * portion of PERL_MEM_LOG, and doesn't need a safe copy */
    4416              :     if (PL_mem_log[0] & 0x2) {
    4417              :         assert(strEQ(str, "PERL_MEM_LOG"));
    4418              :         GETENV_UNLOCK;
    4419              :         return PL_mem_log + 1;
    4420              :     }
    4421              : 
    4422              :     /* Here is a generic getenv().  This could be a getenv("PERL_MEM_LOG") that
    4423              :      * is coming from other than the logging code, so it should be treated the
    4424              :      * same as any other getenv(), returning the full value, not just the
    4425              :      * significant part, and having its value saved.  Set the flag that
    4426              :      * indicates any call to this routine will be a recursion from here */
    4427              :     PL_mem_log[0] = 0x1;
    4428              : 
    4429              : #endif
    4430              : 
    4431              :     /* Now get the value of the real desired variable, and save a copy */
    4432              :     ret = getenv(str);
    4433              : 
    4434              :     if (ret != NULL) {
    4435              :         ret = SvPVX( newSVpvn_flags(ret, strlen(ret) ,SVs_TEMP) );
    4436              :     }
    4437              : 
    4438              :     GETENV_UNLOCK;
    4439              : 
    4440              : #ifdef PERL_MEM_LOG
    4441              : 
    4442              :     /* Clear the buffer */
    4443              :     Zero(PL_mem_log, sizeof(PL_mem_log), char);
    4444              : 
    4445              : #endif
    4446              : 
    4447              :     return ret;
    4448              : }
    4449              : 
    4450              : PERL_STATIC_INLINE bool
    4451              : Perl_sv_isbool(pTHX_ const SV *sv)
    4452              : {
    4453              :     PERL_UNUSED_CONTEXT;
    4454              :     return SvBoolFlagsOK(sv) && BOOL_INTERNALS_sv_isbool(sv);
    4455              : }
    4456              : 
    4457              : #ifdef USE_ITHREADS
    4458              : 
    4459              : PERL_STATIC_INLINE AV *
    4460              : Perl_cop_file_avn(pTHX_ const COP *cop) {
    4461              : 
    4462              :     PERL_ARGS_ASSERT_COP_FILE_AVN;
    4463              : 
    4464              :     const char *file = CopFILE(cop);
    4465              :     if (file) {
    4466              :         GV *gv = gv_fetchfile_flags(file, strlen(file), GVF_NOADD);
    4467              :         if (gv) {
    4468              :             return GvAVn(gv);
    4469              :         }
    4470              :         else
    4471              :             return NULL;
    4472              :      }
    4473              :      else
    4474              :          return NULL;
    4475              : }
    4476              : 
    4477              : #endif
    4478              : 
    4479              : PERL_STATIC_INLINE PADNAME *
    4480              : Perl_padname_refcnt_inc(PADNAME *pn)
    4481              : {
    4482              :     PadnameREFCNT(pn)++;
    4483              :     return pn;
    4484              : }
    4485              : 
    4486              : PERL_STATIC_INLINE PADNAMELIST *
    4487              : Perl_padnamelist_refcnt_inc(PADNAMELIST *pnl)
    4488              : {
    4489              :     PadnamelistREFCNT(pnl)++;
    4490              :     return pnl;
    4491              : }
    4492              : 
    4493              : /* copy a string to a safe spot */
    4494              : 
    4495              : /*
    4496              : =for apidoc_section $string
    4497              : =for apidoc savepv
    4498              : 
    4499              : Perl's version of C<strdup()>.  Returns a pointer to a newly allocated
    4500              : string which is a duplicate of C<pv>.  The size of the string is
    4501              : determined by C<strlen()>, which means it may not contain embedded C<NUL>
    4502              : characters and must have a trailing C<NUL>.  To prevent memory leaks, the
    4503              : memory allocated for the new string needs to be freed when no longer needed.
    4504              : This can be done with the C<L</Safefree>> function, or
    4505              : L<C<SAVEFREEPV>|perlguts/SAVEFREEPV(p)>.
    4506              : 
    4507              : On some platforms, Windows for example, all allocated memory owned by a thread
    4508              : is deallocated when that thread ends.  So if you need that not to happen, you
    4509              : need to use the shared memory functions, such as C<L</savesharedpv>>.
    4510              : 
    4511              : =cut
    4512              : */
    4513              : 
    4514              : PERL_STATIC_INLINE char *
    4515              : Perl_savepv(pTHX_ const char *pv)
    4516              : {
    4517              :     PERL_UNUSED_CONTEXT;
    4518              :     if (!pv)
    4519              :         return NULL;
    4520              :     else {
    4521              :         char *newaddr;
    4522              :         const STRLEN pvlen = strlen(pv)+1;
    4523              :         Newx(newaddr, pvlen, char);
    4524              :         return (char*)memcpy(newaddr, pv, pvlen);
    4525              :     }
    4526              : }
    4527              : 
    4528              : /* same thing but with a known length */
    4529              : 
    4530              : /*
    4531              : =for apidoc savepvn
    4532              : 
    4533              : Perl's version of what C<strndup()> would be if it existed.  Returns a
    4534              : pointer to a newly allocated string which is a duplicate of the first
    4535              : C<len> bytes from C<pv>, plus a trailing
    4536              : C<NUL> byte.  The memory allocated for
    4537              : the new string can be freed with the C<Safefree()> function.
    4538              : 
    4539              : On some platforms, Windows for example, all allocated memory owned by a thread
    4540              : is deallocated when that thread ends.  So if you need that not to happen, you
    4541              : need to use the shared memory functions, such as C<L</savesharedpvn>>.
    4542              : 
    4543              : =cut
    4544              : */
    4545              : 
    4546              : PERL_STATIC_INLINE char *
    4547              : Perl_savepvn(pTHX_ const char *pv, Size_t len)
    4548              : {
    4549              :     char *newaddr;
    4550              :     PERL_UNUSED_CONTEXT;
    4551              : 
    4552              :     Newx(newaddr,len+1,char);
    4553              :     /* Give a meaning to NULL pointer mainly for the use in sv_magic() */
    4554              :     if (pv) {
    4555              :         /* might not be null terminated */
    4556              :         newaddr[len] = '\0';
    4557              :         return (char *) CopyD(pv,newaddr,len,char);
    4558              :     }
    4559              :     else {
    4560              :         return (char *) ZeroD(newaddr,len+1,char);
    4561              :     }
    4562              : }
    4563              : 
    4564              : /*
    4565              : =for apidoc savesvpv
    4566              : 
    4567              : A version of C<savepv()>/C<savepvn()> which gets the string to duplicate from
    4568              : the passed in SV using C<SvPV()>
    4569              : 
    4570              : On some platforms, Windows for example, all allocated memory owned by a thread
    4571              : is deallocated when that thread ends.  So if you need that not to happen, you
    4572              : need to use the shared memory functions, such as C<L</savesharedsvpv>>.
    4573              : 
    4574              : =cut
    4575              : */
    4576              : 
    4577              : PERL_STATIC_INLINE char *
    4578              : Perl_savesvpv(pTHX_ SV *sv)
    4579              : {
    4580              :     STRLEN len;
    4581              :     const char * const pv = SvPV_const(sv, len);
    4582              :     char *newaddr;
    4583              : 
    4584              :     PERL_ARGS_ASSERT_SAVESVPV;
    4585              : 
    4586              :     ++len;
    4587              :     Newx(newaddr,len,char);
    4588              :     return (char *) CopyD(pv,newaddr,len,char);
    4589              : }
    4590              : 
    4591              : /*
    4592              : =for apidoc savesharedsvpv
    4593              : 
    4594              : A version of C<savesharedpv()> which allocates the duplicate string in
    4595              : memory which is shared between threads.
    4596              : 
    4597              : =cut
    4598              : */
    4599              : 
    4600              : PERL_STATIC_INLINE char *
    4601              : Perl_savesharedsvpv(pTHX_ SV *sv)
    4602              : {
    4603              :     STRLEN len;
    4604              :     const char * const pv = SvPV_const(sv, len);
    4605              : 
    4606              :     PERL_ARGS_ASSERT_SAVESHAREDSVPV;
    4607              : 
    4608              :     return savesharedpvn(pv, len);
    4609              : }
    4610              : 
    4611              : #ifndef PERL_GET_CONTEXT_DEFINED
    4612              : 
    4613              : /*
    4614              : =for apidoc_section $embedding
    4615              : =for apidoc get_context
    4616              : 
    4617              : Implements L<perlapi/C<PERL_GET_CONTEXT>>, which you should use instead.
    4618              : 
    4619              : =cut
    4620              : */
    4621              : 
    4622              : PERL_STATIC_INLINE void *
    4623              : Perl_get_context(void)
    4624              : {
    4625              : #  if defined(USE_ITHREADS)
    4626              : #    ifdef OLD_PTHREADS_API
    4627              :     pthread_addr_t t;
    4628              :     int error = pthread_getspecific(PL_thr_key, &t);
    4629              :     if (error)
    4630              :         Perl_croak_nocontext("panic: pthread_getspecific, error=%d", error);
    4631              :     return (void*)t;
    4632              : #    elif defined(I_MACH_CTHREADS)
    4633              :     return (void*)cthread_data(cthread_self());
    4634              : #    else
    4635              :     return (void*)PTHREAD_GETSPECIFIC(PL_thr_key);
    4636              : #    endif
    4637              : #  else
    4638              :     return (void*)NULL;
    4639              : #  endif
    4640              : }
    4641              : 
    4642              : #endif
    4643              : 
    4644              : PERL_STATIC_INLINE MGVTBL*
    4645              : Perl_get_vtbl(pTHX_ int vtbl_id)
    4646              : {
    4647              :     PERL_UNUSED_CONTEXT;
    4648              : 
    4649              :     return (vtbl_id < 0 || vtbl_id >= magic_vtable_max)
    4650              :         ? NULL : (MGVTBL*)PL_magic_vtables + vtbl_id;
    4651              : }
    4652              : 
    4653              : /*
    4654              : =for apidoc my_strlcat
    4655              : 
    4656              : The C library C<strlcat> if available, or a Perl implementation of it.
    4657              : This operates on C C<NUL>-terminated strings.
    4658              : 
    4659              : C<my_strlcat()> appends string C<src> to the end of C<dst>.  It will append at
    4660              : most S<C<size - strlen(dst) - 1>> bytes.  It will then C<NUL>-terminate,
    4661              : unless C<size> is 0 or the original C<dst> string was longer than C<size> (in
    4662              : practice this should not happen as it means that either C<size> is incorrect or
    4663              : that C<dst> is not a proper C<NUL>-terminated string).
    4664              : 
    4665              : Note that C<size> is the full size of the destination buffer and
    4666              : the result is guaranteed to be C<NUL>-terminated if there is room.  Note that
    4667              : room for the C<NUL> should be included in C<size>.
    4668              : 
    4669              : The return value is the total length that C<dst> would have if C<size> is
    4670              : sufficiently large.  Thus it is the initial length of C<dst> plus the length of
    4671              : C<src>.  If C<size> is smaller than the return, the excess was not appended.
    4672              : 
    4673              : =cut
    4674              : 
    4675              : Description stolen from http://man.openbsd.org/strlcat.3
    4676              : */
    4677              : #ifndef HAS_STRLCAT
    4678              : PERL_STATIC_INLINE Size_t
    4679              : Perl_my_strlcat(char *dst, const char *src, Size_t size)
    4680              : {
    4681              :     Size_t used, length, copy;
    4682              : 
    4683              :     used = strlen(dst);
    4684              :     length = strlen(src);
    4685              :     if (size > 0 && used < size - 1) {
    4686              :         copy = (length >= size - used) ? size - used - 1 : length;
    4687              :         memcpy(dst + used, src, copy);
    4688              :         dst[used + copy] = '\0';
    4689              :     }
    4690              :     return used + length;
    4691              : }
    4692              : #endif
    4693              : 
    4694              : 
    4695              : /*
    4696              : =for apidoc my_strlcpy
    4697              : 
    4698              : The C library C<strlcpy> if available, or a Perl implementation of it.
    4699              : This operates on C C<NUL>-terminated strings.
    4700              : 
    4701              : C<my_strlcpy()> copies up to S<C<size - 1>> bytes from the string C<src>
    4702              : to C<dst>, C<NUL>-terminating the result if C<size> is not 0.
    4703              : 
    4704              : The return value is the total length C<src> would be if the copy completely
    4705              : succeeded.  If it is larger than C<size>, the excess was not copied.
    4706              : 
    4707              : =cut
    4708              : 
    4709              : Description stolen from http://man.openbsd.org/strlcpy.3
    4710              : */
    4711              : #ifndef HAS_STRLCPY
    4712              : PERL_STATIC_INLINE Size_t
    4713              : Perl_my_strlcpy(char *dst, const char *src, Size_t size)
    4714              : {
    4715              :     Size_t length, copy;
    4716              : 
    4717              :     length = strlen(src);
    4718              :     if (size > 0) {
    4719              :         copy = (length >= size) ? size - 1 : length;
    4720              :         memcpy(dst, src, copy);
    4721              :         dst[copy] = '\0';
    4722              :     }
    4723              :     return length;
    4724              : }
    4725              : #endif
    4726              : 
    4727              : /*
    4728              :  * ex: set ts=8 sts=4 sw=4 et:
    4729              :  */

Generated by: LCOV version 2.0-1