LCOV - code coverage report
Current view: top level - contrib/fuzzystrmatch - dmetaphone.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 111 623 17.8 %
Date: 2019-11-21 13:06:38 Functions: 11 15 73.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * This is a port of the Double Metaphone algorithm for use in PostgreSQL.
       3             :  *
       4             :  * contrib/fuzzystrmatch/dmetaphone.c
       5             :  *
       6             :  * Double Metaphone computes 2 "sounds like" strings - a primary and an
       7             :  * alternate. In most cases they are the same, but for foreign names
       8             :  * especially they can be a bit different, depending on pronunciation.
       9             :  *
      10             :  * Information on using Double Metaphone can be found at
      11             :  *   http://www.codeproject.com/string/dmetaphone1.asp
      12             :  * and the original article describing it can be found at
      13             :  *   http://drdobbs.com/184401251
      14             :  *
      15             :  * For PostgreSQL we provide 2 functions - one for the primary and one for
      16             :  * the alternate. That way the functions are pure text->text mappings that
      17             :  * are useful in functional indexes. These are 'dmetaphone' for the
      18             :  * primary and 'dmetaphone_alt' for the alternate.
      19             :  *
      20             :  * Assuming that dmetaphone.so is in $libdir, the SQL to set up the
      21             :  * functions looks like this:
      22             :  *
      23             :  * CREATE FUNCTION dmetaphone (text) RETURNS text
      24             :  *    LANGUAGE C IMMUTABLE STRICT
      25             :  *    AS '$libdir/dmetaphone', 'dmetaphone';
      26             :  *
      27             :  * CREATE FUNCTION dmetaphone_alt (text) RETURNS text
      28             :  *    LANGUAGE C IMMUTABLE STRICT
      29             :  *    AS '$libdir/dmetaphone', 'dmetaphone_alt';
      30             :  *
      31             :  * Note that you have to declare the functions IMMUTABLE if you want to
      32             :  * use them in functional indexes, and you have to declare them as STRICT
      33             :  * as they do not check for NULL input, and will segfault if given NULL input.
      34             :  * (See below for an alternative.) Declaring them as STRICT means PostgreSQL
      35             :  * will never call them with NULL, but instead assume the result is NULL,
      36             :  * which is what we (I) want.
      37             :  *
      38             :  * Alternatively, compile with -DDMETAPHONE_NOSTRICT and the functions
      39             :  * will detect NULL input and return NULL. Then you don't have to declare them
      40             :  * as STRICT.
      41             :  *
      42             :  * There is a small inefficiency here - each function call actually computes
      43             :  * both the primary and the alternate and then throws away the one it doesn't
      44             :  * need. That's the way the perl module was written, because perl can handle
      45             :  * a list return more easily than we can in PostgreSQL. The result has been
      46             :  * fast enough for my needs, but it could maybe be optimized a bit to remove
      47             :  * that behaviour.
      48             :  *
      49             :  */
      50             : 
      51             : 
      52             : /***************************** COPYRIGHT NOTICES ***********************
      53             : 
      54             : Most of this code is directly from the Text::DoubleMetaphone perl module
      55             : version 0.05 available from https://www.cpan.org/.
      56             : It bears this copyright notice:
      57             : 
      58             : 
      59             :   Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
      60             :   All rights reserved.
      61             : 
      62             :   This code is based heavily on the C++ implementation by
      63             :   Lawrence Philips and incorporates several bug fixes courtesy
      64             :   of Kevin Atkinson <kevina@users.sourceforge.net>.
      65             : 
      66             :   This module is free software; you may redistribute it and/or
      67             :   modify it under the same terms as Perl itself.
      68             : 
      69             : The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
      70             : <andrew@dunslane.net> and is covered this copyright:
      71             : 
      72             :   Copyright 2003, North Carolina State Highway Patrol.
      73             :   All rights reserved.
      74             : 
      75             :   Permission to use, copy, modify, and distribute this software and its
      76             :   documentation for any purpose, without fee, and without a written agreement
      77             :   is hereby granted, provided that the above copyright notice and this
      78             :   paragraph and the following two paragraphs appear in all copies.
      79             : 
      80             :   IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
      81             :   PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
      82             :   INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
      83             :   DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
      84             :   ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      85             : 
      86             :   THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
      87             :   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
      88             :   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED
      89             :   HEREUNDER IS ON AN "AS IS" BASIS, AND THE NORTH CAROLINA STATE HIGHWAY PATROL
      90             :   HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      91             :   MODIFICATIONS.
      92             : 
      93             : ***********************************************************************/
      94             : 
      95             : 
      96             : /* include these first, according to the docs */
      97             : #ifndef DMETAPHONE_MAIN
      98             : 
      99             : #include "postgres.h"
     100             : 
     101             : #include "utils/builtins.h"
     102             : 
     103             : /* turn off assertions for embedded function */
     104             : #define NDEBUG
     105             : 
     106             : #else                           /* DMETAPHONE_MAIN */
     107             : 
     108             : /* we need these if we didn't get them from postgres.h */
     109             : #include <stdio.h>
     110             : #include <stdlib.h>
     111             : #include <string.h>
     112             : #include <stdarg.h>
     113             : 
     114             : #endif                          /* DMETAPHONE_MAIN */
     115             : 
     116             : #include <assert.h>
     117             : #include <ctype.h>
     118             : 
     119             : /* prototype for the main function we got from the perl module */
     120             : static void DoubleMetaphone(char *, char **);
     121             : 
     122             : #ifndef DMETAPHONE_MAIN
     123             : 
     124             : /*
     125             :  * The PostgreSQL visible dmetaphone function.
     126             :  */
     127             : 
     128           4 : PG_FUNCTION_INFO_V1(dmetaphone);
     129             : 
     130             : Datum
     131           2 : dmetaphone(PG_FUNCTION_ARGS)
     132             : {
     133             :     text       *arg;
     134             :     char       *aptr,
     135             :                *codes[2],
     136             :                *code;
     137             : 
     138             : #ifdef DMETAPHONE_NOSTRICT
     139             :     if (PG_ARGISNULL(0))
     140             :         PG_RETURN_NULL();
     141             : #endif
     142           2 :     arg = PG_GETARG_TEXT_PP(0);
     143           2 :     aptr = text_to_cstring(arg);
     144             : 
     145           2 :     DoubleMetaphone(aptr, codes);
     146           2 :     code = codes[0];
     147           2 :     if (!code)
     148           0 :         code = "";
     149             : 
     150           2 :     PG_RETURN_TEXT_P(cstring_to_text(code));
     151             : }
     152             : 
     153             : /*
     154             :  * The PostgreSQL visible dmetaphone_alt function.
     155             :  */
     156             : 
     157           4 : PG_FUNCTION_INFO_V1(dmetaphone_alt);
     158             : 
     159             : Datum
     160           2 : dmetaphone_alt(PG_FUNCTION_ARGS)
     161             : {
     162             :     text       *arg;
     163             :     char       *aptr,
     164             :                *codes[2],
     165             :                *code;
     166             : 
     167             : #ifdef DMETAPHONE_NOSTRICT
     168             :     if (PG_ARGISNULL(0))
     169             :         PG_RETURN_NULL();
     170             : #endif
     171           2 :     arg = PG_GETARG_TEXT_PP(0);
     172           2 :     aptr = text_to_cstring(arg);
     173             : 
     174           2 :     DoubleMetaphone(aptr, codes);
     175           2 :     code = codes[1];
     176           2 :     if (!code)
     177           0 :         code = "";
     178             : 
     179           2 :     PG_RETURN_TEXT_P(cstring_to_text(code));
     180             : }
     181             : 
     182             : 
     183             : /* here is where we start the code imported from the perl module */
     184             : 
     185             : /* all memory handling is done with these macros */
     186             : 
     187             : #define META_MALLOC(v,n,t) \
     188             :           (v = (t*)palloc(((n)*sizeof(t))))
     189             : 
     190             : #define META_REALLOC(v,n,t) \
     191             :                       (v = (t*)repalloc((v),((n)*sizeof(t))))
     192             : 
     193             : /*
     194             :  * Don't do pfree - it seems to cause a SIGSEGV sometimes - which might have just
     195             :  * been caused by reloading the module in development.
     196             :  * So we rely on context cleanup - Tom Lane says pfree shouldn't be necessary
     197             :  * in a case like this.
     198             :  */
     199             : 
     200             : #define META_FREE(x) ((void)true)   /* pfree((x)) */
     201             : #else                           /* not defined DMETAPHONE_MAIN */
     202             : 
     203             : /* use the standard malloc library when not running in PostgreSQL */
     204             : 
     205             : #define META_MALLOC(v,n,t) \
     206             :           (v = (t*)malloc(((n)*sizeof(t))))
     207             : 
     208             : #define META_REALLOC(v,n,t) \
     209             :                       (v = (t*)realloc((v),((n)*sizeof(t))))
     210             : 
     211             : #define META_FREE(x) free((x))
     212             : #endif                          /* defined DMETAPHONE_MAIN */
     213             : 
     214             : 
     215             : 
     216             : /* this typedef was originally in the perl module's .h file */
     217             : 
     218             : typedef struct
     219             : {
     220             :     char       *str;
     221             :     int         length;
     222             :     int         bufsize;
     223             :     int         free_string_on_destroy;
     224             : }
     225             : 
     226             : metastring;
     227             : 
     228             : /*
     229             :  * remaining perl module funcs unchanged except for declaring them static
     230             :  * and reformatting to PostgreSQL indentation and to fit in 80 cols.
     231             :  *
     232             :  */
     233             : 
     234             : static metastring *
     235          12 : NewMetaString(const char *init_str)
     236             : {
     237             :     metastring *s;
     238          12 :     char        empty_string[] = "";
     239             : 
     240          12 :     META_MALLOC(s, 1, metastring);
     241             :     assert(s != NULL);
     242             : 
     243          12 :     if (init_str == NULL)
     244           0 :         init_str = empty_string;
     245          12 :     s->length = strlen(init_str);
     246             :     /* preallocate a bit more for potential growth */
     247          12 :     s->bufsize = s->length + 7;
     248             : 
     249          12 :     META_MALLOC(s->str, s->bufsize, char);
     250             :     assert(s->str != NULL);
     251             : 
     252          12 :     memcpy(s->str, init_str, s->length + 1);
     253          12 :     s->free_string_on_destroy = 1;
     254             : 
     255          12 :     return s;
     256             : }
     257             : 
     258             : 
     259             : static void
     260          12 : DestroyMetaString(metastring *s)
     261             : {
     262          12 :     if (s == NULL)
     263           0 :         return;
     264             : 
     265          12 :     if (s->free_string_on_destroy && (s->str != NULL))
     266             :         META_FREE(s->str);
     267             : 
     268             :     META_FREE(s);
     269             : }
     270             : 
     271             : 
     272             : static void
     273           0 : IncreaseBuffer(metastring *s, int chars_needed)
     274             : {
     275           0 :     META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
     276             :     assert(s->str != NULL);
     277           0 :     s->bufsize = s->bufsize + chars_needed + 10;
     278           0 : }
     279             : 
     280             : 
     281             : static void
     282           4 : MakeUpper(metastring *s)
     283             : {
     284             :     char       *i;
     285             : 
     286          44 :     for (i = s->str; *i; i++)
     287          40 :         *i = toupper((unsigned char) *i);
     288           4 : }
     289             : 
     290             : 
     291             : static int
     292           0 : IsVowel(metastring *s, int pos)
     293             : {
     294             :     char        c;
     295             : 
     296           0 :     if ((pos < 0) || (pos >= s->length))
     297           0 :         return 0;
     298             : 
     299           0 :     c = *(s->str + pos);
     300           0 :     if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
     301           0 :         (c == 'U') || (c == 'Y'))
     302           0 :         return 1;
     303             : 
     304           0 :     return 0;
     305             : }
     306             : 
     307             : 
     308             : static int
     309           0 : SlavoGermanic(metastring *s)
     310             : {
     311           0 :     if ((char *) strstr(s->str, "W"))
     312           0 :         return 1;
     313           0 :     else if ((char *) strstr(s->str, "K"))
     314           0 :         return 1;
     315           0 :     else if ((char *) strstr(s->str, "CZ"))
     316           0 :         return 1;
     317           0 :     else if ((char *) strstr(s->str, "WITZ"))
     318           0 :         return 1;
     319             :     else
     320           0 :         return 0;
     321             : }
     322             : 
     323             : 
     324             : static char
     325          52 : GetAt(metastring *s, int pos)
     326             : {
     327          52 :     if ((pos < 0) || (pos >= s->length))
     328           0 :         return '\0';
     329             : 
     330          52 :     return ((char) *(s->str + pos));
     331             : }
     332             : 
     333             : 
     334             : static void
     335           0 : SetAt(metastring *s, int pos, char c)
     336             : {
     337           0 :     if ((pos < 0) || (pos >= s->length))
     338           0 :         return;
     339             : 
     340           0 :     *(s->str + pos) = c;
     341             : }
     342             : 
     343             : 
     344             : /*
     345             :    Caveats: the START value is 0 based
     346             : */
     347             : static int
     348          32 : StringAt(metastring *s, int start, int length,...)
     349             : {
     350             :     char       *test;
     351             :     char       *pos;
     352             :     va_list     ap;
     353             : 
     354          32 :     if ((start < 0) || (start >= s->length))
     355           4 :         return 0;
     356             : 
     357          28 :     pos = (s->str + start);
     358          28 :     va_start(ap, length);
     359             : 
     360             :     do
     361             :     {
     362         116 :         test = va_arg(ap, char *);
     363         116 :         if (*test && (strncmp(pos, test, length) == 0))
     364             :         {
     365           4 :             va_end(ap);
     366           4 :             return 1;
     367             :         }
     368             :     }
     369         112 :     while (strcmp(test, "") != 0);
     370             : 
     371          24 :     va_end(ap);
     372             : 
     373          24 :     return 0;
     374             : }
     375             : 
     376             : 
     377             : static void
     378          28 : MetaphAdd(metastring *s, const char *new_str)
     379             : {
     380             :     int         add_length;
     381             : 
     382          28 :     if (new_str == NULL)
     383           0 :         return;
     384             : 
     385          28 :     add_length = strlen(new_str);
     386          28 :     if ((s->length + add_length) > (s->bufsize - 1))
     387           0 :         IncreaseBuffer(s, add_length);
     388             : 
     389          28 :     strcat(s->str, new_str);
     390          28 :     s->length += add_length;
     391             : }
     392             : 
     393             : 
     394             : static void
     395           4 : DoubleMetaphone(char *str, char **codes)
     396             : {
     397             :     int         length;
     398             :     metastring *original;
     399             :     metastring *primary;
     400             :     metastring *secondary;
     401             :     int         current;
     402             :     int         last;
     403             : 
     404           4 :     current = 0;
     405             :     /* we need the real length and last prior to padding */
     406           4 :     length = strlen(str);
     407           4 :     last = length - 1;
     408           4 :     original = NewMetaString(str);
     409             :     /* Pad original so we can index beyond end */
     410           4 :     MetaphAdd(original, "     ");
     411             : 
     412           4 :     primary = NewMetaString("");
     413           4 :     secondary = NewMetaString("");
     414           4 :     primary->free_string_on_destroy = 0;
     415           4 :     secondary->free_string_on_destroy = 0;
     416             : 
     417           4 :     MakeUpper(original);
     418             : 
     419             :     /* skip these when at start of word */
     420           4 :     if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
     421           0 :         current += 1;
     422             : 
     423             :     /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
     424           4 :     if (GetAt(original, 0) == 'X')
     425             :     {
     426           0 :         MetaphAdd(primary, "S");  /* 'Z' maps to 'S' */
     427           0 :         MetaphAdd(secondary, "S");
     428           0 :         current += 1;
     429             :     }
     430             : 
     431             :     /* main loop */
     432          28 :     while ((primary->length < 4) || (secondary->length < 4))
     433             :     {
     434          24 :         if (current >= length)
     435           4 :             break;
     436             : 
     437          20 :         switch (GetAt(original, current))
     438             :         {
     439             :             case 'A':
     440             :             case 'E':
     441             :             case 'I':
     442             :             case 'O':
     443             :             case 'U':
     444             :             case 'Y':
     445           8 :                 if (current == 0)
     446             :                 {
     447             :                     /* all init vowels now map to 'A' */
     448           0 :                     MetaphAdd(primary, "A");
     449           0 :                     MetaphAdd(secondary, "A");
     450             :                 }
     451           8 :                 current += 1;
     452           8 :                 break;
     453             : 
     454             :             case 'B':
     455             : 
     456             :                 /* "-mb", e.g., "dumb", already skipped over... */
     457           4 :                 MetaphAdd(primary, "P");
     458           4 :                 MetaphAdd(secondary, "P");
     459             : 
     460           4 :                 if (GetAt(original, current + 1) == 'B')
     461           0 :                     current += 2;
     462             :                 else
     463           4 :                     current += 1;
     464           4 :                 break;
     465             : 
     466             :             case '\xc7':        /* C with cedilla */
     467           0 :                 MetaphAdd(primary, "S");
     468           0 :                 MetaphAdd(secondary, "S");
     469           0 :                 current += 1;
     470           0 :                 break;
     471             : 
     472             :             case 'C':
     473             :                 /* various germanic */
     474           0 :                 if ((current > 1)
     475           0 :                     && !IsVowel(original, current - 2)
     476           0 :                     && StringAt(original, (current - 1), 3, "ACH", "")
     477           0 :                     && ((GetAt(original, current + 2) != 'I')
     478           0 :                         && ((GetAt(original, current + 2) != 'E')
     479           0 :                             || StringAt(original, (current - 2), 6, "BACHER",
     480             :                                         "MACHER", ""))))
     481             :                 {
     482           0 :                     MetaphAdd(primary, "K");
     483           0 :                     MetaphAdd(secondary, "K");
     484           0 :                     current += 2;
     485           0 :                     break;
     486             :                 }
     487             : 
     488             :                 /* special case 'caesar' */
     489           0 :                 if ((current == 0)
     490           0 :                     && StringAt(original, current, 6, "CAESAR", ""))
     491             :                 {
     492           0 :                     MetaphAdd(primary, "S");
     493           0 :                     MetaphAdd(secondary, "S");
     494           0 :                     current += 2;
     495           0 :                     break;
     496             :                 }
     497             : 
     498             :                 /* italian 'chianti' */
     499           0 :                 if (StringAt(original, current, 4, "CHIA", ""))
     500             :                 {
     501           0 :                     MetaphAdd(primary, "K");
     502           0 :                     MetaphAdd(secondary, "K");
     503           0 :                     current += 2;
     504           0 :                     break;
     505             :                 }
     506             : 
     507           0 :                 if (StringAt(original, current, 2, "CH", ""))
     508             :                 {
     509             :                     /* find 'michael' */
     510           0 :                     if ((current > 0)
     511           0 :                         && StringAt(original, current, 4, "CHAE", ""))
     512             :                     {
     513           0 :                         MetaphAdd(primary, "K");
     514           0 :                         MetaphAdd(secondary, "X");
     515           0 :                         current += 2;
     516           0 :                         break;
     517             :                     }
     518             : 
     519             :                     /* greek roots e.g. 'chemistry', 'chorus' */
     520           0 :                     if ((current == 0)
     521           0 :                         && (StringAt(original, (current + 1), 5,
     522             :                                      "HARAC", "HARIS", "")
     523           0 :                             || StringAt(original, (current + 1), 3, "HOR",
     524             :                                         "HYM", "HIA", "HEM", ""))
     525           0 :                         && !StringAt(original, 0, 5, "CHORE", ""))
     526             :                     {
     527           0 :                         MetaphAdd(primary, "K");
     528           0 :                         MetaphAdd(secondary, "K");
     529           0 :                         current += 2;
     530           0 :                         break;
     531             :                     }
     532             : 
     533             :                     /* germanic, greek, or otherwise 'ch' for 'kh' sound */
     534           0 :                     if (
     535           0 :                         (StringAt(original, 0, 4, "VAN ", "VON ", "")
     536           0 :                          || StringAt(original, 0, 3, "SCH", ""))
     537             :                     /* 'architect', 'orchestra', 'orchid', but not 'arch' */
     538           0 :                         || StringAt(original, (current - 2), 6, "ORCHES",
     539             :                                     "ARCHIT", "ORCHID", "")
     540           0 :                         || StringAt(original, (current + 2), 1, "T", "S",
     541             :                                     "")
     542           0 :                         || ((StringAt(original, (current - 1), 1,
     543             :                                       "A", "O", "U", "E", "")
     544           0 :                              || (current == 0))
     545             : 
     546             :                     /*
     547             :                      * e.g., 'wachtler', 'wechsler', but not 'tichner'
     548             :                      */
     549           0 :                             && StringAt(original, (current + 2), 1, "L", "R",
     550             :                                         "N", "M", "B", "H", "F", "V", "W",
     551             :                                         " ", "")))
     552             :                     {
     553           0 :                         MetaphAdd(primary, "K");
     554           0 :                         MetaphAdd(secondary, "K");
     555             :                     }
     556             :                     else
     557             :                     {
     558           0 :                         if (current > 0)
     559             :                         {
     560           0 :                             if (StringAt(original, 0, 2, "MC", ""))
     561             :                             {
     562             :                                 /* e.g., "McHugh" */
     563           0 :                                 MetaphAdd(primary, "K");
     564           0 :                                 MetaphAdd(secondary, "K");
     565             :                             }
     566             :                             else
     567             :                             {
     568           0 :                                 MetaphAdd(primary, "X");
     569           0 :                                 MetaphAdd(secondary, "K");
     570             :                             }
     571             :                         }
     572             :                         else
     573             :                         {
     574           0 :                             MetaphAdd(primary, "X");
     575           0 :                             MetaphAdd(secondary, "X");
     576             :                         }
     577             :                     }
     578           0 :                     current += 2;
     579           0 :                     break;
     580             :                 }
     581             :                 /* e.g, 'czerny' */
     582           0 :                 if (StringAt(original, current, 2, "CZ", "")
     583           0 :                     && !StringAt(original, (current - 2), 4, "WICZ", ""))
     584             :                 {
     585           0 :                     MetaphAdd(primary, "S");
     586           0 :                     MetaphAdd(secondary, "X");
     587           0 :                     current += 2;
     588           0 :                     break;
     589             :                 }
     590             : 
     591             :                 /* e.g., 'focaccia' */
     592           0 :                 if (StringAt(original, (current + 1), 3, "CIA", ""))
     593             :                 {
     594           0 :                     MetaphAdd(primary, "X");
     595           0 :                     MetaphAdd(secondary, "X");
     596           0 :                     current += 3;
     597           0 :                     break;
     598             :                 }
     599             : 
     600             :                 /* double 'C', but not if e.g. 'McClellan' */
     601           0 :                 if (StringAt(original, current, 2, "CC", "")
     602           0 :                     && !((current == 1) && (GetAt(original, 0) == 'M')))
     603             :                 {
     604             :                     /* 'bellocchio' but not 'bacchus' */
     605           0 :                     if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
     606           0 :                         && !StringAt(original, (current + 2), 2, "HU", ""))
     607             :                     {
     608             :                         /* 'accident', 'accede' 'succeed' */
     609           0 :                         if (
     610             :                             ((current == 1)
     611           0 :                              && (GetAt(original, current - 1) == 'A'))
     612           0 :                             || StringAt(original, (current - 1), 5, "UCCEE",
     613             :                                         "UCCES", ""))
     614             :                         {
     615           0 :                             MetaphAdd(primary, "KS");
     616           0 :                             MetaphAdd(secondary, "KS");
     617             :                             /* 'bacci', 'bertucci', other italian */
     618             :                         }
     619             :                         else
     620             :                         {
     621           0 :                             MetaphAdd(primary, "X");
     622           0 :                             MetaphAdd(secondary, "X");
     623             :                         }
     624           0 :                         current += 3;
     625           0 :                         break;
     626             :                     }
     627             :                     else
     628             :                     {           /* Pierce's rule */
     629           0 :                         MetaphAdd(primary, "K");
     630           0 :                         MetaphAdd(secondary, "K");
     631           0 :                         current += 2;
     632           0 :                         break;
     633             :                     }
     634             :                 }
     635             : 
     636           0 :                 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
     637             :                 {
     638           0 :                     MetaphAdd(primary, "K");
     639           0 :                     MetaphAdd(secondary, "K");
     640           0 :                     current += 2;
     641           0 :                     break;
     642             :                 }
     643             : 
     644           0 :                 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
     645             :                 {
     646             :                     /* italian vs. english */
     647           0 :                     if (StringAt
     648             :                         (original, current, 3, "CIO", "CIE", "CIA", ""))
     649             :                     {
     650           0 :                         MetaphAdd(primary, "S");
     651           0 :                         MetaphAdd(secondary, "X");
     652             :                     }
     653             :                     else
     654             :                     {
     655           0 :                         MetaphAdd(primary, "S");
     656           0 :                         MetaphAdd(secondary, "S");
     657             :                     }
     658           0 :                     current += 2;
     659           0 :                     break;
     660             :                 }
     661             : 
     662             :                 /* else */
     663           0 :                 MetaphAdd(primary, "K");
     664           0 :                 MetaphAdd(secondary, "K");
     665             : 
     666             :                 /* name sent in 'mac caffrey', 'mac gregor' */
     667           0 :                 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
     668           0 :                     current += 3;
     669           0 :                 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
     670           0 :                          && !StringAt(original, (current + 1), 2,
     671             :                                       "CE", "CI", ""))
     672           0 :                     current += 2;
     673             :                 else
     674           0 :                     current += 1;
     675           0 :                 break;
     676             : 
     677             :             case 'D':
     678           0 :                 if (StringAt(original, current, 2, "DG", ""))
     679             :                 {
     680           0 :                     if (StringAt(original, (current + 2), 1,
     681             :                                  "I", "E", "Y", ""))
     682             :                     {
     683             :                         /* e.g. 'edge' */
     684           0 :                         MetaphAdd(primary, "J");
     685           0 :                         MetaphAdd(secondary, "J");
     686           0 :                         current += 3;
     687           0 :                         break;
     688             :                     }
     689             :                     else
     690             :                     {
     691             :                         /* e.g. 'edgar' */
     692           0 :                         MetaphAdd(primary, "TK");
     693           0 :                         MetaphAdd(secondary, "TK");
     694           0 :                         current += 2;
     695           0 :                         break;
     696             :                     }
     697             :                 }
     698             : 
     699           0 :                 if (StringAt(original, current, 2, "DT", "DD", ""))
     700             :                 {
     701           0 :                     MetaphAdd(primary, "T");
     702           0 :                     MetaphAdd(secondary, "T");
     703           0 :                     current += 2;
     704           0 :                     break;
     705             :                 }
     706             : 
     707             :                 /* else */
     708           0 :                 MetaphAdd(primary, "T");
     709           0 :                 MetaphAdd(secondary, "T");
     710           0 :                 current += 1;
     711           0 :                 break;
     712             : 
     713             :             case 'F':
     714           0 :                 if (GetAt(original, current + 1) == 'F')
     715           0 :                     current += 2;
     716             :                 else
     717           0 :                     current += 1;
     718           0 :                 MetaphAdd(primary, "F");
     719           0 :                 MetaphAdd(secondary, "F");
     720           0 :                 break;
     721             : 
     722             :             case 'G':
     723           4 :                 if (GetAt(original, current + 1) == 'H')
     724             :                 {
     725           0 :                     if ((current > 0) && !IsVowel(original, current - 1))
     726             :                     {
     727           0 :                         MetaphAdd(primary, "K");
     728           0 :                         MetaphAdd(secondary, "K");
     729           0 :                         current += 2;
     730           0 :                         break;
     731             :                     }
     732             : 
     733           0 :                     if (current < 3)
     734             :                     {
     735             :                         /* 'ghislane', 'ghiradelli' */
     736           0 :                         if (current == 0)
     737             :                         {
     738           0 :                             if (GetAt(original, current + 2) == 'I')
     739             :                             {
     740           0 :                                 MetaphAdd(primary, "J");
     741           0 :                                 MetaphAdd(secondary, "J");
     742             :                             }
     743             :                             else
     744             :                             {
     745           0 :                                 MetaphAdd(primary, "K");
     746           0 :                                 MetaphAdd(secondary, "K");
     747             :                             }
     748           0 :                             current += 2;
     749           0 :                             break;
     750             :                         }
     751             :                     }
     752             : 
     753             :                     /*
     754             :                      * Parker's rule (with some further refinements) - e.g.,
     755             :                      * 'hugh'
     756             :                      */
     757           0 :                     if (
     758             :                         ((current > 1)
     759           0 :                          && StringAt(original, (current - 2), 1,
     760             :                                      "B", "H", "D", ""))
     761             :                     /* e.g., 'bough' */
     762           0 :                         || ((current > 2)
     763           0 :                             && StringAt(original, (current - 3), 1,
     764             :                                         "B", "H", "D", ""))
     765             :                     /* e.g., 'broughton' */
     766           0 :                         || ((current > 3)
     767           0 :                             && StringAt(original, (current - 4), 1,
     768             :                                         "B", "H", "")))
     769             :                     {
     770           0 :                         current += 2;
     771           0 :                         break;
     772             :                     }
     773             :                     else
     774             :                     {
     775             :                         /*
     776             :                          * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
     777             :                          * 'rough', 'tough'
     778             :                          */
     779           0 :                         if ((current > 2)
     780           0 :                             && (GetAt(original, current - 1) == 'U')
     781           0 :                             && StringAt(original, (current - 3), 1, "C",
     782             :                                         "G", "L", "R", "T", ""))
     783             :                         {
     784           0 :                             MetaphAdd(primary, "F");
     785           0 :                             MetaphAdd(secondary, "F");
     786             :                         }
     787           0 :                         else if ((current > 0)
     788           0 :                                  && GetAt(original, current - 1) != 'I')
     789             :                         {
     790             : 
     791             : 
     792           0 :                             MetaphAdd(primary, "K");
     793           0 :                             MetaphAdd(secondary, "K");
     794             :                         }
     795             : 
     796           0 :                         current += 2;
     797           0 :                         break;
     798             :                     }
     799             :                 }
     800             : 
     801           4 :                 if (GetAt(original, current + 1) == 'N')
     802             :                 {
     803           0 :                     if ((current == 1) && IsVowel(original, 0)
     804           0 :                         && !SlavoGermanic(original))
     805             :                     {
     806           0 :                         MetaphAdd(primary, "KN");
     807           0 :                         MetaphAdd(secondary, "N");
     808             :                     }
     809             :                     else
     810             :                         /* not e.g. 'cagney' */
     811           0 :                         if (!StringAt(original, (current + 2), 2, "EY", "")
     812           0 :                             && (GetAt(original, current + 1) != 'Y')
     813           0 :                             && !SlavoGermanic(original))
     814             :                     {
     815           0 :                         MetaphAdd(primary, "N");
     816           0 :                         MetaphAdd(secondary, "KN");
     817             :                     }
     818             :                     else
     819             :                     {
     820           0 :                         MetaphAdd(primary, "KN");
     821           0 :                         MetaphAdd(secondary, "KN");
     822             :                     }
     823           0 :                     current += 2;
     824           0 :                     break;
     825             :                 }
     826             : 
     827             :                 /* 'tagliaro' */
     828           4 :                 if (StringAt(original, (current + 1), 2, "LI", "")
     829           0 :                     && !SlavoGermanic(original))
     830             :                 {
     831           0 :                     MetaphAdd(primary, "KL");
     832           0 :                     MetaphAdd(secondary, "L");
     833           0 :                     current += 2;
     834           0 :                     break;
     835             :                 }
     836             : 
     837             :                 /* -ges-,-gep-,-gel-, -gie- at beginning */
     838           4 :                 if ((current == 0)
     839           4 :                     && ((GetAt(original, current + 1) == 'Y')
     840           4 :                         || StringAt(original, (current + 1), 2, "ES", "EP",
     841             :                                     "EB", "EL", "EY", "IB", "IL", "IN", "IE",
     842             :                                     "EI", "ER", "")))
     843             :                 {
     844           0 :                     MetaphAdd(primary, "K");
     845           0 :                     MetaphAdd(secondary, "J");
     846           0 :                     current += 2;
     847           0 :                     break;
     848             :                 }
     849             : 
     850             :                 /* -ger-,  -gy- */
     851           4 :                 if (
     852           4 :                     (StringAt(original, (current + 1), 2, "ER", "")
     853           4 :                      || (GetAt(original, current + 1) == 'Y'))
     854           0 :                     && !StringAt(original, 0, 6,
     855             :                                  "DANGER", "RANGER", "MANGER", "")
     856           0 :                     && !StringAt(original, (current - 1), 1, "E", "I", "")
     857           0 :                     && !StringAt(original, (current - 1), 3, "RGY", "OGY",
     858             :                                  ""))
     859             :                 {
     860           0 :                     MetaphAdd(primary, "K");
     861           0 :                     MetaphAdd(secondary, "J");
     862           0 :                     current += 2;
     863           0 :                     break;
     864             :                 }
     865             : 
     866             :                 /* italian e.g., 'biaggi' */
     867           4 :                 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
     868           4 :                     || StringAt(original, (current - 1), 4,
     869             :                                 "AGGI", "OGGI", ""))
     870             :                 {
     871             :                     /* obvious germanic */
     872           0 :                     if (
     873           0 :                         (StringAt(original, 0, 4, "VAN ", "VON ", "")
     874           0 :                          || StringAt(original, 0, 3, "SCH", ""))
     875           0 :                         || StringAt(original, (current + 1), 2, "ET", ""))
     876             :                     {
     877           0 :                         MetaphAdd(primary, "K");
     878           0 :                         MetaphAdd(secondary, "K");
     879             :                     }
     880             :                     else
     881             :                     {
     882             :                         /* always soft if french ending */
     883           0 :                         if (StringAt
     884             :                             (original, (current + 1), 4, "IER ", ""))
     885             :                         {
     886           0 :                             MetaphAdd(primary, "J");
     887           0 :                             MetaphAdd(secondary, "J");
     888             :                         }
     889             :                         else
     890             :                         {
     891           0 :                             MetaphAdd(primary, "J");
     892           0 :                             MetaphAdd(secondary, "K");
     893             :                         }
     894             :                     }
     895           0 :                     current += 2;
     896           0 :                     break;
     897             :                 }
     898             : 
     899           4 :                 if (GetAt(original, current + 1) == 'G')
     900           0 :                     current += 2;
     901             :                 else
     902           4 :                     current += 1;
     903           4 :                 MetaphAdd(primary, "K");
     904           4 :                 MetaphAdd(secondary, "K");
     905           4 :                 break;
     906             : 
     907             :             case 'H':
     908             :                 /* only keep if first & before vowel or btw. 2 vowels */
     909           0 :                 if (((current == 0) || IsVowel(original, current - 1))
     910           0 :                     && IsVowel(original, current + 1))
     911             :                 {
     912           0 :                     MetaphAdd(primary, "H");
     913           0 :                     MetaphAdd(secondary, "H");
     914           0 :                     current += 2;
     915             :                 }
     916             :                 else
     917             :                     /* also takes care of 'HH' */
     918           0 :                     current += 1;
     919           0 :                 break;
     920             : 
     921             :             case 'J':
     922             :                 /* obvious spanish, 'jose', 'san jacinto' */
     923           0 :                 if (StringAt(original, current, 4, "JOSE", "")
     924           0 :                     || StringAt(original, 0, 4, "SAN ", ""))
     925             :                 {
     926           0 :                     if (((current == 0)
     927           0 :                          && (GetAt(original, current + 4) == ' '))
     928           0 :                         || StringAt(original, 0, 4, "SAN ", ""))
     929             :                     {
     930           0 :                         MetaphAdd(primary, "H");
     931           0 :                         MetaphAdd(secondary, "H");
     932             :                     }
     933             :                     else
     934             :                     {
     935           0 :                         MetaphAdd(primary, "J");
     936           0 :                         MetaphAdd(secondary, "H");
     937             :                     }
     938           0 :                     current += 1;
     939           0 :                     break;
     940             :                 }
     941             : 
     942           0 :                 if ((current == 0)
     943           0 :                     && !StringAt(original, current, 4, "JOSE", ""))
     944             :                 {
     945           0 :                     MetaphAdd(primary, "J");  /* Yankelovich/Jankelowicz */
     946           0 :                     MetaphAdd(secondary, "A");
     947             :                 }
     948             :                 else
     949             :                 {
     950             :                     /* spanish pron. of e.g. 'bajador' */
     951           0 :                     if (IsVowel(original, current - 1)
     952           0 :                         && !SlavoGermanic(original)
     953           0 :                         && ((GetAt(original, current + 1) == 'A')
     954           0 :                             || (GetAt(original, current + 1) == 'O')))
     955             :                     {
     956           0 :                         MetaphAdd(primary, "J");
     957           0 :                         MetaphAdd(secondary, "H");
     958             :                     }
     959             :                     else
     960             :                     {
     961           0 :                         if (current == last)
     962             :                         {
     963           0 :                             MetaphAdd(primary, "J");
     964           0 :                             MetaphAdd(secondary, "");
     965             :                         }
     966             :                         else
     967             :                         {
     968           0 :                             if (!StringAt(original, (current + 1), 1, "L", "T",
     969             :                                           "K", "S", "N", "M", "B", "Z", "")
     970           0 :                                 && !StringAt(original, (current - 1), 1,
     971             :                                              "S", "K", "L", ""))
     972             :                             {
     973           0 :                                 MetaphAdd(primary, "J");
     974           0 :                                 MetaphAdd(secondary, "J");
     975             :                             }
     976             :                         }
     977             :                     }
     978             :                 }
     979             : 
     980           0 :                 if (GetAt(original, current + 1) == 'J')    /* it could happen! */
     981           0 :                     current += 2;
     982             :                 else
     983           0 :                     current += 1;
     984           0 :                 break;
     985             : 
     986             :             case 'K':
     987           0 :                 if (GetAt(original, current + 1) == 'K')
     988           0 :                     current += 2;
     989             :                 else
     990           0 :                     current += 1;
     991           0 :                 MetaphAdd(primary, "K");
     992           0 :                 MetaphAdd(secondary, "K");
     993           0 :                 break;
     994             : 
     995             :             case 'L':
     996           0 :                 if (GetAt(original, current + 1) == 'L')
     997             :                 {
     998             :                     /* spanish e.g. 'cabrillo', 'gallegos' */
     999           0 :                     if (((current == (length - 3))
    1000           0 :                          && StringAt(original, (current - 1), 4, "ILLO",
    1001             :                                      "ILLA", "ALLE", ""))
    1002           0 :                         || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
    1003           0 :                              || StringAt(original, last, 1, "A", "O", ""))
    1004           0 :                             && StringAt(original, (current - 1), 4,
    1005             :                                         "ALLE", "")))
    1006             :                     {
    1007           0 :                         MetaphAdd(primary, "L");
    1008           0 :                         MetaphAdd(secondary, "");
    1009           0 :                         current += 2;
    1010           0 :                         break;
    1011             :                     }
    1012           0 :                     current += 2;
    1013             :                 }
    1014             :                 else
    1015           0 :                     current += 1;
    1016           0 :                 MetaphAdd(primary, "L");
    1017           0 :                 MetaphAdd(secondary, "L");
    1018           0 :                 break;
    1019             : 
    1020             :             case 'M':
    1021           4 :                 if ((StringAt(original, (current - 1), 3, "UMB", "")
    1022           4 :                      && (((current + 1) == last)
    1023           4 :                          || StringAt(original, (current + 2), 2, "ER", "")))
    1024             :                 /* 'dumb','thumb' */
    1025           4 :                     || (GetAt(original, current + 1) == 'M'))
    1026           0 :                     current += 2;
    1027             :                 else
    1028           4 :                     current += 1;
    1029           4 :                 MetaphAdd(primary, "M");
    1030           4 :                 MetaphAdd(secondary, "M");
    1031           4 :                 break;
    1032             : 
    1033             :             case 'N':
    1034           0 :                 if (GetAt(original, current + 1) == 'N')
    1035           0 :                     current += 2;
    1036             :                 else
    1037           0 :                     current += 1;
    1038           0 :                 MetaphAdd(primary, "N");
    1039           0 :                 MetaphAdd(secondary, "N");
    1040           0 :                 break;
    1041             : 
    1042             :             case '\xd1':        /* N with tilde */
    1043           0 :                 current += 1;
    1044           0 :                 MetaphAdd(primary, "N");
    1045           0 :                 MetaphAdd(secondary, "N");
    1046           0 :                 break;
    1047             : 
    1048             :             case 'P':
    1049           0 :                 if (GetAt(original, current + 1) == 'H')
    1050             :                 {
    1051           0 :                     MetaphAdd(primary, "F");
    1052           0 :                     MetaphAdd(secondary, "F");
    1053           0 :                     current += 2;
    1054           0 :                     break;
    1055             :                 }
    1056             : 
    1057             :                 /* also account for "campbell", "raspberry" */
    1058           0 :                 if (StringAt(original, (current + 1), 1, "P", "B", ""))
    1059           0 :                     current += 2;
    1060             :                 else
    1061           0 :                     current += 1;
    1062           0 :                 MetaphAdd(primary, "P");
    1063           0 :                 MetaphAdd(secondary, "P");
    1064           0 :                 break;
    1065             : 
    1066             :             case 'Q':
    1067           0 :                 if (GetAt(original, current + 1) == 'Q')
    1068           0 :                     current += 2;
    1069             :                 else
    1070           0 :                     current += 1;
    1071           0 :                 MetaphAdd(primary, "K");
    1072           0 :                 MetaphAdd(secondary, "K");
    1073           0 :                 break;
    1074             : 
    1075             :             case 'R':
    1076             :                 /* french e.g. 'rogier', but exclude 'hochmeier' */
    1077           0 :                 if ((current == last)
    1078           0 :                     && !SlavoGermanic(original)
    1079           0 :                     && StringAt(original, (current - 2), 2, "IE", "")
    1080           0 :                     && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
    1081             :                 {
    1082           0 :                     MetaphAdd(primary, "");
    1083           0 :                     MetaphAdd(secondary, "R");
    1084             :                 }
    1085             :                 else
    1086             :                 {
    1087           0 :                     MetaphAdd(primary, "R");
    1088           0 :                     MetaphAdd(secondary, "R");
    1089             :                 }
    1090             : 
    1091           0 :                 if (GetAt(original, current + 1) == 'R')
    1092           0 :                     current += 2;
    1093             :                 else
    1094           0 :                     current += 1;
    1095           0 :                 break;
    1096             : 
    1097             :             case 'S':
    1098             :                 /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
    1099           0 :                 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
    1100             :                 {
    1101           0 :                     current += 1;
    1102           0 :                     break;
    1103             :                 }
    1104             : 
    1105             :                 /* special case 'sugar-' */
    1106           0 :                 if ((current == 0)
    1107           0 :                     && StringAt(original, current, 5, "SUGAR", ""))
    1108             :                 {
    1109           0 :                     MetaphAdd(primary, "X");
    1110           0 :                     MetaphAdd(secondary, "S");
    1111           0 :                     current += 1;
    1112           0 :                     break;
    1113             :                 }
    1114             : 
    1115           0 :                 if (StringAt(original, current, 2, "SH", ""))
    1116             :                 {
    1117             :                     /* germanic */
    1118           0 :                     if (StringAt
    1119             :                         (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
    1120             :                          "HOLZ", ""))
    1121             :                     {
    1122           0 :                         MetaphAdd(primary, "S");
    1123           0 :                         MetaphAdd(secondary, "S");
    1124             :                     }
    1125             :                     else
    1126             :                     {
    1127           0 :                         MetaphAdd(primary, "X");
    1128           0 :                         MetaphAdd(secondary, "X");
    1129             :                     }
    1130           0 :                     current += 2;
    1131           0 :                     break;
    1132             :                 }
    1133             : 
    1134             :                 /* italian & armenian */
    1135           0 :                 if (StringAt(original, current, 3, "SIO", "SIA", "")
    1136           0 :                     || StringAt(original, current, 4, "SIAN", ""))
    1137             :                 {
    1138           0 :                     if (!SlavoGermanic(original))
    1139             :                     {
    1140           0 :                         MetaphAdd(primary, "S");
    1141           0 :                         MetaphAdd(secondary, "X");
    1142             :                     }
    1143             :                     else
    1144             :                     {
    1145           0 :                         MetaphAdd(primary, "S");
    1146           0 :                         MetaphAdd(secondary, "S");
    1147             :                     }
    1148           0 :                     current += 3;
    1149           0 :                     break;
    1150             :                 }
    1151             : 
    1152             :                 /*
    1153             :                  * german & anglicisations, e.g. 'smith' match 'schmidt',
    1154             :                  * 'snider' match 'schneider' also, -sz- in slavic language
    1155             :                  * although in hungarian it is pronounced 's'
    1156             :                  */
    1157           0 :                 if (((current == 0)
    1158           0 :                      && StringAt(original, (current + 1), 1,
    1159             :                                  "M", "N", "L", "W", ""))
    1160           0 :                     || StringAt(original, (current + 1), 1, "Z", ""))
    1161             :                 {
    1162           0 :                     MetaphAdd(primary, "S");
    1163           0 :                     MetaphAdd(secondary, "X");
    1164           0 :                     if (StringAt(original, (current + 1), 1, "Z", ""))
    1165           0 :                         current += 2;
    1166             :                     else
    1167           0 :                         current += 1;
    1168           0 :                     break;
    1169             :                 }
    1170             : 
    1171           0 :                 if (StringAt(original, current, 2, "SC", ""))
    1172             :                 {
    1173             :                     /* Schlesinger's rule */
    1174           0 :                     if (GetAt(original, current + 2) == 'H')
    1175             :                     {
    1176             :                         /* dutch origin, e.g. 'school', 'schooner' */
    1177           0 :                         if (StringAt(original, (current + 3), 2,
    1178             :                                      "OO", "ER", "EN",
    1179             :                                      "UY", "ED", "EM", ""))
    1180             :                         {
    1181             :                             /* 'schermerhorn', 'schenker' */
    1182           0 :                             if (StringAt(original, (current + 3), 2,
    1183             :                                          "ER", "EN", ""))
    1184             :                             {
    1185           0 :                                 MetaphAdd(primary, "X");
    1186           0 :                                 MetaphAdd(secondary, "SK");
    1187             :                             }
    1188             :                             else
    1189             :                             {
    1190           0 :                                 MetaphAdd(primary, "SK");
    1191           0 :                                 MetaphAdd(secondary, "SK");
    1192             :                             }
    1193           0 :                             current += 3;
    1194           0 :                             break;
    1195             :                         }
    1196             :                         else
    1197             :                         {
    1198           0 :                             if ((current == 0) && !IsVowel(original, 3)
    1199           0 :                                 && (GetAt(original, 3) != 'W'))
    1200             :                             {
    1201           0 :                                 MetaphAdd(primary, "X");
    1202           0 :                                 MetaphAdd(secondary, "S");
    1203             :                             }
    1204             :                             else
    1205             :                             {
    1206           0 :                                 MetaphAdd(primary, "X");
    1207           0 :                                 MetaphAdd(secondary, "X");
    1208             :                             }
    1209           0 :                             current += 3;
    1210           0 :                             break;
    1211             :                         }
    1212             :                     }
    1213             : 
    1214           0 :                     if (StringAt(original, (current + 2), 1,
    1215             :                                  "I", "E", "Y", ""))
    1216             :                     {
    1217           0 :                         MetaphAdd(primary, "S");
    1218           0 :                         MetaphAdd(secondary, "S");
    1219           0 :                         current += 3;
    1220           0 :                         break;
    1221             :                     }
    1222             :                     /* else */
    1223           0 :                     MetaphAdd(primary, "SK");
    1224           0 :                     MetaphAdd(secondary, "SK");
    1225           0 :                     current += 3;
    1226           0 :                     break;
    1227             :                 }
    1228             : 
    1229             :                 /* french e.g. 'resnais', 'artois' */
    1230           0 :                 if ((current == last)
    1231           0 :                     && StringAt(original, (current - 2), 2, "AI", "OI", ""))
    1232             :                 {
    1233           0 :                     MetaphAdd(primary, "");
    1234           0 :                     MetaphAdd(secondary, "S");
    1235             :                 }
    1236             :                 else
    1237             :                 {
    1238           0 :                     MetaphAdd(primary, "S");
    1239           0 :                     MetaphAdd(secondary, "S");
    1240             :                 }
    1241             : 
    1242           0 :                 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
    1243           0 :                     current += 2;
    1244             :                 else
    1245           0 :                     current += 1;
    1246           0 :                 break;
    1247             : 
    1248             :             case 'T':
    1249           0 :                 if (StringAt(original, current, 4, "TION", ""))
    1250             :                 {
    1251           0 :                     MetaphAdd(primary, "X");
    1252           0 :                     MetaphAdd(secondary, "X");
    1253           0 :                     current += 3;
    1254           0 :                     break;
    1255             :                 }
    1256             : 
    1257           0 :                 if (StringAt(original, current, 3, "TIA", "TCH", ""))
    1258             :                 {
    1259           0 :                     MetaphAdd(primary, "X");
    1260           0 :                     MetaphAdd(secondary, "X");
    1261           0 :                     current += 3;
    1262           0 :                     break;
    1263             :                 }
    1264             : 
    1265           0 :                 if (StringAt(original, current, 2, "TH", "")
    1266           0 :                     || StringAt(original, current, 3, "TTH", ""))
    1267             :                 {
    1268             :                     /* special case 'thomas', 'thames' or germanic */
    1269           0 :                     if (StringAt(original, (current + 2), 2, "OM", "AM", "")
    1270           0 :                         || StringAt(original, 0, 4, "VAN ", "VON ", "")
    1271           0 :                         || StringAt(original, 0, 3, "SCH", ""))
    1272             :                     {
    1273           0 :                         MetaphAdd(primary, "T");
    1274           0 :                         MetaphAdd(secondary, "T");
    1275             :                     }
    1276             :                     else
    1277             :                     {
    1278           0 :                         MetaphAdd(primary, "0");
    1279           0 :                         MetaphAdd(secondary, "T");
    1280             :                     }
    1281           0 :                     current += 2;
    1282           0 :                     break;
    1283             :                 }
    1284             : 
    1285           0 :                 if (StringAt(original, (current + 1), 1, "T", "D", ""))
    1286           0 :                     current += 2;
    1287             :                 else
    1288           0 :                     current += 1;
    1289           0 :                 MetaphAdd(primary, "T");
    1290           0 :                 MetaphAdd(secondary, "T");
    1291           0 :                 break;
    1292             : 
    1293             :             case 'V':
    1294           0 :                 if (GetAt(original, current + 1) == 'V')
    1295           0 :                     current += 2;
    1296             :                 else
    1297           0 :                     current += 1;
    1298           0 :                 MetaphAdd(primary, "F");
    1299           0 :                 MetaphAdd(secondary, "F");
    1300           0 :                 break;
    1301             : 
    1302             :             case 'W':
    1303             :                 /* can also be in middle of word */
    1304           0 :                 if (StringAt(original, current, 2, "WR", ""))
    1305             :                 {
    1306           0 :                     MetaphAdd(primary, "R");
    1307           0 :                     MetaphAdd(secondary, "R");
    1308           0 :                     current += 2;
    1309           0 :                     break;
    1310             :                 }
    1311             : 
    1312           0 :                 if ((current == 0)
    1313           0 :                     && (IsVowel(original, current + 1)
    1314           0 :                         || StringAt(original, current, 2, "WH", "")))
    1315             :                 {
    1316             :                     /* Wasserman should match Vasserman */
    1317           0 :                     if (IsVowel(original, current + 1))
    1318             :                     {
    1319           0 :                         MetaphAdd(primary, "A");
    1320           0 :                         MetaphAdd(secondary, "F");
    1321             :                     }
    1322             :                     else
    1323             :                     {
    1324             :                         /* need Uomo to match Womo */
    1325           0 :                         MetaphAdd(primary, "A");
    1326           0 :                         MetaphAdd(secondary, "A");
    1327             :                     }
    1328             :                 }
    1329             : 
    1330             :                 /* Arnow should match Arnoff */
    1331           0 :                 if (((current == last) && IsVowel(original, current - 1))
    1332           0 :                     || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
    1333             :                                 "OWSKI", "OWSKY", "")
    1334           0 :                     || StringAt(original, 0, 3, "SCH", ""))
    1335             :                 {
    1336           0 :                     MetaphAdd(primary, "");
    1337           0 :                     MetaphAdd(secondary, "F");
    1338           0 :                     current += 1;
    1339           0 :                     break;
    1340             :                 }
    1341             : 
    1342             :                 /* polish e.g. 'filipowicz' */
    1343           0 :                 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
    1344             :                 {
    1345           0 :                     MetaphAdd(primary, "TS");
    1346           0 :                     MetaphAdd(secondary, "FX");
    1347           0 :                     current += 4;
    1348           0 :                     break;
    1349             :                 }
    1350             : 
    1351             :                 /* else skip it */
    1352           0 :                 current += 1;
    1353           0 :                 break;
    1354             : 
    1355             :             case 'X':
    1356             :                 /* french e.g. breaux */
    1357           0 :                 if (!((current == last)
    1358           0 :                       && (StringAt(original, (current - 3), 3,
    1359             :                                    "IAU", "EAU", "")
    1360           0 :                           || StringAt(original, (current - 2), 2,
    1361             :                                       "AU", "OU", ""))))
    1362             :                 {
    1363           0 :                     MetaphAdd(primary, "KS");
    1364           0 :                     MetaphAdd(secondary, "KS");
    1365             :                 }
    1366             : 
    1367             : 
    1368           0 :                 if (StringAt(original, (current + 1), 1, "C", "X", ""))
    1369           0 :                     current += 2;
    1370             :                 else
    1371           0 :                     current += 1;
    1372           0 :                 break;
    1373             : 
    1374             :             case 'Z':
    1375             :                 /* chinese pinyin e.g. 'zhao' */
    1376           0 :                 if (GetAt(original, current + 1) == 'H')
    1377             :                 {
    1378           0 :                     MetaphAdd(primary, "J");
    1379           0 :                     MetaphAdd(secondary, "J");
    1380           0 :                     current += 2;
    1381           0 :                     break;
    1382             :                 }
    1383           0 :                 else if (StringAt(original, (current + 1), 2,
    1384             :                                   "ZO", "ZI", "ZA", "")
    1385           0 :                          || (SlavoGermanic(original)
    1386           0 :                              && ((current > 0)
    1387           0 :                                  && GetAt(original, current - 1) != 'T')))
    1388             :                 {
    1389           0 :                     MetaphAdd(primary, "S");
    1390           0 :                     MetaphAdd(secondary, "TS");
    1391             :                 }
    1392             :                 else
    1393             :                 {
    1394           0 :                     MetaphAdd(primary, "S");
    1395           0 :                     MetaphAdd(secondary, "S");
    1396             :                 }
    1397             : 
    1398           0 :                 if (GetAt(original, current + 1) == 'Z')
    1399           0 :                     current += 2;
    1400             :                 else
    1401           0 :                     current += 1;
    1402           0 :                 break;
    1403             : 
    1404             :             default:
    1405           0 :                 current += 1;
    1406             :         }
    1407             : 
    1408             :         /*
    1409             :          * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
    1410             :          * secondary->str);
    1411             :          */
    1412             :     }
    1413             : 
    1414             : 
    1415           4 :     if (primary->length > 4)
    1416           0 :         SetAt(primary, 4, '\0');
    1417             : 
    1418           4 :     if (secondary->length > 4)
    1419           0 :         SetAt(secondary, 4, '\0');
    1420             : 
    1421           4 :     *codes = primary->str;
    1422           4 :     *++codes = secondary->str;
    1423             : 
    1424           4 :     DestroyMetaString(original);
    1425           4 :     DestroyMetaString(primary);
    1426           4 :     DestroyMetaString(secondary);
    1427           4 : }
    1428             : 
    1429             : #ifdef DMETAPHONE_MAIN
    1430             : 
    1431             : /* Test driver only (not part of the perl code): prints "primary|secondary" for argv[1]; exits 1 when no word is given. */
    1432             : int
    1433             : main(int argc, char **argv)
    1434             : {
    1435             :     char       *codes[2];   /* [0] = primary code, [1] = secondary code */
    1436             : 
    1437             :     if (argc <= 1)
    1438             :         return 1;           /* nothing to encode */
    1439             :     DoubleMetaphone(argv[1], codes);
    1440             :     printf("%s|%s\n", codes[0], codes[1]);
    1441             :     return 0;
    1442             : }
    1443             : 
    1444             : #endif

Generated by: LCOV version 1.13