LCOV - code coverage report
Current view: top level - contrib/fuzzystrmatch - dmetaphone.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 114 644 17.7 %
Date: 2024-11-21 08:14:44 Functions: 11 15 73.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * This is a port of the Double Metaphone algorithm for use in PostgreSQL.
       3             :  *
       4             :  * contrib/fuzzystrmatch/dmetaphone.c
       5             :  *
       6             :  * Double Metaphone computes 2 "sounds like" strings - a primary and an
       7             :  * alternate. In most cases they are the same, but for foreign names
       8             :  * especially they can be a bit different, depending on pronunciation.
       9             :  *
      10             :  * Information on using Double Metaphone can be found at
      11             :  *   http://www.codeproject.com/string/dmetaphone1.asp
      12             :  * and the original article describing it can be found at
      13             :  *   http://drdobbs.com/184401251
      14             :  *
      15             :  * For PostgreSQL we provide 2 functions - one for the primary and one for
      16             :  * the alternate. That way the functions are pure text->text mappings that
      17             :  * are useful in functional indexes. These are 'dmetaphone' for the
      18             :  * primary and 'dmetaphone_alt' for the alternate.
      19             :  *
      20             :  * Assuming that dmetaphone.so is in $libdir, the SQL to set up the
      21             :  * functions looks like this:
      22             :  *
      23             :  * CREATE FUNCTION dmetaphone (text) RETURNS text
      24             :  *    LANGUAGE C IMMUTABLE STRICT
      25             :  *    AS '$libdir/dmetaphone', 'dmetaphone';
      26             :  *
      27             :  * CREATE FUNCTION dmetaphone_alt (text) RETURNS text
      28             :  *    LANGUAGE C IMMUTABLE STRICT
      29             :  *    AS '$libdir/dmetaphone', 'dmetaphone_alt';
      30             :  *
      31             :  * Note that you have to declare the functions IMMUTABLE if you want to
      32             :  * use them in functional indexes, and you have to declare them as STRICT
      33             :  * as they do not check for NULL input, and will segfault if given NULL input.
      34             :  * (See below for an alternative.) Declaring them as STRICT means PostgreSQL
      35             :  * will never call them with NULL, but instead assume the result is NULL,
      36             :  * which is what we (I) want.
      37             :  *
      38             :  * Alternatively, compile with -DDMETAPHONE_NOSTRICT and the functions
      39             :  * will detect NULL input and return NULL. Then you don't have to declare them
      40             :  * as STRICT.
      41             :  *
      42             :  * There is a small inefficiency here - each function call actually computes
      43             :  * both the primary and the alternate and then throws away the one it doesn't
      44             :  * need. That's the way the perl module was written, because perl can handle
      45             :  * a list return more easily than we can in PostgreSQL. The result has been
      46             :  * fast enough for my needs, but it could maybe be optimized a bit to remove
      47             :  * that behaviour.
      48             :  *
      49             :  */
      50             : 
      51             : 
      52             : /***************************** COPYRIGHT NOTICES ***********************
      53             : 
      54             : Most of this code is directly from the Text::DoubleMetaphone perl module
      55             : version 0.05 available from https://www.cpan.org/.
      56             : It bears this copyright notice:
      57             : 
      58             : 
      59             :   Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
      60             :   All rights reserved.
      61             : 
      62             :   This code is based heavily on the C++ implementation by
      63             :   Lawrence Philips and incorporates several bug fixes courtesy
      64             :   of Kevin Atkinson <kevina@users.sourceforge.net>.
      65             : 
      66             :   This module is free software; you may redistribute it and/or
      67             :   modify it under the same terms as Perl itself.
      68             : 
      69             : The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
      70             : <andrew@dunslane.net> and is covered this copyright:
      71             : 
      72             :   Copyright 2003, North Carolina State Highway Patrol.
      73             :   All rights reserved.
      74             : 
      75             :   Permission to use, copy, modify, and distribute this software and its
      76             :   documentation for any purpose, without fee, and without a written agreement
      77             :   is hereby granted, provided that the above copyright notice and this
      78             :   paragraph and the following two paragraphs appear in all copies.
      79             : 
      80             :   IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
      81             :   PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
      82             :   INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
      83             :   DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
      84             :   ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      85             : 
      86             :   THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
      87             :   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
      88             :   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED
      89             :   HEREUNDER IS ON AN "AS IS" BASIS, AND THE NORTH CAROLINA STATE HIGHWAY PATROL
      90             :   HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      91             :   MODIFICATIONS.
      92             : 
      93             : ***********************************************************************/
      94             : 
      95             : 
      96             : /* include these first, according to the docs */
      97             : #ifndef DMETAPHONE_MAIN
      98             : 
      99             : #include "postgres.h"
     100             : 
     101             : #include "utils/builtins.h"
     102             : 
     103             : /* turn off assertions for embedded function */
     104             : #define NDEBUG
     105             : 
     106             : #else                           /* DMETAPHONE_MAIN */
     107             : 
     108             : /* we need these if we didn't get them from postgres.h */
     109             : #include <stdio.h>
     110             : #include <stdlib.h>
     111             : #include <string.h>
     112             : #include <stdarg.h>
     113             : 
     114             : #endif                          /* DMETAPHONE_MAIN */
     115             : 
     116             : #include <assert.h>
     117             : #include <ctype.h>
     118             : 
     119             : /* prototype for the main function we got from the perl module */
     120             : static void DoubleMetaphone(char *str, char **codes);
     121             : 
     122             : #ifndef DMETAPHONE_MAIN
     123             : 
     124             : /*
     125             :  * The PostgreSQL visible dmetaphone function.
     126             :  */
     127             : 
     128           4 : PG_FUNCTION_INFO_V1(dmetaphone);
     129             : 
     130             : Datum
     131           2 : dmetaphone(PG_FUNCTION_ARGS)
     132             : {
     133             :     text       *arg;
     134             :     char       *aptr,
     135             :                *codes[2],
     136             :                *code;
     137             : 
     138             : #ifdef DMETAPHONE_NOSTRICT
     139             :     if (PG_ARGISNULL(0))
     140             :         PG_RETURN_NULL();
     141             : #endif
     142           2 :     arg = PG_GETARG_TEXT_PP(0);
     143           2 :     aptr = text_to_cstring(arg);
     144             : 
     145           2 :     DoubleMetaphone(aptr, codes);
     146           2 :     code = codes[0];
     147           2 :     if (!code)
     148           0 :         code = "";
     149             : 
     150           2 :     PG_RETURN_TEXT_P(cstring_to_text(code));
     151             : }
     152             : 
     153             : /*
     154             :  * The PostgreSQL visible dmetaphone_alt function.
     155             :  */
     156             : 
     157           4 : PG_FUNCTION_INFO_V1(dmetaphone_alt);
     158             : 
     159             : Datum
     160           2 : dmetaphone_alt(PG_FUNCTION_ARGS)
     161             : {
     162             :     text       *arg;
     163             :     char       *aptr,
     164             :                *codes[2],
     165             :                *code;
     166             : 
     167             : #ifdef DMETAPHONE_NOSTRICT
     168             :     if (PG_ARGISNULL(0))
     169             :         PG_RETURN_NULL();
     170             : #endif
     171           2 :     arg = PG_GETARG_TEXT_PP(0);
     172           2 :     aptr = text_to_cstring(arg);
     173             : 
     174           2 :     DoubleMetaphone(aptr, codes);
     175           2 :     code = codes[1];
     176           2 :     if (!code)
     177           0 :         code = "";
     178             : 
     179           2 :     PG_RETURN_TEXT_P(cstring_to_text(code));
     180             : }
     181             : 
     182             : 
     183             : /* here is where we start the code imported from the perl module */
     184             : 
     185             : /* all memory handling is done with these macros */
     186             : 
     187             : #define META_MALLOC(v,n,t) \
     188             :           (v = (t*)palloc(((n)*sizeof(t))))
     189             : 
     190             : #define META_REALLOC(v,n,t) \
     191             :                       (v = (t*)repalloc((v),((n)*sizeof(t))))
     192             : 
     193             : /*
     194             :  * Don't do pfree - it seems to cause a SIGSEGV sometimes - which might have just
     195             :  * been caused by reloading the module in development.
     196             :  * So we rely on context cleanup - Tom Lane says pfree shouldn't be necessary
     197             :  * in a case like this.
     198             :  */
     199             : 
     200             : #define META_FREE(x) ((void)true)   /* pfree((x)) */
     201             : #else                           /* DMETAPHONE_MAIN */
     202             : 
     203             : /* use the standard malloc library when not running in PostgreSQL */
     204             : 
     205             : #define META_MALLOC(v,n,t) \
     206             :           (v = (t*)malloc(((n)*sizeof(t))))
     207             : 
     208             : #define META_REALLOC(v,n,t) \
     209             :                       (v = (t*)realloc((v),((n)*sizeof(t))))
     210             : 
     211             : #define META_FREE(x) free((x))
     212             : #endif                          /* defined DMETAPHONE_MAIN */
     213             : 
     214             : 
     215             : 
     216             : /* this typedef was originally in the perl module's .h file */
     217             : 
     218             : typedef struct
     219             : {
     220             :     char       *str;
     221             :     int         length;
     222             :     int         bufsize;
     223             :     int         free_string_on_destroy;
     224             : }
     225             : 
     226             : metastring;
     227             : 
     228             : /*
     229             :  * remaining perl module funcs unchanged except for declaring them static
     230             :  * and reformatting to PostgreSQL indentation and to fit in 80 cols.
     231             :  *
     232             :  */
     233             : 
     234             : static metastring *
     235          12 : NewMetaString(const char *init_str)
     236             : {
     237             :     metastring *s;
     238          12 :     char        empty_string[] = "";
     239             : 
     240          12 :     META_MALLOC(s, 1, metastring);
     241             :     assert(s != NULL);
     242             : 
     243          12 :     if (init_str == NULL)
     244           0 :         init_str = empty_string;
     245          12 :     s->length = strlen(init_str);
     246             :     /* preallocate a bit more for potential growth */
     247          12 :     s->bufsize = s->length + 7;
     248             : 
     249          12 :     META_MALLOC(s->str, s->bufsize, char);
     250             :     assert(s->str != NULL);
     251             : 
     252          12 :     memcpy(s->str, init_str, s->length + 1);
     253          12 :     s->free_string_on_destroy = 1;
     254             : 
     255          12 :     return s;
     256             : }
     257             : 
     258             : 
     259             : static void
     260          12 : DestroyMetaString(metastring *s)
     261             : {
     262          12 :     if (s == NULL)
     263           0 :         return;
     264             : 
     265          12 :     if (s->free_string_on_destroy && (s->str != NULL))
     266             :         META_FREE(s->str);
     267             : 
     268             :     META_FREE(s);
     269             : }
     270             : 
     271             : 
     272             : static void
     273           0 : IncreaseBuffer(metastring *s, int chars_needed)
     274             : {
     275           0 :     META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
     276             :     assert(s->str != NULL);
     277           0 :     s->bufsize = s->bufsize + chars_needed + 10;
     278           0 : }
     279             : 
     280             : 
     281             : static void
     282           4 : MakeUpper(metastring *s)
     283             : {
     284             :     char       *i;
     285             : 
     286          44 :     for (i = s->str; *i; i++)
     287          40 :         *i = toupper((unsigned char) *i);
     288           4 : }
     289             : 
     290             : 
     291             : static int
     292           0 : IsVowel(metastring *s, int pos)
     293             : {
     294             :     char        c;
     295             : 
     296           0 :     if ((pos < 0) || (pos >= s->length))
     297           0 :         return 0;
     298             : 
     299           0 :     c = *(s->str + pos);
     300           0 :     if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
     301           0 :         (c == 'U') || (c == 'Y'))
     302           0 :         return 1;
     303             : 
     304           0 :     return 0;
     305             : }
     306             : 
     307             : 
     308             : static int
     309           0 : SlavoGermanic(metastring *s)
     310             : {
     311           0 :     if ((char *) strstr(s->str, "W"))
     312           0 :         return 1;
     313           0 :     else if ((char *) strstr(s->str, "K"))
     314           0 :         return 1;
     315           0 :     else if ((char *) strstr(s->str, "CZ"))
     316           0 :         return 1;
     317           0 :     else if ((char *) strstr(s->str, "WITZ"))
     318           0 :         return 1;
     319             :     else
     320           0 :         return 0;
     321             : }
     322             : 
     323             : 
     324             : static char
     325          52 : GetAt(metastring *s, int pos)
     326             : {
     327          52 :     if ((pos < 0) || (pos >= s->length))
     328           0 :         return '\0';
     329             : 
     330          52 :     return ((char) *(s->str + pos));
     331             : }
     332             : 
     333             : 
     334             : static void
     335           0 : SetAt(metastring *s, int pos, char c)
     336             : {
     337           0 :     if ((pos < 0) || (pos >= s->length))
     338           0 :         return;
     339             : 
     340           0 :     *(s->str + pos) = c;
     341             : }
     342             : 
     343             : 
     344             : /*
     345             :    Caveats: the START value is 0 based
     346             : */
     347             : static int
     348          32 : StringAt(metastring *s, int start, int length,...)
     349             : {
     350             :     char       *test;
     351             :     char       *pos;
     352             :     va_list     ap;
     353             : 
     354          32 :     if ((start < 0) || (start >= s->length))
     355           4 :         return 0;
     356             : 
     357          28 :     pos = (s->str + start);
     358          28 :     va_start(ap, length);
     359             : 
     360             :     do
     361             :     {
     362         116 :         test = va_arg(ap, char *);
     363         116 :         if (*test && (strncmp(pos, test, length) == 0))
     364             :         {
     365           4 :             va_end(ap);
     366           4 :             return 1;
     367             :         }
     368             :     }
     369         112 :     while (strcmp(test, "") != 0);
     370             : 
     371          24 :     va_end(ap);
     372             : 
     373          24 :     return 0;
     374             : }
     375             : 
     376             : 
     377             : static void
     378          28 : MetaphAdd(metastring *s, const char *new_str)
     379             : {
     380             :     int         add_length;
     381             : 
     382          28 :     if (new_str == NULL)
     383           0 :         return;
     384             : 
     385          28 :     add_length = strlen(new_str);
     386          28 :     if ((s->length + add_length) > (s->bufsize - 1))
     387           0 :         IncreaseBuffer(s, add_length);
     388             : 
     389          28 :     strcat(s->str, new_str);
     390          28 :     s->length += add_length;
     391             : }
     392             : 
     393             : 
     394             : static void
     395           4 : DoubleMetaphone(char *str, char **codes)
     396             : {
     397             :     int         length;
     398             :     metastring *original;
     399             :     metastring *primary;
     400             :     metastring *secondary;
     401             :     int         current;
     402             :     int         last;
     403             : 
     404           4 :     current = 0;
     405             :     /* we need the real length and last prior to padding */
     406           4 :     length = strlen(str);
     407           4 :     last = length - 1;
     408           4 :     original = NewMetaString(str);
     409             :     /* Pad original so we can index beyond end */
     410           4 :     MetaphAdd(original, "     ");
     411             : 
     412           4 :     primary = NewMetaString("");
     413           4 :     secondary = NewMetaString("");
     414           4 :     primary->free_string_on_destroy = 0;
     415           4 :     secondary->free_string_on_destroy = 0;
     416             : 
     417           4 :     MakeUpper(original);
     418             : 
     419             :     /* skip these when at start of word */
     420           4 :     if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
     421           0 :         current += 1;
     422             : 
     423             :     /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
     424           4 :     if (GetAt(original, 0) == 'X')
     425             :     {
     426           0 :         MetaphAdd(primary, "S");  /* 'Z' maps to 'S' */
     427           0 :         MetaphAdd(secondary, "S");
     428           0 :         current += 1;
     429             :     }
     430             : 
     431             :     /* main loop */
     432          24 :     while ((primary->length < 4) || (secondary->length < 4))
     433             :     {
     434          24 :         if (current >= length)
     435           4 :             break;
     436             : 
     437          20 :         switch (GetAt(original, current))
     438             :         {
     439           8 :             case 'A':
     440             :             case 'E':
     441             :             case 'I':
     442             :             case 'O':
     443             :             case 'U':
     444             :             case 'Y':
     445           8 :                 if (current == 0)
     446             :                 {
     447             :                     /* all init vowels now map to 'A' */
     448           0 :                     MetaphAdd(primary, "A");
     449           0 :                     MetaphAdd(secondary, "A");
     450             :                 }
     451           8 :                 current += 1;
     452           8 :                 break;
     453             : 
     454           4 :             case 'B':
     455             : 
     456             :                 /* "-mb", e.g., "dumb", already skipped over... */
     457           4 :                 MetaphAdd(primary, "P");
     458           4 :                 MetaphAdd(secondary, "P");
     459             : 
     460           4 :                 if (GetAt(original, current + 1) == 'B')
     461           0 :                     current += 2;
     462             :                 else
     463           4 :                     current += 1;
     464           4 :                 break;
     465             : 
     466           0 :             case '\xc7':        /* C with cedilla */
     467           0 :                 MetaphAdd(primary, "S");
     468           0 :                 MetaphAdd(secondary, "S");
     469           0 :                 current += 1;
     470           0 :                 break;
     471             : 
     472           0 :             case 'C':
     473             :                 /* various germanic */
     474           0 :                 if ((current > 1)
     475           0 :                     && !IsVowel(original, current - 2)
     476           0 :                     && StringAt(original, (current - 1), 3, "ACH", "")
     477           0 :                     && ((GetAt(original, current + 2) != 'I')
     478           0 :                         && ((GetAt(original, current + 2) != 'E')
     479           0 :                             || StringAt(original, (current - 2), 6, "BACHER",
     480             :                                         "MACHER", ""))))
     481             :                 {
     482           0 :                     MetaphAdd(primary, "K");
     483           0 :                     MetaphAdd(secondary, "K");
     484           0 :                     current += 2;
     485           0 :                     break;
     486             :                 }
     487             : 
     488             :                 /* special case 'caesar' */
     489           0 :                 if ((current == 0)
     490           0 :                     && StringAt(original, current, 6, "CAESAR", ""))
     491             :                 {
     492           0 :                     MetaphAdd(primary, "S");
     493           0 :                     MetaphAdd(secondary, "S");
     494           0 :                     current += 2;
     495           0 :                     break;
     496             :                 }
     497             : 
     498             :                 /* italian 'chianti' */
     499           0 :                 if (StringAt(original, current, 4, "CHIA", ""))
     500             :                 {
     501           0 :                     MetaphAdd(primary, "K");
     502           0 :                     MetaphAdd(secondary, "K");
     503           0 :                     current += 2;
     504           0 :                     break;
     505             :                 }
     506             : 
     507           0 :                 if (StringAt(original, current, 2, "CH", ""))
     508             :                 {
     509             :                     /* find 'michael' */
     510           0 :                     if ((current > 0)
     511           0 :                         && StringAt(original, current, 4, "CHAE", ""))
     512             :                     {
     513           0 :                         MetaphAdd(primary, "K");
     514           0 :                         MetaphAdd(secondary, "X");
     515           0 :                         current += 2;
     516           0 :                         break;
     517             :                     }
     518             : 
     519             :                     /* greek roots e.g. 'chemistry', 'chorus' */
     520           0 :                     if ((current == 0)
     521           0 :                         && (StringAt(original, (current + 1), 5,
     522             :                                      "HARAC", "HARIS", "")
     523           0 :                             || StringAt(original, (current + 1), 3, "HOR",
     524             :                                         "HYM", "HIA", "HEM", ""))
     525           0 :                         && !StringAt(original, 0, 5, "CHORE", ""))
     526             :                     {
     527           0 :                         MetaphAdd(primary, "K");
     528           0 :                         MetaphAdd(secondary, "K");
     529           0 :                         current += 2;
     530           0 :                         break;
     531             :                     }
     532             : 
     533             :                     /* germanic, greek, or otherwise 'ch' for 'kh' sound */
     534           0 :                     if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
     535           0 :                          || StringAt(original, 0, 3, "SCH", ""))
     536             :                     /* 'architect' (but not 'arch'), 'orchestra', 'orchid' */
     537           0 :                         || StringAt(original, (current - 2), 6, "ORCHES",
     538             :                                     "ARCHIT", "ORCHID", "")
     539           0 :                         || StringAt(original, (current + 2), 1, "T", "S",
     540             :                                     "")
     541           0 :                         || ((StringAt(original, (current - 1), 1,
     542             :                                       "A", "O", "U", "E", "")
     543           0 :                              || (current == 0))
     544             : 
     545             :                     /*
     546             :                      * e.g., 'wachtler', 'wechsler', but not 'tichner'
     547             :                      */
     548           0 :                             && StringAt(original, (current + 2), 1, "L", "R",
     549             :                                         "N", "M", "B", "H", "F", "V", "W",
     550             :                                         " ", "")))
     551             :                     {
     552           0 :                         MetaphAdd(primary, "K");
     553           0 :                         MetaphAdd(secondary, "K");
     554             :                     }
     555             :                     else
     556             :                     {
     557           0 :                         if (current > 0)
     558             :                         {
     559           0 :                             if (StringAt(original, 0, 2, "MC", ""))
     560             :                             {
     561             :                                 /* e.g., "McHugh" */
     562           0 :                                 MetaphAdd(primary, "K");
     563           0 :                                 MetaphAdd(secondary, "K");
     564             :                             }
     565             :                             else
     566             :                             {
     567           0 :                                 MetaphAdd(primary, "X");
     568           0 :                                 MetaphAdd(secondary, "K");
     569             :                             }
     570             :                         }
     571             :                         else
     572             :                         {
     573           0 :                             MetaphAdd(primary, "X");
     574           0 :                             MetaphAdd(secondary, "X");
     575             :                         }
     576             :                     }
     577           0 :                     current += 2;
     578           0 :                     break;
     579             :                 }
     580             :                 /* e.g., 'czerny' */
     581           0 :                 if (StringAt(original, current, 2, "CZ", "")
     582           0 :                     && !StringAt(original, (current - 2), 4, "WICZ", ""))
     583             :                 {
     584           0 :                     MetaphAdd(primary, "S");
     585           0 :                     MetaphAdd(secondary, "X");
     586           0 :                     current += 2;
     587           0 :                     break;
     588             :                 }
     589             : 
     590             :                 /* e.g., 'focaccia' */
     591           0 :                 if (StringAt(original, (current + 1), 3, "CIA", ""))
     592             :                 {
     593           0 :                     MetaphAdd(primary, "X");
     594           0 :                     MetaphAdd(secondary, "X");
     595           0 :                     current += 3;
     596           0 :                     break;
     597             :                 }
     598             : 
     599             :                 /* double 'C', but not if e.g. 'McClellan' */
     600           0 :                 if (StringAt(original, current, 2, "CC", "")
     601           0 :                     && !((current == 1) && (GetAt(original, 0) == 'M')))
     602             :                 {
     603             :                     /* 'bellocchio' but not 'bacchus' */
     604           0 :                     if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
     605           0 :                         && !StringAt(original, (current + 2), 2, "HU", ""))
     606             :                     {
     607             :                         /* 'accident', 'accede', 'succeed' */
     608           0 :                         if (((current == 1)
     609           0 :                              && (GetAt(original, current - 1) == 'A'))
     610           0 :                             || StringAt(original, (current - 1), 5, "UCCEE",
     611             :                                         "UCCES", ""))
     612             :                         {
     613           0 :                             MetaphAdd(primary, "KS");
     614           0 :                             MetaphAdd(secondary, "KS");
     615             :                             /* 'bacci', 'bertucci', other italian */
     616             :                         }
     617             :                         else
     618             :                         {
     619           0 :                             MetaphAdd(primary, "X");
     620           0 :                             MetaphAdd(secondary, "X");
     621             :                         }
     622           0 :                         current += 3;
     623           0 :                         break;
     624             :                     }
     625             :                     else
     626             :                     {           /* Pierce's rule */
     627           0 :                         MetaphAdd(primary, "K");
     628           0 :                         MetaphAdd(secondary, "K");
     629           0 :                         current += 2;
     630           0 :                         break;
     631             :                     }
     632             :                 }
     633             : 
     634           0 :                 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
     635             :                 {
     636           0 :                     MetaphAdd(primary, "K");
     637           0 :                     MetaphAdd(secondary, "K");
     638           0 :                     current += 2;
     639           0 :                     break;
     640             :                 }
     641             : 
     642           0 :                 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
     643             :                 {
     644             :                     /* italian vs. english */
     645           0 :                     if (StringAt
     646             :                         (original, current, 3, "CIO", "CIE", "CIA", ""))
     647             :                     {
     648           0 :                         MetaphAdd(primary, "S");
     649           0 :                         MetaphAdd(secondary, "X");
     650             :                     }
     651             :                     else
     652             :                     {
     653           0 :                         MetaphAdd(primary, "S");
     654           0 :                         MetaphAdd(secondary, "S");
     655             :                     }
     656           0 :                     current += 2;
     657           0 :                     break;
     658             :                 }
     659             : 
     660             :                 /* else */
     661           0 :                 MetaphAdd(primary, "K");
     662           0 :                 MetaphAdd(secondary, "K");
     663             : 
     664             :                 /* name sent in 'mac caffrey', 'mac gregor' */
     665           0 :                 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
     666           0 :                     current += 3;
     667           0 :                 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
     668           0 :                          && !StringAt(original, (current + 1), 2,
     669             :                                       "CE", "CI", ""))
     670           0 :                     current += 2;
     671             :                 else
     672           0 :                     current += 1;
     673           0 :                 break;
     674             : 
     675           0 :             case 'D':
     676           0 :                 if (StringAt(original, current, 2, "DG", ""))
     677             :                 {
     678           0 :                     if (StringAt(original, (current + 2), 1,
     679             :                                  "I", "E", "Y", ""))
     680             :                     {
     681             :                         /* e.g. 'edge' */
     682           0 :                         MetaphAdd(primary, "J");
     683           0 :                         MetaphAdd(secondary, "J");
     684           0 :                         current += 3;
     685           0 :                         break;
     686             :                     }
     687             :                     else
     688             :                     {
     689             :                         /* e.g. 'edgar' */
     690           0 :                         MetaphAdd(primary, "TK");
     691           0 :                         MetaphAdd(secondary, "TK");
     692           0 :                         current += 2;
     693           0 :                         break;
     694             :                     }
     695             :                 }
     696             : 
     697           0 :                 if (StringAt(original, current, 2, "DT", "DD", ""))
     698             :                 {
     699           0 :                     MetaphAdd(primary, "T");
     700           0 :                     MetaphAdd(secondary, "T");
     701           0 :                     current += 2;
     702           0 :                     break;
     703             :                 }
     704             : 
     705             :                 /* else */
     706           0 :                 MetaphAdd(primary, "T");
     707           0 :                 MetaphAdd(secondary, "T");
     708           0 :                 current += 1;
     709           0 :                 break;
     710             : 
     711           0 :             case 'F':
     712           0 :                 if (GetAt(original, current + 1) == 'F')
     713           0 :                     current += 2;
     714             :                 else
     715           0 :                     current += 1;
     716           0 :                 MetaphAdd(primary, "F");
     717           0 :                 MetaphAdd(secondary, "F");
     718           0 :                 break;
     719             : 
     720           4 :             case 'G':
     721           4 :                 if (GetAt(original, current + 1) == 'H')
     722             :                 {
     723           0 :                     if ((current > 0) && !IsVowel(original, current - 1))
     724             :                     {
     725           0 :                         MetaphAdd(primary, "K");
     726           0 :                         MetaphAdd(secondary, "K");
     727           0 :                         current += 2;
     728           0 :                         break;
     729             :                     }
     730             : 
     731           0 :                     if (current < 3)
     732             :                     {
     733             :                         /* 'ghislane', 'ghiradelli' */
     734           0 :                         if (current == 0)
     735             :                         {
     736           0 :                             if (GetAt(original, current + 2) == 'I')
     737             :                             {
     738           0 :                                 MetaphAdd(primary, "J");
     739           0 :                                 MetaphAdd(secondary, "J");
     740             :                             }
     741             :                             else
     742             :                             {
     743           0 :                                 MetaphAdd(primary, "K");
     744           0 :                                 MetaphAdd(secondary, "K");
     745             :                             }
     746           0 :                             current += 2;
     747           0 :                             break;
     748             :                         }
     749             :                     }
     750             : 
     751             :                     /*
     752             :                      * Parker's rule (with some further refinements) - e.g.,
     753             :                      * 'hugh'
     754             :                      */
     755           0 :                     if (((current > 1)
     756           0 :                          && StringAt(original, (current - 2), 1,
     757             :                                      "B", "H", "D", ""))
     758             :                     /* e.g., 'bough' */
     759           0 :                         || ((current > 2)
     760           0 :                             && StringAt(original, (current - 3), 1,
     761             :                                         "B", "H", "D", ""))
     762             :                     /* e.g., 'broughton' */
     763           0 :                         || ((current > 3)
     764           0 :                             && StringAt(original, (current - 4), 1,
     765             :                                         "B", "H", "")))
     766             :                     {
     767           0 :                         current += 2;
     768           0 :                         break;
     769             :                     }
     770             :                     else
     771             :                     {
     772             :                         /*
     773             :                          * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
     774             :                          * 'rough', 'tough'
     775             :                          */
     776           0 :                         if ((current > 2)
     777           0 :                             && (GetAt(original, current - 1) == 'U')
     778           0 :                             && StringAt(original, (current - 3), 1, "C",
     779             :                                         "G", "L", "R", "T", ""))
     780             :                         {
     781           0 :                             MetaphAdd(primary, "F");
     782           0 :                             MetaphAdd(secondary, "F");
     783             :                         }
     784           0 :                         else if ((current > 0)
     785           0 :                                  && GetAt(original, current - 1) != 'I')
     786             :                         {
     787             : 
     788             : 
     789           0 :                             MetaphAdd(primary, "K");
     790           0 :                             MetaphAdd(secondary, "K");
     791             :                         }
     792             : 
     793           0 :                         current += 2;
     794           0 :                         break;
     795             :                     }
     796             :                 }
     797             : 
     798           4 :                 if (GetAt(original, current + 1) == 'N')
     799             :                 {
     800           0 :                     if ((current == 1) && IsVowel(original, 0)
     801           0 :                         && !SlavoGermanic(original))
     802             :                     {
     803           0 :                         MetaphAdd(primary, "KN");
     804           0 :                         MetaphAdd(secondary, "N");
     805             :                     }
     806             :                     else
     807             :                         /* not e.g. 'cagney' */
     808           0 :                         if (!StringAt(original, (current + 2), 2, "EY", "")
     809           0 :                             && (GetAt(original, current + 1) != 'Y')
     810           0 :                             && !SlavoGermanic(original))
     811             :                     {
     812           0 :                         MetaphAdd(primary, "N");
     813           0 :                         MetaphAdd(secondary, "KN");
     814             :                     }
     815             :                     else
     816             :                     {
     817           0 :                         MetaphAdd(primary, "KN");
     818           0 :                         MetaphAdd(secondary, "KN");
     819             :                     }
     820           0 :                     current += 2;
     821           0 :                     break;
     822             :                 }
     823             : 
     824             :                 /* 'tagliaro' */
     825           4 :                 if (StringAt(original, (current + 1), 2, "LI", "")
     826           0 :                     && !SlavoGermanic(original))
     827             :                 {
     828           0 :                     MetaphAdd(primary, "KL");
     829           0 :                     MetaphAdd(secondary, "L");
     830           0 :                     current += 2;
     831           0 :                     break;
     832             :                 }
     833             : 
     834             :                 /* -ges-,-gep-,-gel-, -gie- at beginning */
     835           4 :                 if ((current == 0)
     836           4 :                     && ((GetAt(original, current + 1) == 'Y')
     837           4 :                         || StringAt(original, (current + 1), 2, "ES", "EP",
     838             :                                     "EB", "EL", "EY", "IB", "IL", "IN", "IE",
     839             :                                     "EI", "ER", "")))
     840             :                 {
     841           0 :                     MetaphAdd(primary, "K");
     842           0 :                     MetaphAdd(secondary, "J");
     843           0 :                     current += 2;
     844           0 :                     break;
     845             :                 }
     846             : 
     847             :                 /* -ger-,  -gy- */
     848           4 :                 if ((StringAt(original, (current + 1), 2, "ER", "")
     849           4 :                      || (GetAt(original, current + 1) == 'Y'))
     850           0 :                     && !StringAt(original, 0, 6,
     851             :                                  "DANGER", "RANGER", "MANGER", "")
     852           0 :                     && !StringAt(original, (current - 1), 1, "E", "I", "")
     853           0 :                     && !StringAt(original, (current - 1), 3, "RGY", "OGY", ""))
     854             :                 {
     855           0 :                     MetaphAdd(primary, "K");
     856           0 :                     MetaphAdd(secondary, "J");
     857           0 :                     current += 2;
     858           0 :                     break;
     859             :                 }
     860             : 
     861             :                 /* italian, e.g., 'biaggi' */
     862           4 :                 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
     863           4 :                     || StringAt(original, (current - 1), 4,
     864             :                                 "AGGI", "OGGI", ""))
     865             :                 {
     866             :                     /* obvious germanic */
     867           0 :                     if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
     868           0 :                          || StringAt(original, 0, 3, "SCH", ""))
     869           0 :                         || StringAt(original, (current + 1), 2, "ET", ""))
     870             :                     {
     871           0 :                         MetaphAdd(primary, "K");
     872           0 :                         MetaphAdd(secondary, "K");
     873             :                     }
     874             :                     else
     875             :                     {
     876             :                         /* always soft if french ending */
     877           0 :                         if (StringAt
     878             :                             (original, (current + 1), 4, "IER ", ""))
     879             :                         {
     880           0 :                             MetaphAdd(primary, "J");
     881           0 :                             MetaphAdd(secondary, "J");
     882             :                         }
     883             :                         else
     884             :                         {
     885           0 :                             MetaphAdd(primary, "J");
     886           0 :                             MetaphAdd(secondary, "K");
     887             :                         }
     888             :                     }
     889           0 :                     current += 2;
     890           0 :                     break;
     891             :                 }
     892             : 
     893           4 :                 if (GetAt(original, current + 1) == 'G')
     894           0 :                     current += 2;
     895             :                 else
     896           4 :                     current += 1;
     897           4 :                 MetaphAdd(primary, "K");
     898           4 :                 MetaphAdd(secondary, "K");
     899           4 :                 break;
     900             : 
     901           0 :             case 'H':
     902             :                 /* only keep if first & before vowel or btw. 2 vowels */
     903           0 :                 if (((current == 0) || IsVowel(original, current - 1))
     904           0 :                     && IsVowel(original, current + 1))
     905             :                 {
     906           0 :                     MetaphAdd(primary, "H");
     907           0 :                     MetaphAdd(secondary, "H");
     908           0 :                     current += 2;
     909             :                 }
     910             :                 else
     911             :                     /* also takes care of 'HH' */
     912           0 :                     current += 1;
     913           0 :                 break;
     914             : 
     915           0 :             case 'J':
     916             :                 /* obvious spanish, 'jose', 'san jacinto' */
     917           0 :                 if (StringAt(original, current, 4, "JOSE", "")
     918           0 :                     || StringAt(original, 0, 4, "SAN ", ""))
     919             :                 {
     920           0 :                     if (((current == 0)
     921           0 :                          && (GetAt(original, current + 4) == ' '))
     922           0 :                         || StringAt(original, 0, 4, "SAN ", ""))
     923             :                     {
     924           0 :                         MetaphAdd(primary, "H");
     925           0 :                         MetaphAdd(secondary, "H");
     926             :                     }
     927             :                     else
     928             :                     {
     929           0 :                         MetaphAdd(primary, "J");
     930           0 :                         MetaphAdd(secondary, "H");
     931             :                     }
     932           0 :                     current += 1;
     933           0 :                     break;
     934             :                 }
     935             : 
     936           0 :                 if ((current == 0)
     937           0 :                     && !StringAt(original, current, 4, "JOSE", ""))
     938             :                 {
     939           0 :                     MetaphAdd(primary, "J");  /* Yankelovich/Jankelowicz */
     940           0 :                     MetaphAdd(secondary, "A");
     941             :                 }
     942             :                 else
     943             :                 {
     944             :                     /* spanish pron. of e.g. 'bajador' */
     945           0 :                     if (IsVowel(original, current - 1)
     946           0 :                         && !SlavoGermanic(original)
     947           0 :                         && ((GetAt(original, current + 1) == 'A')
     948           0 :                             || (GetAt(original, current + 1) == 'O')))
     949             :                     {
     950           0 :                         MetaphAdd(primary, "J");
     951           0 :                         MetaphAdd(secondary, "H");
     952             :                     }
     953             :                     else
     954             :                     {
     955           0 :                         if (current == last)
     956             :                         {
     957           0 :                             MetaphAdd(primary, "J");
     958           0 :                             MetaphAdd(secondary, "");
     959             :                         }
     960             :                         else
     961             :                         {
     962           0 :                             if (!StringAt(original, (current + 1), 1, "L", "T",
     963             :                                           "K", "S", "N", "M", "B", "Z", "")
     964           0 :                                 && !StringAt(original, (current - 1), 1,
     965             :                                              "S", "K", "L", ""))
     966             :                             {
     967           0 :                                 MetaphAdd(primary, "J");
     968           0 :                                 MetaphAdd(secondary, "J");
     969             :                             }
     970             :                         }
     971             :                     }
     972             :                 }
     973             : 
     974           0 :                 if (GetAt(original, current + 1) == 'J')    /* it could happen! */
     975           0 :                     current += 2;
     976             :                 else
     977           0 :                     current += 1;
     978           0 :                 break;
     979             : 
     980           0 :             case 'K':
     981           0 :                 if (GetAt(original, current + 1) == 'K')
     982           0 :                     current += 2;
     983             :                 else
     984           0 :                     current += 1;
     985           0 :                 MetaphAdd(primary, "K");
     986           0 :                 MetaphAdd(secondary, "K");
     987           0 :                 break;
     988             : 
     989           0 :             case 'L':
     990           0 :                 if (GetAt(original, current + 1) == 'L')
     991             :                 {
     992             :                     /* spanish e.g. 'cabrillo', 'gallegos' */
     993           0 :                     if (((current == (length - 3))
     994           0 :                          && StringAt(original, (current - 1), 4, "ILLO",
     995             :                                      "ILLA", "ALLE", ""))
     996           0 :                         || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
     997           0 :                              || StringAt(original, last, 1, "A", "O", ""))
     998           0 :                             && StringAt(original, (current - 1), 4,
     999             :                                         "ALLE", "")))
    1000             :                     {
    1001           0 :                         MetaphAdd(primary, "L");
    1002           0 :                         MetaphAdd(secondary, "");
    1003           0 :                         current += 2;
    1004           0 :                         break;
    1005             :                     }
    1006           0 :                     current += 2;
    1007             :                 }
    1008             :                 else
    1009           0 :                     current += 1;
    1010           0 :                 MetaphAdd(primary, "L");
    1011           0 :                 MetaphAdd(secondary, "L");
    1012           0 :                 break;
    1013             : 
    1014           4 :             case 'M':
    1015           4 :                 if ((StringAt(original, (current - 1), 3, "UMB", "")
    1016           4 :                      && (((current + 1) == last)
    1017           4 :                          || StringAt(original, (current + 2), 2, "ER", "")))
    1018             :                 /* 'dumb','thumb' */
    1019           4 :                     || (GetAt(original, current + 1) == 'M'))
    1020           0 :                     current += 2;
    1021             :                 else
    1022           4 :                     current += 1;
    1023           4 :                 MetaphAdd(primary, "M");
    1024           4 :                 MetaphAdd(secondary, "M");
    1025           4 :                 break;
    1026             : 
    1027           0 :             case 'N':
    1028           0 :                 if (GetAt(original, current + 1) == 'N')
    1029           0 :                     current += 2;
    1030             :                 else
    1031           0 :                     current += 1;
    1032           0 :                 MetaphAdd(primary, "N");
    1033           0 :                 MetaphAdd(secondary, "N");
    1034           0 :                 break;
    1035             : 
    1036           0 :             case '\xd1':        /* N with tilde */
    1037           0 :                 current += 1;
    1038           0 :                 MetaphAdd(primary, "N");
    1039           0 :                 MetaphAdd(secondary, "N");
    1040           0 :                 break;
    1041             : 
    1042           0 :             case 'P':
    1043           0 :                 if (GetAt(original, current + 1) == 'H')
    1044             :                 {
    1045           0 :                     MetaphAdd(primary, "F");
    1046           0 :                     MetaphAdd(secondary, "F");
    1047           0 :                     current += 2;
    1048           0 :                     break;
    1049             :                 }
    1050             : 
    1051             :                 /* also account for "campbell", "raspberry" */
    1052           0 :                 if (StringAt(original, (current + 1), 1, "P", "B", ""))
    1053           0 :                     current += 2;
    1054             :                 else
    1055           0 :                     current += 1;
    1056           0 :                 MetaphAdd(primary, "P");
    1057           0 :                 MetaphAdd(secondary, "P");
    1058           0 :                 break;
    1059             : 
    1060           0 :             case 'Q':
    1061           0 :                 if (GetAt(original, current + 1) == 'Q')
    1062           0 :                     current += 2;
    1063             :                 else
    1064           0 :                     current += 1;
    1065           0 :                 MetaphAdd(primary, "K");
    1066           0 :                 MetaphAdd(secondary, "K");
    1067           0 :                 break;
    1068             : 
    1069           0 :             case 'R':
    1070             :                 /* french e.g. 'rogier', but exclude 'hochmeier' */
    1071           0 :                 if ((current == last)
    1072           0 :                     && !SlavoGermanic(original)
    1073           0 :                     && StringAt(original, (current - 2), 2, "IE", "")
    1074           0 :                     && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
    1075             :                 {
    1076           0 :                     MetaphAdd(primary, "");
    1077           0 :                     MetaphAdd(secondary, "R");
    1078             :                 }
    1079             :                 else
    1080             :                 {
    1081           0 :                     MetaphAdd(primary, "R");
    1082           0 :                     MetaphAdd(secondary, "R");
    1083             :                 }
    1084             : 
    1085           0 :                 if (GetAt(original, current + 1) == 'R')
    1086           0 :                     current += 2;
    1087             :                 else
    1088           0 :                     current += 1;
    1089           0 :                 break;
    1090             : 
    1091           0 :             case 'S':
    1092             :                 /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
    1093           0 :                 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
    1094             :                 {
    1095           0 :                     current += 1;
    1096           0 :                     break;
    1097             :                 }
    1098             : 
    1099             :                 /* special case 'sugar-' */
    1100           0 :                 if ((current == 0)
    1101           0 :                     && StringAt(original, current, 5, "SUGAR", ""))
    1102             :                 {
    1103           0 :                     MetaphAdd(primary, "X");
    1104           0 :                     MetaphAdd(secondary, "S");
    1105           0 :                     current += 1;
    1106           0 :                     break;
    1107             :                 }
    1108             : 
    1109           0 :                 if (StringAt(original, current, 2, "SH", ""))
    1110             :                 {
    1111             :                     /* germanic */
    1112           0 :                     if (StringAt
    1113             :                         (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
    1114             :                          "HOLZ", ""))
    1115             :                     {
    1116           0 :                         MetaphAdd(primary, "S");
    1117           0 :                         MetaphAdd(secondary, "S");
    1118             :                     }
    1119             :                     else
    1120             :                     {
    1121           0 :                         MetaphAdd(primary, "X");
    1122           0 :                         MetaphAdd(secondary, "X");
    1123             :                     }
    1124           0 :                     current += 2;
    1125           0 :                     break;
    1126             :                 }
    1127             : 
    1128             :                 /* italian & armenian */
    1129           0 :                 if (StringAt(original, current, 3, "SIO", "SIA", "")
    1130           0 :                     || StringAt(original, current, 4, "SIAN", ""))
    1131             :                 {
    1132           0 :                     if (!SlavoGermanic(original))
    1133             :                     {
    1134           0 :                         MetaphAdd(primary, "S");
    1135           0 :                         MetaphAdd(secondary, "X");
    1136             :                     }
    1137             :                     else
    1138             :                     {
    1139           0 :                         MetaphAdd(primary, "S");
    1140           0 :                         MetaphAdd(secondary, "S");
    1141             :                     }
    1142           0 :                     current += 3;
    1143           0 :                     break;
    1144             :                 }
    1145             : 
    1146             :                 /*
    1147             :                  * german & anglicisations, e.g. 'smith' match 'schmidt',
    1148             :                  * 'snider' match 'schneider' also, -sz- in slavic language
    1149             :                  * although in hungarian it is pronounced 's'
    1150             :                  */
    1151           0 :                 if (((current == 0)
    1152           0 :                      && StringAt(original, (current + 1), 1,
    1153             :                                  "M", "N", "L", "W", ""))
    1154           0 :                     || StringAt(original, (current + 1), 1, "Z", ""))
    1155             :                 {
    1156           0 :                     MetaphAdd(primary, "S");
    1157           0 :                     MetaphAdd(secondary, "X");
    1158           0 :                     if (StringAt(original, (current + 1), 1, "Z", ""))
    1159           0 :                         current += 2;
    1160             :                     else
    1161           0 :                         current += 1;
    1162           0 :                     break;
    1163             :                 }
    1164             : 
    1165           0 :                 if (StringAt(original, current, 2, "SC", ""))
    1166             :                 {
    1167             :                     /* Schlesinger's rule */
    1168           0 :                     if (GetAt(original, current + 2) == 'H')
    1169             :                     {
    1170             :                         /* dutch origin, e.g. 'school', 'schooner' */
    1171           0 :                         if (StringAt(original, (current + 3), 2,
    1172             :                                      "OO", "ER", "EN",
    1173             :                                      "UY", "ED", "EM", ""))
    1174             :                         {
    1175             :                             /* 'schermerhorn', 'schenker' */
    1176           0 :                             if (StringAt(original, (current + 3), 2,
    1177             :                                          "ER", "EN", ""))
    1178             :                             {
    1179           0 :                                 MetaphAdd(primary, "X");
    1180           0 :                                 MetaphAdd(secondary, "SK");
    1181             :                             }
    1182             :                             else
    1183             :                             {
    1184           0 :                                 MetaphAdd(primary, "SK");
    1185           0 :                                 MetaphAdd(secondary, "SK");
    1186             :                             }
    1187           0 :                             current += 3;
    1188           0 :                             break;
    1189             :                         }
    1190             :                         else
    1191             :                         {
    1192           0 :                             if ((current == 0) && !IsVowel(original, 3)
    1193           0 :                                 && (GetAt(original, 3) != 'W'))
    1194             :                             {
    1195           0 :                                 MetaphAdd(primary, "X");
    1196           0 :                                 MetaphAdd(secondary, "S");
    1197             :                             }
    1198             :                             else
    1199             :                             {
    1200           0 :                                 MetaphAdd(primary, "X");
    1201           0 :                                 MetaphAdd(secondary, "X");
    1202             :                             }
    1203           0 :                             current += 3;
    1204           0 :                             break;
    1205             :                         }
    1206             :                     }
    1207             : 
    1208           0 :                     if (StringAt(original, (current + 2), 1,
    1209             :                                  "I", "E", "Y", ""))
    1210             :                     {
    1211           0 :                         MetaphAdd(primary, "S");
    1212           0 :                         MetaphAdd(secondary, "S");
    1213           0 :                         current += 3;
    1214           0 :                         break;
    1215             :                     }
    1216             :                     /* else */
    1217           0 :                     MetaphAdd(primary, "SK");
    1218           0 :                     MetaphAdd(secondary, "SK");
    1219           0 :                     current += 3;
    1220           0 :                     break;
    1221             :                 }
    1222             : 
    1223             :                 /* french e.g. 'resnais', 'artois' */
    1224           0 :                 if ((current == last)
    1225           0 :                     && StringAt(original, (current - 2), 2, "AI", "OI", ""))
    1226             :                 {
    1227           0 :                     MetaphAdd(primary, "");
    1228           0 :                     MetaphAdd(secondary, "S");
    1229             :                 }
    1230             :                 else
    1231             :                 {
    1232           0 :                     MetaphAdd(primary, "S");
    1233           0 :                     MetaphAdd(secondary, "S");
    1234             :                 }
    1235             : 
    1236           0 :                 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
    1237           0 :                     current += 2;
    1238             :                 else
    1239           0 :                     current += 1;
    1240           0 :                 break;
    1241             : 
    1242           0 :             case 'T':
    1243           0 :                 if (StringAt(original, current, 4, "TION", ""))
    1244             :                 {
    1245           0 :                     MetaphAdd(primary, "X");
    1246           0 :                     MetaphAdd(secondary, "X");
    1247           0 :                     current += 3;
    1248           0 :                     break;
    1249             :                 }
    1250             : 
    1251           0 :                 if (StringAt(original, current, 3, "TIA", "TCH", ""))
    1252             :                 {
    1253           0 :                     MetaphAdd(primary, "X");
    1254           0 :                     MetaphAdd(secondary, "X");
    1255           0 :                     current += 3;
    1256           0 :                     break;
    1257             :                 }
    1258             : 
    1259           0 :                 if (StringAt(original, current, 2, "TH", "")
    1260           0 :                     || StringAt(original, current, 3, "TTH", ""))
    1261             :                 {
    1262             :                     /* special case 'thomas', 'thames' or germanic */
    1263           0 :                     if (StringAt(original, (current + 2), 2, "OM", "AM", "")
    1264           0 :                         || StringAt(original, 0, 4, "VAN ", "VON ", "")
    1265           0 :                         || StringAt(original, 0, 3, "SCH", ""))
    1266             :                     {
    1267           0 :                         MetaphAdd(primary, "T");
    1268           0 :                         MetaphAdd(secondary, "T");
    1269             :                     }
    1270             :                     else
    1271             :                     {
    1272           0 :                         MetaphAdd(primary, "0");
    1273           0 :                         MetaphAdd(secondary, "T");
    1274             :                     }
    1275           0 :                     current += 2;
    1276           0 :                     break;
    1277             :                 }
    1278             : 
    1279           0 :                 if (StringAt(original, (current + 1), 1, "T", "D", ""))
    1280           0 :                     current += 2;
    1281             :                 else
    1282           0 :                     current += 1;
    1283           0 :                 MetaphAdd(primary, "T");
    1284           0 :                 MetaphAdd(secondary, "T");
    1285           0 :                 break;
    1286             : 
    1287           0 :             case 'V':
    1288           0 :                 if (GetAt(original, current + 1) == 'V')
    1289           0 :                     current += 2;
    1290             :                 else
    1291           0 :                     current += 1;
    1292           0 :                 MetaphAdd(primary, "F");
    1293           0 :                 MetaphAdd(secondary, "F");
    1294           0 :                 break;
    1295             : 
    1296           0 :             case 'W':
    1297             :                 /* can also be in middle of word */
    1298           0 :                 if (StringAt(original, current, 2, "WR", ""))
    1299             :                 {
    1300           0 :                     MetaphAdd(primary, "R");
    1301           0 :                     MetaphAdd(secondary, "R");
    1302           0 :                     current += 2;
    1303           0 :                     break;
    1304             :                 }
    1305             : 
    1306           0 :                 if ((current == 0)
    1307           0 :                     && (IsVowel(original, current + 1)
    1308           0 :                         || StringAt(original, current, 2, "WH", "")))
    1309             :                 {
    1310             :                     /* Wasserman should match Vasserman */
    1311           0 :                     if (IsVowel(original, current + 1))
    1312             :                     {
    1313           0 :                         MetaphAdd(primary, "A");
    1314           0 :                         MetaphAdd(secondary, "F");
    1315             :                     }
    1316             :                     else
    1317             :                     {
    1318             :                         /* need Uomo to match Womo */
    1319           0 :                         MetaphAdd(primary, "A");
    1320           0 :                         MetaphAdd(secondary, "A");
    1321             :                     }
    1322             :                 }
    1323             : 
    1324             :                 /* Arnow should match Arnoff */
    1325           0 :                 if (((current == last) && IsVowel(original, current - 1))
    1326           0 :                     || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
    1327             :                                 "OWSKI", "OWSKY", "")
    1328           0 :                     || StringAt(original, 0, 3, "SCH", ""))
    1329             :                 {
    1330           0 :                     MetaphAdd(primary, "");
    1331           0 :                     MetaphAdd(secondary, "F");
    1332           0 :                     current += 1;
    1333           0 :                     break;
    1334             :                 }
    1335             : 
    1336             :                 /* polish e.g. 'filipowicz' */
    1337           0 :                 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
    1338             :                 {
    1339           0 :                     MetaphAdd(primary, "TS");
    1340           0 :                     MetaphAdd(secondary, "FX");
    1341           0 :                     current += 4;
    1342           0 :                     break;
    1343             :                 }
    1344             : 
    1345             :                 /* else skip it */
    1346           0 :                 current += 1;
    1347           0 :                 break;
    1348             : 
    1349           0 :             case 'X':
    1350             :                 /* french e.g. breaux */
    1351           0 :                 if (!((current == last)
    1352           0 :                       && (StringAt(original, (current - 3), 3,
    1353             :                                    "IAU", "EAU", "")
    1354           0 :                           || StringAt(original, (current - 2), 2,
    1355             :                                       "AU", "OU", ""))))
    1356             :                 {
    1357           0 :                     MetaphAdd(primary, "KS");
    1358           0 :                     MetaphAdd(secondary, "KS");
    1359             :                 }
    1360             : 
    1361             : 
    1362           0 :                 if (StringAt(original, (current + 1), 1, "C", "X", ""))
    1363           0 :                     current += 2;
    1364             :                 else
    1365           0 :                     current += 1;
    1366           0 :                 break;
    1367             : 
    1368           0 :             case 'Z':
    1369             :                 /* chinese pinyin e.g. 'zhao' */
    1370           0 :                 if (GetAt(original, current + 1) == 'H')
    1371             :                 {
    1372           0 :                     MetaphAdd(primary, "J");
    1373           0 :                     MetaphAdd(secondary, "J");
    1374           0 :                     current += 2;
    1375           0 :                     break;
    1376             :                 }
    1377           0 :                 else if (StringAt(original, (current + 1), 2,
    1378             :                                   "ZO", "ZI", "ZA", "")
    1379           0 :                          || (SlavoGermanic(original)
    1380           0 :                              && ((current > 0)
    1381           0 :                                  && GetAt(original, current - 1) != 'T')))
    1382             :                 {
    1383           0 :                     MetaphAdd(primary, "S");
    1384           0 :                     MetaphAdd(secondary, "TS");
    1385             :                 }
    1386             :                 else
    1387             :                 {
    1388           0 :                     MetaphAdd(primary, "S");
    1389           0 :                     MetaphAdd(secondary, "S");
    1390             :                 }
    1391             : 
    1392           0 :                 if (GetAt(original, current + 1) == 'Z')
    1393           0 :                     current += 2;
    1394             :                 else
    1395           0 :                     current += 1;
    1396           0 :                 break;
    1397             : 
    1398           0 :             default:
    1399           0 :                 current += 1;
    1400             :         }
    1401             : 
    1402             :         /*
    1403             :          * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
    1404             :          * secondary->str);
    1405             :          */
    1406             :     }
    1407             : 
    1408             : 
    1409           4 :     if (primary->length > 4)
    1410           0 :         SetAt(primary, 4, '\0');
    1411             : 
    1412           4 :     if (secondary->length > 4)
    1413           0 :         SetAt(secondary, 4, '\0');
    1414             : 
    1415           4 :     *codes = primary->str;
    1416           4 :     *++codes = secondary->str;
    1417             : 
    1418           4 :     DestroyMetaString(original);
    1419           4 :     DestroyMetaString(primary);
    1420           4 :     DestroyMetaString(secondary);
    1421           4 : }
    1422             : 
    1423             : #ifdef DMETAPHONE_MAIN
    1424             : 
    1425             : /* just for testing - not part of the perl code */
    1426             : 
    1427             : main(int argc, char **argv)
    1428             : {
    1429             :     char       *codes[2];
    1430             : 
    1431             :     if (argc > 1)
    1432             :     {
    1433             :         DoubleMetaphone(argv[1], codes);
    1434             :         printf("%s|%s\n", codes[0], codes[1]);
    1435             :     }
    1436             : }
    1437             : 
    1438             : #endif

Generated by: LCOV version 1.14