View Javadoc
1   /**
2    * Copyright 2014 Internet2
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  /*
17   * Copyright 2001-2004 The Apache Software Foundation.
18   * 
19   * Licensed under the Apache License, Version 2.0 (the "License");
20   * you may not use this file except in compliance with the License.
21   * You may obtain a copy of the License at
22   * 
23   *      http://www.apache.org/licenses/LICENSE-2.0
24   * 
25   * Unless required by applicable law or agreed to in writing, software
26   * distributed under the License is distributed on an "AS IS" BASIS,
27   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28   * See the License for the specific language governing permissions and
29   * limitations under the License.
30   */ 
31  
32  package edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.language;
33  
34  import edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.EncoderException;
35  import edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.StringEncoder;
36  
37  /**
38   * Encodes a string into a double metaphone value.
39   * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
40   * <ul>
41   * <li>Original Article: <a 
42   * href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
43   * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
44   * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
45   * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
46   * </ul>
47   * 
48   * @author Apache Software Foundation
49   * @version $Id: DoubleMetaphone.java,v 1.1 2008-11-30 10:57:28 mchyzer Exp $
50   */
51  public class DoubleMetaphone implements StringEncoder {
52  
53      /**
54       * "Vowels" to test for
55       */
56      private static final String VOWELS = "AEIOUY";
57  
58      /**
59       * Prefixes when present which are not pronounced
60       */
61      private static final String[] SILENT_START = 
62      { "GN", "KN", "PN", "WR", "PS" };
63      private static final String[] L_R_N_M_B_H_F_V_W_SPACE = 
64      { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
65      private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = 
66      { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
67      private static final String[] L_T_K_S_N_M_B_Z = 
68      { "L", "T", "K", "S", "N", "M", "B", "Z" };
69  
70      /**
71       * Maximum length of an encoding, default is 4
72       */
73      protected int maxCodeLen = 4;
74  
75      /**
76       * Creates an instance of this DoubleMetaphone encoder
77       */
78      public DoubleMetaphone() {
79          super();
80      }
81      
82      /**
83       * Encode a value with Double Metaphone
84       *
85       * @param value String to encode
86       * @return an encoded string
87       */
88      public String doubleMetaphone(String value) {
89          return doubleMetaphone(value, false);
90      }
91      
92      /**
93       * Encode a value with Double Metaphone, optionally using the alternate
94       * encoding.
95       *
96       * @param value String to encode
97       * @param alternate use alternate encode
98       * @return an encoded string
99       */
100     public String doubleMetaphone(String value, boolean alternate) {
101         value = cleanInput(value);
102         if (value == null) {
103             return null;
104         }
105         
106         boolean slavoGermanic = isSlavoGermanic(value);
107         int index = isSilentStart(value) ? 1 : 0;
108         
109         DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
110         
111         while (!result.isComplete() && index <= value.length() - 1) {
112             switch (value.charAt(index)) {
113             case 'A':
114             case 'E':
115             case 'I':
116             case 'O':
117             case 'U':
118             case 'Y':
119                 index = handleAEIOUY(value, result, index);
120                 break;
121             case 'B':
122                 result.append('P');
123                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
124                 break;
125             case '\u00C7':
126                 // A C with a Cedilla
127                 result.append('S');
128                 index++;
129                 break; 
130             case 'C':
131                 index = handleC(value, result, index);
132                 break;
133             case 'D':
134                 index = handleD(value, result, index);
135                 break;
136             case 'F':
137                 result.append('F');
138                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
139                 break;
140             case 'G':
141                 index = handleG(value, result, index, slavoGermanic);
142                 break;
143             case 'H':
144                 index = handleH(value, result, index);
145                 break;
146             case 'J':
147                 index = handleJ(value, result, index, slavoGermanic);
148                 break;
149             case 'K':
150                 result.append('K');
151                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
152                 break;
153             case 'L':
154                 index = handleL(value, result, index);
155                 break;
156             case 'M':
157                 result.append('M');
158                 index = conditionM0(value, index) ? index + 2 : index + 1;
159                 break;
160             case 'N':
161                 result.append('N');
162                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
163                 break;
164             case '\u00D1':
165                 // N with a tilde (spanish ene)
166                 result.append('N');
167                 index++;
168                 break;
169             case 'P':
170                 index = handleP(value, result, index);
171                 break;
172             case 'Q':
173                 result.append('K');
174                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
175                 break;
176             case 'R':
177                 index = handleR(value, result, index, slavoGermanic);
178                 break;
179             case 'S':
180                 index = handleS(value, result, index, slavoGermanic);
181                 break;
182             case 'T':
183                 index = handleT(value, result, index);
184                 break;
185             case 'V':
186                 result.append('F');
187                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
188                 break;
189             case 'W':
190                 index = handleW(value, result, index);
191                 break;
192             case 'X':
193                 index = handleX(value, result, index);
194                 break;
195             case 'Z':
196                 index = handleZ(value, result, index, slavoGermanic);
197                 break;
198             default:
199                 index++;
200                 break;
201             }
202         }
203 
204         return alternate ? result.getAlternate() : result.getPrimary();
205     }
206     
207     /**
208      * Encode the value using DoubleMetaphone.  It will only work if 
209      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
210      *
211      * @param obj Object to encode (should be of type String)
212      * @return An encoded Object (will be of type String)
213      * @throws EncoderException encode parameter is not of type String
214      */
215     public Object encode(Object obj) throws EncoderException {
216         if (!(obj instanceof String)) {
217             throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); 
218         } 
219         return doubleMetaphone((String) obj);
220     }
221 
222     /**
223      * Encode the value using DoubleMetaphone.
224      *
225      * @param value String to encode
226      * @return An encoded String
227      */
228     public String encode(String value) {
229         return doubleMetaphone(value);   
230     }
231 
232     /**
233      * Check if the Double Metaphone values of two <code>String</code> values
234      * are equal.
235      * 
236      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
237      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
238      * @return <code>true</code> if the encoded <code>String</code>s are equal;
239      *          <code>false</code> otherwise.
240      * @see #isDoubleMetaphoneEqual(String,String,boolean)
241      */
242     public boolean isDoubleMetaphoneEqual(String value1, String value2) {
243         return isDoubleMetaphoneEqual(value1, value2, false);
244     }
245     
246     /**
247      * Check if the Double Metaphone values of two <code>String</code> values
248      * are equal, optionally using the alternate value.
249      * 
250      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
251      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
252      * @param alternate use the alternate value if <code>true</code>.
253      * @return <code>true</code> if the encoded <code>String</code>s are equal;
254      *          <code>false</code> otherwise.
255      */
256     public boolean isDoubleMetaphoneEqual(String value1, 
257                                           String value2, 
258                                           boolean alternate) {
259         return doubleMetaphone(value1, alternate).equals(doubleMetaphone
260                                                          (value2, alternate));
261     }
262     
263     /**
264      * Returns the maxCodeLen.
265      * @return int
266      */
267     public int getMaxCodeLen() {
268         return this.maxCodeLen;
269     }
270 
271     /**
272      * Sets the maxCodeLen.
273      * @param maxCodeLen The maxCodeLen to set
274      */
275     public void setMaxCodeLen(int maxCodeLen) {
276         this.maxCodeLen = maxCodeLen;
277     }
278 
279     //-- BEGIN HANDLERS --//
280 
281     /**
282      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
283      */
284     private int handleAEIOUY(String value, DoubleMetaphoneResult result, int 
285                              index) {
286         if (index == 0) {
287             result.append('A');
288         }
289         return index + 1;
290     }
291     
292     /**
293      * Handles 'C' cases
294      */
295     private int handleC(String value, 
296                         DoubleMetaphoneResult result, 
297                         int index) {
298         if (conditionC0(value, index)) {  // very confusing, moved out
299             result.append('K');
300             index += 2;
301         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
302             result.append('S');
303             index += 2;
304         } else if (contains(value, index, 2, "CH")) {
305             index = handleCH(value, result, index);
306         } else if (contains(value, index, 2, "CZ") && 
307                    !contains(value, index - 2, 4, "WICZ")) {
308             //-- "Czerny" --//
309             result.append('S', 'X');
310             index += 2;
311         } else if (contains(value, index + 1, 3, "CIA")) {
312             //-- "focaccia" --//
313             result.append('X');
314             index += 3;
315         } else if (contains(value, index, 2, "CC") && 
316                    !(index == 1 && charAt(value, 0) == 'M')) {
317             //-- double "cc" but not "McClelland" --//
318             return handleCC(value, result, index);
319         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
320             result.append('K');
321             index += 2;
322         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
323             //-- Italian vs. English --//
324             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
325                 result.append('S', 'X');
326             } else {
327                 result.append('S');
328             }
329             index += 2;
330         } else {
331             result.append('K');
332             if (contains(value, index + 1, 2, " C", " Q", " G")) { 
333                 //-- Mac Caffrey, Mac Gregor --//
334                 index += 3;
335             } else if (contains(value, index + 1, 1, "C", "K", "Q") && 
336                        !contains(value, index + 1, 2, "CE", "CI")) {
337                 index += 2;
338             } else {
339                 index++;
340             }
341         }
342         
343         return index;
344     }
345 
346     /**
347      * Handles 'CC' cases
348      */
349     private int handleCC(String value, 
350                          DoubleMetaphoneResult result, 
351                          int index) {
352         if (contains(value, index + 2, 1, "I", "E", "H") && 
353             !contains(value, index + 2, 2, "HU")) {
354             //-- "bellocchio" but not "bacchus" --//
355             if ((index == 1 && charAt(value, index - 1) == 'A') || 
356                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
357                 //-- "accident", "accede", "succeed" --//
358                 result.append("KS");
359             } else {
360                 //-- "bacci", "bertucci", other Italian --//
361                 result.append('X');
362             }
363             index += 3;
364         } else {    // Pierce's rule
365             result.append('K');
366             index += 2;
367         }
368         
369         return index;
370     }
371     
372     /**
373      * Handles 'CH' cases
374      */
375     private int handleCH(String value, 
376                          DoubleMetaphoneResult result, 
377                          int index) {
378         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
379             result.append('K', 'X');
380             return index + 2;
381         } else if (conditionCH0(value, index)) {
382             //-- Greek roots ("chemistry", "chorus", etc.) --//
383             result.append('K');
384             return index + 2;
385         } else if (conditionCH1(value, index)) {
386             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
387             result.append('K');
388             return index + 2;
389         } else {
390             if (index > 0) {
391                 if (contains(value, 0, 2, "MC")) {
392                     result.append('K');
393                 } else {
394                     result.append('X', 'K');
395                 }
396             } else {
397                 result.append('X');
398             }
399             return index + 2;
400         }
401     }
402 
403     /**
404      * Handles 'D' cases
405      */
406     private int handleD(String value, 
407                         DoubleMetaphoneResult result, 
408                         int index) {
409         if (contains(value, index, 2, "DG")) {
410             //-- "Edge" --//
411             if (contains(value, index + 2, 1, "I", "E", "Y")) {
412                 result.append('J');
413                 index += 3;
414                 //-- "Edgar" --//
415             } else {
416                 result.append("TK");
417                 index += 2;
418             }
419         } else if (contains(value, index, 2, "DT", "DD")) {
420             result.append('T');
421             index += 2;
422         } else {
423             result.append('T');
424             index++;
425         }
426         return index;
427     }
428 
429     /**
430      * Handles 'G' cases
431      */
432     private int handleG(String value, 
433                         DoubleMetaphoneResult result, 
434                         int index, 
435                         boolean slavoGermanic) {
436         if (charAt(value, index + 1) == 'H') {
437             index = handleGH(value, result, index);
438         } else if (charAt(value, index + 1) == 'N') {
439             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
440                 result.append("KN", "N");
441             } else if (!contains(value, index + 2, 2, "EY") && 
442                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
443                 result.append("N", "KN");
444             } else {
445                 result.append("KN");
446             }
447             index = index + 2;
448         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
449             result.append("KL", "L");
450             index += 2;
451         } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
452             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
453             result.append('K', 'J');
454             index += 2;
455         } else if ((contains(value, index + 1, 2, "ER") || 
456                     charAt(value, index + 1) == 'Y') &&
457                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
458                    !contains(value, index - 1, 1, "E", "I") && 
459                    !contains(value, index - 1, 3, "RGY", "OGY")) {
460             //-- -ger-, -gy- --//
461             result.append('K', 'J');
462             index += 2;
463         } else if (contains(value, index + 1, 1, "E", "I", "Y") || 
464                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
465             //-- Italian "biaggi" --//
466             if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
467                 //-- obvious germanic --//
468                 result.append('K');
469             } else if (contains(value, index + 1, 4, "IER")) {
470                 result.append('J');
471             } else {
472                 result.append('J', 'K');
473             }
474             index += 2;
475         } else if (charAt(value, index + 1) == 'G') {
476             index += 2;
477             result.append('K');
478         } else {
479             index++;
480             result.append('K');
481         }
482         return index;
483     }
484     
485     /**
486      * Handles 'GH' cases
487      */
488     private int handleGH(String value, 
489                          DoubleMetaphoneResult result, 
490                          int index) {
491         if (index > 0 && !isVowel(charAt(value, index - 1))) {
492             result.append('K');
493             index += 2;
494         } else if (index == 0) {
495             if (charAt(value, index + 2) == 'I') {
496                 result.append('J');
497             } else {
498                 result.append('K');
499             }
500             index += 2;
501         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
502                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
503                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
504             //-- Parker's rule (with some further refinements) - "hugh"
505             index += 2;
506         } else {
507             if (index > 2 && charAt(value, index - 1) == 'U' && 
508                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
509                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
510                 result.append('F');
511             } else if (index > 0 && charAt(value, index - 1) != 'I') {
512                 result.append('K');
513             }
514             index += 2;
515         }
516         return index;
517     }
518 
519     /**
520      * Handles 'H' cases
521      */
522     private int handleH(String value, 
523                         DoubleMetaphoneResult result, 
524                         int index) {
525         //-- only keep if first & before vowel or between 2 vowels --//
526         if ((index == 0 || isVowel(charAt(value, index - 1))) && 
527             isVowel(charAt(value, index + 1))) {
528             result.append('H');
529             index += 2;
530             //-- also takes car of "HH" --//
531         } else {
532             index++;
533         }
534         return index;
535     }
536     
537     /**
538      * Handles 'J' cases
539      */
540     private int handleJ(String value, DoubleMetaphoneResult result, int index, 
541                         boolean slavoGermanic) {
542         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
543                 //-- obvious Spanish, "Jose", "San Jacinto" --//
544                 if ((index == 0 && (charAt(value, index + 4) == ' ') || 
545                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
546                     result.append('H');
547                 } else {
548                     result.append('J', 'H');
549                 }
550                 index++;
551             } else {
552                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
553                     result.append('J', 'A');
554                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && 
555                               (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
556                     result.append('J', 'H');
557                 } else if (index == value.length() - 1) {
558                     result.append('J', ' ');
559                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
560                     result.append('J');
561                 }
562 
563                 if (charAt(value, index + 1) == 'J') {
564                     index += 2;
565                 } else {
566                     index++;
567                 }
568             }
569         return index;
570     }
571     
572     /**
573      * Handles 'L' cases
574      */
575     private int handleL(String value, 
576                         DoubleMetaphoneResult result, 
577                         int index) {
578         result.append('L');
579         if (charAt(value, index + 1) == 'L') {
580             if (conditionL0(value, index)) {
581                 result.appendAlternate(' ');
582             }
583             index += 2;
584         } else {
585             index++;
586         }
587         return index;
588     }
589 
590     /**
591      * Handles 'P' cases
592      */
593     private int handleP(String value, 
594                         DoubleMetaphoneResult result, 
595                         int index) {
596         if (charAt(value, index + 1) == 'H') {
597             result.append('F');
598             index += 2;
599         } else {
600             result.append('P');
601             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
602         }
603         return index;
604     }
605 
606     /**
607      * Handles 'R' cases
608      */
609     private int handleR(String value, 
610                         DoubleMetaphoneResult result, 
611                         int index, 
612                         boolean slavoGermanic) {
613         if (index == value.length() - 1 && !slavoGermanic && 
614             contains(value, index - 2, 2, "IE") && 
615             !contains(value, index - 4, 2, "ME", "MA")) {
616             result.appendAlternate('R');
617         } else {
618             result.append('R');
619         }
620         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
621     }
622 
623     /**
624      * Handles 'S' cases
625      */
626     private int handleS(String value, 
627                         DoubleMetaphoneResult result, 
628                         int index, 
629                         boolean slavoGermanic) {
630         if (contains(value, index - 1, 3, "ISL", "YSL")) {
631             //-- special cases "island", "isle", "carlisle", "carlysle" --//
632             index++;
633         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
634             //-- special case "sugar-" --//
635             result.append('X', 'S');
636             index++;
637         } else if (contains(value, index, 2, "SH")) {
638             if (contains(value, index + 1, 4, 
639                          "HEIM", "HOEK", "HOLM", "HOLZ")) {
640                 //-- germanic --//
641                 result.append('S');
642             } else {
643                 result.append('X');
644             }
645             index += 2;
646         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
647             //-- Italian and Armenian --//
648             if (slavoGermanic) {
649                 result.append('S');
650             } else {
651                 result.append('S', 'X');
652             }
653             index += 3;
654         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
655             //-- german & anglicisations, e.g. "smith" match "schmidt" //
656             // "snider" match "schneider" --//
657             //-- also, -sz- in slavic language altho in hungarian it //
658             //   is pronounced "s" --//
659             result.append('S', 'X');
660             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
661         } else if (contains(value, index, 2, "SC")) {
662             index = handleSC(value, result, index);
663         } else {
664             if (index == value.length() - 1 && contains(value, index - 2, 
665                                                         2, "AI", "OI")){
666                 //-- french e.g. "resnais", "artois" --//
667                 result.appendAlternate('S');
668             } else {
669                 result.append('S');
670             }
671             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
672         }
673         return index;
674     }
675 
676     /**
677      * Handles 'SC' cases
678      */
679     private int handleSC(String value, 
680                          DoubleMetaphoneResult result, 
681                          int index) {
682         if (charAt(value, index + 2) == 'H') {
683             //-- Schlesinger's rule --//
684             if (contains(value, index + 3, 
685                          2, "OO", "ER", "EN", "UY", "ED", "EM")) {
686                 //-- Dutch origin, e.g. "school", "schooner" --//
687                 if (contains(value, index + 3, 2, "ER", "EN")) {
688                     //-- "schermerhorn", "schenker" --//
689                     result.append("X", "SK");
690                 } else {
691                     result.append("SK");
692                 }
693             } else {
694                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
695                     result.append('X', 'S');
696                 } else {
697                     result.append('X');
698                 }
699             }
700         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
701             result.append('S');
702         } else {
703             result.append("SK");
704         }
705         return index + 3;
706     }
707 
708     /**
709      * Handles 'T' cases
710      */
711     private int handleT(String value, 
712                         DoubleMetaphoneResult result, 
713                         int index) {
714         if (contains(value, index, 4, "TION")) {
715             result.append('X');
716             index += 3;
717         } else if (contains(value, index, 3, "TIA", "TCH")) {
718             result.append('X');
719             index += 3;
720         } else if (contains(value, index, 2, "TH") || contains(value, index, 
721                                                                3, "TTH")) {
722             if (contains(value, index + 2, 2, "OM", "AM") || 
723                 //-- special case "thomas", "thames" or germanic --//
724                 contains(value, 0, 4, "VAN ", "VON ") || 
725                 contains(value, 0, 3, "SCH")) {
726                 result.append('T');
727             } else {
728                 result.append('0', 'T');
729             }
730             index += 2;
731         } else {
732             result.append('T');
733             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
734         }
735         return index;
736     }
737 
738     /**
739      * Handles 'W' cases
740      */
741     private int handleW(String value, 
742                         DoubleMetaphoneResult result, 
743                         int index) {
744         if (contains(value, index, 2, "WR")) {
745             //-- can also be in middle of word --//
746             result.append('R');
747             index += 2;
748         } else {
749             if (index == 0 && (isVowel(charAt(value, index + 1)) || 
750                                contains(value, index, 2, "WH"))) {
751                 if (isVowel(charAt(value, index + 1))) {
752                     //-- Wasserman should match Vasserman --//
753                     result.append('A', 'F');
754                 } else {
755                     //-- need Uomo to match Womo --//
756                     result.append('A');
757                 }
758                 index++;
759             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
760                        contains(value, index - 1, 
761                                 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
762                        contains(value, 0, 3, "SCH")) {
763                 //-- Arnow should match Arnoff --//
764                 result.appendAlternate('F');
765                 index++;
766             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
767                 //-- Polish e.g. "filipowicz" --//
768                 result.append("TS", "FX");
769                 index += 4;
770             } else {
771                 index++;
772             }
773         }
774         return index;
775     }
776     
777     /**
778      * Handles 'X' cases
779      */
780     private int handleX(String value, 
781                         DoubleMetaphoneResult result, 
782                         int index) {
783         if (index == 0) {
784             result.append('S');
785             index++;
786         } else {
787             if (!((index == value.length() - 1) && 
788                   (contains(value, index - 3, 3, "IAU", "EAU") || 
789                    contains(value, index - 2, 2, "AU", "OU")))) {
790                 //-- French e.g. breaux --//
791                 result.append("KS");
792             }
793             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
794         }
795         return index;
796     }
797 
798     /**
799      * Handles 'Z' cases
800      */
801     private int handleZ(String value, DoubleMetaphoneResult result, int index, 
802                         boolean slavoGermanic) {
803         if (charAt(value, index + 1) == 'H') {
804             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
805             result.append('J');
806             index += 2;
807         } else {
808             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
809                 result.append("S", "TS");
810             } else {
811                 result.append('S');
812             }
813             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
814         }
815         return index;
816     }
817 
818     //-- BEGIN CONDITIONS --//
819 
820     /**
821      * Complex condition 0 for 'C'
822      */
823     private boolean conditionC0(String value, int index) {
824 		if (contains(value, index, 4, "CHIA")) {
825 			return true;
826 		} else if (index <= 1) {
827 			return false;
828 		} else if (isVowel(charAt(value, index - 2))) {
829 			return false;
830 		} else if (!contains(value, index - 1, 3, "ACH")) {
831 			return false;
832 		} else {
833 			char c = charAt(value, index + 2);
834 			return (c != 'I' && c != 'E')
835 					|| contains(value, index - 2, 6, "BACHER", "MACHER");
836 		}
837 	}
838     
839     /**
840 	 * Complex condition 0 for 'CH'
841 	 */
842     private boolean conditionCH0(String value, int index) {
843         if (index != 0) {
844             return false;
845         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && 
846                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
847             return false;
848         } else if (contains(value, 0, 5, "CHORE")) {
849             return false;
850         } else {
851             return true;
852         }
853     }
854     
855     /**
856      * Complex condition 1 for 'CH'
857      */
858     private boolean conditionCH1(String value, int index) {
859         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 
860                                                                    3, "SCH")) ||
861                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
862                 contains(value, index + 2, 1, "T", "S") ||
863                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
864                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
865     }
866     
867     /**
868      * Complex condition 0 for 'L'
869      */
870     private boolean conditionL0(String value, int index) {
871         if (index == value.length() - 3 && 
872             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
873             return true;
874         } else if ((contains(value, index - 1, 2, "AS", "OS") || 
875                     contains(value, value.length() - 1, 1, "A", "O")) &&
876                    contains(value, index - 1, 4, "ALLE")) {
877             return true;
878         } else {
879             return false;
880         }
881     }
882     
883     /**
884      * Complex condition 0 for 'M'
885      */
886     private boolean conditionM0(String value, int index) {
887 		if (charAt(value, index + 1) == 'M') {
888 			return true;
889 		}
890 		return contains(value, index - 1, 3, "UMB")
891 				&& ((index + 1) == value.length() - 1 || contains(value,
892 						index + 2, 2, "ER"));
893 	}
894     
895     //-- BEGIN HELPER FUNCTIONS --//
896 
897     /**
898 	 * Determines whether or not a value is of slavo-germanic orgin. A value is
899 	 * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
900 	 */
901     private boolean isSlavoGermanic(String value) {
902         return value.indexOf('W') > -1 || value.indexOf('K') > -1 || 
903             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
904     }
905 
906     /**
907      * Determines whether or not a character is a vowel or not
908      */
909     private boolean isVowel(char ch) {
910         return VOWELS.indexOf(ch) != -1;
911     }
912 
913     /**
914      * Determines whether or not the value starts with a silent letter.  It will
915      * return <code>true</code> if the value starts with any of 'GN', 'KN',
916      * 'PN', 'WR' or 'PS'.
917      */    
918     private boolean isSilentStart(String value) {
919         boolean result = false;
920         for (int i = 0; i < SILENT_START.length; i++) {
921             if (value.startsWith(SILENT_START[i])) {
922                 result = true;
923                 break;
924             }
925         }
926         return result;
927     }
928 
929     /**
930      * Cleans the input
931      */    
932     private String cleanInput(String input) {
933 		if (input == null) {
934 			return null;
935 		}
936 		input = input.trim();
937 		if (input.length() == 0) {
938 			return null;
939 		}
940 		return input.toUpperCase();
941 	}
942 
943     /**
944 	 * Gets the character at index <code>index</code> if available, otherwise
945 	 * it returns <code>Character.MIN_VALUE</code> so that there is some sort
946 	 * of a default
947 	 */    
948     protected char charAt(String value, int index) {
949         if (index < 0 || index >= value.length()) {
950             return Character.MIN_VALUE;
951         } 
952         return value.charAt(index);
953     }
954 
955     /**
956      * Shortcut method with 1 criteria
957      */    
958     private static boolean contains(String value, int start, int length, 
959                                     String criteria) {
960         return contains(value, start, length, 
961                         new String[] { criteria });
962     }
963 
964     /**
965      * Shortcut method with 2 criteria
966      */    
967     private static boolean contains(String value, int start, int length, 
968                                     String criteria1, String criteria2) {
969         return contains(value, start, length, 
970                         new String[] { criteria1, criteria2 });
971     }
972 
973     /**
974      * Shortcut method with 3 criteria
975      */    
976     private static boolean contains(String value, int start, int length, 
977                                     String criteria1, String criteria2, 
978                                     String criteria3) {
979         return contains(value, start, length, 
980                         new String[] { criteria1, criteria2, criteria3 });
981     }
982 
983     /**
984      * Shortcut method with 4 criteria
985      */    
986     private static boolean contains(String value, int start, int length, 
987                                     String criteria1, String criteria2, 
988                                     String criteria3, String criteria4) {
989         return contains(value, start, length, 
990                         new String[] { criteria1, criteria2, criteria3, 
991                                        criteria4 });
992     }
993 
994     /**
995      * Shortcut method with 5 criteria
996      */    
997     private static boolean contains(String value, int start, int length, 
998                                     String criteria1, String criteria2, 
999                                     String criteria3, String criteria4, 
1000                                     String criteria5) {
1001         return contains(value, start, length, 
1002                         new String[] { criteria1, criteria2, criteria3, 
1003                                        criteria4, criteria5 });
1004     }
1005 
1006     /**
1007      * Shortcut method with 6 criteria
1008      */    
1009     private static boolean contains(String value, int start, int length, 
1010                                     String criteria1, String criteria2, 
1011                                     String criteria3, String criteria4, 
1012                                     String criteria5, String criteria6) {
1013         return contains(value, start, length, 
1014                         new String[] { criteria1, criteria2, criteria3, 
1015                                        criteria4, criteria5, criteria6 });
1016     }
1017     
1018     /**
1019      * Determines whether <code>value</code> contains any of the criteria 
1020      starting
1021      * at index <code>start</code> and matching up to length <code>length</code>
1022      */    
1023     protected static boolean contains(String value, int start, int length, 
1024                                       String[] criteria) {
1025         boolean result = false;
1026         if (start >= 0 && start + length <= value.length()) {
1027             String target = value.substring(start, start + length);
1028 
1029             for (int i = 0; i < criteria.length; i++) {
1030                 if (target.equals(criteria[i])) {
1031                     result = true;
1032                     break;
1033                 }
1034             }
1035         }
1036         return result;
1037     }
1038     
1039     //-- BEGIN INNER CLASSES --//
1040     
1041     /**
1042      * Inner class for storing results, since there is the optional alternate
1043      * encoding.
1044      */
1045     public class DoubleMetaphoneResult {
1046 
1047         private StringBuffer primary = new StringBuffer(getMaxCodeLen());
1048         private StringBuffer alternate = new StringBuffer(getMaxCodeLen());
1049         private int maxLength;
1050 
1051         public DoubleMetaphoneResult(int maxLength) {
1052             this.maxLength = maxLength;
1053         }
1054 
1055         public void append(char value) {
1056             appendPrimary(value);
1057             appendAlternate(value);
1058         }
1059 
1060         public void append(char primary, char alternate) {
1061             appendPrimary(primary);
1062             appendAlternate(alternate);
1063         }
1064 
1065         public void appendPrimary(char value) {
1066             if (this.primary.length() < this.maxLength) {
1067                 this.primary.append(value);
1068             }
1069         }
1070 
1071         public void appendAlternate(char value) {
1072             if (this.alternate.length() < this.maxLength) {
1073                 this.alternate.append(value);
1074             }
1075         }
1076 
1077         public void append(String value) {
1078             appendPrimary(value);
1079             appendAlternate(value);
1080         }
1081 
1082         public void append(String primary, String alternate) {
1083             appendPrimary(primary);
1084             appendAlternate(alternate);
1085         }
1086 
1087         public void appendPrimary(String value) {
1088             int addChars = this.maxLength - this.primary.length();
1089             if (value.length() <= addChars) {
1090                 this.primary.append(value);
1091             } else {
1092                 this.primary.append(value.substring(0, addChars));
1093             }
1094         }
1095 
1096         public void appendAlternate(String value) {
1097             int addChars = this.maxLength - this.alternate.length();
1098             if (value.length() <= addChars) {
1099                 this.alternate.append(value);
1100             } else {
1101                 this.alternate.append(value.substring(0, addChars));
1102             }
1103         }
1104 
1105         public String getPrimary() {
1106             return this.primary.toString();
1107         }
1108 
1109         public String getAlternate() {
1110             return this.alternate.toString();
1111         }
1112 
1113         public boolean isComplete() {
1114             return this.primary.length() >= this.maxLength && 
1115                 this.alternate.length() >= this.maxLength;
1116         }
1117     }
1118 }