1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 package edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.language;
33
34 import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.EncoderException;
35 import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.StringEncoder;
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 public class Metaphone implements StringEncoder {
52
53
54
55
56 private String vowels = "AEIOU" ;
57
58
59
60
61 private String frontv = "EIY" ;
62
63
64
65
66 private String varson = "CSPTG" ;
67
68
69
70
71 private int maxCodeLen = 4 ;
72
73
74
75
76 public Metaphone() {
77 super();
78 }
79
80
81
82
83
84
85
86
87
88
89
90 public String metaphone(String txt) {
91 boolean hard = false ;
92 if ((txt == null) || (txt.length() == 0)) {
93 return "" ;
94 }
95
96 if (txt.length() == 1) {
97 return txt.toUpperCase() ;
98 }
99
100 char[] inwd = txt.toUpperCase().toCharArray() ;
101
102 StringBuffer local = new StringBuffer(40);
103 StringBuffer code = new StringBuffer(10) ;
104
105 switch(inwd[0]) {
106 case 'K' :
107 case 'G' :
108 case 'P' :
109 if (inwd[1] == 'N') {
110 local.append(inwd, 1, inwd.length - 1);
111 } else {
112 local.append(inwd);
113 }
114 break;
115 case 'A':
116 if (inwd[1] == 'E') {
117 local.append(inwd, 1, inwd.length - 1);
118 } else {
119 local.append(inwd);
120 }
121 break;
122 case 'W' :
123 if (inwd[1] == 'R') {
124 local.append(inwd, 1, inwd.length - 1);
125 break ;
126 }
127 if (inwd[1] == 'H') {
128 local.append(inwd, 1, inwd.length - 1);
129 local.setCharAt(0, 'W');
130 } else {
131 local.append(inwd);
132 }
133 break;
134 case 'X' :
135 inwd[0] = 'S';
136 local.append(inwd);
137 break ;
138 default :
139 local.append(inwd);
140 }
141
142 int wdsz = local.length();
143 int n = 0 ;
144
145 while ((code.length() < this.getMaxCodeLen()) &&
146 (n < wdsz) ) {
147 char symb = local.charAt(n) ;
148
149 if ((symb != 'C') && (isPreviousChar( local, n, symb )) ) {
150 n++ ;
151 } else {
152 switch(symb) {
153 case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
154 if (n == 0) {
155 code.append(symb);
156 }
157 break ;
158 case 'B' :
159 if ( isPreviousChar(local, n, 'M') &&
160 isLastChar(wdsz, n) ) {
161 break;
162 }
163 code.append(symb);
164 break;
165 case 'C' :
166
167 if ( isPreviousChar(local, n, 'S') &&
168 !isLastChar(wdsz, n) &&
169 (this.frontv.indexOf(local.charAt(n + 1)) >= 0) ) {
170 break;
171 }
172 if (regionMatch(local, n, "CIA")) {
173 code.append('X');
174 break;
175 }
176 if (!isLastChar(wdsz, n) &&
177 (this.frontv.indexOf(local.charAt(n + 1)) >= 0)) {
178 code.append('S');
179 break;
180 }
181 if (isPreviousChar(local, n, 'S') &&
182 isNextChar(local, n, 'H') ) {
183 code.append('K') ;
184 break ;
185 }
186 if (isNextChar(local, n, 'H')) {
187 if ((n == 0) &&
188 (wdsz >= 3) &&
189 isVowel(local,2) ) {
190 code.append('K');
191 } else {
192 code.append('X');
193 }
194 } else {
195 code.append('K');
196 }
197 break ;
198 case 'D' :
199 if (!isLastChar(wdsz, n + 1) &&
200 isNextChar(local, n, 'G') &&
201 (this.frontv.indexOf(local.charAt(n + 2)) >= 0)) {
202 code.append('J'); n += 2 ;
203 } else {
204 code.append('T');
205 }
206 break ;
207 case 'G' :
208 if (isLastChar(wdsz, n + 1) &&
209 isNextChar(local, n, 'H')) {
210 break;
211 }
212 if (!isLastChar(wdsz, n + 1) &&
213 isNextChar(local,n,'H') &&
214 !isVowel(local,n+2)) {
215 break;
216 }
217 if ((n > 0) &&
218 ( regionMatch(local, n, "GN") ||
219 regionMatch(local, n, "GNED") ) ) {
220 break;
221 }
222 if (isPreviousChar(local, n, 'G')) {
223 hard = true ;
224 } else {
225 hard = false ;
226 }
227 if (!isLastChar(wdsz, n) &&
228 (this.frontv.indexOf(local.charAt(n + 1)) >= 0) &&
229 (!hard)) {
230 code.append('J');
231 } else {
232 code.append('K');
233 }
234 break ;
235 case 'H':
236 if (isLastChar(wdsz, n)) {
237 break ;
238 }
239 if ((n > 0) &&
240 (this.varson.indexOf(local.charAt(n - 1)) >= 0)) {
241 break;
242 }
243 if (isVowel(local,n+1)) {
244 code.append('H');
245 }
246 break;
247 case 'F':
248 case 'J' :
249 case 'L' :
250 case 'M':
251 case 'N' :
252 case 'R' :
253 code.append(symb);
254 break;
255 case 'K' :
256 if (n > 0) {
257 if (!isPreviousChar(local, n, 'C')) {
258 code.append(symb);
259 }
260 } else {
261 code.append(symb);
262 }
263 break ;
264 case 'P' :
265 if (isNextChar(local,n,'H')) {
266
267 code.append('F');
268 } else {
269 code.append(symb);
270 }
271 break ;
272 case 'Q' :
273 code.append('K');
274 break;
275 case 'S' :
276 if (regionMatch(local,n,"SH") ||
277 regionMatch(local,n,"SIO") ||
278 regionMatch(local,n,"SIA")) {
279 code.append('X');
280 } else {
281 code.append('S');
282 }
283 break;
284 case 'T' :
285 if (regionMatch(local,n,"TIA") ||
286 regionMatch(local,n,"TIO")) {
287 code.append('X');
288 break;
289 }
290 if (regionMatch(local,n,"TCH")) {
291
292 break;
293 }
294
295 if (regionMatch(local,n,"TH")) {
296 code.append('0');
297 } else {
298 code.append('T');
299 }
300 break ;
301 case 'V' :
302 code.append('F'); break ;
303 case 'W' : case 'Y' :
304 if (!isLastChar(wdsz,n) &&
305 isVowel(local,n+1)) {
306 code.append(symb);
307 }
308 break ;
309 case 'X' :
310 code.append('K'); code.append('S');
311 break ;
312 case 'Z' :
313 code.append('S'); break ;
314 }
315 n++ ;
316 }
317 if (code.length() > this.getMaxCodeLen()) {
318 code.setLength(this.getMaxCodeLen());
319 }
320 }
321 return code.toString();
322 }
323
324 private boolean isVowel(StringBuffer string, int index) {
325 return (this.vowels.indexOf(string.charAt(index)) >= 0);
326 }
327
328 private boolean isPreviousChar(StringBuffer string, int index, char c) {
329 boolean matches = false;
330 if( index > 0 &&
331 index < string.length() ) {
332 matches = string.charAt(index - 1) == c;
333 }
334 return matches;
335 }
336
337 private boolean isNextChar(StringBuffer string, int index, char c) {
338 boolean matches = false;
339 if( index >= 0 &&
340 index < string.length() - 1 ) {
341 matches = string.charAt(index + 1) == c;
342 }
343 return matches;
344 }
345
346 private boolean regionMatch(StringBuffer string, int index, String test) {
347 boolean matches = false;
348 if( index >= 0 &&
349 (index + test.length() - 1) < string.length() ) {
350 String substring = string.substring( index, index + test.length());
351 matches = substring.equals( test );
352 }
353 return matches;
354 }
355
356 private boolean isLastChar(int wdsz, int n) {
357 return n + 1 == wdsz;
358 }
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373 public Object encode(Object pObject) throws EncoderException {
374 if (!(pObject instanceof java.lang.String)) {
375 throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
376 }
377 return metaphone((String) pObject);
378 }
379
380
381
382
383
384
385
386 public String encode(String pString) {
387 return metaphone(pString);
388 }
389
390
391
392
393
394
395
396
397
398 public boolean isMetaphoneEqual(String str1, String str2) {
399 return metaphone(str1).equals(metaphone(str2));
400 }
401
402
403
404
405
406 public int getMaxCodeLen() { return this.maxCodeLen; }
407
408
409
410
411
412 public void setMaxCodeLen(int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
413
414 }