1 /** 2 * Copyright 2014 Internet2 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /* 17 * Copyright 2001-2004 The Apache Software Foundation. 18 * 19 * Licensed under the Apache License, Version 2.0 (the "License"); 20 * you may not use this file except in compliance with the License. 21 * You may obtain a copy of the License at 22 * 23 * http://www.apache.org/licenses/LICENSE-2.0 24 * 25 * Unless required by applicable law or agreed to in writing, software 26 * distributed under the License is distributed on an "AS IS" BASIS, 27 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 * See the License for the specific language governing permissions and 29 * limitations under the License. 30 */ 31 32 package edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.language; 33 34 import edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.EncoderException; 35 import edu.internet2.middleware.grouperClientExt.org.apache.commons.codec.StringEncoder; 36 37 /** 38 * Encodes a string into a Refined Soundex value. A refined soundex code is 39 * optimized for spell checking words. Soundex method originally developed by 40 * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. 41 * 42 * @author Apache Software Foundation 43 * @version $Id: RefinedSoundex.java,v 1.1 2008-11-30 10:57:28 mchyzer Exp $ 44 */ 45 public class RefinedSoundex implements StringEncoder { 46 47 /** 48 * This static variable contains an instance of the RefinedSoundex using 49 * the US_ENGLISH mapping. 50 */ 51 public static final RefinedSoundexgrouperClientExt/org/apache/commons/codec/language/RefinedSoundex.html#RefinedSoundex">RefinedSoundex US_ENGLISH = new RefinedSoundex(); 52 53 /** 54 * RefinedSoundex is *refined* for a number of reasons one being that the 55 * mappings have been altered. This implementation contains default 56 * mappings for US English. 57 */ 58 public static final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray(); 59 60 /** 61 * Every letter of the alphabet is "mapped" to a numerical value. This char 62 * array holds the values to which each letter is mapped. This 63 * implementation contains a default map for US_ENGLISH 64 */ 65 private char[] soundexMapping; 66 67 /** 68 * Creates an instance of the RefinedSoundex object using the default US 69 * English mapping. 70 */ 71 public RefinedSoundex() { 72 this(US_ENGLISH_MAPPING); 73 } 74 75 /** 76 * Creates a refined soundex instance using a custom mapping. This 77 * constructor can be used to customize the mapping, and/or possibly 78 * provide an internationalized mapping for a non-Western character set. 79 * 80 * @param mapping 81 * Mapping array to use when finding the corresponding code for 82 * a given character 83 */ 84 public RefinedSoundex(char[] mapping) { 85 this.soundexMapping = mapping; 86 } 87 88 /** 89 * Returns the number of characters in the two encoded Strings that are the 90 * same. This return value ranges from 0 to the length of the shortest 91 * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for 92 * example) indicates strong similarity or identical values. For refined 93 * Soundex, the return value can be greater than 4. 94 * 95 * @param s1 96 * A String that will be encoded and compared. 97 * @param s2 98 * A String that will be encoded and compared. 99 * @return The number of characters in the two encoded Strings that are the 100 * same from 0 to to the length of the shortest encoded String. 101 * 102 * @see SoundexUtils#difference(StringEncoder,String,String) 103 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 104 * MS T-SQL DIFFERENCE</a> 105 * 106 * @throws EncoderException 107 * if an error occurs encoding one of the strings 108 * @since 1.3 109 */ 110 public int difference(String s1, String s2) throws EncoderException { 111 return SoundexUtils.difference(this, s1, s2); 112 } 113 114 /** 115 * Encodes an Object using the refined soundex algorithm. This method is 116 * provided in order to satisfy the requirements of the Encoder interface, 117 * and will throw an EncoderException if the supplied object is not of type 118 * java.lang.String. 119 * 120 * @param pObject 121 * Object to encode 122 * @return An object (or type java.lang.String) containing the refined 123 * soundex code which corresponds to the String supplied. 124 * @throws EncoderException 125 * if the parameter supplied is not of type java.lang.String 126 */ 127 public Object encode(Object pObject) throws EncoderException { 128 if (!(pObject instanceof java.lang.String)) { 129 throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); 130 } 131 return soundex((String) pObject); 132 } 133 134 /** 135 * Encodes a String using the refined soundex algorithm. 136 * 137 * @param pString 138 * A String object to encode 139 * @return A Soundex code corresponding to the String supplied 140 */ 141 public String encode(String pString) { 142 return soundex(pString); 143 } 144 145 /** 146 * Returns the mapping code for a given character. The mapping codes are 147 * maintained in an internal char array named soundexMapping, and the 148 * default values of these mappings are US English. 149 * 150 * @param c 151 * char to get mapping for 152 * @return A character (really a numeral) to return for the given char 153 */ 154 char getMappingCode(char c) { 155 if (!Character.isLetter(c)) { 156 return 0; 157 } 158 return this.soundexMapping[Character.toUpperCase(c) - 'A']; 159 } 160 161 /** 162 * Retreives the Refined Soundex code for a given String object. 163 * 164 * @param str 165 * String to encode using the Refined Soundex algorithm 166 * @return A soundex code for the String supplied 167 */ 168 public String soundex(String str) { 169 if (str == null) { 170 return null; 171 } 172 str = SoundexUtils.clean(str); 173 if (str.length() == 0) { 174 return str; 175 } 176 177 StringBuffer sBuf = new StringBuffer(); 178 sBuf.append(str.charAt(0)); 179 180 char last, current; 181 last = '*'; 182 183 for (int i = 0; i < str.length(); i++) { 184 185 current = getMappingCode(str.charAt(i)); 186 if (current == last) { 187 continue; 188 } else if (current != 0) { 189 sBuf.append(current); 190 } 191 192 last = current; 193 194 } 195 196 return sBuf.toString(); 197 } 198 }