// View Javadoc
package com.bga.wbrogden.metaphone;

/* Metaphone.java
 * A class to generate phonetic code and keep lists of objects
 * retrievable by a phonetic code.
 * reference: Computer Language of Dec. 1990, p 39
 *  "Hanging on the Metaphone" by Lawrence Philips
 *
 * This Java implementation, Copyright 1997, William B. Brogden
 * is hereby released for all uses. I would appreciate hearing about it
 * if you find a good use for the class.  December, 1997
 * wbrogden@bga.com    CompuServe  75415,610
 */

/*
 * List functionality removed: 2001-06-21 bayard@generationjava.com
 */

/*
 * Notes:
 * The static method metaPhone converts an input String into a code.
 * All input is converted to upper case.
 * Limitations: Input format is expected to be a single ASCII word
 * with only characters in the A - Z range, no punctuation or numbers.
 *
 */

import java.util.* ;
import java.io.* ;

/**
 * Generates Metaphone phonetic codes for single words.
 *
 * <p>The only entry point is the static method {@link #metaPhone(String)}.
 * Codes are at most {@link #maxCodeLen} characters long.
 */
public class Metaphone {

    /** Vowels; a vowel is only encoded when it is the first letter of the word. */
    static final String vowels = "AEIOU";

    /** Letters that, when they follow C or G, select the soft sound (S / J). */
    static final String frontv = "EIY";

    /** Letters after which an H is not encoded on its own (CH, SH, PH, TH, GH). */
    static final String varson = "CSPTG";

    /** Maximum number of characters in a generated code. */
    static final int maxCodeLen = 4;

    /**
     * Converts a word into its Metaphone code.
     *
     * <p>The input is upper-cased with {@code Locale.ENGLISH} so that the result
     * does not depend on the JVM default locale. Characters outside A-Z match no
     * rule and are ignored.
     *
     * @param txt the word to encode; may be {@code null}
     * @return an empty string for {@code null} or empty input, the upper-cased
     *         input itself for a single-character input, otherwise a code of at
     *         most {@link #maxCodeLen} characters
     */
    public static String metaPhone(String txt) {
        if (txt == null || txt.length() == 0) {
            return "";
        }
        // single character is itself
        if (txt.length() == 1) {
            return txt.toUpperCase(Locale.ENGLISH);
        }

        // working string with the initial-letter exceptions already applied
        String word = applyInitialExceptions(txt.toUpperCase(Locale.ENGLISH).toCharArray());
        int wdsz = word.length();
        StringBuffer code = new StringBuffer(maxCodeLen + 2);

        int n = 0;
        // The buffer's own length is the only length that is tracked, so letters
        // that turn out to be silent never use up part of the code budget.
        while (code.length() < maxCodeLen && n < wdsz) {
            char symb = word.charAt(n);

            // remove duplicate letters except C
            if (symb != 'C' && n > 0 && word.charAt(n - 1) == symb) {
                n++;
                continue;
            }

            switch (symb) {
                case 'A': case 'E': case 'I': case 'O': case 'U':
                    // only use vowel if leading char
                    if (n == 0) {
                        code.append(symb);
                    }
                    break;

                case 'B':
                    // B is silent in "MB" at the end of the word
                    if (!(n > 0 && n + 1 == wdsz && word.charAt(n - 1) == 'M')) {
                        code.append(symb);
                    }
                    break;

                case 'C': // lots of C special cases
                    if (n > 0
                            && word.charAt(n - 1) == 'S'
                            && n + 1 < wdsz
                            && frontv.indexOf(word.charAt(n + 1)) >= 0) {
                        break; // discard if SCI, SCE or SCY
                    }
                    if (word.startsWith("CIA", n)) {
                        code.append('X'); // "CIA" -> X
                    } else if (n + 1 < wdsz && frontv.indexOf(word.charAt(n + 1)) >= 0) {
                        code.append('S'); // CI, CE, CY -> S
                    } else if (n > 0 && word.startsWith("SCH", n - 1)) {
                        code.append('K'); // SCH -> SK
                    } else if (word.startsWith("CH", n)) {
                        if (n == 0 && wdsz >= 3 && vowels.indexOf(word.charAt(2)) < 0) {
                            code.append('K'); // leading CH + consonant -> K
                        } else {
                            code.append('X'); // CH + vowel -> X
                        }
                    } else {
                        code.append('K');
                    }
                    break;

                case 'D':
                    if (n + 2 < wdsz
                            && word.charAt(n + 1) == 'G'
                            && frontv.indexOf(word.charAt(n + 2)) >= 0) {
                        code.append('J'); // DGE DGI DGY -> J
                        n += 2;           // the G and the front vowel are consumed too
                    } else {
                        code.append('T');
                    }
                    break;

                case 'G':
                    // GH is silent at the end of the word or before a non-vowel
                    if (n + 2 == wdsz && word.charAt(n + 1) == 'H') {
                        break;
                    }
                    if (n + 2 < wdsz
                            && word.charAt(n + 1) == 'H'
                            && vowels.indexOf(word.charAt(n + 2)) < 0) {
                        break;
                    }
                    // silent G before N (this also covers "GNED"); a leading GN
                    // never reaches this point because its G is already removed
                    if (n > 0 && word.startsWith("GN", n)) {
                        break;
                    }
                    // A doubled G is dropped by the duplicate filter above, so the
                    // letter before this G is never another G here.
                    if (n + 1 < wdsz && frontv.indexOf(word.charAt(n + 1)) >= 0) {
                        code.append('J');
                    } else {
                        code.append('K');
                    }
                    break;

                case 'H':
                    if (n + 1 == wdsz) {
                        break; // terminal H
                    }
                    if (n > 0 && varson.indexOf(word.charAt(n - 1)) >= 0) {
                        break; // H after C, S, P, T or G was handled with that letter
                    }
                    if (vowels.indexOf(word.charAt(n + 1)) >= 0) {
                        code.append('H'); // H + vowel
                    }
                    break;

                case 'F': case 'J': case 'L':
                case 'M': case 'N': case 'R':
                    code.append(symb);
                    break;

                case 'K':
                    // K after C is silent: the C already produced the K sound
                    if (n == 0 || word.charAt(n - 1) != 'C') {
                        code.append(symb);
                    }
                    break;

                case 'P':
                    if (n + 1 < wdsz && word.charAt(n + 1) == 'H') {
                        code.append('F'); // PH -> F
                    } else {
                        code.append(symb);
                    }
                    break;

                case 'Q':
                    code.append('K');
                    break;

                case 'S':
                    if (word.startsWith("SH", n)
                            || word.startsWith("SIO", n)
                            || word.startsWith("SIA", n)) {
                        code.append('X');
                    } else {
                        code.append('S');
                    }
                    break;

                case 'T':
                    if (word.startsWith("TIA", n) || word.startsWith("TIO", n)) {
                        code.append('X'); // TIA TIO -> X
                    } else if (word.startsWith("TCH", n)) {
                        // silent T; the following CH supplies the sound
                    } else if (word.startsWith("TH", n)) {
                        // substitute numeral 0 for TH (resembles theta after all)
                        code.append('0');
                    } else {
                        code.append('T');
                    }
                    break;

                case 'V':
                    code.append('F');
                    break;

                case 'W': case 'Y':
                    // silent if not followed by vowel
                    if (n + 1 < wdsz && vowels.indexOf(word.charAt(n + 1)) >= 0) {
                        code.append(symb);
                    }
                    break;

                case 'X':
                    code.append('K');
                    code.append('S');
                    break;

                case 'Z':
                    code.append('S');
                    break;

                default:
                    // any other character contributes nothing to the code
                    break;
            }
            n++;
        }

        // X appends two characters and can overshoot the limit by one
        if (code.length() > maxCodeLen) {
            code.setLength(maxCodeLen);
        }
        return code.toString();
    }

    /**
     * Applies the rules for the first two letters of a word: KN, GN, PN -> N,
     * AE -> E, WR -> R, WH -> W, and a leading X -> S.
     *
     * @param inwd the upper-cased word, at least two characters long; the array
     *             is a private copy and may be modified
     * @return the working string used by the main encoding loop
     */
    private static String applyInitialExceptions(char[] inwd) {
        boolean dropFirst = false;
        switch (inwd[0]) {
            case 'K': case 'G': case 'P': // looking for KN, etc
                dropFirst = inwd[1] == 'N';
                break;
            case 'A': // looking for AE
                dropFirst = inwd[1] == 'E';
                break;
            case 'W': // looking for WR or WH
                if (inwd[1] == 'R') {
                    dropFirst = true;   // WR -> R
                } else if (inwd[1] == 'H') {
                    inwd[1] = 'W';      // WH -> W
                    dropFirst = true;
                }
                break;
            case 'X': // initial X becomes S
                inwd[0] = 'S';
                break;
            default:
                break;
        }
        if (dropFirst) {
            return new String(inwd, 1, inwd.length - 1);
        }
        return new String(inwd);
    }

}

// This page was automatically generated by Maven