1 package com.bga.wbrogden.metaphone;
2
3 /* Metaphone.java
4 * A class to generate phonetic code and keep lists of objects
5 * retrievable by a phonetic code.
6 * reference: Computer Language of Dec. 1990, p 39
7 * "Hanging on the Metaphone" by Lawrence Philips
8 *
9 * This Java implementation, Copyright 1997, William B. Brogden
10 * is hereby released for all uses. I would appreciate hearing about it
11 * if you find a good use for the class. December, 1997
12 * wbrogden@bga.com CompuServe 75415,610
13 */
14
15 /*
16 * List functionality removed: 2001-06-21 bayard@generationjava.com
17 */
18
19 /*
20 * Notes:
21 * The static method metaPhone converts an input String into a code.
22 * All input is converted to upper case.
23 * Limitations: Input format is expected to be a single ASCII word
24 * with only characters in the A - Z range, no punctuation or numbers.
25 *
26 */
27
28 import java.util.* ;
29 import java.io.* ;
30
/**
 * Generates Metaphone phonetic codes for single English words.
 *
 * <p>The algorithm follows Lawrence Philips' "Hanging on the Metaphone"
 * (Computer Language, Dec. 1990). Input is upper-cased with English casing
 * rules; characters outside {@code A-Z} contribute nothing to the code.
 */
public class Metaphone {

    /** Vowels; only a leading vowel is ever emitted into the code. */
    static final String vowels = "AEIOU";
    /** "Front" vowels that soften a preceding C or G. */
    static final String frontv = "EIY";
    /** Consonants that absorb a following H (CH, SH, PH, TH, GH). */
    static final String varson = "CSPTG";

    /** Maximum number of characters in a generated code. */
    static final int maxCodeLen = 4;

    /**
     * Converts a word into its Metaphone code.
     *
     * @param txt the word to encode; may be {@code null}
     * @return the empty string for {@code null} or empty input, the
     *         upper-cased input for a single-character input, otherwise a
     *         code of at most {@link #maxCodeLen} characters
     */
    public static String metaPhone(String txt) {
        if (txt == null || txt.length() == 0) {
            return "";
        }
        // Locale.ENGLISH keeps 'i' -> 'I' regardless of the JVM default
        // locale (a Turkish default would otherwise yield a dotted capital I
        // that the rules below would silently drop).
        if (txt.length() == 1) {
            return txt.toUpperCase(Locale.ENGLISH); // single character is itself
        }

        final String word = applyInitialExceptions(txt.toUpperCase(Locale.ENGLISH));
        final int wdsz = word.length();
        final StringBuffer code = new StringBuffer(10);

        int n = 0;
        // The loop is driven by the real code length, so a silent letter
        // never consumes one of the maxCodeLen output slots.
        while (code.length() < maxCodeLen && n < wdsz) {
            final char symb = word.charAt(n);

            // Doubled letters collapse to one, except C (CC has its own rules).
            if (symb != 'C' && n > 0 && word.charAt(n - 1) == symb) {
                n++;
                continue;
            }

            final boolean hasNext = n + 1 < wdsz;

            switch (symb) {
                case 'A': case 'E': case 'I': case 'O': case 'U':
                    if (n == 0) { // only use a vowel if it is the leading char
                        code.append(symb);
                    }
                    break;

                case 'B':
                    // B is silent in a word-final "MB" (DUMB, COMB).
                    if (n > 0 && !hasNext && word.charAt(n - 1) == 'M') {
                        break;
                    }
                    code.append('B');
                    break;

                case 'C':
                    // Silent in SCI, SCE, SCY.
                    if (n > 0 && word.charAt(n - 1) == 'S'
                            && hasNext && isFrontVowel(word.charAt(n + 1))) {
                        break;
                    }
                    if (word.startsWith("CIA", n)) {
                        code.append('X'); // "CIA" -> X
                    } else if (hasNext && isFrontVowel(word.charAt(n + 1))) {
                        code.append('S'); // CI, CE, CY -> S
                    } else if (n > 0 && word.startsWith("SCH", n - 1)) {
                        code.append('K'); // SCH -> SK
                    } else if (word.startsWith("CH", n)) {
                        if (n == 0 && wdsz >= 3 && !isVowel(word.charAt(2))) {
                            code.append('K'); // initial CH + consonant -> K
                        } else {
                            code.append('X'); // CH + vowel -> X
                        }
                    } else {
                        code.append('K');
                    }
                    break;

                case 'D':
                    if (n + 2 < wdsz && word.charAt(n + 1) == 'G'
                            && isFrontVowel(word.charAt(n + 2))) {
                        code.append('J'); // DGE, DGI, DGY -> J
                        n += 2;           // the G and the vowel are consumed
                    } else {
                        code.append('T');
                    }
                    break;

                case 'G':
                    // GH is silent at the end of the word or before a non-vowel.
                    if (hasNext && word.charAt(n + 1) == 'H'
                            && (n + 2 == wdsz || !isVowel(word.charAt(n + 2)))) {
                        break;
                    }
                    // A non-initial G directly before N is silent. ("GNED"
                    // always implies "GN", so a single check suffices.)
                    if (n > 0 && word.startsWith("GN", n)) {
                        break;
                    }
                    // No "hard G" flag is needed here: the second G of a GG
                    // pair never reaches this case because the duplicate
                    // check above skips it.
                    if (hasNext && isFrontVowel(word.charAt(n + 1))) {
                        code.append('J');
                    } else {
                        code.append('K');
                    }
                    break;

                case 'H':
                    if (!hasNext) {
                        break; // terminal H is silent
                    }
                    if (n > 0 && varson.indexOf(word.charAt(n - 1)) >= 0) {
                        break; // already accounted for by CH, SH, PH, TH, GH
                    }
                    if (isVowel(word.charAt(n + 1))) {
                        code.append('H'); // H + vowel
                    }
                    break;

                case 'F': case 'J': case 'L':
                case 'M': case 'N': case 'R':
                    code.append(symb);
                    break;

                case 'K':
                    // K after C is silent (CK -> K was emitted by the C).
                    if (n == 0 || word.charAt(n - 1) != 'C') {
                        code.append('K');
                    }
                    break;

                case 'P':
                    if (hasNext && word.charAt(n + 1) == 'H') {
                        code.append('F'); // PH -> F
                    } else {
                        code.append('P');
                    }
                    break;

                case 'Q':
                    code.append('K');
                    break;

                case 'S':
                    if (word.startsWith("SH", n)
                            || word.startsWith("SIO", n)
                            || word.startsWith("SIA", n)) {
                        code.append('X');
                    } else {
                        code.append('S');
                    }
                    break;

                case 'T':
                    if (word.startsWith("TIA", n) || word.startsWith("TIO", n)) {
                        code.append('X'); // TIA, TIO -> X
                    } else if (word.startsWith("TCH", n)) {
                        break;            // T is silent in TCH
                    } else if (word.startsWith("TH", n)) {
                        code.append('0'); // numeral 0 stands in for theta
                    } else {
                        code.append('T');
                    }
                    break;

                case 'V':
                    code.append('F');
                    break;

                case 'W': case 'Y':
                    // silent unless followed by a vowel
                    if (hasNext && isVowel(word.charAt(n + 1))) {
                        code.append(symb);
                    }
                    break;

                case 'X':
                    code.append('K').append('S');
                    break;

                case 'Z':
                    code.append('S');
                    break;

                default:
                    // anything outside A-Z contributes nothing to the code
                    break;
            }
            n++;
        }

        // X emits two characters and may overshoot the limit by one.
        if (code.length() > maxCodeLen) {
            code.setLength(maxCodeLen);
        }
        return code.toString();
    }

    /**
     * Applies the rules for special word beginnings: KN/GN/PN and AE drop
     * their first letter, WR drops the W, WH becomes W, and an initial X
     * becomes S.
     *
     * @param upper the upper-cased word, at least two characters long
     * @return the working string with its initial letters normalised
     */
    private static String applyInitialExceptions(String upper) {
        final char second = upper.charAt(1);
        switch (upper.charAt(0)) {
            case 'K': case 'G': case 'P':
                return second == 'N' ? upper.substring(1) : upper;
            case 'A':
                return second == 'E' ? upper.substring(1) : upper;
            case 'W':
                if (second == 'R') {
                    return upper.substring(1);       // WR -> R
                }
                if (second == 'H') {
                    return "W" + upper.substring(2); // WH -> W
                }
                return upper;
            case 'X':
                return "S" + upper.substring(1);     // initial X -> S
            default:
                return upper;
        }
    }

    /** Returns whether {@code c} is one of A, E, I, O, U. */
    private static boolean isVowel(char c) {
        return vowels.indexOf(c) >= 0;
    }

    /** Returns whether {@code c} is one of the softening vowels E, I, Y. */
    private static boolean isFrontVowel(char c) {
        return frontv.indexOf(c) >= 0;
    }

}
// This page was automatically generated by Maven