Quadcap Embeddable Database

com/quadcap/util/text/Soundex.java

Go to the documentation of this file.
package com.quadcap.util.text;

/* Copyright 1997 - 2003 Quadcap Software. All rights reserved.
 *
 * This software is distributed under the Quadcap Free Software License.
 * This software may be used or modified for any purpose, personal or
 * commercial. Open Source redistributions are permitted. Commercial
 * redistribution of larger works derived from, or works which bundle
 * this software requires a "Commercial Redistribution License"; see
 * http://www.quadcap.com/purchase.
 *
 * Redistributions qualify as "Open Source" under one of the following terms:
 *
 * Redistributions are made at no charge beyond the reasonable cost of
 * materials and delivery.
 *
 * Redistributions are accompanied by a copy of the Source Code or by an
 * irrevocable offer to provide a copy of the Source Code for up to three
 * years at the cost of materials and delivery. Such redistributions
 * must allow further use, modification, and redistribution of the Source
 * Code under substantially the same terms as this license.
 *
 * Redistributions of source code must retain the copyright notices as they
 * appear in each source code file, these license terms, and the
 * disclaimer/limitation of liability set forth as paragraph 6 below.
 *
 * Redistributions in binary form must reproduce this Copyright Notice,
 * these license terms, and the disclaimer/limitation of liability set
 * forth as paragraph 6 below, in the documentation and/or other materials
 * provided with the distribution.
 *
 * The Software is provided on an "AS IS" basis. No warranty is
 * provided that the Software is free of defects, or fit for a
 * particular purpose.
 *
 * Limitation of Liability.
/*
 * Quadcap Software shall not be liable
 * for any damages suffered by the Licensee or any third party resulting
 * from use of the Software.
 */

/**
 * SOUNDEX Utilities.
 *
 * <p>A SOUNDEX code is the upper-cased first ASCII letter of a string
 * followed by three digits taken from {@link #sMap}.  Letters whose digit
 * is {@code '0'} (the vowels and H, W, Y) are never emitted, and a letter
 * whose digit equals the digit of the most recently coded letter is
 * skipped.  Zero-coded letters do not reset that "most recent" digit, so
 * equal codes separated only by vowels still collapse into one.
 *
 * <p>Only the ASCII letters {@code A-Z} and {@code a-z} take part in the
 * encoding; every other character is ignored.
 *
 * @author Stan Bailes
 */
public class Soundex {
    /** Length of every non-empty code: one letter plus three digits. */
    private static final int CODE_LENGTH = 4;

    /*                          ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    static final String sMap = "01230120022455012623010202";

    /**
     * Tells whether {@code c} is one of the 52 ASCII letters.
     */
    private static boolean isAsciiLetter(int c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Returns the SOUNDEX digit for a letter.
     *
     * @param c a character value; case is ignored for ASCII letters
     * @return the digit character from {@link #sMap} for an ASCII letter,
     *         or {@code '0'} (the "not coded" digit) for anything else
     */
    static final char scode(int c) {
        if (c >= 'a' && c <= 'z') {
            return sMap.charAt(c - 'a');
        }
        if (c >= 'A' && c <= 'Z') {
            return sMap.charAt(c - 'A');
        }
        // Non-letters carry no SOUNDEX digit; report them as "not coded"
        // instead of indexing outside the table.
        return '0';
    }

    /**
     * Computes the four-character SOUNDEX code of a string.
     *
     * @param s the text to encode
     * @return the code (letter + three digits, right-padded with
     *         {@code '0'}), or the empty string if {@code s} contains no
     *         ASCII letter
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static final String soundex(String s) {
        char[] ret = new char[CODE_LENGTH];
        char last = '0';
        int pos = 0;
        for (int i = 0; i < s.length() && pos < CODE_LENGTH; i++) {
            // Use the full char value: masking to eight bits would make
            // characters such as U+0161 masquerade as ASCII letters.
            char c = s.charAt(i);
            if (!isAsciiLetter(c)) {
                continue;
            }
            char code = scode(c);
            if (pos == 0) {
                ret[pos++] = Character.toUpperCase(c);
                // Remember the first letter's digit so that an immediately
                // following letter with the same digit is not coded again
                // ("Pfister" is P236, not P123).
                last = code;
            } else if (code != '0' && code != last) {
                ret[pos++] = code;
                last = code;
            }
        }
        if (pos == 0) {
            return "";
        }
        while (pos < CODE_LENGTH) {
            ret[pos++] = '0';
        }
        return new String(ret);
    }

    /**
     * Counts the positions at which the SOUNDEX codes of two strings agree.
     *
     * @param a first string
     * @param b second string
     * @return a value from 0 (no position matches) to 4 (identical codes);
     *         0 when either string has no ASCII letter and therefore an
     *         empty code
     * @throws NullPointerException if either argument is {@code null}
     */
    public static final int difference(String a, String b) {
        String sa = soundex(a);
        String sb = soundex(b);
        // A code is either empty or exactly four characters long; only
        // compare positions that exist in both.
        int limit = Math.min(sa.length(), sb.length());
        int diff = 0;
        for (int i = 0; i < limit; i++) {
            if (sa.charAt(i) == sb.charAt(i)) {
                diff++;
            }
        }
        return diff;
    }

    //#ifndef RELEASE
    /** Sample inputs printed by {@link #main(String[])}. */
    static String[] data = {
        "blather", "blabber", "Smith", "Smyth",
        "abcdefghijklmnopqrstuvwxyz", "a", "b", "bed", "BBD",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "smithers", "smothers", "brothers"
    };

    /**
     * Prints the SOUNDEX code of every entry in {@link #data}, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (int i = 0; i < data.length; i++) {
            System.out.println(soundex(data[i]) + ": " + data[i]);
        }
    }
    //#endif
}