org.apache.commons.codec.language.Soundex Java Examples
The following examples show how to use
org.apache.commons.codec.language.Soundex.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FuzzyMatch.java From hop with Apache License 2.0 | 6 votes |
/**
 * Encodes {@code value} with the phonetic algorithm selected by {@code algorithmType}.
 *
 * @param value the text to encode
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*} constants handled below
 * @return the encoder's output, or an empty string when {@code algorithmType} matches none of
 *         the four handled constants
 */
private String getEncodedMF( String value, Integer algorithmType ) {
  // Each branch returns directly; a fresh encoder instance is created per call.
  switch ( algorithmType ) {
    case FuzzyMatchMeta.OPERATION_TYPE_METAPHONE:
      return ( new Metaphone() ).metaphone( value );
    case FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE:
      return ( new DoubleMetaphone() ).doubleMetaphone( value );
    case FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX:
      return ( new Soundex() ).encode( value );
    case FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX:
      return ( new RefinedSoundex() ).encode( value );
    default:
      return "";
  }
}
Example #2
Source File: TestPhoneticFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs {@code checkOneTerm(analyzer, "", "")} against an analyzer built around each of several
 * phonetic encoders, i.e. feeds the empty string through a {@code PhoneticFilter} for every
 * encoder in the list.
 *
 * @throws IOException declared for the analysis helpers invoked here
 */
public void testEmptyTerm() throws IOException {
  Encoder[] encoders = {
    new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2()
  };
  for (final Encoder e : encoders) {
    // try-with-resources guarantees the analyzer is closed even when the check fails.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        // The third PhoneticFilter argument is chosen at random on every invocation.
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
      }
    }) {
      checkOneTerm(a, "", "");
    }
  }
}
Example #3
Source File: FuzzyMatch.java From pentaho-kettle with Apache License 2.0 | 6 votes |
/**
 * Produces the phonetic encoding of {@code value} for the requested algorithm.
 *
 * @param value the text to encode
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*} constants checked below
 * @return the result of the selected encoder, or an empty string if {@code algorithmType} is
 *         none of the four constants checked here
 */
private String getEncodedMF( String value, Integer algorithmType ) {
  // Unbox once up front; the comparisons below operate on the primitive value.
  final int type = algorithmType;

  if ( type == FuzzyMatchMeta.OPERATION_TYPE_METAPHONE ) {
    return ( new Metaphone() ).metaphone( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE ) {
    return ( new DoubleMetaphone() ).doubleMetaphone( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX ) {
    return ( new Soundex() ).encode( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX ) {
    return ( new RefinedSoundex() ).encode( value );
  }
  return "";
}
Example #4
Source File: TestPhoneticFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Calls {@code assertAlgorithm} twice per encoder, once with the boolean flag {@code true} and
 * once with {@code false}, passing a fixed input string and the expected output terms.
 *
 * @throws Exception propagated from {@code assertAlgorithm}
 */
public void testAlgorithms() throws Exception {
  // Shared inputs: one generic sample and one list of names used only for Caverphone2.
  final String sample = "aaa bbb ccc easgasg";
  final String names = "Darda Karleen Datha Carlene";

  assertAlgorithm(new Metaphone(), true, sample,
      new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
  assertAlgorithm(new Metaphone(), false, sample,
      new String[] { "A", "B", "KKK", "ESKS" });

  assertAlgorithm(new DoubleMetaphone(), true, sample,
      new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
  assertAlgorithm(new DoubleMetaphone(), false, sample,
      new String[] { "A", "PP", "KK", "ASKS" });

  assertAlgorithm(new Soundex(), true, sample,
      new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
  assertAlgorithm(new Soundex(), false, sample,
      new String[] { "A000", "B000", "C000", "E220" });

  assertAlgorithm(new RefinedSoundex(), true, sample,
      new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
  assertAlgorithm(new RefinedSoundex(), false, sample,
      new String[] { "A0", "B1", "C3", "E034034" });

  assertAlgorithm(new Caverphone2(), true, names,
      new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
                     "TTA1111111", "Datha", "KLN1111111", "Carlene" });
  assertAlgorithm(new Caverphone2(), false, names,
      new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

  assertAlgorithm(new Nysiis(), true, sample,
      new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
  assertAlgorithm(new Nysiis(), false, sample,
      new String[] { "A", "B", "C", "EASGAS" });
}
Example #5
Source File: ValueDataUtil.java From hop with Apache License 2.0 | 4 votes |
/**
 * Returns the Soundex encoding of {@code dataA}'s string form.
 *
 * @param metaA value metadata; not read by this method
 * @param dataA the value to encode; may be {@code null}
 * @return {@code null} when {@code dataA} is {@code null}, otherwise the result of
 *         {@code Soundex.encode} applied to {@code dataA.toString()}
 */
public static String get_SoundEx( IValueMeta metaA, Object dataA ) {
  return dataA == null ? null : ( new Soundex() ).encode( dataA.toString() );
}
Example #6
Source File: ValueDataUtil.java From pentaho-kettle with Apache License 2.0 | 4 votes |
/**
 * Computes the Soundex code for the string representation of a value.
 *
 * @param metaA value metadata; unused here
 * @param dataA the value to encode, possibly {@code null}
 * @return the Soundex encoding of {@code dataA.toString()}, or {@code null} if {@code dataA}
 *         is {@code null}
 */
public static String get_SoundEx( ValueMetaInterface metaA, Object dataA ) {
  if ( dataA != null ) {
    final Soundex soundex = new Soundex();
    return soundex.encode( dataA.toString() );
  }
  return null;
}
Example #7
Source File: PhoneticTokenFilterFactory.java From crate with Apache License 2.0 | 4 votes |
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.isDaitchMokotoff = false; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsList("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = settings.get("name_type", "generic"); if 
("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = null; this.isDaitchMokotoff = true; } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }