Java Code Examples for org.apache.commons.lang3.StringUtils#getLevenshteinDistance()
The following examples show how to use
org.apache.commons.lang3.StringUtils#getLevenshteinDistance().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContactsSyncAdapterService.java From haxsync with GNU General Public License v2.0 | 6 votes |
private static String matches(Set<String> phoneContacts, String fbContact, int maxdistance){ if (maxdistance == 0){ if (phoneContacts.contains(fbContact)){ return fbContact; } return null; //return phoneContacts.contains(fbContact); } int bestDistance = maxdistance; String bestMatch = null; for (String contact : phoneContacts){ int distance = StringUtils.getLevenshteinDistance(contact != null ? contact.toLowerCase() : "", fbContact != null ? fbContact.toLowerCase() : ""); if( distance <= bestDistance){ //Log.i("FOUND MATCH", "Phone Contact: " + contact +" FB Contact: " + fbContact +" distance: " + distance + "max distance: " +maxdistance); bestMatch = contact; bestDistance = distance; } } return bestMatch; }
Example 2
Source File: AliasHandler.java From Truck-Factor with MIT License | 6 votes |
/**
 * Groups developer names that look like aliases of one another.
 *
 * <p>Each name is compared against the names still remaining in the working
 * list; a pair is treated as an alias when the names differ but the
 * Levenshtein distance of their {@code convertToUTFLower} forms is within
 * the threshold.
 *
 * @param allDevelopers all developer names to examine
 * @param distance      maximum edit distance for an alias; {@code -1} means
 *                      "use the number of space-separated tokens of the
 *                      name being examined"
 * @param minSize       names shorter than this are never used as the key of
 *                      an alias group
 * @return map from a developer name to the names considered its aliases
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
    int threshold = distance;
    // CopyOnWriteArrayList iterators walk a snapshot, so the removals below
    // never disturb a loop that is already running.
    List<String> remaining = new CopyOnWriteArrayList<String>(allDevelopers);
    Map<String, List<String>> aliases = new HashMap<String, List<String>>();

    for (String candidate : remaining) {
        remaining.remove(candidate);
        for (String other : remaining) {
            if (candidate.length() < minSize) {
                continue;
            }
            int editDistance = StringUtils.getLevenshteinDistance(
                    convertToUTFLower(candidate), convertToUTFLower(other));
            if (distance == -1) {
                threshold = candidate.split(" ").length;
            }
            if (candidate.equals(other) || editDistance > threshold) {
                continue;
            }

            List<String> group = aliases.get(candidate);
            if (group == null) {
                group = new ArrayList<String>();
                aliases.put(candidate, group);
            }
            group.add(other);
            // Drop the alias from the working list so later inner loops skip it.
            remaining.remove(other);
        }
    }
    return aliases;
}
Example 3
Source File: NewAliasHandler.java From Truck-Factor with MIT License | 6 votes |
/**
 * Groups developer names that look like aliases of one another.
 *
 * <p>Each name is compared against the names still remaining in the working
 * list; a pair is treated as an alias when the names differ but the
 * Levenshtein distance of their {@code convertToUTFLower} forms is within
 * the threshold.
 *
 * @param allDevelopers all developer names to examine
 * @param distance      maximum edit distance for an alias; {@code -1} means
 *                      "use the number of space-separated tokens of the
 *                      name being examined"
 * @param minSize       names shorter than this are never used as the key of
 *                      an alias group
 * @return map from a developer name to the names considered its aliases
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
    int threshold = distance;
    // CopyOnWriteArrayList iterators walk a snapshot, so the removals below
    // never disturb a loop that is already running.
    List<String> remaining = new CopyOnWriteArrayList<String>(allDevelopers);
    Map<String, List<String>> aliases = new HashMap<String, List<String>>();

    for (String candidate : remaining) {
        remaining.remove(candidate);
        for (String other : remaining) {
            if (candidate.length() < minSize) {
                continue;
            }
            int editDistance = StringUtils.getLevenshteinDistance(
                    convertToUTFLower(candidate), convertToUTFLower(other));
            if (distance == -1) {
                threshold = candidate.split(" ").length;
            }
            if (candidate.equals(other) || editDistance > threshold) {
                continue;
            }

            List<String> group = aliases.get(candidate);
            if (group == null) {
                group = new ArrayList<String>();
                aliases.put(candidate, group);
            }
            group.add(other);
            // Drop the alias from the working list so later inner loops skip it.
            remaining.remove(other);
        }
    }
    return aliases;
}
Example 4
Source File: AliasesIdentifier.java From Truck-Factor with MIT License | 6 votes |
/**
 * Groups developers whose names look like aliases of one another.
 *
 * <p>Two developers are paired when their ids differ, their names differ,
 * and the Levenshtein distance of the {@code convertToUTFLower} forms of
 * their names is within the threshold.
 *
 * @param allDevelopers all developers to examine
 * @param distance      maximum edit distance for an alias; {@code -1} means
 *                      "use the number of space-separated tokens of the
 *                      name being examined"
 * @param minSize       developers whose name is shorter than this are never
 *                      used as the key of an alias group
 * @return map from a developer to the developers considered its aliases
 */
private static Map<Developer, List<Developer>> findAliases(List<Developer> allDevelopers, int distance, int minSize) {
    int threshold = distance;
    // CopyOnWriteArrayList iterators walk a snapshot, so the removals below
    // never disturb a loop that is already running.
    List<Developer> remaining = new CopyOnWriteArrayList<Developer>(allDevelopers);
    Map<Developer, List<Developer>> aliases = new HashMap<Developer, List<Developer>>();

    for (Developer candidate : remaining) {
        remaining.remove(candidate);
        for (Developer other : remaining) {
            // NOTE(review): ids are compared with ==; that is only a value
            // comparison if getId() returns a primitive - confirm.
            if (candidate.getId() == other.getId() || candidate.getName().length() < minSize) {
                continue;
            }
            int editDistance = StringUtils.getLevenshteinDistance(
                    convertToUTFLower(candidate.getName()), convertToUTFLower(other.getName()));
            if (distance == -1) {
                threshold = candidate.getName().split(" ").length;
            }
            if (candidate.getName().equals(other.getName()) || editDistance > threshold) {
                continue;
            }

            List<Developer> group = aliases.get(candidate);
            if (group == null) {
                group = new ArrayList<Developer>();
                aliases.put(candidate, group);
            }
            group.add(other);
            // Drop the alias from the working list so later inner loops skip it.
            remaining.remove(other);
        }
    }
    return aliases;
}
Example 5
Source File: LevenshteinAutomatonTest.java From dictomaton with Apache License 2.0 | 5 votes |
/**
 * Generates a random word, builds a dictionary of variants of it produced by
 * random edit operations, and checks that the Levenshtein automaton for the
 * word finds exactly the variants that lie within the given edit distance.
 *
 * @param minLength             the minimum length of the generated word
 * @param maxLength             the maximum length of the generated word
 *                              (must not be smaller than {@code minLength})
 * @param nPermutations         the number of variants to generate
 * @param nRandomEditOperations exclusive upper bound on the number of random
 *                              edit operations applied to each variant
 * @param distance              the edit distance the automaton is tested with
 * @throws DictionaryBuilderException propagated from building the dictionary
 */
private void generateAndCheckPermutations(int minLength, int maxLength, int nPermutations,
        int nRandomEditOperations, int distance) throws DictionaryBuilderException {
    // Draw the length from [minLength, maxLength]. The previous expression
    // minLength + (maxLength - minLength + 1) always evaluated to
    // maxLength + 1, so the length was neither random nor within the range.
    String str = randomString(minLength + d_rng.nextInt(maxLength - minLength + 1));

    // Every generated variant; kept sorted (presumably DictionaryBuilder
    // expects ordered input - confirm).
    TreeSet<String> all = new TreeSet<>();
    // The variants the automaton is expected to accept.
    Set<String> shouldHave = new HashSet<>();

    for (int i = 0; i < nPermutations; ++i) {
        int n = d_rng.nextInt(nRandomEditOperations);

        StringBuilder permutedBuilder = new StringBuilder(str);
        for (int perm = 0; perm < n; ++perm) {
            d_editOperations[d_rng.nextInt(d_editOperations.length)].apply(permutedBuilder);
        }

        String permuted = permutedBuilder.toString();
        all.add(permuted);

        // n random edits can cancel out, so measure the real distance.
        if (StringUtils.getLevenshteinDistance(str, permuted) <= distance) {
            shouldHave.add(permuted);
        }
    }

    Dictionary dict = new DictionaryBuilder().addAll(all).build();
    LevenshteinAutomaton la = new LevenshteinAutomaton(str, distance);

    Assert.assertEquals(shouldHave, la.intersectionLanguage(dict));
}
Example 6
Source File: ParameterMatchMockStrategy.java From jvm-sandbox-repeater with Apache License 2.0 | 5 votes |
/**
 * Scores how similar a recorded invocation's serialized request is to the
 * serialized request being mocked.
 *
 * <p>The score is {@code 1 - distance / longerLength}, where
 * {@code distance} is the Levenshtein distance between the two serialized
 * forms: 1.0 means identical, 0.0 means nothing in common.
 *
 * @param invocation        the recorded invocation to compare against
 * @param request           the mock request; when its modified-identity
 *                          collection contains the invocation's identity,
 *                          the invocation's request is re-serialized instead
 *                          of using its stored serialized form
 * @param requestSerialized serialized form of the current request
 * @return similarity in the range [0, 1]
 * @throws SerializeException if re-serializing the invocation's request fails
 */
private double calcSimilarity(Invocation invocation, MockRequest request, String requestSerialized) throws SerializeException {
    final String requestSerializedTarget;
    if (CollectionUtils.isNotEmpty(request.getModifiedInvocationIdentity())
            && request.getModifiedInvocationIdentity().contains(invocation.getIdentity())) {
        requestSerializedTarget = SerializerWrapper.hessianSerialize(
                invocation.getRequest(), request.getEvent().javaClassLoader);
    } else {
        requestSerializedTarget = invocation.getRequestSerialized();
    }

    int distance = StringUtils.getLevenshteinDistance(requestSerialized, requestSerializedTarget);
    int longest = Math.max(requestSerialized.length(), requestSerializedTarget.length());
    if (longest == 0) {
        // Both strings are empty and therefore identical. Without this guard
        // the division below is 0/0 = NaN, which fails every comparison.
        return 1.0;
    }
    return 1 - (double) distance / longest;
}
Example 7
Source File: StringDistanceMetrics.java From dungeon with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Computes the Levenshtein distance between two strings after checking each
 * one against the maximum command length.
 *
 * @param a the first string
 * @param b the second string
 * @return the Levenshtein distance between {@code a} and {@code b}
 * @throws IllegalArgumentException if either string is not within the
 *         maximum command length
 */
static int levenshteinDistance(final String a, final String b) {
    // Validate both inputs, first a then b, before doing any real work.
    for (final String input : new String[] {a, b}) {
        if (!CommandLimits.isWithinMaximumCommandLength(input)) {
            throw new IllegalArgumentException("input is too big.");
        }
    }
    return StringUtils.getLevenshteinDistance(a, b);
}
Example 8
Source File: StringSimilarityCallable.java From OpenID-Attacker with GNU General Public License v2.0 | 5 votes |
@Override public Float call() throws Exception { //Instant startComputation = Instant.now(); float result = StringUtils.getLevenshteinDistance(s1, s2); //Instant endComputation = Instant.now(); //Duration duration = Duration.between(startComputation, endComputation); //System.out.println("length: " + s1.length() + "/" + s2.length() + ", duration: " + (duration.toNanos() / 1000000000) + " s, " + "result: " + result); //System.out.println("duration levenshtein: " + (duration.toNanos() / 1000000000) + " s"); return result; }
Example 9
Source File: Importer.java From 10000sentences with Apache License 2.0 | 5 votes |
protected boolean sentenceOK(SentenceVO s) { String targ = s.getTargetSentence(); String known = s.getKnownSentence(); if (StringUtils.equals(targ, known)) { //System.out.printf("Same: %s <-> %s\n", targ, known); return false; } int tLen = targ.length(); int kLen = known.length(); if (StringUtils.getLevenshteinDistance(targ, known) < 0.2 * (tLen + kLen) / 2.) { //System.out.printf("Too similar: %s <-> %s\n", targ, known); return false; } if (tLen < 50 && kLen < 50) { return true; } if (Math.max(tLen, kLen) / Math.min(tLen, kLen) > 3) { //System.out.printf("Nope: %s <-> %s\n", known, targ); return false; } if (NUMBER_DELIMITER.matcher(targ).matches() || NUMBER_DELIMITER.matcher(known).matches()) { //System.out.printf("Has numbers: %s <-> %s\n", known, targ); return false; } return true; }
Example 10
Source File: GuildUtils.java From FlareBot with MIT License | 5 votes |
/**
 * Resolves a single {@link Role} of a guild from user-supplied text.
 *
 * <p>Lookup order:
 * <ol>
 *   <li>the first role whose name equals {@code s}, ignoring case;</li>
 *   <li>the role whose id is the number formed by the digits contained in
 *       {@code s};</li>
 *   <li>only when {@code channel} is not {@code null}: the first result of
 *       {@code guild.getRolesByName(s, true)}; if that is empty, an error
 *       message is sent to {@code channel} and {@code null} is returned.</li>
 * </ol>
 *
 * @param s       The String to get a role from
 * @param guildId The id of the {@link Guild} to get the role from
 * @param channel The channel to send an error message to if no role is
 *                found; may be {@code null} to skip the message
 * @return the matching role, or {@code null} if no role could be resolved
 */
public static Role getRole(String s, String guildId, TextChannel channel) {
    Guild guild = Getters.getGuildById(guildId);
    // 1) Name match, ignoring case.
    Role role = guild.getRoles().stream()
            .filter(r -> r.getName().equalsIgnoreCase(s))
            .findFirst().orElse(null);
    if (role != null) return role;
    try {
        // 2) Strip everything but digits and treat the remainder as a role id.
        role = guild.getRoleById(Long.parseLong(s.replaceAll("[^0-9]", "")));
        if (role != null) return role;
    } catch (NumberFormatException | NullPointerException ignored) {
        // s is null, or its digits do not parse as a long (e.g. there are
        // none): simply fall through to the next strategy.
    }
    if (channel != null) {
        if (guild.getRolesByName(s, true).isEmpty()) {
            // Nothing matched: suggest the closest role name. Only roles whose
            // id is in the guild's self-assign list are considered, and the
            // distance must be strictly below LEVENSHTEIN_DISTANCE.
            String closest = null;
            int distance = LEVENSHTEIN_DISTANCE;
            for (Role role1 : guild.getRoles().stream().filter(role1 -> FlareBotManager.instance().getGuild(guildId).getSelfAssignRoles()
                    .contains(role1.getId())).collect(Collectors.toList())) {
                int currentDistance = StringUtils.getLevenshteinDistance(role1.getName(), s);
                if (currentDistance < distance) {
                    distance = currentDistance;
                    closest = role1.getName();
                }
            }
            // The error is sent whether or not a suggestion was found.
            MessageUtils.sendErrorMessage("That role does not exist! "
                    + (closest != null ? "Maybe you mean `" + closest + "`" : ""), channel);
            return null;
        } else {
            return guild.getRolesByName(s, true).get(0);
        }
    }
    return null;
}
Example 11
Source File: RequestSender.java From Airachnid-Burp-Extension with GNU General Public License v3.0 | 5 votes |
/** * Testing if the responses of two requests are similar. This is the not the same as the same, rather there is a * threshold set in the static parameters of the class. * @param firstString * @param secondString * @return Test if similar */ private static boolean testSimilar(String firstString, String secondString) { // int fuzzyDist = StringUtils.getFuzzyDistance(firstString, secondString, Locale.getDefault()); double jaroDist = StringUtils.getJaroWinklerDistance(firstString, secondString); int levenDist = StringUtils.getLevenshteinDistance(firstString, secondString); // BurpExtender.print("============================================"); // BurpExtender.print("Fuzzy Distance:" + fuzzyDist); // BurpExtender.print(" Jaro Winkler Distance:" + jaroDist); // BurpExtender.print(" Levenshtein Distance:" + levenDist); // BurpExtender.print("============================================"); return jaroDist >= JARO_THRESHOLD || levenDist <= LEVENSHTEIN_THRESHOLD; }
Example 12
Source File: GeoNameResolver.java From lucene-geo-gazetteer with Apache License 2.0 | 4 votes |
/**
 * Selects the best matches for each location name extracted from a document,
 * choosing among the candidate matches found for that name.
 *
 * <p>Each candidate is weighted by:
 * <ol>
 *   <li>whether the extracted name occurs in the candidate's name as a whole
 *       space-delimited word ({@code WEIGHT_NAME_MATCH}) or only as a
 *       substring ({@code WEIGHT_NAME_PART_MATCH});</li>
 *   <li>the summed edit distance to those alternate names that contain the
 *       extracted name, converted by {@code getCalibratedWeight};</li>
 *   <li>its position in the candidate list, earlier entries scoring
 *       higher ({@code WEIGHT_SORT_ORDER}).</li>
 * </ol>
 * The computed weight is stored on each candidate via {@code setWeight}.
 * Names with no candidates are skipped and get no entry in the result.
 *
 * @param resolvedEntities
 *            output map; receives, per extracted name, up to {@code count}
 *            candidates in descending weight order
 * @param allCandidates
 *            all candidate locations found for each extracted name
 * @param count
 *            maximum number of results to keep per name
 */
private void pickBestCandidates(
        HashMap<String, List<Location>> resolvedEntities,
        HashMap<String, List<Location>> allCandidates, int count) {

    for (String extractedName : allCandidates.keySet()) {
        List<Location> cur = allCandidates.get(extractedName);
        if (cur.isEmpty()) {
            continue; // no results found for this name
        }

        // Heap ordered by descending weight so poll() yields the best first.
        PriorityQueue<Location> pq = new PriorityQueue<>(cur.size(), new Comparator<Location>() {
            @Override
            public int compare(Location o1, Location o2) {
                return Integer.compare(o2.getWeight(), o1.getWeight());
            }
        });

        // Space-padded so that contains() only matches whole words.
        String paddedExtractedName = String.format(" %s ", extractedName);

        for (int i = 0; i < cur.size(); ++i) {
            Location candidate = cur.get(i);
            int weight = 0;

            String resolvedName = String.format(" %s ", candidate.getName());
            if (resolvedName.contains(paddedExtractedName)) {
                // Extracted name appears as an exact word in the name.
                weight = WEIGHT_NAME_MATCH;
            } else if (resolvedName.contains(extractedName)) {
                // Extracted name appears only as part of a word.
                weight = WEIGHT_NAME_PART_MATCH;
            }

            // Sum the edit distance over alternate names containing the name.
            String[] altNames = candidate.getAlternateNames().split(",");
            float altEditDist = 0;
            for (String altName : altNames) {
                if (altName.contains(extractedName)) {
                    altEditDist += StringUtils.getLevenshteinDistance(extractedName, altName);
                }
            }
            // The smaller the edit distance, the larger the weight should be.
            weight += getCalibratedWeight(altNames.length, altEditDist);
            // Prefer the incoming order: the 0th result gets the largest bonus.
            weight += (cur.size() - i) * WEIGHT_SORT_ORDER;

            candidate.setWeight(weight);
            pq.add(candidate);
        }

        List<Location> resultList = new ArrayList<>();
        for (int i = 0; i < count && !pq.isEmpty(); i++) {
            resultList.add(pq.poll());
        }
        resolvedEntities.put(extractedName, resultList);
    }
}
Example 13
Source File: StringTools.java From CogStack-Pipeline with Apache License 2.0 | 4 votes |
/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the value returned by the delegate
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int editDistance = StringUtils.getLevenshteinDistance(str1, str2);
    return editDistance;
}
Example 14
Source File: FuzzyMatch.java From datacollector with Apache License 2.0 | 4 votes |
/**
 * Converts the Levenshtein distance between two strings into a similarity
 * score from 0 to 100.
 *
 * <p>Despite the method name, the result is not a distance: 100 means the
 * strings are identical, and the score drops as the share of edited
 * characters (relative to the longer string) grows.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return {@code 100 - (int) (100 * distance / longerLength)}
 */
private static int calculateLevenshteinDistance(String s1, String s2) {
    final int edits = StringUtils.getLevenshteinDistance(s1, s2);
    final int longest = Math.max(s1.length(), s2.length());
    // For two empty strings this is 0.0 / 0 = NaN; the int cast below turns
    // NaN into 0, so the score comes out as 100.
    final double fractionChanged = ((double) edits) / longest;
    return 100 - (int) (fractionChanged * 100);
}
Example 15
Source File: DistanceStringFilter.java From Indra with MIT License | 4 votes |
/**
 * Tells whether two terms match under this filter.
 *
 * @param t1 the first term; its length must be at least {@code threshold}
 * @param t2 the second term
 * @return {@code true} if {@code t1} is long enough and the Levenshtein
 *         distance between the terms is strictly below {@code min}
 */
@Override
public boolean matches(String t1, String t2) {
    final boolean longEnough = t1.length() >= this.threshold;
    if (!longEnough) {
        // Too short: skip the distance computation entirely.
        return false;
    }
    return StringUtils.getLevenshteinDistance(t1, t2) < min;
}
Example 16
Source File: LevenshteinRanker.java From Stargraph with MIT License | 4 votes |
/**
 * Computes the distance between two character sequences as their
 * Levenshtein distance.
 *
 * @param s1 the first sequence
 * @param s2 the second sequence
 * @return the Levenshtein distance, widened to {@code double}
 */
@Override
double computeStringDistance(CharSequence s1, CharSequence s2) {
    final int edits = StringUtils.getLevenshteinDistance(s1, s2);
    return edits;
}
Example 17
Source File: IngestHTRIntoAbbyyXML.java From TranskribusCore with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the Levenshtein distance between two fixed sample words and
 * discards the result.
 *
 * NOTE(review): the result is never used and the name does not match the
 * body; this looks like leftover scratch code - confirm before relying on it.
 */
private static void compareVersions() {
    final String first = "fly";
    final String second = "ant";
    StringUtils.getLevenshteinDistance(first, second);
}
Example 18
Source File: CandidateIEObject.java From TableDisentangler with GNU General Public License v3.0 | 2 votes |
/**
 * Calculates the Levenshtein distance between this object's
 * {@code NormalizedPattern} and the given pattern.
 *
 * <p>No normalization is done here; pass a pattern that has already been
 * normalized.
 *
 * @param pattern the pattern to compare against
 * @return the Levenshtein distance between the two patterns
 */
public int calculateLevenshtein(String pattern) {
    final String reference = NormalizedPattern;
    return StringUtils.getLevenshteinDistance(reference, pattern);
}