EN
Java - calculate Levenshtein distance between strings
7
points
In this short article, we would like to show a simple Java implementation of the Levenshtein distance algorithm.
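The Levenshtein distance algorithm measures the difference between two sequences (e.g. between two strings) as the minimum number of single-character insertions, deletions and substitutions needed to turn one into the other.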
When the algorithm returns
0
it means that the compared sequences are equal.
Simple implementation:
import java.util.Objects;
/**
 * Demonstrates a dynamic-programming implementation of the Levenshtein
 * (edit) distance between two strings.
 */
public class Program {

    /**
     * Returns the smallest of the three given values.
     */
    private static int findMin(int a, int b, int c) {
        return Math.min(Math.min(a, b), c);
    }

    /**
     * Calculates the Levenshtein distance between two strings: the minimum
     * number of single-character insertions, deletions and substitutions
     * needed to transform {@code a} into {@code b}.
     *
     * <p>Characters are compared as Unicode code points, so a supplementary
     * character (for example an emoji stored as a surrogate pair) counts as
     * one character rather than two. The comparison is case-sensitive.
     *
     * @param a the first string, must not be {@code null}
     * @param b the second string, must not be {@code null}
     * @return the edit distance; {@code 0} when both strings are equal
     * @throws NullPointerException if {@code a} or {@code b} is {@code null}
     */
    private static int calculateLevenshteinDistance(String a, String b) {
        Objects.requireNonNull(a, "a");
        Objects.requireNonNull(b, "b");

        // Work on code points instead of UTF-16 code units so that surrogate
        // pairs are treated as a single character.
        int[] aPoints = a.codePoints().toArray();
        int[] bPoints = b.codePoints().toArray();

        int aLimit = aPoints.length + 1;
        int bLimit = bPoints.length + 1;

        // distance[i][j] holds the distance between the first i characters
        // of a and the first j characters of b.
        int[][] distance = new int[aLimit][bLimit];

        // Turning a prefix of length i into the empty string takes i deletions.
        for (int i = 0; i < aLimit; ++i) {
            distance[i][0] = i;
        }
        // Turning the empty string into a prefix of length j takes j insertions.
        for (int j = 0; j < bLimit; ++j) {
            distance[0][j] = j;
        }

        for (int i = 1; i < aLimit; ++i) {
            int aPoint = aPoints[i - 1]; // constant for the whole row
            for (int j = 1; j < bLimit; ++j) {
                int substitutionCost = (aPoint == bPoints[j - 1]) ? 0 : 1;
                distance[i][j] = findMin(
                        distance[i - 1][j] + 1,                     // deletion
                        distance[i][j - 1] + 1,                     // insertion
                        distance[i - 1][j - 1] + substitutionCost   // substitution or match
                );
            }
        }

        return distance[aLimit - 1][bLimit - 1];
    }

    // Usage example:
    public static void main(String[] args) {
        System.out.println(calculateLevenshteinDistance("Chris", "Chris"));   // 0
        System.out.println(calculateLevenshteinDistance("John1", "John2"));   // 1
        System.out.println(calculateLevenshteinDistance("Google", "Gogle"));  // 1
        System.out.println(calculateLevenshteinDistance("Ann", "Matt"));      // 4
        System.out.println(calculateLevenshteinDistance("CHRIS", "Chris"));   // 4
    }
}
Output:
0
1
1
4
4
Case-insensitive Levenshtein distance algorithm
To make the comparison case-insensitive, convert both input strings to the same case with toLowerCase() or toUpperCase() before passing them to the existing algorithm.
import java.util.Objects;
/**
 * Demonstrates a case-insensitive variant of the Levenshtein (edit)
 * distance, built as a thin wrapper around the case-sensitive algorithm.
 */
public class Program {

    /**
     * Returns the smallest of the three given values.
     */
    private static int findMin(int a, int b, int c) {
        return Math.min(Math.min(a, b), c);
    }

    /**
     * Calculates the case-sensitive Levenshtein distance between two
     * strings: the minimum number of single-character insertions, deletions
     * and substitutions needed to transform {@code a} into {@code b}.
     *
     * <p>Characters are compared as Unicode code points, so a supplementary
     * character (for example an emoji stored as a surrogate pair) counts as
     * one character rather than two.
     *
     * @param a the first string, must not be {@code null}
     * @param b the second string, must not be {@code null}
     * @return the edit distance; {@code 0} when both strings are equal
     * @throws NullPointerException if {@code a} or {@code b} is {@code null}
     */
    private static int calculateLevenshteinDistance(String a, String b) {
        Objects.requireNonNull(a, "a");
        Objects.requireNonNull(b, "b");

        // Work on code points instead of UTF-16 code units so that surrogate
        // pairs are treated as a single character.
        int[] aPoints = a.codePoints().toArray();
        int[] bPoints = b.codePoints().toArray();

        int aLimit = aPoints.length + 1;
        int bLimit = bPoints.length + 1;

        // distance[i][j] holds the distance between the first i characters
        // of a and the first j characters of b.
        int[][] distance = new int[aLimit][bLimit];

        // Turning a prefix of length i into the empty string takes i deletions.
        for (int i = 0; i < aLimit; ++i) {
            distance[i][0] = i;
        }
        // Turning the empty string into a prefix of length j takes j insertions.
        for (int j = 0; j < bLimit; ++j) {
            distance[0][j] = j;
        }

        for (int i = 1; i < aLimit; ++i) {
            int aPoint = aPoints[i - 1]; // constant for the whole row
            for (int j = 1; j < bLimit; ++j) {
                int substitutionCost = (aPoint == bPoints[j - 1]) ? 0 : 1;
                distance[i][j] = findMin(
                        distance[i - 1][j] + 1,                     // deletion
                        distance[i][j - 1] + 1,                     // insertion
                        distance[i - 1][j - 1] + substitutionCost   // substitution or match
                );
            }
        }

        return distance[aLimit - 1][bLimit - 1];
    }

    /**
     * Calculates the Levenshtein distance between two strings while ignoring
     * letter case. Both inputs are lower-cased before being compared.
     *
     * @param a the first string, must not be {@code null}
     * @param b the second string, must not be {@code null}
     * @return the case-insensitive edit distance
     * @throws NullPointerException if {@code a} or {@code b} is {@code null}
     */
    private static int calculateImprovedLevenshteinDistance(String a, String b) {
        Objects.requireNonNull(a, "a");
        Objects.requireNonNull(b, "b");

        // Locale.ROOT keeps the result independent of the JVM default locale;
        // under a Turkish locale plain toLowerCase() maps 'I' to dotless 'ı',
        // which would make "CHRIS" and "Chris" differ.
        return calculateLevenshteinDistance(
                a.toLowerCase(java.util.Locale.ROOT),
                b.toLowerCase(java.util.Locale.ROOT));
    }

    // Usage example:
    public static void main(String[] args) {
        System.out.println(calculateImprovedLevenshteinDistance("CHRIS", "Chris"));   // 0
        System.out.println(calculateImprovedLevenshteinDistance("JOHN1", "John2"));   // 1
        System.out.println(calculateImprovedLevenshteinDistance("GOOGLE", "Gogle"));  // 1
        System.out.println(calculateImprovedLevenshteinDistance("ANN", "Matt"));      // 3
    }
}
Output:
0
1
1
3