EN
Java - calculate Levenshtein distance between strings
7 points
In this short article, we would like to show simple Java implementation for the Levenstein distance algorithm.
Levenstein distance algorithm is used to measure the difference between two sequences (e.g. between two strings).
When the algorithm returns
0
it means: compared objects are equal.
Simple implementation:
xxxxxxxxxx
1
import java.util.Objects;
2
3
public class Program {
4
5
private static int findMin(int a, int b, int c) {
6
int min = Math.min(a, b);
7
return Math.min(min, c);
8
}
9
10
private static int calculateLevenshteinDistance(String a, String b) {
11
int aLimit = a.length() + 1;
12
int bLimit = b.length() + 1;
13
int[][] distance = new int[aLimit][];
14
for (int i = 0; i < aLimit; ++i) {
15
distance[i] = new int[bLimit];
16
}
17
for (int i = 0; i < aLimit; ++i) {
18
distance[i][0] = i;
19
}
20
for (int j = 0; j < bLimit; ++j) {
21
distance[0][j] = j;
22
}
23
for (int i = 1; i < aLimit; ++i) {
24
for (int j = 1; j < bLimit; ++j) {
25
char aChar = a.charAt(i - 1);
26
char bChar = b.charAt(j - 1);
27
distance[i][j] = findMin(
28
distance[i - 1][j] + 1,
29
distance[i][j - 1] + 1,
30
distance[i - 1][j - 1] + (Objects.equals(aChar, bChar) ? 0 : 1) // + substitution cost
31
);
32
}
33
}
34
return distance[a.length()][b.length()];
35
};
36
37
38
// Usage example:
39
40
public static void main(String[] args) {
41
42
System.out.println(calculateLevenshteinDistance("Chris", "Chris")); // 0
43
System.out.println(calculateLevenshteinDistance("John1", "John2")); // 1
44
System.out.println(calculateLevenshteinDistance("Google", "Gogle")); // 1
45
System.out.println(calculateLevenshteinDistance("Ann", "Matt" )); // 4
46
47
System.out.println(calculateLevenshteinDistance("CHRIS", "Chris")); // 4
48
}
49
}
Output:
xxxxxxxxxx
1
0
2
1
3
1
4
4
5
4
It is necessary to wrap the existing algorithm with toLowerCase()
or toUpperCase()
string transformation.
xxxxxxxxxx
1
import java.util.Objects;
2
3
public class Program {
4
5
private static int findMin(int a, int b, int c) {
6
int min = Math.min(a, b);
7
return Math.min(min, c);
8
}
9
10
private static int calculateLevenshteinDistance(String a, String b) {
11
int aLimit = a.length() + 1;
12
int bLimit = b.length() + 1;
13
int[][] distance = new int[aLimit][];
14
for (int i = 0; i < aLimit; ++i) {
15
distance[i] = new int[bLimit];
16
}
17
for (int i = 0; i < aLimit; ++i) {
18
distance[i][0] = i;
19
}
20
for (int j = 0; j < bLimit; ++j) {
21
distance[0][j] = j;
22
}
23
for (int i = 1; i < aLimit; ++i) {
24
for (int j = 1; j < bLimit; ++j) {
25
char aChar = a.charAt(i - 1);
26
char bChar = b.charAt(j - 1);
27
distance[i][j] = findMin(
28
distance[i - 1][j] + 1,
29
distance[i][j - 1] + 1,
30
distance[i - 1][j - 1] + (Objects.equals(aChar, bChar) ? 0 : 1) // + substitution cost
31
);
32
}
33
}
34
return distance[a.length()][b.length()];
35
};
36
37
private static int calculateImprovedLevenshteinDistance(String a, String b) {
38
return calculateLevenshteinDistance (a.toLowerCase(), b.toLowerCase());
39
};
40
41
42
// Usage example:
43
44
public static void main(String[] args) {
45
46
System.out.println(calculateImprovedLevenshteinDistance("CHRIS", "Chris")); // 0
47
System.out.println(calculateImprovedLevenshteinDistance("JOHN1", "John2")); // 1
48
System.out.println(calculateImprovedLevenshteinDistance("GOOGLE", "Gogle")); // 1
49
System.out.println(calculateImprovedLevenshteinDistance("ANN", "Matt" )); // 3
50
}
51
}
Output:
xxxxxxxxxx
1
0
2
1
3
1
4
3