EN
Java - validate encoded URI component characters
7 points
In this short article, we would like to show how to check whether a given text contains only characters that are allowed in a correctly encoded URI component, using Java.
Note: to learn which characters must be escaped when encoding a URI component, see this article.
Quick solution:
xxxxxxxxxx
1
package example;
2
3
import java.util.regex.Matcher;
4
import java.util.regex.Pattern;
5
6
public class Example {

    // Whitelist of characters permitted in an encoded URI component: digits, ASCII letters,
    // the marks - _ . ! ~ * ' ( ) and the escape sign %.
    private static final Pattern VALIDATION_PATTERN = Pattern.compile("^[0-9a-zA-Z-_.!~*'()%]*$");

    /**
     * Checks that the whole text is built only from characters accepted by
     * {@link #VALIDATION_PATTERN}.
     *
     * <p>Only the character set is verified; the pattern does not require {@code %} to be
     * followed by two hexadecimal digits. An empty text is accepted.
     *
     * @param text text to verify
     * @return {@code true} when every character of {@code text} is allowed
     */
    private static boolean validateURIComponent(String text) {
        return VALIDATION_PATTERN.matcher(text).matches();
    }

    public static void main(String []args) {

        // Usage example:

        String[] samples = {
            "abcdefghijklmnopqrstuvwxyz", // normal English letters
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ", // capital English letters
            "-_.!~*'()",                  // not escaped special characters
            "%",                          // escape character

            "@#$^&+=[]",                  // should be escaped
            "ąćęłńóśźż"                   // should be escaped
            // etc.
        };

        for (String sample : samples) {
            System.out.println(validateURIComponent(sample));
        }
    }
}
This section contains a solution that does not use regular expressions, which makes the source code below faster to execute.
Program.java file:
xxxxxxxxxx
1
package logic;
2
3
public class Program {
4
5
public static void main(String[] args) {
6
7
// Usage example:
8
9
System.out.println(URIComponentUtils.validateURIComponent("abcdefghijklmnopqrstuvwxyz")); // normal English letters
10
System.out.println(URIComponentUtils.validateURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZ")); // capital English letters
11
System.out.println(URIComponentUtils.validateURIComponent("-_.!~*'()")); // not escaped special characters
12
System.out.println(URIComponentUtils.validateURIComponent("%")); // escape character
13
14
System.out.println(URIComponentUtils.validateURIComponent("@#$^&+=[]")); // should be escaped
15
System.out.println(URIComponentUtils.validateURIComponent("ąćęłńóśźż")); // should be escaped
16
// etc.
17
}
18
}
URIComponentUtils.java file:
xxxxxxxxxx
1
package logic;
2
3
public class URIComponentUtils {

    // Validation pattern: ^[0-9a-zA-Z-_.!~*'()%]*$
    //
    // https://dirask.com/posts/ASCII-Table-pJ3Y0j
    // https://dirask.com/posts/JavaScript-encode-escape-URL-characters-D7ZJo1
    //
    // Lookup table for the ASCII codes 32 (SPACE) .. 127 (DEL).
    // The entry at index (code - 32) is true when the character is allowed.
    private static final boolean[] VALIDATION_CODES = {
        false, // SPACE
        true,  // !  // allowed
        false, // "
        false, // #
        false, // $
        true,  // %  // allowed
        false, // &
        true,  // '  // allowed
        true,  // (  // allowed
        true,  // )  // allowed
        true,  // *  // allowed
        false, // +
        false, // ,
        true,  // -  // allowed
        true,  // .  // allowed
        false, // /
        true,  // 0  // allowed
        true,  // 1  // allowed
        true,  // 2  // allowed
        true,  // 3  // allowed
        true,  // 4  // allowed
        true,  // 5  // allowed
        true,  // 6  // allowed
        true,  // 7  // allowed
        true,  // 8  // allowed
        true,  // 9  // allowed
        false, // :
        false, // ;
        false, // <
        false, // =
        false, // >
        false, // ?

        false, // @
        true,  // A  // allowed
        true,  // B  // allowed
        true,  // C  // allowed
        true,  // D  // allowed
        true,  // E  // allowed
        true,  // F  // allowed
        true,  // G  // allowed
        true,  // H  // allowed
        true,  // I  // allowed
        true,  // J  // allowed
        true,  // K  // allowed
        true,  // L  // allowed
        true,  // M  // allowed
        true,  // N  // allowed
        true,  // O  // allowed
        true,  // P  // allowed
        true,  // Q  // allowed
        true,  // R  // allowed
        true,  // S  // allowed
        true,  // T  // allowed
        true,  // U  // allowed
        true,  // V  // allowed
        true,  // W  // allowed
        true,  // X  // allowed
        true,  // Y  // allowed
        true,  // Z  // allowed
        false, // [
        false, // \
        false, // ]
        false, // ^
        true,  // _  // allowed

        false, // `
        true,  // a  // allowed
        true,  // b  // allowed
        true,  // c  // allowed
        true,  // d  // allowed
        true,  // e  // allowed
        true,  // f  // allowed
        true,  // g  // allowed
        true,  // h  // allowed
        true,  // i  // allowed
        true,  // j  // allowed
        true,  // k  // allowed
        true,  // l  // allowed
        true,  // m  // allowed
        true,  // n  // allowed
        true,  // o  // allowed
        true,  // p  // allowed
        true,  // q  // allowed
        true,  // r  // allowed
        true,  // s  // allowed
        true,  // t  // allowed
        true,  // u  // allowed
        true,  // v  // allowed
        true,  // w  // allowed
        true,  // x  // allowed
        true,  // y  // allowed
        true,  // z  // allowed
        false, // {
        false, // |
        false, // }
        true,  // ~  // allowed
        false  // DEL
    };

    /**
     * Checks that the text consists only of characters allowed by {@link #VALIDATION_CODES}:
     * digits, ASCII letters, the marks {@code - _ . ! ~ * ' ( )} and the escape sign {@code %}.
     *
     * <p>Only the character set is verified; a {@code %} is accepted even when it is not
     * followed by two hexadecimal digits. An empty text is accepted.
     *
     * <p>The method is public so that classes such as {@code Program} can call it.
     *
     * @param text text to verify, must not be {@code null}
     * @return {@code true} when every character of {@code text} is allowed
     * @throws NullPointerException when {@code text} is {@code null}
     */
    public static boolean validateURIComponent(String text) {
        int length = text.length();
        for (int i = 0; i < length; ++i) {
            // Reading single UTF-16 units is enough: every surrogate is above 127,
            // so supplementary characters are rejected at their first unit.
            int code = text.charAt(i);
            // Codes outside 32..127 have no table entry and are never allowed.
            if (code < 32 || code > 127 || !VALIDATION_CODES[code - 32]) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {

        // Usage example:

        System.out.println(validateURIComponent("abcdefghijklmnopqrstuvwxyz")); // normal English letters
        System.out.println(validateURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZ")); // capital English letters
        System.out.println(validateURIComponent("-_.!~*'()"));                  // not escaped special characters
        System.out.println(validateURIComponent("%"));                          // escape character

        System.out.println(validateURIComponent("@#$^&+=[]"));                  // should be escaped
        System.out.println(validateURIComponent("ąćęłńóśźż"));                  // should be escaped
        // etc.
    }
}