Languages
[Edit]
EN

Java - validate encoded URI component characters

7 points
Created by:
Burhan-Boyce
358

In this short article, we would like to show how to check whether some text is a correctly encoded URI component using Java.

Note: to learn which characters should be escaped when encoding a URI component, check this article.

Quick solution:

package example;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Example {

    // Accepts only ASCII digits and letters, the marks - _ . ! ~ * ' ( )
    // and the escape character %; the pattern is compiled once and reused.
    private static final Pattern VALIDATION_PATTERN = Pattern.compile("^[0-9a-zA-Z-_.!~*'()%]*$");

    /**
     * Tells whether every character of the given text is allowed inside an
     * encoded URI component.
     *
     * Only the character set is checked: a '%' is accepted on its own, without
     * verifying that two hexadecimal digits follow it. An empty text is accepted.
     *
     * @param text the text to check
     * @return true when the whole text matches the validation pattern
     */
    private static boolean validateURIComponent(String text) {
        return VALIDATION_PATTERN.matcher(text).matches();
    }

    public static void main(String[] args) {

        // Usage example: one result is printed per sample, in order.

        String[] samples = {
            "abcdefghijklmnopqrstuvwxyz",  // normal English letters
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  // capital English letters
            "-_.!~*'()",                   // not escaped special characters
            "%",                           // escape character

            "@#$^&+=[]",                   // should be escaped
            "ąćęłńóśźż"                    // should be escaped
                                           // etc.
        };

        for (String sample : samples) {
            System.out.println(validateURIComponent(sample));
        }
    }
}

 

Optimal solution

This section contains a solution that does not use regular expressions, which makes the source code below more efficient (faster to execute).

Program.java file:

package logic;

public class Program {

	public static void main(String[] args) {

		// Usage example:

		System.out.println(URIComponentUtils.validateURIComponent("abcdefghijklmnopqrstuvwxyz"));  // normal English letters
		System.out.println(URIComponentUtils.validateURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZ"));  // capital English letters
		System.out.println(URIComponentUtils.validateURIComponent("-_.!~*'()"));                   // not escaped special characters
		System.out.println(URIComponentUtils.validateURIComponent("%"));                           // escape character

		System.out.println(URIComponentUtils.validateURIComponent("@#$^&+=[]"));   // should be escaped
		System.out.println(URIComponentUtils.validateURIComponent("ąćęłńóśźż"));   // should be escaped
		                                                         // etc.
	}
}

URIComponentUtils.java file:

package logic;

/**
 * Character-level validation of encoded URI components, implemented with a
 * lookup table instead of a regular expression.
 */
public class URIComponentUtils {

	// Validation pattern:  ^[0-9a-zA-Z-_.!~*'()%]*$
	//
	// https://dirask.com/posts/ASCII-Table-pJ3Y0j
	// https://dirask.com/posts/JavaScript-encode-escape-URL-characters-D7ZJo1
	//
	// One entry per ASCII code from 32 (SPACE) to 127 (DEL); the entry for
	// a character sits at index (code - 32).
	//
	private static final boolean[] VALIDATION_CODES = {
			false,  // SPACE
			true,   //   !    // allowed
			false,  //   "
			false,  //   #
			false,  //   $
			true,   //   %    // allowed
			false,  //   &
			true,   //   '    // allowed
			true,   //   (    // allowed
			true,   //   )    // allowed
			true,   //   *    // allowed
			false,  //   +
			false,  //   ,
			true,   //   -    // allowed
			true,   //   .    // allowed
			false,  //   /
			true,   //   0    // allowed
			true,   //   1    // allowed
			true,   //   2    // allowed
			true,   //   3    // allowed
			true,   //   4    // allowed
			true,   //   5    // allowed
			true,   //   6    // allowed
			true,   //   7    // allowed
			true,   //   8    // allowed
			true,   //   9    // allowed
			false,  //   :
			false,  //   ;
			false,  //   <
			false,  //   =
			false,  //   >
			false,  //   ?

			false,  //   @
			true,   //   A    // allowed
			true,   //   B    // allowed
			true,   //   C    // allowed
			true,   //   D    // allowed
			true,   //   E    // allowed
			true,   //   F    // allowed
			true,   //   G    // allowed
			true,   //   H    // allowed
			true,   //   I    // allowed
			true,   //   J    // allowed
			true,   //   K    // allowed
			true,   //   L    // allowed
			true,   //   M    // allowed
			true,   //   N    // allowed
			true,   //   O    // allowed
			true,   //   P    // allowed
			true,   //   Q    // allowed
			true,   //   R    // allowed
			true,   //   S    // allowed
			true,   //   T    // allowed
			true,   //   U    // allowed
			true,   //   V    // allowed
			true,   //   W    // allowed
			true,   //   X    // allowed
			true,   //   Y    // allowed
			true,   //   Z    // allowed
			false,  //   [
			false,  //   \
			false,  //   ]
			false,  //   ^
			true,   //   _    // allowed

			false,  //   `
			true,   //   a    // allowed
			true,   //   b    // allowed
			true,   //   c    // allowed
			true,   //   d    // allowed
			true,   //   e    // allowed
			true,   //   f    // allowed
			true,   //   g    // allowed
			true,   //   h    // allowed
			true,   //   i    // allowed
			true,   //   j    // allowed
			true,   //   k    // allowed
			true,   //   l    // allowed
			true,   //   m    // allowed
			true,   //   n    // allowed
			true,   //   o    // allowed
			true,   //   p    // allowed
			true,   //   q    // allowed
			true,   //   r    // allowed
			true,   //   s    // allowed
			true,   //   t    // allowed
			true,   //   u    // allowed
			true,   //   v    // allowed
			true,   //   w    // allowed
			true,   //   x    // allowed
			true,   //   y    // allowed
			true,   //   z    // allowed
			false,  //   {
			false,  //   |
			false,  //   }
			true,   //   ~    // allowed
			false   //  DEL
	};

	// ASCII code of the first table entry (SPACE).
	private static final int FIRST_CODE = 32;

	/**
	 * Checks whether every character of the given text is allowed inside an
	 * encoded URI component: ASCII letters, digits, the marks
	 * {@code - _ . ! ~ * ' ( )} and the escape character {@code %}.
	 *
	 * Only the character set is checked: a {@code %} is accepted on its own,
	 * without verifying that two hexadecimal digits follow it. An empty text
	 * is accepted.
	 *
	 * The method is public so that classes in other files (such as a separate
	 * demo program) can call it.
	 *
	 * @param text the text to check
	 * @return {@code true} when all characters are allowed, {@code false} otherwise
	 * @throws NullPointerException if {@code text} is {@code null}
	 */
	public static boolean validateURIComponent(String text) {
		int length = text.length();
		for (int i = 0; i < length; ++i) {
			// Anything outside the table range (control characters and all
			// non-ASCII characters, including surrogates) is rejected.
			int index = text.charAt(i) - FIRST_CODE;
			if (index < 0 || index >= VALIDATION_CODES.length || !VALIDATION_CODES[index]) {
				return false;
			}
		}
		return true;
	}

	public static void main(String[] args) {

		// Usage example:

		System.out.println(validateURIComponent("abcdefghijklmnopqrstuvwxyz"));  // normal English letters
		System.out.println(validateURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZ"));  // capital English letters
		System.out.println(validateURIComponent("-_.!~*'()"));                   // not escaped special characters
		System.out.println(validateURIComponent("%"));                           // escape character

		System.out.println(validateURIComponent("@#$^&+=[]"));   // should be escaped
		System.out.println(validateURIComponent("ąćęłńóśźż"));   // should be escaped
		                                                         // etc.
	}
}

See also

  1. Java - encodeURIComponent equivalent in JavaScript

  2. Java - decodeURIComponent equivalent in JavaScript

Native Advertising
🚀
Get your tech brand or product in front of software developers.
For more information Contact us
Dirask - we help you to
solve coding problems.
Ask question.

❤️💻 🙂

Join