
JavaScript - convert string to bytes array (UTF-8)

In this short article, we would like to show how to convert a string to a UTF-8 byte array using JavaScript.


Practical examples

1. Custom solution

This solution works in older web browsers and Node.js.

// ONLINE-RUNNER:browser;

/**
 * Converts a string to an array of UTF-8 byte values.
 *
 * Relies on encodeURIComponent(), which leaves unreserved ASCII characters as-is
 * and writes every other UTF-8 byte as a "%XX" escape, so the encoded text can be
 * walked and turned back into raw bytes.
 *
 * @param {string} text - text to encode
 * @returns {number[]} UTF-8 bytes, each in the range 0-255
 * @throws {URIError} when text contains an unpaired surrogate (raised by encodeURIComponent)
 */
const toBytes = (text) => {
    const encoded = encodeURIComponent(text);
    const result = [];
    for (let i = 0; i < encoded.length;) {
        const character = encoded[i];
        i += 1;
        if (character === '%') {
            // "%XX" escape: the two hex digits that follow are one UTF-8 byte
            const hex = encoded.substring(i, i + 2);
            i += 2;
            if (hex) {
                result.push(parseInt(hex, 16));
            }
        } else {
            // unescaped characters are always ASCII, so the char code is the byte itself
            result.push(character.charCodeAt(0));
        }
    }
    return result;
};

// Usage example:

const bytes = toBytes('Some text here...'); // converts string to UTF-8 bytes

console.log(bytes);  // [83, 111, 109, 101, 32, 116, 101, 120, 116, 32, 104, 101, 114, 101, 46, 46, 46]


2. Embedded solution

This solution appeared in the major web browsers around 2014-2020 and in Node.js v11.

// ONLINE-RUNNER:browser;

// TextEncoder always produces UTF-8 and its constructor takes no arguments,
// so no encoding label is passed. One shared instance is reused for every call.
const encoder = new TextEncoder();

/**
 * Converts a string to its UTF-8 bytes using the built-in TextEncoder.
 *
 * @param {string} text - text to encode
 * @returns {Uint8Array} UTF-8 bytes (a typed array, not a plain Array)
 */
const toBytes = (text) => {
    return encoder.encode(text);
};

// Usage example:

const bytes = toBytes('Some text here...'); // converts string to UTF-8 bytes

console.log(bytes);  // Uint8Array(17) [83, 111, 109, 101, 32, 116, 101, 120, 116, 32, 104, 101, 114, 101, 46, 46, 46]


3. Optimal solution

This solution has quite good performance and works in older web browsers and Node.js.

// ONLINE-RUNNER:browser;

/**
 * Converts a string to an array of UTF-8 byte values by walking its UTF-16 code
 * units directly, without any intermediate string.
 *
 * A valid surrogate pair is combined into one code point and written as 4 bytes.
 * An unpaired surrogate (a lone high or low surrogate) is encoded as U+FFFD
 * (0xEF 0xBF 0xBD), which is what TextEncoder does for the same input.
 *
 * @param {string} text - text to encode
 * @returns {number[]} UTF-8 bytes, each in the range 0-255
 */
const toBytes = (text) => {
    const result = [];
    for (let i = 0; i < text.length; i += 1) {
        const hi = text.charCodeAt(i);
        if (hi < 0x0080) {
            // code point range: U+0000 - U+007F
            // bytes: 0xxxxxxx
            result.push(hi);
            continue;
        }
        if (hi < 0x0800) {
            // code point range: U+0080 - U+07FF
            // bytes: 110xxxxx 10xxxxxx
            result.push(0xC0 | (hi >> 6),
                        0x80 | (hi & 0x3F));
            continue;
        }
        if (hi < 0xD800 || hi >= 0xE000) {
            // code point range: U+0800 - U+FFFF (surrogates excluded)
            // bytes: 1110xxxx 10xxxxxx 10xxxxxx
            result.push(0xE0 | (hi >> 12),
                        0x80 | ((hi >> 6) & 0x3F),
                        0x80 | (hi & 0x3F));
            continue;
        }
        // hi is a surrogate code unit (U+D800 - U+DFFF); peek at the next unit
        const lo = i + 1 < text.length ? text.charCodeAt(i + 1) : 0;
        const isPair = hi < 0xDC00 && lo >= 0xDC00 && lo < 0xE000;
        if (isPair) {
            // surrogate pair: consume the low surrogate as well
            i += 1;
            // the shift and OR must be grouped before adding the 0x10000 offset,
            // because `+` binds tighter than `<<` and `|`
            const code = 0x00010000 + (((hi & 0x03FF) << 10) | (lo & 0x03FF));
            // code point range: U+10000 - U+10FFFF
            // bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            result.push(0xF0 | (code >> 18),
                        0x80 | ((code >> 12) & 0x3F),
                        0x80 | ((code >> 6) & 0x3F),
                        0x80 | (code & 0x3F));
        } else {
            // unpaired surrogate: emit U+FFFD and let the next unit be processed normally
            result.push(0xEF, 0xBF, 0xBD);
        }
    }
    return result;
};

// Usage example:

const bytes = toBytes('Some text here...'); // converts string to UTF-8 bytes

console.log(bytes);  // [83, 111, 109, 101, 32, 116, 101, 120, 116, 32, 104, 101, 114, 101, 46, 46, 46]


