56 lines
1.3 KiB
JavaScript
56 lines
1.3 KiB
JavaScript
'use strict'
|
|
|
|
module.exports = function encodeUtf8 (input) {
|
|
var result = []
|
|
var size = input.length
|
|
|
|
for (var index = 0; index < size; index++) {
|
|
var point = input.charCodeAt(index)
|
|
|
|
if (point >= 0xD800 && point <= 0xDBFF && size > index + 1) {
|
|
var second = input.charCodeAt(index + 1)
|
|
|
|
if (second >= 0xDC00 && second <= 0xDFFF) {
|
|
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
point = (point - 0xD800) * 0x400 + second - 0xDC00 + 0x10000
|
|
index += 1
|
|
}
|
|
}
|
|
|
|
// US-ASCII
|
|
if (point < 0x80) {
|
|
result.push(point)
|
|
continue
|
|
}
|
|
|
|
// 2-byte UTF-8
|
|
if (point < 0x800) {
|
|
result.push((point >> 6) | 192)
|
|
result.push((point & 63) | 128)
|
|
continue
|
|
}
|
|
|
|
// 3-byte UTF-8
|
|
if (point < 0xD800 || (point >= 0xE000 && point < 0x10000)) {
|
|
result.push((point >> 12) | 224)
|
|
result.push(((point >> 6) & 63) | 128)
|
|
result.push((point & 63) | 128)
|
|
continue
|
|
}
|
|
|
|
// 4-byte UTF-8
|
|
if (point >= 0x10000 && point <= 0x10FFFF) {
|
|
result.push((point >> 18) | 240)
|
|
result.push(((point >> 12) & 63) | 128)
|
|
result.push(((point >> 6) & 63) | 128)
|
|
result.push((point & 63) | 128)
|
|
continue
|
|
}
|
|
|
|
// Invalid character
|
|
result.push(0xEF, 0xBF, 0xBD)
|
|
}
|
|
|
|
return new Uint8Array(result).buffer
|
|
}
|