// You cannot select more than 25 topics.
// Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
// 145 lines
// 5.9 KiB
// 145 lines
// 5.9 KiB
"use strict";

var __importDefault = (this && this.__importDefault) || function (mod) { |
|
return (mod && mod.__esModule) ? mod : { "default": mod }; |
|
}; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.JUMP_OFFSET_BASE = exports.BinTrieFlags = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0; |
|
var decode_data_html_1 = __importDefault(require("./generated/decode-data-html")); |
|
exports.htmlDecodeTree = decode_data_html_1.default; |
|
var decode_data_xml_1 = __importDefault(require("./generated/decode-data-xml")); |
|
exports.xmlDecodeTree = decode_data_xml_1.default; |
|
var decode_codepoint_1 = __importDefault(require("./decode_codepoint")); |
|
var BinTrieFlags; |
|
(function (BinTrieFlags) { |
|
BinTrieFlags[BinTrieFlags["HAS_VALUE"] = 32768] = "HAS_VALUE"; |
|
BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 32512] = "BRANCH_LENGTH"; |
|
BinTrieFlags[BinTrieFlags["MULTI_BYTE"] = 128] = "MULTI_BYTE"; |
|
BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE"; |
|
})(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {})); |
|
exports.JUMP_OFFSET_BASE = 48 /* ZERO */ - 1; |
|
/**
 * Builds an entity decoder bound to one binary entity trie.
 *
 * The returned function scans `str` for "&", decodes numeric references
 * (`&#123;`, `&#x7b;`) via `decode_codepoint`, and decodes named references by
 * walking `decodeTree` with `determineBranch`. Anything that does not decode
 * is copied to the output unchanged.
 *
 * @param decodeTree Indexable sequence of numeric trie words (node headers,
 *     branch keys and values), laid out as `determineBranch` expects.
 * @returns A function `(str, strict) => string`. When `strict` is truthy,
 *     references that are not terminated by ";" are left untouched.
 */
function getDecoder(decodeTree) {
    return function decodeHTMLBinary(str, strict) {
        var ret = "";
        // Start of the input that has not been copied to `ret` yet.
        var lastIdx = 0;
        var strIdx = 0;
        while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
            // Flush the literal text before this "&". `lastIdx` now points at
            // the "&" itself, so a failed decode leaves it in the output.
            ret += str.slice(lastIdx, strIdx);
            lastIdx = strIdx;
            // Skip the "&"
            strIdx += 1;
            // If we have a numeric entity, handle this separately.
            if (str.charCodeAt(strIdx) === 35 /* NUM */) {
                // Skip the leading "&#". For hex entities, also skip the leading "x".
                var start = strIdx + 1;
                var base = 10;
                var cp = str.charCodeAt(start);
                // OR-ing with 32 folds ASCII upper case onto lower case ("X" -> "x").
                if ((cp | 32 /* To_LOWER_BIT */) === 120 /* LOWER_X */) {
                    base = 16;
                    strIdx += 1;
                    start += 1;
                }
                // Advance `strIdx` past every digit valid for `base`.
                while (((cp = str.charCodeAt(++strIdx)) >= 48 /* ZERO */ &&
                    cp <= 57 /* NINE */) ||
                    (base === 16 &&
                        (cp | 32 /* To_LOWER_BIT */) >= 97 /* LOWER_A */ &&
                        (cp | 32 /* To_LOWER_BIT */) <= 102 /* LOWER_F */))
                    ;
                // Only decode when at least one digit was consumed.
                if (start !== strIdx) {
                    var entity = str.substring(start, strIdx);
                    var parsed = parseInt(entity, base);
                    if (str.charCodeAt(strIdx) === 59 /* SEMI */) {
                        strIdx += 1;
                    }
                    else if (strict) {
                        // Unterminated numeric reference: keep the raw text.
                        continue;
                    }
                    ret += decode_codepoint_1.default(parsed);
                    lastIdx = strIdx;
                }
                continue;
            }
            // Named entity: walk the trie one character at a time.
            var result = null;
            // Characters consumed since the most recent match (1 = none matched yet).
            var excess = 1;
            var treeIdx = 0;
            var current = decodeTree[treeIdx];
            for (; strIdx < str.length; strIdx++, excess++) {
                treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
                if (treeIdx < 0)
                    break;
                current = decodeTree[treeIdx];
                // If the branch is a value, store it and continue
                if (current & BinTrieFlags.HAS_VALUE) {
                    // If we have a legacy entity while parsing strictly, just skip the number of bytes
                    if (strict && str.charCodeAt(strIdx) !== 59 /* SEMI */) {
                        // No need to consider multi-byte values, as the legacy entity is always a single byte
                        treeIdx += 1;
                    }
                    else {
                        // If this is a surrogate pair, combine the higher bits from the node with the next byte
                        result =
                            current & BinTrieFlags.MULTI_BYTE
                                ? String.fromCharCode(decodeTree[++treeIdx], decodeTree[++treeIdx])
                                : String.fromCharCode(decodeTree[++treeIdx]);
                        excess = 0;
                    }
                }
            }
            if (result != null) {
                ret += result;
                // Resume copying right after the last character of the longest match.
                lastIdx = strIdx - excess + 1;
            }
        }
        return ret + str.slice(lastIdx);
    };
}

function determineBranch(decodeTree, current, nodeIdx, char) { |
|
if (current <= 128) { |
|
return char === current ? nodeIdx : -1; |
|
} |
|
var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 8; |
|
if (branchCount === 0) { |
|
return -1; |
|
} |
|
if (branchCount === 1) { |
|
return char === decodeTree[nodeIdx] ? nodeIdx + 1 : -1; |
|
} |
|
var jumpOffset = current & BinTrieFlags.JUMP_TABLE; |
|
if (jumpOffset) { |
|
var value = char - exports.JUMP_OFFSET_BASE - jumpOffset; |
|
return value < 0 || value > branchCount |
|
? -1 |
|
: decodeTree[nodeIdx + value] - 1; |
|
} |
|
// Binary search for the character. |
|
var lo = nodeIdx; |
|
var hi = lo + branchCount - 1; |
|
while (lo <= hi) { |
|
var mid = (lo + hi) >>> 1; |
|
var midVal = decodeTree[mid]; |
|
if (midVal < char) { |
|
lo = mid + 1; |
|
} |
|
else if (midVal > char) { |
|
hi = mid - 1; |
|
} |
|
else { |
|
return decodeTree[mid + branchCount]; |
|
} |
|
} |
|
return -1; |
|
} |
|
exports.determineBranch = determineBranch; |
|
// Decoder functions built once at module load, one per entity trie; the
// exported decode* helpers call these with a fixed `strict` flag.
var htmlDecoder = getDecoder(decode_data_html_1.default);
var xmlDecoder = getDecoder(decode_data_xml_1.default);

function decodeHTML(str) { |
|
return htmlDecoder(str, false); |
|
} |
|
exports.decodeHTML = decodeHTML; |
|
function decodeHTMLStrict(str) { |
|
return htmlDecoder(str, true); |
|
} |
|
exports.decodeHTMLStrict = decodeHTMLStrict; |
|
function decodeXML(str) { |
|
return xmlDecoder(str, true); |
|
} |
|
exports.decodeXML = decodeXML;
|
|
|