Skip to content

Commit

Permalink
use output buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
samthor committed Apr 28, 2020
1 parent 0f6c69c commit f8a589e
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 42 deletions.
78 changes: 43 additions & 35 deletions text.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ FastTextEncoder.prototype['encode'] = function(string, options={stream: false})
const len = string.length;

let at = 0; // output position
let tlen = Math.max(32, len + (len >> 1) + 7); // 1.5x size
let target = new Uint8Array((tlen >> 3) << 3); // ... but at 8 byte offset
let tlen = Math.max(32, len + (len >>> 1) + 7); // 1.5x size
let target = new Uint8Array((tlen >>> 3) << 3); // ... but at 8 byte offset

while (pos < len) {
let value = string.charCodeAt(pos++);
Expand All @@ -78,7 +78,7 @@ FastTextEncoder.prototype['encode'] = function(string, options={stream: false})
if (at + 4 > target.length) {
tlen += 8; // minimum extra
tlen *= (1.0 + (pos / string.length) * 2); // take 2x the remaining
tlen = (tlen >> 3) << 3; // 8 byte offset
tlen = (tlen >>> 3) << 3; // 8 byte offset

const update = new Uint8Array(tlen);
update.set(target);
Expand All @@ -89,14 +89,14 @@ FastTextEncoder.prototype['encode'] = function(string, options={stream: false})
target[at++] = value; // ASCII
continue;
} else if ((value & 0xfffff800) === 0) { // 2-byte
target[at++] = ((value >> 6) & 0x1f) | 0xc0;
target[at++] = ((value >>> 6) & 0x1f) | 0xc0;
} else if ((value & 0xffff0000) === 0) { // 3-byte
target[at++] = ((value >> 12) & 0x0f) | 0xe0;
target[at++] = ((value >> 6) & 0x3f) | 0x80;
target[at++] = ((value >>> 12) & 0x0f) | 0xe0;
target[at++] = ((value >>> 6) & 0x3f) | 0x80;
} else if ((value & 0xffe00000) === 0) { // 4-byte
target[at++] = ((value >> 18) & 0x07) | 0xf0;
target[at++] = ((value >> 12) & 0x3f) | 0x80;
target[at++] = ((value >> 6) & 0x3f) | 0x80;
target[at++] = ((value >>> 18) & 0x07) | 0xf0;
target[at++] = ((value >>> 12) & 0x3f) | 0x80;
target[at++] = ((value >>> 6) & 0x3f) | 0x80;
} else {
continue; // out of range
}
Expand Down Expand Up @@ -142,6 +142,7 @@ FastTextDecoder.prototype['decode'] = function(buffer, options={stream: false})

// Accept Uint8Array instances as-is.
let bytes = buffer;
let inputIndex = 0;

// Look for ArrayBufferView, which isn't a real type, but basically represents
// all the valid TypedArray types plus DataView. They all have ".buffer" as
Expand All @@ -150,59 +151,66 @@ FastTextDecoder.prototype['decode'] = function(buffer, options={stream: false})
bytes = new Uint8Array(buffer.buffer);
}

let pos = 0;
let pending = [];
// Create a working buffer for UTF-16 code units, but don't generate one
// which is too large for small input sizes. UTF-8 to UTF-16 conversion is
// going to be at most 1:1, if all code points are ASCII. The other extreme
// is 4-byte UTF-8, which results in two UTF-16 code units, but this is still
// 50% fewer entries in the output.
const pendingSize = Math.min(256 * 256, buffer.length + 1);
const pending = new Uint16Array(pendingSize);
const chunks = [];
let pendingIndex = 0;

for (;;) {
const more = pos < bytes.length;
const more = inputIndex < bytes.length;

// If there's no more data or we're >65k bytes, create a chunk.
// This isn't done at the end by simply slicing the data into equal sized
// chunks as we might hit a surrogate pair.
if (!more || (pos & 0x10000)) {
chunks.push(String.fromCharCode.apply(null, pending));
// If there's no more data or there'd be no room for two UTF-16 values,
// create a chunk. This isn't done at the end by simply slicing the data
// into equal sized chunks as we might hit a surrogate pair.
if (!more || (pendingIndex >= pendingSize - 1)) {
// nb. .apply and friends are *really slow*. Low-hanging fruit is to
// expand this to literally pass pending[0], pending[1], ... etc, but
// the output code expands pretty fast in this case.
chunks.push(String.fromCharCode.apply(null, pending.subarray(0, pendingIndex)));

if (!more) {
return chunks.join('');
}

// Move the buffer forward and create another chunk.
pending = [];
bytes = bytes.subarray(pos);
pos = 0;
bytes = bytes.subarray(inputIndex);
inputIndex = 0;
pendingIndex = 0;
}

// The native TextDecoder will generate "REPLACEMENT CHARACTER" where the
// input data is invalid. Here, we blindly parse the data even if it's
// wrong: e.g., if a 3-byte sequence doesn't have two valid continuations.

const byte1 = bytes[pos++];
if (byte1 === 0) {
pending.push(0);
} else if ((byte1 & 0x80) === 0) { // 1-byte
pending.push(byte1);
const byte1 = bytes[inputIndex++];
if ((byte1 & 0x80) === 0) { // 1-byte or null
pending[pendingIndex++] = byte1;
} else if ((byte1 & 0xe0) === 0xc0) { // 2-byte
const byte2 = bytes[pos++] & 0x3f;
pending.push(((byte1 & 0x1f) << 6) | byte2);
const byte2 = bytes[inputIndex++] & 0x3f;
pending[pendingIndex++] = ((byte1 & 0x1f) << 6) | byte2;
} else if ((byte1 & 0xf0) === 0xe0) { // 3-byte
const byte2 = bytes[pos++] & 0x3f;
const byte3 = bytes[pos++] & 0x3f;
pending.push(((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
const byte2 = bytes[inputIndex++] & 0x3f;
const byte3 = bytes[inputIndex++] & 0x3f;
pending[pendingIndex++] = ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3;
} else if ((byte1 & 0xf8) === 0xf0) { // 4-byte
const byte2 = bytes[pos++] & 0x3f;
const byte3 = bytes[pos++] & 0x3f;
const byte4 = bytes[pos++] & 0x3f;
const byte2 = bytes[inputIndex++] & 0x3f;
const byte3 = bytes[inputIndex++] & 0x3f;
const byte4 = bytes[inputIndex++] & 0x3f;

// this can be > 0xffff, so possibly generate surrogates
let codepoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
if (codepoint > 0xffff) {
// codepoint &= ~0x10000;
codepoint -= 0x10000;
pending.push((codepoint >>> 10) & 0x3ff | 0xd800);
pending[pendingIndex++] = (codepoint >>> 10) & 0x3ff | 0xd800;
codepoint = 0xdc00 | codepoint & 0x3ff;
}
pending.push(codepoint);
pending[pendingIndex++] = codepoint;
} else {
// invalid initial byte
}
Expand Down
10 changes: 5 additions & 5 deletions text.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f8a589e

Please sign in to comment.