Skip to content

Commit

Permalink
feat(rle-pack): update data format, custom repeat sizes, rename fns
Browse files Browse the repository at this point in the history
- add support for custom run-length group thresholds
- rename => encode() / decode()
- decode() returns Uint8/16/32Array based on given word size
- add encode() error handling (arg checks)
- update tests & readme
- add diagram

BREAKING CHANGE: new API and encoding format, see readme
for details
  • Loading branch information
postspectacular committed Aug 24, 2018
1 parent 89b4ad5 commit 694a253
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 41 deletions.
31 changes: 31 additions & 0 deletions assets/dot/rle-layout.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Diagram of the rle-pack bit stream layout: a header followed by the
// per-value fields. Nodes are laid out left-to-right and chained in the
// order declared by the edge statement at the bottom.
digraph g {
fontname=Inconsolata;
rankdir=LR;
node[shape=square,width=1.5,fontname=Inconsolata];

// Header fields: number of values, word size and the four RLE repeat
// group bit sizes (4 bits each).
subgraph cluster0 {
label="header";
style="filled";
bgcolor="#eeeeee";
num[label="num values\n(32 bits)"];
wordsize[label="word size\n(5 bits)"];
rlesizes[label="RLE sizes\n(4x4 bits)"];
}

// Fields stored for each value: RLE flag bit followed by the value itself.
subgraph cluster1 {
label="each value";
bgcolor="#cccccc";
flag[label="RLE flag\n(1 bit)"];
val[label="value\n(word size)"];

// Extra fields which are only present for repeated values: the 2-bit
// repeat group ID and the repeat count (its bit width varies by group).
subgraph cluster1b {
bgcolor="#aaaaaa";
label="repeats only";
repeatid[label="repeat ID\n(2 bits)"];
repeat[label="repeats\n(varying)"];
}
}

// Single edge chain linking all fields in the order shown in the diagram.
num -> wordsize -> rlesizes -> flag -> val -> repeatid -> repeat;

}
Binary file added assets/rle-layout.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 29 additions & 12 deletions packages/rle-pack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,31 @@ This project is part of the

## About

Binary run-length encoding packer/unpacker w/ support for flexible input
word sizes and repeat bit widths.
Binary run-length encoding packer/unpacker with support for customizable
input word sizes (1 - 32 bits) and repeat count (run-length) bit sizes
(1 - 16 bits). The encoder uses 4 different repeat group sizes
(thresholds) to minimize the number of bits used to store the run
lengths. A single RLE chunk supports run lengths of up to 16 bits (i.e.
65536 repetitions). If a value is repeated more often than that, the
remainder is encoded using additional RLE chunks.

Encoding format:
### Encoding format

- 32 bits - original size in bytes (header)
![data layout](https://raw.githubusercontent.com/thi-ng/umbrella/master/assets/rle-layout.png)

- 32 bits - original number of words
- 5 bits - word size
- 16 bits - RLE repeat group bit sizes (default: 3, 4, 8, 16)

Then per value:

- 1 bit - encoding flag (1 = RLE encoded, 0 = single occurrence)
- n bits - value

The following is only used for repeated values:
The following are only used for repeated values:

- 2 bits - repeat class
- 3/4/8/16 bits - repeat count - 1 (if > 0x10000 then split into chunks...)
- m bits - repeat count - 1 (if greater than max group size then split into chunks...)

Brief overview for 8-bit word size (default):

Expand Down Expand Up @@ -63,15 +72,23 @@ let rle = require("@thi.ng/rle-pack");
src = new Uint8Array(1024);
src.set([1,1,1,1,1,2,2,2,2,3,3,3,4,4,5,4,4,3,3,3,2,2,2,2,1,1,1,1,1], 512);

// pack data (word size = 3 bits, i.e. value range 0 - 7)
packed = rle.encodeBytes(src, src.length, 3);
// Uint8Array [0, 0, 4, 0, 140, 7, 254, 73, 67, 177, 96, 87, 3, 98, 161, 201, 35, 1, 226]
// pack data
packed = rle.encode(src, src.length);
// Uint8Array [0,0,4,0,65,27,252,3,1,255,128,146,4,56,24,160,129,2,193,3,3,20,8,112,18,64,48,30,32]

packed.length
// 19 => 1.85% of original
// 29 => 2.83% of original

// pack with custom word size (3 bits, i.e. our value range is only 0-7)
// and use custom repeat group sizes suitable for our data
alt = rle.encode(src, src.length, 3, [1, 2, 3, 9]);
// Uint8Array [0,0,4,0,24,9,68,127,249,165,61,182,21,195,109,79,52,143,196]

alt.length
// 19 => 1.85% of original, ~65% of default config

// unpack
unpacked = rle.decodeBytes(packed, 3);
unpacked = new Uint8Array(rle.decode(alt));
```

## Authors
Expand All @@ -80,4 +97,4 @@ unpacked = rle.decodeBytes(packed, 3);

## License

© 2017 Karsten Schmidt // Apache Software License 2.0
© 2017 - 2018 Karsten Schmidt // Apache Software License 2.0
5 changes: 3 additions & 2 deletions packages/rle-pack/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"typescript": "^3.0.1"
},
"dependencies": {
"@thi.ng/bitstream": "^0.4.15"
"@thi.ng/bitstream": "^0.4.15",
"@thi.ng/errors": "^0.1.6"
},
"keywords": [
"binary",
Expand All @@ -42,4 +43,4 @@
"publishConfig": {
"access": "public"
}
}
}
61 changes: 40 additions & 21 deletions packages/rle-pack/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,38 @@
import { BitInputStream, BitOutputStream } from "@thi.ng/bitstream";
import { illegalArgs } from "@thi.ng/errors/illegal-arguments";

const RLE_SIZES = [3, 4, 8, 16];
export type RLESizes = [number, number, number, number];

/**
* Compresses input using dynamically sized RLE compression and returns
* result as `Uint8Array`.
*
* @param src
* @param num number of input words
* @param wordSize in bits, MUST be <= 8
* @param wordSize in bits, range 1 - 32
*/
export function encodeBytes(src: Iterable<number>, num: number, wordSize = 8) {
const stream = new BitOutputStream(Math.ceil(num * wordSize / 8)).write(num, 32);
export function encode(src: Iterable<number>, num: number, wordSize = 8, rleSizes: RLESizes = [3, 4, 8, 16]) {
(wordSize < 1 || wordSize > 32) && illegalArgs("word size (1-32 bits only)");
const out = new BitOutputStream(Math.ceil(num * wordSize / 8) + 4 + 2 + 1)
.write(num, 32)
.write(wordSize, 5);
rleSizes.forEach((x) => {
(x < 1 || x > 16) && illegalArgs("RLE repeat size (1-16 bits only)");
out.write(x - 1, 4);
});
const [rle0, rle1, rle2, rle3] = rleSizes.map((x) => 1 << x);
const n1 = num - 1;
let val;
let tail = true;
let n = 0;
let i = 0;
const write = () => {
stream.write(n > 0 ? 1 : 0, 1);
stream.write(val, wordSize);
out.write(n > 0 ? 1 : 0, 1);
out.write(val, wordSize);
if (n > 0) {
const t = (n < 0x8) ? 0 : (n < 0x10) ? 1 : (n < 0x100) ? 2 : 3;
stream.write(t, 2);
stream.write(n, RLE_SIZES[t]);
const t = (n < rle0) ? 0 : (n < rle1) ? 1 : (n < rle2) ? 2 : 3;
out.write(t, 2);
out.write(n, rleSizes[t]);
n = 0;
}
};
Expand All @@ -34,37 +43,47 @@ export function encodeBytes(src: Iterable<number>, num: number, wordSize = 8) {
write();
val = x;
} else {
if (++n === 0x10000) {
if (++n === rle3) {
n--;
write();
tail = (i < n1);
}
}
if (i === n1) {
break;
}
i++;
}
if (tail) {
write();
}
return stream.bytes();
return out.bytes();
}

export function decodeBytes(src: Uint8Array, wordSize = 8) {
export function decode(src: Uint8Array) {
const input = new BitInputStream(src);
const ws1 = wordSize + 1;
const num = input.read(32);
const out = new Uint8Array(num);
const flag = 1 << wordSize;
const mask = flag - 1;
const wordSize = input.read(5);
const rleSizes = [0, 0, 0, 0].map(() => input.read(4) + 1);
const out = arrayForWordSize(wordSize, num);
let x, j;
for (let i = 0; i < num;) {
x = input.read(ws1);
if (x & flag) {
j = i + 1 + input.read(RLE_SIZES[input.read(2)]);
out.fill(x & mask, i, j);
if (input.readBit()) {
x = input.read(wordSize);
j = i + 1 + input.read(rleSizes[input.read(2)]);
out.fill(x, i, j);
i = j;
} else {
out[i++] = x & mask;
out[i++] = input.read(wordSize);
}
}
return out;
}

/**
 * Creates a new typed array of length `n` whose element type is the
 * smallest unsigned integer array able to hold words of `ws` bits:
 *
 * - `ws` below 9  => `Uint8Array`
 * - `ws` below 17 => `Uint16Array`
 * - otherwise     => `Uint32Array`
 *
 * @param ws word size in bits
 * @param n number of elements
 */
const arrayForWordSize = (ws: number, n: number) => {
    if (ws < 9) {
        return new Uint8Array(n);
    }
    if (ws < 17) {
        return new Uint16Array(n);
    }
    return new Uint32Array(n);
};
17 changes: 11 additions & 6 deletions packages/rle-pack/test/index.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import * as assert from "assert";
import { encodeBytes, decodeBytes } from "../src/index";
import { encode, decode } from "../src/index";

const src = new Uint8Array(1024);
src.set([1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1], 512);

describe("rle-pack", () => {
it("3bit", () => {
const src = new Uint8Array(1024);
src.set([1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1], 512);
const packed = encodeBytes(src, src.length, 3);
assert.deepEqual(packed, [0, 0, 4, 0, 140, 7, 254, 73, 67, 177, 96, 87, 3, 98, 161, 201, 35, 1, 226]);
const dest = decodeBytes(packed, 3);
let packed = encode(src, src.length, 3);
assert.deepEqual(packed, [0, 0, 4, 0, 25, 27, 252, 96, 63, 242, 74, 29, 139, 2, 184, 27, 21, 14, 73, 24, 15, 16]);
let dest = decode(packed);
assert.deepEqual(dest, src);
packed = encode(src, src.length, 3, [1, 2, 4, 9]);
assert.deepEqual(packed, [0, 0, 4, 0, 24, 9, 196, 127, 249, 146, 158, 219, 10, 225, 182, 167, 153, 35, 241, 0]);
dest = decode(packed);
assert.deepEqual(dest, src);
});
});

0 comments on commit 694a253

Please sign in to comment.