Skip to content

Commit

Permalink
feat(simd): add mag2/4, magsq2/4, move/extract inline fns, update tes…
Browse files Browse the repository at this point in the history
…ts, readme
  • Loading branch information
postspectacular committed Oct 28, 2019
1 parent d09f09e commit 00ce05b
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 10 deletions.
4 changes: 4 additions & 0 deletions packages/simd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ for sources:
- `invsqrt4_f32` (!)
- `madd4_f32`
- `maddn4_f32`
- `mag2_f32_aos`
- `mag4_f32_aos`
- `magsq2_f32_aos`
- `magsq4_f32_aos`
- `max4_f32`
- `min4_f32`
- `mix4_f32`
Expand Down
6 changes: 3 additions & 3 deletions packages/simd/assembly/dot.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { hadd2_f32, hadd4_f32 } from "./hadd";
import { __hadd2_f32, __hadd4_f32 } from "./inline/hadd";

/**
* Takes two densely packed vec2 AOS buffers `a` and `b`, computes their
Expand All @@ -23,7 +23,7 @@ export function dot2_f32_aos(
const res = out;
num >>= 1;
for (; num-- > 0; ) {
const m = hadd2_f32(f32x4.mul(v128.load(a), v128.load(b)));
const m = __hadd2_f32(f32x4.mul(v128.load(a), v128.load(b)));
f32.store(out, f32x4.extract_lane(m, 0));
f32.store(out, f32x4.extract_lane(m, 2), 4);
out += 8;
Expand Down Expand Up @@ -62,7 +62,7 @@ export function dot4_f32_aos(
sb <<= 2;
// a1*b1 + a2*b2 + a3*b3 + a4*b4
for (; num-- > 0; ) {
f32.store(out, hadd4_f32(f32x4.mul(v128.load(a), v128.load(b))));
f32.store(out, __hadd4_f32(f32x4.mul(v128.load(a), v128.load(b))));
out += so;
a += sa;
b += sb;
Expand Down
2 changes: 2 additions & 0 deletions packages/simd/assembly/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export * from "./clamp";
export * from "./dot";
export * from "./madd";
export * from "./maddn";
export * from "./mag";
export * from "./magsq";
export * from "./max";
export * from "./min";
export * from "./mix";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/
// @ts-ignore: decorator
@inline
export function hadd2_f32(v: v128): v128 {
export function __hadd2_f32(v: v128): v128 {
return f32x4.add(v, v128.shuffle<f32>(v, v, 1, 0, 3, 2));
}

Expand All @@ -23,7 +23,7 @@ export function hadd2_f32(v: v128): v128 {
*/
// @ts-ignore: decorator
@inline
export function hadd4_f32(v: v128): f32 {
export function __hadd4_f32(v: v128): f32 {
v = f32x4.add(v, v128.shuffle<f32>(v, v, 2, 3, 0, 1));
return f32x4.extract_lane(v, 0) + f32x4.extract_lane(v, 1);
}
13 changes: 13 additions & 0 deletions packages/simd/assembly/inline/magsq.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { __hadd2_f32, __hadd4_f32 } from "./hadd";

/**
 * Squares all four lanes of `v` and passes the result through
 * `__hadd2_f32`, i.e. treats `v` as two packed vec2 and yields their
 * squared magnitudes. Callers in this package read the two results from
 * lanes 0 and 2 of the returned vector.
 *
 * @param v - vector holding two vec2 (x0, y0, x1, y1)
 */
// @ts-ignore: decorator
@inline
export function __magsq2(v: v128): v128 {
    const squared = f32x4.mul(v, v);
    return __hadd2_f32(squared);
}

/**
 * Squares all four lanes of `v` and reduces them to a single scalar via
 * `__hadd4_f32`, i.e. the squared magnitude of `v` interpreted as one
 * vec4.
 *
 * @param v - vector holding a single vec4
 */
// @ts-ignore: decorator
@inline
export function __magsq4(v: v128): f32 {
    const squared = f32x4.mul(v, v);
    return __hadd4_f32(squared);
}
32 changes: 32 additions & 0 deletions packages/simd/assembly/mag.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { __magsq2, __magsq4 } from "./inline/magsq";

/**
 * Computes the magnitudes of `num` densely packed vec2 values starting at
 * address `a` and writes them as consecutive f32 values starting at
 * address `out`.
 *
 * Vectors are processed two at a time (one 128-bit load per pair), so only
 * `num >> 1` pairs are handled; if `num` is odd, the trailing vector is
 * not processed.
 *
 * @param out - destination address (receives 2 floats / 8 bytes per pair)
 * @param a - source address (16 bytes are read per pair)
 * @param num - number of vec2 to process
 * @returns the original `out` address
 */
export function mag2_f32_aos(out: usize, a: usize, num: usize): usize {
    const start = out;
    // one v128 holds two vec2, hence half as many iterations
    const pairs = num >> 1;
    for (let i: usize = 0; i < pairs; i++) {
        const sq = __magsq2(v128.load(a));
        // squared magnitudes of the 1st/2nd vector sit in lanes 0 and 2
        const m0 = sqrt(f32x4.extract_lane(sq, 0));
        const m1 = sqrt(f32x4.extract_lane(sq, 2));
        f32.store(out, m0);
        f32.store(out, m1, 4);
        a += 16;
        out += 8;
    }
    return start;
}

/**
 * Computes the magnitudes of `num` vec4 values read from address `a` and
 * stores each result as a single f32 at address `out`.
 *
 * Both strides are given in f32 elements and converted to byte offsets
 * internally (multiplied by 4).
 *
 * @param out - destination address
 * @param a - source address
 * @param num - number of vec4 to process
 * @param so - output stride (in floats) between successive results
 * @param sa - input stride (in floats) between successive vectors
 * @returns the original `out` address
 */
export function mag4_f32_aos(
    out: usize,
    a: usize,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const start = out;
    // float strides -> byte strides
    const outStep = so << 2;
    const srcStep = sa << 2;
    for (let i: usize = 0; i < num; i++) {
        const sq = __magsq4(v128.load(a));
        f32.store(out, sqrt(sq));
        a += srcStep;
        out += outStep;
    }
    return start;
}
32 changes: 32 additions & 0 deletions packages/simd/assembly/magsq.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { __magsq2, __magsq4 } from "./inline/magsq";

/**
 * Computes the squared magnitudes of `num` densely packed vec2 values
 * starting at address `a` and writes them as consecutive f32 values
 * starting at address `out`.
 *
 * Vectors are processed two at a time (one 128-bit load per pair), so only
 * `num >> 1` pairs are handled; if `num` is odd, the trailing vector is
 * not processed.
 *
 * @param out - destination address (receives 2 floats / 8 bytes per pair)
 * @param a - source address (16 bytes are read per pair)
 * @param num - number of vec2 to process
 * @returns the original `out` address
 */
export function magsq2_f32_aos(out: usize, a: usize, num: usize): usize {
    const start = out;
    // one v128 holds two vec2, hence half as many iterations
    const pairs = num >> 1;
    for (let i: usize = 0; i < pairs; i++) {
        const sq = __magsq2(v128.load(a));
        // results for the 1st/2nd vector sit in lanes 0 and 2
        const m0 = f32x4.extract_lane(sq, 0);
        const m1 = f32x4.extract_lane(sq, 2);
        f32.store(out, m0);
        f32.store(out, m1, 4);
        a += 16;
        out += 8;
    }
    return start;
}

/**
 * Computes the squared magnitudes of `num` vec4 values read from address
 * `a` and stores each result as a single f32 at address `out`.
 *
 * Both strides are given in f32 elements and converted to byte offsets
 * internally (multiplied by 4).
 *
 * @param out - destination address
 * @param a - source address
 * @param num - number of vec4 to process
 * @param so - output stride (in floats) between successive results
 * @param sa - input stride (in floats) between successive vectors
 * @returns the original `out` address
 */
export function magsq4_f32_aos(
    out: usize,
    a: usize,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const start = out;
    // float strides -> byte strides
    const outStep = so << 2;
    const srcStep = sa << 2;
    for (let i: usize = 0; i < num; i++) {
        const sq = __magsq4(v128.load(a));
        f32.store(out, sq);
        a += srcStep;
        out += outStep;
    }
    return start;
}
6 changes: 3 additions & 3 deletions packages/simd/assembly/normalize.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { hadd2_f32, hadd4_f32 } from "./hadd";
import { __magsq2, __magsq4 } from "./inline/magsq";

export function normalize2_f32_aos(
out: usize,
Expand All @@ -10,7 +10,7 @@ export function normalize2_f32_aos(
num >>= 1;
for (; num-- > 0; ) {
const v = v128.load(a);
let vm = hadd2_f32(f32x4.mul(v, v));
let vm = __magsq2(v);
const m1 = f32x4.extract_lane(vm, 0);
const m2 = f32x4.extract_lane(vm, 2);
vm = f32x4.replace_lane(
Expand Down Expand Up @@ -43,7 +43,7 @@ export function normalize4_f32_aos(
const res = out;
for (; num-- > 0; ) {
const v = v128.load(a);
const mag = hadd4_f32(f32x4.mul(v, v));
const mag = __magsq4(v);
v128.store(
out,
mag > f32.EPSILON
Expand Down
4 changes: 2 additions & 2 deletions packages/simd/assembly/sum.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { hadd4_f32 } from "./hadd";
import { __hadd4_f32 } from "./inline/hadd";

export function sum4_f32(a: usize, num: usize, sa: usize): f32 {
sa <<= 2;
Expand All @@ -7,5 +7,5 @@ export function sum4_f32(a: usize, num: usize, sa: usize): f32 {
acc = f32x4.add(acc, v128.load(a));
a += sa;
}
return hadd4_f32(acc);
return __hadd4_f32(acc);
}
10 changes: 10 additions & 0 deletions packages/simd/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ export interface SIMD {
// prettier-ignore
maddn4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sc: number): number;

mag2_f32_aos(out: number, a: number, num: number): number;

magsq2_f32_aos(out: number, a: number, num: number): number;

// prettier-ignore
mag4_f32_aos( out: number, a: number, num: number, so: number, sa: number): number;

// prettier-ignore
magsq4_f32_aos( out: number, a: number, num: number, so: number, sa: number): number;

// prettier-ignore
max4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;

Expand Down
23 changes: 23 additions & 0 deletions packages/simd/test/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,29 @@ simd.msubn4_f32(1024, 0, 11, 32, 2, 4, 4, 4);
// prettier-ignore
assertEqual(res_f32(1024, 8), [-89, -178, -267, -356, -445, -534, -623, -712]);

// magsq2_f32_aos
// mag2_f32_aos
// magsq4_f32_aos
// mag4_f32_aos
// shared input: interpreted as four vec2 by the *2 tests below and as
// two vec4 by the *4 tests (presumably written at address 0, which is
// the source address passed to every call - confirm against `simd.f32`)
simd.f32.set([1, 2, 10, 20, -100, 200, 100, -200]);
// four vec2 -> four squared magnitudes written at address 1024
simd.magsq2_f32_aos(1024, 0, 4);
assertEqualDelta(res_f32(1024, 4), [
1 * 1 + 2 * 2,
10 * 10 + 20 * 20,
100 * 100 + 200 * 200,
100 * 100 + 200 * 200
]);
// same input, now expecting the square roots of the values above
simd.mag2_f32_aos(1024, 0, 4);
assertEqualDelta(res_f32(1024, 4), [
Math.sqrt(5),
Math.sqrt(500),
Math.sqrt(50000),
Math.sqrt(50000)
]);

// two vec4, output stride 1 float, input stride 4 floats:
// 1 + 4 + 100 + 400 = 505 and 4 * (100^2 + 200^2) / 2 = 100000
simd.magsq4_f32_aos(1024, 0, 2, 1, 4);
assertEqualDelta(res_f32(1024, 2), [505, 100000]);
simd.mag4_f32_aos(1024, 0, 2, 1, 4);
assertEqualDelta(res_f32(1024, 2), [Math.sqrt(505), Math.sqrt(100000)]);

// mix4_f32
// mixn4_f32
// prettier-ignore
Expand Down

0 comments on commit 00ce05b

Please sign in to comment.