Skip to content

Commit

Permalink
vector: use asm opcode mnemonics
Browse files Browse the repository at this point in the history
There's no change in the binary output, just less mystery in the asm.

These mnemonics were introduced in Go 1.10:
https://golang.org/doc/go1.10#asm and https://golang.org/cl/75490

Current stable release (as of 2018-11-10) is Go 1.11, and
https://golang.org/doc/devel/release.html#policy says that Go 1.9 and
below are therefore no longer supported.

Change-Id: I1f9a63521bc8d5e8f8d395605f62bf7fb6a63bc5
Reviewed-on: https://go-review.googlesource.com/c/148997
Reviewed-by: Dmitri Shuralyov <[email protected]>
  • Loading branch information
egorovcharenko9 committed Jan 8, 2024
1 parent 8d4bcf7 commit 0461995
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 98 deletions.
106 changes: 26 additions & 80 deletions vector/acc_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,9 @@ fxAccOpOverLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -182,13 +174,10 @@ fxAccOpOverLoop4:
PSRLQ $32, X11

// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11

// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
Expand Down Expand Up @@ -223,17 +212,9 @@ fxAccOpOverLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -346,17 +327,9 @@ fxAccOpSrcLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -390,17 +363,9 @@ fxAccOpSrcLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -492,17 +457,9 @@ fxAccMaskLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -534,17 +491,9 @@ fxAccMaskLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -696,13 +645,10 @@ flAccOpOverLoop4:
PSRLQ $32, X11

// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11

// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
Expand Down
25 changes: 7 additions & 18 deletions vector/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,17 +296,9 @@ const (
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2
`
flClampAndScale = `
// y = x & flSignMask
Expand Down Expand Up @@ -356,13 +348,10 @@ const (
MOVOU X0, X11
PSRLQ $32, X11
// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11
// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
XORPS X0, X11
Expand Down

0 comments on commit 0461995

Please sign in to comment.