[msan] Support vst{2,3,4}_lane instructions #101215

Merged 3 commits on Aug 9, 2024. The diff below shows changes from 2 of those commits.
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp: 58 changes (40 additions, 18 deletions)
@@ -3873,38 +3873,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}

/// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
/// Arm NEON vector store intrinsics have the output address (pointer) as the
/// last argument, with the initial arguments being the inputs. They return
/// void.
/// last argument, with the initial arguments being the inputs (and lane
/// number for vst{2,3,4}lane). They return void.
///
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
/// abcdabcdabcdabcd... into *outP
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
/// writes aaaa...bbbb...cccc...dddd... into *outP
/// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
/// These instructions can all be instrumented with essentially the same
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
IRBuilder<> IRB(&I);

// Don't use getNumOperands() because it includes the callee
int numArgOperands = I.arg_size();
assert(numArgOperands >= 1);

// The last arg operand is the output
// The last arg operand is the output (pointer)
assert(numArgOperands >= 1);
Value *Addr = I.getArgOperand(numArgOperands - 1);
assert(Addr->getType()->isPointerTy());
unsigned int skipTrailingOperands = 1;

if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);

SmallVector<Value *, 8> Shadows;
// Every arg operand, other than the last one, is an input vector
for (int i = 0; i < numArgOperands - 1; i++) {
// Second-last operand is the lane number (for vst{2,3,4}lane)
if (useLane) {
skipTrailingOperands++;
assert(numArgOperands >= (int)skipTrailingOperands);
assert(isa<IntegerType>(
I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
}

SmallVector<Value *, 8> ShadowArgs;
// All the initial operands are the inputs
for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
Value *Shadow = getShadow(&I, i);
Shadows.append(1, Shadow);
ShadowArgs.append(1, Shadow);
}

// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3932,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FixedVectorType *OutputVectorTy = FixedVectorType::get(
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
(numArgOperands - 1));
(numArgOperands - skipTrailingOperands));
Type *OutputShadowTy = getShadowTy(OutputVectorTy);

if (useLane)
ShadowArgs.append(1,
I.getArgOperand(numArgOperands - skipTrailingOperands));

Value *OutputShadowPtr, *OutputOriginPtr;
// AArch64 NEON does not need alignment (unless OS requires it)
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
Shadows.append(1, OutputShadowPtr);
ShadowArgs.append(1, OutputShadowPtr);

// CreateIntrinsic will select the correct (integer) type for the
// intrinsic; the original instruction I may have either integer- or
// float-type inputs.
CallInst *CI =
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);

if (MS.TrackOrigins) {
// TODO: if we modelled the vst* instruction more precisely, we could
// more accurately track the origins (e.g., if both inputs are
// uninitialized for vst2, we currently blame the second input, even
// though part of the output depends only on the first input).
//
// This is particularly imprecise for vst{2,3,4}lane, since only one
// lane of each input is actually copied to the output.
OriginCombiner OC(this, IRB);
for (int i = 0; i < numArgOperands - 1; i++)
for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
OC.Add(I.getArgOperand(i));

const DataLayout &DL = F.getDataLayout();
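
For context (an editorial aside, not part of the patch): the store shapes described in the handler's comment correspond to the ACLE intrinsics that Clang lowers to llvm.aarch64.neon.st4 and llvm.aarch64.neon.st4lane on AArch64. A minimal sketch, with illustrative names only:

#include <arm_neon.h>

// Sketch of the two store shapes the handler distinguishes.
void store_shapes(uint8_t *out_interleaved, uint8_t *out_lane, uint8x8x4_t v) {
  // vst4_u8 interleaves every lane of the four inputs:
  // writes a0 b0 c0 d0 a1 b1 c1 d1 ... (32 bytes in total).
  vst4_u8(out_interleaved, v);

  // vst4_lane_u8 writes only lane 3 of each input: a3 b3 c3 d3 (4 bytes).
  // In the IR, the lane index is the second-to-last operand, which the new
  // useLane path skips when collecting shadow arguments.
  vst4_lane_u8(out_lane, v, 3);
}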
@@ -4316,7 +4331,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4: {
handleNEONVectorStoreIntrinsic(I);
handleNEONVectorStoreIntrinsic(I, false);
break;
}

case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
handleNEONVectorStoreIntrinsic(I, true);
break;
}
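
As a closing illustration (hypothetical, not taken from the patch or its tests): with vst{2,3,4}lane now handled, the lane store propagates shadow from its inputs to the destination, so a program like the sketch below, built with -fsanitize=memory for AArch64, should be flagged when it branches on the stored byte.

#include <arm_neon.h>
#include <cstdio>

int main() {
  uint8_t out[4];
  uint8x8x4_t v;            // deliberately left uninitialized
  vst4_lane_u8(out, v, 0);  // copies one uninitialized lane from each input into out
  if (out[0] == 42)         // branching on the poisoned byte should trigger an MSan report
    std::puts("unlikely");
  return 0;
}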
