Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm64 disassembler #2127

Merged
merged 10 commits into from
Oct 5, 2022
Prev Previous commit
Next Next commit
Resolve indirect addresses in disassembly (#2118)
Co-authored-by: Jan Vorlicek <[email protected]>
  • Loading branch information
adamsitnik and janvorli committed Sep 23, 2022
commit 43bf96bbf1a01266b9e9a6a17fae3e0a26417994
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
<ProjectReference Include="..\..\src\BenchmarkDotNet\BenchmarkDotNet.csproj" />
<ProjectReference Include="..\..\src\BenchmarkDotNet.Diagnostics.Windows\BenchmarkDotNet.Diagnostics.Windows.csproj" />
</ItemGroup>
</Project>
</Project>
6 changes: 6 additions & 0 deletions src/BenchmarkDotNet.Disassembler.x64/DataContracts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ public class Asm : SourceCode
public int InstructionLength { get; set; }
public Instruction? IntelInstruction { get; set; }

public ulong? ReferencedAddress { get; set; }
public bool IsReferencedAddressIndirect { get; set; }
// TODO: this is a hack
public IReadOnlyDictionary<ulong, string> AddressToNameMapping { get; set; }
public IReadOnlyDictionary<ulong, string> AddressToLabelMapping { get; set; }

#if !CLRMDV1
public Gee.External.Capstone.Arm64.Arm64Instruction Arm64Instruction { get; set; }
#endif
Expand Down
149 changes: 141 additions & 8 deletions src/BenchmarkDotNet/Disassemblers/Arm64Disassembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,51 +7,184 @@

namespace BenchmarkDotNet.Disassemblers
{
/// <summary>
/// Small state machine that follows a constant being built up in a single register.
/// It recognizes a MOVZ, then MOVK instructions with LSL shifts of 16, 32, ... on the same register,
/// optionally followed by an LDR that loads through that register (simulated by reading target memory).
/// Feed it every instruction in order; <see cref="HasValue"/> tells whether <see cref="Value"/> is usable.
/// </summary>
internal struct RegisterValueAccumulator
{
    private enum State
    {
        LookingForMovz,
        ExpectingMovk,
        LookingForPossibleLdr
    }

    private State _state;
    private long _value;
    private int _expectedMovkShift;
    private Arm64RegisterId _registerId;
    private ClrRuntime _runtime;

    /// <summary>True while a constant is being tracked, i.e. in any state other than waiting for a MOVZ.</summary>
    public bool HasValue => _state != State.LookingForMovz;

    /// <summary>The value accumulated so far; only meaningful when <see cref="HasValue"/> is true.</summary>
    public long Value => _value;

    /// <summary>The register whose value is being tracked.</summary>
    public Arm64RegisterId RegisterId => _registerId;

    /// <summary>Resets the accumulator and remembers the runtime used to read target memory.</summary>
    public void Init(ClrRuntime runtime)
    {
        _runtime = runtime;
        _registerId = Arm64RegisterId.Invalid;
        _value = 0;
        _expectedMovkShift = 0;
        _state = State.LookingForMovz;
    }

    /// <summary>Advances the state machine with the next instruction of the stream.</summary>
    public void Feed(Arm64Instruction instruction)
    {
        Arm64InstructionDetail details = instruction.Details;
        Arm64InstructionId id = instruction.Id;

        switch (_state)
        {
            case State.LookingForMovz:
                if (id == Arm64InstructionId.ARM64_INS_MOVZ)
                {
                    // Start tracking: the MOVZ immediate is the initial value, the first MOVK must shift by 16.
                    _registerId = details.Operands[0].Register.Id;
                    _value = details.Operands[1].Immediate;
                    _expectedMovkShift = 16;
                    _state = State.ExpectingMovk;
                }
                break;

            case State.ExpectingMovk:
                if (IsExpectedMovk(id, details))
                {
                    var source = details.Operands[1];
                    _value |= source.Immediate << source.ShiftValue;
                    _expectedMovkShift += 16;
                    break;
                }

                // Not the MOVK we were waiting for: the constant is complete, re-examine this instruction.
                _state = State.LookingForPossibleLdr;
                goto case State.LookingForPossibleLdr;

            case State.LookingForPossibleLdr:
                if (LoadsThroughTrackedRegister(id, details))
                {
                    // Simulate the LDR instruction; a zero result drops the tracked value.
                    _value = (long)_runtime.DataTarget.DataReader.ReadPointer((ulong)_value);
                    if (_value == 0)
                    {
                        _state = State.LookingForMovz;
                    }
                    break;
                }

                if (IsConditionalBranch(id, details))
                {
                    // Conditional branches are ignored.
                    break;
                }

                if (IsUnconditionalControlTransfer(details))
                {
                    // An unconditional jump or call invalidates the accumulated register value.
                    _state = State.LookingForMovz;
                    break;
                }

                if (id == Arm64InstructionId.ARM64_INS_MOVZ)
                {
                    // Another constant load is starting.
                    _state = State.LookingForMovz;
                    goto case State.LookingForMovz;
                }

                // Any other instruction writing the tracked register resets the state machine.
                if (WritesTrackedRegister(details))
                {
                    _state = State.LookingForMovz;
                }
                break;
        }
    }

    // MOVK into the tracked register, shifted left by exactly the amount expected next.
    private bool IsExpectedMovk(Arm64InstructionId id, Arm64InstructionDetail details)
    {
        return id == Arm64InstructionId.ARM64_INS_MOVK
            && details.Operands[0].Register.Id == _registerId
            && details.Operands[1].ShiftOperation == Arm64ShiftOperation.ARM64_SFT_LSL
            && details.Operands[1].ShiftValue == _expectedMovkShift;
    }

    // LDR whose destination and base are both the tracked register, with no displacement and no index register.
    private bool LoadsThroughTrackedRegister(Arm64InstructionId id, Arm64InstructionDetail details)
    {
        return id == Arm64InstructionId.ARM64_INS_LDR
            && _registerId == details.Operands[0].Register.Id
            && details.Operands[1].Type == Arm64OperandType.Memory
            && details.Operands[1].Memory.Base.Id == _registerId
            && details.Operands[1].Memory.Displacement == 0
            && details.Operands[1].Memory.Index == null;
    }

    // CBZ, CBNZ, or a B that carries a condition code.
    private static bool IsConditionalBranch(Arm64InstructionId id, Arm64InstructionDetail details)
    {
        return id == Arm64InstructionId.ARM64_INS_CBZ
            || id == Arm64InstructionId.ARM64_INS_CBNZ
            || (id == Arm64InstructionId.ARM64_INS_B && details.ConditionCode != Arm64ConditionCode.Invalid);
    }

    // Instruction belongs to the relative-branch, call or jump group.
    private static bool IsUnconditionalControlTransfer(Arm64InstructionDetail details)
    {
        return details.BelongsToGroup(Arm64InstructionGroupId.ARM64_GRP_BRANCH_RELATIVE)
            || details.BelongsToGroup(Arm64InstructionGroupId.ARM64_GRP_CALL)
            || details.BelongsToGroup(Arm64InstructionGroupId.ARM64_GRP_JUMP);
    }

    // True when the instruction lists the tracked register among the registers it writes.
    private bool WritesTrackedRegister(Arm64InstructionDetail details)
    {
        foreach (Arm64Register reg in details.AllWrittenRegisters)
        {
            if (reg.Id == _registerId)
            {
                return true;
            }
        }

        return false;
    }
}

/// <summary>
/// ARM64 implementation of the ClrMD v2 disassembler: decodes machine code with Capstone and
/// records, for every instruction, the address it references (when one can be determined).
/// </summary>
// NOTE(review): this span comes from a rendered diff; several places below contain both the
// removed and the added version of a line. They are flagged individually.
internal class Arm64Disassembler : ClrMdV2Disassembler<Arm64Instruction>
{
/// <summary>
/// Disassembles <paramref name="code"/> (located at <paramref name="startAddress"/>) and yields one
/// <see cref="Asm"/> per Capstone instruction, carrying the referenced address and the name mapping.
/// </summary>
protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, State state, int depth, ClrMethod currentMethod)
{
// NOTE(review): unconditional console output (including a hex dump of the whole method body) on
// every call - looks like leftover debug tracing; confirm before shipping.
Console.WriteLine($"Was asked to decode {currentMethod.Signature} from {code.Length} byte array ({string.Join(",", code.Select(b => b.ToString("X")))})");

const Arm64DisassembleMode disassembleMode = Arm64DisassembleMode.Arm;
using (CapstoneArm64Disassembler disassembler = CapstoneDisassembler.CreateArm64Disassembler(disassembleMode))
{
// Enables disassemble details, which are disabled by default, to provide more detailed information on
// disassembled binary code.
disassembler.EnableInstructionDetails = true;
disassembler.DisassembleSyntax = DisassembleSyntax.Intel;
// Tracks a constant loaded into a register across consecutive instructions (see Feed below).
RegisterValueAccumulator accumulator = new RegisterValueAccumulator();
accumulator.Init(state.Runtime);

Arm64Instruction[] instructions = disassembler.Disassemble(code, (long)startAddress);
foreach (Arm64Instruction instruction in instructions)
{
// TODO: use the accumulated address
// TODO: set the isIndirect correctly
bool isIndirect = false;
ulong address = 0;
// The lookup runs BEFORE the accumulator is fed with this instruction, so a BR/BLR is resolved
// against the register value built up by the preceding instructions.
if (TryGetReferencedAddress(instruction, accumulator, (uint)state.Runtime.DataTarget.DataReader.PointerSize, out address, out isIndirect))
{
TryTranslateAddressToName(address, isAddressPrecodeMD: false, state, isIndirect, depth, currentMethod);
}

accumulator.Feed(instruction);

yield return new Asm()
{
InstructionPointer = (ulong)instruction.Address,
InstructionLength = instruction.Bytes.Length,
// NOTE(review): the next two lines repeat the same initializer; they look like the removed and
// added sides of one diff line - only the second (with the trailing comma) can be valid here.
Arm64Instruction = instruction
Arm64Instruction = instruction,
// Addresses that fit in 16 bits are not reported: ReferencedAddress stays null for them.
ReferencedAddress = (address > ushort.MaxValue) ? address : null,
IsReferencedAddressIndirect = isIndirect,
AddressToNameMapping = state.AddressToNameMapping
};
}
}
}

// NOTE(review): this override forwards to a three-argument TryGetReferencedAddress, while the
// signature that follows takes five parameters; presumably this override is the removed side of the
// diff - confirm.
protected override bool TryGetReferencedAddressT(Arm64Instruction instruction, uint pointerSize, out ulong referencedAddress)
=> TryGetReferencedAddress(instruction, pointerSize, out referencedAddress);

/// <summary>
/// Determines the address referenced by <paramref name="instruction"/>, if any.
/// Returns true with an indirect address for BR/BLR through the register tracked by
/// <paramref name="accumulator"/>, true with a direct address for a relative branch that has an
/// immediate operand, and false (address 0) otherwise.
/// </summary>
// NOTE(review): two signatures follow - presumably the old (three-parameter) and the new
// (five-parameter) side of the diff; the body uses accumulator and isReferencedAddressIndirect.
internal static bool TryGetReferencedAddress(Arm64Instruction instruction, uint pointerSize, out ulong referencedAddress)
internal static bool TryGetReferencedAddress(Arm64Instruction instruction, RegisterValueAccumulator accumulator, uint pointerSize, out ulong referencedAddress, out bool isReferencedAddressIndirect)
{
// NOTE(review): the next line is repeated as the "else if" further down; it appears to be the
// removed side of the diff.
if (instruction.Details.BelongsToGroup(Arm64InstructionGroupId.ARM64_GRP_BRANCH_RELATIVE))
// Indirect branch/call through the register whose constant value the accumulator currently holds.
if ((instruction.Id == Arm64InstructionId.ARM64_INS_BR || instruction.Id == Arm64InstructionId.ARM64_INS_BLR) && instruction.Details.Operands[0].Register.Id == accumulator.RegisterId && accumulator.HasValue)
{
referencedAddress = (ulong)accumulator.Value;
isReferencedAddressIndirect = true;
return true;
}
else if (instruction.Details.BelongsToGroup(Arm64InstructionGroupId.ARM64_GRP_BRANCH_RELATIVE))
{
// One of the operands is the address
for (int i = 0; i < instruction.Details.Operands.Length; i++)
{
if (instruction.Details.Operands[i].Type == Arm64OperandType.Immediate)
{
referencedAddress = (ulong)instruction.Details.Operands[i].Immediate;
isReferencedAddressIndirect = false;
return true;
}
}
}
// No referenced address could be determined.
referencedAddress = 0;
isReferencedAddressIndirect = false;
return false;
}
}
Expand Down
16 changes: 10 additions & 6 deletions src/BenchmarkDotNet/Disassemblers/ClrMdV2Disassembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -253,15 +253,12 @@ public bool Equals(Sharp x, Sharp y)

internal abstract class ClrMdV2Disassembler<T> : ClrMdV2Disassembler
{
protected abstract bool TryGetReferencedAddressT(T instruction, uint pointerSize, out ulong referencedAddress);
// protected abstract bool TryGetReferencedAddressT(T instruction, uint pointerSize, out ulong referencedAddress);

protected void TryTranslateAddressToName(T instruction, State state, int depth, ClrMethod currentMethod)
protected void TryTranslateAddressToName(ulong address, bool isAddressPrecodeMD, State state, bool isIndirectCallOrJump, int depth, ClrMethod currentMethod)
{
var runtime = state.Runtime;

if (!TryGetReferencedAddressT(instruction, (uint)runtime.DataTarget.DataReader.PointerSize, out ulong address))
return;

if (state.AddressToNameMapping.ContainsKey(address))
return;

Expand All @@ -282,7 +279,14 @@ protected void TryTranslateAddressToName(T instruction, State state, int depth,
var methodDescriptor = runtime.GetMethodByHandle(address);
if (!(methodDescriptor is null))
{
state.AddressToNameMapping.Add(address, $"MD_{methodDescriptor.Signature}");
if (isAddressPrecodeMD)
{
state.AddressToNameMapping.Add(address, $"Precode of {methodDescriptor.Signature}");
}
else
{
state.AddressToNameMapping.Add(address, $"MD_{methodDescriptor.Signature}");
}
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,10 @@ internal static IReadOnlyList<Element> Prettify(DisassembledMethod method, Disas
// first of all, we search for referenced addresses (jump|calls)
var referencedAddresses = new HashSet<ulong>();
foreach (var asm in asmInstructions)
if (asm.IntelInstruction.HasValue)
if (IntelDisassembler.TryGetReferencedAddress(asm.IntelInstruction.Value, disassemblyResult.PointerSize, out ulong referencedAddress))
referencedAddresses.Add(referencedAddress);
else if (asm.Arm64Instruction is not null)
if (Arm64Disassembler.TryGetReferencedAddress(asm.Arm64Instruction, disassemblyResult.PointerSize, out referencedAddress))
referencedAddresses.Add(referencedAddress);
if (asm.ReferencedAddress != null)
{
referencedAddresses.Add(asm.ReferencedAddress.Value);
}

// for every IP that is referenced, we emit a unique label
var addressesToLabels = new Dictionary<ulong, string>();
Expand All @@ -70,15 +68,16 @@ internal static IReadOnlyList<Element> Prettify(DisassembledMethod method, Disas
}
else if (instruction is Asm asm)
{
asm.AddressToLabelMapping = addressesToLabels;
// this IP is referenced by some jump|call, so we add a label
if (addressesToLabels.TryGetValue(asm.InstructionPointer, out string label))
{
prettified.Add(new Label(label));
}

if ((asm.IntelInstruction.HasValue && IntelDisassembler.TryGetReferencedAddress(asm.IntelInstruction.Value, disassemblyResult.PointerSize, out ulong referencedAddress))
|| (asm.Arm64Instruction is not null && Arm64Disassembler.TryGetReferencedAddress(asm.Arm64Instruction, disassemblyResult.PointerSize, out referencedAddress)))
if (asm.ReferencedAddress != null)
{
ulong referencedAddress = asm.ReferencedAddress.Value;
// jump or a call within same method
if (addressesToLabels.TryGetValue(referencedAddress, out string translated))
{
Expand All @@ -89,7 +88,12 @@ internal static IReadOnlyList<Element> Prettify(DisassembledMethod method, Disas
// call to a known method
if (disassemblyResult.AddressToNameMapping.ContainsKey(referencedAddress))
{
prettified.Add(new Element(CodeFormatter.Format(asm, formatterWithGlobalSymbols, config.PrintInstructionAddresses, disassemblyResult.PointerSize), asm));
string comment = string.Empty;
if (asm.IsReferencedAddressIndirect)
{
comment = "; " + disassemblyResult.AddressToNameMapping[referencedAddress];
}
prettified.Add(new Element(CodeFormatter.Format(asm, formatterWithGlobalSymbols, config.PrintInstructionAddresses, disassemblyResult.PointerSize) + comment, asm));
continue;
}
}
Expand Down
18 changes: 17 additions & 1 deletion src/BenchmarkDotNet/Disassemblers/InstructionFormatter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,23 @@ internal static string Format(SourceCode sourceCode, Formatter formatter, bool p
case Asm asm when asm.IntelInstruction.HasValue:
return InstructionFormatter.Format(asm.IntelInstruction.Value, formatter, printInstructionAddresses, pointerSize);
case Asm asm when asm.Arm64Instruction is not null:
return $"{asm.Arm64Instruction.Mnemonic} {asm.Arm64Instruction.Operand}"; // TODO: implement proper formatting
{
string operand = asm.Arm64Instruction.Operand;

// Symbolize branch and call instructions target with immediate address argument
if (asm.Arm64Instruction.Details.BelongsToGroup(Gee.External.Capstone.Arm64.Arm64InstructionGroupId.ARM64_GRP_BRANCH_RELATIVE) &&
asm.ReferencedAddress != null &&
!asm.IsReferencedAddressIndirect &&
(asm.AddressToNameMapping.TryGetValue(asm.ReferencedAddress.Value, out string text) ||
asm?.AddressToLabelMapping.TryGetValue(asm.ReferencedAddress.Value, out text) == true))
{
string partToReplace = $"#0x{asm.ReferencedAddress.Value:x}";
operand = operand.Replace(partToReplace, text);
}

string instructionAddress = printInstructionAddresses ? $"{asm.Arm64Instruction.Address:X16} " : string.Empty;
return $"{instructionAddress}{asm.Arm64Instruction.Mnemonic} {operand}";
}
case Sharp sharp:
return sharp.Text;
case MonoCode mono:
Expand Down
Loading