diff options
-rw-r--r-- | README.md | 37 | ||||
-rw-r--r-- | src/display.rs | 13 | ||||
-rw-r--r-- | src/lib.rs | 71 | ||||
-rw-r--r-- | tests/from_brain.rs | 4 |
4 files changed, 59 insertions, 66 deletions
@@ -3,7 +3,33 @@ [](https://crates.io/crates/yaxpeax-hexagon) [](https://docs.rs/yaxpeax-hexagon) -qualcomm hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`. +Qualcomm Hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`. + +support is good enough to make sense of many programs, but not complete: + +- [x] instructions mentioned in the V62 and V73 manuals. +- [x] system instructions documented in V62 and earlier +- [ ] undocumented system instructions in V73 and later +- [ ] HVX (in any version) +- [ ] duplex instructions + +between V67 and V73, Qualcomm decided to remove most mentions of the Hexagon +supervisor mode from their manuals. the LLVM target has had support for these +instructions since late 2023, so LLVM-derived disassemblers should support +them. it is not immediately clear to me that system instructions have the same +encodings or semantics on later architectures - i largely lack programs known to +target newer versions to validate that disassembly still looks reasonable. + +Hexagon in real use seems to rely on a hypervisor (probably +Qualcomm-maintained? similar to their +[minivm](https://github.com/quic/hexagonMVM)?) which system instructions are +intended to support, then "User" and "Guest" modes which are more openly +documented in public manuals. nonetheless, `hexagonMVM` uses these +now-undocumented system instructions [for system register +management](https://github.com/quic/hexagonMVM/blob/db795a9/minivm.S#L259), TLB +management later on, traps, and so on. these system instructions are also +important to process to make sense of the entrypoints of in-the-wild Hexagon +firmware images. ### features @@ -18,13 +44,14 @@ the canonical copy of `yaxpeax-hexagon` is at [https://git.iximeow.net/yaxpeax-h ### see also -* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for hexagon. heavily derived from LLVM. 
+* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for Hexagon. heavily derived from LLVM. * [llvm](https://github.com/llvm/llvm-project/tree/e03f427/llvm/lib/Target/Hexagon) -* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's hexagon disassembler. generated from manuals. -* [hexag00n](https://github.com/programa-stic/hexag00n): python-based hexagon disassembler with IDA plugin +* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's Hexagon disassembler. generated from manuals. +* [hexag00n](https://github.com/programa-stic/hexag00n): python-based Hexagon disassembler with IDA plugin * [hexagon](https://github.com/gsmk/hexagon): another IDA pro processor module. wrapper for Sourcery CodeBench. * [nogaxeh](https://github.com/ANSSI-FR/nogaxeh): another IDA pro processor module -* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): hexagon disassembler for rizin. generated from LLVM. +* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): Hexagon disassembler for rizin. generated from LLVM. ### changelog + a changelog across crate versions is maintained in the `CHANGELOG` file located in the repo, as well as [online](https://git.iximeow.net/yaxpeax-hexagon/tree/CHANGELOG). diff --git a/src/display.rs b/src/display.rs index b17cc90..fd9c101 100644 --- a/src/display.rs +++ b/src/display.rs @@ -935,7 +935,18 @@ impl fmt::Display for Operand { write!(f, "$+#{}", rel) } Operand::Gpr { reg } => { - write!(f, "R{}", reg) + const NAMES: [&'static str; 32] = [ + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23", + "R24", "R25", "R26", "R27", + // the three R29 through R31 general registers support subroutines and the Software + // Stack. ... 
they have symbol aliases that indicate when these registers are accessed + // as subroutine and stack registers (V73 Section 2.1) + "R28", "SP", "FP", "LR", + ]; + + f.write_str(NAMES[*reg as usize]) } Operand::Cr { reg } => { // V69 Table 2-2 Aliased control registers @@ -399,9 +399,10 @@ impl Default for InstFlags { #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u16)] pub enum Opcode { - /// TODO: remove. should never be shown. implies an instruction was parially decoded but - /// accepted? + // this variant should never be seen externally. + #[doc(hidden)] BUG, + // V73 Section 10.9 // > NOTE: When a constant extender is explicitly specified with a GP-relative load/store, the // > processor ignores the value in GP and creates the effective address directly from the 32-bit @@ -899,59 +900,6 @@ impl Opcode { } } -/* -/// TODO: don't know if this will be useful, but this is how V73 is described.. it also appears to -/// be the overall structure of the processor at least back to V5x. -/// TODO: how far back does this organization reflect reality? all the way to V2? 
-enum ExecutionUnit { - /// Load/store unit - /// LD, ST, ALU32, MEMOP, NV, SYSTEM - S0, - /// Load/store unit - /// LD, ST, ALU32 - S1, - /// X unit - /// XTYPE, ALU32, J, JR - S2, - /// X unit - /// XTYPE, ALU32, J, CR - S3 -} -*/ - -/// V73 Section 2.1: -/// > thirty-two 32-bit general-purpose registers (named R0 through R31) -/// -// TODO: figure out what of this needs to stick around -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -struct GPR(u8); - -// TODO: figure out what of this needs to stick around -#[allow(dead_code)] -impl GPR { - const SP: GPR = GPR(29); - const FP: GPR = GPR(30); - const LR: GPR = GPR(31); -} - -impl fmt::Display for GPR { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - const NAMES: [&'static str; 32] = [ - "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", - "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", - "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23", - "R24", "R25", "R26", "R27", - // the three R29 through R31 general registers support subroutines and the Software - // Stack. ... they have symbol aliases that indicate when these registers are accessed - // as subroutine and stack registers (V73 Section 2.1) - "R28", "SP", "FP", "LR", - ]; - - f.write_str(NAMES[self.0 as usize]) - } -} - /// V73 Section 2.2: /// > the Hexagon processor includes a set of 32-bit control registers that provide access to /// > processor features such as the program counter, hardware loops, and vector predicates. @@ -1141,6 +1089,14 @@ pub enum Operand { PCRel32 { rel: i32 }, /// `Rn`, a 32-bit register `R<reg>` + /// + /// V73 Section 2.1: + /// > thirty-two 32-bit general-purpose registers (named R0 through R31) + /// + /// the last three, `R29, R30, R31` are, when possible, shown as `SP, FP, LR`. they are not + /// necessarily required to serve the purposes of stack pointer, frame pointer, or link + /// register. 
they are, however, described as such by the manual and almost certainly used that + /// way by actual code. Gpr { reg: u8 }, /// `Cn`, a 32-bit control register `C<reg>` Cr { reg: u8 }, @@ -1705,8 +1661,7 @@ fn decode_packet< while !end { if current_word >= 4 { - panic!("TODO: instruction too large"); - // Err(DecodeError::InstructionTooLarge) + return Err(DecodeError::InvalidOpcode); } let inst: u32 = handler.read_inst_word(words)?; @@ -1731,7 +1686,7 @@ fn decode_packet< /* duplex instruction */ // see table 10-2 // exactly how subinstructions are encoded is unclear... - println!("duplex,"); + return Err(DecodeError::InvalidOpcode); } 0b01 | 0b10 => { /* nothing to do here */ } 0b11 => { diff --git a/tests/from_brain.rs b/tests/from_brain.rs index 6a5dd17..213d189 100644 --- a/tests/from_brain.rs +++ b/tests/from_brain.rs @@ -122,10 +122,10 @@ fn inst_0011() { test_display(&0b0011_1000100_00100_11_1_0_0010_101_11111u32.to_le_bytes(), "{ if (!P1) memb(R4+#5) = #-1 }"); test_invalid(&0b0011_1000111_00100_11_1_0_0010_101_11111u32.to_le_bytes(), DecodeError::InvalidOpcode); - test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ R31 = memb(R4 + R2<<3) }"); + test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ LR = memb(R4 + R2<<3) }"); test_display(&0b0011_1010001_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ R31:30 = memub(R4 + R2<<3) }"); - test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30 }"); + test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = FP }"); test_display(&0b0011_1011011_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30.H }"); test_display(&0b0011_1011101_00100_11_1_0_0010_100_10110u32.to_le_bytes(), "{ memw(R4 + R2<<3) = R6.new }"); |