summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md37
-rw-r--r--src/display.rs13
-rw-r--r--src/lib.rs71
-rw-r--r--tests/from_brain.rs4
4 files changed, 59 insertions, 66 deletions
diff --git a/README.md b/README.md
index 81f320d..2619be8 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,33 @@
[![crate](https://img.shields.io/crates/v/yaxpeax-hexagon.svg?logo=rust)](https://crates.io/crates/yaxpeax-hexagon)
[![documentation](https://docs.rs/yaxpeax-hexagon/badge.svg)](https://docs.rs/yaxpeax-hexagon)
-qualcomm hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`.
+Qualcomm Hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`.
+
+support is good enough to make sense of many programs, but not complete:
+
+- [x] instructions mentioned in the V62 and V73 manuals.
+- [x] system instructions documented in V62 and earlier
+- [ ] undocumented system instructions in V73 and later
+- [ ] HVX (in any version)
+- [ ] duplex instructions
+
+between V67 and V73, Qualcomm decided to remove most mentions of the Hexagon
+supervisor mode from their manuals. the LLVM target has had support for these
+instructions since late 2023, so LLVM-derived disassemblers should support
+them. it is not immediately clear to me that system instructions have the same
+encodings or semantics on later architectures - i largely lack programs known to
+target newer versions to validate that disassembly still looks reasonable.
+
+Hexagon in real use seems to rely on a hypervisor (probably
+Qualcomm-maintained? similar to their
+[minivm](https://github.com/quic/hexagonMVM)?) which system instructions are
+intended to support, then "User" and "Guest" modes which are more openly
+documented in public manuals. nonetheless, `hexagonMVM` uses these
+now-undocumented system instructions [for system register
+management](https://github.com/quic/hexagonMVM/blob/db795a9/minivm.S#L259), TLB
+management later on, traps, and so on. these system instructions are also
+important to process to make sense of the entrypoints of in-the-wild Hexagon
+firmware images.
### features
@@ -18,13 +44,14 @@ the canonical copy of `yaxpeax-hexagon` is at [https://git.iximeow.net/yaxpeax-h
### see also
-* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for hexagon. heavily derived from LLVM.
+* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for Hexagon. heavily derived from LLVM.
* [llvm](https://github.com/llvm/llvm-project/tree/e03f427/llvm/lib/Target/Hexagon)
-* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's hexagon disassembler. generated from manuals.
-* [hexag00n](https://github.com/programa-stic/hexag00n): python-based hexagon disassembler with IDA plugin
+* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's Hexagon disassembler. generated from manuals.
+* [hexag00n](https://github.com/programa-stic/hexag00n): python-based Hexagon disassembler with IDA plugin
* [hexagon](https://github.com/gsmk/hexagon): another IDA pro processor module. wrapper for Sourcery CodeBench.
* [nogaxeh](https://github.com/ANSSI-FR/nogaxeh): another IDA pro processor module
-* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): hexagon disassembler for rizin. generated from LLVM.
+* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): Hexagon disassembler for rizin. generated from LLVM.
### changelog
+
a changelog across crate versions is maintained in the `CHANGELOG` file located in the repo, as well as [online](https://git.iximeow.net/yaxpeax-hexagon/tree/CHANGELOG).
diff --git a/src/display.rs b/src/display.rs
index b17cc90..fd9c101 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -935,7 +935,18 @@ impl fmt::Display for Operand {
write!(f, "$+#{}", rel)
}
Operand::Gpr { reg } => {
- write!(f, "R{}", reg)
+ const NAMES: [&'static str; 32] = [
+ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
+ "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
+ "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23",
+ "R24", "R25", "R26", "R27",
+ // the three R29 through R31 general registers support subroutines and the Software
+ // Stack. ... they have symbol aliases that indicate when these registers are accessed
+ // as subroutine and stack registers (V73 Section 2.1)
+ "R28", "SP", "FP", "LR",
+ ];
+
+ f.write_str(NAMES[*reg as usize])
}
Operand::Cr { reg } => {
// V69 Table 2-2 Aliased control registers
diff --git a/src/lib.rs b/src/lib.rs
index dcb99fb..fc399e3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -399,9 +399,10 @@ impl Default for InstFlags {
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u16)]
pub enum Opcode {
- /// TODO: remove. should never be shown. implies an instruction was parially decoded but
- /// accepted?
+ // this variant should never be seen externally.
+ #[doc(hidden)]
BUG,
+
// V73 Section 10.9
// > NOTE: When a constant extender is explicitly specified with a GP-relative load/store, the
// > processor ignores the value in GP and creates the effective address directly from the 32-bit
@@ -899,59 +900,6 @@ impl Opcode {
}
}
-/*
-/// TODO: don't know if this will be useful, but this is how V73 is described.. it also appears to
-/// be the overall structure of the processor at least back to V5x.
-/// TODO: how far back does this organization reflect reality? all the way to V2?
-enum ExecutionUnit {
- /// Load/store unit
- /// LD, ST, ALU32, MEMOP, NV, SYSTEM
- S0,
- /// Load/store unit
- /// LD, ST, ALU32
- S1,
- /// X unit
- /// XTYPE, ALU32, J, JR
- S2,
- /// X unit
- /// XTYPE, ALU32, J, CR
- S3
-}
-*/
-
-/// V73 Section 2.1:
-/// > thirty-two 32-bit general-purpose registers (named R0 through R31)
-///
-// TODO: figure out what of this needs to stick around
-#[allow(dead_code)]
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-struct GPR(u8);
-
-// TODO: figure out what of this needs to stick around
-#[allow(dead_code)]
-impl GPR {
- const SP: GPR = GPR(29);
- const FP: GPR = GPR(30);
- const LR: GPR = GPR(31);
-}
-
-impl fmt::Display for GPR {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- const NAMES: [&'static str; 32] = [
- "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
- "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
- "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23",
- "R24", "R25", "R26", "R27",
- // the three R29 through R31 general registers support subroutines and the Software
- // Stack. ... they have symbol aliases that indicate when these registers are accessed
- // as subroutine and stack registers (V73 Section 2.1)
- "R28", "SP", "FP", "LR",
- ];
-
- f.write_str(NAMES[self.0 as usize])
- }
-}
-
/// V73 Section 2.2:
/// > the Hexagon processor includes a set of 32-bit control registers that provide access to
/// > processor features such as the program counter, hardware loops, and vector predicates.
@@ -1141,6 +1089,14 @@ pub enum Operand {
PCRel32 { rel: i32 },
/// `Rn`, a 32-bit register `R<reg>`
+ ///
+ /// V73 Section 2.1:
+ /// > thirty-two 32-bit general-purpose registers (named R0 through R31)
+ ///
+ /// the last three, `R29, R30, R31` are, when possible, shown as `SP, FP, LR`. they are not
+ /// necessarily required to serve the purposes of stack pointer, frame pointer, or link
+ /// register. they are, however, described as such by the manual and almost certainly used that
+ /// way by actual code.
Gpr { reg: u8 },
/// `Cn`, a 32-bit control register `C<reg>`
Cr { reg: u8 },
@@ -1705,8 +1661,7 @@ fn decode_packet<
while !end {
if current_word >= 4 {
- panic!("TODO: instruction too large");
- // Err(DecodeError::InstructionTooLarge)
+ return Err(DecodeError::InvalidOpcode);
}
let inst: u32 = handler.read_inst_word(words)?;
@@ -1731,7 +1686,7 @@ fn decode_packet<
/* duplex instruction */
// see table 10-2
// exactly how subinstructions are encoded is unclear...
- println!("duplex,");
+ return Err(DecodeError::InvalidOpcode);
}
0b01 | 0b10 => { /* nothing to do here */ }
0b11 => {
diff --git a/tests/from_brain.rs b/tests/from_brain.rs
index 6a5dd17..213d189 100644
--- a/tests/from_brain.rs
+++ b/tests/from_brain.rs
@@ -122,10 +122,10 @@ fn inst_0011() {
test_display(&0b0011_1000100_00100_11_1_0_0010_101_11111u32.to_le_bytes(), "{ if (!P1) memb(R4+#5) = #-1 }");
test_invalid(&0b0011_1000111_00100_11_1_0_0010_101_11111u32.to_le_bytes(), DecodeError::InvalidOpcode);
- test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ R31 = memb(R4 + R2<<3) }");
+ test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ LR = memb(R4 + R2<<3) }");
test_display(&0b0011_1010001_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ R31:30 = memub(R4 + R2<<3) }");
- test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30 }");
+ test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = FP }");
test_display(&0b0011_1011011_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30.H }");
test_display(&0b0011_1011101_00100_11_1_0_0010_100_10110u32.to_le_bytes(), "{ memw(R4 + R2<<3) = R6.new }");