4 files changed, 59 insertions, 66 deletions
diff --git a/README.md b/README.md
index 81f320d..2619be8 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,33 @@
 [![crate](https://img.shields.io/crates/v/yaxpeax-hexagon.svg?logo=rust)](https://crates.io/crates/yaxpeax-hexagon)
 [![documentation](https://docs.rs/yaxpeax-hexagon/badge.svg)](https://docs.rs/yaxpeax-hexagon)
 
-qualcomm hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`.
+Qualcomm Hexagon decoder implemented as part of the yaxpeax project, implementing traits provided by `yaxpeax-arch`.
+
+support is good enough to make sense of many programs, but not complete:
+
+- [x] instructions mentioned in the V62 and V73 manuals.
+- [x] system instructions documented in V62 and earlier
+- [ ] undocumented system instructions in V73 and later
+- [ ] HVX (in any version)
+- [ ] duplex instructions
+
+between V67 and V73, Qualcomm decided to remove most mentions of the Hexagon
+supervisor mode from their manuals. the LLVM target has had support for these
+instructions since late 2023, so LLVM-derived disassemblers should support
+them. it is not immediately clear to me that system instructions have the same
+encodings or semantics on later architectures - i largely lack programs known to
+target newer versions to validate that disassembly still looks reasonable.
+
+Hexagon in real use seems to rely on a hypervisor (probably
+Qualcomm-maintained? similar to their
+[minivm](https://github.com/quic/hexagonMVM)?) which system instructions are
+intended to support, then "User" and "Guest" modes which are more openly
+documented in public manuals. nonetheless, `hexagonMVM` uses these
+now-undocumented system instructions [for system register
+management](https://github.com/quic/hexagonMVM/blob/db795a9/minivm.S#L259), TLB
+management later on, traps, and so on. these system instructions are also
+important to process to make sense of the entrypoints of in-the-wild Hexagon
+firmware images.
 
 ### features
 
@@ -18,13 +44,14 @@ the canonical copy of `yaxpeax-hexagon` is at [https://git.iximeow.net/yaxpeax-h
 
 ### see also
 
-* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for hexagon. heavily derived from LLVM.
+* [idp\_hexagon](https://github.com/n-o-o-n/idp_hexagon): IDA pro module for Hexagon. heavily derived from LLVM.
 * [llvm](https://github.com/llvm/llvm-project/tree/e03f427/llvm/lib/Target/Hexagon)
-* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's hexagon disassembler. generated from manuals.
-* [hexag00n](https://github.com/programa-stic/hexag00n): python-based hexagon disassembler with IDA plugin
+* [r2hexagon](https://github.com/radareorg/r2hexagon): radare2's Hexagon disassembler. generated from manuals.
+* [hexag00n](https://github.com/programa-stic/hexag00n): python-based Hexagon disassembler with IDA plugin
 * [hexagon](https://github.com/gsmk/hexagon): another IDA pro processor module. wrapper for Sourcery CodeBench.
 * [nogaxeh](https://github.com/ANSSI-FR/nogaxeh): another IDA pro processor module
-* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): hexagon disassembler for rizin. generated from LLVM.
+* [rz-hexagon](https://github.com/rizinorg/rz-hexagon): Hexagon disassembler for rizin. generated from LLVM.
 
 ### changelog
+
 a changelog across crate versions is maintained in the `CHANGELOG` file located in the repo, as well as [online](https://git.iximeow.net/yaxpeax-hexagon/tree/CHANGELOG).
diff --git a/src/display.rs b/src/display.rs
index b17cc90..fd9c101 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -935,7 +935,18 @@ impl fmt::Display for Operand {
                 write!(f, "$+#{}", rel)
             }
             Operand::Gpr { reg } => {
-                write!(f, "R{}", reg)
+                const NAMES: [&'static str; 32] = [
+                    "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
+                    "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
+                    "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23",
+                    "R24", "R25", "R26", "R27",
+                    // the three R29 through R31 general registers support subroutines and the Software
+                    // Stack. ... they have symbol aliases that indicate when these registers are accessed
+                    // as subroutine and stack registers (V73 Section 2.1)
+                    "R28", "SP", "FP", "LR",
+                ];
+
+                f.write_str(NAMES[*reg as usize])
             }
             Operand::Cr { reg } => {
                 // V69 Table 2-2 Aliased control registers
diff --git a/src/lib.rs b/src/lib.rs
index dcb99fb..fc399e3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -399,9 +399,10 @@ impl Default for InstFlags {
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[repr(u16)]
 pub enum Opcode {
-    /// TODO: remove. should never be shown. implies an instruction was parially decoded but
-    /// accepted?
+    // this variant should never be seen externally.
+    #[doc(hidden)]
     BUG,
+
     // V73 Section 10.9
     // > NOTE: When a constant extender is explicitly specified with a GP-relative load/store, the
     // > processor ignores the value in GP and creates the effective address directly from the 32-bit
@@ -899,59 +900,6 @@ impl Opcode {
     }
 }
 
-/*
-/// TODO: don't know if this will be useful, but this is how V73 is described.. it also appears to
-/// be the overall structure of the processor at least back to V5x.
-/// TODO: how far back does this organization reflect reality? all the way to V2?
-enum ExecutionUnit {
-    /// Load/store unit
-    /// LD, ST, ALU32, MEMOP, NV, SYSTEM
-    S0,
-    /// Load/store unit
-    /// LD, ST, ALU32
-    S1,
-    /// X unit
-    /// XTYPE, ALU32, J, JR
-    S2,
-    /// X unit
-    /// XTYPE, ALU32, J, CR
-    S3
-}
-*/
-
-/// V73 Section 2.1:
-/// > thirty-two 32-bit general-purpose registers (named R0 through R31)
-///
-// TODO: figure out what of this needs to stick around
-#[allow(dead_code)]
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-struct GPR(u8);
-
-// TODO: figure out what of this needs to stick around
-#[allow(dead_code)]
-impl GPR {
-    const SP: GPR = GPR(29);
-    const FP: GPR = GPR(30);
-    const LR: GPR = GPR(31);
-}
-
-impl fmt::Display for GPR {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        const NAMES: [&'static str; 32] = [
-            "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
-            "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
-            "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23",
-            "R24", "R25", "R26", "R27",
-            // the three R29 through R31 general registers support subroutines and the Software
-            // Stack. ... they have symbol aliases that indicate when these registers are accessed
-            // as subroutine and stack registers (V73 Section 2.1)
-            "R28", "SP", "FP", "LR",
-        ];
-
-        f.write_str(NAMES[self.0 as usize])
-    }
-}
-
 /// V73 Section 2.2:
 /// > the Hexagon processor includes a set of 32-bit control registers that provide access to
 /// > processor features such as the program counter, hardware loops, and vector predicates.
@@ -1141,6 +1089,14 @@ pub enum Operand {
     PCRel32 { rel: i32 },
 
     /// `Rn`, a 32-bit register `R<reg>`
+    ///
+    /// V73 Section 2.1:
+    /// > thirty-two 32-bit general-purpose registers (named R0 through R31)
+    ///
+    /// the last three, `R29, R30, R31` are, when possible, shown as `SP, FP, LR`. they are not
+    /// necessarily required to serve the purposes of stack pointer, frame pointer, or link
+    /// register. they are, however, described as such by the manual and almost certainly used that
+    /// way by actual code.
     Gpr { reg: u8 },
     /// `Cn`, a 32-bit control register `C<reg>`
     Cr { reg: u8 },
@@ -1705,8 +1661,7 @@ fn decode_packet<
 
     while !end {
         if current_word >= 4 {
-            panic!("TODO: instruction too large");
-            // Err(DecodeError::InstructionTooLarge)
+            return Err(DecodeError::InvalidOpcode);
         }
 
         let inst: u32 = handler.read_inst_word(words)?;
@@ -1731,7 +1686,7 @@ fn decode_packet<
                 /* duplex instruction */
                 // see table 10-2
                 // exactly how subinstructions are encoded is unclear...
-                println!("duplex,");
+                return Err(DecodeError::InvalidOpcode);
             }
             0b01 | 0b10 => { /* nothing to do here */ }
             0b11 => {
diff --git a/tests/from_brain.rs b/tests/from_brain.rs
index 6a5dd17..213d189 100644
--- a/tests/from_brain.rs
+++ b/tests/from_brain.rs
@@ -122,10 +122,10 @@ fn inst_0011() {
     test_display(&0b0011_1000100_00100_11_1_0_0010_101_11111u32.to_le_bytes(), "{ if (!P1) memb(R4+#5) = #-1 }");
     test_invalid(&0b0011_1000111_00100_11_1_0_0010_101_11111u32.to_le_bytes(), DecodeError::InvalidOpcode);
 
-    test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ R31 = memb(R4 + R2<<3) }");
+    test_display(&0b0011_1010000_00100_11_1_0_0010_100_11111u32.to_le_bytes(), "{ LR = memb(R4 + R2<<3) }");
     test_display(&0b0011_1010001_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ R31:30 = memub(R4 + R2<<3) }");
 
-    test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30 }");
+    test_display(&0b0011_1011010_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = FP }");
     test_display(&0b0011_1011011_00100_11_1_0_0010_100_11110u32.to_le_bytes(), "{ memh(R4 + R2<<3) = R30.H }");
     test_display(&0b0011_1011101_00100_11_1_0_0010_100_10110u32.to_le_bytes(), "{ memw(R4 + R2<<3) = R6.new }");