From 97447561496921f2a086ebeb993464012a560570 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 17 Jul 2021 21:33:58 -0700 Subject: implement an lc87 decoder --- .gitignore | 2 + CHANGELOG | 3 + Cargo.toml | 17 ++ README.md | 26 +++ src/display.rs | 164 ++++++++++++++++++ src/lib.rs | 517 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test/test.rs | 31 ++++ 7 files changed, 760 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/display.rs create mode 100644 src/lib.rs create mode 100644 test/test.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..92c531a --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,3 @@ +# 1.0.0 + +* first release, decoder exists diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..19b252a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] + +name = "yaxpeax-lc87" +version = "1.0.0" +authors = [ "iximeow " ] +license = "0BSD" +repository = "http://git.iximeow.net/yaxpeax-lc87/" +description = "lc87 decoder for the yaxpeax project" +keywords = ["disassembler", "lc87", "sanyo"] +edition = "2018" + +[dependencies] +yaxpeax-arch = { version = "0.2.4", default-features = false, features = [] } + +[[test]] +name = "test" +path = "test/test.rs" diff --git a/README.md b/README.md new file mode 100644 index 0000000..28b4d99 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +## yaxpeax-lc87 + +[![crate](https://img.shields.io/crates/v/yaxpeax-lc87.svg?logo=rust)](https://crates.io/crates/yaxpeax-lc87) +[![documentation](https://docs.rs/yaxpeax-lc87/badge.svg)](https://docs.rs/yaxpeax-lc87) + +an `lc87` decoder implemented as part of the yaxpeax project, including traits provided by [`yaxpeax-arch`](https://git.iximeow.net/yaxpeax-arch/about/). 
+ +users of this library will either want to use [quick and dirty APIs](https://docs.rs/yaxpeax-lc87/latest/yaxpeax_lc87/index.html#usage), or more generic decode interfaces from `yaxpeax-arch` - appropriate when mixing `yaxpeax-lc87` with other `yaxpeax` decoders, such as `yaxpeax-x86`. + +### features + +* it exists +* pretty small? +* `#[no_std]` + +### it exists + +i'm aware of only one other `lc87` decoder on the internet: [chrisnoisel's Ghidra work](https://github.com/chrisnoisel/ghidra/tree/lc87). + +### pretty small? + +the `lc87` instruction set is very small. the decoder is about 300 lines of Rust. it seems plausible that there is more rodata in the form of opcode strings, than actual code to disassemble instructions. + +### `#[no_std]` + +if, for some reason, you want to disassemble `lc87` instructions without the Rust standard library around, that should work. this is primarily for consistency with other decoders than any need, and is not particularly tested. diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..39a7943 --- /dev/null +++ b/src/display.rs @@ -0,0 +1,164 @@ +use core::fmt; + +impl fmt::Display for crate::Instruction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.opcode)?; + for i in 0..self.operand_count { + f.write_str(" ")?; + write!(f, "{:?}", self.operands[i as usize])?; + if i + 1 < self.operand_count { + f.write_str(",")?; + } + } + Ok(()) + } +} + +impl fmt::Debug for crate::Operand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + <Self as fmt::Display>::fmt(self, f) // Debug output is the Display rendering + } +} + +impl fmt::Display for crate::Operand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use crate::Operand::*; + match self { + Nothing => { Ok(()) }, + BranchAbsU17 { addr } => { + if addr < &10 { + write!(f, "{}", addr) + } else { + write!(f, "0x{:x}", addr) + } + } + BranchRelU12 { rel } => { + if rel < &10 { + write!(f, "$+{}", rel) + } else { + write!(f, "$+0x{:x}", rel) + } + } + ImmU8 { imm 
} => { + write!(f, "#{:02x}h", imm) + } + ImmU16 { imm } => { + write!(f, "#{:04x}h", imm) + } + AbsU16 { addr } => { + write!(f, "{:04x}h", addr) + } + BitIndex { index } => { + write!(f, "{}", index) + } + IndirectReg { n } => { + write!(f, "[R{}]", n) + } + IndirectRegPlusC { n } => { + write!(f, "[R{}, C]", n) + } + R0Offset { off } => { + if off < &10 { + write!(f, "[{}]", off) + } else { + write!(f, "[{:x}h]", off) + } + } + } + } +} + +impl fmt::Debug for crate::Opcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + <Self as fmt::Display>::fmt(self, f) // Debug output is the Display rendering + } +} + +impl fmt::Display for crate::Opcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use crate::Opcode::*; + match self { + ADD => f.write_str("add"), + ADDC => f.write_str("addc"), + ADDDC => f.write_str("adddc"), + AND => f.write_str("and"), + BE => f.write_str("be"), + BN => f.write_str("bn"), + BNE => f.write_str("bne"), + BNM => f.write_str("bnm"), + BNZ => f.write_str("bnz"), + BNZW => f.write_str("bnzw"), + BP => f.write_str("bp"), + BPC => f.write_str("bpc"), + BPM => f.write_str("bpm"), + BR => f.write_str("br"), + BZ => f.write_str("bz"), + CALL => f.write_str("call"), + CLR1 => f.write_str("clr1"), + CLR1M => f.write_str("clr1m"), + DBNZ => f.write_str("dbnz"), + DBZ => f.write_str("dbz"), + DEC => f.write_str("dec"), + DECL => f.write_str("decl"), + DECW => f.write_str("decw"), + DIV16 => f.write_str("div16"), + DIV24 => f.write_str("div24"), + FADD => f.write_str("fadd"), + FADDC => f.write_str("faddc"), + FADDCW => f.write_str("faddcw"), + FADDW => f.write_str("faddw"), + FAND => f.write_str("fand"), + FANDW => f.write_str("fandw"), + FNOR => f.write_str("fnor"), + FNORW => f.write_str("fnorw"), + FOR => f.write_str("for"), + FORW => f.write_str("forw"), + FSUB => f.write_str("fsub"), + FSUBC => f.write_str("fsubc"), + FSUBCW => f.write_str("fsubcw"), + FSUBW => f.write_str("fsubw"), + FXOR => f.write_str("fxor"), + FXORW => f.write_str("fxorw"), + INC => f.write_str("inc"), + INCL 
=> f.write_str("incl"), + INCW => f.write_str("incw"), + JMP => f.write_str("jmp"), + LD => f.write_str("ld"), + LDCW => f.write_str("ldcw"), + LDW => f.write_str("ldw"), + LDX => f.write_str("ldx"), + MOV => f.write_str("mov"), + MUL16 => f.write_str("mul16"), + MUL24 => f.write_str("mul24"), + NOP => f.write_str("nop"), + NOT1 => f.write_str("not1"), + NOT1M => f.write_str("not1m"), + OR => f.write_str("or"), + POP => f.write_str("pop"), + POPW => f.write_str("popw"), + POP_BA => f.write_str("pop_ba"), + POP_P => f.write_str("pop_p"), + PUHS_BA => f.write_str("push_ba"), // variant name carries a typo; the mnemonic itself is push_ba + PUSH => f.write_str("push"), + PUSHW => f.write_str("pushw"), + PUSH_P => f.write_str("push_p"), + RCALL => f.write_str("rcall"), + RCALLA => f.write_str("rcalla"), + RET => f.write_str("ret"), + RETI => f.write_str("reti"), + ROL => f.write_str("rol"), + ROLC => f.write_str("rolc"), + ROR => f.write_str("ror"), + RORC => f.write_str("rorc"), + SET1 => f.write_str("set1"), + SET1M => f.write_str("set1m"), + ST => f.write_str("st"), + STW => f.write_str("stw"), + STX => f.write_str("stx"), + SUB => f.write_str("sub"), + SUBC => f.write_str("subc"), + XCH => f.write_str("xch"), + XCHW => f.write_str("xchw"), + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6469dc5 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,517 @@ +//! # `yaxpeax-lc87`, a decoder for the LC87 instruction set +//! +//! the LC87 instruction set is used in the LC87 series of microcontrollers, originally developed +//! by Sanyo, whose semiconductor division was acquired by [ON Semiconductor in +//! 2011](https://www.onsemi.com/PowerSolutions/newsItem.do?article=2458). LC87 parts are typically +//! named `LC87*`, are all 8-bit controllers, and range between 10-pin 8kb-of-flash and 100-pin +//! 256kb-of-flash sizes. +//! +//! in theory there exists an `LC87 Series Users's Manual` but it appears to have never existed online in original Japanese, or English translation. 
(the existence of an English translation is suspected but unconfirmed). by coincidence, LC87 instructions are described in the public `LC872H00` datasheet, describing specifically `LC872H00` parts. because the instruction set is shared across the LC87 family of microcontrollers, the instruction set listing in this manual describes the instruction set of the rest of the family. +//! +//! datasheet: [`ANDLC872H00-D.PDF`](https://www.onsemi.com/pub/Collateral/ANDLC872H00-D.PDF). +//! `sha256: 9cefe73a252468bbbfb81a28e59cb9444c4c49586a616c873958b39ad4fa7b35` +//! +//! ## usage +//! +//! the fastest way to decode an lc87 instruction is through +//! [`InstDecoder::decode_slice()`]: +//! ``` +//! use yaxpeax_lc87::InstDecoder; +//! +//! let inst = InstDecoder::decode_slice(&[0x0a, 0x10, 0x3f]).unwrap(); +//! +//! assert_eq!("bp 0010h, 2, $+0x3f", inst.to_string()); +//! ``` +//! +//! opcodes and operands are available on the decoded instruction, as well as its length and +//! operand count: +//! ``` +//! use yaxpeax_lc87::{InstDecoder, Operand}; +//! +//! let inst = InstDecoder::decode_slice(&[0x0a, 0x10, 0x3f]).unwrap(); +//! +//! assert_eq!("bp 0010h, 2, $+0x3f", inst.to_string()); +//! assert_eq!(inst.operand_count(), 3); +//! assert_eq!(inst.len(), 3); +//! assert_eq!(inst.operand(0).unwrap(), Operand::AbsU16 { addr: 0x0010 }); +//! assert_eq!(inst.operand(1).unwrap(), Operand::BitIndex { index: 2 }); +//! ``` +//! +//! additionally, `yaxpeax-lc87` implements `yaxpeax-arch` traits for generic use, such as +//! [`yaxpeax_arch::LengthedInstruction`]. [`yaxpeax_arch::Arch`] is implemented by +//! the unit struct [`LC87`]. +//! +//! ## `#![no_std]` +//! +//! `yaxpeax-lc87` should support `no_std` usage, but this is entirely untested. + +#![no_std] + +mod display; + +use yaxpeax_arch::{AddressDiff, Arch, Decoder, LengthedInstruction, Reader, StandardDecodeError}; + +/// a trivial struct for [`yaxpeax_arch::Arch`] to be implemented on. 
it's only interesting for the +/// associated type parameters. +#[derive(Hash, Eq, PartialEq, Debug, Copy, Clone)] +pub struct LC87; + +impl Arch for LC87 { + type Address = u16; + type Word = u8; + type Instruction = Instruction; + type Decoder = InstDecoder; + type DecodeError = StandardDecodeError; + type Operand = Operand; +} + +/// an `lc87` instruction. +/// +/// `lc87` instructions have an [`Opcode`] and up to three [`Operand`]s. they are no more than four +/// bytes long. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct Instruction { + opcode: Opcode, + operands: [Operand; 3], + operand_count: u8, + length: u8, +} + +impl Default for Instruction { + fn default() -> Instruction { + Instruction { + opcode: Opcode::NOP, + operands: [Operand::Nothing, Operand::Nothing, Operand::Nothing], + operand_count: 0, + length: 0, + } + } +} + +impl Instruction { + fn reset_operands(&mut self) { + self.operands = [Operand::Nothing, Operand::Nothing, Operand::Nothing]; + self.operand_count = 0; + } + + fn with_operand(&mut self, operand: Operand) { + self.operands[self.operand_count as usize] = operand; + self.operand_count += 1; + } + + pub fn len(&self) -> u8 { + self.length + } + + pub fn operand_count(&self) -> u8 { + self.operand_count + } + + pub fn operand(&self, idx: u8) -> Option<Operand> { + self.operands[..self.operand_count as usize].get(idx as usize).cloned() // only populated slots are visible: idx >= operand_count yields None, never Some(Operand::Nothing) + } +} + +impl LengthedInstruction for Instruction { + type Unit = AddressDiff<<LC87 as Arch>::Address>; + fn min_size() -> Self::Unit { + AddressDiff::from_const(1) + } + fn len(&self) -> Self::Unit { + AddressDiff::from_const(self.length as u16) + } +} + +impl yaxpeax_arch::Instruction for Instruction { + fn well_defined(&self) -> bool { true } +} + +/// an operand for an `lc87` instruction. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub enum Operand { + /// no operand in this position. + /// + /// reaching this as a user of `yaxpeax_lc87` is almost certainly a bug. 
`Instruction::operand` + /// will return `None` rather than `Operand::Nothing`. + Nothing, + /// branch to the absolute address `addr`. + BranchAbsU17 { addr: u32 }, + /// branch to the relative address `rel` (`PC` + `inst.len()` + `rel`). + BranchRelU12 { rel: u16 }, // in practice it looks like all branches are forward? + /// an 8-bit immediate. + /// + /// the meaning of this immediate is opcode-dependent, but usually a value used for a bitwise + /// or arithmetic operation. + ImmU8 { imm: u8 }, + /// a 16-bit immediate. + /// + /// this is only used for the wide `mov` that loads both `A` and `B` registers at once. `A` + /// gets the low byte, `B` gets the high byte. + ImmU16 { imm: u16 }, + /// a memory access to an absolute 16-bit address. + AbsU16 { addr: u16 }, + /// an address of a bit in some byte. + /// + /// this is coupled with some memory operand which specifies the byte in question. the usage of + /// the bit selected by address/bit varies by opcode. + BitIndex { index: u8 }, + /// a memory access to the address specified by indirect register `Rn`. + /// + /// n may only be in the range `[0, 63]`, inclusive. indirect registers are pairs of bytes `n * + /// 2` and `n * 2 + 1` from zero. for example, indirect register 5 would select the address + /// formed by the word at memory `0x0a` and `0x0b`. + IndirectReg { n: u8 }, + /// a memory access to the address specified by indirect register `Rn` plus signed displacement + /// from register `C`. + /// + /// n may only be in the range `[0, 63]`, inclusive. indirect registers are pairs of bytes `n * + /// 2` and `n * 2 + 1` from zero. for example, indirect register 5 would select the address + /// formed by the word at memory `0x0a` and `0x0b`. + IndirectRegPlusC { n: u8 }, + /// a memory access to the address specified by indirect register `R0` plus the signed offset + /// `off`. + /// + /// `off` may only be in the range `[-64, 63]`, inclusive. 
+ R0Offset { off: i8 }, +} + +/// an `lc87` instruction's operation. +/// +/// instruction descriptions are best referenced from the +/// [`lc87` manual](https://www.onsemi.com/pub/Collateral/ANDLC872H00-D.PDF) for the moment. +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub enum Opcode { + ADD, + ADDC, + ADDDC, + AND, + BE, + BN, + BNE, + BNM, + BNZ, + BNZW, + BP, + BPC, + BPM, + BR, + BZ, + CALL, + CLR1, + CLR1M, + DBNZ, + DBZ, + DEC, + DECL, + DECW, + DIV16, + DIV24, + FADD, + FADDC, + FADDCW, + FADDW, + FAND, + FANDW, + FNOR, + FNORW, + FOR, + FORW, + FSUB, + FSUBC, + FSUBCW, + FSUBW, + FXOR, + FXORW, + INC, + INCL, + INCW, + JMP, + LD, + LDCW, + LDW, + LDX, + MOV, + MUL16, + MUL24, + NOP, + NOT1, + NOT1M, + OR, + POP, + POPW, + POP_BA, + POP_P, + PUHS_BA, + PUSH, + PUSHW, + PUSH_P, + RCALL, + RCALLA, + RET, + RETI, + ROL, + ROLC, + ROR, + RORC, + SET1, + SET1M, + ST, + STW, + STX, + SUB, + SUBC, + XCH, + XCHW, +} + +/// an `lc87` instruction decoder. +/// +/// there are no decode options for `lc87`, so this is a trivial struct that exists only for the +/// [`yaxpeax_arch::Decoder`] trait impl. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct InstDecoder { } + +impl InstDecoder { + /// decode a slice of bytes into an instruction (or error) + /// + /// this is just a higher-level interface to the [`InstDecoder`] impl of + /// [`yaxpeax_arch::Decoder`]. 
+ pub fn decode_slice(data: &[u8]) -> Result<Instruction, <LC87 as Arch>::DecodeError> { + use yaxpeax_arch::U8Reader; + + InstDecoder::default() + .decode(&mut U8Reader::new(data)) + } +} + +impl Default for InstDecoder { + fn default() -> Self { + InstDecoder { } + } +} + +impl Decoder<LC87> for InstDecoder { + fn decode_into<T: Reader<<LC87 as Arch>::Address, <LC87 as Arch>::Word>>(&self, inst: &mut Instruction, words: &mut T) -> Result<(), <LC87 as Arch>::DecodeError> { + inst.length = 0; + inst.reset_operands(); + words.mark(); + let word = words.next()?; + + use Opcode::*; + use Operand::*; + const fn invalid() -> Result<Opcode, StandardDecodeError> { + Err(StandardDecodeError::InvalidOpcode) + } + + let low = word & 0x0f; + if low == 0 { + const LOW_ZERO_OPCODE_TABLE: [Result<Opcode, StandardDecodeError>; 16] = [ + Ok(NOP), Ok(RCALLA), Ok(JMP), Ok(CALL), + Ok(NOP), invalid(), Ok(PUHS_BA),Ok(POP_BA), + Ok(PUSH_P), Ok(POP_P), Ok(RET), Ok(RETI), + Ok(ROR), Ok(RORC), Ok(ROL), Ok(ROLC), + ]; + inst.opcode = LOW_ZERO_OPCODE_TABLE[(word as usize) >> 4]?; + if word == 0x20 || word == 0x30 { + inst.with_operand(BranchAbsU17 { + addr: + ((word as u32 & 1) << 16) | + ((words.next()? as u32) << 8) | + words.next()? as u32 + }); + } else if word == 0x40 { + let mut selector = words.next()?; + selector = (selector << 2) | (selector >> 6); + if selector > 3 { + return Err(StandardDecodeError::InvalidOpcode); + } else { + inst.opcode = [DIV16, MUL16, DIV24, MUL24][selector as usize]; + } + } + } else if low == 1 { + const LOW_ONE_OPCODE_TABLE: [Opcode; 16] = [ + BE, BNE, JMP, CALL, + BNZ, BNZW, PUSH, LDX, + LD, STX, ADD, ADDDC, + SUB, SUBC, OR, AND, + ]; + inst.opcode = LOW_ONE_OPCODE_TABLE[(word as usize) >> 4]; + if word < 0x20 { // word == 0x01 || word == 0x11 + inst.with_operand(ImmU8 { imm: words.next()? }); + inst.with_operand(BranchRelU12 { rel: words.next()? as u16 }); + } else if word < 0x40 { // word == 0x21 || word == 0x31 + inst.with_operand(BranchAbsU17 { + addr: + ((word as u32 & 1) << 16) | + ((words.next()? as u32) << 8) | + words.next()? 
as u32 + }); + } else if word < 0x60 { // word == 0x41 || word == 0x51 + inst.with_operand(BranchRelU12 { rel: words.next()? as u16 }); + } else if word == 0x71 || word == 0x91 { + inst.with_operand(typical_operand_decode(2, words)?); + } else { + inst.with_operand(ImmU8 { imm: words.next()? }); + } + } else if low <= 6 { + const LOW_OPCODE_TABLE: [Opcode; 16] = [ + BE, BNE, DBNZ, DBZ, + MOV, XCH, PUSH, POP, + LD, ST, ADD, ADDC, + SUB, SUBC, OR, AND, + ]; + + inst.opcode = LOW_OPCODE_TABLE[(word as usize) >> 4]; + inst.with_operand(typical_operand_decode(word & 7, words)?); + if inst.opcode == MOV { + inst.with_operand(ImmU8 { imm: words.next()? }); + let (op_0, op_1) = inst.operands.split_at_mut(1); + core::mem::swap(&mut op_0[0], &mut op_1[0]); + } + } else if low == 7 { + const LOW_SEVEN_OPCODE_TABLE: [Opcode; 16] = [ + LDW, STW, PUSHW, POPW, + LDW, XCHW, PUSHW, POPW, + LDW, STW, BPC, NOT1M, + BNM, CLR1M, BPM, SET1M, + ]; + inst.opcode = LOW_SEVEN_OPCODE_TABLE[(word as usize) >> 4]; + + if word < 0x40 { // 0x07, 0x17, 0x27, 0x37 + inst.with_operand(typical_operand_decode(2, words)?); + } else if word < 0x50 { // 0x47 + inst.with_operand(ImmU16 { + imm: (words.next()? as u16) | ((words.next()? as u16) << 8) + }) + } else if word < 0xa0 { // 0x57, 0x67, 0x77, 0x87, 0x97 + inst.with_operand(typical_operand_decode(6, words)?); + } else { + let low = words.next()?; + let high = words.next()?; + + inst.with_operand(AbsU16 { addr: ((high as u16 & 0x3f) << 8) | (low as u16) }); + inst.with_operand(BitIndex { index: high >> 5 }); + let (op_0, op_1) = inst.operands.split_at_mut(1); + core::mem::swap(&mut op_0[0], &mut op_1[0]); + if word & 0b0001_0000 == 0 { // bit 4 clear selects bpc/bnm/bpm (0xa7/0xc7/0xe7); presumably only those carry a relative target, not not1m/clr1m/set1m - confirm against the datasheet + inst.with_operand(BranchRelU12 { rel: words.next()? as u16 }); + } + } + } else { // the upper half of opcode space by low nibble: 0bXXXX_1XXX. 
+ // there's a batch of opcodes that use the low three bits as operands, another large + // chunk of instruction space + let opc_bits = word >> 5; + const HIGH_OPCODE_TABLE: [Option<Opcode>; 8] = [ + Some(BP), Some(BN), Some(RCALL), Some(BR), + None, Some(NOT1), Some(CLR1), Some(SET1), + ]; + if let Some(opc) = HIGH_OPCODE_TABLE[opc_bits as usize] { + inst.opcode = opc; + if opc_bits == 2 || opc_bits == 3 { + inst.with_operand(BranchRelU12 { + rel: + ((word as u16) & 0b00001_0000) << 7 | + ((word as u16) & 0b00000_0111) << 8 | + words.next()? as u16 + }); + } else { + let mut addr = words.next()? as u16; + if word & 0b0001_0000 != 0 { + addr += 0xfe00; + } + inst.with_operand(AbsU16 { addr }); + inst.with_operand(BitIndex { index: word & 7 }); + + if word < 0x40 { + inst.with_operand(BranchRelU12 { rel: words.next()? as u16 }); + } + } + } else { + if word < 0x90 { + if word < 0x89 { + inst.opcode = LDCW; + inst.with_operand(typical_operand_decode(2, words)?); + } else if word < 0x8a { + inst.opcode = BZ; + inst.with_operand(BranchRelU12 { rel: words.next()? as u16 }); + } else if word < 0x8e { + inst.opcode = INC; + inst.with_operand(typical_operand_decode(word & 7, words)?); + } else if word < 0x8f { + inst.opcode = INCL; + inst.with_operand(typical_operand_decode(6, words)?); + } else { + inst.opcode = INCW; + inst.with_operand(typical_operand_decode(word & 7, words)?); + } + } else { + if word < 0x99 { + const OPC_98_TABLE: [Opcode; 16] = [ + FADD, FADDC, FSUB, FSUBC, + FNOR, FAND, FOR, FXOR, + FADDW, FADDCW, FSUBW, FSUBCW, + FNORW, FANDW, FORW, FXORW, + ]; + let word = words.next()?; // stay in u8: widening first would keep the shifted-out bits and reject every nonzero selector + let word = (word >> 4) | (word << 4); + if word >= 16 { + return Err(StandardDecodeError::InvalidOpcode); + } + inst.opcode = OPC_98_TABLE[word as usize]; + } else if word < 0x9a { + inst.opcode = BNZ; + inst.with_operand(BranchRelU12 { rel: words.next()? 
as u16 }); + } else if word < 0x9e { + inst.opcode = DEC; + inst.with_operand(typical_operand_decode(word & 7, words)?); + } else if word < 0x9f { + inst.opcode = DECL; + inst.with_operand(typical_operand_decode(6, words)?); + } else { + inst.opcode = DECW; + inst.with_operand(typical_operand_decode(word & 7, words)?); + } + } + } + } + + inst.length = words.offset() as u8; + Ok(()) + } +} + +fn typical_operand_decode<T: Reader<<LC87 as Arch>::Address, <LC87 as Arch>::Word>>(operand_kind: u8, words: &mut T) -> Result<Operand, <LC87 as Arch>::DecodeError> { + // all of these are at least one additional word.. + let operand = words.next()?; + + match operand_kind { + 2 => { + if operand < 0x80 { + if operand & 1 == 0 { + Ok(Operand::IndirectReg { n: (operand >> 1) & 0x3f }) + } else { + Ok(Operand::IndirectRegPlusC { n: (operand >> 1) & 0x3f }) + } + } else { + Ok(Operand::R0Offset { off: (((operand & 0x7f) as i8) << 1) >> 1 }) + } + } + 3 => { + Ok(Operand::AbsU16 { addr: operand as u16 + 0xfe00 }) + } + 4 => { + Ok(Operand::AbsU16 { addr: operand as u16 }) + } + 5 => { + Ok(Operand::AbsU16 { addr: operand as u16 + 0x100 }) + } + 6 => { + let high = words.next()?; + Ok(Operand::AbsU16 { addr: operand as u16 | ((high as u16) << 8) }) + } + _ => { + Err(StandardDecodeError::InvalidOperand) // kind 7 is reachable via `word & 7` for incw (0x8f) and decw (0x9f); reject the bytes instead of panicking + } + } +} diff --git a/test/test.rs b/test/test.rs new file mode 100644 index 0000000..3fe9dbe --- /dev/null +++ b/test/test.rs @@ -0,0 +1,31 @@ +use yaxpeax_arch::Decoder; + +fn test_display(data: &[u8], expected: &'static str) { + let mut reader = yaxpeax_arch::U8Reader::new(data); + match yaxpeax_lc87::InstDecoder::default().decode(&mut reader) { + Ok(instr) => { + let displayed = instr.to_string(); + assert_eq!(&displayed, expected); + assert_eq!(data.len() as u8, instr.len()); + } + Err(e) => { + panic!("failed to decode {:02x?}: {}", data, e); + } + } +} + +#[test] +fn test_disassembly() { +// test_display(&[0x43, 0x0a, 0x1f], "mov #13h, spl"); + test_display(&[0x43, 0x0a, 0x1f], "mov #1fh, fe0ah"); +// test_display(&[0x43, 0x0b, 0x00], "mov #00h, sph"); + 
test_display(&[0x43, 0x0b, 0x00], "mov #00h, fe0bh"); + test_display(&[0x47, 0x34, 0x12], "ldw #1234h"); + test_display(&[0x47, 0x78, 0x56], "ldw #5678h"); + test_display(&[0x97, 0x12, 0xfe], "stw fe12h"); + test_display(&[0x97, 0xc0, 0x00], "stw 00c0h"); + test_display(&[0x49, 0x00], "rcall $+0x100"); + test_display(&[0x59, 0x00], "rcall $+0x900"); + test_display(&[0x08, 0x10, 0x3f], "bp 0010h, 0, $+0x3f"); + test_display(&[0x0a, 0x10, 0x3f], "bp 0010h, 2, $+0x3f"); +} -- cgit v1.1