aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author iximeow <me@iximeow.net> 2021-10-31 12:37:07 -0700
committer iximeow <me@iximeow.net> 2021-10-31 12:37:07 -0700
commit a54ad2dffffc8c66917c75786045f7c0cdc7d5eb (patch)
tree 5eca16ddb97d85fc288e0722039b004549539245
parent f7df8cf15d5ff4a8ede83bad9ea0c2ba7dbd8040 (diff)
SIMD load/store (multiple structure)
In addition to the decoding support: objdump's reporting of `{}` selections of multiple registers seems to be inconsistent, so stick to the manual's preferred `{v1, v2, v3, v4}` nomenclature instead of `{v1-v4}`. Also reorder a few tests in test_openblas_simd_loadstore to group instructions by decode category.
-rw-r--r-- src/armv8/a64.rs 117
-rw-r--r-- test/armv8/a64.rs 60
2 files changed, 145 insertions, 32 deletions
diff --git a/src/armv8/a64.rs b/src/armv8/a64.rs
index e62d3b6..da0df7c 100644
--- a/src/armv8/a64.rs
+++ b/src/armv8/a64.rs
@@ -3644,11 +3644,124 @@ impl Decoder<ARMv8> for InstDecoder {
},
0b00100 => {
// AdvSIMD load/store multiple structures
- return Err(DecodeError::IncompleteDecoder);
+ let Rt = word & 0x1f;
+ let Rn = (word >> 5) & 0x1f;
+ let size = (word >> 10) & 0x03;
+ let opcode_bits = (word >> 12) & 0x0f;
+ let Rm = (word >> 16) & 0x1f;
+ if Rm != 0 {
+ return Err(DecodeError::InvalidOperand);
+ }
+ let L = (word >> 22) & 0x01;
+ let Q = (word >> 30) & 0x01;
+ let datasize = if Q == 1 { SIMDSizeCode::Q } else { SIMDSizeCode::D };
+
+ const OPCODES: &[Result<(Opcode, u8), DecodeError>] = &[
+ // opcode == 0b0000
+ Ok((Opcode::ST4, 4)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 4)),
+ Err(DecodeError::InvalidOpcode),
+ // opcode == 0b0100
+ Ok((Opcode::ST3, 3)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 3)),
+ Ok((Opcode::ST1, 1)),
+ // opcode == 0b1000
+ Ok((Opcode::ST2, 2)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 2)),
+ Err(DecodeError::InvalidOpcode),
+ ];
+
+ let (opcode, num_regs) = OPCODES[opcode_bits as usize]?;
+
+ inst.opcode = if L == 0 {
+ opcode
+ } else {
+ if opcode == Opcode::ST1 {
+ Opcode::LD1
+ } else if opcode == Opcode::ST2 {
+ Opcode::LD2
+ } else if opcode == Opcode::ST3 {
+ Opcode::LD3
+ } else {
+ Opcode::LD4
+ }
+ };
+ const SIZES: [SIMDSizeCode; 4] = [
+ SIMDSizeCode::B,
+ SIMDSizeCode::H,
+ SIMDSizeCode::S,
+ SIMDSizeCode::D,
+ ];
+ inst.operands = [
+ Operand::SIMDRegisterGroup(datasize, Rt as u16, SIZES[size as usize], num_regs),
+ Operand::RegPostIndex(Rn as u16, 0),
+ Operand::Nothing,
+ Operand::Nothing,
+ ];
},
0b00101 => {
// AdvSIMD load/store multiple structures (post-indexed)
- return Err(DecodeError::IncompleteDecoder);
+ let Rt = word & 0x1f;
+ let Rn = (word >> 5) & 0x1f;
+ let size = (word >> 10) & 0x03;
+ let opcode_bits = (word >> 12) & 0x0f;
+ let Rm = (word >> 16) & 0x1f;
+ let L = (word >> 22) & 0x01;
+ let Q = (word >> 30) & 0x01;
+ let datasize = if Q == 1 { SIMDSizeCode::Q } else { SIMDSizeCode::D };
+
+ const OPCODES: &[Result<(Opcode, u8), DecodeError>] = &[
+ // opcode == 0b0000
+ Ok((Opcode::ST4, 4)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 4)),
+ Err(DecodeError::InvalidOpcode),
+ // opcode == 0b0100
+ Ok((Opcode::ST3, 3)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 3)),
+ Ok((Opcode::ST1, 1)),
+ // opcode == 0b1000
+ Ok((Opcode::ST2, 2)),
+ Err(DecodeError::InvalidOpcode),
+ Ok((Opcode::ST1, 2)),
+ Err(DecodeError::InvalidOpcode),
+ ];
+
+ let (opcode, num_regs) = OPCODES[opcode_bits as usize]?;
+
+ inst.opcode = if L == 0 {
+ opcode
+ } else {
+ if opcode == Opcode::ST1 {
+ Opcode::LD1
+ } else if opcode == Opcode::ST2 {
+ Opcode::LD2
+ } else if opcode == Opcode::ST3 {
+ Opcode::LD3
+ } else {
+ Opcode::LD4
+ }
+ };
+ const SIZES: [SIMDSizeCode; 4] = [
+ SIMDSizeCode::B,
+ SIMDSizeCode::H,
+ SIMDSizeCode::S,
+ SIMDSizeCode::D,
+ ];
+ inst.operands = [
+ Operand::SIMDRegisterGroup(datasize, Rt as u16, SIZES[size as usize], num_regs),
+ if Rm == 31 {
+ Operand::RegPostIndex(Rn as u16, (datasize.width() * (num_regs as u16)) as i32)
+ } else {
+ Operand::RegPostIndexReg(Rn as u16, Rm as u16)
+ },
+ Operand::Nothing,
+ Operand::Nothing,
+ ];
},
0b00110 => {
// AdvSIMD load/store single structure
diff --git a/test/armv8/a64.rs b/test/armv8/a64.rs
index 4d2815a..405c550 100644
--- a/test/armv8/a64.rs
+++ b/test/armv8/a64.rs
@@ -2722,12 +2722,23 @@ fn test_openblas_bitwise() {
#[test]
fn test_openblas_simd_loadstore() {
const TESTS: &[([u8; 4], &'static str)] = &[
- ([0x88, 0x28, 0x00, 0x0c], "st1 {v8.2s-v11.2s}, [x4]"),
- ([0x80, 0x2c, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x4]"),
- ([0x84, 0x2c, 0x00, 0x0c], "st1 {v4.1d-v7.1d}, [x4]"),
- ([0x80, 0x2d, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x12]"),
- ([0x20, 0x2e, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x17]"),
- ([0x24, 0x2e, 0x00, 0x0c], "st1 {v4.1d-v7.1d}, [x17]"),
+ ([0x00, 0xd8, 0x21, 0x5e], "scvtf s0, s0"),
+ ([0x82, 0xd8, 0x61, 0x5e], "scvtf d2, d4"),
+ ([0x01, 0x00, 0x62, 0x9e], "scvtf d1, x0"),
+ ([0x03, 0x00, 0x62, 0x9e], "scvtf d3, x0"),
+ ([0x69, 0x03, 0x62, 0x9e], "scvtf d9, x27"),
+ ([0x88, 0x03, 0x62, 0x9e], "scvtf d8, x28"),
+ ([0x22, 0x00, 0x22, 0x1e], "scvtf s2, w1"),
+ ([0xac, 0x02, 0x22, 0x1e], "scvtf s12, w21"),
+ ([0x00, 0x00, 0x62, 0x1e], "scvtf d0, w0"),
+ ([0x8a, 0x03, 0x62, 0x1e], "scvtf d10, w28"),
+ ([0x03, 0xe4, 0x00, 0x2f], "movi d3, 0x0"),
+ ([0x88, 0x28, 0x00, 0x0c], "st1 {v8.2s, v9.2s, v10.2s, v11.2s}, [x4]"),
+ ([0x80, 0x2c, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x4]"),
+ ([0x84, 0x2c, 0x00, 0x0c], "st1 {v4.1d, v5.1d, v6.1d, v7.1d}, [x4]"),
+ ([0x80, 0x2d, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x12]"),
+ ([0x20, 0x2e, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x17]"),
+ ([0x24, 0x2e, 0x00, 0x0c], "st1 {v4.1d, v5.1d, v6.1d, v7.1d}, [x17]"),
([0x60, 0x78, 0x00, 0x0c], "st1 {v0.2s}, [x3]"),
([0x62, 0x78, 0x00, 0x0c], "st1 {v2.2s}, [x3]"),
([0x88, 0x79, 0x00, 0x0c], "st1 {v8.2s}, [x12]"),
@@ -2763,9 +2774,9 @@ fn test_openblas_simd_loadstore() {
([0x22, 0x84, 0xc2, 0x0d], "ld1 {v2.d}[0], [x1], x2"),
([0x61, 0x80, 0xc4, 0x0d], "ld1 {v1.s}[0], [x3], x4"),
([0x24, 0xc9, 0xdf, 0x0d], "ld1r {v4.2s}, [x9], 0x4"), // TODO: could use a test for "ld1r {v4.2s}, [x9]"
- ([0x88, 0x28, 0x00, 0x4c], "st1 {v8.4s-v11.4s}, [x4]"),
- ([0x60, 0x2d, 0x00, 0x4c], "st1 {v0.2d-v3.2d}, [x11]"),
- ([0x9c, 0x2e, 0x00, 0x4c], "st1 {v28.2d-v31.2d}, [x20]"),
+ ([0x88, 0x28, 0x00, 0x4c], "st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x4]"),
+ ([0x60, 0x2d, 0x00, 0x4c], "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x11]"),
+ ([0x9c, 0x2e, 0x00, 0x4c], "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x20]"),
([0x60, 0x7c, 0x00, 0x4c], "st1 {v0.2d}, [x3]"),
([0x62, 0x7c, 0x00, 0x4c], "st1 {v2.2d}, [x3]"),
([0x88, 0x7d, 0x00, 0x4c], "st1 {v8.2d}, [x12]"),
@@ -2779,12 +2790,12 @@ fn test_openblas_simd_loadstore() {
([0x66, 0xad, 0x00, 0x4c], "st1 {v6.2d, v7.2d}, [x11]"),
([0xc8, 0xad, 0x00, 0x4c], "st1 {v8.2d, v9.2d}, [x14]"),
([0xec, 0xad, 0x00, 0x4c], "st1 {v12.2d, v13.2d}, [x15]"),
- ([0x21, 0x28, 0x40, 0x4c], "ld1 {v1.4s-v4.4s}, [x1]"),
- ([0x22, 0x2c, 0x40, 0x4c], "ld1 {v2.2d-v5.2d}, [x1]"),
- ([0x61, 0x2c, 0x40, 0x4c], "ld1 {v1.2d-v4.2d}, [x3]"),
- ([0xb0, 0x2c, 0x40, 0x4c], "ld1 {v16.2d-v19.2d}, [x5]"),
- ([0x80, 0x2d, 0x40, 0x4c], "ld1 {v0.2d-v3.2d}, [x12]"),
- ([0xa4, 0x2d, 0x40, 0x4c], "ld1 {v4.2d-v7.2d}, [x13]"),
+ ([0x21, 0x28, 0x40, 0x4c], "ld1 {v1.4s, v2.4s, v3.4s, v4.4s}, [x1]"),
+ ([0x22, 0x2c, 0x40, 0x4c], "ld1 {v2.2d, v3.2d, v4.2d, v5.2d}, [x1]"),
+ ([0x61, 0x2c, 0x40, 0x4c], "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [x3]"),
+ ([0xb0, 0x2c, 0x40, 0x4c], "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x5]"),
+ ([0x80, 0x2d, 0x40, 0x4c], "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x12]"),
+ ([0xa4, 0x2d, 0x40, 0x4c], "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x13]"),
([0x68, 0x79, 0x40, 0x4c], "ld1 {v8.4s}, [x11]"),
([0x6c, 0x79, 0x40, 0x4c], "ld1 {v12.4s}, [x11]"),
([0x00, 0x7e, 0x40, 0x4c], "ld1 {v0.2d}, [x16]"),
@@ -2801,8 +2812,8 @@ fn test_openblas_simd_loadstore() {
([0xa4, 0x8d, 0x40, 0x4c], "ld2 {v4.2d, v5.2d}, [x13]"),
([0xa6, 0x8d, 0x40, 0x4c], "ld2 {v6.2d, v7.2d}, [x13]"),
([0xa3, 0x7c, 0x86, 0x4c], "st1 {v3.2d}, [x5], x6"),
- ([0x61, 0x2c, 0x9f, 0x4c], "st1 {v1.2d-v4.2d}, [x3], 0x40"),
- ([0xb0, 0x2c, 0x9f, 0x4c], "st1 {v16.2d-v19.2d}, [x5], 0x40"),
+ ([0x61, 0x2c, 0x9f, 0x4c], "st1 {v1.2d, v2.2d, v3.2d, v4.2d}, [x3], 0x40"),
+ ([0xb0, 0x2c, 0x9f, 0x4c], "st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x5], 0x40"),
([0x24, 0x78, 0x9f, 0x4c], "st1 {v4.4s}, [x1], 0x10"),
([0xa5, 0x7d, 0x9f, 0x4c], "st1 {v5.2d}, [x13], 0x10"),
([0xa4, 0x88, 0x9f, 0x4c], "st2 {v4.4s, v5.4s}, [x5], 0x20"),
@@ -2810,8 +2821,8 @@ fn test_openblas_simd_loadstore() {
([0xb0, 0xad, 0x9f, 0x4c], "st1 {v16.2d, v17.2d}, [x13], 0x20"),
([0x20, 0x7c, 0xc2, 0x4c], "ld1 {v0.2d}, [x1], x2"),
([0x46, 0x7d, 0xc6, 0x4c], "ld1 {v6.2d}, [x10], x6"),
- ([0x20, 0x0c, 0xdf, 0x4c], "ld4 {v0.2d-v3.2d}, [x1], 0x40"),
- ([0x51, 0x2d, 0xdf, 0x4c], "ld1 {v17.2d-v20.2d}, [x10], 0x40"),
+ ([0x20, 0x0c, 0xdf, 0x4c], "ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x1], 0x40"),
+ ([0x51, 0x2d, 0xdf, 0x4c], "ld1 {v17.2d, v18.2d, v19.2d, v20.2d}, [x10], 0x40"),
([0x20, 0x78, 0xdf, 0x4c], "ld1 {v0.4s}, [x1], 0x10"),
([0x21, 0x78, 0xdf, 0x4c], "ld1 {v1.4s}, [x1], 0x10"),
([0x46, 0x7d, 0xdf, 0x4c], "ld1 {v6.2d}, [x10], 0x10"),
@@ -2987,17 +2998,6 @@ fn test_openblas_simd_loadstore() {
([0x88, 0x7c, 0x00, 0x2d], "stp s8, s31, [x4]"),
([0x03, 0x84, 0x00, 0x2d], "stp s3, s1, [x0, 0x4]"),
([0x13, 0xdc, 0x3f, 0x2d], "stp s19, s23, [x0, -0x4]"),
- ([0x03, 0xe4, 0x00, 0x2f], "movi d3, 0x0"),
- ([0x00, 0xd8, 0x21, 0x5e], "scvtf s0, s0"),
- ([0x82, 0xd8, 0x61, 0x5e], "scvtf d2, d4"),
- ([0x01, 0x00, 0x62, 0x9e], "scvtf d1, x0"),
- ([0x03, 0x00, 0x62, 0x9e], "scvtf d3, x0"),
- ([0x69, 0x03, 0x62, 0x9e], "scvtf d9, x27"),
- ([0x88, 0x03, 0x62, 0x9e], "scvtf d8, x28"),
- ([0x22, 0x00, 0x22, 0x1e], "scvtf s2, w1"),
- ([0xac, 0x02, 0x22, 0x1e], "scvtf s12, w21"),
- ([0x00, 0x00, 0x62, 0x1e], "scvtf d0, w0"),
- ([0x8a, 0x03, 0x62, 0x1e], "scvtf d10, w28"),
];
let errs = run_tests(TESTS);