From a54ad2dffffc8c66917c75786045f7c0cdc7d5eb Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 31 Oct 2021 12:37:07 -0700 Subject: SIMD load/store (multiple structure) in addition to the decoding support, objdump reporting of `{}` selection of multiple registers seems to be inconsistent. stick to the manual's preferred `{v1, v2, v3, v4}` nomenclature instead of `{v1-v4}`. reorder a few tests in test_openblas_simd_loadstore to group instructions by decode category --- test/armv8/a64.rs | 60 +++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'test/armv8') diff --git a/test/armv8/a64.rs b/test/armv8/a64.rs index 4d2815a..405c550 100644 --- a/test/armv8/a64.rs +++ b/test/armv8/a64.rs @@ -2722,12 +2722,23 @@ fn test_openblas_bitwise() { #[test] fn test_openblas_simd_loadstore() { const TESTS: &[([u8; 4], &'static str)] = &[ - ([0x88, 0x28, 0x00, 0x0c], "st1 {v8.2s-v11.2s}, [x4]"), - ([0x80, 0x2c, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x4]"), - ([0x84, 0x2c, 0x00, 0x0c], "st1 {v4.1d-v7.1d}, [x4]"), - ([0x80, 0x2d, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x12]"), - ([0x20, 0x2e, 0x00, 0x0c], "st1 {v0.1d-v3.1d}, [x17]"), - ([0x24, 0x2e, 0x00, 0x0c], "st1 {v4.1d-v7.1d}, [x17]"), + ([0x00, 0xd8, 0x21, 0x5e], "scvtf s0, s0"), + ([0x82, 0xd8, 0x61, 0x5e], "scvtf d2, d4"), + ([0x01, 0x00, 0x62, 0x9e], "scvtf d1, x0"), + ([0x03, 0x00, 0x62, 0x9e], "scvtf d3, x0"), + ([0x69, 0x03, 0x62, 0x9e], "scvtf d9, x27"), + ([0x88, 0x03, 0x62, 0x9e], "scvtf d8, x28"), + ([0x22, 0x00, 0x22, 0x1e], "scvtf s2, w1"), + ([0xac, 0x02, 0x22, 0x1e], "scvtf s12, w21"), + ([0x00, 0x00, 0x62, 0x1e], "scvtf d0, w0"), + ([0x8a, 0x03, 0x62, 0x1e], "scvtf d10, w28"), + ([0x03, 0xe4, 0x00, 0x2f], "movi d3, 0x0"), + ([0x88, 0x28, 0x00, 0x0c], "st1 {v8.2s, v9.2s, v10.2s, v11.2s}, [x4]"), + ([0x80, 0x2c, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x4]"), + ([0x84, 0x2c, 0x00, 0x0c], "st1 {v4.1d, v5.1d, v6.1d, v7.1d}, [x4]"), + ([0x80, 0x2d, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x12]"), + ([0x20, 0x2e, 0x00, 0x0c], "st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x17]"), + ([0x24, 0x2e, 0x00, 0x0c], "st1 {v4.1d, v5.1d, v6.1d, v7.1d}, [x17]"), ([0x60, 0x78, 0x00, 0x0c], "st1 {v0.2s}, [x3]"), ([0x62, 0x78, 0x00, 0x0c], "st1 {v2.2s}, [x3]"), ([0x88, 0x79, 0x00, 0x0c], "st1 {v8.2s}, [x12]"), @@ -2763,9 +2774,9 @@ fn test_openblas_simd_loadstore() { ([0x22, 0x84, 0xc2, 0x0d], "ld1 {v2.d}[0], [x1], x2"), ([0x61, 0x80, 0xc4, 0x0d], "ld1 {v1.s}[0], [x3], x4"), ([0x24, 0xc9, 0xdf, 0x0d], "ld1r {v4.2s}, [x9], 0x4"), // TODO: could use a test for "ld1r {v4.2s}, [x9]" - ([0x88, 0x28, 0x00, 0x4c], "st1 {v8.4s-v11.4s}, [x4]"), - ([0x60, 0x2d, 0x00, 0x4c], "st1 {v0.2d-v3.2d}, [x11]"), - ([0x9c, 0x2e, 0x00, 0x4c], "st1 {v28.2d-v31.2d}, [x20]"), + ([0x88, 0x28, 0x00, 0x4c], "st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x4]"), + ([0x60, 0x2d, 0x00, 0x4c], "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x11]"), + ([0x9c, 0x2e, 0x00, 0x4c], "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x20]"), ([0x60, 0x7c, 0x00, 0x4c], "st1 {v0.2d}, [x3]"), ([0x62, 0x7c, 0x00, 0x4c], "st1 {v2.2d}, [x3]"), ([0x88, 0x7d, 0x00, 0x4c], "st1 {v8.2d}, [x12]"), @@ -2779,12 +2790,12 @@ fn test_openblas_simd_loadstore() { ([0x66, 0xad, 0x00, 0x4c], "st1 {v6.2d, v7.2d}, [x11]"), ([0xc8, 0xad, 0x00, 0x4c], "st1 {v8.2d, v9.2d}, [x14]"), ([0xec, 0xad, 0x00, 0x4c], "st1 {v12.2d, v13.2d}, [x15]"), - ([0x21, 0x28, 0x40, 0x4c], "ld1 {v1.4s-v4.4s}, [x1]"), - ([0x22, 0x2c, 0x40, 0x4c], "ld1 {v2.2d-v5.2d}, [x1]"), - ([0x61, 0x2c, 0x40, 0x4c], "ld1 {v1.2d-v4.2d}, [x3]"), - ([0xb0, 0x2c, 0x40, 0x4c], "ld1 {v16.2d-v19.2d}, [x5]"), - ([0x80, 0x2d, 0x40, 0x4c], "ld1 {v0.2d-v3.2d}, [x12]"), - ([0xa4, 0x2d, 0x40, 0x4c], "ld1 {v4.2d-v7.2d}, [x13]"), + ([0x21, 0x28, 0x40, 0x4c], "ld1 {v1.4s, v2.4s, v3.4s, v4.4s}, [x1]"), + ([0x22, 0x2c, 0x40, 0x4c], "ld1 {v2.2d, v3.2d, v4.2d, v5.2d}, [x1]"), + ([0x61, 0x2c, 0x40, 0x4c], "ld1 {v1.2d, v2.2d, v3.2d, v4.2d}, [x3]"), + ([0xb0, 0x2c, 0x40, 0x4c], "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x5]"), + ([0x80, 0x2d, 0x40, 0x4c], "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x12]"), + ([0xa4, 0x2d, 0x40, 0x4c], "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x13]"), ([0x68, 0x79, 0x40, 0x4c], "ld1 {v8.4s}, [x11]"), ([0x6c, 0x79, 0x40, 0x4c], "ld1 {v12.4s}, [x11]"), ([0x00, 0x7e, 0x40, 0x4c], "ld1 {v0.2d}, [x16]"), @@ -2801,8 +2812,8 @@ fn test_openblas_simd_loadstore() { ([0xa4, 0x8d, 0x40, 0x4c], "ld2 {v4.2d, v5.2d}, [x13]"), ([0xa6, 0x8d, 0x40, 0x4c], "ld2 {v6.2d, v7.2d}, [x13]"), ([0xa3, 0x7c, 0x86, 0x4c], "st1 {v3.2d}, [x5], x6"), - ([0x61, 0x2c, 0x9f, 0x4c], "st1 {v1.2d-v4.2d}, [x3], 0x40"), - ([0xb0, 0x2c, 0x9f, 0x4c], "st1 {v16.2d-v19.2d}, [x5], 0x40"), + ([0x61, 0x2c, 0x9f, 0x4c], "st1 {v1.2d, v2.2d, v3.2d, v4.2d}, [x3], 0x40"), + ([0xb0, 0x2c, 0x9f, 0x4c], "st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x5], 0x40"), ([0x24, 0x78, 0x9f, 0x4c], "st1 {v4.4s}, [x1], 0x10"), ([0xa5, 0x7d, 0x9f, 0x4c], "st1 {v5.2d}, [x13], 0x10"), ([0xa4, 0x88, 0x9f, 0x4c], "st2 {v4.4s, v5.4s}, [x5], 0x20"), @@ -2810,8 +2821,8 @@ fn test_openblas_simd_loadstore() { ([0xb0, 0xad, 0x9f, 0x4c], "st1 {v16.2d, v17.2d}, [x13], 0x20"), ([0x20, 0x7c, 0xc2, 0x4c], "ld1 {v0.2d}, [x1], x2"), ([0x46, 0x7d, 0xc6, 0x4c], "ld1 {v6.2d}, [x10], x6"), - ([0x20, 0x0c, 0xdf, 0x4c], "ld4 {v0.2d-v3.2d}, [x1], 0x40"), - ([0x51, 0x2d, 0xdf, 0x4c], "ld1 {v17.2d-v20.2d}, [x10], 0x40"), + ([0x20, 0x0c, 0xdf, 0x4c], "ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x1], 0x40"), + ([0x51, 0x2d, 0xdf, 0x4c], "ld1 {v17.2d, v18.2d, v19.2d, v20.2d}, [x10], 0x40"), ([0x20, 0x78, 0xdf, 0x4c], "ld1 {v0.4s}, [x1], 0x10"), ([0x21, 0x78, 0xdf, 0x4c], "ld1 {v1.4s}, [x1], 0x10"), ([0x46, 0x7d, 0xdf, 0x4c], "ld1 {v6.2d}, [x10], 0x10"), @@ -2987,17 +2998,6 @@ fn test_openblas_simd_loadstore() { ([0x88, 0x7c, 0x00, 0x2d], "stp s8, s31, [x4]"), ([0x03, 0x84, 0x00, 0x2d], "stp s3, s1, [x0, 0x4]"), ([0x13, 0xdc, 0x3f, 0x2d], "stp s19, s23, [x0, -0x4]"), - ([0x03, 0xe4, 0x00, 0x2f], "movi d3, 0x0"), - ([0x00, 0xd8, 0x21, 0x5e], "scvtf s0, s0"), - ([0x82, 0xd8, 0x61, 0x5e], "scvtf d2, d4"), - ([0x01, 0x00, 0x62, 0x9e], "scvtf d1, x0"), - ([0x03, 0x00, 0x62, 0x9e], "scvtf d3, x0"), - ([0x69, 0x03, 0x62, 0x9e], "scvtf d9, x27"), - ([0x88, 0x03, 0x62, 0x9e], "scvtf d8, x28"), - ([0x22, 0x00, 0x22, 0x1e], "scvtf s2, w1"), - ([0xac, 0x02, 0x22, 0x1e], "scvtf s12, w21"), - ([0x00, 0x00, 0x62, 0x1e], "scvtf d0, w0"), - ([0x8a, 0x03, 0x62, 0x1e], "scvtf d10, w28"), ]; let errs = run_tests(TESTS); -- cgit v1.1