aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2026-03-27 07:05:18 +0000
committeriximeow <me@iximeow.net>2026-05-25 00:59:27 +0000
commit6c32405ca9930f393d8ca45d22df1b5a1c7c8653 (patch)
tree3986d0c5a4544c1bebe35b8095cf3d04920e3f20
parent35358d573bccd0776e55187564ccd6c72c9f9c34 (diff)
more accurate mov seg-to-gpr operand size
-rw-r--r--CHANGELOG6
-rw-r--r--src/long_mode/mod.rs2
-rw-r--r--src/protected_mode/mod.rs2
-rw-r--r--test/long_mode/mod.rs10
-rw-r--r--test/protected_mode/mod.rs10
-rw-r--r--test/real_mode/mod.rs6
6 files changed, 34 insertions, 2 deletions
diff --git a/CHANGELOG b/CHANGELOG
index a4275f0..8d88ec7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,12 @@
* push-immediate, pushf, popf, enter, leave, and xlat now all report a correct memory
access size, fixing the prior behavior of reporting no memory access size at
all
+* 64-bit and 32-bit modes: mov seg-to-reg uses 32-bit GPRs for the destination rather than 16-bit.
+ * this is more accurate to the semantics of the instruction, which is why other disassemblers
+ report it this way; for register destinations specifically the segment selector is
+ zero-extended to 64 bits for storage. writing to "eax" in this way implies the 32->64 bit
+ zero-extend, whereas writing to "ax" does not imply any zero-extension. mov reg-to-seg
+ is unchanged and uses a 16-bit form for the source GPR.
## 2.0.0
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index 500c9f9..2971313 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -8106,7 +8106,7 @@ fn read_operands<
instruction.operand_count = 2;
if mem_oper == OperandSpec::RegMMM {
- instruction.regs[1].bank = RegisterBank::W;
+ instruction.regs[1].bank = RegisterBank::D;
} else {
instruction.mem_size = 2;
}
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index 10bc9d6..6701071 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -7905,7 +7905,7 @@ fn read_operands<
instruction.operand_count = 2;
if mem_oper == OperandSpec::RegMMM {
- instruction.regs[1].bank = RegisterBank::W;
+ instruction.regs[1].bank = RegisterBank::D;
} else {
instruction.mem_size = 2;
}
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index a76bbda..dc82cd4 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -1252,12 +1252,22 @@ fn test_mov() {
test_display(&[0x0f, 0x97, 0x08], "seta byte [rax]");
// test_display(&[0xd6], "salc");
test_display(&[0x8e, 0x00], "mov es, word [rax]");
+ test_display(&[0x8e, 0xc0], "mov es, ax");
+ test_display(&[0x8c, 0xc0], "mov eax, es");
// cs is not an allowed destination
test_invalid(&[0x8e, 0x08]);
test_display(&[0x8e, 0x10], "mov ss, word [rax]");
+ test_display(&[0x8e, 0xd0], "mov ss, ax");
+ test_display(&[0x8c, 0xd0], "mov eax, ss");
test_display(&[0x8e, 0x18], "mov ds, word [rax]");
+ test_display(&[0x8e, 0xd8], "mov ds, ax");
+ test_display(&[0x8c, 0xd8], "mov eax, ds");
test_display(&[0x8e, 0x20], "mov fs, word [rax]");
+ test_display(&[0x8e, 0xe0], "mov fs, ax");
+ test_display(&[0x8c, 0xe0], "mov eax, fs");
test_display(&[0x8e, 0x28], "mov gs, word [rax]");
+ test_display(&[0x8e, 0xe8], "mov gs, ax");
+ test_display(&[0x8c, 0xe8], "mov eax, gs");
test_invalid(&[0x8e, 0x30]);
test_invalid(&[0x8e, 0x38]);
}
diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs
index 29e9ec4..78a061a 100644
--- a/test/protected_mode/mod.rs
+++ b/test/protected_mode/mod.rs
@@ -1075,12 +1075,22 @@ fn test_mov() {
test_display(&[0x0f, 0x97, 0x08], "seta byte [eax]");
// test_display(&[0xd6], "salc");
test_display(&[0x8e, 0x00], "mov es, word [eax]");
+ test_display(&[0x8e, 0xc0], "mov es, ax");
+ test_display(&[0x8c, 0xc0], "mov eax, es");
// cs is not an allowed destination
test_invalid(&[0x8e, 0x08]);
test_display(&[0x8e, 0x10], "mov ss, word [eax]");
+ test_display(&[0x8e, 0xd0], "mov ss, ax");
+ test_display(&[0x8c, 0xd0], "mov eax, ss");
test_display(&[0x8e, 0x18], "mov ds, word [eax]");
+ test_display(&[0x8e, 0xd8], "mov ds, ax");
+ test_display(&[0x8c, 0xd8], "mov eax, ds");
test_display(&[0x8e, 0x20], "mov fs, word [eax]");
+ test_display(&[0x8e, 0xe0], "mov fs, ax");
+ test_display(&[0x8c, 0xe0], "mov eax, fs");
test_display(&[0x8e, 0x28], "mov gs, word [eax]");
+ test_display(&[0x8e, 0xe8], "mov gs, ax");
+ test_display(&[0x8c, 0xe8], "mov eax, gs");
test_invalid(&[0x8e, 0x30]);
test_invalid(&[0x8e, 0x38]);
}
diff --git a/test/real_mode/mod.rs b/test/real_mode/mod.rs
index 844a95e..5fb109f 100644
--- a/test/real_mode/mod.rs
+++ b/test/real_mode/mod.rs
@@ -16981,8 +16981,14 @@ fn test_real_mode() {
test_display(&[0x8b, 0x4c, 0x10], "mov cx, word [si + 0x10]");
test_display(&[0x8d, 0x53, 0x08], "lea dx, word [bp + di * 1 + 0x8]");
test_display(&[0x8e, 0x00], "mov es, word [bx + si * 1]");
+ test_display(&[0x8e, 0xc0], "mov es, ax");
+ test_display(&[0x8c, 0xc0], "mov ax, es");
test_display(&[0x8e, 0x10], "mov ss, word [bx + si * 1]");
+ test_display(&[0x8e, 0xd0], "mov ss, ax");
+ test_display(&[0x8c, 0xd0], "mov ax, ss");
test_display(&[0x8e, 0x18], "mov ds, word [bx + si * 1]");
+ test_display(&[0x8e, 0xd8], "mov ds, ax");
+ test_display(&[0x8c, 0xd8], "mov ax, ds");
test_display(&[0x8e, 0x20], "mov fs, word [bx + si * 1]");
test_display(&[0x8e, 0x28], "mov gs, word [bx + si * 1]");
test_display(&[0x8f, 0x00], "pop word [bx + si * 1]");