diff options
author | iximeow <me@iximeow.net> | 2022-07-22 02:15:40 -0700 |
---|---|---|
committer | iximeow <me@iximeow.net> | 2023-01-02 08:50:22 -0800 |
commit | e6e7fabfc907eaf016520d9c2a2e128b0d28f019 (patch) | |
tree | c08280a4dc9cbd3d02926fd8e6476d514c10393d /data/generate_opcode.py | |
parent | 78b40bb12a265d866b01547924defb10aec283a8 (diff) |
yax builds again with opcodes generated by type
Diffstat (limited to 'data/generate_opcode.py')
-rw-r--r-- | data/generate_opcode.py | 318 |
1 files changed, 318 insertions, 0 deletions
# Reconstructed from the cgit diff view of data/generate_opcode.py
# (new file, blob e02785d).
"""Generate Rust opcode tables from the x86_64 ISA description.

Reads ``./x86_64.json`` and writes three Rust source files:

* ``../src/generated/mod.rs``    - per-mode re-export modules
* ``../src/generated/opcode.rs`` - the global ``Opcode`` enum, the
  ``MNEMONICS`` table and one ``Opcode`` enum per decoder mode
* ``../src/generated/imp.rs``    - per-mode ``feature_*`` accessors and
  ``revise_instruction``

All paths are relative to the current working directory.
"""

import json

# Every uarch named here contributes its instructions to the global opcode
# enum. "x86_generic" is skipped when emitting the per-mode modules.
ROOTS = ["real_mode", "protected_mode", "long_mode", "x86_generic"]

# Populated by main(): the union of the instructions of every root.
ALL_INSTRUCTIONS = set()
# Populated by main(): instruction name -> index in the sorted instruction
# list (the same order the global enum and MNEMONICS are emitted in).
# Nothing in this file reads it back.
INSTRUCTION_NUMS = {}

OPCODE_REPR = "u16"
OPCODE_ANNOTATIONS = [
    "#[allow(non_camel_case_types)]",
    "#[derive(Copy, Clone, Debug, Eq, PartialEq)]",
    "#[non_exhaustive]",
    "#[repr({})]".format(OPCODE_REPR),
]


class IsaData:
    """Index of the extension sets and microarchitectures in the ISA data.

    ``data`` is the decoded JSON document. It must have a ``'sets'`` list
    (each entry has ``'name'`` and ``'new'``) and a ``'uarch'`` list (each
    entry has ``'name'``, ``'vendor'`` and ``'sets'``).

    Entries of a ``'sets'``/``'new'`` list are strings. A leading ``'+'``
    is a reference to another uarch (in a uarch's ``'sets'``) or another
    extension set (in a set's ``'new'``). Entries with a leading ``'/'``
    are skipped when resolving sets - presumably comments; confirm against
    the data file.
    """

    def __init__(self, data):
        # Use the argument, not a module-level global, so an instance can be
        # built from any decoded document.
        self.exts = {ext['name']: ext for ext in data['sets']}
        self.uarches = {arch['name']: arch for arch in data['uarch']}
        # Every extension set reached by any resolution so far.
        self.referenced_sets = set()

    def compile_uarch(self, arch):
        """Return name, vendor, extensions and instructions for uarch `arch`.

        ``'extensions'`` is sorted so that code generated from it is
        identical from run to run (set iteration order of strings is not).
        Raises ``KeyError`` for an unknown uarch or extension set name.
        """
        uarch = self.uarches[arch]
        sets = self.uarch_sets(arch)

        return {
            "name": uarch['name'],
            "vendor": uarch['vendor'],
            "extensions": sorted(sets),
            "instructions": list(self.sets2instructions(sets)),
        }

    def uarch_sets(self, arch):
        """Return the fully resolved set of extension names for uarch `arch`."""
        sets = set()
        uarch = self.uarches[arch]

        for item in uarch['sets']:
            if item[0] == '/':
                continue
            elif item[0] == '+':
                # '+name' pulls in everything another uarch supports.
                sets |= self.uarch_sets(item[1:])
            else:
                sets.add(item)

        sets = self.resolve_sets(sets)

        self.referenced_sets |= sets
        return sets

    def unused_extensions(self):
        """Return the extension sets no resolution has referenced so far."""
        return set(self.exts) - self.referenced_sets

    def used_extensions(self):
        """Return the extension sets referenced so far."""
        return self.referenced_sets

    def sets2instructions(self, sets):
        """Return the set of instruction names introduced by `sets`.

        Only ``'+'`` references are filtered out; every other entry of a
        set's ``'new'`` list is treated as an instruction name.
        """
        instructions = set()
        for name in sets:
            for item in self.exts[name]['new']:
                if item[0] != '+':
                    instructions.add(item)
        return instructions

    def resolve_sets(self, sets):
        """Return `sets` plus every set reachable through ``'+'`` references.

        Uses a worklist with a visited check, so a cyclic reference between
        sets terminates instead of recursing without bound. Raises
        ``KeyError`` if a referenced set does not exist.
        """
        resolved = set()
        pending = list(sets)
        while pending:
            name = pending.pop()
            if name in resolved:
                continue
            resolved.add(name)
            for item in self.exts[name]['new']:
                if item[0] == '+':
                    pending.append(item[1:])

        self.referenced_sets |= resolved
        return resolved


class Output:
    """Indentation-aware writer for brace-delimited source text.

    Wraps a writable text stream `f`. Indentation is emitted lazily: it is
    written just before the first `write` after a `newline`. Usable as a
    context manager, in which case the stream is closed on exit.
    """

    # NOTE(review): the default indent unit is the literal as it appears in
    # the page this file was recovered from. That page collapses runs of
    # whitespace, so the original literal may have been wider - confirm
    # against the repository and pass `indent_unit` if so.
    def __init__(self, f, indent_unit=" "):
        self.indentation = 0
        self.out = f
        self.should_indent = False
        self.indent_unit = indent_unit

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False

    def begin_block(self, content):
        """Write ``content {``, end the line and indent one level."""
        self.write(content)
        self.write(" {")
        self.newline()
        self.indent()

    def end_block(self):
        """Outdent one level and write a closing ``}`` line."""
        self.outdent()
        self.write("}")
        self.newline()

    def indent(self):
        self.indentation += 1

    def outdent(self):
        self.indentation -= 1

    def write(self, content):
        """Write `content`, preceded by indentation at the start of a line."""
        if self.should_indent:
            self.out.write(self.indent_unit * self.indentation)
            self.should_indent = False
        self.out.write(content)

    def newline(self):
        self.out.write("\n")
        self.should_indent = True

    def comment(self, content):
        """Write `content` as a ``//`` comment and end the line."""
        if not self.should_indent:
            # we've written something on this line. add a space to be reader-friendly.
            self.out.write(" ")
        self.write("// ")
        self.writeline(content)

    def writeline(self, content):
        self.write(content)
        self.newline()

    def close(self):
        self.out.close()


def _decoder_roots():
    """Return the roots that get their own generated module."""
    return [root for root in ROOTS if root != "x86_generic"]


def _variant_name(inst):
    """Return the Rust enum variant name for instruction `inst`."""
    return "Invalid" if inst == "invalid" else str(inst).upper()


def write_mod_rs(out):
    """Emit ``mod.rs``: submodule declarations plus per-mode re-exports."""
    out.writeline("pub(crate) mod opcode;")
    out.writeline("pub(crate) mod imp;")
    out.newline()

    for root in _decoder_roots():
        out.begin_block("pub(crate) mod {}".format(root))
        out.writeline("pub(crate) use super::opcode::{}::Opcode as Opcode;".format(root))
        out.writeline("pub(crate) use super::imp::{}::revise_instruction as revise_instruction;".format(root))
        out.end_block()


def write_opcode_rs(out, isa, all_instructions):
    """Emit ``opcode.rs``: global enum, mnemonic table, per-mode enums.

    The global enum and ``MNEMONICS`` are emitted in the same sorted order.
    Each per-mode variant is defined as the matching global variant cast to
    ``OPCODE_REPR``.
    """
    for annotation in OPCODE_ANNOTATIONS:
        out.writeline(annotation)
    out.begin_block("pub enum Opcode")
    insts = sorted(all_instructions)
    for inst in insts:
        out.writeline("{},".format(_variant_name(inst)))
    out.end_block()

    out.newline()

    out.writeline("pub(crate) const MNEMONICS: &'static [&'static str] = &[")
    out.indent()
    for inst in insts:
        out.writeline('"{}",'.format(inst.lower()))
    out.outdent()
    out.writeline("];")
    out.newline()

    for root in _decoder_roots():
        out.begin_block("pub(crate) mod {}".format(root))

        for annotation in OPCODE_ANNOTATIONS:
            out.writeline(annotation)
        out.begin_block("pub enum Opcode")
        for inst in sorted(isa.compile_uarch(root)['instructions']):
            variant = _variant_name(inst)
            out.writeline("{} = super::Opcode::{} as {},".format(variant, variant, OPCODE_REPR))
        out.end_block()
        out.end_block()

        out.newline()


def write_imp_rs(out, isa):
    """Emit ``imp.rs``: ``feature_*`` accessors and ``revise_instruction``.

    For each decoder root, every extension gets a ``feature_<ext>`` method
    returning ``true``, and a match arm that rejects that extension's
    opcodes when the feature check fails.
    """
    # Scaffolding the author left commented out from bootstrapping:
    # f.writeline("fn main() {}\n")

    # f.comment("should be `{}::DecodeError` but i want to compile this on its own while bootstrapping")

    # f.writeline('#[path="/toy/yaxpeax/arch/x86/src/{}/mod.rs"]'.format(root))
    # f.writeline("mod structs;")
    # f.writeline("use structs::{InstDecoder, Instruction};")

    # f.begin_block("enum DecodeError")
    # f.writeline("InvalidOpcode,")
    # f.end_block()

    # f.newline()

    for root in _decoder_roots():
        arch = isa.compile_uarch(root)

        out.begin_block("pub(crate) mod {}".format(root))
        out.writeline("use crate::generated::{}::Opcode;".format(root))
        out.writeline("use crate::{}::{{InstDecoder, Instruction, DecodeError}};".format(root))

        out.begin_block("impl InstDecoder")
        for ext in arch['extensions']:
            out.begin_block("fn feature_{}(&self) -> bool".format(ext))
            out.writeline("true")
            out.end_block()
        out.end_block()

        out.begin_block("pub(crate) fn revise_instruction(decoder: &InstDecoder, inst: &mut Instruction) -> Result<(), DecodeError>")

        out.begin_block("if inst.prefixes.evex().is_some()")
        out.begin_block("if !decoder.avx512()")
        out.writeline("return Err(DecodeError::InvalidOpcode);")
        # Hand-rolled "} else {": close one level, reopen at the same depth.
        out.outdent()
        out.writeline("} else {")
        out.indent()
        out.writeline("return Ok(());")
        out.end_block()
        out.end_block()

        out.newline()

        out.comment("for some instructions (tzcnt), not having an extension means the instruction is")
        out.comment("interpreted as another, rather than being simply rejected.")
        out.comment("we might still reject the alternate instruction later, if the extension adding *it*")
        out.comment("is also not supported.")
        out.begin_block("if inst.opcode == Opcode::TZCNT")
        out.begin_block("if !decoder.bmi1()")
        out.comment("tzcnt is only supported if bmi1 is enabled. without bmi1, this decodes as bsf.")
        out.writeline("inst.opcode = Opcode::BSF;")
        out.end_block()
        out.end_block()

        out.newline()

        out.begin_block("match inst.opcode")
        out.comment("we'll never be rejecting the instruction `Invalid`")
        out.writeline("Opcode::Invalid => {}")
        for ext in arch['extensions']:
            # Skip '+' references; `invalid` is special-cased above, it's
            # always present.
            opcodes = [
                inst for inst in isa.exts[ext]['new']
                if inst[0] != '+' and inst != "invalid"
            ]
            if not opcodes:
                print("no entries for ext {}".format(ext))
                continue
            for position, inst in enumerate(opcodes):
                if position:
                    # Finish the previous alternative's line with `|`.
                    out.writeline("|")
                out.write("Opcode::{} ".format(inst.upper()))
            out.begin_block("=>")
            out.begin_block("if !decoder.feature_{}()".format(ext))
            out.writeline("return Err(DecodeError::InvalidOpcode);\n")
            out.end_block()
            out.end_block()
        out.end_block()
        out.writeline("Ok(())")
        out.end_block()
        out.end_block()
        out.newline()


def main():
    """Load the ISA data, report set usage and write the generated files."""
    with open("./x86_64.json", encoding="utf-8") as source:
        isa = IsaData(json.load(source))

    # Compile every uarch so that the used/unused report below reflects the
    # whole data file, not just the roots.
    for arch in isa.uarches:
        isa.compile_uarch(arch)
        # print("x: {}".format(json.dumps(arch)))

    print("unused sets: {}".format(json.dumps(sorted(isa.unused_extensions()))))
    print("used sets: {}".format(json.dumps(sorted(isa.used_extensions()))))

    for root in ROOTS:
        arch = isa.compile_uarch(root)
        print("name: {}, {} instructions".format(arch['name'], len(arch['instructions'])))
        ALL_INSTRUCTIONS.update(arch['instructions'])

    print("opcode enum to follow...")

    INSTRUCTION_NUMS.update(
        (inst, index) for index, inst in enumerate(sorted(ALL_INSTRUCTIONS))
    )

    # Each file is closed as soon as it has been written.
    with Output(open("../src/generated/mod.rs", "w", encoding="utf-8")) as out:
        write_mod_rs(out)

    with Output(open("../src/generated/opcode.rs", "w", encoding="utf-8")) as out:
        write_opcode_rs(out, isa, ALL_INSTRUCTIONS)

    with Output(open("../src/generated/imp.rs", "w", encoding="utf-8")) as out:
        write_imp_rs(out, isa)


if __name__ == "__main__":
    main()