Improve disassembler to more properly deal with bullshit code

This commit is contained in:
2022-06-11 22:04:14 +02:00
parent 241a8f66bc
commit 8bd11678ab
4 changed files with 79 additions and 57 deletions

58
Cargo.lock generated
View File

@@ -1,70 +1,70 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.3.0" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "0.4.4" version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f"
dependencies = [ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-ident",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "0.6.3" version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [ dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2",
] ]
[[package]] [[package]]
name = "rustijvm" name = "rustijvm"
version = "1.0.0" version = "1.1.0"
dependencies = [ dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static",
"serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", "serde",
"serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive",
] ]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.65" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.65" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
dependencies = [ dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2",
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "quote",
"syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)", "syn",
] ]
[[package]] [[package]]
name = "syn" name = "syn"
version = "0.14.1" version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf"
dependencies = [ dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2",
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "quote",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-ident",
] ]
[[package]] [[package]]
name = "unicode-xid" name = "unicode-ident"
version = "0.1.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
[metadata]
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1fa93823f53cfd0f5ac117b189aed6cfdfb2cfc0a9d82e956dd7927595ed7d46"
"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035"
"checksum serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "5d47469df098fe8701d4da22680da5145e83801bdaaafea0cf91a180436fc343"
"checksum serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "35eff0f5f70b6a2e902a2bbf4b079be4aacb14afc9676ba4798e0486401cedcb"
"checksum syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6dfd71b2be5a58ee30a6f8ea355ba8290d397131c00dfa55c3d34e6e13db5101"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"

View File

@@ -1,12 +1,12 @@
[package] [package]
name = "rustijvm" name = "rustijvm"
version = "1.0.0" version = "1.1.0"
authors = ["Jur van den Berg <Jurl.berg@gmail.com>"] authors = ["Jur van den Berg <Jurl.berg@gmail.com>"]
[dependencies] [dependencies]
serde = "1.0" serde = "1.0"
serde_derive = "1.0" serde_derive = "1.0"
lazy_static = "1.3.0" lazy_static = "1.4.0"
[features] [features]
default = ["bonus", "extra"] default = ["bonus", "extra"]

View File

@@ -5,6 +5,8 @@ use binread::BinReadable;
#[derive(Debug)] #[derive(Debug)]
pub struct Block { pub struct Block {
// Origin is part of the spec, and while we never use it, I'd rather keep it
#[allow(dead_code)]
origin: u32, origin: u32,
length: usize, length: usize,

View File

@@ -6,7 +6,7 @@ use ops;
use Result; use Result;
use std::clone::Clone; use std::clone::Clone;
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use std::fmt; use std::fmt;
use std::rc::Rc; use std::rc::Rc;
@@ -47,27 +47,18 @@ impl DebugSymbols {
} }
fn lookup_method(&self, location: usize) -> Option<String> { fn lookup_method(&self, location: usize) -> Option<String> {
match self.methods.get(&location) { self.methods.get(&location).map(|name| name.to_string())
Some(name) => Some(name.to_string()),
None => None,
}
} }
fn lookup_method_idx(&self, idx: usize) -> Option<String> { fn lookup_method_idx(&self, idx: usize) -> Option<String> {
if idx >= self.constants.len() { if idx >= self.constants.len() {
return None; return None;
} }
match self.methods.get(&self.constants[idx]) { self.methods.get(&self.constants[idx]).map(|name| name.to_string())
Some(name) => Some(name.to_string()),
None => None,
}
} }
fn lookup_label(&self, location: usize) -> Option<String> { fn lookup_label(&self, location: usize) -> Option<String> {
match self.labels.get(&location) { self.labels.get(&location).map(|name| name.to_string())
Some(name) => Some(name.to_string()),
None => None,
}
} }
} }
@@ -176,10 +167,7 @@ impl Method {
.instructions .instructions
.iter() .iter()
.filter(|e| !e.types.is_empty()) .filter(|e| !e.types.is_empty())
.filter(|e| match e.types[0] { .filter(|e| matches!(e.types[0], ops::Args::Var))
ops::Args::Var => true,
_ => false,
})
.map(|e| e.params[0]) .map(|e| e.params[0])
.max(); .max();
@@ -244,7 +232,7 @@ pub struct Disassembler {
text: Block, text: Block,
pool: Block, pool: Block,
symbols: Rc<DebugSymbols>, symbols: Rc<DebugSymbols>,
suspects: Vec<usize>, suspects: HashSet<usize>,
found: Vec<usize>, found: Vec<usize>,
} }
@@ -255,7 +243,7 @@ impl Disassembler {
text, text,
pool, pool,
symbols: Rc::new(symbols), symbols: Rc::new(symbols),
suspects: Vec::new(), suspects: HashSet::new(),
found: Vec::new(), found: Vec::new(),
} }
} }
@@ -278,6 +266,31 @@ impl Disassembler {
let (name, args) = match ops::num_to_op(opcode) { let (name, args) = match ops::num_to_op(opcode) {
ops::Operation::Invalid => { ops::Operation::Invalid => {
// There is a slight edge case here. It *could* be that the invalid op was a dead method.
// (i.e. method that is not called)
// I will not try to convince anyone this will catch all dead methods, but it's worth a shot
// So check whether a constant with that *value* exists; if so, add it to the suspects set and pray
if self.disassembly.constants.iter().any(|v| v.value == pos as i32) {
self.suspects.insert(pos);
// And rewind the tape to before we started parsing this to let it try again
return self.text.seek(pos);
}
// Fine, another edge case then:
// usually the first method byte is 0x00 -> nop
// if this is the case, check if the nop is the method start
if let Some(previous_instruction) = method.instructions.last().cloned() {
if previous_instruction.op == 0x00 && self.disassembly.constants.iter().any(|v| v.value == previous_instruction.pos as i32) {
// Oh wow, this might work then
self.suspects.insert(previous_instruction.pos);
method.instructions.pop();
// And rewind the tape to that nop (not just to this opcode) to let it try again from there
return self.text.seek(previous_instruction.pos);
}
}
eprintln!("INVALID OP 0x{:X}\n", opcode); eprintln!("INVALID OP 0x{:X}\n", opcode);
return Err("Invalid operation"); return Err("Invalid operation");
} }
@@ -375,9 +388,7 @@ impl Disassembler {
Ok(method) Ok(method)
} }
fn find_methods(&mut self) -> Result<Vec<usize>> { fn find_method_suspects(&mut self) -> Result<()> {
let mut res = Vec::new();
while self.text.has_i8() { while self.text.has_i8() {
let op = self.text.read_u8()?; let op = self.text.read_u8()?;
// INVOKEVIRTUAL // INVOKEVIRTUAL
@@ -391,13 +402,13 @@ impl Disassembler {
let v = self.text[constant.value as usize]; let v = self.text[constant.value as usize];
if v == 0 { if v == 0 {
res.push(constant.value as usize); self.suspects.insert(constant.value as usize);
} }
} }
} }
self.text.seek(0)?; self.text.seek(0)?;
Ok(res) Ok(())
} }
pub fn disassemble(&mut self) -> Result<Disassembly> { pub fn disassemble(&mut self) -> Result<Disassembly> {
@@ -406,7 +417,15 @@ impl Disassembler {
self.disassembly.constants.push(Constant::new(addr)); self.disassembly.constants.push(Constant::new(addr));
} }
self.suspects = self.find_methods()?; self.find_method_suspects()?;
if !self.symbols.methods.is_empty() {
for &pos in self.symbols.methods.keys() {
if pos == 0 {
continue;
}
self.suspects.insert(pos);
}
}
let mut main = self.disasm_method(-1, 0, 0, 0)?; let mut main = self.disasm_method(-1, 0, 0, 0)?;
main.update_vars(); main.update_vars();
@@ -442,6 +461,7 @@ impl Disassembly {
let mut symbols = DebugSymbols::default(); let mut symbols = DebugSymbols::default();
if let Ok(block) = reader.read_block() { if let Ok(block) = reader.read_block() {
symbols.add_methods(block)?; symbols.add_methods(block)?;
if let Ok(labels) = reader.read_block() { if let Ok(labels) = reader.read_block() {
symbols.add_labels(labels)?; symbols.add_labels(labels)?;
} }
@@ -470,7 +490,7 @@ impl Disassembly {
impl fmt::Display for Disassembly { impl fmt::Display for Disassembly {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if !self.constants.is_empty() { if self.constants.iter().any(|c| !c.method) {
writeln!(f, ".constant")?; writeln!(f, ".constant")?;
for (i, c) in self.constants.iter().enumerate() { for (i, c) in self.constants.iter().enumerate() {
if !c.method { if !c.method {