Improve disassembler to more properly deal with bullshit code

This commit is contained in:
2022-06-11 22:04:14 +02:00
parent 241a8f66bc
commit 8bd11678ab
4 changed files with 79 additions and 57 deletions

58
Cargo.lock generated
View File

@@ -1,70 +1,70 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "lazy_static"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "proc-macro2"
version = "0.4.4"
version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f"
dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-ident",
]
[[package]]
name = "quote"
version = "0.6.3"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2",
]
[[package]]
name = "rustijvm"
version = "1.0.0"
version = "1.1.0"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static",
"serde",
"serde_derive",
]
[[package]]
name = "serde"
version = "1.0.65"
version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
[[package]]
name = "serde_derive"
version = "1.0.65"
version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "0.14.1"
version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf"
dependencies = [
"proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-xid"
version = "0.1.0"
name = "unicode-ident"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1fa93823f53cfd0f5ac117b189aed6cfdfb2cfc0a9d82e956dd7927595ed7d46"
"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035"
"checksum serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "5d47469df098fe8701d4da22680da5145e83801bdaaafea0cf91a180436fc343"
"checksum serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "35eff0f5f70b6a2e902a2bbf4b079be4aacb14afc9676ba4798e0486401cedcb"
"checksum syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6dfd71b2be5a58ee30a6f8ea355ba8290d397131c00dfa55c3d34e6e13db5101"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"

View File

@@ -1,12 +1,12 @@
[package]
name = "rustijvm"
version = "1.0.0"
version = "1.1.0"
authors = ["Jur van den Berg <Jurl.berg@gmail.com>"]
[dependencies]
serde = "1.0"
serde_derive = "1.0"
lazy_static = "1.3.0"
lazy_static = "1.4.0"
[features]
default = ["bonus", "extra"]

View File

@@ -5,6 +5,8 @@ use binread::BinReadable;
#[derive(Debug)]
pub struct Block {
// Origin is part of the spec, and while we never use it, I'd rather keep it
#[allow(dead_code)]
origin: u32,
length: usize,

View File

@@ -6,7 +6,7 @@ use ops;
use Result;
use std::clone::Clone;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::rc::Rc;
@@ -47,27 +47,18 @@ impl DebugSymbols {
}
fn lookup_method(&self, location: usize) -> Option<String> {
match self.methods.get(&location) {
Some(name) => Some(name.to_string()),
None => None,
}
self.methods.get(&location).map(|name| name.to_string())
}
fn lookup_method_idx(&self, idx: usize) -> Option<String> {
if idx >= self.constants.len() {
return None;
}
match self.methods.get(&self.constants[idx]) {
Some(name) => Some(name.to_string()),
None => None,
}
self.methods.get(&self.constants[idx]).map(|name| name.to_string())
}
fn lookup_label(&self, location: usize) -> Option<String> {
match self.labels.get(&location) {
Some(name) => Some(name.to_string()),
None => None,
}
self.labels.get(&location).map(|name| name.to_string())
}
}
@@ -176,10 +167,7 @@ impl Method {
.instructions
.iter()
.filter(|e| !e.types.is_empty())
.filter(|e| match e.types[0] {
ops::Args::Var => true,
_ => false,
})
.filter(|e| matches!(e.types[0], ops::Args::Var))
.map(|e| e.params[0])
.max();
@@ -244,7 +232,7 @@ pub struct Disassembler {
text: Block,
pool: Block,
symbols: Rc<DebugSymbols>,
suspects: Vec<usize>,
suspects: HashSet<usize>,
found: Vec<usize>,
}
@@ -255,7 +243,7 @@ impl Disassembler {
text,
pool,
symbols: Rc::new(symbols),
suspects: Vec::new(),
suspects: HashSet::new(),
found: Vec::new(),
}
}
@@ -278,6 +266,31 @@ impl Disassembler {
let (name, args) = match ops::num_to_op(opcode) {
ops::Operation::Invalid => {
// There is a slight edge case here. It *could* be that the invalid op was a dead method
// (i.e. a method that is never called).
// I will not try to convince anyone this will catch all dead methods, but it's worth a shot.
// So see if a constant with that *value* exists; if so, add it to the suspects list and pray.
if self.disassembly.constants.iter().any(|v| v.value == pos as i32) {
self.suspects.insert(pos);
// And rewind the tape to before we started parsing this to let it try again
return self.text.seek(pos);
}
// Fine, another edge case then:
// usually the first method byte is 0x00 -> nop
// if this is the case, check if the nop is the method start
if let Some(previous_instruction) = method.instructions.last().cloned() {
if previous_instruction.op == 0x00 && self.disassembly.constants.iter().any(|v| v.value == previous_instruction.pos as i32) {
// Oh wow, this might work then
self.suspects.insert(previous_instruction.pos);
method.instructions.pop();
// And rewind the tape to before we started parsing this to let it try again
return self.text.seek(previous_instruction.pos);
}
}
eprintln!("INVALID OP 0x{:X}\n", opcode);
return Err("Invalid operation");
}
@@ -375,9 +388,7 @@ impl Disassembler {
Ok(method)
}
fn find_methods(&mut self) -> Result<Vec<usize>> {
let mut res = Vec::new();
fn find_method_suspects(&mut self) -> Result<()> {
while self.text.has_i8() {
let op = self.text.read_u8()?;
// INVOKEVIRTUAL
@@ -391,13 +402,13 @@ impl Disassembler {
let v = self.text[constant.value as usize];
if v == 0 {
res.push(constant.value as usize);
self.suspects.insert(constant.value as usize);
}
}
}
self.text.seek(0)?;
Ok(res)
Ok(())
}
pub fn disassemble(&mut self) -> Result<Disassembly> {
@@ -406,7 +417,15 @@ impl Disassembler {
self.disassembly.constants.push(Constant::new(addr));
}
self.suspects = self.find_methods()?;
self.find_method_suspects()?;
if !self.symbols.methods.is_empty() {
for &pos in self.symbols.methods.keys() {
if pos == 0 {
continue;
}
self.suspects.insert(pos);
}
}
let mut main = self.disasm_method(-1, 0, 0, 0)?;
main.update_vars();
@@ -442,6 +461,7 @@ impl Disassembly {
let mut symbols = DebugSymbols::default();
if let Ok(block) = reader.read_block() {
symbols.add_methods(block)?;
if let Ok(labels) = reader.read_block() {
symbols.add_labels(labels)?;
}
@@ -470,7 +490,7 @@ impl Disassembly {
impl fmt::Display for Disassembly {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if !self.constants.is_empty() {
if self.constants.iter().any(|c| !c.method) {
writeln!(f, ".constant")?;
for (i, c) in self.constants.iter().enumerate() {
if !c.method {