Improve disassembler to deal more robustly with malformed code
This commit is contained in:
		
							
								
								
									
										58
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										58
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -1,70 +1,70 @@ | |||||||
| # This file is automatically @generated by Cargo. | # This file is automatically @generated by Cargo. | ||||||
| # It is not intended for manual editing. | # It is not intended for manual editing. | ||||||
|  | version = 3 | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lazy_static" | name = "lazy_static" | ||||||
| version = "1.3.0" | version = "1.4.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "proc-macro2" | name = "proc-macro2" | ||||||
| version = "0.4.4" | version = "1.0.39" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", |  "unicode-ident", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "quote" | name = "quote" | ||||||
| version = "0.6.3" | version = "1.0.18" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", |  "proc-macro2", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "rustijvm" | name = "rustijvm" | ||||||
| version = "1.0.0" | version = "1.1.0" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |  "lazy_static", | ||||||
|  "serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", |  "serde", | ||||||
|  "serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", |  "serde_derive", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "serde" | name = "serde" | ||||||
| version = "1.0.65" | version = "1.0.137" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "serde_derive" | name = "serde_derive" | ||||||
| version = "1.0.65" | version = "1.0.137" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", |  "proc-macro2", | ||||||
|  "quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", |  "quote", | ||||||
|  "syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)", |  "syn", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "syn" | name = "syn" | ||||||
| version = "0.14.1" | version = "1.0.96" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", |  "proc-macro2", | ||||||
|  "quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", |  "quote", | ||||||
|  "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", |  "unicode-ident", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "unicode-xid" | name = "unicode-ident" | ||||||
| version = "0.1.0" | version = "1.0.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" | ||||||
| [metadata] |  | ||||||
| "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" |  | ||||||
| "checksum proc-macro2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1fa93823f53cfd0f5ac117b189aed6cfdfb2cfc0a9d82e956dd7927595ed7d46" |  | ||||||
| "checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035" |  | ||||||
| "checksum serde 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "5d47469df098fe8701d4da22680da5145e83801bdaaafea0cf91a180436fc343" |  | ||||||
| "checksum serde_derive 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "35eff0f5f70b6a2e902a2bbf4b079be4aacb14afc9676ba4798e0486401cedcb" |  | ||||||
| "checksum syn 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6dfd71b2be5a58ee30a6f8ea355ba8290d397131c00dfa55c3d34e6e13db5101" |  | ||||||
| "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" |  | ||||||
|   | |||||||
| @@ -1,12 +1,12 @@ | |||||||
| [package] | [package] | ||||||
| name = "rustijvm" | name = "rustijvm" | ||||||
| version = "1.0.0" | version = "1.1.0" | ||||||
| authors = ["Jur van den Berg <Jurl.berg@gmail.com>"] | authors = ["Jur van den Berg <Jurl.berg@gmail.com>"] | ||||||
|  |  | ||||||
| [dependencies] | [dependencies] | ||||||
| serde = "1.0" | serde = "1.0" | ||||||
| serde_derive = "1.0" | serde_derive = "1.0" | ||||||
| lazy_static = "1.3.0" | lazy_static = "1.4.0" | ||||||
|  |  | ||||||
| [features] | [features] | ||||||
| default = ["bonus", "extra"] | default = ["bonus", "extra"] | ||||||
|   | |||||||
| @@ -5,6 +5,8 @@ use binread::BinReadable; | |||||||
|  |  | ||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub struct Block { | pub struct Block { | ||||||
|  |     // Origin is part of the spec, and while we never use it, I'd rather keep it | ||||||
|  |     #[allow(dead_code)] | ||||||
|     origin: u32, |     origin: u32, | ||||||
|  |  | ||||||
|     length: usize, |     length: usize, | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ use ops; | |||||||
| use Result; | use Result; | ||||||
|  |  | ||||||
| use std::clone::Clone; | use std::clone::Clone; | ||||||
| use std::collections::HashMap; | use std::collections::{HashMap, HashSet}; | ||||||
| use std::fmt; | use std::fmt; | ||||||
| use std::rc::Rc; | use std::rc::Rc; | ||||||
|  |  | ||||||
| @@ -47,27 +47,18 @@ impl DebugSymbols { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn lookup_method(&self, location: usize) -> Option<String> { |     fn lookup_method(&self, location: usize) -> Option<String> { | ||||||
|         match self.methods.get(&location) { |         self.methods.get(&location).map(|name| name.to_string()) | ||||||
|             Some(name) => Some(name.to_string()), |  | ||||||
|             None => None, |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn lookup_method_idx(&self, idx: usize) -> Option<String> { |     fn lookup_method_idx(&self, idx: usize) -> Option<String> { | ||||||
|         if idx >= self.constants.len() { |         if idx >= self.constants.len() { | ||||||
|             return None; |             return None; | ||||||
|         } |         } | ||||||
|         match self.methods.get(&self.constants[idx]) { |         self.methods.get(&self.constants[idx]).map(|name| name.to_string()) | ||||||
|             Some(name) => Some(name.to_string()), |  | ||||||
|             None => None, |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn lookup_label(&self, location: usize) -> Option<String> { |     fn lookup_label(&self, location: usize) -> Option<String> { | ||||||
|         match self.labels.get(&location) { |         self.labels.get(&location).map(|name| name.to_string()) | ||||||
|             Some(name) => Some(name.to_string()), |  | ||||||
|             None => None, |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -176,10 +167,7 @@ impl Method { | |||||||
|             .instructions |             .instructions | ||||||
|             .iter() |             .iter() | ||||||
|             .filter(|e| !e.types.is_empty()) |             .filter(|e| !e.types.is_empty()) | ||||||
|             .filter(|e| match e.types[0] { |             .filter(|e| matches!(e.types[0], ops::Args::Var)) | ||||||
|                 ops::Args::Var => true, |  | ||||||
|                 _ => false, |  | ||||||
|             }) |  | ||||||
|             .map(|e| e.params[0]) |             .map(|e| e.params[0]) | ||||||
|             .max(); |             .max(); | ||||||
|  |  | ||||||
| @@ -244,7 +232,7 @@ pub struct Disassembler { | |||||||
|     text: Block, |     text: Block, | ||||||
|     pool: Block, |     pool: Block, | ||||||
|     symbols: Rc<DebugSymbols>, |     symbols: Rc<DebugSymbols>, | ||||||
|     suspects: Vec<usize>, |     suspects: HashSet<usize>, | ||||||
|     found: Vec<usize>, |     found: Vec<usize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -255,7 +243,7 @@ impl Disassembler { | |||||||
|             text, |             text, | ||||||
|             pool, |             pool, | ||||||
|             symbols: Rc::new(symbols), |             symbols: Rc::new(symbols), | ||||||
|             suspects: Vec::new(), |             suspects: HashSet::new(), | ||||||
|             found: Vec::new(), |             found: Vec::new(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -278,6 +266,31 @@ impl Disassembler { | |||||||
|  |  | ||||||
|         let (name, args) = match ops::num_to_op(opcode) { |         let (name, args) = match ops::num_to_op(opcode) { | ||||||
|             ops::Operation::Invalid => { |             ops::Operation::Invalid => { | ||||||
|  |  | ||||||
|  |  | ||||||
|  |                 // There is a slight edge case here. It *could* be that the invalid op is really the start of a dead method | ||||||
|  |                 // (i.e. a method that is never called). | ||||||
|  |                 // This will not catch every dead method, but it's worth a shot. | ||||||
|  |                 // So check whether a constant with that *value* exists; if so, add it to the suspects list and hope for the best. | ||||||
|  |                 if  self.disassembly.constants.iter().any(|v| v.value == pos as i32) { | ||||||
|  |                     self.suspects.insert(pos); | ||||||
|  |                     // And rewind the tape to before we started parsing this to let it try again | ||||||
|  |                     return self.text.seek(pos); | ||||||
|  |                 } | ||||||
|  |                 // Fine, another edge case then: | ||||||
|  |                 // usually the first method byte is 0x00 -> nop | ||||||
|  |                 // if this is the case, check if the nop is the method start | ||||||
|  |                 if let Some(previous_instruction) = method.instructions.last().cloned() { | ||||||
|  |                     if previous_instruction.op == 0x00 && self.disassembly.constants.iter().any(|v| v.value == previous_instruction.pos as i32) { | ||||||
|  |                         // Oh wow, this might work then | ||||||
|  |                         self.suspects.insert(previous_instruction.pos); | ||||||
|  |                         method.instructions.pop(); | ||||||
|  |                         // And rewind the tape to that nop (not just to the invalid op) to let it try again from there | ||||||
|  |                         return self.text.seek(previous_instruction.pos); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |  | ||||||
|                 eprintln!("INVALID OP 0x{:X}\n", opcode); |                 eprintln!("INVALID OP 0x{:X}\n", opcode); | ||||||
|                 return Err("Invalid operation"); |                 return Err("Invalid operation"); | ||||||
|             } |             } | ||||||
| @@ -375,9 +388,7 @@ impl Disassembler { | |||||||
|         Ok(method) |         Ok(method) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn find_methods(&mut self) -> Result<Vec<usize>> { |     fn find_method_suspects(&mut self) -> Result<()> { | ||||||
|         let mut res = Vec::new(); |  | ||||||
|  |  | ||||||
|         while self.text.has_i8() { |         while self.text.has_i8() { | ||||||
|             let op = self.text.read_u8()?; |             let op = self.text.read_u8()?; | ||||||
|             // INVOKEVIRTUAL |             // INVOKEVIRTUAL | ||||||
| @@ -391,13 +402,13 @@ impl Disassembler { | |||||||
|  |  | ||||||
|                 let v = self.text[constant.value as usize]; |                 let v = self.text[constant.value as usize]; | ||||||
|                 if v == 0 { |                 if v == 0 { | ||||||
|                     res.push(constant.value as usize); |                     self.suspects.insert(constant.value as usize); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         self.text.seek(0)?; |         self.text.seek(0)?; | ||||||
|  |  | ||||||
|         Ok(res) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn disassemble(&mut self) -> Result<Disassembly> { |     pub fn disassemble(&mut self) -> Result<Disassembly> { | ||||||
| @@ -406,7 +417,15 @@ impl Disassembler { | |||||||
|             self.disassembly.constants.push(Constant::new(addr)); |             self.disassembly.constants.push(Constant::new(addr)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         self.suspects = self.find_methods()?; |         self.find_method_suspects()?; | ||||||
|  |         if !self.symbols.methods.is_empty() { | ||||||
|  |             for &pos in self.symbols.methods.keys() { | ||||||
|  |                 if pos == 0 { | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |                 self.suspects.insert(pos); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let mut main = self.disasm_method(-1, 0, 0, 0)?; |         let mut main = self.disasm_method(-1, 0, 0, 0)?; | ||||||
|         main.update_vars(); |         main.update_vars(); | ||||||
| @@ -442,6 +461,7 @@ impl Disassembly { | |||||||
|         let mut symbols = DebugSymbols::default(); |         let mut symbols = DebugSymbols::default(); | ||||||
|         if let Ok(block) = reader.read_block() { |         if let Ok(block) = reader.read_block() { | ||||||
|             symbols.add_methods(block)?; |             symbols.add_methods(block)?; | ||||||
|  |  | ||||||
|             if let Ok(labels) = reader.read_block() { |             if let Ok(labels) = reader.read_block() { | ||||||
|                 symbols.add_labels(labels)?; |                 symbols.add_labels(labels)?; | ||||||
|             } |             } | ||||||
| @@ -470,7 +490,7 @@ impl Disassembly { | |||||||
|  |  | ||||||
| impl fmt::Display for Disassembly { | impl fmt::Display for Disassembly { | ||||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|         if !self.constants.is_empty() { |         if self.constants.iter().any(|c| !c.method) { | ||||||
|             writeln!(f, ".constant")?; |             writeln!(f, ".constant")?; | ||||||
|             for (i, c) in self.constants.iter().enumerate() { |             for (i, c) in self.constants.iter().enumerate() { | ||||||
|                 if !c.method { |                 if !c.method { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user