diff --git a/src/bin/ijvdisasm.rs b/src/bin/ijvdisasm.rs new file mode 100644 index 0000000..e095510 --- /dev/null +++ b/src/bin/ijvdisasm.rs @@ -0,0 +1,22 @@ +extern crate rustijvm; + +use rustijvm::Disassembly; +use std::env; + + +fn main() { + let args: Vec = env::args().collect(); + + if args.len() < 2 { + println!("Usage: {} ijvmfile", args[0]); + return; + } + + let ijvmfile = &args[1]; + + + match Disassembly::disassemble_file(ijvmfile) { + Err(e) => eprintln!("{}", e), + Ok(d) => println!("{}", d), + } +} \ No newline at end of file diff --git a/src/disassembler.rs b/src/disassembler.rs new file mode 100644 index 0000000..61fa49d --- /dev/null +++ b/src/disassembler.rs @@ -0,0 +1,387 @@ +use block::Block; +use ijvmreader::IJVMReader; +use binread::{BinRead, BinReadable}; +use machine::MAGIC_HEADER; +use Result; +use ops; + +use std::clone::Clone; +use core::fmt; + +type OpCode = u8; + +#[derive(Debug, Clone)] +pub struct Instruction { + op: OpCode, + name: &'static str, + params: Vec, + types: Vec, + wide: bool, + pos: usize, + label: bool, +} + +impl fmt::Display for Instruction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.label { + write!(f, "\nLBL_{}:\n", self.pos)?; + } + + write!(f, " {}", self.name)?; + + for (typ, val) in self.types.iter().zip(&self.params) { + match *typ { + ops::Args::Label => write!(f, " LBL_{}", val)?, + ops::Args::Constant => { + if self.op == 0xB6 { + write!(f, " func_{}", val)?; + } else { + write!(f, " CONST_{}", val)?; + } + }, + ops::Args::Var => write!(f, " var{}", val)?, + _ => write!(f, " {}", val)?, + } + } + + write!(f, "\n")?; + + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct Constant { + value: i32, + method: bool, +} + +impl Constant { + fn new(value: i32) -> Self { + Self { + value, + method: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct Method { + name: String, + pos: usize, + args: usize, + vars: usize, + + instructions: Vec, + labels: Vec, +} + +impl Method { + pub fn new(i: i32, pos: usize, args: usize, vars: usize) -> Self { + let name; + if pos == 0 { + name = "main".to_string(); + } else { + name = format!("func_{}", i); + } + Self { + name, + pos, + args, + vars, + + instructions: Vec::new(), + labels: Vec::new(), + } + } + + pub fn update_vars(&mut self) { + let max = self.instructions + .iter() + .filter(|e| !e.types.is_empty()) + .filter(|e| { + match e.types[0] { + ops::Args::Var => true, + _ => false, + } + }) + .map(|e| e.params[0] ) + .max(); + + if let Some(v) = max { + self.vars = v as usize + 1; + } + } + + pub fn mark_labels(&mut self) { + for label in &self.labels { + let opt = self.instructions.iter_mut().find(|e| e.pos == *label); + if let Some(instruction) = opt { + instruction.label = true; + } + } + } +} + +impl fmt::Display for Method { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut main_var_off = 0; + if self.name == "main" { + write!(f, ".main\n")?; + main_var_off = 1; + } else { + write!(f, ".method {}(", self.name)?; + let mut comma = ""; + for i in 1..=self.args { + write!(f, "{}var{}", comma, i)?; + comma = ", "; + } + write!(f,")\n")?; + } + + if self.vars > 0 { + write!(f, "\n.var\n")?; + for i in 1..=self.vars { + write!(f, " var{}\n", i + self.args - main_var_off)?; + } + write!(f, ".end-var\n")?; + } + + write!(f, "\n")?; + + for inst in &self.instructions { + write!(f, "{}", inst)?; + } + + + if self.name == "main" { + write!(f, "\n.end-main\n\n")?; + } else { + write!(f, "\n.end-method\n\n")?; + } + + Ok(()) + } +} + +#[derive(Debug)] +pub struct Disassembler { + disassembly: Disassembly, + text: Block, + pool: Block, + suspects: Vec, + found: Vec, +} + +impl Disassembler { + pub fn new(text: Block, pool: Block) -> Disassembler { + Disassembler { + disassembly: Disassembly::default(), + text, + pool, + suspects: Vec::new(), + found: Vec::new(), + } + } + + fn is_method(&self, addr: usize) -> bool { + self.suspects.contains(&addr) + } + + fn disasm_instruction(&mut self, method: &mut Method) -> Result<()> { + let pos = self.text.cur(); + let mut opcode = self.text.read_u8()?; + let mut wide = false; + let mut params = Vec::new(); + + // WIDE + if opcode == 0xC4 { + wide = true; + opcode = self.text.read_u8()?; + } + + let (name, args) = match ops::num_to_op(opcode) { + ops::Operation::Invalid(x) => { + eprintln!("INVALID OP 0x{:X}\n", x); + return Err("Invalid operation"); + }, + ops::Operation::Op(name, _, args) => (name, args), + }; + + for arg in args.clone() { + let v = match arg { + ops::Args::Byte => self.text.read_i8()? as i32, + ops::Args::Short => self.text.read_i16()? as i32, + ops::Args::Var => { + if wide { + self.text.read_u16()? as i32 + } else { + self.text.read_u8()? as i32 + } + }, + ops::Args::Label => { + let offset = self.text.read_i16()?; + let target = pos as i64 + offset as i64; + if target < 0 { + return Err("Invalid jump offset"); + } + method.labels.push(target as usize); + target as i32 + }, + ops::Args::Constant => self.text.read_u16()? as i32, + }; + params.push(v); + } + + let inst = Instruction { + op: opcode, + name, + params, + types: args, + wide, + pos, + label: false, + }; + + // eprintln!("i 0x{:x} ({}) @ {}", opcode, name, pos); + + // INVOKEVIRTUAL + if opcode == 0xB6 { + let target_addr: usize; + + { + let addr = &mut self.disassembly.constants[inst.params[0] as usize]; + if !addr.method { + addr.method = true; + } + target_addr = addr.value as usize; + } + + if !self.found.contains(&target_addr) { + self.found.push(target_addr); + let save_pos = self.text.cur(); + self.text.seek(target_addr)?; + + let arg_count = (self.text.read_u16()? - 1) as usize; + let var_count = self.text.read_u16()? as usize; + + let callee = self.disasm_method(inst.params[0], target_addr, arg_count, var_count)?; + self.disassembly.methods.push(callee); + + self.text.seek(save_pos)?; + } + } + + method.instructions.push(inst); + + Ok(()) + } + + fn disasm_method(&mut self, i: i32, pc: usize, args: usize, vars: usize) -> Result { + let mut method = Method::new(i, pc, args, vars); + + while self.text.has_i8() && !self.is_method(self.text.cur()) { + self.disasm_instruction(&mut method)?; + } + + method.mark_labels(); + + Ok(method) + } + + fn find_methods(&mut self) -> Result> { + let mut res = Vec::new(); + + while self.text.has_i8() { + let op = self.text.read_u8()?; + // INVOKEVIRTUAL + if op == 0xB6 { + let constant_index = self.text.read_u16()? as usize; + + let constant: &Constant = match self.disassembly.constants.get(constant_index) { + Some(x) => x, + None => continue, + }; + + let v = self.text[constant.value as usize]; + if v == 0 { + res.push(constant.value as usize); + } + } + } + self.text.seek(0)?; + + Ok(res) + } + + pub fn disassemble(&mut self) -> Result { + while self.pool.has_i32() { + self.disassembly.constants.push(Constant::new(self.pool.read_i32()?)); + } + + self.suspects = self.find_methods()?; + + let mut main = self.disasm_method(-1, 0, 0, 0)?; + main.update_vars(); + self.disassembly.methods.insert(0, main); + + + Ok(self.disassembly.clone()) + } +} + +#[derive(Debug, Default, Clone)] +pub struct Disassembly { + constants: Vec, + methods: Vec, +} + +impl Disassembly { + pub fn disassemble_reader(mut reader: IJVMReader) -> Result { + let magic = reader.read_u32()?; + if magic != MAGIC_HEADER { + return Err("Invalid magic header"); + } + let pool = match reader.read_block() { + Ok(a) => a, + Err(_) => return Err("Failed to read constants block") + }; + let text = match reader.read_block() { + Ok(block) => block, + Err(_) => return Err("Failed to read text block") + }; + + Disassembler::new(text, pool).disassemble() + } + + pub fn disassemble_file(file: &str) -> Result { + let reader = IJVMReader::new(file)?; + Self::disassemble_reader(reader) + } + + pub fn new_from_slice(source: &[u8]) -> Result { + let reader = IJVMReader::new_from_slice(source); + Self::disassemble_reader(reader) + } + +} + +impl fmt::Display for Disassembly { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.constants.len() > 0 { + write!(f, ".constant\n")?; + for (i, c) in self.constants.iter().enumerate() { + if !c.method { + write!(f, " CONST_{:<6} {}\n", i, c.value)?; + } + } + write!(f, ".end-constant\n\n")?; + } + + for method in &self.methods { + write!(f, "{}", method)?; + } + + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 00e5d8a..caadf4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,9 @@ +extern crate core; + pub mod ijvmreader; pub mod binread; pub mod block; pub mod ops; -pub mod machine; pub mod stack; pub mod pool; pub mod frame; @@ -14,4 +15,8 @@ pub mod heap; type Result = ::std::result::Result; +pub mod machine; pub use machine::Machine; + +pub mod disassembler; +pub use disassembler::{Disassembler, Disassembly}; diff --git a/src/machine.rs b/src/machine.rs index 1482cd5..5a74f64 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -15,7 +15,7 @@ use netstack::NetStack; #[cfg(feature = "bonus:heap")] use heap::Heap; -const MAGIC_HEADER:u32 = 0x1DEA_DFAD; +pub const MAGIC_HEADER:u32 = 0x1DEA_DFAD; const ANTI_BS_SIZE:usize = 0xFF; pub struct Machine { @@ -85,7 +85,7 @@ impl Machine { pub fn step(&mut self) -> Result<()> { match self.block.read_op() { - Ok(Operation::Op(a, func)) => { + Ok(Operation::Op(a, func, _)) => { if cfg!(feature = "debug:instr") { println!("{}", a); println!("Stack: {:?}", self.cur_frame().stack.data); diff --git a/src/ops.rs b/src/ops.rs index b4ce7be..09440af 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -6,8 +6,17 @@ use std::io::{Read}; pub type OpFunc = fn(&mut Machine) -> Result<()>; +#[derive(Debug, Clone)] +pub enum Args { + Byte, + Short, + Var, + Label, + Constant, +} + pub enum Operation { - Op(&'static str, OpFunc), + Op(&'static str, OpFunc, Vec), Invalid(u8) } @@ -15,50 +24,51 @@ const JUMP_OFFSET: i32 = 3; pub fn num_to_op(op: u8) -> Operation { match op { - 0x00 => Operation::Op("NOP", nop), - 0x10 => Operation::Op("BIPUSH", bipush), - 0x13 => Operation::Op("LDC_W", ldc_w), - 0x15 => Operation::Op("ILOAD", iload), - 0x36 => Operation::Op("ISTORE", istore), - 0x57 => Operation::Op("POP", pop), - 0x59 => Operation::Op("DUP", dup), - 0x5F => Operation::Op("SWAP", swap), - 0x60 => Operation::Op("IADD", iadd), - 0x64 => Operation::Op("ISUB", isub), - 0x7E => Operation::Op("IAND", iand), - 0xB0 => Operation::Op("IOR", ior), - 0x84 => Operation::Op("IINC", iinc), - 0x99 => Operation::Op("IFEQ", ifeq), - 0x9b => Operation::Op("IFLT", iflt), - 0x9F => Operation::Op("IF_ICMPEQ", if_icmpeq), - 0xA7 => Operation::Op("GOTO", goto), - 0xAC => Operation::Op("IRETURN", ireturn), - 0xB6 => Operation::Op("INVOKEVIRTUAL", invokevirtual), - 0xC4 => Operation::Op("WIDE", wide), - 0xFC => Operation::Op("IN", _in), - 0xFD => Operation::Op("OUT", out), - 0xFF => Operation::Op("HALT", halt), + 0x00 => Operation::Op("NOP", nop, vec![]), + 0x10 => Operation::Op("BIPUSH", bipush, vec![Args::Byte]), + 0x13 => Operation::Op("LDC_W", ldc_w, vec![Args::Constant]), + 0x15 => Operation::Op("ILOAD", iload, vec![Args::Var]), + 0x36 => Operation::Op("ISTORE", istore, vec![Args::Var]), + 0x57 => Operation::Op("POP", pop, vec![]), + 0x59 => Operation::Op("DUP", dup, vec![]), + 0x5F => Operation::Op("SWAP", swap, vec![]), + 0x60 => Operation::Op("IADD", iadd, vec![]), + 0x64 => Operation::Op("ISUB", isub, vec![]), + 0x7E => Operation::Op("IAND", iand, vec![]), + 0xB0 => Operation::Op("IOR", ior, vec![]), + 0x84 => Operation::Op("IINC", iinc, vec![Args::Var, Args::Byte]), + 0x99 => Operation::Op("IFEQ", ifeq, vec![Args::Label]), + 0x9b => Operation::Op("IFLT", iflt, vec![Args::Label]), + 0x9F => Operation::Op("IF_ICMPEQ", if_icmpeq, vec![Args::Label]), + 0xA7 => Operation::Op("GOTO", goto, vec![Args::Label]), + 0xAC => Operation::Op("IRETURN", ireturn, vec![]), + 0xB6 => Operation::Op("INVOKEVIRTUAL", invokevirtual, vec![Args::Constant]), + 0xC4 => Operation::Op("WIDE", wide, vec![]), + 0xFC => Operation::Op("IN", _in, vec![]), + 0xFD => Operation::Op("OUT", out, vec![]), + 0xFE => Operation::Op("ERR", err, vec![]), + 0xFF => Operation::Op("HALT", halt, vec![]), #[cfg(feature = "extra:sleep")] - 0xF0 => Operation::Op("SLP", slp), + 0xF0 => Operation::Op("SLP", slp, vec![Args::Byte]), #[cfg(feature = "bonus:network")] - 0xE1 => Operation::Op("NETBIND", netbind), + 0xE1 => Operation::Op("NETBIND", netbind, vec![]), #[cfg(feature = "bonus:network")] - 0xE2 => Operation::Op("NETCONNECT", netconnect), + 0xE2 => Operation::Op("NETCONNECT", netconnect, vec![]), #[cfg(feature = "bonus:network")] - 0xE3 => Operation::Op("NETIN", netin), + 0xE3 => Operation::Op("NETIN", netin, vec![]), #[cfg(feature = "bonus:network")] - 0xE4 => Operation::Op("NETOUT", netout), + 0xE4 => Operation::Op("NETOUT", netout, vec![]), #[cfg(feature = "bonus:network")] - 0xE5 => Operation::Op("NETCLOSE", netclose), + 0xE5 => Operation::Op("NETCLOSE", netclose, vec![]), #[cfg(feature = "bonus:heap")] - 0xD1 => Operation::Op("NEWARRAY", newarray), + 0xD1 => Operation::Op("NEWARRAY", newarray, vec![]), #[cfg(feature = "bonus:heap")] - 0xD2 => Operation::Op("IALOAD", iaload), + 0xD2 => Operation::Op("IALOAD", iaload, vec![]), #[cfg(feature = "bonus:heap")] - 0xD3 => Operation::Op("IASTORE", iastore), + 0xD3 => Operation::Op("IASTORE", iastore, vec![]), x => Operation::Invalid(x) } @@ -126,6 +136,12 @@ fn goto(machine: &mut Machine) -> Result<()> { machine.block.jump(offset) } +fn err(machine: &mut Machine) -> Result<()> { + machine.halted = true; + eprintln!("MACHINE CALLED ERR"); + Ok(()) +} + fn halt(machine: &mut Machine) -> Result<()> { machine.halted = true; Ok(())