Add a fully functioning disassembler?

This commit is contained in:
2018-06-01 23:43:33 +02:00
parent 0b8e28fb22
commit c6ac7ce4f7
5 changed files with 466 additions and 36 deletions

22
src/bin/ijvdisasm.rs Normal file
View File

@@ -0,0 +1,22 @@
extern crate rustijvm;
use rustijvm::Disassembly;
use std::env;
/// Entry point of the `ijvdisasm` command-line tool.
///
/// Expects the path of an IJVM binary as the first argument, disassembles it
/// and prints the listing to stdout. A missing argument or a failed
/// disassembly is reported on stderr and ends the process with exit status 1,
/// so scripts and pipelines can detect the failure.
fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() < 2 {
        // The argument list may be empty on unusual platforms; fall back to a
        // fixed program name instead of panicking on `args[0]`.
        let program = args.first().map(String::as_str).unwrap_or("ijvdisasm");
        eprintln!("Usage: {} ijvmfile", program);
        ::std::process::exit(1);
    }
    match Disassembly::disassemble_file(&args[1]) {
        Ok(disassembly) => println!("{}", disassembly),
        Err(e) => {
            eprintln!("{}", e);
            ::std::process::exit(1);
        }
    }
}

387
src/disassembler.rs Normal file
View File

@@ -0,0 +1,387 @@
use block::Block;
use ijvmreader::IJVMReader;
use binread::{BinRead, BinReadable};
use machine::MAGIC_HEADER;
use Result;
use ops;
use std::clone::Clone;
use core::fmt;
type OpCode = u8;
#[derive(Debug, Clone)]
/// One decoded instruction of the text block, together with the information
/// needed to print it as assembly.
pub struct Instruction {
/// Opcode byte; for a WIDE-prefixed instruction this is the opcode that
/// follows the prefix.
op: OpCode,
/// Mnemonic as reported by `ops::num_to_op`.
name: &'static str,
/// Decoded operand values, in the order they were read.
params: Vec<i32>,
/// Operand kinds; zipped with `params` when the instruction is printed.
types: Vec<ops::Args>,
/// Whether the opcode was preceded by the WIDE prefix (0xC4).
wide: bool,
/// Text-block position at which the instruction (including any prefix) starts.
pos: usize,
/// When set, a `LBL_<pos>:` line is printed in front of the instruction.
label: bool,
}
/// Renders the instruction as one line of assembly, preceded by its
/// `LBL_<pos>:` marker when the instruction is a jump target.
impl fmt::Display for Instruction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.label {
            write!(f, "\nLBL_{}:\n", self.pos)?;
        }
        write!(f, " {}", self.name)?;

        // INVOKEVIRTUAL (0xB6) names its constant after the method it calls;
        // every other instruction refers to a plain pool constant.
        let constant_prefix = if self.op == 0xB6 { "func_" } else { "CONST_" };

        for (kind, value) in self.types.iter().zip(self.params.iter()) {
            match *kind {
                ops::Args::Label => write!(f, " LBL_{}", value)?,
                ops::Args::Constant => write!(f, " {}{}", constant_prefix, value)?,
                ops::Args::Var => write!(f, " var{}", value)?,
                ops::Args::Byte | ops::Args::Short => write!(f, " {}", value)?,
            }
        }
        write!(f, "\n")
    }
}
/// An entry of the constant pool.
#[derive(Debug, Clone)]
pub struct Constant {
    /// Raw 32-bit value read from the pool.
    value: i32,
    /// Set once an INVOKEVIRTUAL instruction refers to this entry; such
    /// entries are left out of the printed `.constant` section.
    method: bool,
}

impl Constant {
    /// Wraps a raw pool value; every entry starts out as a plain
    /// (non-method) constant.
    fn new(value: i32) -> Self {
        Constant {
            method: false,
            value: value,
        }
    }
}
#[derive(Debug, Clone)]
/// A disassembled method: its header information plus decoded instructions.
pub struct Method {
/// `"main"` for the method at position 0, `func_<i>` otherwise.
name: String,
/// Text-block position the method was created for.
pos: usize,
/// Number of arguments printed in the `.method` header.
args: usize,
/// Number of local variables listed in the `.var` section.
vars: usize,
/// Decoded instructions, in the order they were read.
instructions: Vec<Instruction>,
/// Text-block positions targeted by this method's jump instructions.
labels: Vec<usize>,
}
impl Method {
pub fn new(i: i32, pos: usize, args: usize, vars: usize) -> Self {
let name;
if pos == 0 {
name = "main".to_string();
} else {
name = format!("func_{}", i);
}
Self {
name,
pos,
args,
vars,
instructions: Vec::new(),
labels: Vec::new(),
}
}
pub fn update_vars(&mut self) {
let max = self.instructions
.iter()
.filter(|e| !e.types.is_empty())
.filter(|e| {
match e.types[0] {
ops::Args::Var => true,
_ => false,
}
})
.map(|e| e.params[0] )
.max();
if let Some(v) = max {
self.vars = v as usize + 1;
}
}
pub fn mark_labels(&mut self) {
for label in &self.labels {
let opt = self.instructions.iter_mut().find(|e| e.pos == *label);
if let Some(instruction) = opt {
instruction.label = true;
}
}
}
}
/// Prints the method as an assembly section: header, optional `.var` block,
/// instructions, and the matching end marker.
impl fmt::Display for Method {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let is_main = self.name == "main";

        if is_main {
            write!(f, ".main\n")?;
        } else {
            write!(f, ".method {}(", self.name)?;
            for i in 1..=self.args {
                let separator = if i == 1 { "" } else { ", " };
                write!(f, "{}var{}", separator, i)?;
            }
            write!(f, ")\n")?;
        }

        if self.vars > 0 {
            // Locals of `main` are numbered from 0; in other methods they
            // continue after the arguments.
            let main_var_off = if is_main { 1 } else { 0 };
            write!(f, "\n.var\n")?;
            for i in 1..=self.vars {
                write!(f, " var{}\n", i + self.args - main_var_off)?;
            }
            write!(f, ".end-var\n")?;
        }

        write!(f, "\n")?;
        for inst in self.instructions.iter() {
            write!(f, "{}", inst)?;
        }

        let footer = if is_main {
            "\n.end-main\n\n"
        } else {
            "\n.end-method\n\n"
        };
        f.write_str(footer)
    }
}
#[derive(Debug)]
/// Working state for disassembling one program.
pub struct Disassembler {
/// Result built up during disassembly (constants and methods).
disassembly: Disassembly,
/// Block holding the program's instructions.
text: Block,
/// Block holding the constant pool, read as 32-bit values.
pool: Block,
/// Addresses `find_methods` flagged as probable method starts; a method's
/// disassembly stops when the text cursor reaches one of them.
suspects: Vec<usize>,
/// Addresses of callees that were already disassembled, so each one is
/// decoded only once.
found: Vec<usize>,
}
impl Disassembler {
    /// Creates a disassembler over a program's text block and constant pool.
    pub fn new(text: Block, pool: Block) -> Disassembler {
        Disassembler {
            disassembly: Disassembly::default(),
            text,
            pool,
            suspects: Vec::new(),
            found: Vec::new(),
        }
    }

    /// Returns whether `addr` was flagged by `find_methods` as a probable
    /// method start.
    fn is_method(&self, addr: usize) -> bool {
        self.suspects.contains(&addr)
    }

    /// Decodes the instruction at the text cursor and appends it to `method`.
    ///
    /// Jump targets are recorded in `method.labels`. An INVOKEVIRTUAL also
    /// triggers disassembly of its callee (see `disasm_callee`).
    ///
    /// # Errors
    ///
    /// Fails on a read error, an unknown opcode, a jump to a negative
    /// position, or a malformed INVOKEVIRTUAL target.
    fn disasm_instruction(&mut self, method: &mut Method) -> Result<()> {
        let pos = self.text.cur();
        let mut opcode = self.text.read_u8()?;
        // WIDE prefix: the real opcode follows and `Var` operands are 16 bits.
        let wide = opcode == 0xC4;
        if wide {
            opcode = self.text.read_u8()?;
        }
        let (name, args) = match ops::num_to_op(opcode) {
            ops::Operation::Invalid(x) => {
                eprintln!("INVALID OP 0x{:X}\n", x);
                return Err("Invalid operation");
            }
            ops::Operation::Op(name, _, args) => (name, args),
        };
        let mut params = Vec::with_capacity(args.len());
        for arg in &args {
            let v = match *arg {
                ops::Args::Byte => self.text.read_i8()? as i32,
                ops::Args::Short => self.text.read_i16()? as i32,
                ops::Args::Var => {
                    if wide {
                        self.text.read_u16()? as i32
                    } else {
                        self.text.read_u8()? as i32
                    }
                }
                ops::Args::Label => {
                    // Offsets are relative to the start of the instruction.
                    let offset = self.text.read_i16()?;
                    let target = pos as i64 + offset as i64;
                    if target < 0 {
                        return Err("Invalid jump offset");
                    }
                    method.labels.push(target as usize);
                    target as i32
                }
                ops::Args::Constant => self.text.read_u16()? as i32,
            };
            params.push(v);
        }
        let inst = Instruction {
            op: opcode,
            name,
            params,
            types: args,
            wide,
            pos,
            label: false,
        };
        // INVOKEVIRTUAL
        if opcode == 0xB6 {
            self.disasm_callee(inst.params[0])?;
        }
        method.instructions.push(inst);
        Ok(())
    }

    /// Handles the target of an INVOKEVIRTUAL whose operand is `index`.
    ///
    /// Marks the constant as a method address and, unless that address was
    /// already handled, disassembles the callee and stores it in the result.
    /// The text cursor is restored afterwards.
    ///
    /// # Errors
    ///
    /// Fails, instead of panicking, when `index` is outside the constant
    /// pool, when the constant is negative, or when the method header
    /// declares zero arguments.
    fn disasm_callee(&mut self, index: i32) -> Result<()> {
        let target_addr = {
            let constant = self.disassembly
                .constants
                .get_mut(index as usize)
                .ok_or("Invalid constant index")?;
            if constant.value < 0 {
                return Err("Invalid method address");
            }
            constant.method = true;
            constant.value as usize
        };
        if self.found.contains(&target_addr) {
            return Ok(());
        }
        self.found.push(target_addr);

        let save_pos = self.text.cur();
        self.text.seek(target_addr)?;
        // Header: argument count, then local variable count. One argument is
        // not printed -- presumably the implicit object reference; TODO confirm.
        let arg_count = match self.text.read_u16()?.checked_sub(1) {
            Some(count) => count as usize,
            None => return Err("Invalid method argument count"),
        };
        let var_count = self.text.read_u16()? as usize;
        let callee = self.disasm_method(index, target_addr, arg_count, var_count)?;
        self.disassembly.methods.push(callee);
        self.text.seek(save_pos)?;
        Ok(())
    }

    /// Disassembles instructions from the current text cursor until the text
    /// ends or the cursor reaches a suspected method start, then marks the
    /// jump targets inside the method.
    fn disasm_method(&mut self, i: i32, pc: usize, args: usize, vars: usize) -> Result<Method> {
        let mut method = Method::new(i, pc, args, vars);
        while self.text.has_i8() && !self.is_method(self.text.cur()) {
            self.disasm_instruction(&mut method)?;
        }
        method.mark_labels();
        Ok(method)
    }

    /// Scans the raw text for INVOKEVIRTUAL opcode bytes and collects the
    /// addresses they appear to call. The text cursor is reset to 0 afterwards.
    ///
    /// This is a byte-wise heuristic: operand bytes equal to 0xB6 are
    /// inspected as well, and candidates whose constant index is outside the
    /// pool are skipped.
    fn find_methods(&mut self) -> Result<Vec<usize>> {
        let mut res = Vec::new();
        while self.text.has_i8() {
            // INVOKEVIRTUAL
            if self.text.read_u8()? != 0xB6 {
                continue;
            }
            let constant_index = self.text.read_u16()? as usize;
            let value = match self.disassembly.constants.get(constant_index) {
                Some(constant) => constant.value,
                None => continue,
            };
            // A negative constant cannot be a text address.
            if value < 0 {
                continue;
            }
            let addr = value as usize;
            // NOTE(review): an address past the end of the text still goes
            // through `Block`'s indexing -- confirm that it cannot panic.
            // Only addresses holding a zero byte are kept (presumably the
            // high byte of the argument count -- verify against the format).
            if self.text[addr] == 0 {
                res.push(addr);
            }
        }
        self.text.seek(0)?;
        Ok(res)
    }

    /// Runs the disassembly: reads every constant from the pool, locates
    /// probable method starts, then disassembles `main` from position 0
    /// (callees are picked up along the way). Returns a copy of the result
    /// with `main` as the first method.
    pub fn disassemble(&mut self) -> Result<Disassembly> {
        while self.pool.has_i32() {
            let value = self.pool.read_i32()?;
            self.disassembly.constants.push(Constant::new(value));
        }
        self.suspects = self.find_methods()?;
        let mut main = self.disasm_method(-1, 0, 0, 0)?;
        // `main` has no header, so its variable count is derived from usage.
        main.update_vars();
        self.disassembly.methods.insert(0, main);
        Ok(self.disassembly.clone())
    }
}
#[derive(Debug, Default, Clone)]
/// Result of disassembling a program; its `Display` output is the listing.
pub struct Disassembly {
/// Constant pool entries in pool order; the position is used in the
/// printed `CONST_<index>` names.
constants: Vec<Constant>,
/// Disassembled methods; `main` is inserted at the front.
methods: Vec<Method>,
}
impl Disassembly {
    /// Disassembles the program supplied by `reader`.
    ///
    /// Checks the magic header, then reads the constant block followed by the
    /// text block and hands both to a `Disassembler`.
    pub fn disassemble_reader(mut reader: IJVMReader) -> Result<Disassembly> {
        if reader.read_u32()? != MAGIC_HEADER {
            return Err("Invalid magic header");
        }
        let pool = reader
            .read_block()
            .map_err(|_| "Failed to read constants block")?;
        let text = reader
            .read_block()
            .map_err(|_| "Failed to read text block")?;
        let mut disassembler = Disassembler::new(text, pool);
        disassembler.disassemble()
    }

    /// Disassembles the IJVM binary stored at path `file`.
    pub fn disassemble_file(file: &str) -> Result<Disassembly> {
        IJVMReader::new(file).and_then(Self::disassemble_reader)
    }

    /// Disassembles an IJVM binary held in memory.
    pub fn new_from_slice(source: &[u8]) -> Result<Disassembly> {
        Self::disassemble_reader(IJVMReader::new_from_slice(source))
    }
}
/// Prints the listing: a `.constant` section (when the pool is not empty)
/// followed by every method.
impl fmt::Display for Disassembly {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if !self.constants.is_empty() {
            write!(f, ".constant\n")?;
            // Constants that hold method addresses are not listed.
            let plain = self.constants
                .iter()
                .enumerate()
                .filter(|&(_, constant)| !constant.method);
            for (index, constant) in plain {
                write!(f, " CONST_{:<6} {}\n", index, constant.value)?;
            }
            write!(f, ".end-constant\n\n")?;
        }
        for method in self.methods.iter() {
            write!(f, "{}", method)?;
        }
        Ok(())
    }
}

View File

@@ -1,8 +1,9 @@
extern crate core;
pub mod ijvmreader;
pub mod binread;
pub mod block;
pub mod ops;
pub mod machine;
pub mod stack;
pub mod pool;
pub mod frame;
@@ -14,4 +15,8 @@ pub mod heap;
type Result<T> = ::std::result::Result<T, &'static str>;
pub mod machine;
pub use machine::Machine;
pub mod disassembler;
pub use disassembler::{Disassembler, Disassembly};

View File

@@ -15,7 +15,7 @@ use netstack::NetStack;
#[cfg(feature = "bonus:heap")]
use heap::Heap;
const MAGIC_HEADER:u32 = 0x1DEA_DFAD;
pub const MAGIC_HEADER:u32 = 0x1DEA_DFAD;
const ANTI_BS_SIZE:usize = 0xFF;
pub struct Machine {
@@ -85,7 +85,7 @@ impl Machine {
pub fn step(&mut self) -> Result<()> {
match self.block.read_op() {
Ok(Operation::Op(a, func)) => {
Ok(Operation::Op(a, func, _)) => {
if cfg!(feature = "debug:instr") {
println!("{}", a);
println!("Stack: {:?}", self.cur_frame().stack.data);

View File

@@ -6,8 +6,17 @@ use std::io::{Read};
pub type OpFunc = fn(&mut Machine) -> Result<()>;
#[derive(Debug, Clone)]
/// Kind of an operand that follows an opcode; `num_to_op` lists the operand
/// kinds of each operation.
pub enum Args {
/// Immediate value; the disassembler reads it as a signed 8-bit integer.
Byte,
/// Immediate value; the disassembler reads it as a signed 16-bit integer.
Short,
/// Local variable index; read as 8 bits, or 16 bits after a WIDE prefix.
Var,
/// Jump target, stored as a signed 16-bit offset.
Label,
/// Constant pool index; the disassembler reads it as an unsigned 16-bit integer.
Constant,
}
pub enum Operation {
Op(&'static str, OpFunc),
Op(&'static str, OpFunc, Vec<Args>),
Invalid(u8)
}
@@ -15,50 +24,51 @@ const JUMP_OFFSET: i32 = 3;
pub fn num_to_op(op: u8) -> Operation {
match op {
0x00 => Operation::Op("NOP", nop),
0x10 => Operation::Op("BIPUSH", bipush),
0x13 => Operation::Op("LDC_W", ldc_w),
0x15 => Operation::Op("ILOAD", iload),
0x36 => Operation::Op("ISTORE", istore),
0x57 => Operation::Op("POP", pop),
0x59 => Operation::Op("DUP", dup),
0x5F => Operation::Op("SWAP", swap),
0x60 => Operation::Op("IADD", iadd),
0x64 => Operation::Op("ISUB", isub),
0x7E => Operation::Op("IAND", iand),
0xB0 => Operation::Op("IOR", ior),
0x84 => Operation::Op("IINC", iinc),
0x99 => Operation::Op("IFEQ", ifeq),
0x9b => Operation::Op("IFLT", iflt),
0x9F => Operation::Op("IF_ICMPEQ", if_icmpeq),
0xA7 => Operation::Op("GOTO", goto),
0xAC => Operation::Op("IRETURN", ireturn),
0xB6 => Operation::Op("INVOKEVIRTUAL", invokevirtual),
0xC4 => Operation::Op("WIDE", wide),
0xFC => Operation::Op("IN", _in),
0xFD => Operation::Op("OUT", out),
0xFF => Operation::Op("HALT", halt),
0x00 => Operation::Op("NOP", nop, vec![]),
0x10 => Operation::Op("BIPUSH", bipush, vec![Args::Byte]),
0x13 => Operation::Op("LDC_W", ldc_w, vec![Args::Constant]),
0x15 => Operation::Op("ILOAD", iload, vec![Args::Var]),
0x36 => Operation::Op("ISTORE", istore, vec![Args::Var]),
0x57 => Operation::Op("POP", pop, vec![]),
0x59 => Operation::Op("DUP", dup, vec![]),
0x5F => Operation::Op("SWAP", swap, vec![]),
0x60 => Operation::Op("IADD", iadd, vec![]),
0x64 => Operation::Op("ISUB", isub, vec![]),
0x7E => Operation::Op("IAND", iand, vec![]),
0xB0 => Operation::Op("IOR", ior, vec![]),
0x84 => Operation::Op("IINC", iinc, vec![Args::Var, Args::Byte]),
0x99 => Operation::Op("IFEQ", ifeq, vec![Args::Label]),
0x9b => Operation::Op("IFLT", iflt, vec![Args::Label]),
0x9F => Operation::Op("IF_ICMPEQ", if_icmpeq, vec![Args::Label]),
0xA7 => Operation::Op("GOTO", goto, vec![Args::Label]),
0xAC => Operation::Op("IRETURN", ireturn, vec![]),
0xB6 => Operation::Op("INVOKEVIRTUAL", invokevirtual, vec![Args::Constant]),
0xC4 => Operation::Op("WIDE", wide, vec![]),
0xFC => Operation::Op("IN", _in, vec![]),
0xFD => Operation::Op("OUT", out, vec![]),
0xFE => Operation::Op("ERR", err, vec![]),
0xFF => Operation::Op("HALT", halt, vec![]),
#[cfg(feature = "extra:sleep")]
0xF0 => Operation::Op("SLP", slp),
0xF0 => Operation::Op("SLP", slp, vec![Args::Byte]),
#[cfg(feature = "bonus:network")]
0xE1 => Operation::Op("NETBIND", netbind),
0xE1 => Operation::Op("NETBIND", netbind, vec![]),
#[cfg(feature = "bonus:network")]
0xE2 => Operation::Op("NETCONNECT", netconnect),
0xE2 => Operation::Op("NETCONNECT", netconnect, vec![]),
#[cfg(feature = "bonus:network")]
0xE3 => Operation::Op("NETIN", netin),
0xE3 => Operation::Op("NETIN", netin, vec![]),
#[cfg(feature = "bonus:network")]
0xE4 => Operation::Op("NETOUT", netout),
0xE4 => Operation::Op("NETOUT", netout, vec![]),
#[cfg(feature = "bonus:network")]
0xE5 => Operation::Op("NETCLOSE", netclose),
0xE5 => Operation::Op("NETCLOSE", netclose, vec![]),
#[cfg(feature = "bonus:heap")]
0xD1 => Operation::Op("NEWARRAY", newarray),
0xD1 => Operation::Op("NEWARRAY", newarray, vec![]),
#[cfg(feature = "bonus:heap")]
0xD2 => Operation::Op("IALOAD", iaload),
0xD2 => Operation::Op("IALOAD", iaload, vec![]),
#[cfg(feature = "bonus:heap")]
0xD3 => Operation::Op("IASTORE", iastore),
0xD3 => Operation::Op("IASTORE", iastore, vec![]),
x => Operation::Invalid(x)
}
@@ -126,6 +136,12 @@ fn goto(machine: &mut Machine) -> Result<()> {
machine.block.jump(offset)
}
/// Handler for the ERR operation (0xFE): reports on stderr that the program
/// called ERR and halts the machine.
fn err(machine: &mut Machine) -> Result<()> {
    eprintln!("MACHINE CALLED ERR");
    machine.halted = true;
    Ok(())
}
fn halt(machine: &mut Machine) -> Result<()> {
machine.halted = true;
Ok(())