Rewrite the Java bindings.

This brings the Java API up to par with Python feature-wise and substantially
simplifies the hook implementation, enabling proper bounds-checked hooks.

The rewrite strives for compatibility with the previous API, but there are some
breaking changes. It is possible to push closer to full backwards compatibility
if required, at the cost of reintroducing some of the suboptimal designs. Here
are the main points of breakage:

- ReadHook and WriteHook are gone, replaced simply by MemHook. Hooking valid
  memory accesses now requires a type parameter. This enables fetch and
  read-after hooks with a unified API and a single callback object.
- mem_read now takes its size argument as an int, not a long. We are unable to
  allocate more than 2GB in a single request anyway (Java arrays are indexed by
  int).
- Instruction hooks now require specifying the instruction explicitly, instead
  of guessing based on the hook type. This is necessary to distinguish
  sysenter/syscall and ARM64 mrs/msr/sys/sysl, without excessively bloating the
  library with redundant hook types. Bounds must also be specified, to support
  bounds-checked instruction hooks.
- Reading object-type registers (any register larger than 64 bits, or registers
  with special formats) requires a second argument to reg_read. This allows us
  to provide a fast reg_read that returns a long for the common cases, while
  still supporting a more general reg_read for other registers.
- mem_map_ptr is rewritten to take a *direct* java.nio.Buffer, which enables
  many more use cases than a simple byte array, and improves performance (a
  byte array cannot really be used as a mapped buffer without GC-pinning it,
  which hurts GC performance).
- The context handling API is redesigned to be safer and more object-oriented.

A lot of bugs are fixed with this implementation:
- Unicorn instances can be properly garbage-collected, instead of hanging around
  forever in the Unicorn.unicorns table.
- Hooks no longer fire outside of their bounds (#1164), and in fact, hook bounds
  are properly respected (previously, all hooks were just registered globally to
  all addresses).
- Hooks are substantially faster, as they are now dispatched directly via a
  single method call rather than being indirected through invokeCallbacks.
- Loading vector registers works now, rather than crashing the VM (#1539).

Several features are now enabled in the Java implementation:

- All of the current ctl_* calls are implemented.
- mmio_map is implemented.
- The new virtual TLB mode is implemented.
- Reading/writing Context registers is implemented.
- New hook types are added: TcgOpcodeHook, EdgeGeneratedHook,
  InvalidInstructionHook, TlbFillHook, and the instruction hooks Arm64SysHook,
  CpuidHook.
- All known special registers are supported.
This commit is contained in:
Robert Xiao
2023-05-06 17:49:41 -07:00
parent 8777bb6ae6
commit aa430587cc
33 changed files with 2991 additions and 1421 deletions

View File

@@ -100,7 +100,7 @@ public class SampleNetworkAuditing {
long buf = ecx;
long count = edx;
byte[] content = uc.mem_read(buf, count);
byte[] content = uc.mem_read(buf, (int) count);
String msg = String.format("write data=%s count=%d to fd(%d)",
new String(content), count, fd);
@@ -166,7 +166,7 @@ public class SampleNetworkAuditing {
long addrlen =
toInt(uc.mem_read(args + SIZE_REG * 2, SIZE_REG));
byte[] sock_addr = uc.mem_read(umyaddr, addrlen);
byte[] sock_addr = uc.mem_read(umyaddr, (int) addrlen);
String msg = String.format("fd(%d) bind to %s", fd,
parse_sock_address(sock_addr));
@@ -181,7 +181,7 @@ public class SampleNetworkAuditing {
long addrlen =
toInt(uc.mem_read(args + SIZE_REG * 2, SIZE_REG));
byte[] sock_addr = uc.mem_read(uservaddr, addrlen);
byte[] sock_addr = uc.mem_read(uservaddr, (int) addrlen);
String msg = String.format("fd(%d) connect to %s", fd,
parse_sock_address(sock_addr));
fd_chains.add_log(fd, msg);
@@ -211,7 +211,7 @@ public class SampleNetworkAuditing {
long upeer_len = toInt(uc.mem_read(upeer_addrlen, 4));
byte[] sock_addr =
uc.mem_read(upeer_sockaddr, upeer_len);
uc.mem_read(upeer_sockaddr, (int) upeer_len);
String msg =
String.format("fd(%d) accept client with upeer=%s",
@@ -227,7 +227,7 @@ public class SampleNetworkAuditing {
long flags =
toInt(uc.mem_read(args + SIZE_REG * 3, SIZE_REG));
byte[] buf = uc.mem_read(buff, length);
byte[] buf = uc.mem_read(buff, (int) length);
String msg = String.format("fd(%d) send data=%s", fd,
new String(buf));
fd_chains.add_log(fd, msg);

View File

@@ -104,7 +104,8 @@ public class Sample_x86 {
}
private static class MyWriteInvalidHook implements EventMemHook {
public boolean hook(Unicorn u, long address, int size, long value,
public boolean hook(Unicorn u, int type, long address, int size,
long value,
Object user) {
System.out.printf(
">>> Missing memory is being WRITE at 0x%x, data size = %d, data value = 0x%x\n",
@@ -131,16 +132,18 @@ public class Sample_x86 {
}
}
private static class MyRead64Hook implements ReadHook {
public void hook(Unicorn u, long address, int size, Object user) {
private static class MyRead64Hook implements MemHook {
public void hook(Unicorn u, int type, long address, int size,
long value, Object user) {
System.out.printf(
">>> Memory is being READ at 0x%x, data size = %d\n", address,
size);
}
}
private static class MyWrite64Hook implements WriteHook {
public void hook(Unicorn u, long address, int size, long value,
private static class MyWrite64Hook implements MemHook {
public void hook(Unicorn u, int type, long address, int size,
long value,
Object user) {
System.out.printf(
">>> Memory is being WRITE at 0x%x, data size = %d, data value = 0x%x\n",
@@ -295,9 +298,9 @@ public class Sample_x86 {
u.hook_add(new MyCodeHook(), 1, 0, null);
// handle IN instruction
u.hook_add(new MyInHook(), null);
u.hook_add(new MyInHook(), Unicorn.UC_X86_INS_IN, 1, 0, null);
// handle OUT instruction
u.hook_add(new MyOutHook(), null);
u.hook_add(new MyOutHook(), Unicorn.UC_X86_INS_OUT, 1, 0, null);
// emulate machine code in infinite time
u.emu_start(ADDRESS, ADDRESS + X86_CODE32_INOUT.length, 0, 0);
@@ -454,6 +457,7 @@ public class Sample_x86 {
// intercept invalid memory events
u.hook_add(new MyWriteInvalidHook(), Unicorn.UC_HOOK_MEM_WRITE_UNMAPPED,
1, 0,
null);
// emulate machine code in infinite time
@@ -591,10 +595,10 @@ public class Sample_x86 {
u.hook_add(new MyCode64Hook(), ADDRESS, ADDRESS + 20, null);
// tracing all memory WRITE access (with @begin > @end)
u.hook_add(new MyWrite64Hook(), 1, 0, null);
u.hook_add(new MyWrite64Hook(), Unicorn.UC_HOOK_MEM_WRITE, 1, 0, null);
// tracing all memory READ access (with @begin > @end)
u.hook_add(new MyRead64Hook(), 1, 0, null);
u.hook_add(new MyRead64Hook(), Unicorn.UC_HOOK_MEM_READ, 1, 0, null);
// emulate machine code in infinite time (last param = 0), or when
// finishing all the code.

View File

@@ -58,8 +58,8 @@ public class Sample_x86_mmr {
// read the registers back out
eax = (int) ((Long) uc.reg_read(Unicorn.UC_X86_REG_EAX)).longValue();
ldtr2 = (X86_MMR) uc.reg_read(Unicorn.UC_X86_REG_LDTR);
gdtr2 = (X86_MMR) uc.reg_read(Unicorn.UC_X86_REG_GDTR);
ldtr2 = (X86_MMR) uc.reg_read(Unicorn.UC_X86_REG_LDTR, null);
gdtr2 = (X86_MMR) uc.reg_read(Unicorn.UC_X86_REG_GDTR, null);
System.out.printf(">>> EAX = 0x%x\n", eax);