Solving CONFidence 2015 DeobfuscateMe Task late
We got second place in CONFidence CTF, organized by Dragon Sector, yay!
One of the reversables we couldn’t crack during the CTF, because we ran out of time writing a symbolic execution engine to automate finding the solution. The binary is mirrored here, for archival purposes.
When opening up the binary in IDA Pro, one finds that there is only one check
function being called by the main
program:
|
|
The check
is a monster function scattered throughout the whole binary:
.text:080484E0 xchg edx, ebx
.text:080484E2 xor eax, 0FFFFFFFFh
.text:080484E5 xchg ebx, ecx
.text:080484E7 jmp loc_804EC31
.text:080484E7; ----------------------------------------------------------------------------
.text:0804EC31 xchg ecx, edx
.text:0804EC33 xchg eax, edx
.text:0804EC34 jmp loc_804E211
.text:0804EC34; ----------------------------------------------------------------------------
.text:0804E211 shl edx, 1Bh
.text:0804E214 jmp loc_804865E
.text:0804E214 ; ---------------------------------------------------------------------------
.text:0804865E or edx, 0A869CBC9h
.text:08048664 xchg ebx, edx
.text:08048666 jmp loc_805ACEE
.text:0804866B ; ---------------------------------------------------------------------------
.text:0805ACEE sub ebx, ebp
.text:0805ACF0 jmp loc_805724C
.text:0805ACF0 ; ---------------------------------------------------------------------------
.text:0805724C add ecx, eax
.text:0805724E sub ebx, ebp
.text:08057250 jmp loc_80551C8
.text:08057255 ; ---------------------------------------------------------------------------
.text:080551C8 xor ebx, ebx
.text:080551CA add ebx, eax
.text:080551CC xchg ebx, edx
.text:080551CE jmp loc_8052800
.text:080551CE ; ---------------------------------------------------------------------------
.text:08052800 xchg eax, ecx
.text:08052801 xchg edx, ebx
.text:08052803 neg ebx
.text:08052805 jmp loc_805478E
.text:08052805 ; ---------------------------------------------------------------------------
.text:0805478E add eax, ebx
.text:08054790 xchg edx, ebx
.text:08054792 xchg eax, ebx
.text:08054793 jmp loc_804A82A
.text:08054793 ; ---------------------------------------------------------------------------
We made a reasonable guess that the actual checker code consists of position
independent instructions only, and implemented a quick script that would follow
the jmp
instructions and collect all other instructions into a nice linear
block. This block is still massive in size:
.text:080484E0 public _check
.text:080484E0 _check proc near ; CODE XREF: main+3C↑p
.text:080484E0 xchg edx, ebx
.text:080484E2 xor eax, 0FFFFFFFFh
.text:080484E5 xchg ebx, ecx
.text:080484E7 xchg ecx, edx
.text:080484E9 xchg eax, edx
.text:080484EA shl edx, 1Bh
.text:080484ED or edx, 0A869CBC9h
.text:080484F3 xchg ebx, edx
.text:080484F5 sub ebx, ebp
.text:080484F7 add ecx, eax
.text:080484F9 sub ebx, ebp
.text:080484FB xor ebx, ebx
.text:080484FD add ebx, eax
.text:080484FF xchg ebx, edx
.text:08048501 xchg eax, ecx
.text:08048502 xchg edx, ebx
.text:08048504 neg ebx
.text:08048506 add eax, ebx
.text:08048508 xchg edx, ebx
.text:0804850A xchg eax, ebx
.text:0804850B xor edx, edx
.text:0804850D add edx, 0FFBB4586h
.text:08048513 sub edx, 81622B0Fh
.text:08048519 add edx, 2276E2FAh
.text:0804851F xchg eax, ecx
.text:08048520 xor edx, edx
.text:08048522 xchg ebx, ecx
.text:08048524 xchg eax, ebx
.text:08048525 xchg eax, ecx
.text:08048526 xchg ebx, ecx
.text:08048528 sub edx, 0D51EAFBFh
.text:0804852E xchg edx, ecx
.text:08048530 xchg eax, edx
.text:08048531 add ecx, 0C6538058h
.text:08048537 xchg ebx, ecx
.text:08048539 xchg eax, ecx
.text:0804853A xchg edx, ecx
.text:0804853C rol ebx, 7
.text:0804853F xchg eax, ecx
.text:08048540 xchg eax, ebx
.text:08048541 xchg edx, ecx
.text:08048543 xchg eax, edx
.text:08048544 xchg eax, ecx
.text:08048545 push ebp
.text:08048546 xchg eax, ebx
.text:08048547 xchg eax, ebx
.text:08048548 xor ebp, 0FFFFFFFFh
.text:0804854B xor ebp, 0FFFFFFFFh
.text:0804854E add ebp, 0A10FA35Fh
.text:08048554 xchg ebx, edx
.text:08048556 add ebp, 421A5649h
.text:0804855C sub ebp, 6411B5D9h
.text:08048562 add ebp, 5F1EE010h
.text:08048568 sub ebp, 6CE7BCF1h
.text:0804856E add ebp, 7BF9A346h
.text:08048574 add ebp, 1AE87B85h
.text:0804857A sub ebp, 0A2625870h
.text:08048580 add ebp, 3964C289h
.text:08048586 sub ebp, 5696C2F9h
.text:0804858C add ebp, 6A6C431Dh
.text:08048592 sub ebp, 31FFBBDh
.text:08048598 add ebp, 0ED1C37C5h
.text:0804859E sub ebp, 8268B0A1h
.text:080485A4 add ebp, 0BCF7B3DAh
.text:080485AA add ebp, 57DC1681h
.text:080485B0 sub ebp, 457AE9D4h
.text:080485B6 add ebp, 371FF325h
.text:080485BC xchg ebx, edx
.text:080485BE sub ebp, 0CADDD25Ch
.text:080485C4 add ebp, 0D8684E43h
.text:080485CA sub ebp, 9AF2C4E4h
.text:080485D0 add ebp, 3B7ABE8Dh
.text:080485D6 add ebp, 80C7BF2Eh
.text:080485DC sub ebp, 591082D9h
.text:080485E2 sub ebp, 69D2F7C2h
.text:080485E8 add ebp, 7733786h
.text:080485EE sub ebp, 33CFB444h
.text:080485F4 sub ebp, 0AAE24EACh
.
.
.
s
n
i
p
.
.
.
.text:080520F8 xchg eax, ecx
.text:080520F9 neg ebp
.text:080520FB xchg eax, ebx
.text:080520FC add eax, ebp
.text:080520FE xchg eax, edx
.text:080520FF xchg edx, ebx
.text:08052101 xchg ecx, ebx
.text:08052103 xchg eax, ecx
.text:08052104 xchg ebx, edx
.text:08052106 xchg eax, ecx
.text:08052107 mov esi, edx
.text:08052109 xchg ebx, ecx
.text:0805210B neg esi
.text:0805210D mov ebp, esi
.text:0805210F neg ebp
.text:08052111 add ebx, ebp
.text:08052113 xchg edx, ebx
.text:08052115 xchg ecx, ebx
.text:08052117 xchg eax, edx
.text:08052118 xchg eax, ebx
.text:08052119 xchg edx, ecx
.text:0805211B xchg eax, edx
.text:0805211C xchg eax, ecx
.text:0805211D xchg eax, ebx
.text:0805211E xchg edx, ebx
.text:08052120 xchg eax, edx
.text:08052121 xchg edx, ebx
.text:08052123 mov ebp, esi
.text:08052125 and ebp, 72EDEC3Ch
.text:0805212B xor esi, 72EDEC3Ch
.text:08052131 xor esi, ebp
.text:08052133 xor edi, 0FFFFFFFFh
.text:08052136 xchg eax, ecx
.text:08052137 xchg edx, ebx
.text:08052139 xchg ebx, ecx
.text:0805213B xchg ecx, ebx
.text:0805213D pop ebp
.text:0805213E sub edi, ecx
.text:08052140 xchg ecx, ebx
.text:08052142 add edi, ebx
.text:08052144 retn
(note the address range changing by about 40k from top to bottom.)
It is clear that manual analysis is not an option. During the CTF, we had a hard time automating the analysis. The idea was basically to mark the top of the stack as input variable, then translate every instruction to math and then use a SMT solver like z3 to query for possible values of the input variable such that the output would become non-zero.
After the CTF we found a blog poist by Dmytrio Olekiuk from whom we stole quite significant parts of our solution. The script relies on OpenREIL to lift x86 code to math and on z3 to compute the solution:
#!/usr/bin/python3
import sys, os, random, struct, unittest
from capstone import *
from capstone.x86 import *
from pyopenreil.REIL import *
from pyopenreil.utils import asm
from pyopenreil.symbolic import *
import pyopenreil.VM as VM
import struct
import z3
UNDEF = None
class Val(object):
def __init__(self, val = UNDEF, exp = None):
self.val, self.exp = val, exp
def __str__(self):
return str(self.exp) if self.is_symbolic() else hex(self.val)
def is_symbolic(self):
# check if value is symbolic
return self.val is None
def is_concrete(self):
# check if value is concrete
return not self.is_symbolic()
def to_z3(self, state, size):
#
# generate Z3 expression that represents this value
#
def _z3_size(size):
return { U1: 1, U8: 8, U16: 16, U32: 32, U64: 64 }[ size ]
def _z3_exp(exp, size):
if isinstance(exp, SymVal):
if state.has_key(exp.name):
return state[exp.name]
else:
return z3.BitVec(exp.name, _z3_size(exp.size))
elif isinstance(exp, SymConst):
return z3.BitVecVal(exp.val, _z3_size(exp.size))
elif isinstance(exp, SymExp):
a, b = exp.a, exp.b
assert isinstance(a, SymVal) or isinstance(a, SymConst)
assert b is None or isinstance(b, SymVal) or isinstance(b, SymConst)
assert b is None or a.size == b.size
a = a if a is None else _z3_exp(a, a.size)
b = b if b is None else _z3_exp(b, b.size)
# makes 1 bit bitvectors from booleans
_z3_bool_to_bv = lambda exp: z3.If(exp, z3.BitVecVal(1, 1), z3.BitVecVal(0, 1))
# z3 expression from SymExp
ret = { I_ADD: lambda: a + b,
I_SUB: lambda: a - b,
I_NEG: lambda: -a,
I_MUL: lambda: a * b,
I_DIV: lambda: z3.UDiv(a, b),
I_MOD: lambda: z3.URem(a, b),
I_SHR: lambda: z3.LShR(a, b),
I_SHL: lambda: a << b,
I_OR: lambda: a | b,
I_AND: lambda: a & b,
I_XOR: lambda: a ^ b,
I_NOT: lambda: ~a,
I_EQ: lambda: _z3_bool_to_bv(a == b),
I_LT: lambda: _z3_bool_to_bv(z3.ULT(a, b)) }[exp.op]()
size_src = _z3_size(exp.a.size)
size_dst = _z3_size(size)
if size_src > size_dst:
# convert to smaller value
return z3.Extract(size_dst - 1, 0, ret)
elif size_src < size_dst:
# convert to bigger value
return z3.Concat(z3.BitVecVal(0, size_dst - size_src), ret)
else:
return ret
else:
assert False
if self.is_concrete():
# use concrete value
return z3.BitVecVal(self.val, _z3_size(size))
else:
# build Z3 expression
return _z3_exp(self.exp, size)
class Mem(VM.Mem):
class _Val(Val):
def __init__(self, size, seed, val = UNDEF, exp = None):
# Each byte in memory must have size and hash
# of full value that uses it.
self.size, self.seed = size, seed
super(Mem._Val, self).__init__(val = val, exp = exp)
def pack(self, size, val):
ret = []
seed = random.randrange(0x10000000, 0x7fffffff)
if val.is_symbolic():
for i in range(self.map_length[size]):
ret.append(self._Val(size, seed, exp = val.exp))
else:
ret = map(lambda val: self._Val(size, seed, val),
list(super(Mem, self).pack(size, val.val)))
return ret
def unpack(self, size, data):
first = data[0]
items, symbolic = [], True
assert len(data) == self.map_length[first.size]
for val in data:
# check if all bytes belongs to the same value
assert val.size == first.size and \
val.seed == first.seed
if val.is_concrete():
items.append(val.val)
symbolic = False
if symbolic:
return Val(exp = first.exp)
else:
return Val(super(Mem, self).unpack(size, ''.join(items)))
def read(self, addr, size):
return self._read(addr, size)
def write(self, addr, size, data):
self._write(addr, size, data)
class Math(VM.Math):
def eval(self, op, a = None, b = None):
concrete = True
a_val = a if a is None else a.val
b_val = b if b is None else b.val
# determinate symbolic/concrete operation mode
if a_val is not None and a_val.is_symbolic(): concrete = False
if b_val is not None and b_val.is_symbolic(): concrete = False
if concrete:
a_reg = a if a is None else VM.Reg(a.size, a.get_val())
b_reg = b if b is None else VM.Reg(b.size, b.get_val())
# compute and return concrete value
return Val(val = super(Math, self).eval(op, a_reg, b_reg))
else:
assert a is not None
assert op in [ I_STR, I_NOT ] or b is not None
# get symbolic representation of the arguments
a_sym = a if a is None else a.to_symbolic()
b_sym = b if b is None else b.to_symbolic()
# make a symbolic expression
exp = a_sym if op == I_STR else SymExp(op, a_sym, b_sym)
# return symbolic value
return Val(exp = exp)
class Reg(VM.Reg):
def to_symbolic(self):
# get symbolic representation of register contents
if self.val.is_concrete():
# use concrete value
return SymConst(self.get_val(), self.size)
else:
if self.regs_map.has_key(self.name):
return SymVal(self.regs_map[self.name], self.size)
# use symbolic value
return SymVal(self.name, self.size) if self.val.exp is None \
else self.val.exp
def get_val(self):
# get concrete value of the register if it's available
assert self.val.is_concrete()
return super(Reg, self).get_val(self.val.val)
def str_val(self):
return str(self.val.exp) if self.val.is_symbolic() \
else super(Reg, self).str_val(self.val.val)
class Cpu(VM.Cpu):
DEF_REG = Reg
DEF_REG_VAL = Val()
DEF_R_DFLAG = Val(1)
class State(object):
def __init__(self, regs = None, mem = None):
self.regs, self.mem = regs, mem
def __init__(self, arch):
self.known_state = []
self.regs_map, self.regs_cnt, self.regs_list = {}, {}, []
super(Cpu, self).__init__(arch, mem = Mem(strict = False), math = Math())
def reg(self, name, val = None):
reg = super(Cpu, self).reg(name, val)
reg.regs_map = self.regs_map
if val is not None:
if self.insn is None: addr = inum = 0
else: addr, inum = self.insn.ir_addr()
cnt = 0
cnt_key = ( reg.name, addr, inum )
if self.regs_cnt.has_key(cnt_key):
self.regs_cnt[cnt_key] += 1
cnt = self.regs_cnt[cnt_key]
else:
self.regs_cnt[cnt_key] = 0
new_name = '%s_%x_%x_%x' % (reg.name, addr, inum, cnt)
reg_copy = super(Cpu, self).reg(new_name, reg.val, reg.size)
self.regs_map[reg.name] = new_name
self.regs_list.append(reg_copy)
return reg
def arg(self, arg):
if arg.type == A_CONST:
return self.DEF_REG(arg.size, Val(arg.val))
else:
return super(Cpu, self).arg(arg)
def set_ip(self, val):
super(Cpu, self).set_ip(Val(val))
def insn_jcc(self, insn, a, b, c):
if a.val.is_symbolic():
raise Exception('I_JCC with symbolic condition at ' +
'%s, giving up' % str(insn.ir_addr()))
elif c.val.is_symbolic():
raise Exception('I_JCC with symbolic location at ' +
'%s, giving up' % str(insn.ir_addr()))
else:
return super(Cpu, self).insn_jcc(insn, a, b, c)
def insn_stm(self, insn, a, b, c):
if c.val.is_symbolic():
raise Exception('I_STM with symbolic write address at ' +
'%s, giving up' % str(insn.ir_addr()))
else:
# store a to memory
self.mem.store(c.get_val(), insn.a.size, a.val)
return None
def insn_ldm(self, insn, a, b, c):
print(a.val.exp)
if a.val.is_symbolic():
# Dirtiest of all hacks!!
if len(str(a.val.exp).split('+ 0x')) == 2:
self.reg(insn.c, self.mem.load(0x13370000 +
int(str(a.val.exp).split('+ 0x')[1][:-1], 16), insn.c.size))
return None
else:
self.reg(insn.c, self.mem.load(0x13370000, insn.c.size))
return None
raise Exception('I_LDM with symbolic read address at ' +
'%s, giving up' % str(insn.ir_addr()))
else:
# read from memory to c
self.reg(insn.c, self.mem.load(a.get_val(), insn.c.size))
return None
def execute(self, insn):
print insn.to_str()
return super(Cpu, self).execute(insn)
def run(self, storage, addr = 0L, stop_at = None):
print
super(Cpu, self).run(storage, addr = addr, stop_at = stop_at)
def to_z3(self, state = None):
state = {} if state is None else state
for reg in self.regs_list:
# get Z3 expression for each register
state[reg.name] = reg.val.to_z3(state, reg.size)
return state
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
c = open('DeobfuscateMe.elf', 'rb').read()
vdiff = 0x08048000
rip = 0x080484E0 - vdiff
buf = b''
lreg = None
d = 0
l = {
X86_REG_EAX: 0, # uinput
X86_REG_EBX: 0,
X86_REG_ECX: 0,
X86_REG_EDX: 0,
X86_REG_EDI: 0,
X86_REG_ESI: 0,
X86_REG_EBP: 0,
X86_REG_ESP: 0,
}
op = [ None, None, None ]
# cleaning script to remove jumps
while True:
i = [ i for i in md.disasm(c[rip:rip+16], rip + vdiff) ][0]
rip += i.size
for x in range(len(i.operands)):
op[x] = i.operands[x]
if i.id == X86_INS_JMP:
rip = op[0].imm - vdiff
continue
elif i.id == X86_INS_XCHG:
l[op[0].reg], l[op[1].reg] = l[op[1].reg], l[op[0].reg]
if l[op[0].reg] == 0 and l[op[1].reg] == 0:
continue
elif i.id == X86_INS_MOV:
if op[1].type == X86_OP_REG:
l[op[0].reg] = l[op[1].reg]
if l[op[1].reg] == 0:
continue
else:
l[op[0].reg] = 1
elif i.id in [ X86_INS_XOR ]:
if op[1].type == X86_OP_REG:
#if op[1].reg == op[0].reg:
# l[op[0].reg] = 1
#else:
if l[op[1].reg] == 0 and l[op[0].reg] == 0:
continue
elif l[op[0].reg] == 0:
continue
elif i.id == X86_INS_PUSH:
if op[0].type == X86_OP_REG:
if l[op[0].reg] == 0:
continue
else:
print("push constant?")
exit(-1)
elif i.id in [ X86_INS_AND, X86_INS_OR, X86_INS_ADD, X86_INS_SUB,
X86_INS_SHL, X86_INS_SHR, X86_INS_ROR, X86_INS_ROL, X86_INS_NEG ]:
if op[1].type == X86_OP_REG:
if l[op[1].reg] == 0 or l[op[0].reg] == 0:
continue
elif l[op[0].reg] == 0:
continue
elif i.id == X86_INS_SHL:
if op[0].reg == X86_REG_EDI:
instr = b'\x01\xff' # add edi, edi
elif op[0].reg == X86_REG_ESI:
instr = b'\x01\xf6' # add esi, esi
elif op[0].reg == X86_REG_EDX:
instr = b'\x01\xd2' # add edx, edx
elif op[0].reg == X86_REG_ECX:
instr = b'\x01\xc9' # add ecx, ecx
elif op[0].reg == X86_REG_EAX:
instr = b'\x01\xc0' # add eax, eax
elif op[0].reg == X86_REG_EBX:
instr = b'\x01\xdb' # add ebx, ebx
else:
print("SHL with unknown operand:")
print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
exit(-1)
buf += instr * op[1].imm
continue
elif i.id == X86_INS_RET:
buf += i.bytes
print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
break
else:
buf += i.bytes
print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
continue
buf += i.bytes
print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
c = bytearray(c)
for i in range(len(buf)):
c[0x080484E0 - vdiff + i] = buf[i]
# write cleaned version to disk
open('deobfuscated.elf', 'wb').write(c)
reader = ReaderRaw(ARCH_X86, buf)
tr = CodeStorageTranslator(reader)
bb = tr.get_bb(0)
cpu = Cpu(ARCH_X86)
abi = VM.Abi(cpu, tr, no_reset = True)
ret = 0x41414141
indata = 0x13370000
cpu.mem.store(indata, U32, Val(struct.unpack("<I", b'Drgn')[0], U32))
for i in range(1, 16):
cpu.mem.store(indata + i * 4, U32, Val(exp = SymVal('FLAG_{}'.format(i), U32)))
# create stack with pointer to symbolic arguments for check()
stack = abi.pushargs([Val(indata)])
# dummy return address
stack.push(Val(ret))
# initialize emulator's registers
cpu.reg('esp', Val(stack.top, U32))
cpu.reg('eax', Val(0, U32))
cpu.reg('ebp', Val(0, U32))
cpu.reg('ebx', Val(0, U32))
cpu.reg('ecx', Val(0, U32))
cpu.reg('edx', Val(0, U32))
cpu.reg('edi', Val(0, U32))
cpu.reg('esi', Val(0, U32))
# run until stop
try: cpu.run(tr, 0, stop_at = [0 + len(buf) - 1])
#except VM.CpuStop as e:
except:
print 'STOP at', hex(cpu.reg('eip').get_val())
# get Z3 expressions list for current CPU state
state = cpu.to_z3()
for reg in ['eax', 'ebx', 'ecx' , 'edx', 'esi', 'edi', 'ebp']:
print(reg)
# create SMT solver
solver = z3.Solver()
# add constraint for return value
solver.add(0 == cpu.reg(reg).val.to_z3(state, U32))
check = solver.check()
if check == z3.sat:
model = solver.model()
for s in sorted(list(model), key = lambda x: int(x.name()[len('FLAG_'):])):
print(s, hex(model[s].as_long()))
for s in sorted(list(model), key = lambda x: int(x.name()[len('FLAG_'):])):
try:
sys.stdout.write(struct.pack("<L", model[s].as_long()).decode('ascii'))
except: pass
print('')
This yields the flag DrgnS{y0u_sh0uld_h4v3_iT_s01ved_by_h4nd!!!}
.