Cherry referring to my last name Forward and Reverse Engineering

Solving CONFidence 2015 DeobfuscateMe Task late

We got second place in CONFidence CTF, organized by Dragon Sector, yay!

One of the reversables we couldn’t crack during the CTF, because we ran out of time writing a symbolic execution engine to automate finding the solution. The binary is mirrored here, for archival purposes.

When opening up the binary in IDA Pro, one finds that there is only one check function being called by the main program:

int __cdecl main(int argc, const char **argv, const char **envp)
  char v4[68]; // [esp+0h] [ebp-48h] BYREF

  printf("Flag: ");
  scanf("%63s", v4);
  if ( check(v4) ) // calls to 0x080484E0
    puts("Nope :(");
    puts("Nice, that was the flag!");
  return 0;

The check is a monster function scattered throughout the whole binary:

.text:080484E0                 xchg    edx, ebx
.text:080484E2                 xor     eax, 0FFFFFFFFh
.text:080484E5                 xchg    ebx, ecx
.text:080484E7                 jmp     loc_804EC31
.text:080484E7; ----------------------------------------------------------------------------
.text:0804EC31                 xchg    ecx, edx
.text:0804EC33                 xchg    eax, edx
.text:0804EC34                 jmp     loc_804E211
.text:0804EC34; ----------------------------------------------------------------------------
.text:0804E211                 shl     edx, 1Bh
.text:0804E214                 jmp     loc_804865E
.text:0804E214 ; ---------------------------------------------------------------------------
.text:0804865E                 or      edx, 0A869CBC9h
.text:08048664                 xchg    ebx, edx
.text:08048666                 jmp     loc_805ACEE
.text:0804866B ; ---------------------------------------------------------------------------
.text:0805ACEE                 sub     ebx, ebp
.text:0805ACF0                 jmp     loc_805724C
.text:0805ACF0 ; ---------------------------------------------------------------------------
.text:0805724C                 add     ecx, eax
.text:0805724E                 sub     ebx, ebp
.text:08057250                 jmp     loc_80551C8
.text:08057255 ; ---------------------------------------------------------------------------
.text:080551C8                 xor     ebx, ebx
.text:080551CA                 add     ebx, eax
.text:080551CC                 xchg    ebx, edx
.text:080551CE                 jmp     loc_8052800
.text:080551CE ; ---------------------------------------------------------------------------
.text:08052800                 xchg    eax, ecx
.text:08052801                 xchg    edx, ebx
.text:08052803                 neg     ebx
.text:08052805                 jmp     loc_805478E
.text:08052805 ; ---------------------------------------------------------------------------
.text:0805478E                 add     eax, ebx
.text:08054790                 xchg    edx, ebx
.text:08054792                 xchg    eax, ebx
.text:08054793                 jmp     loc_804A82A
.text:08054793 ; ---------------------------------------------------------------------------

We made a reasonable guess that the actual checker code consists of position independent instructions only, and implemented a quick script that would follow the jmp instructions and collect all other instructions into a nice linear block. This block is still massive in size:

.text:080484E0                 public _check
.text:080484E0 _check          proc near               ; CODE XREF: main+3C↑p
.text:080484E0                 xchg    edx, ebx
.text:080484E2                 xor     eax, 0FFFFFFFFh
.text:080484E5                 xchg    ebx, ecx
.text:080484E7                 xchg    ecx, edx
.text:080484E9                 xchg    eax, edx
.text:080484EA                 shl     edx, 1Bh
.text:080484ED                 or      edx, 0A869CBC9h
.text:080484F3                 xchg    ebx, edx
.text:080484F5                 sub     ebx, ebp
.text:080484F7                 add     ecx, eax
.text:080484F9                 sub     ebx, ebp
.text:080484FB                 xor     ebx, ebx
.text:080484FD                 add     ebx, eax
.text:080484FF                 xchg    ebx, edx
.text:08048501                 xchg    eax, ecx
.text:08048502                 xchg    edx, ebx
.text:08048504                 neg     ebx
.text:08048506                 add     eax, ebx
.text:08048508                 xchg    edx, ebx
.text:0804850A                 xchg    eax, ebx
.text:0804850B                 xor     edx, edx
.text:0804850D                 add     edx, 0FFBB4586h
.text:08048513                 sub     edx, 81622B0Fh
.text:08048519                 add     edx, 2276E2FAh
.text:0804851F                 xchg    eax, ecx
.text:08048520                 xor     edx, edx
.text:08048522                 xchg    ebx, ecx
.text:08048524                 xchg    eax, ebx
.text:08048525                 xchg    eax, ecx
.text:08048526                 xchg    ebx, ecx
.text:08048528                 sub     edx, 0D51EAFBFh
.text:0804852E                 xchg    edx, ecx
.text:08048530                 xchg    eax, edx
.text:08048531                 add     ecx, 0C6538058h
.text:08048537                 xchg    ebx, ecx
.text:08048539                 xchg    eax, ecx
.text:0804853A                 xchg    edx, ecx
.text:0804853C                 rol     ebx, 7
.text:0804853F                 xchg    eax, ecx
.text:08048540                 xchg    eax, ebx
.text:08048541                 xchg    edx, ecx
.text:08048543                 xchg    eax, edx
.text:08048544                 xchg    eax, ecx
.text:08048545                 push    ebp
.text:08048546                 xchg    eax, ebx
.text:08048547                 xchg    eax, ebx
.text:08048548                 xor     ebp, 0FFFFFFFFh
.text:0804854B                 xor     ebp, 0FFFFFFFFh
.text:0804854E                 add     ebp, 0A10FA35Fh
.text:08048554                 xchg    ebx, edx
.text:08048556                 add     ebp, 421A5649h
.text:0804855C                 sub     ebp, 6411B5D9h
.text:08048562                 add     ebp, 5F1EE010h
.text:08048568                 sub     ebp, 6CE7BCF1h
.text:0804856E                 add     ebp, 7BF9A346h
.text:08048574                 add     ebp, 1AE87B85h
.text:0804857A                 sub     ebp, 0A2625870h
.text:08048580                 add     ebp, 3964C289h
.text:08048586                 sub     ebp, 5696C2F9h
.text:0804858C                 add     ebp, 6A6C431Dh
.text:08048592                 sub     ebp, 31FFBBDh
.text:08048598                 add     ebp, 0ED1C37C5h
.text:0804859E                 sub     ebp, 8268B0A1h
.text:080485A4                 add     ebp, 0BCF7B3DAh
.text:080485AA                 add     ebp, 57DC1681h
.text:080485B0                 sub     ebp, 457AE9D4h
.text:080485B6                 add     ebp, 371FF325h
.text:080485BC                 xchg    ebx, edx
.text:080485BE                 sub     ebp, 0CADDD25Ch
.text:080485C4                 add     ebp, 0D8684E43h
.text:080485CA                 sub     ebp, 9AF2C4E4h
.text:080485D0                 add     ebp, 3B7ABE8Dh
.text:080485D6                 add     ebp, 80C7BF2Eh
.text:080485DC                 sub     ebp, 591082D9h
.text:080485E2                 sub     ebp, 69D2F7C2h
.text:080485E8                 add     ebp, 7733786h
.text:080485EE                 sub     ebp, 33CFB444h
.text:080485F4                 sub     ebp, 0AAE24EACh


.text:080520F8                 xchg    eax, ecx
.text:080520F9                 neg     ebp
.text:080520FB                 xchg    eax, ebx
.text:080520FC                 add     eax, ebp
.text:080520FE                 xchg    eax, edx
.text:080520FF                 xchg    edx, ebx
.text:08052101                 xchg    ecx, ebx
.text:08052103                 xchg    eax, ecx
.text:08052104                 xchg    ebx, edx
.text:08052106                 xchg    eax, ecx
.text:08052107                 mov     esi, edx
.text:08052109                 xchg    ebx, ecx
.text:0805210B                 neg     esi
.text:0805210D                 mov     ebp, esi
.text:0805210F                 neg     ebp
.text:08052111                 add     ebx, ebp
.text:08052113                 xchg    edx, ebx
.text:08052115                 xchg    ecx, ebx
.text:08052117                 xchg    eax, edx
.text:08052118                 xchg    eax, ebx
.text:08052119                 xchg    edx, ecx
.text:0805211B                 xchg    eax, edx
.text:0805211C                 xchg    eax, ecx
.text:0805211D                 xchg    eax, ebx
.text:0805211E                 xchg    edx, ebx
.text:08052120                 xchg    eax, edx
.text:08052121                 xchg    edx, ebx
.text:08052123                 mov     ebp, esi
.text:08052125                 and     ebp, 72EDEC3Ch
.text:0805212B                 xor     esi, 72EDEC3Ch
.text:08052131                 xor     esi, ebp
.text:08052133                 xor     edi, 0FFFFFFFFh
.text:08052136                 xchg    eax, ecx
.text:08052137                 xchg    edx, ebx
.text:08052139                 xchg    ebx, ecx
.text:0805213B                 xchg    ecx, ebx
.text:0805213D                 pop     ebp
.text:0805213E                 sub     edi, ecx
.text:08052140                 xchg    ecx, ebx
.text:08052142                 add     edi, ebx
.text:08052144                 retn

(note the address range changing by about 40k from top to bottom.)

It is clear that manual analysis is not an option. During the CTF, we had a hard time automating the analysis. The idea was basically to mark the top of the stack as input variable, then translate every instruction to math and then use a SMT solver like z3 to query for possible values of the input variable such that the output would become non-zero.

After the CTF we found a blog poist by Dmytrio Olekiuk from whom we stole quite significant parts of our solution. The script relies on OpenREIL to lift x86 code to math and on z3 to compute the solution:

import sys, os, random, struct, unittest

from capstone import *
from capstone.x86 import *

from pyopenreil.REIL import *
from pyopenreil.utils import asm
from pyopenreil.symbolic import *
import pyopenreil.VM as VM

import struct
import z3

UNDEF = None

class Val(object):

    def __init__(self, val = UNDEF, exp = None):

        self.val, self.exp = val, exp

    def __str__(self):

        return str(self.exp) if self.is_symbolic() else hex(self.val)

    def is_symbolic(self):

        # check if value is symbolic
        return self.val is None

    def is_concrete(self):

        # check if value is concrete
        return not self.is_symbolic()

    def to_z3(self, state, size):

        # generate Z3 expression that represents this value

        def _z3_size(size):

            return { U1: 1, U8: 8, U16: 16, U32: 32, U64: 64 }[ size ]

        def _z3_exp(exp, size):

            if isinstance(exp, SymVal):

                if state.has_key(

                    return state[]


                    return z3.BitVec(, _z3_size(exp.size))

            elif isinstance(exp, SymConst):

                return z3.BitVecVal(exp.val, _z3_size(exp.size))

            elif isinstance(exp, SymExp):

                a, b = exp.a, exp.b

                assert isinstance(a, SymVal) or isinstance(a, SymConst)
                assert b is None or isinstance(b, SymVal) or isinstance(b, SymConst)
                assert b is None or a.size == b.size

                a = a if a is None else _z3_exp(a, a.size)
                b = b if b is None else _z3_exp(b, b.size)

                # makes 1 bit bitvectors from booleans
                _z3_bool_to_bv = lambda exp: z3.If(exp, z3.BitVecVal(1, 1), z3.BitVecVal(0, 1))

                # z3 expression from SymExp
                ret = { I_ADD: lambda: a + b,
                        I_SUB: lambda: a - b,
                        I_NEG: lambda: -a,
                        I_MUL: lambda: a * b,
                        I_DIV: lambda: z3.UDiv(a, b),
                        I_MOD: lambda: z3.URem(a, b),
                        I_SHR: lambda: z3.LShR(a, b),
                        I_SHL: lambda: a << b,
                         I_OR: lambda: a | b,
                        I_AND: lambda: a & b,
                        I_XOR: lambda: a ^ b,
                        I_NOT: lambda: ~a,
                         I_EQ: lambda: _z3_bool_to_bv(a == b),
                         I_LT: lambda: _z3_bool_to_bv(z3.ULT(a, b)) }[exp.op]()

                size_src = _z3_size(exp.a.size)
                size_dst = _z3_size(size)

                if size_src > size_dst:

                    # convert to smaller value
                    return z3.Extract(size_dst - 1, 0, ret)

                elif size_src < size_dst:

                    # convert to bigger value
                    return z3.Concat(z3.BitVecVal(0, size_dst - size_src), ret)


                    return ret


                assert False

        if self.is_concrete():

            # use concrete value
            return z3.BitVecVal(self.val, _z3_size(size))


            # build Z3 expression
            return _z3_exp(self.exp, size)

class Mem(VM.Mem):

    class _Val(Val):

        def __init__(self, size, seed, val = UNDEF, exp = None):

            # Each byte in memory must have size and hash
            # of full value that uses it.
            self.size, self.seed = size, seed

            super(Mem._Val, self).__init__(val = val, exp = exp)

    def pack(self, size, val):

        ret = []
        seed = random.randrange(0x10000000, 0x7fffffff)

        if val.is_symbolic():

            for i in range(self.map_length[size]):

                ret.append(self._Val(size, seed, exp = val.exp))


            ret = map(lambda val: self._Val(size, seed, val),
                      list(super(Mem, self).pack(size, val.val)))

        return ret

    def unpack(self, size, data):

        first = data[0]
        items, symbolic = [], True

        assert len(data) == self.map_length[first.size]

        for val in data:

            # check if all bytes belongs to the same value
            assert val.size == first.size and \
                   val.seed == first.seed

            if val.is_concrete():

                symbolic = False

        if symbolic:
            return Val(exp = first.exp)


            return Val(super(Mem, self).unpack(size, ''.join(items)))

    def read(self, addr, size):

        return self._read(addr, size)

    def write(self, addr, size, data):

        self._write(addr, size, data)

class Math(VM.Math):

    def eval(self, op, a = None, b = None):

        concrete = True

        a_val = a if a is None else a.val
        b_val = b if b is None else b.val

        # determinate symbolic/concrete operation mode
        if a_val is not None and a_val.is_symbolic(): concrete = False
        if b_val is not None and b_val.is_symbolic(): concrete = False

        if concrete:

            a_reg = a if a is None else VM.Reg(a.size, a.get_val())
            b_reg = b if b is None else VM.Reg(b.size, b.get_val())

            # compute and return concrete value
            return Val(val = super(Math, self).eval(op, a_reg, b_reg))


            assert a is not None
            assert op in [ I_STR, I_NOT ] or b is not None

            # get symbolic representation of the arguments
            a_sym = a if a is None else a.to_symbolic()
            b_sym = b if b is None else b.to_symbolic()

            # make a symbolic expression
            exp = a_sym if op == I_STR else SymExp(op, a_sym, b_sym)

            # return symbolic value
            return Val(exp = exp)

class Reg(VM.Reg):

    def to_symbolic(self):

        # get symbolic representation of register contents
        if self.val.is_concrete():

            # use concrete value
            return SymConst(self.get_val(), self.size)


            if self.regs_map.has_key(

                return SymVal(self.regs_map[], self.size)

            # use symbolic value
            return SymVal(, self.size) if self.val.exp is None \
                                                else self.val.exp

    def get_val(self):

        # get concrete value of the register if it's available
        assert self.val.is_concrete()

        return super(Reg, self).get_val(self.val.val)

    def str_val(self):

        return str(self.val.exp) if self.val.is_symbolic() \
                                 else super(Reg, self).str_val(self.val.val)

class Cpu(VM.Cpu):

    DEF_REG = Reg
    DEF_REG_VAL = Val()

    DEF_R_DFLAG = Val(1)

    class State(object):

        def __init__(self, regs = None, mem = None):

            self.regs, self.mem = regs, mem

    def __init__(self, arch):

        self.known_state = []
        self.regs_map, self.regs_cnt, self.regs_list = {}, {}, []

        super(Cpu, self).__init__(arch, mem = Mem(strict = False), math = Math())

    def reg(self, name, val = None):

        reg = super(Cpu, self).reg(name, val)
        reg.regs_map = self.regs_map

        if val is not None:

            if self.insn is None: addr = inum = 0
            else: addr, inum = self.insn.ir_addr()

            cnt = 0
            cnt_key = (, addr, inum )

            if self.regs_cnt.has_key(cnt_key):

                self.regs_cnt[cnt_key] += 1
                cnt = self.regs_cnt[cnt_key]


                self.regs_cnt[cnt_key] = 0

            new_name = '%s_%x_%x_%x' % (, addr, inum, cnt)
            reg_copy = super(Cpu, self).reg(new_name, reg.val, reg.size)

            self.regs_map[] = new_name

        return reg

    def arg(self, arg):

        if arg.type == A_CONST:

            return self.DEF_REG(arg.size, Val(arg.val))


            return super(Cpu, self).arg(arg)

    def set_ip(self, val):

        super(Cpu, self).set_ip(Val(val))

    def insn_jcc(self, insn, a, b, c):

        if a.val.is_symbolic():

            raise Exception('I_JCC with symbolic condition at ' +
                            '%s, giving up' % str(insn.ir_addr()))

        elif c.val.is_symbolic():

            raise Exception('I_JCC with symbolic location at ' +
                            '%s, giving up' % str(insn.ir_addr()))

            return super(Cpu, self).insn_jcc(insn, a, b, c)

    def insn_stm(self, insn, a, b, c):

        if c.val.is_symbolic():

            raise Exception('I_STM with symbolic write address at ' +
                            '%s, giving up' % str(insn.ir_addr()))

            # store a to memory
  , insn.a.size, a.val)
            return None

    def insn_ldm(self, insn, a, b, c):

        if a.val.is_symbolic():
            # Dirtiest of all hacks!!
            if len(str(a.val.exp).split('+ 0x')) == 2:
                self.reg(insn.c, self.mem.load(0x13370000 +
                    int(str(a.val.exp).split('+ 0x')[1][:-1], 16), insn.c.size))
                return None
                self.reg(insn.c, self.mem.load(0x13370000, insn.c.size))
                return None
            raise Exception('I_LDM with symbolic read address at ' +
                            '%s, giving up' % str(insn.ir_addr()))

            # read from memory to c
            self.reg(insn.c, self.mem.load(a.get_val(), insn.c.size))
            return None

    def execute(self, insn):

        print insn.to_str()

        return super(Cpu, self).execute(insn)

    def run(self, storage, addr = 0L, stop_at = None):


        super(Cpu, self).run(storage, addr = addr, stop_at = stop_at)

    def to_z3(self, state = None):

        state = {} if state is None else state

        for reg in self.regs_list:

            # get Z3 expression for each register
            state[] = reg.val.to_z3(state, reg.size)

        return state

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

c = open('DeobfuscateMe.elf', 'rb').read()
vdiff = 0x08048000
rip = 0x080484E0 - vdiff
buf = b''

lreg = None
d = 0
l = {
        X86_REG_EAX: 0, # uinput
        X86_REG_EBX: 0,
        X86_REG_ECX: 0,
        X86_REG_EDX: 0,
        X86_REG_EDI: 0,
        X86_REG_ESI: 0,
        X86_REG_EBP: 0,
        X86_REG_ESP: 0,
op = [ None, None, None ]

# cleaning script to remove jumps
while True:
    i = [ i for i in md.disasm(c[rip:rip+16], rip + vdiff) ][0]
    rip += i.size
    for x in range(len(i.operands)):
        op[x] = i.operands[x]

    if == X86_INS_JMP:
        rip = op[0].imm - vdiff
    elif == X86_INS_XCHG:
        l[op[0].reg], l[op[1].reg] = l[op[1].reg], l[op[0].reg]
        if l[op[0].reg] == 0 and l[op[1].reg] == 0:
    elif == X86_INS_MOV:
        if op[1].type == X86_OP_REG:
            l[op[0].reg] = l[op[1].reg]
            if l[op[1].reg] == 0:
            l[op[0].reg] = 1
    elif in [ X86_INS_XOR ]:
        if op[1].type == X86_OP_REG:
            #if op[1].reg == op[0].reg:
            #    l[op[0].reg] = 1
            if l[op[1].reg] == 0 and l[op[0].reg] == 0:
        elif l[op[0].reg] == 0:
    elif == X86_INS_PUSH:
        if op[0].type == X86_OP_REG:
            if l[op[0].reg] == 0:
            print("push constant?")
    elif in [ X86_INS_AND, X86_INS_OR, X86_INS_ADD, X86_INS_SUB,
            X86_INS_SHL, X86_INS_SHR, X86_INS_ROR, X86_INS_ROL, X86_INS_NEG ]:
        if op[1].type == X86_OP_REG:
            if l[op[1].reg] == 0 or l[op[0].reg] == 0:
        elif l[op[0].reg] == 0:
    elif == X86_INS_SHL:
        if op[0].reg == X86_REG_EDI:
            instr = b'\x01\xff' # add edi, edi
        elif op[0].reg == X86_REG_ESI:
            instr = b'\x01\xf6' # add esi, esi
        elif op[0].reg == X86_REG_EDX:
            instr = b'\x01\xd2' # add edx, edx
        elif op[0].reg == X86_REG_ECX:
            instr = b'\x01\xc9' # add ecx, ecx
        elif op[0].reg == X86_REG_EAX:
            instr = b'\x01\xc0' # add eax, eax
        elif op[0].reg == X86_REG_EBX:
            instr = b'\x01\xdb' # add ebx, ebx
            print("SHL with unknown operand:")
            print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        buf += instr * op[1].imm
    elif == X86_INS_RET:
        buf += i.bytes
        print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        buf += i.bytes
        print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))

    buf += i.bytes
    print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))

c = bytearray(c)
for i in range(len(buf)):
    c[0x080484E0 - vdiff + i] = buf[i]

# write cleaned version to disk
open('deobfuscated.elf', 'wb').write(c)
reader = ReaderRaw(ARCH_X86, buf)
tr = CodeStorageTranslator(reader)
bb = tr.get_bb(0)

cpu = Cpu(ARCH_X86)
abi = VM.Abi(cpu, tr, no_reset = True)

ret = 0x41414141
indata = 0x13370000, U32, Val(struct.unpack("<I", b'Drgn')[0], U32))
for i in range(1, 16): + i * 4, U32, Val(exp = SymVal('FLAG_{}'.format(i), U32)))

# create stack with pointer to symbolic arguments for check()
stack = abi.pushargs([Val(indata)])

# dummy return address

# initialize emulator's registers
cpu.reg('esp', Val(, U32))
cpu.reg('eax', Val(0, U32))
cpu.reg('ebp', Val(0, U32))
cpu.reg('ebx', Val(0, U32))
cpu.reg('ecx', Val(0, U32))
cpu.reg('edx', Val(0, U32))
cpu.reg('edi', Val(0, U32))
cpu.reg('esi', Val(0, U32))

# run until stop
try:, 0, stop_at = [0 + len(buf) - 1])
#except VM.CpuStop as e:

    print 'STOP at', hex(cpu.reg('eip').get_val())

    # get Z3 expressions list for current CPU state
    state = cpu.to_z3()

    for reg in ['eax', 'ebx', 'ecx' , 'edx', 'esi', 'edi', 'ebp']:

        # create SMT solver
        solver = z3.Solver()

        # add constraint for return value
        solver.add(0 == cpu.reg(reg).val.to_z3(state, U32))
        check = solver.check()
        if check == z3.sat:
            model = solver.model()
            for s in sorted(list(model), key = lambda x: int([len('FLAG_'):])):
                print(s, hex(model[s].as_long()))
            for s in sorted(list(model), key = lambda x: int([len('FLAG_'):])):
                    sys.stdout.write(struct.pack("<L", model[s].as_long()).decode('ascii'))
                except: pass


This yields the flag DrgnS{y0u_sh0uld_h4v3_iT_s01ved_by_h4nd!!!}.