Skip to content

Commit

Permalink
Merge pull request #10 from ohreteam/koki
Browse files Browse the repository at this point in the history
PHO, DCE, def and use vars, TAC builders, rearrange
  • Loading branch information
kokifish authored Jan 12, 2025
2 parents 1d81834 + 940923f commit e8415bd
Show file tree
Hide file tree
Showing 23 changed files with 764 additions and 159 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ python examples\abc_decompile.py name.abc
#### Logical Code

```bash
python examples\dis_demo.py xxx.abc.dis # put isa.yaml from arkcompiler_ets_runtime to ./ohre/abcre/dis/isa.yaml
python examples\dis_demo.py xxx.abc.dis # put isa.yaml from arkcompiler_ets_runtime to ./ohre/abcre/dis/enum/isa.yaml
```


Expand Down
7 changes: 5 additions & 2 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import argparse

import ohre
from ohre.abcre.dis.ControlFlow import ControlFlow
from ohre.abcre.dis.PandaReverser import PandaReverser
from ohre.abcre.dis.DisFile import DisFile
from ohre.misc import Log
Expand Down Expand Up @@ -34,9 +33,13 @@
panda_re.split_native_code_block(FUNC_IDX)
print(f">> CF built {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re.trans_NAC_to_TAC(method_id=FUNC_IDX)
panda_re._code_lifting_algorithms(FUNC_IDX)

# nac_total = panda_re.get_insts_total()
# for idx in range(panda_re.method_len()):
# panda_re.split_native_code_block(idx)
# print(f">> [{idx}/{panda_re.method_len()}] CF built {panda_re.dis_file.methods[idx]._debug_vstr()}")
# panda_re.trans_NAC_to_TAC(method_id=idx)
panda_re._code_lifting_algorithms(FUNC_IDX)
# tac_total = panda_re.get_insts_total()
# todo_tac = panda_re.get_tac_unknown_count()
# print(f"todo_tac {todo_tac}/tac {tac_total} {todo_tac/tac_total:.4f} / nac {nac_total} {todo_tac/nac_total:.4f}")
121 changes: 112 additions & 9 deletions ohre/abcre/dis/AsmArg.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,89 @@
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.enum.AsmTypes import AsmTypes
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.misc import Log, utils


class AsmArg(DebugBase):
def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN,
name: str = "", value=None, obj_ref=None, paras_len: int = None):
name: str = "", value=None, ref_base=None, paras_len: int = None):
self.type = arg_type
# name: e.g. for v0, type is VAR, name is v0(stored without truncating the prefix v)
self.name: str = name
# value: may be set in the subsequent analysis
self.value = value
self.obj_ref = obj_ref
self.value = value # if type is ARRAY, value is AsmArg list
self.ref_base = ref_base # AsmArg
self.paras_len: Union[int, None] = paras_len # for method object, store paras len here

@property
def len(self):
return len(self.name)
if (len(self.name) > 0):
return len(self.name)
return len(self.type)

def __len__(self) -> int:
    """Return the value of the ``len`` property so that ``len(arg)`` works."""
    size = self.len
    return size

def __eq__(self, rhs):
    """Compare two arguments by ``type``, ``name`` and ``ref_base``.

    Any ``rhs`` that is not an ``AsmArg`` compares unequal. When ``type`` and
    ``name`` match, the two ``ref_base`` values must either both be ``None``
    or compare equal to each other.
    """
    if not isinstance(rhs, AsmArg):
        return False
    if not (self.type == rhs.type and self.name == rhs.name):
        return False
    mine, theirs = self.ref_base, rhs.ref_base
    if mine is None or theirs is None:
        # A missing base only matches another missing base.
        return mine is None and theirs is None
    return True if mine == theirs else False

def __ne__(self, other):
    """Return the logical negation of ``self.__eq__(other)``."""
    is_equal = self.__eq__(other)
    return not is_equal

def __hash__(self):
    """Hash the ``(type, name, ref_base)`` triple.

    A ``ref_base`` of ``None`` is replaced by the string ``'None'`` before
    hashing.
    """
    base_key = 'None' if self.ref_base is None else self.ref_base
    return hash((self.type, self.name, base_key))

def __repr__(self):
    """Wrap the short debug string of this argument in ``Arg(...)``."""
    inner = self._debug_str()
    return f"Arg({inner})"

def set_ref(self, ref_ed_arg):
    """Store ``ref_ed_arg`` as this argument's ``ref_base``."""
    base = ref_ed_arg
    self.ref_base = base

@classmethod
def build_arg(cls, s: str):
    """Build an ``AsmArg`` from a register-like name.

    Names starting with ``v`` or ``tmp`` become ``AsmTypes.VAR``; names
    starting with ``a`` become ``AsmTypes.ARG``. The full string, prefix
    included, is kept as the argument name. Any other prefix is reported
    through ``Log.error`` and nothing is returned explicitly.
    """
    assert isinstance(s, str) and len(s) > 0
    # "v"/"tmp" and "a" are mutually exclusive prefixes, so check order is irrelevant.
    if s.startswith(("v", "tmp")):
        return AsmArg(AsmTypes.VAR, s)
    if s.startswith("a"):
        return AsmArg(AsmTypes.ARG, s)
    Log.error(f"build_arg failed: s={s}")

@classmethod
def build_acc(cls):
    """Return whatever ``cls.ACC()`` builds; an alias for that constructor."""
    acc_arg = cls.ACC()
    return acc_arg

@classmethod
def ACC(cls):
    """Create a new ``AsmArg`` whose type is ``AsmTypes.ACC``."""
    acc_type = AsmTypes.ACC
    return AsmArg(acc_type)

@classmethod
def build_arr(cls, args: List, name: str = ""):
    """Create an ``AsmTypes.ARRAY`` argument holding a list copy of ``args``.

    Elements of ``args`` are expected to be ``AsmArg`` instances; this is not
    checked here.
    """
    elements = list(args)
    return AsmArg(AsmTypes.ARRAY, name=name, value=elements)

@classmethod
def build_this(cls):
    """Create the ``AsmTypes.ARG`` argument named ``a2``.

    Per the original author's note, ``this`` is always stored at ``a2``.
    """
    this_slot = "a2"
    return AsmArg(AsmTypes.ARG, name=this_slot)

def build_next_arg(self): # arg is AsmArg
# if self is v5, return v6; if self is a0, return a1; just num_part+=1
num_part: str = self.name[1:]
Expand All @@ -43,16 +95,67 @@ def build_next_arg(self): # arg is AsmArg
def is_value_valid(self) -> bool:
    """Placeholder: not implemented yet, currently always returns ``None``.

    TODO: for some types, value is not valid, judge it.
    """
    return None

def is_acc(self) -> bool:
    """Tell whether this argument's type is ``AsmTypes.ACC``."""
    return True if self.type == AsmTypes.ACC else False

def get_all_args_recursively(self, include_self: bool = True) -> List:
    """Collect the ``AsmArg`` objects directly attached to this argument.

    The result lists, in order: ``self`` (when ``include_self`` is true),
    ``ref_base`` if it is an ``AsmArg``, then every ``AsmArg`` element of
    ``value`` when ``value`` is iterable.

    NOTE(review): despite the name, this does not descend into the
    ``ref_base`` or the elements it collects; only one level is gathered.
    """
    collected = [self] if include_self else []
    if isinstance(self.ref_base, AsmArg):
        collected.append(self.ref_base)
    if self.value is not None and isinstance(self.value, Iterable):  # e.g. ARRAY values
        collected.extend(elem for elem in self.value if isinstance(elem, AsmArg))
    return collected

def _common_error_check(self):
    """Report structurally incomplete arguments through ``Log.error``.

    A FIELD needs both a ``ref_base`` and a non-empty name; MODULE, METHOD
    and LABEL arguments need a non-empty name.
    """
    kind = self.type
    if kind == AsmTypes.FIELD and (self.ref_base is None or len(self.name) == 0):
        Log.error(f"[ArgCC] A filed without ref_base or name len==0: name {self.name} len {len(self.name)}")
    if kind == AsmTypes.MODULE and len(self.name) == 0:
        Log.error(f"[ArgCC] A module without name: len {len(self.name)}")
    if kind == AsmTypes.METHOD and len(self.name) == 0:
        Log.error(f"[ArgCC] A method without name: len {len(self.name)}")
    if kind == AsmTypes.LABEL and len(self.name) == 0:
        Log.error(f"[ArgCC] A label without name: len {len(self.name)}")

def _debug_str(self):
out = f"{AsmTypes.get_code_name(self.type)}-{self.name}"
self._common_error_check()
out = ""
if (self.type == AsmTypes.FIELD):
if (self.ref_base is not None):
out += f"{self.ref_base}[{self.name}]"
else:
if (self.ref_base is not None):
out += f"{self.ref_base}->"
out += f"{self.name}"
if (len(self.name) == 0):
out += f"{AsmTypes.get_code_name(self.type)}"
if (self.value is not None):
out += f"({self.value})"
if (self.obj_ref is not None):
out += f"//ref:{self.obj_ref}"
if (self.paras_len is not None):
out += f"(paras_len={self.paras_len})"
return out

def _debug_vstr(self):
out = f"{self._debug_str()}"
self._common_error_check()
out = ""
if (self.type == AsmTypes.FIELD):
if (self.ref_base is not None):
out += f"{self.ref_base}[{AsmTypes.get_code_name(self.type)}-{self.name}]"
else:
if (self.ref_base is not None):
out += f"{self.ref_base}->"
out += f"{AsmTypes.get_code_name(self.type)}-{self.name}"
if (self.value is not None):
out += f"({self.value})"
if (self.paras_len is not None):
out += f"(paras_len={self.paras_len})"
return out
2 changes: 1 addition & 1 deletion ohre/abcre/dis/AsmLiteral.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.enum.CODE_LV import CODE_LV
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.misc import Log, utils

Expand Down
24 changes: 19 additions & 5 deletions ohre/abcre/dis/AsmMethod.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.AsmRecord import AsmRecord
from ohre.abcre.dis.CodeBlock import CodeBlock
from ohre.abcre.dis.CodeBlocks import CodeBlocks
from ohre.abcre.dis.ControlFlow import ControlFlow
from ohre.misc import Log, utils
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.abcre.dis.enum.AsmTypes import AsmTypes
from ohre.abcre.dis.enum.CODE_LV import CODE_LV
from ohre.abcre.dis.TAC import TAC
from ohre.misc import Log, utils


def is_label_line(s: str): # single str in a single line endswith ":", maybe label?
Expand Down Expand Up @@ -90,7 +90,9 @@ def _insert_variable_virtual_block(self):
tac_l.append(TAC.tac_assign(AsmArg.build_arg(name), AsmArg(AsmTypes.UNKNOWN)))
if (ty != "any"):
Log.error(f"Var NOT any! {self.name} {ty} {name}")
self.code_blocks.insert_front(CodeBlock(tac_l, self.code_blocks.blocks[0]))
cb_1st = CodeBlock(tac_l)
cb_1st.add_next_cb(self.code_blocks.blocks[0])
self.code_blocks.insert_front(cb_1st)

def _split_file_class_method_name(self, records: List[AsmRecord]):
# split 'file_class_method' to 'file_class' and 'method'
Expand Down Expand Up @@ -150,7 +152,9 @@ def _process_method_inst(self, lines: List[str]) -> List[List[str]]:
if (is_label_line(line)):
insts.append([line])
l_n += 1
if (len(line.strip()) == 0): # skip empty line
elif (len(line.strip()) == 0): # skip empty line
l_n += 1
elif (".catchall" in line.strip()): # skip empty line
l_n += 1
elif (is_method_end_line(line)): # process END
return insts
Expand Down Expand Up @@ -235,6 +239,16 @@ def split_native_code_block(self):
self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks)
self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)

def get_insts_total(self):
    """Return the instruction total reported by ``self.code_blocks``."""
    total = self.code_blocks.get_insts_total()
    return total

def get_args(self, start_pos: int = 0) -> List[AsmArg]:
    """Build an ``AsmArg`` for every entry of ``self.args`` from ``start_pos`` on.

    Each entry of ``self.args`` is unpacked as a ``(type, name)`` pair; only
    the name is used, and it is passed to ``AsmArg.build_arg``.
    """
    positions = range(start_pos, len(self.args))
    entries = (self.args[pos] for pos in positions)
    return [AsmArg.build_arg(arg_name) for _arg_ty, arg_name in entries]


if __name__ == "__main__":
temp = [
Expand Down
2 changes: 1 addition & 1 deletion ohre/abcre/dis/AsmRecord.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.enum.AsmTypes import AsmTypes
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.misc import Log

Expand Down
2 changes: 1 addition & 1 deletion ohre/abcre/dis/AsmString.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.enum.AsmTypes import AsmTypes
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.misc import Log

Expand Down
53 changes: 44 additions & 9 deletions ohre/abcre/dis/CodeBlock.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import copy
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.abcre.dis.enum.NACTYPE import NACTYPE
from ohre.abcre.dis.enum.TACTYPE import TACTYPE
from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.NACTYPE import NACTYPE
from ohre.abcre.dis.TAC import TAC
from ohre.abcre.dis.TACTYPE import TACTYPE


class CodeBlock(DebugBase): # asm instruction(NAC) cantained
def __init__(self, in_l: Union[List[List[str]], List[NAC], List[TAC]], next_cb_list=None):
def __init__(self, in_l: Union[List[List[str]], List[NAC], List[TAC]], next_cb_list: set = None):
assert len(in_l) >= 0
self.insts: Union[List[NAC], List[TAC]] = list()
if (isinstance(in_l[0], NAC)): # NAC in list
Expand All @@ -20,36 +21,70 @@ def __init__(self, in_l: Union[List[List[str]], List[NAC], List[TAC]], next_cb_l
for inst in in_l:
assert len(inst) > 0
self.insts.append(NAC(inst))
self.next_cb_list: set[CodeBlock] = set()
if (next_cb_list is None):
self.next_cb_list = set()
else:
self.next_cb_list = next_cb_list

self.use_vars: set[AsmArg] = None

def get_slice_block(self, idx_start: int, idx_end: int):
    """Return a new ``CodeBlock`` built from a deep copy of ``insts[idx_start:idx_end]``."""
    window = self.insts[idx_start:idx_end]
    cloned = copy.deepcopy(window)
    return CodeBlock(cloned)

def add_next_cb(self, cb):
    """Register ``cb`` as a successor by adding it to ``next_cb_list``."""
    successors = self.next_cb_list
    successors.add(cb)

def empty_next_cb(self):
def empty_next_cbs(self):
self.next_cb_list = set()

def get_all_next_cb(self):
    """Return the successor collection itself (``next_cb_list``), not a copy."""
    successors = self.next_cb_list
    return successors

def set_use_vars(self, use_vars: set):
    """Replace this block's ``use_vars`` with the given set."""
    new_vars = use_vars
    self.use_vars = new_vars

def get_use_vars(self) -> set[AsmArg]:
    """Return ``use_vars``, or a fresh empty set when it is still ``None``."""
    return set() if self.use_vars is None else self.use_vars

def get_all_next_cbs_use_vars(self, get_current_cb=False) -> "set[AsmArg]":
    """Union the use-vars of every block reachable through ``next_cb_list``.

    Args:
        get_current_cb: when true, this block's own use-vars are included
            in the result as well.

    Returns:
        A new set with the use-vars of all transitive successors.

    The successor graph is walked with an explicit worklist and a visited
    set. The earlier recursive form never terminated when successors formed
    a cycle (a block that can reach itself) and re-expanded shared
    successors once per path; each block is now expanded exactly once.
    """
    collected = set()
    if get_current_cb:
        collected.update(self.get_use_vars())
    visited_ids = set()  # identity-based, so it does not depend on block hashing
    pending = list(self.next_cb_list)
    while pending:
        cb = pending.pop()
        if id(cb) in visited_ids:
            continue
        visited_ids.add(id(cb))
        collected.update(cb.get_use_vars())
        pending.extend(cb.next_cb_list)
    return collected

def is_no_next_cb(self):
    """Tell whether this block has no successors (``None`` or empty)."""
    return self.next_cb_list is None or len(self.next_cb_list) == 0

def replace_insts(self, tac_l: List[TAC]):
    """Swap this block's instruction list for ``tac_l`` (stored as-is, not copied)."""
    new_insts = tac_l
    self.insts = new_insts

def get_insts_len(self) -> int:
    """Return how many instructions this block holds."""
    count = len(self.insts)
    return count

def __len__(self) -> int:
    """Make ``len(block)`` report the number of instructions in the block."""
    count = len(self.insts)
    return count

def _debug_str(self) -> str:
out = f"CB: insts({len(self.insts)}) "
out = f"CB: insts({len(self.insts)})"
if (len(self.next_cb_list)):
out += f"[next_CB: {self.next_cb_list}]"
out += f"-[next_CB:{self.next_cb_list}]"
else:
out += f"[NO next_CB]"
out += f"-[NO next_CB]"
return out

def _debug_vstr(self) -> str:
out = self._debug_str() + "\n"
for i in range(len(self.insts)):
if (self.insts[i].type == TACTYPE.LABEL):
out += f"{i}".ljust(4, "-") + f"{self.insts[i]._debug_vstr()}\n"
out += f"{i}".ljust(4, "-") + f"{self.insts[i]._debug_str()}\n"
else:
out += f"{i}".ljust(4, " ") + f"{self.insts[i]._debug_vstr()}\n"
out += f"{i}".ljust(4, " ") + f"{self.insts[i]._debug_str()}\n"
return out.strip()
8 changes: 7 additions & 1 deletion ohre/abcre/dis/CodeBlocks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
from typing import Any, Dict, Iterable, List, Tuple, Union

from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.enum.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlock import CodeBlock
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.misc import Log, utils
Expand Down Expand Up @@ -56,3 +56,9 @@ def _debug_vstr(self) -> str:

def insert_front(self, code_block: CodeBlock):
    """Insert ``code_block`` at index 0 of ``self.blocks``."""
    front_idx = 0
    self.blocks.insert(front_idx, code_block)

def get_insts_total(self) -> int:
    """Return the sum of ``get_insts_len()`` over every block in ``self.blocks``."""
    return sum(cb.get_insts_len() for cb in self.blocks)
Loading

0 comments on commit e8415bd

Please sign in to comment.