Skip to content

Commit

Permalink
Merge pull request #11 from ohreteam/koki
Browse files Browse the repository at this point in the history
CopyPropagation, PHO features, DCE features, more tac builder, object support, etc
  • Loading branch information
kokifish authored Jan 18, 2025
2 parents e8415bd + 6710e81 commit 21d2c49
Show file tree
Hide file tree
Showing 17 changed files with 745 additions and 219 deletions.
34 changes: 28 additions & 6 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
import argparse
import os
import subprocess

import ohre
from ohre.abcre.dis.PandaReverser import PandaReverser
from ohre.abcre.dis.DisFile import DisFile
from ohre.abcre.dis.PandaReverser import PandaReverser
from ohre.core import oh_app, oh_hap
from ohre.misc import Log

TMP_HAP_EXTRACT = "tmp_hap_extract"
TMP_APP_EXTRACT = "tmp_app_extract"
ARK_DISASM = "path2ark_disasm"

if __name__ == "__main__": # clear; pip install -e .; python3 examples/dis_demo.py name.abc.dis
Log.init_log("abcre", ".")
ohre.set_log_level("info")
ohre.set_log_print(True)
Log.info(f"START {__file__}")
parser = argparse.ArgumentParser()
parser.add_argument("dis_path", type=str, help="path to the dis file (ark_disasm-ed abc)")
parser.add_argument("in_path", type=str, help="path to the dis file (ark_disasm-ed abc) or hap/app")
arg = parser.parse_args()
dis_path = arg.dis_path
dis_file: DisFile = DisFile(dis_path)
in_path = arg.in_path
if (in_path.endswith(".dis")):
dis_file: DisFile = DisFile(in_path)
elif (in_path.endswith(".hap")):
hhap = oh_hap.oh_hap(in_path)
hhap.extract_all_to(TMP_HAP_EXTRACT)
abc_file = os.path.join(TMP_HAP_EXTRACT, "ets", "modules.abc")
dis_file = f"{os.path.splitext(os.path.basename(in_path))[0]}.abc.dis" # os.path.splitext(file_name)[0]
result = subprocess.run([ARK_DISASM, abc_file, dis_file], capture_output=True, text=True)
dis_file: DisFile = DisFile(dis_file)
panda_re: PandaReverser = PandaReverser(dis_file)
print(f"> panda_re: {panda_re}")

Expand All @@ -28,18 +43,25 @@
print(f">> {asmstr}")

# === reverse truly START
FUNC_IDX = 5 # 5: onWindowStageCreate, calls loadContent and passes a method as a parameter; 7: the method that is used as that parameter
FUNC_IDX = 12 # 5: onWindowStageCreate, calls loadContent and passes a method as a parameter; 7: the method that is used as that parameter
# print(f">> before CF {dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re.split_native_code_block(FUNC_IDX)
print(f">> CF built {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re.trans_NAC_to_TAC(method_id=FUNC_IDX)
print(f">> TAC built {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re._code_lifting_algorithms(FUNC_IDX)
print(f">> after lifting {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")

# nac_total = panda_re.get_insts_total()
# for idx in range(panda_re.method_len()):
# panda_re.split_native_code_block(idx)
# print(f">> [{idx}/{panda_re.method_len()}] CF built {panda_re.dis_file.methods[idx]._debug_vstr()}")
# panda_re.trans_NAC_to_TAC(method_id=idx)
# tac_total = panda_re.get_insts_total()
# for idx in range(panda_re.method_len()):
# panda_re._code_lifting_algorithms(method_id=idx)
# print(f">> [{idx}/{panda_re.method_len()}] after lift {panda_re.dis_file.methods[idx]._debug_vstr()}")
# todo_tac = panda_re.get_tac_unknown_count()
# print(f"todo_tac {todo_tac}/tac {tac_total} {todo_tac/tac_total:.4f} / nac {nac_total} {todo_tac/nac_total:.4f}")
# final_tac_total = panda_re.get_insts_total()
# print(f"todo_tac {todo_tac}/{tac_total} {todo_tac/tac_total:.4f} / nac {nac_total} {todo_tac/nac_total:.4f}")
# print(f"lifting_algorithms {final_tac_total}/{tac_total} {final_tac_total/tac_total:.4f}")
179 changes: 160 additions & 19 deletions ohre/abcre/dis/AsmArg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,34 @@ def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN,
# name: e.g. for v0, type is VAR, name is v0(stored without truncating the prefix v)
self.name: str = name
# value: may be set in the subsequent analysis
self.value = value # if type is ARRAY, value is AsmArg list
# type is ARRAY: value is list[AsmArg]
# type is OBJECT: value is list[AsmArg]: AsmArg(name:key, value:any value)
self.value = value
self.ref_base = ref_base # AsmArg
self.paras_len: Union[int, None] = paras_len # for method object, store paras len here
if (self.is_value_valid() == False):
Log.error(f"AsmArg value is NOT valid, type {self.type_str} value {type(value)} {value}")

@property
def len(self):
    """Length reported for this argument: the name's length, else ``len(self.type)``."""
    name_size = len(self.name)
    if name_size > 0:
        return name_size
    # NOTE(review): this fallback needs self.type to support len() -- confirm.
    return len(self.type)

@property
def type_str(self) -> str:
    """Name that ``AsmTypes.get_code_name`` reports for ``self.type``."""
    type_code = self.type
    return AsmTypes.get_code_name(type_code)

def __len__(self) -> int:
    """Support ``len(arg)`` by returning the ``len`` property."""
    size = self.len
    return size

def __eq__(self, rhs):
    """Compare two arguments field by field.

    Returns:
        True only when ``rhs`` is an ``AsmArg`` whose ``type``, ``name``,
        ``ref_base``, ``value`` and ``paras_len`` all equal this one's;
        False for any other object.
    """
    # NOTE(review): __hash__ is defined outside this block -- confirm that
    # arguments which compare equal here also hash equally.
    if not isinstance(rhs, AsmArg):
        return False
    if not (self.type == rhs.type and self.name == rhs.name):
        return False
    # ref_base is itself an AsmArg (or None), so this comparison recurses
    # along the reference chain.
    return bool(self.ref_base == rhs.ref_base
                and self.value == rhs.value
                and self.paras_len == rhs.paras_len)
Expand All @@ -53,15 +54,55 @@ def __hash__(self):
def __repr__(self):
    """Wrap the short debug string of this argument as ``Arg(...)``."""
    return "Arg({})".format(self._debug_str())

def obj_has_key(self, key) -> bool:
    """Report whether one of the entries held in ``self.value`` is named ``key``.

    The type of ``self`` is not checked here; only the shape of
    ``self.value`` is.

    Args:
        key: a ``str`` key name, or an ``AsmArg`` whose ``name`` is the key.

    Returns:
        True if an entry of ``self.value`` has that name. False otherwise,
        including when ``self.value`` is not a collection of entries or when
        ``key`` has an unsupported type (which is also logged).
    """
    entries = self.value
    # A str is Iterable too, but its characters are not entries with a ``.name``.
    if isinstance(entries, str) or not isinstance(entries, Iterable):
        return False
    if isinstance(key, str):
        wanted = key
    elif isinstance(key, AsmArg):
        wanted = key.name
    else:
        Log.error(f"ERROR! obj_has_key key {type(key)} {key}")
        # Stop here: continuing with an empty lookup name could wrongly match
        # an entry whose name is empty.
        return False
    return any(wanted == entry.name for entry in entries)

def set_object_key_value(self, key: str, value: str, create=False):
    """Overwrite the value of the entry named ``key`` on an OBJECT argument.

    Args:
        key: name of the entry to update.
        value: new value stored on the matching entry.
        create: accepted but not acted upon; a missing key is never created.

    Returns:
        True if an entry named ``key`` was found and updated. False if this
        argument is not an OBJECT, holds no entries, or has no such key.
    """
    if self.type != AsmTypes.OBJECT:
        return False
    entries = self.value
    # An OBJECT without entries may keep value as None; iterating it would raise.
    if entries is None:
        return False
    for entry in entries:
        if key == entry.name:
            entry.value = value
            return True
    return False

def set_ref(self, ref_ed_arg):
    """Record ``ref_ed_arg`` as the base this argument is referenced from."""
    new_base = ref_ed_arg
    self.ref_base = new_base

def is_has_ref(self) -> bool:
    """Return True when ``ref_base`` is set (anything other than None)."""
    return self.ref_base is not None

def is_no_ref(self) -> bool:
    """Return the negation of ``is_has_ref()``."""
    has_ref = self.is_has_ref()
    return not has_ref

@classmethod
def build_arg(cls, s: str): # return VAR v0 v1... or ARG a0 a1...
assert isinstance(s, str) and len(s) > 0
if (s.startswith("v")):
return AsmArg(AsmTypes.VAR, s)
if (s.startswith("a")):
if (s == "a0"):
return cls.build_FunctionObject()
elif (s == "a1"):
return cls.build_NewTarget()
elif (s == "a2"):
return cls.build_this()
return AsmArg(AsmTypes.ARG, s)
if (s.startswith("tmp")):
return AsmArg(AsmTypes.VAR, s)
Expand All @@ -79,10 +120,39 @@ def ACC(cls): # return AsmArg(AsmTypes.ACC)
def build_arr(cls, args: List, name: str = ""):  # element of args should be AsmArg
    """Build an ARRAY argument whose value is a new list holding the items of ``args``."""
    elements = list(args)
    return AsmArg(AsmTypes.ARRAY, name=name, value=elements)

@classmethod
def build_object(cls, in_kv: Dict = None, name: str = "", ref_base=None):
    """Build an OBJECT argument from a plain key/value mapping.

    Each supported value becomes an entry ``AsmArg`` named after its key:
    ``int``/``float`` -> IMM, ``str`` -> STR, ``None`` -> UNDEFINED.
    Values of any other type are logged and left out.

    Args:
        in_kv: mapping of key name to Python value; ``None`` builds an
            object without entries. Anything that is not a mapping is
            logged and treated as empty.
        name: name given to the object argument.
        ref_base: passed through as the new argument's ``ref_base``.

    Returns:
        An ``AsmArg`` of type OBJECT whose value is the list of entries, or
        ``None`` when there are no entries.
    """
    from collections.abc import Mapping

    entries = []
    if isinstance(in_kv, Mapping):
        for k, v in in_kv.items():
            if isinstance(v, (int, float)):
                entries.append(AsmArg(AsmTypes.IMM, name=k, value=v))
            elif isinstance(v, str):
                entries.append(AsmArg(AsmTypes.STR, name=k, value=v))
            elif v is None:
                entries.append(AsmArg(AsmTypes.UNDEFINED, name=k, value=None))
            else:
                Log.error(f"ERROR! build_object k {k} {type(k)} v {v} {type(v)} name {name}")
    elif in_kv is not None:
        # Lists, strings and other non-mappings have no ``.items()``;
        # report them instead of crashing.
        Log.error(f"ERROR! build_object in_kv is not a mapping: {type(in_kv)} name {name}")
    # An object without entries stores None rather than an empty list.
    obj_value = entries if entries else None
    return AsmArg(AsmTypes.OBJECT, name=name, value=obj_value, ref_base=ref_base)

@classmethod
def build_FunctionObject(cls):
    """Build the ARG argument named ``FunctionObject``."""
    # FunctionObject always stored at a0
    arg_name = "FunctionObject"
    return AsmArg(AsmTypes.ARG, name=arg_name)

@classmethod
def build_NewTarget(cls):
    """Build the ARG argument named ``NewTarget``."""
    # NewTarget always stored at a1
    arg_name = "NewTarget"
    return AsmArg(AsmTypes.ARG, name=arg_name)

@classmethod
def build_this(cls):
    """Build the ARG argument named ``this``.

    Named symbolically, in the same way as ``build_FunctionObject`` and
    ``build_NewTarget``, rather than by its register name.
    """
    # this always stored at a2
    return AsmArg(AsmTypes.ARG, name="this")

def build_next_arg(self): # arg is AsmArg
# if self is v5, return v6; if self is a0, return a1; just num_part+=1
Expand All @@ -92,14 +162,60 @@ def build_next_arg(self): # arg is AsmArg
num += 1
return AsmArg(self.type, f"{self.name[0]}{num}")

def is_value_valid(self) -> bool:
    """Check that ``self.value`` has a Python type acceptable for ``self.type``.

    Returns:
        True when the value is None, or when it matches the rule for the
        type: IMM takes int/float; STR, LABEL and METHOD_OBJ take str;
        OBJECT takes any Iterable; ARRAY takes a list.
        False on a mismatch, and for NULL/INF/NAN/UNDEFINED/HOLE carrying
        any non-None value.
        Types without a rule are logged and treated as valid.
    """
    value = self.value
    if value is None:
        return True
    kind = self.type
    if kind == AsmTypes.IMM:
        return isinstance(value, (int, float))
    if kind == AsmTypes.STR or kind == AsmTypes.LABEL or kind == AsmTypes.METHOD_OBJ:
        return isinstance(value, str)
    if kind == AsmTypes.OBJECT:
        return isinstance(value, Iterable)
    if kind == AsmTypes.ARRAY:
        return isinstance(value, list)
    if (kind == AsmTypes.NULL or kind == AsmTypes.INF or kind == AsmTypes.NAN
            or kind == AsmTypes.UNDEFINED or kind == AsmTypes.HOLE):
        # These types may not carry a value; None was accepted above.
        return False
    Log.error(f"is_value_valid NOT supported logic type {self.type_str} value {type(self.value)} {self.value}")
    return True

def is_acc(self) -> bool:
    """Return True when this argument's type is ACC."""
    return self.type == AsmTypes.ACC

def is_imm(self) -> bool:
    """Return True when this argument's type is IMM."""
    return self.type == AsmTypes.IMM

def is_field(self) -> bool:
    """Return True when this argument's type is FIELD."""
    return self.type == AsmTypes.FIELD

def is_unknown(self) -> bool:
    """Return True when this argument's type is UNKNOWN."""
    return self.type == AsmTypes.UNKNOWN

def is_temp_var_like(self) -> bool:
    """Return True for a VAR or ACC argument that has no ``ref_base``."""
    is_var_or_acc = self.type == AsmTypes.VAR or self.type == AsmTypes.ACC
    if not is_var_or_acc:
        return False
    return self.is_no_ref()

def get_all_args_recursively(self, include_self: bool = True) -> List:
out = list()
if (include_self):
Expand All @@ -126,14 +242,35 @@ def _common_error_check(self):
if (len(self.name) == 0):
Log.error(f"[ArgCC] A label without name: len {len(self.name)}")

def _debug_str(self):
def _debug_str_obj(self, detail=False):
    """Render this argument as ``[ref_base->][OBJ:]name{key:value, ...}``.

    Args:
        detail: when True, the name is prefixed with ``OBJ:``.

    Returns:
        The debug string. A collection value is listed entry by entry as
        ``name:value, ``. Any other non-None value is shown as ``{value}``.
        A None value adds nothing after the name.
    """
    parts = []
    if self.ref_base is not None:
        parts.append(f"{self.ref_base}->")
    parts.append(f"OBJ:{self.name}" if detail else f"{self.name}")
    value = self.value
    # A str is Iterable, but its characters are not entries with name/value,
    # so it is printed as a scalar instead.
    if isinstance(value, Iterable) and not isinstance(value, str):
        parts.append("{")
        parts.extend(f"{entry.name}:{entry.value}, " for entry in value)
        parts.append("}")
    elif value is not None:
        # Format rather than concatenate: "{" + value raised TypeError for
        # any non-str value.
        parts.append(f"{{{value}}}")
    return "".join(parts)

def _debug_str(self, print_ref: bool = True):
self._common_error_check()
out = ""
if (self.type == AsmTypes.OBJECT):
return self._debug_str_obj()
if (self.type == AsmTypes.FIELD):
if (self.ref_base is not None):
if (print_ref and self.ref_base is not None):
out += f"{self.ref_base}[{self.name}]"
else:
out += f"[field:{self.name}]"
else:
if (self.ref_base is not None):
if (print_ref and self.ref_base is not None):
out += f"{self.ref_base}->"
out += f"{self.name}"
if (len(self.name) == 0):
Expand All @@ -144,14 +281,18 @@ def _debug_str(self):
out += f"(paras_len={self.paras_len})"
return out

def _debug_vstr(self):
def _debug_vstr(self, print_ref: bool = True):
self._common_error_check()
out = ""
if (self.type == AsmTypes.OBJECT):
return self._debug_str_obj(detail=True)
if (self.type == AsmTypes.FIELD):
if (self.ref_base is not None):
if (print_ref and self.ref_base is not None):
out += f"{self.ref_base}[{AsmTypes.get_code_name(self.type)}-{self.name}]"
else:
out += f"[{AsmTypes.get_code_name(self.type)}-{self.name}]"
else:
if (self.ref_base is not None):
if (print_ref and self.ref_base is not None):
out += f"{self.ref_base}->"
out += f"{AsmTypes.get_code_name(self.type)}-{self.name}"
if (self.value is not None):
Expand Down
Loading

0 comments on commit 21d2c49

Please sign in to comment.