From e4d4ad574732aa63d99912f29f3fc7ff0401cf9f Mon Sep 17 00:00:00 2001
From: thefool0 <74161809+thefool0000@users.noreply.github.com>
Date: Tue, 31 Dec 2024 00:06:42 +0800
Subject: [PATCH] feat: support unified diff and git headers

Signed-off-by: thefool0 <74161809+thefool0000@users.noreply.github.com>
---
 src/Patche/utils/header.py |  59 ++++++++++++++++++++
 src/Patche/utils/parse.py  | 103 +++++++++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 src/Patche/utils/header.py

diff --git a/src/Patche/utils/header.py b/src/Patche/utils/header.py
new file mode 100644
index 0000000..2153a7e
--- /dev/null
+++ b/src/Patche/utils/header.py
@@ -0,0 +1,59 @@
+import re
+from typing import Iterator, Optional
+
+from Patche.model import Header
+
+HEADER_OLD = re.compile(r"^--- ([^\t\n]+)(?:\t([^\n]*)|)$")
+HEADER_NEW = re.compile(r"^\+\+\+ ([^\t\n]+)(?:\t([^\n]*)|)$")
+HUNK_START = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
+CHANGE_LINE = re.compile(r"^([- +\\])(.*)$")
+GIT_HEADER = re.compile(r"^diff --git a/(.*) b/(.*)$")
+
+
+def parse_header(lines: Iterator[str]) -> Optional[Header]:
+    """Parse one file header from the front of ``lines``.
+
+    Two layouts are recognised: a bare unified header (a ``---`` line
+    followed by a ``+++`` line) and a git header (a ``diff --git`` line, any
+    number of extended header lines, then the ``---``/``+++`` pair).
+
+    Only the header lines themselves are consumed, so after a successful
+    call the caller can keep reading hunks from the same iterator.
+
+    Args:
+        lines: Iterator over the lines of a patch.
+
+    Returns:
+        The parsed ``Header`` (``index_path`` is only set for git headers),
+        or ``None`` when the next lines do not form a header.
+    """
+    old_line = next(lines, "")
+    index_path = None
+
+    git_match = GIT_HEADER.match(old_line)
+    if git_match:
+        # The number of extended header lines varies ("new file mode",
+        # "index", "similarity index", ...), so scan forward to the "---"
+        # line instead of assuming a single "index" line.
+        for old_line in lines:
+            if HEADER_OLD.match(old_line):
+                break
+            # Another "diff --git" before any "---" means the previous
+            # entry carried no text hunks (e.g. a mode change): restart.
+            git_match = GIT_HEADER.match(old_line) or git_match
+        else:
+            return None
+        index_path = git_match.group(1)
+
+    old_match = HEADER_OLD.match(old_line)
+    new_match = HEADER_NEW.match(next(lines, ""))
+    if not (old_match and new_match):
+        return None
+
+    return Header(
+        index_path=index_path,
+        old_path=old_match.group(1),
+        old_version=old_match.group(2) or None,
+        new_path=new_match.group(1),
+        new_version=new_match.group(2) or None,
+    )
diff --git a/src/Patche/utils/parse.py b/src/Patche/utils/parse.py
index 8a42b43..20db8b9 100644
--- a/src/Patche/utils/parse.py
+++ b/src/Patche/utils/parse.py
@@ -1,10 +1,13 @@
 import re
+from itertools import chain
+from typing import List, Optional
 
 from whatthepatch_pydantic import parse_patch as wtp_parse_patch
 from whatthepatch_pydantic.model import Diff as WTPDiff
 
 from Patche.config import settings
 from Patche.model import Change, Diff, Hunk, Patch
+from Patche.utils.header import CHANGE_LINE, HEADER_OLD, HUNK_START, parse_header
 
 git_diffcmd_header = re.compile("^diff --git a/(.+) b/(.+)$")
 unified_diff_header = re.compile("^---\s{1}")
@@ -94,6 +97,106 @@ def wtp_diff_to_diff(wtp_diff: WTPDiff) -> Diff:
     )
 
 
+def parse_unified_diff(text: str) -> Optional[List[Diff]]:
+    """Parse a patch made of unified-diff (optionally git-style) sections.
+
+    Each section is a header understood by ``parse_header`` followed by its
+    hunks. Line numbers are tracked per hunk from the ``@@`` range line, and
+    the declared line counts decide where a hunk ends, so a removed line
+    whose text starts with ``-- `` is not mistaken for the next ``---``
+    header.
+
+    Args:
+        text: Full patch text.
+
+    Returns:
+        One ``Diff`` per section that contains at least one change, or
+        ``None`` when nothing could be parsed.
+    """
+    lines = iter(text.splitlines())
+    diffs: List[Diff] = []
+
+    while True:
+        header = parse_header(lines)
+        if not header:
+            break
+
+        changes: List[Change] = []
+        # Starts at -1 so the first hunk keeps index 0, the value every
+        # change carried before hunks were numbered.
+        # NOTE(review): confirm downstream code expects 0-based hunk indices.
+        hunk_index = -1
+        old_current = new_current = 0
+        old_left = new_left = 0
+
+        for line in lines:
+            in_hunk = old_left > 0 or new_left > 0
+
+            # "diff --git" can never be a hunk body line; "---" can (as a
+            # removed line), so it only ends the section between hunks.
+            if git_diffcmd_header.match(line) or (
+                not in_hunk and HEADER_OLD.match(line)
+            ):
+                # Hand the line back so parse_header sees it, without
+                # draining the rest of the iterator.
+                lines = chain([line], lines)
+                break
+
+            hunk_match = HUNK_START.match(line)
+            if hunk_match:
+                hunk_index += 1
+                old_current = int(hunk_match.group(1))
+                old_left = int(hunk_match.group(2) or "1")
+                new_current = int(hunk_match.group(3))
+                new_left = int(hunk_match.group(4) or "1")
+                continue
+
+            if not in_hunk:
+                # Noise between hunks (e.g. a mail signature) is skipped.
+                continue
+
+            change_match = CHANGE_LINE.match(line)
+            if change_match:
+                change_type, content = change_match.groups()
+            elif not line:
+                # Some tools strip the single space of an empty context line.
+                change_type, content = " ", ""
+            else:
+                continue
+
+            if change_type == " ":
+                changes.append(
+                    Change(
+                        old=old_current,
+                        new=new_current,
+                        line=content,
+                        hunk=hunk_index,
+                    )
+                )
+                old_current += 1
+                new_current += 1
+                old_left -= 1
+                new_left -= 1
+            elif change_type == "-":
+                changes.append(
+                    Change(old=old_current, new=None, line=content, hunk=hunk_index)
+                )
+                old_current += 1
+                old_left -= 1
+            elif change_type == "+":
+                changes.append(
+                    Change(old=None, new=new_current, line=content, hunk=hunk_index)
+                )
+                new_current += 1
+                new_left -= 1
+            # "\ No newline at end of file" markers carry no change.
+
+        if changes:
+            diffs.append(Diff(header=header, changes=changes, text=text))
+
+    return diffs or None
+
+
 def parse_patch(text: str) -> Patch:
     """
     Parse a patch file