def _build_line_lcs(lines1: List[str], lines2: List[str]) -> List[List[int]]:
    """
    Build the line-level LCS dynamic-programming table.

    Returns a (len(lines1) + 1) x (len(lines2) + 1) table where cell [i][j]
    is the length of the longest common subsequence of lines1[:i] and
    lines2[:j].
    """
    m, n = len(lines1), len(lines2)
    lcs = [[0] * (n + 1) for _ in range(m + 1)]

    # Compare precomputed hashes first so unequal lines are rejected without
    # a full string comparison.
    hash1 = [hash(line) for line in lines1]
    hash2 = [hash(line) for line in lines2]

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if hash1[i-1] == hash2[j-1] and lines1[i-1] == lines2[j-1]:
                lcs[i][j] = lcs[i-1][j-1] + 1
            else:
                lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1])

    return lcs


def _merge_adjacent_operations(operations):
    """
    Merge consecutive operations of the same type whose ranges touch.

    Each operation is (type, start, end, content); content may be a str or a
    list, anything that supports ``+`` concatenation.
    """
    merged = []
    for op_type, start, end, content in operations:
        if merged and merged[-1][0] == op_type and merged[-1][2] == start:
            previous = merged[-1]
            merged[-1] = (op_type, previous[1], end, previous[3] + content)
        else:
            merged.append((op_type, start, end, content))
    return merged


def _extract_line_operations(lines1: List[str], lines2: List[str], lcs: List[List[int]]) -> List[Tuple[str, int, int, List[str]]]:
    """
    Extract the line-level operation sequence from an LCS table.

    Returns a list of (operation type, start line, end line, lines).
    Line numbers index into lines1; for "add" start == end is the line index
    in lines1 before which the new lines are inserted, for "delete" the
    half-open range [start, end) is removed.
    """
    # Backtracking walks from the end to the start, so operations are
    # collected in reverse and flipped once (avoids O(n) insert(0, ...)).
    reversed_ops = []
    i, j = len(lines1), len(lines2)

    while i > 0 or j > 0:
        if i > 0 and j > 0 and lines1[i-1] == lines2[j-1]:
            i -= 1
            j -= 1
        elif j > 0 and (i == 0 or lcs[i][j-1] >= lcs[i-1][j]):
            reversed_ops.append(("add", i, i, [lines2[j-1]]))
            j -= 1
        else:
            reversed_ops.append(("delete", i-1, i, [lines1[i-1]]))
            i -= 1

    return _merge_adjacent_operations(reversed(reversed_ops))


def _char_diff_in_region(s1: str, s2: str) -> List[Tuple[str, int, int, str]]:
    """
    Run a character-level LCS diff over a small region.

    Returned positions are relative to s1 (offset 0 is the start of s1).
    """
    m, n = len(s1), len(s2)

    # Fast paths
    if m == 0 and n == 0:
        return []
    if m == 0:
        return [("add", 0, 0, s2)]
    if n == 0:
        return [("delete", 0, m, s1)]
    if s1 == s2:
        return []

    # Character-level LCS table
    lcs = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i-1] == s2[j-1]:
                lcs[i][j] = lcs[i-1][j-1] + 1
            else:
                lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1])

    # Backtrack to produce single-character operations (collected reversed).
    reversed_ops = []
    i, j = m, n
    while i > 0 or j > 0:
        if i > 0 and j > 0 and s1[i-1] == s2[j-1]:
            i -= 1
            j -= 1
        elif j > 0 and (i == 0 or lcs[i][j-1] >= lcs[i-1][j]):
            reversed_ops.append(("add", i, i, s2[j-1]))
            j -= 1
        else:
            reversed_ops.append(("delete", i-1, i, s1[i-1]))
            i -= 1

    return _merge_adjacent_operations(reversed(reversed_ops))


def _split_keep_newlines(text: str) -> List[str]:
    """
    Split text on '\\n', keeping each terminator attached to its line.

    ``''.join(result) == text`` always holds, so line boundaries map exactly
    onto character offsets of the original string.
    """
    parts = text.split('\n')
    lines = [part + '\n' for part in parts[:-1]]
    if parts[-1]:
        lines.append(parts[-1])
    return lines


def GetDiffOperations(
    s1:str,
    s2:str,
    ) -> List[Tuple[Literal["add","delete"], int, int, str]]:
    """
    Compute the diff operations that turn s1 into s2 (hybrid line-level plus
    character-level algorithm).

    Operation format: (operation type, start, end, content)
    Positions are character offsets into the source string s1:
      - ("delete", start, end, content): remove s1[start:end]; content is
        exactly that slice.
      - ("add", pos, pos, content): insert content before s1[pos].
    Operations are ordered by position; applying them left to right (copying
    the untouched parts of s1 in between) reproduces s2.
    """
    # Fast paths
    if s1 == s2:
        return []
    if not s1:
        return [("add", 0, 0, s2)]
    if not s2:
        return [("delete", 0, len(s1), s1)]

    # Phase 1: split into lines that keep their '\n', so that the content of
    # every line operation covers exactly the characters it adds or removes.
    lines1 = _split_keep_newlines(s1)
    lines2 = _split_keep_newlines(s2)

    # line_offsets_s1[k] is the character offset where line k of s1 starts;
    # the final entry equals len(s1) so "one past the last line" is valid.
    line_offsets_s1 = [0]
    for line in lines1:
        line_offsets_s1.append(line_offsets_s1[-1] + len(line))

    # Phase 2: line-level LCS analysis
    lcs = _build_line_lcs(lines1, lines2)
    line_operations = _extract_line_operations(lines1, lines2, lcs)

    # Phase 3: convert line operations to character-offset operations
    final_operations = []
    for op_type, start_line, end_line, op_lines in line_operations:
        content = ''.join(op_lines)
        if op_type == "add":
            char_pos = line_offsets_s1[start_line]
            final_operations.append(("add", char_pos, char_pos, content))
        elif op_type == "delete":
            char_start = line_offsets_s1[start_line]
            char_end = line_offsets_s1[end_line]
            final_operations.append(("delete", char_start, char_end, content))

    # Phase 4: a delete immediately followed by an add at the same spot is a
    # modification; refine it with a character-level diff.
    optimized_operations = []
    i = 0
    while i < len(final_operations):
        if (i + 1 < len(final_operations) and
            final_operations[i][0] == "delete" and
            final_operations[i+1][0] == "add" and
            final_operations[i][2] == final_operations[i+1][1]):

            del_op = final_operations[i]
            add_op = final_operations[i+1]
            base_pos = del_op[1]

            # Character-level comparison of the replaced text
            char_ops = _char_diff_in_region(del_op[3], add_op[3])

            # Shift region-relative positions to offsets in s1
            for op_type, rel_start, rel_end, content in char_ops:
                optimized_operations.append((op_type, base_pos + rel_start, base_pos + rel_end, content))

            i += 2
        else:
            optimized_operations.append(final_operations[i])
            i += 1

    return optimized_operations