import re import os # 解析SQL文件,提取表结构 def parse_sql_file(file_path, ignore_prefix=None): tables = {} # 读取文件内容 with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # 匹配CREATE TABLE语句 table_pattern = re.compile(r'CREATE TABLE\s+`?([^`\s]+)`?\s*\(([^;]+)\)\s*(?:[^;]+);', re.DOTALL | re.IGNORECASE) matches = table_pattern.findall(content) for full_table_name, table_def in matches: # 处理表名,忽略前缀 table_name = full_table_name if ignore_prefix and table_name.startswith(ignore_prefix): table_name = table_name[len(ignore_prefix):] # 提取列定义 columns = [] # 匹配列定义,包括列名、类型、约束等 column_pattern = re.compile(r'\s*`?([^`\s,]+)`?\s+([^\s,]+)\s*([^,]+)(?:,|$)', re.DOTALL) column_matches = column_pattern.findall(table_def) for col_name, col_type, col_constraints in column_matches: # 清理约束中的换行符和多余空格 col_constraints = re.sub(r'\s+', ' ', col_constraints.strip()) columns.append((col_name, col_type, col_constraints)) # 提取主键 primary_key = None pk_pattern = re.compile(r'PRIMARY\s+KEY\s*\(`?([^`\s,]+)`?\)', re.IGNORECASE) pk_match = pk_pattern.search(table_def) if pk_match: primary_key = pk_match.group(1) # 提取索引 indexes = [] index_pattern = re.compile(r'INDEX\s+`?([^`\s]+)`?\s*\(([^)]+)\)', re.IGNORECASE) index_matches = index_pattern.findall(table_def) for index_name, index_cols in index_matches: indexes.append((index_name, index_cols.strip())) # 提取唯一约束 unique_constraints = [] unique_pattern = re.compile(r'UNIQUE\s+KEY\s*`?([^`\s]+)`?\s*\(([^)]+)\)', re.IGNORECASE) unique_matches = unique_pattern.findall(table_def) for unique_name, unique_cols in unique_matches: unique_constraints.append((unique_name, unique_cols.strip())) tables[table_name] = { 'full_name': full_table_name, 'columns': columns, 'primary_key': primary_key, 'indexes': indexes, 'unique_constraints': unique_constraints } return tables # 比较两个数据库表结构 def compare_databases(db1, db2, db1_name, db2_name): diffs = { 'only_in_db1': [], 'only_in_db2': [], 'column_diffs': {}, 'primary_key_diffs': {}, 'index_diffs': {}, 'unique_constraint_diffs': {} } # 找出只在db1中存在的表 for table_name in db1: if table_name not in db2: diffs['only_in_db1'].append(table_name) # 找出只在db2中存在的表 for table_name in db2: if table_name not in db1: diffs['only_in_db2'].append(table_name) # 比较共同存在的表 common_tables = set(db1.keys()) & set(db2.keys()) for table_name in common_tables: table1 = db1[table_name] table2 = db2[table_name] # 比较列 col_diffs = { 'only_in_db1': [], 'only_in_db2': [], 'type_diffs': [], 'constraint_diffs': [] } # 列名集合 cols1 = {col[0]: col for col in table1['columns']} cols2 = {col[0]: col for col in table2['columns']} # 只在db1中的列 for col_name in cols1: if col_name not in cols2: col_diffs['only_in_db1'].append(col_name) # 只在db2中的列 for col_name in cols2: if col_name not in cols1: col_diffs['only_in_db2'].append(col_name) # 比较列类型和约束 common_cols = set(cols1.keys()) & set(cols2.keys()) for col_name in common_cols: col1 = cols1[col_name] col2 = cols2[col_name] # 类型差异 if col1[1] != col2[1]: col_diffs['type_diffs'].append((col_name, col1[1], col2[1])) # 约束差异 if col1[2] != col2[2]: col_diffs['constraint_diffs'].append((col_name, col1[2], col2[2])) if any(col_diffs.values()): diffs['column_diffs'][table_name] = col_diffs # 比较主键 if table1['primary_key'] != table2['primary_key']: diffs['primary_key_diffs'][table_name] = (table1['primary_key'], table2['primary_key']) # 比较索引 index_diffs = { 'only_in_db1': [], 'only_in_db2': [], 'definition_diffs': [] } indexes1 = {idx[0]: idx[1] for idx in table1['indexes']} indexes2 = {idx[0]: idx[1] for idx in table2['indexes']} # 只在db1中的索引 for idx_name in indexes1: if idx_name not in indexes2: index_diffs['only_in_db1'].append((idx_name, indexes1[idx_name])) # 只在db2中的索引 for idx_name in indexes2: if idx_name not in indexes1: index_diffs['only_in_db2'].append((idx_name, indexes2[idx_name])) # 比较索引定义 common_indexes = set(indexes1.keys()) & set(indexes2.keys()) for idx_name in common_indexes: if indexes1[idx_name] != indexes2[idx_name]: index_diffs['definition_diffs'].append((idx_name, indexes1[idx_name], indexes2[idx_name])) if any(index_diffs.values()): diffs['index_diffs'][table_name] = index_diffs # 比较唯一约束 unique_diffs = { 'only_in_db1': [], 'only_in_db2': [], 'definition_diffs': [] } unique1 = {uc[0]: uc[1] for uc in table1['unique_constraints']} unique2 = {uc[0]: uc[1] for uc in table2['unique_constraints']} # 只在db1中的唯一约束 for uc_name in unique1: if uc_name not in unique2: unique_diffs['only_in_db1'].append((uc_name, unique1[uc_name])) # 只在db2中的唯一约束 for uc_name in unique2: if uc_name not in unique1: unique_diffs['only_in_db2'].append((uc_name, unique2[uc_name])) # 比较唯一约束定义 common_unique = set(unique1.keys()) & set(unique2.keys()) for uc_name in common_unique: if unique1[uc_name] != unique2[uc_name]: unique_diffs['definition_diffs'].append((uc_name, unique1[uc_name], unique2[uc_name])) if any(unique_diffs.values()): diffs['unique_constraint_diffs'][table_name] = unique_diffs return diffs # 打印差异报告 # 生成Markdown格式的差异报告 def generate_markdown_report(diffs, db1_name, db2_name, db1_table_count, db2_table_count): report = [] # 报告标题 report.append(f"# 数据库差异报告: {db1_name} vs {db2_name}") report.append("\n## 1. 表数量统计") report.append("| 数据库文件 | 表数量 |") report.append("|------------|--------|") report.append(f"| {db1_name} | {db1_table_count} |") report.append(f"| {db2_name} | {db2_table_count} |") # 表存在性差异 report.append("\n## 2. 表存在性差异") # 仅在db1中的表 if diffs['only_in_db1']: report.append(f"\n### 2.1 仅在 {db1_name} 中存在的表 ({len(diffs['only_in_db1'])} 个)") report.append("| 表名 |") report.append("|------|") for table in sorted(diffs['only_in_db1']): report.append(f"| {table} |") # 仅在db2中的表 if diffs['only_in_db2']: report.append(f"\n### 2.2 仅在 {db2_name} 中存在的表 ({len(diffs['only_in_db2'])} 个)") report.append("| 表名 |") report.append("|------|") for table in sorted(diffs['only_in_db2']): report.append(f"| {table} |") # 列结构差异 if diffs['column_diffs']: report.append(f"\n## 3. 列结构差异的表 ({len(diffs['column_diffs'])} 个)") for table, col_diffs in diffs['column_diffs'].items(): report.append(f"\n### 3.1 表: {table}") # 仅在db1中的列 if col_diffs['only_in_db1']: report.append(f"\n#### 3.1.1 仅在 {db1_name} 中存在的列") report.append("| 列名 |") report.append("|------|") for col in col_diffs['only_in_db1']: report.append(f"| {col} |") # 仅在db2中的列 if col_diffs['only_in_db2']: report.append(f"\n#### 3.1.2 仅在 {db2_name} 中存在的列") report.append("| 列名 |") report.append("|------|") for col in col_diffs['only_in_db2']: report.append(f"| {col} |") # 列类型差异 if col_diffs['type_diffs']: report.append(f"\n#### 3.1.3 列类型差异") report.append(f"| 列名 | {db1_name} | {db2_name} |") report.append("|------|------------|------------|") for col_name, type1, type2 in col_diffs['type_diffs']: report.append(f"| {col_name} | {type1} | {type2} |") # 列约束差异 if col_diffs['constraint_diffs']: report.append(f"\n#### 3.1.4 列约束差异") report.append(f"| 列名 | {db1_name} | {db2_name} |") report.append("|------|------------|------------|") for col_name, constraint1, constraint2 in col_diffs['constraint_diffs']: report.append(f"| {col_name} | {constraint1} | {constraint2} |") # 主键差异 if diffs['primary_key_diffs']: report.append(f"\n## 4. 主键差异的表 ({len(diffs['primary_key_diffs'])} 个)") report.append(f"| 表名 | {db1_name} | {db2_name} |") report.append("|------|------------|------------|") for table, (pk1, pk2) in diffs['primary_key_diffs'].items(): report.append(f"| {table} | {pk1} | {pk2} |") # 索引差异 if diffs['index_diffs']: report.append(f"\n## 5. 索引差异的表 ({len(diffs['index_diffs'])} 个)") for table, idx_diffs in diffs['index_diffs'].items(): report.append(f"\n### 5.1 表: {table}") # 仅在db1中的索引 if idx_diffs['only_in_db1']: report.append(f"\n#### 5.1.1 仅在 {db1_name} 中存在的索引") report.append("| 索引名 | 索引列 |") report.append("|--------|--------|") for idx_name, idx_cols in idx_diffs['only_in_db1']: report.append(f"| {idx_name} | {idx_cols} |") # 仅在db2中的索引 if idx_diffs['only_in_db2']: report.append(f"\n#### 5.1.2 仅在 {db2_name} 中存在的索引") report.append("| 索引名 | 索引列 |") report.append("|--------|--------|") for idx_name, idx_cols in idx_diffs['only_in_db2']: report.append(f"| {idx_name} | {idx_cols} |") # 索引定义差异 if idx_diffs['definition_diffs']: report.append(f"\n#### 5.1.3 索引定义差异") report.append(f"| 索引名 | {db1_name} | {db2_name} |") report.append("|--------|------------|------------|") for idx_name, idx1, idx2 in idx_diffs['definition_diffs']: report.append(f"| {idx_name} | {idx1} | {idx2} |") # 唯一约束差异 if diffs['unique_constraint_diffs']: report.append(f"\n## 6. 唯一约束差异的表 ({len(diffs['unique_constraint_diffs'])} 个)") for table, uc_diffs in diffs['unique_constraint_diffs'].items(): report.append(f"\n### 6.1 表: {table}") # 仅在db1中的唯一约束 if uc_diffs['only_in_db1']: report.append(f"\n#### 6.1.1 仅在 {db1_name} 中存在的唯一约束") report.append("| 约束名 | 约束列 |") report.append("|--------|--------|") for uc_name, uc_cols in uc_diffs['only_in_db1']: report.append(f"| {uc_name} | {uc_cols} |") # 仅在db2中的唯一约束 if uc_diffs['only_in_db2']: report.append(f"\n#### 6.1.2 仅在 {db2_name} 中存在的唯一约束") report.append("| 约束名 | 约束列 |") report.append("|--------|--------|") for uc_name, uc_cols in uc_diffs['only_in_db2']: report.append(f"| {uc_name} | {uc_cols} |") # 唯一约束定义差异 if uc_diffs['definition_diffs']: report.append(f"\n#### 6.1.3 唯一约束定义差异") report.append(f"| 约束名 | {db1_name} | {db2_name} |") report.append("|--------|------------|------------|") for uc_name, uc1, uc2 in uc_diffs['definition_diffs']: report.append(f"| {uc_name} | {uc1} | {uc2} |") report.append("\n## 7. 总结") report.append("差异比较完成!") return '\n'.join(report) # 主函数 def main(): # 文件路径 db1_path = r'D:\projects\shop-projects\backend\docs\db\niushop_database.sql' db2_path = r'D:\projects\shop-projects\backend\docs\db\init_v2.0_with_data.sql' report_path = r'D:\projects\shop-projects\backend\docs\db\database_diff_report.md' # 解析数据库结构 print(f"正在解析 {db1_path}...") db1 = parse_sql_file(db1_path) db1_table_count = len(db1) print(f"解析完成,共 {db1_table_count} 个表") print(f"\n正在解析 {db2_path}...") db2 = parse_sql_file(db2_path, ignore_prefix='lucky_') db2_table_count = len(db2) print(f"解析完成,共 {db2_table_count} 个表") # 比较差异 print("\n正在比较数据库差异...") diffs = compare_databases(db1, db2, 'niushop_database.sql', 'init_v2.0_with_data.sql') # 生成Markdown差异报告 print("\n正在生成Markdown差异报告...") report = generate_markdown_report(diffs, 'niushop_database.sql', 'init_v2.0_with_data.sql', db1_table_count, db2_table_count) # 保存报告到文件 with open(report_path, 'w', encoding='utf-8') as f: f.write(report) print(f"\n差异报告已生成: {report_path}") print("差异比较完成!") if __name__ == "__main__": main()