import tkinter as tk
from tkinter import filedialog
import pandas as pd
import re
import subprocess
import os
def import_excel():
root = tk.Tk()
root.withdraw()
file_paths = filedialog.askopenfilenames(filetypes=[("Excel Files", "*.xlsx;*.xls")])
if file_paths:
combined_df = pd.DataFrame()
for file_path in file_paths:
try:
df = pd.read_excel(file_path)
df_filtered = df.loc[df.notna().sum(axis=1) >= 1].copy()
found_subject_code = False
found_debit = False
for i in range(len(df_filtered)):
for j in range(len(df_filtered.columns)):
cell_value = str(df_filtered.iloc[i, j]).lower()
if not found_subject_code and (re.search(r'编码|科目编码|科目代码', cell_value)):
df_filtered.rename(columns={df_filtered.columns[j]: '科目代码'}, inplace=True)
if j < len(df_filtered.columns) - 1:
df_filtered.rename(columns={df_filtered.columns[j + 1]: '科目名称'}, inplace=True)
found_subject_code = True
if not found_debit and re.search(r'借方|方向', cell_value):
df_filtered.rename(columns={df_filtered.columns[j]: '期初借方'}, inplace=True)
if j < len(df_filtered.columns) - 1:
df_filtered.rename(columns={df_filtered.columns[j + 1]: '期初贷方'}, inplace=True)
found_debit = True
if found_subject_code and found_debit:
break
if found_subject_code and found_debit:
break
found_credit = False
last_credit_index = None
for j in range(len(df_filtered.columns) - 1, -1, -1):
for i in range(len(df_filtered)):
cell_value = str(df_filtered.iloc[i, j]).lower()
if re.search(r'贷方|期末余额', cell_value):
last_credit_index = j
found_credit = True
break
if found_credit:
break
if found_credit:
df_filtered.rename(columns={df_filtered.columns[last_credit_index]: '期末贷方'}, inplace=True)
if last_credit_index > 0:
df_filtered.rename(columns={df_filtered.columns[last_credit_index - 1]: '期末借方'}, inplace=True)
if last_credit_index - 2 >= 0:
df_filtered.rename(columns={df_filtered.columns[last_credit_index - 2]: '贷方金额'}, inplace=True)
if last_credit_index - 3 >= 0:
df_filtered.rename(columns={df_filtered.columns[last_credit_index - 3]: '借方金额'}, inplace=True)
try:
subject_name_index = df_filtered.columns.get_loc('科目名称')
debit_index = df_filtered.columns.get_loc('期初借方')
if subject_name_index + 1 == debit_index:
# 插入一个空列
df_filtered.insert(subject_name_index + 1, '辅助核算', '')
else:
df_filtered.rename(columns={df_filtered.columns[subject_name_index + 1]: '辅助核算'}, inplace=True)
except KeyError:
pass
file_name = os.path.basename(file_path)
df_filtered.insert(0, '公司名称', file_name)
df_filtered.insert(1, '唯一编码', '')
if 'B' in df_filtered.columns:
df_filtered.drop(columns='B', inplace=True)
column_order = ['公司名称', '唯一编码', '科目代码', '科目名称', '辅助核算', '期初借方', '期初贷方', '借方金额', '贷方金额', '期末借方', '期末贷方']
# 调整列顺序
existing_columns = [col for col in df_filtered.columns if col not in column_order]
final_column_order = column_order[:]
for col in existing_columns:
final_column_order.append(col)
df_filtered = df_filtered.reindex(columns=final_column_order)
# 清除 C 列和 D 列的格式和空格
def clean_text(value):
if isinstance(value, str):
# 去除两端空格和换行符,以及中间多余的空格
value = ' '.join(value.strip().split())
return value
df_filtered[['科目代码', '科目名称']] = df_filtered[['科目代码', '科目名称']].applymap(clean_text)
# 处理 C 列的空值
科目代码列 = df_filtered['科目代码']
填充值 = None
填充列表 = []
for index, value in 科目代码列.items():
if pd.isna(value):
if 填充值 is None:
填充列表.append(None)
else:
填充列表.append(f"{填充值}_{index}")
else:
填充值 = value
填充列表.append(value)
df_filtered['科目代码'] = 填充列表
# 将 F 列到 K 列的空值填充为 0,并设置为数值格式,保留两位小数,添加千分符
columns_to_format = df_filtered.columns[5:11] # 假设 F 列到 K 列的索引为 5 到 10
for col in columns_to_format:
if pd.api.types.is_numeric_dtype(df_filtered[col]):
df_filtered[col] = df_filtered[col].fillna(0)
df_filtered[col] = df_filtered[col].apply(lambda x: "{:,.2f}".format(x))
df_filtered[col] = pd.to_numeric(df_filtered[col].str.replace(',', ''))
# 从 L 列到 AL 列添加新列及名称
new_columns = ["期初调整", "期末调整", "明细", "现金", "代码长度", "期初", "筛选", "四位代码", "分类", "一级科目", "明细-", "报表科目", "重分分类", "重分科目", "重分类", "汇算清缴", "空行", "期末", "款项性质", "底稿编号", "重底稿编号", "期间", "6", "1年以内", "1年以上", "9", "编码去重"]
start_index = len(df_filtered.columns)
for col_name in new_columns:
df_filtered.insert(start_index, col_name, '')
start_index += 1
# 在期间列(AG 列)填充“本期”
df_filtered['期间'] = '本期'
# 删除 D 列(科目名称)为空的行
df_filtered = df_filtered.dropna(subset=['科目名称'])
# 对 C 列科目代码进行连续重复检查和修改
prev_code = None
def modify_consecutive_duplicate_codes(row):
nonlocal prev_code
code = row['科目代码']
if pd.notna(code):
if code == prev_code:
return f"{code}_{row.name}"
else:
prev_code = code
return code
df_filtered['科目代码'] = df_filtered.apply(modify_consecutive_duplicate_codes, axis=1)
# 将当前文件处理结果添加到 combined_df 中
combined_df = pd.concat([combined_df, df_filtered], ignore_index=True)
except Exception as e:
print(f"读取文件 {file_path} 时发生错误: {e}")
# 输出修改后的数据,设置显示选项使中间列不被省略
pd.set_option('display.max_columns', None)
styled_df = combined_df.style.set_properties(**{'text-align': 'left'})
print(styled_df.data.to_string())
# 恢复默认显示选项
pd.reset_option('display.max_columns')
# 将整理好的数据保存到 Excel 文件,并将工作表命名为 "余"
with pd.ExcelWriter("余t.xlsx") as writer:
combined_df.to_excel(writer, sheet_name="余", index=False)
# 打开生成的 Excel 文件
try:
if os.name == 'nt': # Windows
subprocess.call(['start', 'excel', "余t.xlsx"], shell=True)
elif os.name == 'posix': # macOS 或 Linux
subprocess.call(['open', '-a', 'Microsoft Excel', "余t.xlsx"])
except Exception as e:
print(f"打开文件时出错: {e}")
if __name__ == "__main__":
import_excel()