当前位置:首页 / 文章测试 / 导入余额表python程序

导入余额表python程序

开始打字练习

import tkinter as tk

from tkinter import filedialog

import pandas as pd

import re

import subprocess

import os

def import_excel():

root = tk.Tk()

root.withdraw()

file_paths = filedialog.askopenfilenames(filetypes=[("Excel Files", "*.xlsx;*.xls")])

if file_paths:

combined_df = pd.DataFrame()

for file_path in file_paths:

try:

df = pd.read_excel(file_path)

df_filtered = df.loc[df.notna().sum(axis=1) >= 1].copy()

found_subject_code = False

found_debit = False

for i in range(len(df_filtered)):

for j in range(len(df_filtered.columns)):

cell_value = str(df_filtered.iloc[i, j]).lower()

if not found_subject_code and (re.search(r'编码|科目编码|科目代码', cell_value)):

df_filtered.rename(columns={df_filtered.columns[j]: '科目代码'}, inplace=True)

if j < len(df_filtered.columns) - 1:

df_filtered.rename(columns={df_filtered.columns[j + 1]: '科目名称'}, inplace=True)

found_subject_code = True

if not found_debit and re.search(r'借方|方向', cell_value):

df_filtered.rename(columns={df_filtered.columns[j]: '期初借方'}, inplace=True)

if j < len(df_filtered.columns) - 1:

df_filtered.rename(columns={df_filtered.columns[j + 1]: '期初贷方'}, inplace=True)

found_debit = True

if found_subject_code and found_debit:

break

if found_subject_code and found_debit:

break

found_credit = False

last_credit_index = None

for j in range(len(df_filtered.columns) - 1, -1, -1):

for i in range(len(df_filtered)):

cell_value = str(df_filtered.iloc[i, j]).lower()

if re.search(r'贷方|期末余额', cell_value):

last_credit_index = j

found_credit = True

break

if found_credit:

break

if found_credit:

df_filtered.rename(columns={df_filtered.columns[last_credit_index]: '期末贷方'}, inplace=True)

if last_credit_index > 0:

df_filtered.rename(columns={df_filtered.columns[last_credit_index - 1]: '期末借方'}, inplace=True)

if last_credit_index - 2 >= 0:

df_filtered.rename(columns={df_filtered.columns[last_credit_index - 2]: '贷方金额'}, inplace=True)

if last_credit_index - 3 >= 0:

df_filtered.rename(columns={df_filtered.columns[last_credit_index - 3]: '借方金额'}, inplace=True)

try:

subject_name_index = df_filtered.columns.get_loc('科目名称')

debit_index = df_filtered.columns.get_loc('期初借方')

if subject_name_index + 1 == debit_index:

# 插入一个空列

df_filtered.insert(subject_name_index + 1, '辅助核算', '')

else:

df_filtered.rename(columns={df_filtered.columns[subject_name_index + 1]: '辅助核算'}, inplace=True)

except KeyError:

pass

file_name = os.path.basename(file_path)

df_filtered.insert(0, '公司名称', file_name)

df_filtered.insert(1, '唯一编码', '')

if 'B' in df_filtered.columns:

df_filtered.drop(columns='B', inplace=True)

column_order = ['公司名称', '唯一编码', '科目代码', '科目名称', '辅助核算', '期初借方', '期初贷方', '借方金额', '贷方金额', '期末借方', '期末贷方']

# 调整列顺序

existing_columns = [col for col in df_filtered.columns if col not in column_order]

final_column_order = column_order[:]

for col in existing_columns:

final_column_order.append(col)

df_filtered = df_filtered.reindex(columns=final_column_order)

# 清除 C 列和 D 列的格式和空格

def clean_text(value):

if isinstance(value, str):

# 去除两端空格和换行符,以及中间多余的空格

value = ' '.join(value.strip().split())

return value

df_filtered[['科目代码', '科目名称']] = df_filtered[['科目代码', '科目名称']].applymap(clean_text)

# 处理 C 列的空值

科目代码列 = df_filtered['科目代码']

填充值 = None

填充列表 = []

for index, value in 科目代码列.items():

if pd.isna(value):

if 填充值 is None:

填充列表.append(None)

else:

填充列表.append(f"{填充值}_{index}")

else:

填充值 = value

填充列表.append(value)

df_filtered['科目代码'] = 填充列表

# 将 F 列到 K 列的空值填充为 0,并设置为数值格式,保留两位小数,添加千分符

columns_to_format = df_filtered.columns[5:11] # 假设 F 列到 K 列的索引为 5 到 10

for col in columns_to_format:

if pd.api.types.is_numeric_dtype(df_filtered[col]):

df_filtered[col] = df_filtered[col].fillna(0)

df_filtered[col] = df_filtered[col].apply(lambda x: "{:,.2f}".format(x))

df_filtered[col] = pd.to_numeric(df_filtered[col].str.replace(',', ''))

# 从 L 列到 AL 列添加新列及名称

new_columns = ["期初调整", "期末调整", "明细", "现金", "代码长度", "期初", "筛选", "四位代码", "分类", "一级科目", "明细-", "报表科目", "重分分类", "重分科目", "重分类", "汇算清缴", "空行", "期末", "款项性质", "底稿编号", "重底稿编号", "期间", "6", "1年以内", "1年以上", "9", "编码去重"]

start_index = len(df_filtered.columns)

for col_name in new_columns:

df_filtered.insert(start_index, col_name, '')

start_index += 1

# 在期间列(AG 列)填充“本期”

df_filtered['期间'] = '本期'

# 删除 D 列(科目名称)为空的行

df_filtered = df_filtered.dropna(subset=['科目名称'])

# 对 C 列科目代码进行连续重复检查和修改

prev_code = None

def modify_consecutive_duplicate_codes(row):

nonlocal prev_code

code = row['科目代码']

if pd.notna(code):

if code == prev_code:

return f"{code}_{row.name}"

else:

prev_code = code

return code

df_filtered['科目代码'] = df_filtered.apply(modify_consecutive_duplicate_codes, axis=1)

# 将当前文件处理结果添加到 combined_df 中

combined_df = pd.concat([combined_df, df_filtered], ignore_index=True)

except Exception as e:

print(f"读取文件 {file_path} 时发生错误: {e}")

# 输出修改后的数据,设置显示选项使中间列不被省略

pd.set_option('display.max_columns', None)

styled_df = combined_df.style.set_properties(**{'text-align': 'left'})

print(styled_df.data.to_string())

# 恢复默认显示选项

pd.reset_option('display.max_columns')

# 将整理好的数据保存到 Excel 文件,并将工作表命名为 "余"

with pd.ExcelWriter("余t.xlsx") as writer:

combined_df.to_excel(writer, sheet_name="余", index=False)

# 打开生成的 Excel 文件

try:

if os.name == 'nt': # Windows

subprocess.call(['start', 'excel', "余t.xlsx"], shell=True)

elif os.name == 'posix': # macOS 或 Linux

subprocess.call(['open', '-a', 'Microsoft Excel', "余t.xlsx"])

except Exception as e:

print(f"打开文件时出错: {e}")

if __name__ == "__main__":

import_excel()

声明:以上文章均为用户自行发布,仅供打字交流使用,不代表本站观点,本站不承担任何法律责任,特此声明!如果有侵犯到您的权利,请及时联系我们删除。