Tuchuanhuhuhu committed on
Commit
40e7d1c
·
1 Parent(s): 6431f64

优化Excel文档加载方式

Browse files
Files changed (2) hide show
  1. modules/llama_func.py +4 -1
  2. modules/utils.py +5 -9
modules/llama_func.py CHANGED
@@ -70,7 +70,10 @@ def get_documents(file_src):
70
  text_raw = loader.load_data(file=filepath)[0].text
71
  elif file_type == ".xlsx":
72
  logging.debug("Loading Excel...")
73
- text_raw = excel_to_string(filepath)
 
 
 
74
  else:
75
  logging.debug("Loading text file...")
76
  with open(filepath, "r", encoding="utf-8") as f:
 
70
  text_raw = loader.load_data(file=filepath)[0].text
71
  elif file_type == ".xlsx":
72
  logging.debug("Loading Excel...")
73
+ text_list = excel_to_string(filepath)
74
+ for elem in text_list:
75
+ documents.append(Document(elem))
76
+ continue
77
  else:
78
  logging.debug("Loading text file...")
79
  with open(filepath, "r", encoding="utf-8") as f:
modules/utils.py CHANGED
@@ -504,15 +504,15 @@ def add_details(lst):
504
  return nodes
505
 
506
 
507
- def sheet_to_string(sheet):
508
- result = ""
509
  for index, row in sheet.iterrows():
510
  row_string = ""
511
  for column in sheet.columns:
512
  row_string += f"{column}: {row[column]}, "
513
  row_string = row_string.rstrip(", ")
514
  row_string += "."
515
- result += row_string + "\n"
516
  return result
517
 
518
  def excel_to_string(file_path):
@@ -520,17 +520,13 @@ def excel_to_string(file_path):
520
  excel_file = pd.read_excel(file_path, engine='openpyxl', sheet_name=None)
521
 
522
  # 初始化结果字符串
523
- result = ""
524
 
525
  # 遍历每一个工作表
526
  for sheet_name, sheet_data in excel_file.items():
527
- # 将工作表名称添加到结果字符串
528
- result += f"Sheet: {sheet_name}\n"
529
 
530
  # 处理当前工作表并添加到结果字符串
531
- result += sheet_to_string(sheet_data)
532
 
533
- # 在不同工作表之间添加分隔符
534
- result += "\n" + ("-" * 20) + "\n\n"
535
 
536
  return result
 
504
  return nodes
505
 
506
 
507
def sheet_to_string(sheet, sheet_name=None):
    """Render every row of a worksheet as one "column: value" sentence.

    Args:
        sheet: A pandas DataFrame holding the contents of one worksheet.
        sheet_name: Name of the worksheet. Accepted so callers can pass it
            along, but it is not used and does not affect the output.

    Returns:
        A list with one string per row, formatted as
        "col1: val1, col2: val2." in column order. A sheet without rows
        yields an empty list.
    """
    columns = list(sheet.columns)
    # Join the fields with the separator instead of appending ", " after each
    # one and calling rstrip(", ") at the end: rstrip removes a *set of
    # characters*, so it also ate commas and spaces that were part of the
    # last cell's value.
    return [
        ", ".join(f"{column}: {row[column]}" for column in columns) + "."
        for _, row in sheet.iterrows()
    ]
517
 
518
def excel_to_string(file_path):
    """Load an Excel workbook and return its rows as a flat list of strings.

    Each worksheet is read with the openpyxl engine and passed to
    sheet_to_string together with its name; the per-sheet lists are
    concatenated in the order the worksheets are iterated.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A single list containing the strings produced for every worksheet.
    """
    # sheet_name=None asks pandas for every worksheet, keyed by sheet name.
    workbook = pd.read_excel(file_path, engine='openpyxl', sheet_name=None)

    rows = []
    for name, frame in workbook.items():
        rows.extend(sheet_to_string(frame, sheet_name=name))
    return rows