"""Build TSV input files from a sequence file and a SMILES file.

Lines that start with ``>`` are treated as record headers; every other line
is treated as sequence data.
"""
import re  # retained for backward compatibility; no longer used below
from tname import *
from Rfile import *

# Column header shared by Merge and Merge_All.  The data rows are
# tab-separated, so the header uses tabs as well.
_KCAT_HEADER = "Substrate Name\tSubstrate SMILES\tProtein Sequence\n"


def Strip(seq_file):
    """Unwrap multi-line sequences so each record is one header + one line.

    A sequence line that is followed by another sequence line is reduced to
    its first whitespace-delimited token (which also drops its newline), so
    consecutive sequence lines end up concatenated.  Header lines, and the
    sequence line directly before a header, are copied unchanged.  The last
    line is always copied unchanged.

    Args:
        seq_file: Object whose ``name`` attribute is the path of the
            sequence file (presumably an uploaded/temporary file object --
            TODO confirm against callers).

    Returns:
        Path of the written file: ``Name()`` followed by ``"input.tsv"``.
    """
    # NOTE(review): j_reads is assumed to return the file's lines as a list
    # of strings with their line endings kept -- confirm in Rfile.
    contents = j_reads(seq_file.name)
    ina = Name() + r"input.tsv"  # name of the result file

    pieces = []
    for current, following in zip(contents, contents[1:]):
        if current.startswith(">") or following.startswith(">"):
            # Headers and the final line of a sequence keep their newline.
            pieces.append(current)
        else:
            # Continuation line: keep only the first token so that the next
            # line is appended directly after it.  A whitespace-only line
            # contributes nothing instead of raising IndexError.
            tokens = current.split()
            pieces.append(tokens[0] if tokens else "")

    # The last line has no successor to compare with; copy it as-is.
    # Guarded so an empty input yields an empty output file, not a crash.
    if contents:
        pieces.append(contents[-1])

    # Append mode is kept from the original; the file is opened only once.
    with open(ina, "a") as f:
        f.writelines(pieces)
    return ina


def _write_kcat_table(smi_file, seq_file, label_for):
    """Write the kcat input table and return its path.

    Every odd-indexed line of the sequence file (0-based) is taken as a
    sequence and the line before it as that sequence's header, i.e. the
    input is expected to alternate header / sequence lines.

    Args:
        smi_file: Object whose ``name`` attribute is the SMILES file path.
        seq_file: Object whose ``name`` attribute is the sequence file path.
        label_for: Callable ``(line_index, header_line) -> str`` producing
            the first column of a row.

    Returns:
        Path of the written file: ``Name()`` followed by ``"kcat_input.tsv"``.
    """
    # File content with leading/trailing newlines removed.
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)
    name = Name() + r"kcat_input.tsv"  # name of the result file

    with open(name, "a") as out:
        out.write(_KCAT_HEADER)
        for i in range(1, len(contents), 2):
            # contents[i] keeps its own line ending, which terminates the row.
            out.write("\t".join((label_for(i, contents[i - 1]), smile, contents[i])))
    return name


def Merge(smi_file, seq_file):
    """Write a kcat input table whose rows are labelled ``>seq0``, ``>seq1``, ...

    Args:
        smi_file: Object whose ``name`` attribute is the SMILES file path.
        seq_file: Object whose ``name`` attribute is the path of a sequence
            file with alternating header / sequence lines.

    Returns:
        Path of the written ``kcat_input.tsv`` file.
    """
    def label_for(index, _header):
        # Odd line index 1, 3, 5, ... maps to record number 0, 1, 2, ...
        return ">seq" + str((index - 1) // 2)

    return _write_kcat_table(smi_file, seq_file, label_for)


def Merge_All(smi_file, seq_file):
    """Write a kcat input table whose rows are labelled with the record header.

    The label is the header line with spaces replaced by underscores and
    newlines removed.

    Args:
        smi_file: Object whose ``name`` attribute is the SMILES file path.
        seq_file: Object whose ``name`` attribute is the path of a sequence
            file with alternating header / sequence lines.

    Returns:
        Path of the written ``kcat_input.tsv`` file.
    """
    def label_for(_index, header):
        return header.replace(" ", "_").replace("\n", "")

    return _write_kcat_table(smi_file, seq_file, label_for)