jie_test4 / Preinput_Merge.py
jie1's picture
Update Preinput_Merge.py
a41df2a
raw
history blame
No virus
2.37 kB
import re
from tname import *
from Rfile import *
def Strip(seq_file):
    """Collapse the wrapped sequence lines of a FASTA-style file.

    Every line except the last is compared with the line that follows it.
    When neither of the two starts with '>', only the first
    whitespace-separated token of the current line is written, which drops
    its line break so the next line continues on the same output line.
    Otherwise the line is written unchanged. The last line is always written
    unchanged.

    Args:
        seq_file: Object whose ``name`` attribute is the path handed to
            ``j_reads``.

    Returns:
        The output file name: ``Name()`` followed by ``"input.tsv"``.
    """
    # Presumably j_reads returns the file's lines with their line endings
    # kept -- TODO confirm against Rfile.
    contents = j_reads(seq_file.name)
    ina = Name() + r"input.tsv"  # name of the result file

    pieces = []
    for line, following in zip(contents, contents[1:]):
        # startswith() is safe on an empty string, unlike indexing with [0].
        if not line.startswith('>') and not following.startswith('>'):
            # NOTE(review): only the first token is kept, so anything after
            # an inner space on a sequence line is discarded -- confirm that
            # this is intended.
            tokens = line.split()
            # A blank line has no tokens; it contributes nothing rather than
            # raising IndexError.
            pieces.append(tokens[0] if tokens else "")
        else:
            pieces.append(line)

    # The last line has no successor to compare with, so it is kept as is.
    # Empty input simply produces no output lines.
    if contents:
        pieces.append(contents[-1])

    # Open once and append everything, instead of reopening per line.
    with open(ina, "a") as f:
        f.writelines(pieces)
    return ina
def Merge(smi_file, seq_file):
    """Write a tab-separated table pairing one SMILES string with sequences.

    After a header row, one row is appended for every odd-indexed line of the
    sequence file (indices 1, 3, 5, ...). Each row holds a generated label
    ``>seq<k>`` (k counts from 0), the SMILES text, and the sequence line
    exactly as read, including whatever line ending it carries.

    Args:
        smi_file: Object whose ``name`` attribute is the path handed to
            ``j_read``; leading and trailing newlines are stripped from the
            text that comes back.
        seq_file: Object whose ``name`` attribute is the path handed to
            ``j_reads``. Presumably laid out as alternating header and
            sequence lines -- TODO confirm against the caller.

    Returns:
        The output file name: ``Name()`` followed by ``"kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)
    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open once for the header and all rows, instead of once per record.
    with open(name, "a") as f3:
        # The column names are tab-separated so the header row uses the same
        # delimiter as the data rows below.
        f3.write("Substrate Name\tSubstrate SMILES\tProtein Sequence\n")
        # contents[1::2] selects the odd-indexed lines; the enumerate counter
        # equals (i - 1) // 2 for original index i.
        for index, sequence in enumerate(contents[1::2]):
            f3.write(">seq" + str(index) + "\t" + smile + "\t" + sequence)
    return name
def Merge_All(smi_file, seq_file):
    """Write a tab-separated table keyed by the original header lines.

    After a header row, one row is appended for every odd-indexed line of the
    sequence file. The first field is the line just before it, with spaces
    replaced by underscores and newlines removed. The second field is the
    SMILES text, and the third is the odd-indexed line exactly as read,
    including whatever line ending it carries.

    Args:
        smi_file: Object whose ``name`` attribute is the path handed to
            ``j_read``; leading and trailing newlines are stripped from the
            text that comes back.
        seq_file: Object whose ``name`` attribute is the path handed to
            ``j_reads``. Presumably laid out as alternating header and
            sequence lines -- TODO confirm against the caller.

    Returns:
        The output file name: ``Name()`` followed by ``"kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)
    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open once for the header and all rows, instead of once per record.
    with open(name, "a") as f3:
        # The column names are tab-separated so the header row uses the same
        # delimiter as the data rows below.
        f3.write("Substrate Name\tSubstrate SMILES\tProtein Sequence\n")
        # Pair each even-indexed line with the odd-indexed line after it; a
        # trailing line without a partner is skipped.
        for header_line, sequence in zip(contents[0::2], contents[1::2]):
            # Literal replacements, so plain str.replace is enough.
            info = header_line.replace(" ", "_").replace("\n", "")
            f3.write(info + "\t" + smile + "\t" + sequence)
    return name