File size: 2,365 Bytes
a41df2a a0303d8 8c122e9 a0303d8 a41df2a a0303d8 8c122e9 a0303d8 4537091 a0303d8 8c122e9 a0303d8 a41df2a 8c122e9 a0303d8 a41df2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import re
from tname import *
from Rfile import *
def Strip(seq_file):
    """Unwrap multi-line sequence records so each sequence sits on one line.

    The lines of ``seq_file.name`` are read with ``j_reads`` and appended to
    a new file named ``Name() + "input.tsv"``. A line that does not start
    with ``>`` and whose successor does not start with ``>`` either is
    treated as a wrapped fragment: only its first whitespace-delimited token
    is written, which drops the line break so the fragment is joined to the
    following line. Every other line (a ``>`` line, the last fragment before
    a ``>`` line, and the final line of the input) is written unchanged.

    Args:
        seq_file: Object whose ``name`` attribute is the path of the
            sequence file to read.

    Returns:
        The path of the output file. For an empty input the file is still
        created (empty) and its path returned.
    """
    # NOTE(review): assumes j_reads returns the file's lines as a list of
    # strings that keep their line terminators -- confirm in Rfile.
    contents = j_reads(seq_file.name)
    ina = Name() + r"input.tsv"  # name of the result file

    # Open the output once instead of re-opening it for every input line.
    # Append mode is kept from the original implementation.
    with open(ina, "a") as out:
        last = len(contents) - 1
        for i, line in enumerate(contents):
            # The final line is always written as-is; it has no successor
            # to compare against.
            is_fragment = (
                i < last
                and not line.startswith(">")
                and not contents[i + 1].startswith(">")
            )
            if is_fragment:
                # Keep only the first token. A whitespace-only line
                # contributes nothing instead of raising IndexError.
                tokens = line.split()
                out.write(tokens[0] if tokens else "")
            else:
                out.write(line)
    return ina
def Merge(smi_file, seq_file):
    """Write a tab-separated table pairing one SMILES with every sequence.

    The content of ``smi_file.name`` is read with ``j_read`` and stripped of
    leading/trailing newlines. The lines of ``seq_file.name`` are read with
    ``j_reads``. A header row followed by one row per odd-indexed line is
    appended to ``Name() + "kcat_input.tsv"``. Each row holds a generated
    label ``>seqN`` (N counts from 0), the SMILES string, and the
    odd-indexed line itself, written verbatim including its line terminator.

    Args:
        smi_file: Object whose ``name`` attribute is the path of the file
            holding the SMILES string.
        seq_file: Object whose ``name`` attribute is the path of the
            sequence file. Presumably the output of ``Strip``, alternating
            name line / sequence line -- confirm against the caller.

    Returns:
        The path of the output file.
    """
    smile = j_read(smi_file.name).strip("\n")
    # Read the file whose line breaks have already been removed.
    contents = j_reads(seq_file.name)
    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open the output once; the original re-opened it for every record.
    with open(name, "a") as out:
        # The rows are tab-separated, so the header must be as well;
        # otherwise it reads as a single column.
        header = ("Substrate Name", "Substrate SMILES", "Protein Sequence")
        out.write("\t".join(header))
        out.write("\n")
        # Only odd indices are written (the original filtered on i % 2 == 1).
        for i in range(1, len(contents), 2):
            label = ">seq" + str((i - 1) // 2)  # running record index
            out.write("\t".join((label, smile, contents[i])))
    return name
def Merge_All(smi_file, seq_file):
    """Write a tab-separated table pairing one SMILES with every sequence.

    Works like ``Merge`` but labels each row with the line that precedes the
    sequence line instead of a generated ``>seqN`` index. In that label,
    spaces are replaced by underscores and newline characters are removed.

    The content of ``smi_file.name`` is read with ``j_read`` and stripped of
    leading/trailing newlines. The lines of ``seq_file.name`` are read with
    ``j_reads``. A header row followed by one row per odd-indexed line is
    appended to ``Name() + "kcat_input.tsv"``. The odd-indexed line is
    written verbatim, including its line terminator.

    Args:
        smi_file: Object whose ``name`` attribute is the path of the file
            holding the SMILES string.
        seq_file: Object whose ``name`` attribute is the path of the
            sequence file. Presumably the output of ``Strip``, alternating
            name line / sequence line -- confirm against the caller.

    Returns:
        The path of the output file.
    """
    smile = j_read(smi_file.name).strip("\n")
    # Read the file whose line breaks have already been removed.
    contents = j_reads(seq_file.name)
    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open the output once; the original re-opened it for every record.
    with open(name, "a") as out:
        # The rows are tab-separated, so the header must be as well;
        # otherwise it reads as a single column.
        header = ("Substrate Name", "Substrate SMILES", "Protein Sequence")
        out.write("\t".join(header))
        out.write("\n")
        # Only odd indices are written; the line before each one is its label.
        for i in range(1, len(contents), 2):
            # Plain literal replacement; no regular expression is needed.
            info = contents[i - 1].replace(" ", "_").replace("\n", "")
            out.write("\t".join((info, smile, contents[i])))
    return name
|