Upload Preinput_Merge.py
Browse files- Preinput_Merge.py +55 -0
Preinput_Merge.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from tname import *
|
3 |
+
|
4 |
+
|
5 |
+
def Strip(seq_file):
    """Join wrapped sequence lines of a FASTA-style file into single lines.

    The file at ``seq_file.name`` is read line by line. A line that does not
    start with ``'>'`` and whose successor also does not start with ``'>'``
    is treated as a sequence chunk that continues on the next line: only its
    first whitespace-delimited token is kept and its newline is dropped, so
    the following chunk is appended directly after it. Every other line
    (``'>'`` header lines and the final chunk of each sequence) is copied
    unchanged, as is the very last line of the file.

    Args:
        seq_file: Object whose ``name`` attribute is a path that can be
            opened for reading.

    Returns:
        The name of the output file, ``Name() + "input.tsv"``. The file is
        opened in append mode, so existing content under that name is kept.
        An empty input yields an output file with nothing appended.
    """
    with open(seq_file.name, "r") as f:
        contents = f.readlines()

    # NOTE(review): Name() comes from the `tname` star import; presumably it
    # returns a path/prefix for result files - confirm against that module.
    ina = Name() + r"input.tsv"  # name of the result file

    # Open the output once rather than re-opening it for every input line.
    with open(ina, "a") as out:
        # Strip whitespace from the sequence lines and write them out. Each
        # line is inspected together with its successor, so the last line is
        # not visited by this loop.
        for current, following in zip(contents, contents[1:]):
            if current[0] != '>' and following[0] != '>':
                tokens = current.split()
                # A whitespace-only line has no tokens; skip it instead of
                # failing with IndexError.
                if tokens:
                    out.write(tokens[0])
            else:
                out.write(current)
        # The last line has no successor to compare with, so it is written
        # separately and verbatim. Guarded so an empty input does not raise.
        if contents:
            out.write(contents[-1])
    return ina
|
24 |
+
|
25 |
+
|
26 |
+
def Merge(smi_file, seq_file):
    """Pair one SMILES string with every sequence and append them to a table.

    The first line of the file at ``smi_file.name`` is used as the SMILES
    string (trailing newline removed). The file at ``seq_file.name`` is read
    as alternating lines: even-indexed lines are ignored and every
    odd-indexed line is taken as a sequence. For the k-th sequence
    (0-based) one row ``">seq<k>" TAB smiles TAB sequence`` is written.

    Args:
        smi_file: Object whose ``name`` attribute is a readable path; only
            the first line of that file is used.
        seq_file: Object whose ``name`` attribute is a readable path to the
            sequence file (expected to already have whitespace removed).

    Returns:
        The name of the output file, ``Name() + "kcat_input.tsv"``. The file
        is opened in append mode and a header line is written on every call.
    """
    with open(smi_file.name, "r") as f1:
        smile = f1.readline().strip("\n")

    # Read the sequence file from which whitespace has already been removed.
    with open(seq_file.name, "r") as f:
        contents = f.readlines()

    # NOTE(review): Name() comes from the `tname` star import; presumably it
    # returns a path/prefix for result files - confirm against that module.
    name = Name() + r"kcat_input.tsv"  # name of the result file

    # A single handle serves the header and all rows; the file is no longer
    # re-opened for every record.
    with open(name, "a") as f3:
        # NOTE(review): the column names here are separated by spaces while
        # the data rows below are tab-separated - confirm whether the header
        # is meant to be tab-delimited as well.
        f3.write("Substrate Name Substrate SMILES Protein Sequence")
        f3.write("\n")

        # Odd-indexed lines hold the sequences; the line before each one is
        # not used, and rows are labelled with a running index instead.
        for number, sequence in enumerate(contents[1::2]):
            # Row index
            f3.write(">seq" + str(number))
            f3.write("\t")
            # SMILES string
            f3.write(smile)
            f3.write("\t")
            # Sequence (keeps its own trailing newline, which ends the row)
            f3.write(sequence)
    return name
|