File size: 2,365 Bytes
a41df2a
 
a0303d8
8c122e9
a0303d8
a41df2a
a0303d8
8c122e9
a0303d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c122e9
a0303d8
 
a41df2a
8c122e9
a0303d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a41df2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re

from tname import *
from Rfile import *


def Strip(seq_file):
    """Join wrapped sequence lines of a FASTA-style file into a new file.

    Lines are read with ``j_reads(seq_file.name)``. A line is written
    unchanged when it starts with ``'>'`` or when the line after it starts
    with ``'>'``. Any other line (one followed by another non-header line) is
    reduced to its first whitespace-separated token, which drops its trailing
    newline so that it runs on into the next line. The final line is always
    written unchanged.

    Args:
        seq_file: Object whose ``name`` attribute is passed to ``j_reads``.
            NOTE(review): presumably an uploaded/temporary file handle and
            ``j_reads`` returns the file's lines with newlines kept - confirm
            against ``Rfile``.

    Returns:
        The output path: the value of ``Name()`` with ``"input.tsv"``
        appended. The file is opened in append mode.
    """
    contents = j_reads(seq_file.name)
    ina = Name()
    ina = ina + r"input.tsv"  # result file name

    # Open the output once instead of re-opening it for every input line.
    with open(ina, "a") as f:
        # Walk every line together with its successor; the last line has no
        # successor and is handled separately below.
        for current, following in zip(contents, contents[1:]):
            if not current.startswith('>') and not following.startswith('>'):
                # Sequence line continued on the next line: keep only the
                # first token so the newline is dropped. A blank line has no
                # tokens and contributes nothing.
                tokens = current.split()
                f.write(tokens[0] if tokens else "")
            else:
                f.write(current)
        # The last line is special and is written as-is; skip it when the
        # input is empty rather than failing with an IndexError.
        if contents:
            f.write(contents[-1])
    return ina


def Merge(smi_file, seq_file):
    """Write a tab-separated input table with generated ``>seqN`` names.

    The text returned by ``j_read(smi_file.name)`` (with leading/trailing
    newlines removed) is repeated on every row. Every odd-indexed line
    returned by ``j_reads(seq_file.name)`` becomes one row of the form
    ``>seq<k><TAB><smiles><TAB><line>``, where ``k`` counts rows from 0.
    Even-indexed lines are ignored.

    Args:
        smi_file: Object whose ``name`` attribute is passed to ``j_read``.
            NOTE(review): presumably a file holding one SMILES string -
            confirm against the caller.
        seq_file: Object whose ``name`` attribute is passed to ``j_reads``.
            NOTE(review): presumably a two-line-per-record FASTA file
            (header, then sequence) whose lines keep their newline - confirm.

    Returns:
        The output path: the value of ``Name()`` with ``"kcat_input.tsv"``
        appended. The file is opened in append mode.
    """
    smile = j_read(smi_file.name)
    smile = smile.strip("\n")

    # Read the sequence file (expected to have had line wraps removed).
    contents = j_reads(seq_file.name)

    name = Name()
    name = name + r"kcat_input.tsv"  # result file name

    # Open the output once instead of re-opening it for every record.
    with open(name, "a") as f3:
        f3.write("Substrate Name	Substrate SMILES	Protein Sequence")
        f3.write("\n")

        # Odd-indexed lines are the ones emitted; enumerate yields the same
        # 0-based record number the original derived as (i - 1) / 2.
        for seq_index, sequence in enumerate(contents[1::2]):
            # Record name
            f3.write(">seq" + str(seq_index))
            f3.write("\t")
            # SMILES text
            f3.write(smile)
            f3.write("\t")
            # Sequence line (written as read, newline included if present)
            f3.write(sequence)
    return name


def Merge_All(smi_file, seq_file):
    """Write a tab-separated input table named after each record's header.

    The text returned by ``j_read(smi_file.name)`` (with leading/trailing
    newlines removed) is repeated on every row. Lines returned by
    ``j_reads(seq_file.name)`` are consumed in pairs: the even-indexed line,
    with spaces replaced by underscores and newlines removed, becomes the
    first column, and the odd-indexed line that follows it becomes the third
    column. A trailing unpaired line is ignored.

    Args:
        smi_file: Object whose ``name`` attribute is passed to ``j_read``.
            NOTE(review): presumably a file holding one SMILES string -
            confirm against the caller.
        seq_file: Object whose ``name`` attribute is passed to ``j_reads``.
            NOTE(review): presumably a two-line-per-record FASTA file
            (header, then sequence) whose lines keep their newline - confirm.

    Returns:
        The output path: the value of ``Name()`` with ``"kcat_input.tsv"``
        appended. The file is opened in append mode.
    """
    smile = j_read(smi_file.name)
    smile = smile.strip("\n")

    # Read the sequence file (expected to have had line wraps removed).
    contents = j_reads(seq_file.name)

    name = Name()
    name = name + r"kcat_input.tsv"  # result file name

    # Open the output once instead of re-opening it for every record.
    with open(name, "a") as f3:
        f3.write("Substrate Name	Substrate SMILES	Protein Sequence")
        f3.write("\n")

        # Pair each even-indexed line with the odd-indexed line after it.
        for header_line, sequence in zip(contents[0::2], contents[1::2]):
            # Record name: the preceding line, made safe for a TSV cell.
            info = header_line.replace(' ', '_').replace('\n', '')
            f3.write(info)
            f3.write("\t")
            # SMILES text
            f3.write(smile)
            f3.write("\t")
            # Sequence line (written as read, newline included if present)
            f3.write(sequence)
    return name