Spaces:
Sleeping
Sleeping
""" | |
Helper functions for example sequences and sequence type/length classification.
""" | |
# (label prefix, example text) pairs served by get_sequence_example().
# The prefixes are mutually exclusive, so lookup order does not matter.
_SEQUENCE_EXAMPLES = (
    (
        "Amino Acid Long Sequence",
        """\
>CCP45025.1|FEATURES|hmdarg|isoniazid|kasA
GARAGVMTPVSACSSGSEAIAHAWRQIVMGDADVAVCGGVEGPIEALPIAAFSMMRAMST
RNDEPERASRPFDKDRDGFVFGEAGALMLIETEEHAKARGAKPLARLLGAGITSDAFHMV
APAADGVRAGRAMTRSLELAGLSPADIDHVNAHGTATPIGDAAEANAIRVAGCDQAAVYA
PKSALGHSIGAVGALESVLTVLTLRDGVIPPTLNYETPDPEIDLDVVAGEPRYGDYRYAV
NNSFGFGGHN
>gi:505065763:ref:WP_015252865.1:|FEATURES|deeparg|tunicamycin|tmrB
GSFGSGKTQTAFELHRRLNPSYVYDPEKMGFALRSMVPQEIAKDDFQSYPLWRAFNYSLL""",
    ),
    (
        "Amino Acid Short Sequence",
        """\
>gi:505065763:ref:WP_015252865.1:|FEATURES|deeparg|tunicamycin|tmrB_0
RGIIIVPMTIVYPEYFNEIIGRLRQEGRIV
>AGQ48857.1|FEATURES|hmdarg|pleuromutilin|eatAv_0
GNFSIYEEQKKLRDEFEMAQNEKLKKEVSR""",
    ),
    (
        "Nucleotide Long Sequence",
        """\
>AJ635405|FEATURES|resfinder|beta-lactam|blaLEN9
GGATGGTGGAAATGGATCTGGCCAGCGGCCGCACGCTGGCCGCCTGGCGCGCCGATGAACGCTTTCCCATGGTGAGCACCTTTAAAGTGCTGCTGTGCGGCGCGGTGCTGGCGCGGGTGGATGCCGGGCTCGAACAACTGGATCGGCGGATCCACTACCGCCAGCAGGATCTGGTGGACTACTCCCCGGTCAGCGAAAAACACCTTGTCGACGGGATGACGATCGGCGAACTCTGTGCCGCCGCCATCACCCTGAGCGATAACAGCGCTGGCAATCTGCTGCTGGCCACCGTCGGCGGCCCCGCGGGATTAACTGCCTTTCTGCGCCAGATCGGTGACAACGTCACCCGTCTTGACCGCTGGGAAACGGCACTGAATGAGGCGCTTCCCGGCGACGCGCGCGACACCACCACCCCGGCCAGCATGGCCGCCACGCTGCGCAAACTACTGACCGCGCAGCATCTGAGCGCCCGT""",
    ),
    (
        "Nucleotide Short Sequence",
        """\
>S60108|FEATURES|resfinder|aminoglycoside|kgmB_0
CCGCACCCGGCTCCCGGACCCGGCGATCCCGAGGACCCGAGGCTGGCGGAGGTCGTCGACGCGGTCCGGTCCAGCAGGCGCTACCAGAGCGTCGCGCCCG
>APOK01000044|FEATURES|resfinder|beta-lactam|blaOXA-290_0
ACATATGATGGGCAAACATTTCAAGAATATGGCAATGCGTTGAGTCGATCGAATACGGCTTATATTCCAGCCTCAACCTTCAAGATGTTAAATGCTCTGA""",
    ),
)


def get_sequence_example(example):
    """Return the built-in example text whose label prefix matches *example*.

    Args:
        example: Label string. Only its beginning is inspected, so any
            trailing text after a known prefix is ignored.

    Returns:
        The multi-line example string (``>`` header lines followed by
        sequence lines, no trailing newline) for the first prefix that
        *example* starts with, or ``None`` when no prefix matches.
    """
    for prefix, text in _SEQUENCE_EXAMPLES:
        if example.startswith(prefix):
            return text
    # Unknown label: made explicit instead of falling off the end.
    return None
def classify_sequence(sequence):
    """Classify a sequence string as nucleotide or amino acid.

    Args:
        sequence: The sequence characters as a single string.

    Returns:
        ``"nt"`` when every character is one of ``A``, ``T``, ``G``, ``C``
        or ``-`` (compared case-insensitively), otherwise ``"aa"``. An
        empty string has no disqualifying character and yields ``"nt"``.
    """
    nucleotide_chars = frozenset("ATGC-")
    # Upper-case first so lowercase nucleotide input ("atgc") is not
    # mistaken for an amino-acid sequence.
    if set(sequence.upper()) <= nucleotide_chars:
        return "nt"  # nucleotide
    return "aa"  # amino acid
def count_length_sequences(sequence):
    """Return the length of the longest item in *sequence*.

    Args:
        sequence: Iterable of sized items (e.g. a list of sequence strings).

    Returns:
        The largest ``len(item)`` found, or ``0`` when *sequence* is empty.
    """
    # default=0 keeps an empty input from raising ValueError inside max().
    return max((len(item) for item in sequence), default=0)
def classify_sequence_type_length(sequence):
    """Return the ``(type, length class)`` pair for a collection of sequences.

    The type (``"nt"`` or ``"aa"``) is taken from the first item only, via
    ``classify_sequence``. The length class compares the longest item, as
    reported by ``count_length_sequences``, against a per-type cut-off:
    above 150 for nucleotides or above 50 for amino acids is ``"l"``
    (long); anything else is ``"s"`` (short).
    """
    kind = classify_sequence(sequence[0])
    longest = count_length_sequences(sequence)

    # Nucleotide input uses the 150 cut-off; everything else uses 50.
    cutoff = 150 if kind == "nt" else 50
    length_class = "l" if longest > cutoff else "s"
    return (kind, length_class)