"""Helper functions: example sequences and sequence type/length classification."""

# Example FASTA-style texts keyed by the label prefix that selects them.
# Each record is a ">" header line followed by one or more sequence lines.
_SEQUENCE_EXAMPLES = {
    "Amino Acid Long Sequence": """\
>CCP45025.1|FEATURES|hmdarg|isoniazid|kasA
GARAGVMTPVSACSSGSEAIAHAWRQIVMGDADVAVCGGVEGPIEALPIAAFSMMRAMST
RNDEPERASRPFDKDRDGFVFGEAGALMLIETEEHAKARGAKPLARLLGAGITSDAFHMV
APAADGVRAGRAMTRSLELAGLSPADIDHVNAHGTATPIGDAAEANAIRVAGCDQAAVYA
PKSALGHSIGAVGALESVLTVLTLRDGVIPPTLNYETPDPEIDLDVVAGEPRYGDYRYAV
NNSFGFGGHN
>gi:505065763:ref:WP_015252865.1:|FEATURES|deeparg|tunicamycin|tmrB
GSFGSGKTQTAFELHRRLNPSYVYDPEKMGFALRSMVPQEIAKDDFQSYPLWRAFNYSLL""",
    "Amino Acid Short Sequence": """\
>gi:505065763:ref:WP_015252865.1:|FEATURES|deeparg|tunicamycin|tmrB_0
RGIIIVPMTIVYPEYFNEIIGRLRQEGRIV
>AGQ48857.1|FEATURES|hmdarg|pleuromutilin|eatAv_0
GNFSIYEEQKKLRDEFEMAQNEKLKKEVSR""",
    "Nucleotide Long Sequence": """\
>AJ635405|FEATURES|resfinder|beta-lactam|blaLEN9
GGATGGTGGAAATGGATCTGGCCAGCGGCCGCACGCTGGCCGCCTGGCGCGCCGATGAACGCTTTCCCATGGTGAGCACCTTTAAAGTGCTGCTGTGCGGCGCGGTGCTGGCGCGGGTGGATGCCGGGCTCGAACAACTGGATCGGCGGATCCACTACCGCCAGCAGGATCTGGTGGACTACTCCCCGGTCAGCGAAAAACACCTTGTCGACGGGATGACGATCGGCGAACTCTGTGCCGCCGCCATCACCCTGAGCGATAACAGCGCTGGCAATCTGCTGCTGGCCACCGTCGGCGGCCCCGCGGGATTAACTGCCTTTCTGCGCCAGATCGGTGACAACGTCACCCGTCTTGACCGCTGGGAAACGGCACTGAATGAGGCGCTTCCCGGCGACGCGCGCGACACCACCACCCCGGCCAGCATGGCCGCCACGCTGCGCAAACTACTGACCGCGCAGCATCTGAGCGCCCGT""",
    "Nucleotide Short Sequence": """\
>S60108|FEATURES|resfinder|aminoglycoside|kgmB_0
CCGCACCCGGCTCCCGGACCCGGCGATCCCGAGGACCCGAGGCTGGCGGAGGTCGTCGACGCGGTCCGGTCCAGCAGGCGCTACCAGAGCGTCGCGCCCG
>APOK01000044|FEATURES|resfinder|beta-lactam|blaOXA-290_0
ACATATGATGGGCAAACATTTCAAGAATATGGCAATGCGTTGAGTCGATCGAATACGGCTTATATTCCAGCCTCAACCTTCAAGATGTTAAATGCTCTGA""",
}

# Characters accepted as nucleotide symbols ("-" is allowed as well).
# Lower-case letters are included so that lower-case input is not
# misclassified as amino acid.
_NUCLEOTIDE_CHARS = frozenset("ATGC-atgc")

# A sequence strictly longer than these thresholds is classified as "long".
_NT_LONG_THRESHOLD = 150
_AA_LONG_THRESHOLD = 50


def get_sequence_example(example: str):
    """Return the example sequence text selected by the label *example*.

    Args:
        example: Label whose prefix selects the example, e.g.
            ``"Amino Acid Long Sequence"`` or ``"Nucleotide Short Sequence"``.

    Returns:
        The matching multi-line example text, or ``None`` when *example*
        starts with none of the known labels.
    """
    # The received label is echoed to stdout, as the original helper did.
    print(example)
    for prefix, fasta_text in _SEQUENCE_EXAMPLES.items():
        if example.startswith(prefix):
            return fasta_text
    return None


def classify_sequence(sequence: str) -> str:
    """Classify a single sequence as nucleotide or amino acid.

    Args:
        sequence: The sequence characters to inspect.

    Returns:
        ``"nt"`` (nucleotide) when every character is one of A/T/G/C
        (either case) or ``-``; otherwise ``"aa"`` (amino acid).
        An empty sequence yields ``"nt"`` because no character fails the check.
    """
    if all(char in _NUCLEOTIDE_CHARS for char in sequence):
        return "nt"  # nucleotide
    return "aa"  # amino acid


def count_length_sequences(sequence) -> int:
    """Return the length of the longest item in *sequence*.

    Args:
        sequence: An iterable of sequences (anything supporting ``len``).

    Returns:
        The maximum item length, or ``0`` when *sequence* is empty.
    """
    return max((len(item) for item in sequence), default=0)


def classify_sequence_type_length(sequence) -> tuple:
    """Classify a collection of sequences by type and by length class.

    The type is decided from the first sequence only; the length class is
    decided from the longest sequence in the collection.

    Args:
        sequence: An indexable collection of sequence strings.

    Returns:
        A ``(sequence_type, sequence_length_type)`` tuple where
        ``sequence_type`` is ``"nt"`` or ``"aa"`` and
        ``sequence_length_type`` is ``"l"`` (long) or ``"s"`` (short).
        Nucleotide input is long above 150 characters, amino-acid input
        above 50.

    Raises:
        IndexError: If *sequence* is empty.
    """
    sequence_type = classify_sequence(sequence[0])
    longest = count_length_sequences(sequence)

    if sequence_type == "nt":
        threshold = _NT_LONG_THRESHOLD
    else:
        threshold = _AA_LONG_THRESHOLD

    sequence_length_type = "l" if longest > threshold else "s"
    return (sequence_type, sequence_length_type)