"""Gradio demo for schemist."""

from typing import Dict, Iterable, List, Tuple, Union
from io import TextIOWrapper
import os

# Set before gradio is imported, preserving the original statement order.
os.environ["COMMANDLINE_ARGS"] = "--no-gradio-queue"

from carabiner import cast, print_err
from carabiner.pd import read_table
import gradio as gr
import nemony as nm
import numpy as np
import pandas as pd
from rdkit.Chem import Draw, Mol
import schemist as sch
from schemist.converting import (
    _TO_FUNCTIONS,
    _FROM_FUNCTIONS,
    convert_string_representation,
    _x2mol,
)
from schemist.tables import converter


def load_input_data(file: TextIOWrapper) -> Tuple[gr.Dataframe, gr.Dropdown]:
    """Load an uploaded table and prepare the widgets that depend on it.

    Args:
        file: Uploaded file handle; only its ``name`` (path) is used.

    Returns:
        A visible ``gr.Dataframe`` showing the table, and a ``gr.Dropdown``
        whose choices are the names of the non-numeric columns.
    """
    table = read_table(file.name)
    # Iterating a DataFrame yields column names, so this is a list of names.
    string_cols = list(table.select_dtypes(exclude=[np.number]))
    return (
        gr.Dataframe(value=table, visible=True),
        gr.Dropdown(choices=string_cols, interactive=True),
    )


def _clean_split_input(strings: str) -> List[str]:
    """Split pasted text on newlines and commas, stripping each token.

    Empty tokens (e.g. from a trailing newline) are kept, so callers get
    exactly one entry per delimiter-separated field.
    """
    return [
        token.strip()
        for line in strings.split("\n")
        for token in line.split(",")
    ]


def _convert_input(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
) -> Dict[str, List[str]]:
    """Convert pasted strings and return the results as lists of strings.

    Args:
        strings: Raw pasted text; split with ``_clean_split_input``.
        input_representation: Format of the input strings.
        output_representation: One or more requested output formats.

    Returns:
        A dict with the keys returned by ``convert_string_representation``;
        every value is cast to a list and its items stringified.
    """
    converted = convert_string_representation(
        strings=_clean_split_input(strings),
        input_representation=input_representation,
        output_representation=output_representation,
    )
    return {
        key: [str(item) for item in cast(val, to=list)]
        for key, val in converted.items()
    }


def convert_one(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
) -> gr.DataFrame:
    """Convert pasted strings and return them as a visible results table.

    The pasted text becomes a one-column DataFrame, with the column named
    after ``input_representation``, which is then passed to ``convert_file``.
    """
    pasted = pd.DataFrame({
        input_representation: _clean_split_input(strings),
    })
    converted = convert_file(
        df=pasted,
        column=input_representation,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    return gr.DataFrame(converted, visible=True)


def convert_file(
    df: pd.DataFrame,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    output_representation: Union[str, Iterable[str]] = 'smiles'
) -> pd.DataFrame:
    """Convert one column of a table to the requested representations.

    Progress and result messages are written with ``print_err`` and shown
    in the UI with ``gr.Info``.

    Args:
        df: Table holding the molecules.
        column: Name of the column containing the input strings.
        input_representation: Format of the strings in ``column``.
        output_representation: One or more requested output formats.

    Returns:
        The converted table, with the output-representation columns first
        and all remaining columns after them in their existing order.
    """
    message = f"Converting from {input_representation} to {output_representation}..."
    print_err(message)
    gr.Info(message, duration=3)
    errors, df = converter(
        df=df,
        column=column,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Normalise once to a list. Testing `col not in output_representation`
    # directly is a substring test when a single string is passed, which
    # would drop unrelated columns such as "smi" for output "smiles".
    output_columns = cast(output_representation, to=list)
    df = df[
        output_columns
        + [col for col in df if col not in output_columns]
    ]
    all_err = sum(errors.values())
    message = (
        f"Converted {df.shape[0]} molecules from "
        f"{input_representation} to {output_representation} "
        f"with {all_err} errors!"
    )
    print_err(message)
    gr.Info(message, duration=5)
    return df


def draw_one(
    strings: Union[Iterable[str], str],
    input_representation: str = 'smiles'
):
    """Draw the pasted molecules as a grid image.

    Each molecule's legend is built from the values ``_convert_input``
    returns for the "inchikey" and "id" representations, joined by a newline.

    Args:
        strings: Raw pasted text, one or more molecules.
        input_representation: Format of the input strings.

    Returns:
        The result of ``Draw.MolsToGridImage`` with at most three molecules
        per row and 300x300 sub-images.
    """
    _ids = _convert_input(
        strings,
        input_representation,
        ["inchikey", "id"],
    )
    mols = cast(_x2mol(_clean_split_input(strings), input_representation), to=list)
    # Guard kept from the original in case a single Mol comes back unwrapped.
    if isinstance(mols, Mol):
        mols = [mols]
    legends = ["\n".join(items) for items in zip(*_ids.values())]
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=min(3, len(mols)),
        subImgSize=(300, 300),
        legends=legends,
    )


def download_table(
    df: pd.DataFrame
) -> gr.DownloadButton:
    """Write the table to a CSV file and expose it through a download button.

    The file is written relative to the current working directory and named
    ``converted-<hash>.csv``, where the hash is derived from the table
    contents.

    Returns:
        A visible ``gr.DownloadButton`` pointing at the written file.
    """
    df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
    filename = f"converted-{df_hash}.csv"
    df.to_csv(filename, index=False)
    return gr.DownloadButton(value=filename, visible=True)


with gr.Blocks() as demo:

    gr.Markdown(
        """
        # Chemical string format converter
        """
    )

    # Tab 1: paste molecules directly into a textbox.
    with gr.Tab(label="Paste one per line"):
        input_format_single = gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
        )
        input_line = gr.Textbox(
            label="Input",
            placeholder="Paste your molecule here, one per line",
            lines=2,
            interactive=True,
            submit_btn=True,
        )
        output_format_single = gr.CheckboxGroup(
            label="Output format",
            choices=list(_TO_FUNCTIONS),
            value=["id", "pubchem_name"],
            interactive=True,
        )
        download_single = gr.DownloadButton(
            label="Download converted data",
            visible=False,
        )
        with gr.Row():
            output_line = gr.DataFrame(
                label="Converted",
                interactive=False,
                visible=False,
            )
            drawing = gr.Image(label="Chemical structures")

        # On submit: convert, then draw, then offer the result for download.
        gr.on(
            [
                # go_button.click,
                input_line.submit,
            ],
            fn=convert_one,
            inputs=[
                input_line,
                input_format_single,
                output_format_single,
            ],
            outputs={
                output_line,
            }
        ).then(
            draw_one,
            inputs=[
                input_line,
                input_format_single,
            ],
            outputs=drawing,
        ).then(
            download_table,
            inputs=output_line,
            outputs=download_single
        )

    # Tab 2: upload a table and convert one of its columns.
    with gr.Tab("Convert a file"):
        input_file = gr.File(
            label="Upload a table of chemical compounds here",
            file_types=[".xlsx", ".csv", ".tsv", ".txt"],
        )
        with gr.Row():
            input_column = gr.Dropdown(
                label="Input column name",
                choices=[],
            )
            input_format = gr.Dropdown(
                label="Input string format",
                choices=list(_FROM_FUNCTIONS),
                value="smiles",
                interactive=True,
            )
        output_format = gr.CheckboxGroup(
            label="Output format",
            choices=list(_TO_FUNCTIONS),
            value=["id", "selfies"],
            interactive=True,
        )
        go_button2 = gr.Button(
            value="Convert molecules!",
        )
        download = gr.DownloadButton(
            label="Download converted data",
            visible=False,
        )
        input_data = gr.Dataframe(
            label="Input data",
            max_height=100,
            visible=False,
            interactive=False,
        )

        # Uploading a file fills the preview table and the column dropdown.
        input_file.upload(
            load_input_data,
            inputs=[input_file],
            outputs=[input_data, input_column]
        )
        # The converted table replaces the preview, then is offered for download.
        go_button2.click(
            convert_file,
            inputs=[
                input_data,
                input_column,
                input_format,
                output_format,
            ],
            outputs={
                input_data,
            }
        ).then(
            download_table,
            inputs=input_data,
            outputs=download
        )

if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)