# chem-converter / app.py
# eachanjohnson
# Sat Oct 12 18:02:57 UTC 2024 :: HF Spaces deployment
# 05fa320
"""Gradio demo for schemist."""
from typing import Iterable, List, Union
from io import TextIOWrapper
import os
os.environ["COMMANDLINE_ARGS"] = "--no-gradio-queue"
from carabiner import cast, print_err
from carabiner.pd import read_table
import gradio as gr
import nemony as nm
import numpy as np
import pandas as pd
from rdkit.Chem import Draw, Mol
import schemist as sch
from schemist.converting import (
_TO_FUNCTIONS,
_FROM_FUNCTIONS,
convert_string_representation,
_x2mol,
)
from schemist.tables import converter
def load_input_data(
    file: Union[TextIOWrapper, str]
) -> "tuple[gr.Dataframe, gr.Dropdown]":
    """Read an uploaded table and build the preview and column picker.

    Parameters
    ----------
    file : TextIOWrapper or str
        Uploaded file object exposing its path as ``.name``. A plain path
        string (which has no ``.name``) is accepted as well.

    Returns
    -------
    tuple
        A visible ``gr.Dataframe`` holding the loaded table, followed by an
        interactive ``gr.Dropdown`` whose choices are the table's
        non-numeric columns.
    """
    # Use the object itself as the path when it carries no ``.name``.
    path = getattr(file, "name", file)
    table = read_table(path)
    # Only non-numeric columns can hold chemical strings, so only those are
    # offered as the input column.
    string_cols = list(table.select_dtypes(exclude=[np.number]))
    return (
        gr.Dataframe(value=table, visible=True),
        gr.Dropdown(choices=string_cols, interactive=True),
    )
def _clean_split_input(strings: str) -> List[str]:
return [s2.strip() for s in strings.split("\n") for s2 in s.split(",")]
def _convert_input(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
) -> dict:
    """Convert pasted chemical strings into other representations.

    Parameters
    ----------
    strings : str
        Raw pasted text; entries are separated by newlines and/or commas.
    input_representation : str
        Format the pasted entries are written in.
    output_representation : str or iterable of str
        Format(s) to convert to.

    Returns
    -------
    dict
        One item per key returned by ``convert_string_representation``
        (presumably the output representation names; confirm against
        schemist). Each value is a list of ``str``.
    """
    converted = convert_string_representation(
        strings=_clean_split_input(strings),
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Normalise every value to a list of plain strings so callers can join
    # and zip them without caring what type the converter produced.
    return {
        key: [str(item) for item in cast(val, to=list)]
        for key, val in converted.items()
    }
def convert_one(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
):
    """Convert pasted chemical strings and return them as a visible table.

    The pasted text is split into entries, placed in a one-column table
    named after ``input_representation``, and passed through
    ``convert_file``. The result is wrapped in a visible ``gr.DataFrame``.
    """
    entries = _clean_split_input(strings)
    single_column = pd.DataFrame({input_representation: entries})
    converted = convert_file(
        df=single_column,
        column=input_representation,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    return gr.DataFrame(converted, visible=True)
def convert_file(
    df: pd.DataFrame,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    output_representation: Union[str, Iterable[str]] = 'smiles'
) -> pd.DataFrame:
    """Convert one column of a table into other chemical representations.

    Parameters
    ----------
    df : pd.DataFrame
        Table containing the chemical strings.
    column : str
        Name of the column in ``df`` holding the strings to convert.
    input_representation : str
        Format of the strings in ``column``.
    output_representation : str or iterable of str
        Format(s) to convert to.

    Returns
    -------
    pd.DataFrame
        The table returned by ``converter``, reordered so the requested
        output columns come first and all remaining columns follow in their
        existing order.

    Notes
    -----
    Progress and the final error total are written with ``print_err`` and
    shown to the user through ``gr.Info``.
    """
    message = f"Converting from {input_representation} to {output_representation}..."
    print_err(message)
    gr.Info(message, duration=3)
    errors, df = converter(
        df=df,
        column=column,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Normalise once to a list. Testing membership against the raw argument
    # would be a substring test when a single format is given as a ``str``,
    # silently dropping columns whose names are substrings of that format.
    output_columns = cast(output_representation, to=list)
    remaining_columns = [col for col in df if col not in output_columns]
    df = df[output_columns + remaining_columns]
    all_err = sum(errors.values())
    message = (
        f"Converted {df.shape[0]} molecules from "
        f"{input_representation} to {output_representation} "
        f"with {all_err} errors!"
    )
    print_err(message)
    gr.Info(message, duration=5)
    return df
def draw_one(
    strings: Union[Iterable[str], str],
    input_representation: str = 'smiles'
):
    """Draw the pasted molecules as a grid image with identifier legends.

    Each molecule's legend is its ``"inchikey"`` and ``"id"`` conversions
    joined by a newline. The grid has at most three molecules per row and
    300x300 sub-images.
    """
    identifiers = _convert_input(
        strings,
        input_representation,
        ["inchikey", "id"],
    )
    entries = _clean_split_input(strings)
    mols = cast(_x2mol(entries, input_representation), to=list)
    # Guard retained from the original in case a bare Mol comes back
    # unwrapped; whether ``cast`` can do that is not visible here.
    if isinstance(mols, Mol):
        mols = [mols]
    # One legend per molecule: the identifiers at the same position, one per line.
    legends = []
    for items in zip(*identifiers.values()):
        legends.append("\n".join(items))
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=min(3, len(mols)),
        subImgSize=(300, 300),
        legends=legends,
    )
def download_table(
    df: pd.DataFrame
) -> "gr.DownloadButton":
    """Write a table to a CSV file and expose it through a download button.

    Parameters
    ----------
    df : pd.DataFrame
        Table to save.

    Returns
    -------
    gr.DownloadButton
        A visible download button whose value is the written CSV file.

    Notes
    -----
    The file is written to the current working directory as
    ``converted-<hash>.csv``, where the hash is derived from the table's
    contents.
    """
    df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
    filename = f"converted-{df_hash}.csv"
    df.to_csv(filename, index=False)
    return gr.DownloadButton(value=filename, visible=True)
with gr.Blocks() as demo:
    # NOTE(review): the source this was restored from had lost its
    # indentation, so the container nesting below (what sits inside each
    # Tab and Row) is inferred. Confirm against the deployed layout.
    input_formats = list(_FROM_FUNCTIONS)
    output_formats = list(_TO_FUNCTIONS)

    gr.Markdown("\n# Chemical string format converter\n")

    # Tab 1: paste strings directly; results appear as a table and a drawing.
    with gr.Tab(label="Paste one per line"):
        input_format_single = gr.Dropdown(
            choices=input_formats,
            value="smiles",
            label="Input string format",
            interactive=True,
        )
        input_line = gr.Textbox(
            label="Input",
            placeholder="Paste your molecule here, one per line",
            lines=2,
            interactive=True,
            submit_btn=True,
        )
        output_format_single = gr.CheckboxGroup(
            choices=output_formats,
            value=["id", "pubchem_name"],
            label="Output format",
            interactive=True,
        )
        download_single = gr.DownloadButton(
            label="Download converted data",
            visible=False,
        )
        with gr.Row():
            output_line = gr.DataFrame(
                label="Converted",
                interactive=False,
                visible=False,
            )
            drawing = gr.Image(label="Chemical structures")

        # On submit: convert, then draw the structures, then offer the
        # converted table as a download.
        conversion = gr.on(
            triggers=[input_line.submit],
            fn=convert_one,
            inputs=[input_line, input_format_single, output_format_single],
            outputs={output_line},
        )
        rendering = conversion.then(
            fn=draw_one,
            inputs=[input_line, input_format_single],
            outputs=drawing,
        )
        rendering.then(
            fn=download_table,
            inputs=output_line,
            outputs=download_single,
        )

    # Tab 2: upload a table, choose the column to convert, convert in place.
    with gr.Tab(label="Convert a file"):
        input_file = gr.File(
            file_types=[".xlsx", ".csv", ".tsv", ".txt"],
            label="Upload a table of chemical compounds here",
        )
        with gr.Row():
            input_column = gr.Dropdown(
                choices=[],
                label="Input column name",
            )
            input_format = gr.Dropdown(
                choices=input_formats,
                value="smiles",
                label="Input string format",
                interactive=True,
            )
            output_format = gr.CheckboxGroup(
                choices=output_formats,
                value=["id", "selfies"],
                label="Output format",
                interactive=True,
            )
        go_button2 = gr.Button(value="Convert molecules!")
        download = gr.DownloadButton(
            label="Download converted data",
            visible=False,
        )
        input_data = gr.Dataframe(
            label="Input data",
            max_height=100,
            visible=False,
            interactive=False,
        )

        # Uploading fills the preview table and the column dropdown.
        input_file.upload(
            fn=load_input_data,
            inputs=[input_file],
            outputs=[input_data, input_column],
        )
        # Converting overwrites the preview table with the converted one,
        # then exposes it as a download.
        file_conversion = go_button2.click(
            fn=convert_file,
            inputs=[input_data, input_column, input_format, output_format],
            outputs={input_data},
        )
        file_conversion.then(
            fn=download_table,
            inputs=input_data,
            outputs=download,
        )
if __name__ == "__main__":
    # Enable the request queue, then start the server with a public share link.
    demo.queue().launch(share=True)