Files changed (5) hide show
  1. README.md +33 -14
  2. llm_calls.py +128 -0
  3. med_streamlit.py +262 -0
  4. requirements.txt +7 -0
  5. rp_logo.jpg +0 -0
README.md CHANGED
@@ -1,14 +1,33 @@
1
- ---
2
- title: Med Copilot
3
- emoji: 🐒
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.42.2
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Medication Research CoPilot using Perplexity.AI and OpenAI
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Medication research CoPilot
2
+
3
+ ## Introduction
4
+ This project is a web application that allows users to perform an AI based analysis of medications.
5
+ Users start with either a list of medications, or a dataset from a previous interaction with the tool,
6
+ can ask questions, modify, delete or add columns and rows, and download the updated dataset.
7
+
8
+ ## Installation
9
+ To install the project, clone the repository and run the following command:
10
+ ```bash
11
+ conda create --name med-copilot python=3.10
12
+ conda activate med-copilot
13
+ pip install -r requirements.txt
14
+ ```
15
+
16
+ ## Running the application
17
+ To run the application, run the following command:
18
+ ```bash
19
+ streamlit run med_streamlit.py
20
+ ```
21
+
22
+ ### Using the application
23
+ 1. Upload a dataset with a list of medications. The dataset should be in an Excel file with a sheet called "Data". If you are continuing the work from a previous session, upload the data that was downloaded on the last interaction.
24
+ 2. Define the AI service to use — Perplexity or OpenAI.
25
+ 3. Input the API key for the service. For Perplexity, see [here](https://docs.perplexity.ai/guides/getting-started). For OpenAI, see [here](https://platform.openai.com/api-keys).
26
+ 4. Input the prompt for the AI service. See below for more details.
27
+ 5. Inspect the dataset, explanations and references to make sure the responses are correct.
28
+ 6. Download the updated dataset by clicking on the "Download" button.
29
+
30
+ ## Prompt
31
+ Note that the default system prompt can be found [here](med_streamlit.py).
32
+ Consider modifying the prompt to better suit your needs, for example for a specific disease or condition.
33
+
llm_calls.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+ from openai import OpenAI
4
+
5
+ import requests
6
+ import json
7
+ import simplejson
8
+
9
+ from pydantic import BaseModel
10
+
11
class AnswerFormat(BaseModel):
    """Structured response contract requested from the LLM providers.

    Passed as a JSON schema to the Perplexity ``response_format`` option and
    mirrored by the keys the Streamlit app reads from the parsed reply.
    """

    # Updated dataset as a list of row records (one dict per medication row).
    dataset: List[Dict]
    # Renamed from "explanations": the system prompt, the validator's required
    # keys, and the UI all use the singular "explanation" key.
    explanation: str
    # Citations/links supporting the findings.
    references: str
15
+
16
+
17
def query_perplexity(
    system_prompt: str,
    user_prompt: str,
    json_data: str,
    api_key: str,
    url="https://api.perplexity.ai/chat/completions",
    model="sonar-pro",
    timeout=120,
):
    """Query the Perplexity AI chat-completions API for a structured response.

    Args:
        system_prompt (str): System message providing AI context.
        user_prompt (str): User's query.
        json_data (str): JSON data representing the current dataset.
        api_key (str): Perplexity AI API key.
        url (str): API endpoint.
        model (str): Perplexity AI model to use.
        timeout (float): Seconds to wait for the HTTP response before
            ``requests`` raises a timeout error (the previous version had
            no timeout and could hang indefinitely).

    Returns:
        str: Message content from the API on success, or a human-readable
            error string for a non-200 status code.
    """

    payload = {
        "model": model,
        "messages": [
            # Extra instruction steers Perplexity's web citations into the
            # "references" field of AnswerFormat.
            {"role": "system", "content": f"{system_prompt}\n"
                                          f"Make sure you add the citations found to the references key"},
            {"role": "user", "content": f"Here is the dataset: {json_data}\n\n"
                                        f"User query:\n"
                                        f"{user_prompt}"},
        ],
        # Ask the API to emit JSON conforming to the pydantic model's schema.
        "response_format": {
            "type": "json_schema",
            "json_schema": {"schema": AnswerFormat.model_json_schema()},
        },
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Network/timeout exceptions propagate to the caller, which already wraps
    # LLM calls in a generic error handler.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    if response.status_code == 200:
        response_json = response.json()
        return response_json["choices"][0]["message"]["content"]
    else:
        return f"API request failed with status code {response.status_code}, details: {response.text}"
77
+
78
+
79
+
80
def query_openai(system_prompt: str, user_prompt: str, json_data: str, openai_client: OpenAI) -> str:
    """Send the dataset and user query to OpenAI and return the reply text.

    Args:
        system_prompt (str): System prompt providing context to the AI.
        user_prompt (str): User's query.
        json_data (str): JSON data representing the current dataset.
        openai_client (OpenAI): OpenAI client instance with API key set.

    Returns:
        str: JSON response from the API, or an error string when the API
            returns no choices.
    """

    # Dataset and query are sent as two separate user messages.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Here is the dataset: {json_data}"},
        {"role": "user", "content": user_prompt},
    ]

    completion = openai_client.chat.completions.create(
        model="gpt-4-turbo",
        messages=conversation,
        response_format={"type": "json_object"},  # force a JSON reply
    )

    # Guard clause: an empty choices list means the API gave us nothing usable.
    if not completion.choices:
        return "Bad response from OpenAI"
    return completion.choices[0].message.content
108
+
109
+
110
def validate_llm_response(response: str) -> dict:
    """Parse a raw LLM reply and validate its structure.

    Args:
        response (str): Raw response text, expected to be a JSON object with
            "dataset", "explanation" and "references" keys.

    Returns:
        dict: The parsed response, or None if the text is not parseable JSON.

    Raises:
        ValueError: If the JSON parses but is not an object or lacks one of
            the required keys. (In the original code this validation was
            unreachable: it was placed after the return statements.)
    """

    # Extract dict from JSON text.
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        try:
            parsed = simplejson.loads(response)  # More forgiving JSON parser
        except simplejson.JSONDecodeError:
            return None  # JSON is too broken to fix

    if not isinstance(parsed, dict):
        raise ValueError("LLM response is valid JSON but not a JSON object")

    # Robustness: the Perplexity schema historically used the plural key
    # "explanations" while the app reads "explanation" — normalize it.
    if "explanations" in parsed and "explanation" not in parsed:
        parsed["explanation"] = parsed.pop("explanations")

    # Validate expected keys (now reachable; message uses parsed.keys(), not
    # the raw string's .keys(), which would have raised AttributeError).
    required_keys = {"dataset", "explanation", "references"}
    if not required_keys.issubset(parsed.keys()):
        raise ValueError(f"Missing required keys: {required_keys - parsed.keys()}")

    return parsed  # Return as a structured dictionary
127
+
128
+
med_streamlit.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import io
4
+ from typing import Dict, List
5
+
6
+ import pandas as pd
7
+ import streamlit as st
8
+ from dotenv import load_dotenv
9
+ from openai import OpenAI
10
+
11
+ import llm_calls
12
+ from llm_calls import validate_llm_response
13
+
14
# Load environment variables from a local .env file (python-dotenv), used as
# fallback values for the API key/base inputs in the sidebar.
load_dotenv()

# Condition the copilot is scoped to; interpolated into SYSTEM_PROMPT below.
CONDITION_NAME = "Retinitis Pigmentosa (RP)"

# System prompt sent with every LLM request. It pins the reply contract: a
# single JSON object with "dataset", "explanation" and "references" keys,
# which the rest of this script reads after parsing.
SYSTEM_PROMPT = f"""
You are a medical assistant specialized in modifying structured medical data.
You will receive JSON input representing a dataset of medications for {CONDITION_NAME}.

Your task is to:
- Answer user requests about the provided medication data
- Either Add new columns or rows if requested, or modify existing ones
- Provide references, explanations and additional remarks
Always return only a JSON object with:
- "dataset": updated dataset
- "explanation": explanation of changes and additional information related to the findings.
Specify the change made for each medication
- "references": References for findings, i.e. links to scientific papers or websites.
Specify which reference relates to which finding on each medication.

Additional guidelines:
1. Please respond in valid JSON format only.
2. Make sure the JSON is valid, e.g. has no unterminated strings or missing commas.
3. Ensure the response starts with `{{` and ends with `}}` without any trailing text.
"""
39
+
40
+
41
def update_dataframe(records: List[Dict] | pd.DataFrame):
    """Store *records* as the session's current DataFrame.

    Accepts either a ready-made DataFrame or a list of row dicts (as parsed
    from an LLM reply) and writes it to ``st.session_state.df``.
    """
    print(f"UPDATING DATAFRAME: {records}")
    # Wrap plain records in a DataFrame; pass an existing DataFrame through.
    frame = records if isinstance(records, pd.DataFrame) else pd.DataFrame(records)
    st.session_state.df = frame
    # st.rerun() deliberately not called here; the caller decides when to rerun.
51
+
52
+
53
# Page config: wide layout so the dataset table gets the full viewport width.
st.set_page_config(layout="wide", page_title="RP Medication Analyzer")
col1, col2 = st.columns([2, 18])  # narrow logo column beside a wide title column
col1.image("rp_logo.jpg", use_container_width=True)
col2.title("Analyze RP Related Medications")

# Sidebar for API Key settings
with st.sidebar:
    st.subheader("Select AI service")
    # The chosen provider drives which credential inputs render below and
    # which llm_calls.* function is dispatched later in the script.
    llm_provider = st.radio(options=["Perplexity.ai", "OpenAI"], index=0, label="API")

    api_key = None  # Initialize API key

    if llm_provider == "OpenAI":
        st.subheader("OpenAI API key")
        # Typed values win; environment variables are the fallback.
        api_base_input = st.text_input(
            "Enter API Base (Leave empty to use env variable)",
            value=os.environ.get("OPENAI_API_BASE", ""),
        )
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("OPENAI_API_KEY", ""),
        )

        openai_api_base = api_base_input if api_base_input else os.environ.get("OPENAI_API_BASE")
        api_key = api_key_input if api_key_input else os.environ.get("OPENAI_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 OpenAI API key is required!")

        # NOTE(review): the client is still constructed even when api_key is
        # missing — the error above does not stop script execution.
        openai_client = OpenAI(api_key=api_key)
        openai_client.api_base = openai_api_base

    elif llm_provider == "Perplexity.ai":
        st.subheader("Perplexity.ai API key")
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("PERPLEXITY_API_KEY", ""),
        )
        api_key = api_key_input if api_key_input else os.environ.get("PERPLEXITY_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 Perplexity.ai API key is required!")
101
+
102
# Ensure session persistence: Streamlit reruns this whole script on every
# interaction, so the dataset and chat artifacts live in session_state.
if "df" not in st.session_state:
    st.session_state.df = None
if "uploaded_file" not in st.session_state:
    st.session_state.uploaded_file = None
if "explanation" not in st.session_state:
    st.session_state.explanation = "No modifications yet."
if "references" not in st.session_state:
    st.session_state.references = "No additional references."
if "last_prompt" not in st.session_state:
    st.session_state.last_prompt = ""
if "last_response" not in st.session_state:
    st.session_state.last_response = {}
if "history" not in st.session_state:
    st.session_state.history = []  # Stores all past interactions

# File uploader
file = st.file_uploader("Upload an Excel file", type=["xlsx"])

print(f"FILE: {file}")
# Only (re)load when a new file object arrives, so a plain rerun doesn't re-parse.
if file and file != st.session_state.uploaded_file:
    try:
        with pd.ExcelFile(file) as xls:
            # A "Metadata" sheet (written by the download button) restores the
            # prompt/explanation history from a previous session.
            if "Metadata" in xls.sheet_names:
                st.session_state.history = pd.read_excel(xls, sheet_name="Metadata").to_dict(orient="records")
            if "Data" in xls.sheet_names:
                data_df = pd.read_excel(xls, sheet_name="Data")
                update_dataframe(data_df)
            else:
                st.error("🚨 No 'Data' sheet found in the uploaded file. Make sure the file has it")

        st.session_state.uploaded_file = file
        print("File uploaded successfully!")
        st.success("βœ… File uploaded successfully!")
    except Exception as e:
        print(f"Error reading file: {e}")
        st.error(f"🚨 Error reading file: {e}")


# Main dataset view.
if st.session_state.df is not None:
    st.write("### Updated Dataset")
    st.dataframe(st.session_state.df, use_container_width=True)
else:
    st.warning("⚠️ Upload a file to proceed.")

# Explanation & remarks from the most recent LLM interaction.
if st.session_state.explanation:
    with st.expander("Explanation and remarks"):
        st.info(st.session_state.explanation)
if st.session_state.references:
    with st.expander("References"):
        st.warning(st.session_state.references)
if st.session_state.last_prompt:
    with st.expander("πŸ“œ Sent Prompt"):
        st.code(st.session_state.last_prompt, language="plaintext")

# if st.session_state.last_response:
#     with st.expander("🧠 LLM Response (Raw)"):
#         st.json(st.session_state.last_response)
161
+
162
# User query input
input_text = st.chat_input("Type your prompt here")

# 🚨 Validate: Ensure both API key and dataset are present before making an API call
if input_text:
    if not api_key:
        st.error("🚨 API key is missing! Please provide a valid key before proceeding.")
    elif st.session_state.df is None:
        st.error("🚨 No dataset uploaded! Please upload an Excel file.")
    else:
        # Convert dataframe to JSON for LLM processing
        json_data = st.session_state.df.to_json(orient="records")
        print(json_data)
        with st.spinner(f"Processing request: *{input_text}*..."):
            response = None  # Ensure response is defined before use

            # Call the appropriate LLM provider
            if llm_provider == "OpenAI":
                response = llm_calls.query_openai(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    openai_client=openai_client,
                )
            elif llm_provider == "Perplexity.ai":
                response = llm_calls.query_perplexity(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    api_key=api_key,
                )

            print(f"Response:{response}")

            # Ensure response exists before processing
            if response:
                # Clear the current table; it is repopulated from the parsed reply.
                st.session_state.df = None
                try:
                    parsed_response = validate_llm_response(response)
                    print(f"Parsed response: {parsed_response}")

                    st.session_state.last_prompt = input_text
                    st.session_state.last_response = response  # Keep full JSON response

                    # Display structured output
                    if "error" in parsed_response:
                        st.error(parsed_response["error"])
                    else:
                        print(f"Parsed data: {parsed_response['dataset']}")
                        update_dataframe(parsed_response["dataset"])
                        st.session_state.explanation = parsed_response["explanation"]
                        st.session_state.references = parsed_response["references"]
                        # Append this interaction so it round-trips through the
                        # downloaded "Metadata" sheet.
                        st.session_state.history.append({
                            "Prompt": input_text,
                            "Explanation": parsed_response["explanation"],
                            "References": parsed_response["references"]
                        })
                except json.JSONDecodeError:
                    st.error("🚨 Error parsing response: Invalid JSON format.")
                except Exception as e:
                    st.error(f"🚨 Unexpected error: {e}")

                # Rerun so the refreshed dataset and expanders render immediately.
                st.rerun()
225
+
226
+
227
# πŸ“₯ Download Updated Excel
if st.session_state.df is not None:
    st.sidebar.subheader("Download Updated Dataset")

    def generate_excel(dataframe, history):
        """Serialize the dataset (sheet "Data") and interaction history
        (sheet "Metadata") into an in-memory xlsx stream for download."""
        output_stream = io.BytesIO()
        with pd.ExcelWriter(output_stream, engine="xlsxwriter") as writer:
            dataframe.to_excel(writer, index=False, sheet_name="Data")
            # Convert history to DataFrame and save in a new sheet
            if history:
                history_df = pd.DataFrame(history)
                history_df.to_excel(writer, index=False, sheet_name="Metadata")

            workbook = writer.book

            # Apply word wrapping. "Metadata" is only present in writer.sheets
            # when history was non-empty, so history_df is defined when used.
            for sheet_name in ["Data", "Metadata"]:
                if sheet_name in writer.sheets:
                    worksheet = writer.sheets[sheet_name]
                    wrap_format = workbook.add_format({"text_wrap": True, "align": "top", "valign": "top"})

                    # Apply word wrap to all columns
                    df_to_format = dataframe if sheet_name == "Data" else history_df
                    for col_num, col_name in enumerate(df_to_format.columns):
                        worksheet.set_column(col_num, col_num, 30, wrap_format)  # Adjust width if needed

        # Rewind after the writer closes so the download reads from the start.
        output_stream.seek(0)
        return output_stream


    st.sidebar.download_button(
        "πŸ“₯ Download Excel File",
        data=generate_excel(st.session_state.df, st.session_state.history),
        file_name="updated_dataset.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pandasai
+ streamlit
+ openai
+ openpyxl
+ xlsxwriter
+ pydantic
+ simplejson
+ requests
+ python-dotenv
rp_logo.jpg ADDED