Files changed (5) hide show
  1. README.md +33 -14
  2. llm_calls.py +128 -0
  3. med_streamlit.py +262 -0
  4. requirements.txt +7 -0
  5. rp_logo.jpg +0 -0
README.md CHANGED
@@ -1,14 +1,33 @@
1
- ---
2
- title: Med Copilot
3
- emoji: 🐒
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.42.2
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Medication Research CoPilot using Perplexity.AI and OpenAI
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Medication research CoPilot
2
+
3
+ ## Introduction
4
+ This project is a web application that allows users to perform an AI based analysis of medications.
5
+ Users start with either a list of medications, or a dataset from a previous interaction with the tool,
6
+ can ask questions, modify, delete or add columns and rows, and download the updated dataset.
7
+
8
+ ## Installation
9
+ To install the project, clone the repository and run the following command:
10
+ ```bash
11
+ conda create --name med-copilot python=3.10
12
+ conda activate med-copilot
13
+ pip install -r requirements.txt
14
+ ```
15
+
16
+ ## Running the application
17
+ To run the application, run the following command:
18
+ ```bash
19
+ streamlit run med_streamlit.py
20
+ ```
21
+
22
+ ### Using the application
23
+ 1. Upload a dataset with a list of medications. The dataset should be in an Excel file with a sheet called "Data". If you are continuing the work from a previous session, upload the data that was downloaded on the last interaction.
24
+ 2. Define the AI service to use — Perplexity or OpenAI.
25
+ 3. Input the API key for the service. For Perplexity, see [here](https://docs.perplexity.ai/guides/getting-started). For OpenAI, see [here](https://platform.openai.com/api-keys).
26
+ 4. Input the prompt for the AI service. See below for more details.
27
+ 5. Inspect the dataset, explanations and references to make sure the responses are correct.
28
+ 6. Download the updated dataset by clicking on the "Download" button.
29
+
30
+ ## Prompt
31
+ Note that the default system prompt can be found [here](med_streamlit.py).
32
+ Consider modifying the prompt to better suit your needs, for example for a specific disease or condition.
33
+
llm_calls.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+ from openai import OpenAI
4
+
5
+ import requests
6
+ import json
7
+ import simplejson
8
+
9
+ from pydantic import BaseModel
10
+
11
class AnswerFormat(BaseModel):
    """Structured response contract requested from the LLM providers.

    Passed as a JSON schema to the Perplexity ``response_format`` option and
    mirrored by the keys the Streamlit app reads from the parsed reply.
    """

    # Updated dataset as a list of row records (one dict per medication row).
    dataset: List[Dict]
    # Renamed from "explanations": the system prompt, the validator's required
    # keys, and the UI all use the singular "explanation" key.
    explanation: str
    # Citations/links supporting the findings.
    references: str
15
+
16
+
17
def query_perplexity(
    system_prompt: str,
    user_prompt: str,
    json_data: str,
    api_key: str,
    url="https://api.perplexity.ai/chat/completions",
    model="sonar-pro",
    timeout=120,
):
    """Query the Perplexity AI chat-completions API for a structured response.

    Args:
        system_prompt (str): System message providing AI context.
        user_prompt (str): User's query.
        json_data (str): JSON data representing the current dataset.
        api_key (str): Perplexity AI API key.
        url (str): API endpoint.
        model (str): Perplexity AI model to use.
        timeout (float): Seconds to wait for the HTTP response before
            ``requests`` raises a timeout error (the previous version had
            no timeout and could hang indefinitely).

    Returns:
        str: Message content from the API on success, or a human-readable
            error string for a non-200 status code.
    """

    payload = {
        "model": model,
        "messages": [
            # Extra instruction steers Perplexity's web citations into the
            # "references" field of AnswerFormat.
            {"role": "system", "content": f"{system_prompt}\n"
                                          f"Make sure you add the citations found to the references key"},
            {"role": "user", "content": f"Here is the dataset: {json_data}\n\n"
                                        f"User query:\n"
                                        f"{user_prompt}"},
        ],
        # Ask the API to emit JSON conforming to the pydantic model's schema.
        "response_format": {
            "type": "json_schema",
            "json_schema": {"schema": AnswerFormat.model_json_schema()},
        },
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Network/timeout exceptions propagate to the caller, which already wraps
    # LLM calls in a generic error handler.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    if response.status_code == 200:
        response_json = response.json()
        return response_json["choices"][0]["message"]["content"]
    else:
        return f"API request failed with status code {response.status_code}, details: {response.text}"
77
+
78
+
79
+
80
def query_openai(system_prompt: str, user_prompt: str, json_data: str, openai_client: OpenAI) -> str:
    """Send the dataset and user query to OpenAI and return the reply text.

    Args:
        system_prompt (str): System prompt providing context to the AI.
        user_prompt (str): User's query.
        json_data (str): JSON data representing the current dataset.
        openai_client (OpenAI): OpenAI client instance with API key set.

    Returns:
        str: JSON response from the API, or an error string when the API
            returns no choices.
    """

    # Dataset and query are sent as two separate user messages.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Here is the dataset: {json_data}"},
        {"role": "user", "content": user_prompt},
    ]

    completion = openai_client.chat.completions.create(
        model="gpt-4-turbo",
        messages=conversation,
        response_format={"type": "json_object"},  # force a JSON reply
    )

    # Guard clause: an empty choices list means the API gave us nothing usable.
    if not completion.choices:
        return "Bad response from OpenAI"
    return completion.choices[0].message.content
108
+
109
+
110
def validate_llm_response(response: str) -> dict:
    """Parse a raw LLM reply and validate its structure.

    Args:
        response (str): Raw response text, expected to be a JSON object with
            "dataset", "explanation" and "references" keys.

    Returns:
        dict: The parsed response, or None if the text is not parseable JSON.

    Raises:
        ValueError: If the JSON parses but is not an object or lacks one of
            the required keys. (In the original code this validation was
            unreachable: it was placed after the return statements.)
    """

    # Extract dict from JSON text.
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        try:
            parsed = simplejson.loads(response)  # More forgiving JSON parser
        except simplejson.JSONDecodeError:
            return None  # JSON is too broken to fix

    if not isinstance(parsed, dict):
        raise ValueError("LLM response is valid JSON but not a JSON object")

    # Robustness: the Perplexity schema historically used the plural key
    # "explanations" while the app reads "explanation" — normalize it.
    if "explanations" in parsed and "explanation" not in parsed:
        parsed["explanation"] = parsed.pop("explanations")

    # Validate expected keys (now reachable; message uses parsed.keys(), not
    # the raw string's .keys(), which would have raised AttributeError).
    required_keys = {"dataset", "explanation", "references"}
    if not required_keys.issubset(parsed.keys()):
        raise ValueError(f"Missing required keys: {required_keys - parsed.keys()}")

    return parsed  # Return as a structured dictionary
127
+
128
+
med_streamlit.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import io
4
+ from typing import Dict, List
5
+
6
+ import pandas as pd
7
+ import streamlit as st
8
+ from dotenv import load_dotenv
9
+ from openai import OpenAI
10
+
11
+ import llm_calls
12
+ from llm_calls import validate_llm_response
13
+
14
# Load environment variables from a local .env file (python-dotenv), used as
# fallback values for the API key/base inputs in the sidebar.
load_dotenv()

# Condition the copilot is scoped to; interpolated into SYSTEM_PROMPT below.
CONDITION_NAME = "Retinitis Pigmentosa (RP)"

# System prompt sent with every LLM request. It pins the reply contract: a
# single JSON object with "dataset", "explanation" and "references" keys,
# which the rest of this script reads after parsing.
SYSTEM_PROMPT = f"""
You are a medical assistant specialized in modifying structured medical data.
You will receive JSON input representing a dataset of medications for {CONDITION_NAME}.

Your task is to:
- Answer user requests about the provided medication data
- Either Add new columns or rows if requested, or modify existing ones
- Provide references, explanations and additional remarks
Always return only a JSON object with:
- "dataset": updated dataset
- "explanation": explanation of changes and additional information related to the findings.
Specify the change made for each medication
- "references": References for findings, i.e. links to scientific papers or websites.
Specify which reference relates to which finding on each medication.

Additional guidelines:
1. Please respond in valid JSON format only.
2. Make sure the JSON is valid, e.g. has no unterminated strings or missing commas.
3. Ensure the response starts with `{{` and ends with `}}` without any trailing text.
"""
39
+
40
+
41
def update_dataframe(records: List[Dict] | pd.DataFrame):
    """Store *records* as the session's current DataFrame.

    Accepts either a ready-made DataFrame or a list of row dicts (as parsed
    from an LLM reply) and writes it to ``st.session_state.df``.
    """
    print(f"UPDATING DATAFRAME: {records}")
    # Wrap plain records in a DataFrame; pass an existing DataFrame through.
    frame = records if isinstance(records, pd.DataFrame) else pd.DataFrame(records)
    st.session_state.df = frame
    # st.rerun() deliberately not called here; the caller decides when to rerun.
51
+
52
+
53
# Page config: wide layout so the dataset table gets the full viewport width.
st.set_page_config(layout="wide", page_title="RP Medication Analyzer")
col1, col2 = st.columns([2, 18])  # narrow logo column beside a wide title column
col1.image("rp_logo.jpg", use_container_width=True)
col2.title("Analyze RP Related Medications")

# Sidebar for API Key settings
with st.sidebar:
    st.subheader("Select AI service")
    # The chosen provider drives which credential inputs render below and
    # which llm_calls.* function is dispatched later in the script.
    llm_provider = st.radio(options=["Perplexity.ai", "OpenAI"], index=0, label="API")

    api_key = None  # Initialize API key

    if llm_provider == "OpenAI":
        st.subheader("OpenAI API key")
        # Typed values win; environment variables are the fallback.
        api_base_input = st.text_input(
            "Enter API Base (Leave empty to use env variable)",
            value=os.environ.get("OPENAI_API_BASE", ""),
        )
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("OPENAI_API_KEY", ""),
        )

        openai_api_base = api_base_input if api_base_input else os.environ.get("OPENAI_API_BASE")
        api_key = api_key_input if api_key_input else os.environ.get("OPENAI_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 OpenAI API key is required!")

        # NOTE(review): the client is still constructed even when api_key is
        # missing — the error above does not stop script execution.
        openai_client = OpenAI(api_key=api_key)
        openai_client.api_base = openai_api_base

    elif llm_provider == "Perplexity.ai":
        st.subheader("Perplexity.ai API key")
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("PERPLEXITY_API_KEY", ""),
        )
        api_key = api_key_input if api_key_input else os.environ.get("PERPLEXITY_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 Perplexity.ai API key is required!")
101
+
102
# Ensure session persistence: Streamlit reruns this whole script on every
# interaction, so the dataset and chat artifacts live in session_state.
if "df" not in st.session_state:
    st.session_state.df = None
if "uploaded_file" not in st.session_state:
    st.session_state.uploaded_file = None
if "explanation" not in st.session_state:
    st.session_state.explanation = "No modifications yet."
if "references" not in st.session_state:
    st.session_state.references = "No additional references."
if "last_prompt" not in st.session_state:
    st.session_state.last_prompt = ""
if "last_response" not in st.session_state:
    st.session_state.last_response = {}
if "history" not in st.session_state:
    st.session_state.history = []  # Stores all past interactions

# File uploader
file = st.file_uploader("Upload an Excel file", type=["xlsx"])

print(f"FILE: {file}")
# Only (re)load when a new file object arrives, so a plain rerun doesn't re-parse.
if file and file != st.session_state.uploaded_file:
    try:
        with pd.ExcelFile(file) as xls:
            # A "Metadata" sheet (written by the download button) restores the
            # prompt/explanation history from a previous session.
            if "Metadata" in xls.sheet_names:
                st.session_state.history = pd.read_excel(xls, sheet_name="Metadata").to_dict(orient="records")
            if "Data" in xls.sheet_names:
                data_df = pd.read_excel(xls, sheet_name="Data")
                update_dataframe(data_df)
            else:
                st.error("🚨 No 'Data' sheet found in the uploaded file. Make sure the file has it")

        st.session_state.uploaded_file = file
        print("File uploaded successfully!")
        st.success("βœ… File uploaded successfully!")
    except Exception as e:
        print(f"Error reading file: {e}")
        st.error(f"🚨 Error reading file: {e}")


# Main dataset view.
if st.session_state.df is not None:
    st.write("### Updated Dataset")
    st.dataframe(st.session_state.df, use_container_width=True)
else:
    st.warning("⚠️ Upload a file to proceed.")

# Explanation & remarks from the most recent LLM interaction.
if st.session_state.explanation:
    with st.expander("Explanation and remarks"):
        st.info(st.session_state.explanation)
if st.session_state.references:
    with st.expander("References"):
        st.warning(st.session_state.references)
if st.session_state.last_prompt:
    with st.expander("πŸ“œ Sent Prompt"):
        st.code(st.session_state.last_prompt, language="plaintext")

# if st.session_state.last_response:
#     with st.expander("🧠 LLM Response (Raw)"):
#         st.json(st.session_state.last_response)
161
+
162
# User query input
input_text = st.chat_input("Type your prompt here")

# 🚨 Validate: Ensure both API key and dataset are present before making an API call
if input_text:
    if not api_key:
        st.error("🚨 API key is missing! Please provide a valid key before proceeding.")
    elif st.session_state.df is None:
        st.error("🚨 No dataset uploaded! Please upload an Excel file.")
    else:
        # Convert dataframe to JSON for LLM processing
        json_data = st.session_state.df.to_json(orient="records")
        print(json_data)
        with st.spinner(f"Processing request: *{input_text}*..."):
            response = None  # Ensure response is defined before use

            # Call the appropriate LLM provider
            if llm_provider == "OpenAI":
                response = llm_calls.query_openai(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    openai_client=openai_client,
                )
            elif llm_provider == "Perplexity.ai":
                response = llm_calls.query_perplexity(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    api_key=api_key,
                )

            print(f"Response:{response}")

            # Ensure response exists before processing
            if response:
                # Clear the current table; it is repopulated from the parsed reply.
                st.session_state.df = None
                try:
                    parsed_response = validate_llm_response(response)
                    print(f"Parsed response: {parsed_response}")

                    st.session_state.last_prompt = input_text
                    st.session_state.last_response = response  # Keep full JSON response

                    # Display structured output
                    if "error" in parsed_response:
                        st.error(parsed_response["error"])
                    else:
                        print(f"Parsed data: {parsed_response['dataset']}")
                        update_dataframe(parsed_response["dataset"])
                        st.session_state.explanation = parsed_response["explanation"]
                        st.session_state.references = parsed_response["references"]
                        # Append this interaction so it round-trips through the
                        # downloaded "Metadata" sheet.
                        st.session_state.history.append({
                            "Prompt": input_text,
                            "Explanation": parsed_response["explanation"],
                            "References": parsed_response["references"]
                        })
                except json.JSONDecodeError:
                    st.error("🚨 Error parsing response: Invalid JSON format.")
                except Exception as e:
                    st.error(f"🚨 Unexpected error: {e}")

                # Rerun so the refreshed dataset and expanders render immediately.
                st.rerun()
225
+
226
+
227
# πŸ“₯ Download Updated Excel
if st.session_state.df is not None:
    st.sidebar.subheader("Download Updated Dataset")

    def generate_excel(dataframe, history):
        """Serialize the dataset (sheet "Data") and interaction history
        (sheet "Metadata") into an in-memory xlsx stream for download."""
        output_stream = io.BytesIO()
        with pd.ExcelWriter(output_stream, engine="xlsxwriter") as writer:
            dataframe.to_excel(writer, index=False, sheet_name="Data")
            # Convert history to DataFrame and save in a new sheet
            if history:
                history_df = pd.DataFrame(history)
                history_df.to_excel(writer, index=False, sheet_name="Metadata")

            workbook = writer.book

            # Apply word wrapping. "Metadata" is only present in writer.sheets
            # when history was non-empty, so history_df is defined when used.
            for sheet_name in ["Data", "Metadata"]:
                if sheet_name in writer.sheets:
                    worksheet = writer.sheets[sheet_name]
                    wrap_format = workbook.add_format({"text_wrap": True, "align": "top", "valign": "top"})

                    # Apply word wrap to all columns
                    df_to_format = dataframe if sheet_name == "Data" else history_df
                    for col_num, col_name in enumerate(df_to_format.columns):
                        worksheet.set_column(col_num, col_num, 30, wrap_format)  # Adjust width if needed

        # Rewind after the writer closes so the download reads from the start.
        output_stream.seek(0)
        return output_stream


    st.sidebar.download_button(
        "πŸ“₯ Download Excel File",
        data=generate_excel(st.session_state.df, st.session_state.history),
        file_name="updated_dataset.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pandasai
+ streamlit
+ openai
+ openpyxl
+ xlsxwriter
+ pydantic
+ simplejson
+ requests
+ python-dotenv
rp_logo.jpg ADDED