"""Streamlit app that asks an OpenAI chat model to analyse materials data.

The app loads the "dft_3d" dataset through ``jarvis.db.figshare.data``, lets
the user pick a property, a threshold and an optimization goal, and then sends
three prompts (recommendations, trends, hypothesis) to the chat model. The
three answers are shown on the page and combined into a downloadable report.
"""

import os

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from jarvis.db.figshare import data
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment variable
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    st.error("API key not found. Make sure to set the OPENAI_API_KEY environment variable.")
    raise ValueError("API key not found")

client = OpenAI(api_key=api_key)

CHAT_MODEL = "gpt-3.5-turbo"
NO_DATA_MESSAGE = "No data available for analysis."


@st.cache_data()
def load_data():
    """Fetch the "dft_3d" dataset and return it as a DataFrame.

    Decorated with ``st.cache_data`` so the fetch is not repeated on every
    Streamlit rerun.
    """
    dataset = data(dataset="dft_3d")
    return pd.DataFrame(dataset)


def _ask_model(system_prompt, user_prompt, error_prefix):
    """Send one system+user exchange to the chat model and return its reply.

    Any failure (network, API, missing content) is converted into the string
    ``"<error_prefix>: <exception>"`` so one failed call does not abort the
    remaining sections of the analysis.
    """
    try:
        response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content.strip()
    except Exception as e:  # best-effort boundary: report the error as text
        return f"{error_prefix}: {e}"


def _summarize_property(df, target_property):
    """Return ``describe()`` statistics for ``target_property`` as a dict.

    The column is coerced to numeric first so the statistics are numeric even
    when the column is stored with object dtype. If the column is missing,
    fall back to describing the whole frame.
    """
    if target_property not in df.columns:
        return df.describe().to_dict()
    numeric_values = pd.to_numeric(df[target_property], errors="coerce")
    return numeric_values.describe().to_dict()


def _build_report(query, target_property, insights, trends, hypothesis):
    """Assemble the Markdown research report from the three model answers."""
    lines = [
        f"# Research Report for Query: {query.capitalize()}",
        "",
        "## 1. Findings from the Query:",
        f"Based on the user query **'{query}'**, the following key findings "
        "and insights were obtained from the dataset:",
        "",
        f"- **Insights and Recommendations**: {insights}",
        f"- **Trends and Comparisons**: {trends}",
        "",
        "## 2. Hypothesis:",
        "Based on the analysis, the following hypothesis was generated:",
        f"{hypothesis}",
        "",
        "## 3. Recommendations for Further Research:",
        "It is suggested to explore the materials with higher or lower values "
        f"of {target_property}, considering the trends identified, to optimize "
        f"the material properties for the query {query}.",
        "",
    ]
    return "\n".join(lines)


def process_materials_data(query, df, target_property, threshold, optimization_goal):
    """Run the three model analyses and build the report.

    Args:
        query: Free-text question entered by the user.
        df: Materials data to analyse.
        target_property: Name of the column the analysis focuses on.
        threshold: Numeric cut-off mentioned in the trends prompt.
        optimization_goal: "above" or "below"; combined with ``threshold`` in
            the trends prompt.

    Returns:
        A 4-tuple ``(insights, trends, hypothesis, report)`` of strings. When
        ``df`` is empty every element is the "no data" message, so callers
        can always unpack four values.
    """
    if df.empty:
        return NO_DATA_MESSAGE, NO_DATA_MESSAGE, NO_DATA_MESSAGE, NO_DATA_MESSAGE

    # Data Interpretation and Recommendations
    summary = _summarize_property(df, target_property)
    # NOTE: interpolating ``df`` inserts pandas' truncated repr, not all rows.
    recommendations_prompt = (
        f"Based on the following summary statistics of {target_property}:\n{summary}, "
        "provide a summary of the results, key findings, and actionable "
        "recommendations based on the data and answer the user query "
        f"{query} based on the dataset {df}."
    )
    insights = _ask_model(
        "You are a materials science assistant summarizing data insights and making recommendations.",
        recommendations_prompt,
        "Error generating recommendations",
    )

    # Trends Analysis and Candidates Suggestions
    # NOTE(review): only the first 13 rows are sent; they are not pre-selected
    # by ``threshold``/``optimization_goal`` -- the model is asked to do that.
    comparison_prompt = (
        f"Analyze trends in the {target_property} data for the following materials:\n"
        f"{df.head(13).to_dict()}. Identify promising candidates with "
        f"{target_property} {optimization_goal} {threshold} and provide a "
        "detailed comparison of materials. "
        f"Include the user query {query} answer as well and dont ask any follow up questions"
    )
    trends = _ask_model(
        "You are a materials scientist analyzing trends and suggesting promising materials.",
        comparison_prompt,
        "Error analyzing trends",
    )

    # Hypothesis Generation
    hypothesis_prompt = (
        f"Generate a hypothesis based on trends in the {target_property} data:\n"
        f"{df.head(5).to_dict()} based on the user query {query} and also "
        "suggest possible material combinations which could improve the "
        "performance of material. Don't ask any follow up questions"
    )
    hypothesis = _ask_model(
        "You are a research assistant generating hypotheses based on material data.",
        hypothesis_prompt,
        "Error generating hypothesis",
    )

    # Research Report Based on Query and Findings
    report = _build_report(query, target_property, insights, trends, hypothesis)
    return insights, trends, hypothesis, report


st.title("Piezoelectric Material Discovery Assistant")

dataset = load_data()
df = dataset  # load_data() already returns a DataFrame

query = st.text_input("Enter your query (e.g., 'Find materials with high piezoelectric constants'):")
target_property = st.selectbox(
    "Select a property to analyze:",
    ["dfpt_piezo_max_dij", "optb88vdw_bandgap", "density", "min_ir_mode", "max_ir_mode"],
)
threshold = st.number_input("Threshold value:", min_value=0.0, value=1.0)
optimization_goal = st.selectbox("Optimization goal:", ["above", "below"])

if st.button("Analyze Data"):
    # Keep only rows with a usable numeric value. Coercion also drops
    # non-numeric placeholders, which ``notnull()`` alone would keep.
    # NOTE(review): presumably the dataset marks missing values with strings
    # such as "na" -- confirm against the loaded data.
    numeric_values = pd.to_numeric(df[target_property], errors="coerce")
    filtered_df = df[numeric_values.notna()]

    if filtered_df.empty:
        st.warning(NO_DATA_MESSAGE)
    else:
        insights, trends, hypothesis, report = process_materials_data(
            query, filtered_df, target_property, threshold, optimization_goal
        )

        st.write("### Insights and Recommendations")
        st.write(insights)

        st.write("### Trends and Promising Candidates")
        st.write(trends)

        st.write("### Generated Hypothesis")
        st.write(hypothesis)

        st.write("### Research Report")
        st.download_button("Download Research Report", report, file_name="research_report.txt")