Spaces:

DrishtiSharma
/

technical-measurements-extractor-for-patents

Sleeping

App Files Files Community

DrishtiSharma committed on Dec 23, 2024

Commit

650f7c0

verified ·

1 Parent(s): 744dcb8

Create app.py

Browse files

Files changed (1) hide show

app.py +117 -0

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+from datetime import datetime
+import streamlit as st
+from patentwiz import preprocess_data, qa_agent
+# Refuse to go any further unless an OpenAI key is configured: show an error
+# on the page and stop the script run.
+api_key = os.environ.get("OPENAI_API_KEY")
+if api_key is None or api_key == "":
+    st.error(
+        "OPENAI_API_KEY not found! "
+        "Please set it in the environment variables or Hugging Face Secrets."
+    )
+    st.stop()
+# Instruction text passed to qa_agent.call_QA_to_json for each analysed patent.
+# It asks the model for one object per measurement (substance, value, unit,
+# measurement type) collected under a top-level "Content" array, and carries a
+# single worked example (BaCO3 crystallite size).
+# NOTE(review): the template contains a `// ...` placeholder line, which is not
+# valid JSON -- confirm the downstream parser copes if the model echoes it.
+PROMPT = """
+Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
+and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
+We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
+For each measurement, please provide the following details:
+- The substance that was measured. (substance)
+- The specific value or range that was measured. (Measured Value)
+- The unit of the measurement, if provided. (Unit)
+- The type of measurement being conducted (e.g., diameter, size, etc.)
+Format your response in a structured JSON-like format, as follows:
+{"Content": [
+    {
+      "Measurement_substance": "substance",
+      "Measured_value": "value",
+      "Measured_unit": "unit",
+      "measurement_type": "type"
+    },
+    // ... additional measurements, if present
+  ]
+}
+If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
+Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
+{"Content": [
+    {
+      "Measurement_substance": "BaCO3",
+      "Measured_value": "between about 20 and 40",
+      "Measured_unit": "nm",
+      "measurement_type": "crystallite size"
+    }
+  ]
+}
+Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
+"""
+# Page heading and a short description of what the app does.
+st.title("Technical Measurements Extractor for Patents")
+st.write(
+    "Analyze patents to extract physical measurements "
+    "such as length, mass, time, and more. "
+    "Provide a date to download patents, "
+    "and analyze them using GPT models."
+)
+
+# Input widgets: date, number of patents, model, and the logging toggle.
+st.header("Enter Details for Patent Analysis")
+user_date_input = st.text_input(
+    "Enter a date in the format 'YYYY-MM-DD':",
+    value="2024-06-16",
+)
+num_patents_to_analyze = st.number_input(
+    "Number of patents to analyze:",
+    min_value=1,
+    value=1,
+    step=1,
+    help="Specify how many patents you want to analyze.",
+)
+model_choice = st.selectbox(
+    "Select a model for analysis:",
+    ["gpt-3.5-turbo", "gpt-4"],
+    help="Choose the OpenAI GPT model for the analysis.",
+)
+logging_enabled = st.checkbox(
+    "Enable logging?",
+    value=False,
+    help="Toggle logging for debugging purposes.",
+)
+# Run Analysis Button
+# Flow: validate the typed date, download/preprocess that day's patents, run
+# the QA agent on the first N of them, then show the total cost and each result.
+if st.button("Analyze Patents"):
+    # Ignore stray whitespace around the typed date.
+    date_text = (user_date_input or "").strip()
+    input_date = None
+    if not date_text:
+        st.error("Please enter a valid date!")
+    else:
+        # Parse the date in its own try block: a ValueError raised further down
+        # by the download or analysis calls must not be reported as a bad date.
+        try:
+            input_date = datetime.strptime(date_text, "%Y-%m-%d")
+        except ValueError as ve:
+            st.error(f"Invalid date format: {ve}")
+
+    if input_date is not None:
+        year, month, day = input_date.year, input_date.month, input_date.day
+        try:
+            # Step 1: Download and preprocess patents
+            with st.spinner("Downloading and extracting patents..."):
+                saved_patent_names = preprocess_data.parse_and_save_patents(
+                    year, month, day, logging_enabled
+                )
+                if not saved_patent_names:
+                    st.error("No patents found for the given date.")
+                    st.stop()
+                st.success(f"{len(saved_patent_names)} patents found and processed!")
+
+            # Step 2: Analyze patents using GPT
+            # The selection is the first N saved patents (not a random sample),
+            # so index i in the selection is also index i in saved_patent_names,
+            # which is what call_QA_to_json receives.
+            selected_patents = saved_patent_names[:num_patents_to_analyze]
+            total_cost = 0
+            results = []
+            st.write("Starting patent analysis...")
+            for patent_index, _ in enumerate(selected_patents):
+                cost, output = qa_agent.call_QA_to_json(
+                    PROMPT,
+                    year,
+                    month,
+                    day,
+                    saved_patent_names,
+                    patent_index,
+                    logging_enabled,
+                    model_choice,
+                )
+                total_cost += cost
+                results.append(output)
+
+            # Step 3: Display results
+            st.write(f"**Total Cost:** ${total_cost:.4f}")
+            st.write("### Analysis Results:")
+            for idx, result in enumerate(results):
+                st.subheader(f"Patent {idx + 1}")
+                st.json(result)
+        except Exception as e:
+            # Top-level boundary for the UI: surface any failure to the user
+            # instead of letting the script crash with a traceback.
+            st.error(f"An unexpected error occurred: {e}")