DrishtiSharma committed on
Commit
650f7c0
·
verified ·
1 Parent(s): 744dcb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ import streamlit as st
4
+ from patentwiz import preprocess_data, qa_agent
5
+
# Fail fast: the app needs an OpenAI credential, so look it up in the
# process environment and halt this script run with a visible error when
# it is absent or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
    st.stop()
11
+
# Instruction text handed to `qa_agent.call_QA_to_json` as its first argument
# for every analyzed patent. It asks the model to list each physical
# measurement found in the patent text as an object with the keys
# "Measurement_substance", "Measured_value", "Measured_unit" and
# "measurement_type", collected under a top-level "Content" array, and to
# print only that JSON.
# The wording is runtime input to the model: editing it changes model output.
# NOTE(review): the template contains a `// ...` comment line, which is not
# valid JSON — presumably acceptable because the format is described as
# "JSON-like"; confirm that downstream parsing tolerates it if echoed back.
PROMPT = """
Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
For each measurement, please provide the following details:
- The substance that was measured. (substance)
- The specific value or range that was measured. (Measured Value)
- The unit of the measurement, if provided. (Unit)
- The type of measurement being conducted (e.g., diameter, size, etc.)
Format your response in a structured JSON-like format, as follows:
{"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
},
// ... additional measurements, if present
]
}
If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
{"Content": [
{
"Measurement_substance": "BaCO3",
"Measured_value": "between about 20 and 40",
"Measured_unit": "nm",
"measurement_type": "crystallite size"
}
]
}
Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
"""
45
+
# Page header: app title followed by a short description of the workflow.
st.title("Technical Measurements Extractor for Patents")
st.write(
    "Analyze patents to extract physical measurements such as length, mass, time, and more. "
    "Provide a date to download patents, and analyze them using GPT models."
)

# Input widgets. The values they return are read by the "Analyze Patents"
# handler further down, so the variable names below must stay as they are.
st.header("Enter Details for Patent Analysis")

# Free-text date; it is validated only when the analysis is started.
user_date_input = st.text_input(
    label="Enter a date in the format 'YYYY-MM-DD':",
    value="2024-06-16",
)

# How many of the downloaded patents to send to the model (at least one).
num_patents_to_analyze = st.number_input(
    label="Number of patents to analyze:",
    min_value=1,
    value=1,
    step=1,
    help="Specify how many patents you want to analyze.",
)

# Model name forwarded unchanged to the QA agent.
model_choice = st.selectbox(
    label="Select a model for analysis:",
    options=["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)

# Flag forwarded to both the preprocessing step and the QA agent.
logging_enabled = st.checkbox(
    label="Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)
66
+
# Run Analysis Button
# Workflow when the button is pressed:
#   1. validate the date typed by the user,
#   2. download/preprocess the patents for that date,
#   3. run the QA agent on the first `num_patents_to_analyze` of them,
#   4. show the accumulated cost and one JSON result per patent.
if st.button("Analyze Patents"):
    # The date is validated in its own narrow `try`, so that only a genuine
    # parsing failure is reported as "Invalid date format". A ValueError
    # raised later by the download or analysis steps is reported as an
    # unexpected error instead of being mislabelled as a date problem.
    input_date = None
    date_text = user_date_input.strip()  # tolerate stray spaces around the date
    if not date_text:
        st.error("Please enter a valid date!")
    else:
        try:
            input_date = datetime.strptime(date_text, "%Y-%m-%d")
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")

    if input_date is not None:
        year, month, day = input_date.year, input_date.month, input_date.day

        try:
            # Step 1: Download and preprocess patents
            with st.spinner("Downloading and extracting patents..."):
                saved_patent_names = preprocess_data.parse_and_save_patents(
                    year, month, day, logging_enabled
                )
                if not saved_patent_names:
                    st.error("No patents found for the given date.")
                    # NOTE(review): relies on st.stop() not being swallowed by
                    # the `except Exception` below — confirm for the Streamlit
                    # version in use.
                    st.stop()
                st.success(f"{len(saved_patent_names)} patents found and processed!")

            # Step 2: Analyze patents using GPT
            # The selection is simply the first N saved patents (not a random
            # sample), so a position in the selection is also a valid index
            # into `saved_patent_names`, which is what the QA agent receives.
            selected_patents = saved_patent_names[:num_patents_to_analyze]
            total_cost = 0
            results = []

            st.write("Starting patent analysis...")
            for patent_index, _patent_file in enumerate(selected_patents):
                cost, output = qa_agent.call_QA_to_json(
                    PROMPT,
                    year,
                    month,
                    day,
                    saved_patent_names,
                    patent_index,
                    logging_enabled,
                    model_choice,
                )
                total_cost += cost
                results.append(output)

            # Step 3: Display results
            st.write(f"**Total Cost:** ${total_cost:.4f}")
            st.write("### Analysis Results:")
            for idx, result in enumerate(results):
                st.subheader(f"Patent {idx + 1}")
                st.json(result)

        except Exception as e:
            # Top-level boundary of the app: show the failure in the UI
            # rather than letting the script crash.
            st.error(f"An unexpected error occurred: {e}")