Create main.py
Browse files- patentwiz/main.py +123 -0
patentwiz/main.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk

# NOTE(review): this call runs every time the module is imported and requests
# the complete NLTK collection ("all"), which is a large download the first
# time it runs. It sits before the package imports below, presumably because
# one of them needs NLTK data at import time -- confirm, and if possible
# narrow it to the specific resources that are actually used.
nltk.download("all", quiet=True)
from datetime import datetime
import random
import json
from . import preprocess_data
from . import qaagent
|
9 |
+
|
10 |
+
|
11 |
+
PROMPT = """
|
12 |
+
Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
|
13 |
+
and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
|
14 |
+
We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
|
15 |
+
For each measurement, please provide the following details:
|
16 |
+
- The substance that was measured. (substance)
|
17 |
+
- The specific value or range that was measured. (Measured Value)
|
18 |
+
- The unit of the measurement, if provided. (Unit)
|
19 |
+
- The type of measurement being conducted (e.g., diameter, size, etc.)
|
20 |
+
Format your response in a structured JSON-like format, as follows:
|
21 |
+
{"Content": [
|
22 |
+
{
|
23 |
+
"Measurement_substance": "substance",
|
24 |
+
"Measured_value": "value",
|
25 |
+
"Measured_unit": "unit",
|
26 |
+
"measurement_type": "type"
|
27 |
+
},
|
28 |
+
// ... additional measurements, if present
|
29 |
+
]
|
30 |
+
}
|
31 |
+
If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
|
32 |
+
Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
|
33 |
+
{"Content": [
|
34 |
+
{
|
35 |
+
"Measurement_substance": "BaCO3",
|
36 |
+
"Measured_value": "between about 20 and 40",
|
37 |
+
"Measured_unit": "nm",
|
38 |
+
"measurement_type": "crystallite size"
|
39 |
+
}
|
40 |
+
]
|
41 |
+
}
|
42 |
+
Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
|
43 |
+
"""
|
44 |
+
|
45 |
+
|
46 |
+
def main():
    """Run the interactive patent-analysis workflow.

    Steps:
    - Prompt for a date (``YYYY-MM-DD``) and echo its year, month and day.
    - Prompt for how many patents to analyze, whether to log the results,
      and which model to use.
    - Call ``preprocess_data.parse_and_save_patents`` for that date.
    - Draw a random sample of the returned patents and pass each one to
      ``qaagent.call_QA_to_json`` together with ``PROMPT``.
    - Print the total and the average cost reported by those calls.

    A malformed date, a non-integer or non-positive patent count, and an
    empty patent list are each reported on stdout and end the run early
    instead of raising.

    Returns:
        None.
    """
    print("Starting the patent analysis process...")

    # Step 1: Read the date and parse it; stop on malformed input.
    user_date_input = input("Enter a date in the format 'YYYY-MM-DD': ")
    try:
        input_date = datetime.strptime(user_date_input, "%Y-%m-%d")
    except ValueError:
        print(
            "Invalid date format. Please enter a valid date in the format 'YYYY-MM-DD'."
        )
        return

    # Step 2: Extract and echo the date components.
    year = input_date.year
    month = input_date.month
    day = input_date.day

    print("Year:", year)
    print("Month:", month)
    print("Day:", day)

    # Step 3: Read how many patents to analyze. A non-numeric value would make
    # int() raise, and zero/negative values would later break random.sample()
    # or the average computation, so reject them here.
    count_input = input("Enter the number of patents you want to analyze: ")
    try:
        num_patents_to_analyze = int(count_input)
    except ValueError:
        print("Invalid number of patents. Please enter a whole number.")
        return
    if num_patents_to_analyze <= 0:
        print("The number of patents to analyze must be at least 1.")
        return

    logging_choice = input("Do you want to log the results? (yes/no): ").strip().lower()
    logging_enabled = logging_choice == "yes"

    # Step 4: Pick the model; anything other than "1" or "2" falls back to
    # gpt-3.5-turbo.
    model_choice = input(
        "Select a model for analysis: 1. gpt-3.5-turbo 2. gpt-4"
    ).strip()
    models_by_choice = {"1": "gpt-3.5-turbo", "2": "gpt-4"}
    model_name = models_by_choice.get(model_choice)
    if model_name is None:
        print("Invalid choice, defaulting to gpt-3.5-turbo.")
        model_name = "gpt-3.5-turbo"

    print("Processing patents...")
    # Step 5: Parse and save patents.
    # NOTE(review): the meaning of the trailing False flag is defined in
    # preprocess_data and is not visible from this function.
    saved_patent_names = preprocess_data.parse_and_save_patents(year, month, day, False)

    if not saved_patent_names:
        print("No patents were found for the given date.")
        return

    # Step 6: Select random patents. random.sample() raises ValueError when
    # asked for more items than exist, so cap the request at what is available.
    available = len(saved_patent_names)
    if num_patents_to_analyze > available:
        print(
            f"Only {available} patents are available; analyzing all of them."
        )
        num_patents_to_analyze = available
    random_patents = random.sample(saved_patent_names, num_patents_to_analyze)

    # Step 7: Analyze each selected patent with the chosen model. The helper
    # takes the whole list plus an index, so iterate over positions. Its
    # second return value is not used here.
    total_cost = 0
    for index in range(len(random_patents)):
        cost, _ = qaagent.call_QA_to_json(
            PROMPT, year, month, day, random_patents, index, logging_enabled, model_name
        )
        total_cost += cost

    # The count is guaranteed to be >= 1 at this point, so the division is safe.
    average_cost = total_cost / num_patents_to_analyze

    print("Patent analysis process completed successfully.")
    # Step 8: Print results, labelled with the model that was actually used.
    print(f"\nResults for {model_name}:")
    print("Number of patents analyzed:", num_patents_to_analyze)
    print("Total cost for analyzing all patents:", total_cost)
    print("Average cost per patent:", average_cost)
|