DrishtiSharma committed on
Commit
b295f13
·
verified ·
1 Parent(s): eb569a5

Create main.py

Browse files
Files changed (1) hide show
  1. patentwiz/main.py +123 -0
patentwiz/main.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+
3
+ nltk.download("all", quiet=True)
4
+ from datetime import datetime
5
+ import random
6
+ import json
7
+ from . import preprocess_data
8
+ from . import qaagent
9
+
10
+
11
# Instruction text handed to qaagent.call_QA_to_json by main() for every
# selected patent. It asks the model to extract physical measurements
# (substance, value, unit, measurement type) from the patent text and to
# answer only with a JSON object whose "Content" key holds one entry per
# measurement.
# NOTE(review): the template below contains a `// ...` line, which is not
# valid JSON; it is illustrative only — confirm the response parser does not
# expect the model to echo it.
PROMPT = """
Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
For each measurement, please provide the following details:
- The substance that was measured. (substance)
- The specific value or range that was measured. (Measured Value)
- The unit of the measurement, if provided. (Unit)
- The type of measurement being conducted (e.g., diameter, size, etc.)
Format your response in a structured JSON-like format, as follows:
{"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
},
// ... additional measurements, if present
]
}
If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
{"Content": [
{
"Measurement_substance": "BaCO3",
"Measured_value": "between about 20 and 40",
"Measured_unit": "nm",
"measurement_type": "crystallite size"
}
]
}
Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
"""
44
+
45
+
46
def main():
    """Run the interactive patent measurement-extraction workflow.

    Steps:
      1. Read a date in ``YYYY-MM-DD`` form and echo its year, month and day.
      2. Read how many patents to analyze, whether to log results, and which
         model to use (anything other than "1" or "2" falls back to
         gpt-3.5-turbo).
      3. Call ``preprocess_data.parse_and_save_patents`` for that date.
      4. Randomly sample the requested number of saved patents and call
         ``qaagent.call_QA_to_json`` on each one with ``PROMPT``.
      5. Print the number of patents analyzed, the total cost and the
         average cost per patent.

    Invalid input (a malformed date, or a patent count that is not a positive
    whole number) prints an explanation and returns early instead of raising.
    If fewer patents are available than requested, all available patents are
    analyzed.

    Returns:
        None. All results are reported on standard output.
    """
    print("Starting the patent analysis process...")

    # Step 1: Input the date from the user
    user_date_input = input("Enter a date in the format 'YYYY-MM-DD': ")

    # Step 2: Parse the input date into a datetime object
    try:
        input_date = datetime.strptime(user_date_input, "%Y-%m-%d")
    except ValueError:
        print(
            "Invalid date format. Please enter a valid date in the format 'YYYY-MM-DD'."
        )
        return

    # Step 3: Extract date components
    year = input_date.year
    month = input_date.month
    day = input_date.day

    print("Year:", year)
    print("Month:", month)
    print("Day:", day)

    # Step 4: Get the number of random patents to analyze
    num_patents_to_analyze = _read_patent_count()
    if num_patents_to_analyze is None:
        return

    logging_choice = input("Do you want to log the results? (yes/no): ").strip().lower()
    logging_enabled = logging_choice == "yes"

    model_choice = input(
        "Select a model for analysis: 1. gpt-3.5-turbo 2. gpt-4"
    ).strip()

    if model_choice == "1":
        model_name = "gpt-3.5-turbo"
    elif model_choice == "2":
        model_name = "gpt-4"
    else:
        print("Invalid choice, defaulting to gpt-3.5-turbo.")
        model_name = "gpt-3.5-turbo"

    print("Processing patents...")
    # Step 5: Parse and save patents
    saved_patent_names = preprocess_data.parse_and_save_patents(year, month, day, False)

    if not saved_patent_names:
        # Nothing to sample from; random.sample would raise on any count > 0.
        print("No patents were found for the given date; nothing to analyze.")
        return

    available = len(saved_patent_names)
    if num_patents_to_analyze > available:
        # random.sample raises ValueError when asked for more items than exist.
        print(f"Only {available} patents are available; analyzing all of them.")
        num_patents_to_analyze = available

    # Step 6: Select random patents and analyze
    random_patents = random.sample(saved_patent_names, num_patents_to_analyze)

    total_cost = 0

    # Step 7: Process each selected patent with the chosen model.
    # call_QA_to_json takes the whole list plus an index, so iterate by index.
    for index in range(len(random_patents)):
        cost, _output = qaagent.call_QA_to_json(
            PROMPT, year, month, day, random_patents, index, logging_enabled, model_name
        )
        total_cost += cost

    # num_patents_to_analyze is >= 1 here, so the division is safe.
    average_cost = total_cost / num_patents_to_analyze

    print("Patent analysis process completed successfully.")
    # Step 8: Print results, labelled with the model that was actually used
    print(f"\nResults for {model_name}:")
    print("Number of patents analyzed:", num_patents_to_analyze)
    print("Total cost for analyzing all patents:", total_cost)
    print("Average cost per patent:", average_cost)


def _read_patent_count():
    """Prompt for the number of patents to analyze.

    Returns:
        The requested count as a positive int, or None (after printing an
        explanation) when the entry is not a whole number greater than zero.
    """
    raw_count = input("Enter the number of patents you want to analyze: ")
    try:
        count = int(raw_count)
    except ValueError:
        count = 0  # Treated the same as any other non-positive entry below.
    if count <= 0:
        print("Invalid number. Please enter a positive whole number of patents.")
        return None
    return count