DrishtiSharma committed
Commit 986681b · verified · 1 Parent(s): 1ac4555

Create temp.py

Files changed (1)
  1. mylab/temp.py +75 -0
mylab/temp.py ADDED
@@ -0,0 +1,75 @@
+ import streamlit as st
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+ from datasets import load_dataset
+ import pandas as pd
+
+ # Model selection
+ model_name = "juliaannjose/finetuned_model"
+
+ # Cache the tokenizer, model, and pipeline so they are loaded only once per session
+ @st.cache_resource
+ def load_model(model_name):
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSequenceClassification.from_pretrained(model_name)
+     classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
+     return tokenizer, model, classifier
+
+ tokenizer, model, classifier = load_model(model_name)
+
+ # Load dataset with training and validation data
+ with st.spinner("Loading patent dataset..."):
+     dataset_dict = load_dataset(
+         "HUPD/hupd",
+         name="sample",
+         data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
+         icpr_label=None,
+         train_filing_start_date="2016-01-01",
+         train_filing_end_date="2016-01-31",
+         val_filing_start_date="2017-01-22",
+         val_filing_end_date="2017-01-31",
+     )
+     df_train = pd.DataFrame(dataset_dict["train"])
+     df_val = pd.DataFrame(dataset_dict["validation"])
+     df = pd.concat([df_train, df_val], ignore_index=True)
+
+ # Keep only the columns used by the UI and collect the unique application numbers
+ df = df[["patent_number", "decision", "abstract", "claims", "filing_date"]]
+ PAN = df["patent_number"].drop_duplicates()
+
+ # Streamlit UI
+ st.title("Patentability Predictor")
+
+ with st.form("patent-form"):
+     make_choice = st.selectbox("Select the Patent Application Number:", PAN)
+     submitted = st.form_submit_button(label="Submit")
+
+ if submitted:
+     abstract = df["abstract"].loc[df["patent_number"] == make_choice].values[0]
+     claims = df["claims"].loc[df["patent_number"] == make_choice].values[0]
+     decision = df["decision"].loc[df["patent_number"] == make_choice].values[0]
+
+     st.subheader(":blue[Patent Abstract]")
+     st.info(abstract)
+     st.subheader(":blue[Patent Claims]")
+     st.info(claims)
+
+     # Combine abstract and claims for a comprehensive prediction
+     input_text = abstract + " " + claims
+     inputs = tokenizer(input_text, truncation=True, padding=True, return_tensors="pt")
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+         probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
+
+     id2label = {0: "REJECTED", 1: "ACCEPTED"}
+     predicted_class_id = probabilities.argmax().item()
+     pred_label = id2label[predicted_class_id]
+
+     st.subheader(":green[Prediction Result]")
+     if pred_label == "ACCEPTED":
+         st.success(f"The patent is likely to be **ACCEPTED** with a score of {probabilities[0][1].item():.2f}.")
+     else:
+         st.error(f"The patent is likely to be **REJECTED** with a score of {probabilities[0][0].item():.2f}.")
+
+     st.write(f"**Decision Summary:** {decision}")
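Note: the classifier pipeline returned by load_model is created but never used; the prediction is computed by hand from the model logits with a hard-coded id2label mapping. A minimal sketch of the equivalent call through the already-loaded pipeline, using the input_text built above (the label strings and scores it returns come from the model's own id2label config, which this commit does not show, so they are an assumption here):

    # Sketch: same prediction via the text-classification pipeline loaded above.
    # The 'label' string depends on the model's id2label mapping (assumed, not shown in this commit).
    result = classifier(input_text, truncation=True)[0]
    st.write(f"Pipeline prediction: {result['label']} (score: {result['score']:.2f})")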