DrishtiSharma's picture
Create temp.py
986681b verified
raw
history blame
2.92 kB
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset
import pandas as pd
# Model selection: checkpoint identifier that load_model() passes to the
# tokenizer and sequence-classification `from_pretrained` loaders.
model_name: str = "juliaannjose/finetuned_model"
@st.cache_resource
def load_model(model_name):
    """Load the tokenizer, classifier model and pipeline for ``model_name``.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded objects instead of rebuilding them on every script rerun.

    Args:
        model_name: Checkpoint identifier accepted by ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, classifier)`` tuple, where ``classifier`` is a
        ``"text-classification"`` pipeline built from the other two objects.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    seq_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    text_pipeline = pipeline(
        "text-classification",
        model=seq_model,
        tokenizer=tok,
    )
    return tok, seq_model, text_pipeline
# Tokenizer, model and pipeline come from the cached loader above.
tokenizer, model, classifier = load_model(model_name)


@st.cache_data(show_spinner=False)
def load_patent_data():
    """Return the HUPD sample as one DataFrame with only the columns the app uses.

    Streamlit re-executes the script on every interaction (including each
    form submit), so the dataset load, DataFrame conversion and concatenation
    are wrapped in ``st.cache_data`` to run once instead of on every rerun.
    The column selection happens inside the cached function so that only the
    five needed columns are stored in the cache.

    Returns:
        A DataFrame with the train rows followed by the validation rows
        (fresh 0..n-1 index) and the columns ``patent_number``, ``decision``,
        ``abstract``, ``claims`` and ``filing_date``.
    """
    # NOTE(review): the validation window is in 2017 while the training window
    # is in 2016 -- confirm the "sample" configuration actually contains
    # filings in that range, otherwise the validation split will be empty.
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co./datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-31",
        val_filing_start_date="2017-01-22",
        val_filing_end_date="2017-01-31",
    )
    frames = [pd.DataFrame(dataset_dict[split]) for split in ("train", "validation")]
    combined = pd.concat(frames, ignore_index=True)
    # Structure the DataFrame
    return combined[["patent_number", "decision", "abstract", "claims", "filing_date"]]


# Load dataset with training and validation data
with st.spinner("Loading patent dataset..."):
    df = load_patent_data()

# Unique patent numbers offered in the selection widget.
PAN = df["patent_number"].drop_duplicates()
# Streamlit UI
st.title("Patentability Predictor")

# Display label for each class index of the model output (hard-coded order;
# it is not read from the model configuration).
id2label = {0: "REJECTED", 1: "ACCEPTED"}


def _as_text(value):
    """Return ``value`` if it is a string, otherwise ``""`` (e.g. None/NaN cells)."""
    return value if isinstance(value, str) else ""


def _predict(text):
    """Classify ``text`` and return ``(label, probability)`` of the top class.

    The text is tokenized with truncation, run through the model without
    gradient tracking, and the logits are turned into probabilities with a
    softmax over the class dimension.
    """
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # A single text was passed in, so row 0 holds the per-class probabilities.
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)[0]
    predicted_class_id = int(probabilities.argmax())
    return id2label[predicted_class_id], probabilities[predicted_class_id].item()


def _show_prediction(record):
    """Render abstract, claims, model verdict and recorded decision for one row."""
    abstract = _as_text(record["abstract"])
    claims = _as_text(record["claims"])

    st.subheader(":blue[Patent Abstract]")
    st.info(abstract)
    st.subheader(":blue[Patent Claims]")
    st.info(claims)

    # Combine abstract and claims for a comprehensive prediction
    pred_label, score = _predict(abstract + " " + claims)

    st.subheader(":green[Prediction Result]")
    if pred_label == "ACCEPTED":
        st.success(f"The patent is likely to be **ACCEPTED** with a score of {score:.2f}.")
    else:
        st.error(f"The patent is likely to be **REJECTED** with a score of {score:.2f}.")
    st.write(f"**Decision Summary:** {record['decision']}")


with st.form("patent-form"):
    # NOTE(review): the label says "Application Number" but the options are
    # the values of df["patent_number"] -- confirm this is the intended column.
    make_choice = st.selectbox("Select the Patent Application Number:", PAN)
    submitted = st.form_submit_button(label="Submit")

    if submitted:
        # Filter once and reuse the row; the first match wins when a patent
        # number occurs in several rows.
        matches = df.loc[df["patent_number"] == make_choice]
        if matches.empty:
            st.warning("No record found for the selected patent number.")
        else:
            _show_prediction(matches.iloc[0])