# Source captured from a Hugging Face Space file view (Space status: Running).
# File size: 2,921 bytes (75 lines) - revision 986681b.
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset
import pandas as pd
# Model selection: identifier handed to load_model(), which forwards it to
# AutoTokenizer.from_pretrained and
# AutoModelForSequenceClassification.from_pretrained.
model_name = "juliaannjose/finetuned_model"
@st.cache_resource
def load_model(model_name):
    """Build the tokenizer, classification model, and pipeline for a checkpoint.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    objects instead of calling this function again for the same argument.

    Args:
        model_name: Identifier forwarded to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model, classifier)`` tuple, where ``classifier`` is a
        ``"text-classification"`` pipeline wrapping the other two objects.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    seq_clf = AutoModelForSequenceClassification.from_pretrained(model_name)
    text_pipe = pipeline("text-classification", tokenizer=tok, model=seq_clf)
    return tok, seq_clf, text_pipe
# Tokenizer, model and pipeline come from the cached loader, so they are
# built once rather than on every script rerun.
tokenizer, model, classifier = load_model(model_name)


@st.cache_data(show_spinner=False)
def _load_patent_frame():
    """Load the HUPD "sample" configuration and return it as one DataFrame.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the dataset load and both DataFrame conversions would be
    repeated each time. ``st.cache_data`` keeps the result between reruns.
    The built-in cache spinner is disabled because the caller already wraps
    this call in its own ``st.spinner``.

    Returns:
        A DataFrame with the train rows followed by the validation rows
        (index reset), restricted to the columns ``patent_number``,
        ``decision``, ``abstract``, ``claims`` and ``filing_date``.
    """
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        # NOTE(review): the host is written "huggingface.co." (trailing dot)
        # and the path uses /blob/ - kept unchanged; confirm this is intended.
        data_files="https://huggingface.co./datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-31",
        # NOTE(review): the validation window is in 2017 while the training
        # window is in 2016 - confirm the "sample" data covers these dates.
        val_filing_start_date="2017-01-22",
        val_filing_end_date="2017-01-31",
    )
    frames = [pd.DataFrame(dataset_dict[split]) for split in ("train", "validation")]
    combined = pd.concat(frames, ignore_index=True)
    # Structure the DataFrame: keep only the columns the UI reads.
    return combined[["patent_number", "decision", "abstract", "claims", "filing_date"]]


# Load dataset with training and validation data
with st.spinner("Loading patent dataset..."):
    df = _load_patent_frame()

# Unique patent numbers, used as the options of the selection widget.
PAN = df["patent_number"].drop_duplicates()
def _text_or_empty(value):
    """Return *value* unchanged when it is a string, otherwise ``""``.

    Guards the abstract/claims concatenation against missing (non-string)
    cells, which would otherwise raise ``TypeError``.
    """
    return value if isinstance(value, str) else ""


# Streamlit UI
st.title("Patentability Predictor")

with st.form("patent-form"):
    make_choice = st.selectbox("Select the Patent Application Number:", PAN)
    submitted = st.form_submit_button(label="Submit")

if submitted:
    # Filter once and reuse the first matching row for every field.
    # NOTE(review): the widget label says "Application Number" but the lookup
    # key is the "patent_number" column - confirm this is the intended key.
    matches = df.loc[df["patent_number"] == make_choice]

    if matches.empty:
        # Reached when nothing matches, e.g. the option list was empty and the
        # selectbox returned None; previously this raised IndexError.
        st.warning("No record found for the selected patent number.")
    else:
        record = matches.iloc[0]
        abstract = _text_or_empty(record["abstract"])
        claims = _text_or_empty(record["claims"])
        decision = record["decision"]

        st.subheader(":blue[Patent Abstract]")
        st.info(abstract)
        st.subheader(":blue[Patent Claims]")
        st.info(claims)

        # Combine abstract and claims for a comprehensive prediction
        input_text = abstract + " " + claims
        inputs = tokenizer(
            input_text, truncation=True, padding=True, return_tensors="pt"
        )
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)

        # NOTE(review): label mapping is hard-coded here; confirm it matches
        # the label order the checkpoint was trained with.
        id2label = {0: "REJECTED", 1: "ACCEPTED"}
        predicted_class_id = probabilities.argmax().item()
        pred_label = id2label[predicted_class_id]
        # Probability of the predicted class for the single input row.
        score = probabilities[0][predicted_class_id].item()

        st.subheader(":green[Prediction Result]")
        message = (
            f"The patent is likely to be **{pred_label}** "
            f"with a score of {score:.2f}."
        )
        if pred_label == "ACCEPTED":
            st.success(message)
        else:
            st.error(message)

        # "decision" is the value stored in the dataset row, shown alongside
        # the model's prediction.
        st.write(f"**Decision Summary:** {decision}")