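"""Streamlit demo: Patentability Predictor.

Loads a fine-tuned sequence-classification model (juliaannjose/finetuned_model)
and a sample of the Harvard USPTO Patent Dataset (HUPD/hupd), lets the user pick
a patent application number, and predicts whether the application is likely to be
ACCEPTED or REJECTED from its abstract and claims.
"""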
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset
import pandas as pd

# Model selection
model_name = "juliaannjose/finetuned_model"

# Cache the tokenizer, model, and pipeline so Streamlit does not reload them on every rerun
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # The pipeline object is returned for convenience; the prediction below calls the model directly
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    return tokenizer, model, classifier


tokenizer, model, classifier = load_model(model_name)

# Load dataset with training and validation data
with st.spinner("Loading patent dataset..."):
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co./datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-31",
        val_filing_start_date="2017-01-22",
        val_filing_end_date="2017-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    df = pd.concat([df_train, df_val], ignore_index=True)

# Structure the DataFrame: keep only the columns the app needs
df = df[["patent_number", "decision", "abstract", "claims", "filing_date"]]
PAN = df["patent_number"].drop_duplicates()

# Streamlit UI
st.title("Patentability Predictor")
with st.form("patent-form"):
    make_choice = st.selectbox("Select the Patent Application Number:", PAN)
    submitted = st.form_submit_button(label="Submit")

if submitted:
    # Look up the selected application's text and recorded decision
    abstract = df["abstract"].loc[df["patent_number"] == make_choice].values[0]
    claims = df["claims"].loc[df["patent_number"] == make_choice].values[0]
    decision = df["decision"].loc[df["patent_number"] == make_choice].values[0]

    st.subheader(":blue[Patent Abstract]")
    st.info(abstract)
    st.subheader(":blue[Patent Claims]")
    st.info(claims)

    # Combine abstract and claims for a comprehensive prediction
    input_text = abstract + " " + claims
    inputs = tokenizer(input_text, truncation=True, padding=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)

    # Label mapping used by this app: index 0 = REJECTED, index 1 = ACCEPTED
    id2label = {0: "REJECTED", 1: "ACCEPTED"}
    predicted_class_id = probabilities.argmax().item()
    pred_label = id2label[predicted_class_id]
    score = probabilities[0][predicted_class_id].item()

    st.subheader(":green[Prediction Result]")
    if pred_label == "ACCEPTED":
        st.success(f"The patent is likely to be **ACCEPTED** with a score of {score:.2f}.")
    else:
        st.error(f"The patent is likely to be **REJECTED** with a score of {score:.2f}.")
    st.write(f"**Decision Summary:** {decision}")
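
# To run this demo locally (assuming this file is saved as app.py):
#   pip install streamlit torch transformers datasets pandas
#   streamlit run app.py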