"""Streamlit app: predict patent acceptance for applications in the HUPD sample.

The app loads a fine-tuned sequence-classification model and a slice of the
Harvard USPTO Patent Dataset (HUPD), lets the user pick a patent number, shows
its abstract and claims, and displays the model's ACCEPTED/REJECTED prediction
next to the decision recorded in the dataset.
"""
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import pandas as pd

# Model selection: Use a fine-tuned model for patent classification
model_name = "juliaannjose/finetuned_model"

# Columns the app keeps from the raw dataset; everything else is dropped.
_COLUMNS = ["patent_number", "decision", "abstract", "claims", "filing_date"]


@st.cache_resource
def load_model(model_name):
    """Load the tokenizer and classification model identified by *model_name*.

    Decorated with ``st.cache_resource`` so Streamlit reuses the loaded
    objects across script reruns instead of reloading them each time.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


@st.cache_data(show_spinner=False)
def load_patent_dataframe():
    """Load the HUPD sample and return train + validation rows as one DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so the
    dataset load and DataFrame conversion are cached here rather than repeated
    on each rerun. The caller shows its own spinner, hence
    ``show_spinner=False``.

    Returns:
        A DataFrame restricted to the columns listed in ``_COLUMNS``.
    """
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        # Host was previously written as "huggingface.co." (stray trailing dot).
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-31",
        # NOTE(review): the validation window is in 2017 while the training
        # window is January 2016 -- confirm the "sample" config contains 2017
        # filings; if it does not, the validation split will be empty.
        val_filing_start_date="2017-01-22",
        val_filing_end_date="2017-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    combined = pd.concat([df_train, df_val], ignore_index=True)

    # Clean and structure the DataFrame
    return combined[_COLUMNS]


def _as_text(value):
    """Return *value* if it is a string, otherwise an empty string.

    Guards the string concatenation and display below against missing
    (``None`` / NaN) abstract or claims fields.
    """
    return value if isinstance(value, str) else ""


tokenizer, model = load_model(model_name)

# Load dataset with training and validation data for more comprehensive analysis
with st.spinner("Loading patent dataset..."):
    df = load_patent_dataframe()

PAN = df["patent_number"].drop_duplicates()

# Streamlit UI
st.title("Harvard USPTO Patentability Predictor")

with st.form("patent-form"):
    make_choice = st.selectbox("Select the Patent Application Number:", PAN)
    submitted = st.form_submit_button(label="Submit")

if submitted:
    # Select the matching row once; the first match wins if the number repeats.
    selected = df.loc[df["patent_number"] == make_choice].iloc[0]
    abstract = _as_text(selected["abstract"])
    claims = _as_text(selected["claims"])
    decision = selected["decision"]

    st.subheader(":blue[Patent Abstract]")
    st.info(abstract)

    st.subheader(":blue[Patent Claims]")
    st.info(claims)

    # Combine abstract and claims for a comprehensive prediction
    input_text = f"{abstract} {claims}"
    inputs = tokenizer(
        input_text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
    id2label = {0: "REJECTED", 1: "ACCEPTED"}
    # A single text was tokenized, so row 0 holds its class probabilities.
    predicted_class_id = probabilities[0].argmax().item()
    pred_label = id2label[predicted_class_id]
    score = probabilities[0][predicted_class_id].item()

    st.subheader(":green[Prediction Result]")
    if pred_label == "ACCEPTED":
        st.success(f"The patent is likely to be **ACCEPTED** with a score of {score:.2f}.")
    else:
        st.error(f"The patent is likely to be **REJECTED** with a score of {score:.2f}.")

    st.write(f"**Decision Summary:** {decision}")