File size: 2,921 Bytes
986681b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset
import pandas as pd

# Model selection: identifier handed to the `from_pretrained` loaders below.
model_name = "juliaannjose/finetuned_model"


@st.cache_resource
def load_model(model_name):
    """Build the tokenizer, classifier model and pipeline for ``model_name``.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    returned objects across script reruns instead of loading them again.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model, classifier)`` tuple, where ``classifier`` is a
        ``"text-classification"`` pipeline built from the other two objects.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    seq_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    text_clf = pipeline("text-classification", model=seq_model, tokenizer=tok)
    return tok, seq_model, text_clf


tokenizer, model, classifier = load_model(model_name)

# Load dataset with training and validation data
@st.cache_data(show_spinner=False)
def _load_patent_frame():
    """Load the HUPD "sample" splits and return them as a single DataFrame.

    Wrapped in ``st.cache_data`` because Streamlit re-executes the whole script
    on every widget interaction; without caching the dataset would be reloaded
    and converted to pandas on each rerun. ``show_spinner=False`` leaves the
    progress message to the caller's own ``st.spinner``.

    Returns:
        A DataFrame holding the train rows followed by the validation rows,
        restricted to the columns ``patent_number``, ``decision``,
        ``abstract``, ``claims`` and ``filing_date``.
    """
    # NOTE(review): the validation window is in 2017 while the training window
    # is January 2016 - confirm the "sample" config really contains 2017
    # filings, otherwise the validation split contributes no rows.
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        # Host normalised to "huggingface.co" (the original had a stray
        # trailing dot after ".co").
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-31",
        val_filing_start_date="2017-01-22",
        val_filing_end_date="2017-01-31",
    )
    frames = [pd.DataFrame(dataset_dict[split]) for split in ("train", "validation")]
    combined = pd.concat(frames, ignore_index=True)

    # Structure the DataFrame: keep only the fields this app uses.
    return combined[["patent_number", "decision", "abstract", "claims", "filing_date"]]


with st.spinner("Loading patent dataset..."):
    df = _load_patent_frame()

# Distinct values offered in the selection widget.
# NOTE(review): the widget label says "Patent Application Number" but this is
# the patent_number column - confirm this is the intended identifier.
PAN = df["patent_number"].drop_duplicates()

# Streamlit UI: pick a patent, show its text, and display the model's prediction.
st.title("Patentability Predictor")

with st.form("patent-form"):
    make_choice = st.selectbox("Select the Patent Application Number:", PAN)
    submitted = st.form_submit_button(label="Submit")

    if submitted:
        # Take the first row whose patent_number equals the selection and read
        # every field from that one row.
        record = df.loc[df["patent_number"] == make_choice].iloc[0]
        abstract = record["abstract"]
        claims = record["claims"]
        decision = record["decision"]

        # Show the source text that the prediction is based on.
        for heading, body in (
            (":blue[Patent Abstract]", abstract),
            (":blue[Patent Claims]", claims),
        ):
            st.subheader(heading)
            st.info(body)

        # The model scores the abstract and the claims together as one text.
        input_text = " ".join((abstract, claims))
        encoded = tokenizer(
            input_text,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            logits = model(**encoded).logits
            probabilities = torch.softmax(logits, dim=1)

        # Label order is hard-coded here; presumably it matches how the
        # checkpoint was fine-tuned - verify against the model config.
        id2label = {0: "REJECTED", 1: "ACCEPTED"}
        predicted_class_id = torch.argmax(probabilities).item()
        pred_label = id2label[predicted_class_id]

        st.subheader(":green[Prediction Result]")
        if pred_label != "ACCEPTED":
            st.error(f"The patent is likely to be **REJECTED** with a score of {probabilities[0, 0].item():.2f}.")
        else:
            st.success(f"The patent is likely to be **ACCEPTED** with a score of {probabilities[0, 1].item():.2f}.")

        # The decision value stored in the dataset row, shown next to the prediction.
        st.write(f"**Decision Summary:** {decision}")