patentability / quick_ref.txt
DrishtiSharma's picture
Update quick_ref.txt
76beeac verified
raw
history blame
2.78 kB
https://huggingface.co/spaces/juliaannjose/hupd_patent_classifier/
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import pandas as pd
# finetuned model
language_model_path = "juliaannjose/finetuned_model"


@st.cache_data(show_spinner=False)
def _load_patent_frame():
    """Load the HUPD sample split and return train + validation as one frame.

    Streamlit re-executes this script on every widget interaction, so the
    download and DataFrame conversion are cached instead of being repeated
    on each rerun.

    Returns:
        A ``pandas.DataFrame`` holding the rows of the "train" split followed
        by the rows of the "validation" split, with a fresh 0..n-1 index.
    """
    # NOTE(review): the host in ``data_files`` is written "huggingface.co."
    # (with a trailing dot) — confirm this is intentional.
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co./datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    return pd.concat([df_train, df_val], ignore_index=True)


# load the dataset to
# use the patent number, abstract and claim columns for UI
with st.spinner("Loading..."):
    df = _load_patent_frame()
# Dropdown listing the values of the "patent_number" column; the selected
# value is kept in _patent_id.
_patent_id = st.selectbox(label="Select the Patent Number", options=df["patent_number"])
# display abstract and claim
def get_abs_claim(_pid, frame=None):
    """Return the abstract and claims text for one patent number.

    Args:
        _pid: Value to match against the "patent_number" column.
        frame: Optional DataFrame with "patent_number", "abstract" and
            "claims" columns. Defaults to the module-level ``df``.

    Returns:
        A ``(abstract, claims)`` tuple taken from the first matching row.

    Raises:
        IndexError: If no row has the requested patent number.
    """
    source = df if frame is None else frame
    # Filter once and reuse the result for both columns.
    matches = source.loc[source["patent_number"] == _pid]
    if matches.empty:
        raise IndexError(f"no patent found with patent_number {_pid!r}")
    return matches["abstract"].values[0], matches["claims"].values[0]
# Fetch the text for the selected patent, then render each part under its
# own heading (abstract first, claims second).
_abstract, _claim = get_abs_claim(_patent_id)
for _heading, _body in (("Abstract:", _abstract), ("Claim:", _claim)):
    st.title(_heading)
    st.write(_body)
# model and tokenizer initialization
@st.cache_resource
def load_model(language_model_path):
    """Load the tokenizer and sequence-classification model for a checkpoint.

    Args:
        language_model_path: Identifier or path handed to ``from_pretrained``
            for both the tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )
tokenizer, model = load_model(language_model_path)

# input to our model: the abstract directly followed by the claims text
# NOTE(review): no separator is inserted between the two strings — confirm
# this matches the preprocessing used when the model was fine-tuned.
input_text = _abstract + _claim

# class index -> label shown as the result
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# class index -> caption shown above the score of the predicted class
score_caption = {0: "Rejection Score:", 1: "Acceptance Score:"}

# when submit button clicked, run the model and get result
if st.button("Submit"):
    # Tokenize only when a prediction is requested; the script reruns on
    # every widget interaction and the tokens are unused otherwise.
    inputs = tokenizer(
        input_text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model(**inputs)
        probability = torch.nn.functional.softmax(outputs.logits, dim=1)

    # One input sequence, so the argmax over the class dimension is a single
    # element.
    predicted_class_id = probability.argmax(dim=1).item()
    pred_label = id2label[predicted_class_id]

    st.title("Predicted Patentability")
    # Caption and score both follow the predicted class, so they cannot
    # disagree with the reported result (previously possible on an exact tie).
    st.write(score_caption[predicted_class_id])
    st.write(probability[0][predicted_class_id].item())
    st.write("Result:", pred_label)
-----------------------------------------------------------