Spaces:
Sleeping
Sleeping
https://huggingface.co./spaces/juliaannjose/hupd_patent_classifier/ | |
import streamlit as st | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from datasets import load_dataset | |
import pandas as pd | |
# finetuned model
language_model_path = "juliaannjose/finetuned_model"


@st.cache_data(show_spinner=False)
def load_patent_frame():
    """Load the HUPD "sample" configuration as a single DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so
    the dataset load and DataFrame conversion are cached here instead of
    being repeated on each rerun.

    Returns:
        The train and validation splits concatenated into one DataFrame
        with a fresh integer index.
    """
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co./datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    return pd.concat([df_train, df_val], ignore_index=True)


# load the dataset to
# use the patent number, abstract and claim columns for UI
# (the cache's own spinner is disabled above so only this one is shown)
with st.spinner("Loading..."):
    df = load_patent_frame()

# drop down menu with patent numbers
_patent_id = st.selectbox(
    "Select the Patent Number",
    options=df["patent_number"],
)
# display abstract and claim | |
def get_abs_claim(_pid, frame=None):
    """Return the abstract and claims text for one patent number.

    Args:
        _pid: Patent number to look up in the ``patent_number`` column.
        frame: Optional DataFrame holding ``patent_number``, ``abstract``
            and ``claims`` columns. Defaults to the module-level ``df``.

    Returns:
        Tuple ``(abstract, claims)`` taken from the first matching row.

    Raises:
        IndexError: If no row carries the requested patent number.
    """
    if frame is None:
        frame = df
    # filter once and reuse the matching rows for both columns
    matches = frame.loc[frame["patent_number"] == _pid]
    if matches.empty:
        raise IndexError(f"patent number {_pid!r} not found")
    return matches["abstract"].values[0], matches["claims"].values[0]
# look up the texts for the selected patent and render each under its title
_abstract, _claim = get_abs_claim(_patent_id)
for _heading, _body in (("Abstract:", _abstract), ("Claim:", _claim)):
    st.title(_heading)
    st.write(_body)
# model and tokenizer initialization | |
@st.cache_resource
def load_model(language_model_path):
    """Load the tokenizer and sequence-classification model for a checkpoint.

    Decorated with ``st.cache_resource`` so the loaded objects are cached
    rather than rebuilt on every call.

    Args:
        language_model_path: Checkpoint identifier passed to
            ``from_pretrained`` for both the tokenizer and the model.

    Returns:
        Tuple ``(tokenizer, model)``.
    """
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )
# tokenizer and model come from the cached loader
tokenizer, model = load_model(language_model_path)

# class index -> label shown to the user, and the heading for its score
id2label = {0: "REJECTED", 1: "ACCEPTED"}
score_heading = {0: "Rejection Score:", 1: "Acceptance Score:"}

# when submit button clicked, run the model and get result
if st.button("Submit"):
    # input to our model: abstract immediately followed by the claims.
    # Tokenization is done only here so it is not repeated on reruns that
    # did not press Submit.
    input_text = _abstract + _claim
    inputs = tokenizer(
        input_text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # inference only, no gradients needed
    with torch.no_grad():
        outputs = model(**inputs)

    # softmax over the class dimension; one input, so row 0 is the result
    probability = torch.nn.functional.softmax(outputs.logits, dim=1)
    predicted_class_id = probability.argmax(dim=1).item()
    pred_label = id2label[predicted_class_id]

    st.title("Predicted Patentability")
    # Heading and score are taken from the predicted class, so they always
    # agree with the label (a separate ">" comparison disagreed with argmax
    # on an exact tie).
    st.write(score_heading[predicted_class_id])
    st.write(probability[0][predicted_class_id].item())
    st.write("Result:", pred_label)
----------------------------------------------------------- | |