khanfou committed on
Commit
bdfb35b
·
unverified ·
1 Parent(s): d7d9bbc

Delete app_1.py

Browse files
Files changed (1) hide show
  1. app_1.py +0 -28
app_1.py DELETED
@@ -1,28 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import streamlit as st
4
- HG_DIR = '/nlp/scr/msuzgun/cache_extra/huggingface'
5
- # Specify HG cache dirs -- currently used only for the 2.7b model
6
- os.environ['TRANSFORMERS_CACHE'] = f'{HG_DIR}/transformers'
7
- os.environ['HF_HOME'] = HG_DIR
8
-
9
- ## Import relevant libraries and dependencies
10
- #pip install datasets
11
- # Pretty print
12
- from pprint import pprint
13
- # Datasets load_dataset function
14
- from datasets import load_dataset
15
- # Transformers AutoTokenizer
16
- #from transformers import AutoTokenizer
17
- #tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
18
- # Standard PyTorch DataLoader
19
- from torch.utils.data import DataLoader
20
-
21
-
22
- dataset_dict = load_dataset('HUPD/hupd',name='sample',data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather", cache_dir ='/u/scr/nlp/data/HUPD',icpr_label=None,train_filing_start_date='2016-01-01',train_filing_end_date='2016-01-31',val_filing_start_date='2017-01-01',val_filing_end_date='2017-01-31')
23
-
24
- df = pd.DataFrame.from_dict(dataset_dict["train"])
25
-
26
- # Create a DataFrame object from list
27
- df = pd.DataFrame(df,columns =['patent_number','decision', 'abstract', 'claims','filing_date'])
28
- st.dataframe(df)