Spaces:
Sleeping
Sleeping
File size: 6,122 Bytes
14fa848 30d04c6 197e844 14fa848 30d04c6 b5dd5bc 30d04c6 b5dd5bc 30d04c6 b5dd5bc 30d04c6 197e844 100e8bf 197e844 c89a5c0 197e844 c89a5c0 100e8bf 7edd56d 100e8bf c89a5c0 100e8bf c89a5c0 100e8bf c89a5c0 100e8bf 197e844 c89a5c0 197e844 c89a5c0 197e844 c89a5c0 197e844 30d04c6 7edd56d c89a5c0 7edd56d 30d04c6 b5dd5bc 30d04c6 100e8bf 30d04c6 c89a5c0 197e844 30d04c6 b2cc9b7 30d04c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import pandas as pd
@st.cache_resource()
def load_trained_model():
    """Build the NER pipeline for the fine-tuned PLOD-CW BERT checkpoint.

    Loads the tokenizer and the token-classification model for the
    ``LampOfSocrates/bert-cased-plodcw-sourav`` checkpoint, prints the
    id-to-label mapping stored in the model config, and wraps both in a
    ``transformers`` ``"ner"`` pipeline. The function is decorated with
    ``st.cache_resource``.

    Returns:
        The ``"ner"`` pipeline built from the loaded model and tokenizer.
    """
    checkpoint = "LampOfSocrates/bert-cased-plodcw-sourav"
    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    bert_model = AutoModelForTokenClassification.from_pretrained(checkpoint)

    # Report which labels this checkpoint can emit.
    label_map = bert_model.config.id2label
    print(f"Can recognise the following labels {label_map}")

    return pipeline("ner", model=bert_model, tokenizer=bert_tokenizer)
def load_random_examples(dataset_name, num_examples=5):
    """
    Load one random example from the test split of a Hugging Face dataset.

    Args:
        dataset_name (str): The name of the dataset to load, e.g.
            "surrey-nlp/PLOD-CW". The dataset must provide a ``test`` split
            with ``tokens`` and ``ner_tags`` columns.
        num_examples (int): Accepted for backward compatibility but not
            used: exactly one example is sampled per call, which is what the
            page that displays the result advertises.

    Returns:
        pd.DataFrame: A DataFrame built from the sampled ``tokens`` and
        ``ner_tags`` series, i.e. one row per series and one column for the
        sampled example.
    """
    # Imported here so the dependency is only needed when examples are shown.
    from datasets import load_dataset

    # Honour the caller's dataset name instead of a hard-coded identifier.
    dataset = load_dataset(dataset_name)

    # Convert the test split to a pandas DataFrame
    df = pd.DataFrame(dataset['test'])

    # Select a single random example
    random_examples = df.sample(n=1)
    tokens = random_examples['tokens']
    ner_tags = random_examples['ner_tags']
    return pd.DataFrame((tokens, ner_tags))
def render_entities(tokens, entities):
    """
    Show a two-column "Token" / "Entity" table on the Streamlit page.

    Injects the table stylesheet, writes a title and a short description,
    then hands ``tokens`` and ``entities`` to ``st.table`` as the two columns.
    """
    # Stylesheet for the table; injected as raw HTML.
    table_css = """
<style>
body {
font-family: 'Arial', sans-serif;
background-color: #f0f0f5;
color: #333333;
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #dddddd;
}
th {
background-color: #4CAF50;
color: white;
width: 16.66%;
}
tr:hover {
background-color: #f5f5f5;
}
td {
width: 16.66%;
}
</style>
"""
    st.markdown(table_css, unsafe_allow_html=True)

    # Heading and caption above the table.
    st.title("Model predicted Token vs Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    # One column per argument.
    st.table({"Token": tokens, "Entity": entities})
def render_random_examples():
    """
    Show a randomly sampled PLOD-CW example as a Streamlit table.

    The sample is kept in ``st.session_state['random_examples']``. It is
    loaded when the key is missing and replaced whenever the
    "Show another set of random examples" button is pressed.
    """
    # Stylesheet for the table; injected as raw HTML.
    table_css = """
<style>
body {
font-family: 'Arial', sans-serif;
background-color: #f0f0f5;
color: #333333;
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #dddddd;
}
th {
background-color: #4CAF50;
color: white;
width: 16.66%;
}
tr:hover {
background-color: #f5f5f5;
}
td {
width: 16.66%;
}
</style>
"""
    st.markdown(table_css, unsafe_allow_html=True)

    # Heading and caption above the table.
    st.title("Random Examples from PLOD-CW")
    st.write("This table shows 1 random examples from the PLOD-CW dataset in a cool and chilled theme.")

    state_key = 'random_examples'
    # The button is evaluated first so it is drawn on every run; a click or a
    # missing cached sample both trigger a (re)load.
    refresh_clicked = st.button('Show another set of random examples')
    if refresh_clicked or state_key not in st.session_state:
        st.session_state[state_key] = load_random_examples("surrey-nlp/PLOD-CW")

    # Display the cached sample.
    st.table(st.session_state[state_key])
def _highlight_entities(text, entities, label_colors):
    """Return ``text`` as HTML with each entity span wrapped in a coloured ``<mark>``.

    Args:
        text (str): The raw text the entities were extracted from.
        entities (list[dict]): Pipeline results; each dict is read for its
            ``entity`` label and its ``start`` / ``end`` character offsets.
        label_colors (dict): Maps an entity label to a CSS colour; labels not
            in the mapping fall back to ``'lightgray'``.

    Returns:
        str: HTML in which every piece of ``text`` has been HTML-escaped.
    """
    # Local import keeps this block self-contained.
    from html import escape

    pieces = []
    cursor = 0
    for entity in entities:
        start = entity.get('start')
        end = entity.get('end')
        # Skip spans without offsets, and spans that would rewind past text
        # that has already been emitted (which would duplicate characters).
        if start is None or end is None or start < cursor:
            continue
        color = label_colors.get(entity['entity'], 'lightgray')
        # Plain text before the entity, escaped because it is user input.
        pieces.append(escape(text[cursor:start]))
        # The entity itself, escaped and highlighted.
        pieces.append(
            f'<mark style="background-color: {color}; border-radius: 3px;">'
            f'{escape(text[start:end])}</mark>'
        )
        cursor = end
    # Whatever follows the last entity.
    pieces.append(escape(text[cursor:]))
    return "".join(pieces)


def prep_page():
    """Render the Streamlit page for the PLOD-CW NER demo.

    Loads the NER pipeline, shows a text area, and when text has been
    entered: runs the pipeline on it, displays the text with recognised
    entities highlighted, and renders a token/entity table. The random
    examples section is rendered in every case.
    """
    model = load_trained_model()

    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    # Text input
    text = st.text_area("Enter your sentence here:")

    # Perform NER and display results
    if text:
        st.write("Entities recognized:")
        entities = model(text)

        # Map entity labels to highlight colours
        label_colors = {
            'B-LF': 'lightblue',
            'B-O': 'lightgreen',
            'B-AC': 'lightcoral',
            'I-LF': 'lightyellow',
        }

        # The text is escaped inside the helper, so only the <mark> tags
        # built here reach the page as raw HTML.
        styled_text = _highlight_entities(text, entities, label_colors)
        st.markdown(styled_text, unsafe_allow_html=True)

        # Give the table one token and one label per row, rather than the
        # whole input string and the raw result dicts.
        render_entities(
            [entity['word'] for entity in entities],
            [entity['entity'] for entity in entities],
        )

    render_random_examples()
# Script entry point: build the page when the file is run as a script.
if __name__ == '__main__':
    prep_page()