Spaces:

LampOfSocrates
/

hf-nlp-cw-group27

Sleeping

File size: 6,122 Bytes

import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import pandas as pd

@st.cache_resource()
def load_trained_model():
    
    tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
    model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
    # Mapping labels
    id2label = model.config.id2label
    # Print the label mapping
    print(f"Can recognise the following labels {id2label}")

    # Load the NER model and tokenizer from Hugging Face
    #ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
    ner_pipeline = pipeline("ner", model=model, tokenizer = tokenizer)
    return ner_pipeline

def load_random_examples(dataset_name, num_examples=5):
    """
    Load random examples from the specified Hugging Face dataset.
    Args:
        dataset_name (str): The name of the dataset to load.
        num_examples (int): The number of random examples to load.
    Returns:
        pd.DataFrame: A DataFrame containing the random examples.
    """
    # Load the dataset
    from datasets import load_dataset
    dataset = load_dataset("surrey-nlp/PLOD-CW")
    
    # Convert the dataset to a pandas DataFrame
    df = pd.DataFrame(dataset['test'])
    
    # Select random examples
    random_examples = df.sample(n=1)

    tokens = random_examples.tokens
    ner_tags = random_examples.ner_tags

    return pd.DataFrame((tokens, ner_tags))


def render_entities(tokens, entities):
    """
    Renders a page with a 2-column table showing the entity corresponding to each token.
    """
    
    # Custom CSS for chilled and cool theme
    st.markdown("""
        <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f0f0f5;
            color: #333333;
        }
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #dddddd;
        }
        th {
            background-color: #4CAF50;
            color: white;
            width: 16.66%;
        }
        tr:hover {
            background-color: #f5f5f5;
        }
        td {
            width: 16.66%;
        }
        </style>
        """, unsafe_allow_html=True)

    # Title and description
    st.title("Model predicted Token vs Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    # Create the table
    table_data = {"Token": tokens, "Entity": entities}
    st.table(table_data)

def render_random_examples():
    """
    Render random examples from the PLOD-CW dataset in a Streamlit table.
    """
    # Load random examples
    
    # Custom CSS for chilled and cool theme
    st.markdown("""
        <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f0f0f5;
            color: #333333;
        }
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #dddddd;
        }
        th {
            background-color: #4CAF50;
            color: white;
            width: 16.66%;
        }
        tr:hover {
            background-color: #f5f5f5;
        }
        td {
            width: 16.66%;
        }
        </style>
        """, unsafe_allow_html=True)

    # Title and description
    st.title("Random Examples from PLOD-CW")
    st.write("This table shows 1 random examples from the PLOD-CW dataset in a cool and chilled theme.")

    # Add a button to select a different set of random samples
    if st.button('Show another set of random examples'):
        st.session_state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")

    # Load random examples if not already loaded
    if 'random_examples' not in st.session_state:
        st.session_state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")

    # Display the table
    st.table(st.session_state['random_examples'])

def prep_page():
    model = load_trained_model()

    # Streamlit app
    # Page configuration
    #st.set_page_config(page_title="NER Token Entities", layout="centered")

    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    # Text input
    text = st.text_area("Enter your sentence here:")

    # Perform NER and display results
    if text:
        st.write("Entities recognized:")
        entities = model(text)
    
        # Create a dictionary to map entity labels to colors
        label_colors = {
            'B-LF': 'lightblue',
            'B-O': 'lightgreen',
            'B-AC': 'lightcoral',
            'I-LF': 'lightyellow'
        }
    
        # Prepare the HTML output with styled entities
        def get_entity_html(text, entities):
            html = ""
            last_idx = 0
            for entity in entities:
                start = entity['start']
                end = entity['end']
                label = entity['entity']
                entity_text = text[start:end]
                color = label_colors.get(label, 'lightgray')
            
                # Append the text before the entity
                html += text[last_idx:start]
                # Append the entity with styling
                html += f'<mark style="background-color: {color}; border-radius: 3px;">{entity_text}</mark>'
                last_idx = end
        
            # Append any remaining text after the last entity
            html += text[last_idx:]
            return html
    
        # Generate and display the styled HTML
        styled_text = get_entity_html(text, entities)
        
        st.markdown(styled_text, unsafe_allow_html=True)

        render_entities(text, entities)

    render_random_examples()



if __name__ == '__main__':
    
    prep_page()