import json

import streamlit as st
from datasets import load_dataset

st.set_page_config(page_title="Large GitHub Issues", layout="wide")
st.title("Issues with large text")

# ``st.cache`` is deprecated in recent Streamlit releases in favour of
# ``st.cache_data`` / ``st.cache_resource``. Prefer ``cache_resource`` (the
# dataset handle is shared across reruns rather than copied) and fall back to
# the legacy decorator so older Streamlit installs keep working.
_cache = getattr(st, "cache_resource", None) or st.cache


@_cache()
def load_data():
    """Load the ``train`` split of ``loubnabnl/large-text-issues``.

    The result is cached by Streamlit so the dataset is not reloaded on
    every script rerun.

    Returns:
        The object returned by ``datasets.load_dataset`` for that split.
    """
    return load_dataset("loubnabnl/large-text-issues", split="train")


def print_issue(events):
    """Render every event of one issue conversation on the page.

    Each event is shown as a horizontal rule, a Markdown summary line and
    the event text in a plain code block.

    Args:
        events: Iterable of mapping-like event records. Each record is
            indexed with the keys ``author``, ``action``, ``type``,
            ``title``, ``size``, ``nb_lines`` and ``text``; the key
            ``masked_author`` is optional.
    """
    for event in events:
        st.markdown("---")
        # Only mention the masked name when the record carries that key.
        masked_author = (
            f"masked as {event['masked_author']}"
            if "masked_author" in event
            else ""
        )
        # Adjacent f-strings are concatenated at compile time; this avoids a
        # backslash continuation inside the literal, which would leak source
        # indentation into the rendered text.
        st.markdown(
            f"**Author:** {event['author']} {masked_author}, "
            f"{event['action']} {event['type']} with title: {event['title']}. "
            f"Text size is: **{event['size']}** and "
            f"Number of lines is: **{event['nb_lines']}**"
        )
        # language="none" shows the raw text without syntax highlighting.
        st.code(f"{event['text']}", language="none")


samples = load_data()

# Keep the index selector in a narrow left column; the second column is unused.
# NOTE(review): the original indentation was lost, so it is assumed that only
# the number input lives inside the column — confirm against the deployed app.
col1, _ = st.columns([2, 4])
with col1:
    index_example = st.number_input(
        f"Index of the chosen conversation from the existing {len(samples)}",
        min_value=0,
        max_value=len(samples) - 1,
        value=0,
        step=1,
    )

# Fetch the selected row once and reuse it for both the size and the events.
sample = samples[index_example]
st.write(f"Issue size: {sample['text_size_no_bots']}\n\n")
print_issue(sample["events"])