File size: 4,062 Bytes
0648f19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px

from ast import literal_eval
from lxml import etree as ET


def prepare_data() -> pd.DataFrame:
    """Load the role-mapped frame annotations and derive the plotting columns.

    Reads ``data/2002829_mapped_roles.csv``, unpacks every ``frame`` tuple
    into separate columns, attaches the scenario and super-scenario that
    ``load_kicktionary_info`` reports for the frame name, and adds two
    derived columns:

    * ``time_point`` -- ``(sentence_idx + frame_idx / max_frame_idx_in_sentence)
      / (max_sentence_idx + 1)``, i.e. a relative position of the frame
      instance within the article.
    * ``first_role`` -- the first entry of ``changed_roles``, or ``None``
      when ``changed_roles`` is empty.

    Returns:
        A DataFrame with the columns ``sentence_idx``, ``frame_idx``,
        ``frame_name``, ``frame_scenario``, ``frame_super_scenario``,
        ``frame_target``, ``changed_roles``, ``roles``, ``time_point`` and
        ``first_role``.

    Raises:
        KeyError: If a frame name has no entry in the scenario or
            super-scenario mapping.
    """
    data = pd.read_csv(
        "data/2002829_mapped_roles.csv",
        index_col=0,
        # These columns are stored as Python literals and must be parsed
        # back into Python objects.
        converters={
            "frame": literal_eval,
            "changed_roles": literal_eval,
            "unchanged_roles": literal_eval,
            "roles": literal_eval,
        },
    )

    frame_to_scenario, frame_to_super_scenario = load_kicktionary_info()

    # Extract information from the "frame" tuples, then filter & reorder
    # the columns. Plain dict indexing is kept on purpose so that unknown
    # frame names fail loudly instead of silently becoming NaN.
    frames = data["frame"]
    frame_names = frames.apply(lambda frame: frame[2])
    data_ = data.assign(
        sentence_idx=frames.apply(lambda frame: frame[0]),
        frame_idx=frames.apply(lambda frame: frame[1]),
        frame_name=frame_names,
        frame_scenario=frame_names.apply(lambda name: frame_to_scenario[name]),
        frame_super_scenario=frame_names.apply(
            lambda name: frame_to_super_scenario[name]
        ),
        frame_target=frames.apply(lambda frame: frame[3]),
    )[
        [
            "sentence_idx",
            "frame_idx",
            "frame_name",
            "frame_scenario",
            "frame_super_scenario",
            "frame_target",
            "changed_roles",
            "roles",
        ]
    ]

    # Position of each frame instance in the article: the sentence index
    # plus the frame's relative offset inside its sentence, scaled by the
    # number of sentence slots.
    max_sent = data_["sentence_idx"].max()
    max_frame_in_sent = data_.groupby("sentence_idx")["frame_idx"].transform("max")
    # A sentence whose highest frame index is 0 would otherwise divide by
    # zero; such frames get offset 0 within their sentence.
    offset_in_sent = (
        data_["frame_idx"] / max_frame_in_sent.where(max_frame_in_sent != 0)
    ).fillna(0.0)
    time_point = (data_["sentence_idx"] + offset_in_sent) / (max_sent + 1)

    return data_.assign(
        time_point=time_point,
        first_role=data_["changed_roles"].apply(
            lambda roles: roles[0] if roles else None
        ),
    )


def load_kicktionary_info():
    """Build the frame lookups from the Kicktionary lexical-unit file.

    Parses ``kicktionary_lu_info.xml`` and, for every ``LEXICAL-UNIT``
    element whose ``frame`` attribute is non-empty, records the element's
    ``scenario`` and ``super-scenario`` attributes under that frame name.
    When several units share a frame name, the last one encountered wins.

    Returns a ``(frame_to_scenario, frame_to_super_scenario)`` tuple of dicts.
    """
    tree = ET.parse("kicktionary_lu_info.xml")

    scenario_by_frame = {}
    for unit in tree.xpath(".//LEXICAL-UNIT"):
        if unit.attrib["frame"]:
            scenario_by_frame[unit.attrib["frame"]] = unit.attrib["scenario"]

    super_scenario_by_frame = {}
    for unit in tree.xpath(".//LEXICAL-UNIT"):
        if unit.attrib["frame"]:
            super_scenario_by_frame[unit.attrib["frame"]] = unit.attrib["super-scenario"]

    return scenario_by_frame, super_scenario_by_frame


def _count_bar(values, label):
    """Return a Plotly bar chart of how often each distinct value occurs.

    The counts are materialised as an explicit two-column frame
    (``label`` and ``"count"``) so the chart does not depend on how
    ``value_counts`` names its result, which differs between pandas
    versions.
    """
    counts = values.value_counts().rename_axis(label).reset_index(name="count")
    return px.bar(counts, x=label, y="count")


def explore_timeline():
    """Render the "Football Perspective Chains" Streamlit page.

    The page lets the user choose the frame granularity (frame, scenario or
    super scenario) and then shows three sections built from
    ``prepare_data()``, restricted to rows that have a ``first_role``:

    * "Timeline" -- scatter plot of ``time_point`` against ``first_role``,
      coloured by the selected frame column.
    * "Overall focus" -- bar chart of ``first_role`` counts.
    * "Focus by frame" -- per team, a bar chart of the selected frame
      column's counts.
    """
    with st.container():

        st.title("Football Perspective Chains")

        frame_label_map = {
            "frame_name": "frames",
            "frame_scenario": "scenarios (groups of related frames)",
            "frame_super_scenario": "super scenarios (groups of related scenarios)",
        }
        frame_column = st.selectbox(
            label="Display frames as: ",
            options=("frame_name", "frame_scenario", "frame_super_scenario"),
            format_func=lambda label: frame_label_map[label],
        )

        # Every chart below ignores rows without a first role, so filter once.
        focused = prepare_data().dropna(axis=0, subset=["first_role"])

        st.header("Timeline")
        time_scatter = px.scatter(
            focused, x="first_role", y="time_point", color=frame_column
        )
        time_scatter.update_traces(marker_size=20)
        time_scatter.update_layout(height=1000)
        st.plotly_chart(time_scatter)

        st.header("Overall focus")
        st.plotly_chart(_count_bar(focused["first_role"], "first_role"))

        st.header("Focus by frame")

        for team in ["Man. United", "Rangers"]:
            st.subheader(team)
            # NOTE(review): assumes ``first_role`` holds the team label the
            # frame focuses on (it is what "Overall focus" counts) -- confirm
            # against the data. Without this filter both team sections would
            # show the very same chart.
            team_rows = focused[focused["first_role"] == team]
            if team_rows.empty:
                st.info(f"No frames with a first role of {team}.")
                continue
            # Count the column the user selected rather than a fixed one, so
            # the chart follows the "Display frames as" choice.
            st.plotly_chart(_count_bar(team_rows[frame_column], frame_column))




# Render the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    explore_timeline()