File size: 13,599 Bytes
09cb456
 
c94d489
 
 
09cb456
 
 
9707f2e
 
c94d489
09cb456
c94d489
2ef814a
e6e1696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbe06b5
e6e1696
 
 
 
 
 
 
fbe06b5
e6e1696
 
 
 
 
 
 
 
 
 
 
 
 
 
fbe06b5
2ef814a
7fca4bd
e6e1696
 
 
 
 
 
 
 
 
 
 
 
2ef814a
e6e1696
 
 
 
 
 
 
 
2ef814a
c94d489
e6e1696
69c4df6
1bccaa4
69c4df6
 
 
 
 
 
 
1bccaa4
e6e1696
c94d489
 
 
 
 
 
 
 
 
 
 
 
 
e6e1696
c94d489
e6e1696
69c4df6
1bccaa4
69c4df6
 
 
 
 
 
 
1bccaa4
e6e1696
c94d489
 
 
 
 
 
 
 
 
 
 
 
 
e6e1696
c94d489
e6e1696
1bccaa4
69c4df6
 
 
 
 
 
 
1bccaa4
43acaa9
 
 
e6e1696
 
43acaa9
 
 
09cb456
43acaa9
c94d489
43acaa9
 
 
c94d489
 
 
 
 
43acaa9
 
 
 
c94d489
 
 
 
 
 
 
43acaa9
c94d489
e6e1696
69c4df6
1bccaa4
69c4df6
 
 
 
 
 
 
1bccaa4
e6e1696
c94d489
 
e6e1696
c94d489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09cb456
e6e1696
09cb456
 
 
 
e6e1696
 
 
 
 
d7215bf
 
 
 
e6e1696
 
d7215bf
e6e1696
d7215bf
e6e1696
d7215bf
 
 
e6e1696
 
d7215bf
 
e6e1696
70a2667
9707f2e
c94d489
 
 
60e8bdb
 
9707f2e
60e8bdb
 
 
2137d07
e6e1696
 
 
 
 
 
 
 
 
 
 
b64b0b7
9707f2e
09cb456
 
 
 
 
 
 
 
 
 
0fac78f
09cb456
 
 
e6e1696
c94d489
e6e1696
 
 
 
c94d489
 
 
e6e1696
 
c94d489
e6e1696
 
 
c94d489
 
 
 
e6e1696
 
 
 
 
c94d489
 
 
d7215bf
c94d489
d7215bf
c94d489
 
 
d7215bf
c94d489
d7215bf
c94d489
 
 
d7215bf
c94d489
d7215bf
c94d489
 
 
d7215bf
c94d489
d7215bf
 
e6e1696
1bccaa4
 
09cb456
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, timedelta
import random



# [All the scheduling functions and analytics functions here]



import pandas as pd
import random
from itertools import combinations, product
from datetime import date, timedelta

def generate_schedule_from_data(conference_team_df, available_dates):
    """Build a dated match schedule from a table of teams and conferences.

    Every pair of teams inside a conference meets twice (once in each
    'Team 1' / 'Team 2' order).  In addition, every team gets one randomly
    drawn opponent from a different conference.  Dates are attached by
    ``assign_dates_to_matches``.

    Args:
        conference_team_df: DataFrame with 'Conference' and 'Team' columns.
        available_dates: Dates passed through to ``assign_dates_to_matches``.

    Returns:
        DataFrame with the columns 'Team 1', 'Team 2', 'Date',
        'Conference 1' and 'Conference 2'.  'Date' is converted with
        ``pd.to_datetime`` so that the ``.dt`` accessor works on it.

    Raises:
        ValueError: If the 'Conference' or 'Team' column is missing.
    """
    # Validate before touching the columns; otherwise a missing column
    # surfaces as a KeyError and this message is never shown.
    if 'Conference' not in conference_team_df.columns or 'Team' not in conference_team_df.columns:
        raise ValueError("The CSV file must contain 'Conference' and 'Team' columns.")

    conferences = conference_team_df['Conference'].unique()

    # Intra-conference matches: each pairing is played twice, with the
    # 'Team 1' / 'Team 2' roles swapped for the second game.
    intra_conference_matches = []
    for conf in conferences:
        teams_in_conf = conference_team_df[conference_team_df['Conference'] == conf]['Team'].tolist()
        pairings = list(combinations(teams_in_conf, 2))
        intra_conference_matches.extend(pairings)
        intra_conference_matches.extend([(team2, team1) for team1, team2 in pairings])

    # Inter-conference matches: one randomly chosen opponent per team.
    inter_conference_matches = []
    for team, conference in zip(conference_team_df['Team'], conference_team_df['Conference']):
        other_conferences = [conf for conf in conferences if conf != conference]
        other_teams = conference_team_df[conference_team_df['Conference'].isin(other_conferences)]['Team'].tolist()
        if not other_teams:
            # Only one conference in the data: there is nobody to draw, and
            # random.sample would raise on an empty population.
            continue
        opponent = random.sample(other_teams, 1)[0]
        inter_conference_matches.append((team, opponent))

    combined_schedule = intra_conference_matches + inter_conference_matches
    scheduled_matches = assign_dates_to_matches(combined_schedule, available_dates)

    schedule_df = pd.DataFrame(scheduled_matches, columns=['Team 1', 'Team 2', 'Date'])
    # Plain datetime.date objects give an object-dtype column on which
    # `.dt` raises; normalise to datetime64 here.
    schedule_df['Date'] = pd.to_datetime(schedule_df['Date'])

    team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()
    schedule_df['Conference 1'] = schedule_df['Team 1'].map(team_to_conference)
    schedule_df['Conference 2'] = schedule_df['Team 2'].map(team_to_conference)
    return schedule_df

# To use this function, load your data into a DataFrame and call it with a list of dates:
# df = pd.read_csv('path/to/your/csv')
# schedule_df = generate_schedule_from_data(df, generate_available_dates(date(2022, 11, 6)))


# 6. generate_mock_historical_data
def generate_mock_historical_data(schedule_df):
    """Return a mock "previous season" version of a schedule.

    The result is a copy of ``schedule_df`` with two extra columns,
    'Score 1' and 'Score 2' (random integers from 50 to 100 inclusive), and
    with every 'Date' moved back by one year.

    Args:
        schedule_df: Schedule DataFrame containing a 'Date' column.

    Returns:
        A new DataFrame; ``schedule_df`` itself is left unmodified.
    """
    # Work on a copy: writing into the argument would also shift the dates of
    # the caller's current schedule and attach scores to it.
    historical_df = schedule_df.copy()
    num_games = len(historical_df)

    # Generate random scores for each team in each game
    historical_df['Score 1'] = [random.randint(50, 100) for _ in range(num_games)]
    historical_df['Score 2'] = [random.randint(50, 100) for _ in range(num_games)]

    # Assume the historical data is from the previous year.  Convert first so
    # the subtraction runs on datetime64 values even if 'Date' held date objects.
    historical_df['Date'] = pd.to_datetime(historical_df['Date']) - pd.DateOffset(years=1)

    return historical_df

# To use this function, pass the generated schedule DataFrame:
# historical_data = generate_mock_historical_data(schedule_df)

# Build the list of available match dates, then assign them to matches
def generate_available_dates(start_date, num_days=300):
    """List the playable dates in the ``num_days`` days beginning at ``start_date``.

    A day is kept when its ``weekday()`` is 0, 2, 3 or 5, i.e. Monday,
    Wednesday, Thursday or Saturday.
    """
    playable_weekdays = [0, 2, 3, 5]
    playable_dates = []
    for offset in range(num_days):
        day = start_date + timedelta(days=offset)
        if day.weekday() in playable_weekdays:
            playable_dates.append(day)
    return playable_dates

def assign_dates_to_matches(matches, available_dates):
    """Attach a date to every match, cycling through ``available_dates``.

    Match ``i`` receives ``available_dates[i % len(available_dates)]``, so the
    dates are reused from the start once they run out.

    Args:
        matches: Iterable of indexable items; elements 0 and 1 are the teams.
        available_dates: Sequence of dates to hand out.

    Returns:
        List of ``(team_1, team_2, date)`` tuples in the order of ``matches``.

    Raises:
        ValueError: If there are matches to schedule but no dates.
    """
    matches = list(matches)
    num_dates = len(available_dates)
    if matches and num_dates == 0:
        # Without this guard the modulo below fails with ZeroDivisionError.
        raise ValueError("available_dates must contain at least one date to schedule matches.")
    return [(match[0], match[1], available_dates[i % num_dates]) for i, match in enumerate(matches)]

# Team Workload Analysis
def team_workload_analysis(schedule_df, conference_team_df):
    """Draw a stacked bar chart of matches per ISO week, split by 'Team 1'.

    Args:
        schedule_df: Schedule DataFrame with 'Date' and 'Team 1' columns, or
            ``None`` if no schedule exists yet (a placeholder message is
            drawn instead).
        conference_team_df: Not used by this function; kept so that all
            analytics functions share the same signature.

    Returns:
        The matplotlib Figure that was drawn, so that callers can hand an
        explicit figure to e.g. ``st.pyplot``.
    """
    # Check if the DataFrame is None
    if schedule_df is None:
        fig = plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=14, color='red')
        plt.axis('off')
        plt.tight_layout()
        plt.show()
        return fig

    # Derive the week on a copy so the caller's DataFrame does not gain a
    # 'Week' column; pd.to_datetime makes `.dt` work for date objects too.
    week_numbers = pd.to_datetime(schedule_df['Date']).dt.isocalendar().week
    with_week = schedule_df.assign(Week=week_numbers)

    # NOTE: only appearances in the 'Team 1' column are counted, and weeks are
    # ordered by ISO week number, not chronologically across a year change.
    team_counts = with_week.groupby(['Week', 'Team 1']).size().unstack().fillna(0)

    # Plot
    ax = team_counts.plot(kind='bar', stacked=True, figsize=(15, 7), cmap='Oranges')
    plt.title('Team Workload Analysis')
    plt.ylabel('Number of Matches')
    plt.xlabel('Week Number')
    plt.tight_layout()
    plt.legend(title='Teams', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()
    return ax.get_figure()


# Match Distribution
def match_distribution(schedule_df, conference_team_df):
    """Draw a count plot of matches per month (November through March).

    Args:
        schedule_df: Schedule DataFrame with a 'Date' column, or ``None`` if
            no schedule exists yet (a placeholder message is drawn instead).
        conference_team_df: Not used by this function; kept so that all
            analytics functions share the same signature.

    Returns:
        The matplotlib Figure that was drawn, so that callers can hand an
        explicit figure to e.g. ``st.pyplot``.
    """
    # Check if the DataFrame is None
    if schedule_df is None:
        fig = plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=14, color='red')
        plt.axis('off')
        plt.tight_layout()
        plt.show()
        return fig

    # Derive the month on a copy so the caller's DataFrame does not gain a
    # 'Month' column; pd.to_datetime makes `.dt` work for date objects too.
    month_names = pd.to_datetime(schedule_df['Date']).dt.month_name()
    with_month = schedule_df.assign(Month=month_names)

    # Only these months get a bar, in season order.
    # NOTE(review): matches dated in any other month are presumably omitted
    # from the chart because they are not listed in `order` - confirm intended.
    month_order = ['November', 'December', 'January', 'February', 'March']

    # Plot
    fig = plt.figure(figsize=(10, 6))
    sns.countplot(data=with_month, x='Month', order=month_order, palette='Oranges_r')
    plt.title('Match Distribution Across Months')
    plt.ylabel('Number of Matches')
    plt.xlabel('Month')
    plt.tight_layout()
    plt.show()
    return fig


# Inter-Conference Match Analysis
def inter_conference_analysis(schedule_df, conference_team_df):
    """Draw a heatmap counting matches between each pair of conferences.

    Rows are the conference of 'Team 1' and columns the conference of
    'Team 2'; matches where both teams share a conference are excluded.

    Args:
        schedule_df: Schedule DataFrame with 'Team 1' and 'Team 2' columns,
            or ``None`` if no schedule exists yet (a placeholder message is
            drawn instead).
        conference_team_df: DataFrame with 'Team' and 'Conference' columns,
            used to look up each team's conference.

    Returns:
        The matplotlib Figure that was drawn, so that callers can hand an
        explicit figure to e.g. ``st.pyplot``.
    """
    if schedule_df is None:
        fig = plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=14, color='red')
        plt.axis('off')
        plt.tight_layout()
        plt.show()
        return fig

    # Look up conferences into local Series instead of writing columns back
    # into the caller's DataFrame.
    team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()
    conference_1 = schedule_df['Team 1'].map(team_to_conference)
    conference_2 = schedule_df['Team 2'].map(team_to_conference)

    # Keep only the matches that cross conference boundaries
    is_inter_conference = conference_1 != conference_2

    # Creating a crosstab for the heatmap
    heatmap_data = pd.crosstab(
        conference_1[is_inter_conference],
        conference_2[is_inter_conference],
        rownames=['Conference 1'],
        colnames=['Conference 2'],
    )

    # Ensure every conference appears as both a row and a column (missing
    # combinations become 0), in sorted order.  astype(int) keeps the matrix
    # numeric even when there were no inter-conference matches at all.
    conference_order = sorted(set(conference_team_df['Conference']))
    heatmap_data = heatmap_data.reindex(
        index=conference_order, columns=conference_order, fill_value=0
    ).astype(int)

    # Plotting the heatmap
    fig = plt.figure(figsize=(8, 6))
    sns.heatmap(heatmap_data, annot=True, cmap='Oranges', linewidths=.5, cbar_kws={'label': 'Number of Matches'})
    plt.title('Inter-Conference Match Analysis')
    plt.ylabel('Conference 1')
    plt.xlabel('Conference 2')
    plt.show()
    return fig


# Commissioner Analytics
def commissioner_analytics(schedule_df, conference_team_df, commissioners):
    """Draw a bar chart of the number of matches overseen by each commissioner.

    Commissioners are paired with conferences positionally: the i-th name in
    ``commissioners`` is assigned to the i-th distinct conference (in order of
    first appearance in ``conference_team_df``).  A match is attributed to the
    commissioner of its 'Team 1' conference.  Conferences without a paired
    commissioner are left out of the counts.

    Args:
        schedule_df: Schedule DataFrame, or ``None`` if no schedule exists
            yet (a placeholder message is drawn instead).
        conference_team_df: DataFrame with 'Team' and 'Conference' columns.
        commissioners: Sequence of commissioner names.

    Returns:
        The matplotlib Figure that was drawn, so that callers can hand an
        explicit figure to e.g. ``st.pyplot``.
    """
    # Check if the DataFrame is None
    if schedule_df is None:
        fig = plt.figure(figsize=(10, 6))
        plt.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=14, color='red')
        plt.axis('off')
        plt.tight_layout()
        plt.show()
        return fig

    # Assuming each commissioner oversees a specific conference; zip stops at
    # the shorter sequence, so surplus conferences/commissioners are ignored.
    comm_dict = dict(zip(conference_team_df['Conference'].unique(), commissioners))

    # Use the schedule's own 'Conference 1' column when present; otherwise
    # derive it from the team table instead of failing with a KeyError.
    if 'Conference 1' in schedule_df.columns:
        conference_1 = schedule_df['Conference 1']
    else:
        team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()
        conference_1 = schedule_df['Team 1'].map(team_to_conference)

    # Count matches overseen by each commissioner (computed on a local Series
    # so the caller's DataFrame does not gain a 'Commissioner' column).
    commissioner_counts = conference_1.map(comm_dict).value_counts()

    # Plot using matplotlib
    fig = plt.figure(figsize=(10, 6))
    plt.bar(commissioner_counts.index, commissioner_counts.values, color='orange')
    plt.title('Matches Overseen by Each Commissioner')
    plt.ylabel('Number of Matches')
    plt.xlabel('Commissioner')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
    return fig




# Streamlit App

st.title("Basketball Game Schedule Generator")

# NOTE(review): this config option may not exist in newer Streamlit releases -
# confirm against the Streamlit version this app is deployed with.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Initialise every session-state entry up front so later sections can read
# them on any rerun, including before a file has been uploaded.  The keys are
# plain names: checking for the literal 'st.session_state.historical_data'
# never matches and would reset the historical data on every rerun.
for state_key in ("df", "schedule_df", "historical_data"):
    if state_key not in st.session_state:
        st.session_state[state_key] = None

# UI for CSV File Uploader
uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])

start_date = date(2022, 11, 6)
available_dates = generate_available_dates(start_date)


# Load the Uploaded CSV File
if uploaded_file is not None:
    st.session_state.df = pd.read_csv(uploaded_file)
    st.write('Uploaded CSV file:')
    st.write(st.session_state.df)

    # Generate Schedule using Uploaded Data
    if st.button("Generate Schedule"):
        try:
            new_schedule = generate_schedule_from_data(st.session_state.df, available_dates)
        except ValueError as exc:
            # Raised when the CSV lacks the required columns.
            st.error(str(exc))
        else:
            # The analytics use the `.dt` accessor, which needs datetime64
            # values rather than datetime.date objects.
            new_schedule['Date'] = pd.to_datetime(new_schedule['Date'])
            st.session_state.schedule_df = new_schedule
            # Historical data derived from an older schedule is now stale.
            st.session_state.historical_data = None
            st.write('Generated Schedule:')
            st.write(st.session_state.schedule_df)
else:
    st.warning("Please upload a CSV file to proceed.")


if st.button("Generate Mock Historical Data"):
    # Ensure that the schedule has been generated before generating historical data
    if st.session_state.schedule_df is None:
        st.warning("Please generate the schedule first before generating mock historical data.")
    else:
        # Only generate historical data if it hasn't been generated already
        if st.session_state.historical_data is None:
            # Pass a copy so the current schedule is never altered by the
            # mock-data generator.
            st.session_state.historical_data = generate_mock_historical_data(
                st.session_state.schedule_df.copy()
            )
        st.write('Generated Mock Historical Data:')
        st.write(st.session_state.historical_data)


# Configuration UI
st.header("Configuration")


# NOTE: with options=[] the multiselect offers nothing to pick, so the text
# input below is the only way a commissioner name gets into the list.
commissioners = st.multiselect("Add commissioners:", options=[], default=[])

add_commissioner = st.text_input("New commissioner name:")
if add_commissioner:
    commissioners.append(add_commissioner)



# Schedule Viewing
st.header("View Schedule")

if st.session_state.schedule_df is not None:
    current_schedule = st.session_state.schedule_df
    # Fetching the unique conferences from the schedule DataFrame
    conferences = current_schedule['Conference 1'].unique()
    conference_selector = st.selectbox("Select conference to view schedule:", options=["All"] + list(conferences))

    if conference_selector == "All":
        st.dataframe(current_schedule)
    else:
        # Keep matches in which either side belongs to the selected conference
        involves_conference = (
            (current_schedule["Conference 1"] == conference_selector)
            | (current_schedule["Conference 2"] == conference_selector)
        )
        st.dataframe(current_schedule[involves_conference])
else:
    st.warning("Schedule has not been generated yet.")


# Analytics & Comparisons
st.header("Analytics & Comparisons")
analytics_option = st.selectbox("Choose an analysis type:", ["Team Workload Analysis", "Match Distribution", "Inter-Conference Match Analysis", "Commissioner Analytics"])
if st.session_state.historical_data is not None:
    st.session_state.historical_data['Date'] = pd.to_datetime(st.session_state.historical_data['Date'])
else:
    st.error("Historical data has not been generated yet.")

# Map each menu entry to a callable taking (schedule, team table).
analytics_renderers = {
    "Team Workload Analysis": team_workload_analysis,
    "Match Distribution": match_distribution,
    "Inter-Conference Match Analysis": inter_conference_analysis,
    "Commissioner Analytics": lambda schedule, teams: commissioner_analytics(schedule, teams, commissioners),
}

if st.session_state.df is None:
    # Nothing has been uploaded yet, so there is no team table to analyse.
    st.warning("Please generate the schedule first before viewing analytics.")
else:
    render_analysis = analytics_renderers[analytics_option]
    st.subheader("Historical Data")
    st.pyplot(render_analysis(st.session_state.historical_data, st.session_state.df))
    st.subheader("Current Data")
    st.pyplot(render_analysis(st.session_state.schedule_df, st.session_state.df))

# Export functionality can be added later