import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns import matplotlib.dates as mdates import plotly.express as px import re from datetime import datetime, timedelta import warnings # Load the data df = pd.read_csv(r"fy21-24.csv", dtype={"FyWeek": "string", "Fy": "category", "Chaincode": "category", "Store": "category", "Address": "string", "Zipcode": "float", "City": "category", "State": "category", "Containercode": "category", "Itemtype": "category", "SalesVolume":"float", "UnitPrice":"float", "Sales":"float"}) # Convert columns df["Zipcode"] = df["Zipcode"].convert_dtypes() df["SalesVolume"] = df["SalesVolume"].convert_dtypes() # Title for the app st.title('Sales Data Dashboard') # Initialize session state for storing which card was clicked and item type if 'active_card' not in st.session_state: st.session_state['active_card'] = None if 'selected_item_type' not in st.session_state: st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE' # Initialize session state for storing the selected state and feature if 'selected_state' not in st.session_state: st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state if 'selected_feature' not in st.session_state: st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code' # Two columns for the card buttons col1, col2, col3, col4 = st.columns(4) # Define buttons for plot categories, update session state when clicked with col1: if st.button("Sales Volume Trend for Item Category"): st.session_state['active_card'] = 'card1' with col2: if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"): st.session_state['active_card'] = 'card2' with col3: if st.button("Price vs Sales Trend by Year"): st.session_state['active_card'] = 'card3' with col4: if st.button("Total Sales Volume by Price Band"): st.session_state['active_card'] = 'card4' ########################################### CARD #1 #################################################### if st.session_state['active_card'] == 'card1': # Create short fiscal week display df['Fiscal Year'] = df['FyWeek'].apply(lambda x: int(x.split(' ')[1])) df['Week Number'] = df['FyWeek'].apply(lambda x: int(x.split('Week ')[1])) df = df.sort_values(by=['Fiscal Year', 'Week Number']) # Reformat 'Fiscal Week' for display (e.g., 'FY21W51') df['Fiscal Week Short'] = df.apply(lambda x: f"FY{x['Fiscal Year']%100}W{x['Week Number']}", axis=1) # Ensure the short fiscal week column is treated as a categorical variable and sorted by the order of appearance df['Fiscal Week Short'] = pd.Categorical(df['Fiscal Week Short'], categories=df['Fiscal Week Short'].unique(), ordered=True) # Dropdown for selecting the state (using session_state) st.session_state['selected_state'] = st.selectbox('Select State', df['State'].unique(), index=list(df['State'].unique()).index(st.session_state['selected_state'])) # Dropdown for selecting the feature for grouping (using session_state) st.session_state['selected_feature'] = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype', 'FyWeek'], index=['Chaincode', 'Itemtype', 'FyWeek'].index(st.session_state['selected_feature'])) # Filter the dataframe based on selected state filtered_df = df[df['State'] == st.session_state['selected_state']] # Plot based on user's selected feature if st.session_state['selected_feature'] == 'Itemtype': st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Item Type') group_data = filtered_df.groupby(['FyWeek', 'Itemtype'])['SalesVolume'].sum().reset_index() fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Itemtype', title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Item Type', labels={'SalesVolume': 'Sales Volume'}) elif st.session_state['selected_feature'] == 'Chaincode': st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Chain Code') group_data = filtered_df.groupby(['FyWeek', 'Chaincode'])['SalesVolume'].sum().reset_index() fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Chaincode', title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Chain Code', labels={'SalesVolume': 'Sales Volume'}) elif st.session_state['selected_feature'] == 'FyWeek': st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Fiscal Week') group_data = filtered_df.groupby(['FyWeek'])['SalesVolume'].sum().reset_index() fig = px.bar(group_data, x='FyWeek', y='SalesVolume', title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]}', labels={'SalesVolume': 'Sales Volume'}) # Display the interactive plot st.plotly_chart(fig) ########################################################################################################## ########################################### CARD #2 #################################################### # Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code if st.session_state['active_card'] == 'card2': # Dropdown to select item type (using session_state) st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(), index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) st.subheader("Sales Volume & Unit Price Correlation for Container Codes") # Group the dataframe and prepare for plotting df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({ 'SalesVolume': 'sum', 'UnitPrice': 'mean', 'Sales': 'sum' }).reset_index() # Function to extract date from fiscal week def dt_from_fy_week(fyweek): fy, w = re.findall(r'\d+', fyweek) week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d") return (week1_start + timedelta(weeks=int(w) - 1)).date() # Add columns for date and promo to data df['Dt'] = df['FyWeek'].apply(dt_from_fy_week) df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo') df["Promo"] = df["Promo"].astype("category") # Split FyWeek into fiscal year and week number df['Week'] = df['FyWeek'].str.split().str[-1].astype(int) df['Year'] = df['FyWeek'].str.split().str[1].astype(int) # Filter the dataframe based on the selected item type filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']] # Find the top 3 container codes based on total SalesVolume top_3_containers = filtered_df.groupby('Containercode', observed=True)['SalesVolume'].sum().nlargest(3).index # Filter the data for only the top 3 container codes top_container_data = filtered_df[filtered_df['Containercode'].isin(top_3_containers)] # Group by Year, Week, Dt, and Containercode and aggregate SalesVolume and UnitPrice agg_df = top_container_data.groupby(['Containercode', 'Year', 'Week', 'Dt'], observed=True).agg({ 'SalesVolume': 'sum', 'UnitPrice': 'mean' }).reset_index() # Loop through the top 3 container codes and create separate plots for container in top_3_containers: container_data = agg_df[agg_df['Containercode'] == container] # Create a new figure for each container code fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6)) # Plot SalesVolume sns.lineplot(data=container_data, x='Dt', y='SalesVolume', ax=axd) axd.set_title(f"SalesVolume - {container}") axd.grid(True, linestyle='--', color='gray', alpha=0.7) # Plot mean line for SalesVolume axd.axhline(container_data['SalesVolume'].mean(), ls="--", color="r") axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2)) axd.set_xticklabels([]) # Plot UnitPrice sns.lineplot(data=container_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd') axp.set_title(f"UnitPrice - {container}") axp.grid(True, linestyle='--', color='gray', alpha=0.7) # Plot mean line for UnitPrice axp.axhline(container_data['UnitPrice'].mean(), ls="--", color="r") axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2)) axp.tick_params(axis='x', rotation=90) # Adjust layout for each figure plt.tight_layout() # Display the plot in Streamlit st.pyplot(fig) ############################################################################################### ########################################### CARD #3 #################################################### # Check which card was selected using session state if st.session_state['active_card'] == 'card3': # Dropdown for selecting the Item Type st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(), index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({ 'SalesVolume': 'sum', 'UnitPrice':'mean', 'Sales': 'sum'}).reset_index() # add promo and date columns to data def dt_from_fy_week(fyweek): fy, w = re.findall(r'\d+', fyweek) week1_start = datetime.strptime("{}-08-01".format(int(fy)-1), "%Y-%m-%d") return (week1_start + timedelta(weeks=int(w)-1)).date() df['Dt'] = df['FyWeek'].apply(dt_from_fy_week) df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3','4','5','6']) , 'Promo', 'NoPromo') df["Promo"] = df["Promo"].astype("category") # Split FyWeek into fiscal year and week number df['Week'] = df['FyWeek'].str.split().str[-1].astype(int) # df_21['Year'] = df_21['Fy'].str.extract(r'(\d+)').astype(int) df['Year'] = df['FyWeek'].str.split().str[1].astype(int) # Define the fiscal years years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"] # Set up a 2x2 grid of subplots for the four years fig, axs = plt.subplots(2, 2, figsize=(12, 8)) # Loop through each year and create a plot in the grid for i, fy in enumerate(years): ax = axs.flat[i] # Plot Promo data sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "Promo")], x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='.', line_kws=dict(color="r"), ax=ax, label="Promo") # Plot NoPromo data sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "NoPromo")], x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='x', line_kws=dict(color="g"), ax=ax, label="NoPromo") # Set the title of each subplot ax.set_title(f"{st.session_state['selected_item_type']} - {fy}") ax.legend(loc="best") # Set the overall title for the figure fig.suptitle(f"Price vs SalesVolume for {st.session_state['selected_item_type']} across years") # Adjust layout to prevent overlap fig.tight_layout(rect=[0, 0, 1, 0.95]) # Display the plot st.pyplot(fig) ############################################################################################### ########################################### CARD #4 #################################################### if st.session_state['active_card'] == 'card4': # Define the fiscal years years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024'] df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({ 'SalesVolume': 'sum', 'UnitPrice':'mean', 'Sales': 'sum'}).reset_index() # Dropdown for selecting the Item Type (using session_state) st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(), index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) # Set up a 2x2 grid of subplots for the four years fig, axes = plt.subplots(2, 2, figsize=(16, 12)) axes = axes.flatten() # To access axes easily in a loop # Loop through each year and plot the data for i, year in enumerate(years): # print(st.session_state['selected_item_type']) # Filter data for the specific year and item type selected cage_data = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Fy'] == year)] cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories() cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories() # print(cage_data['Itemtype'].unique()) relevant_container_codes = cage_data['Containercode'].unique() print(relevant_container_codes) # Calculate price bands lower_band = cage_data['UnitPrice'].quantile(0.25) median_band = cage_data['UnitPrice'].quantile(0.50) higher_band = cage_data['UnitPrice'].quantile(0.75) # Get data for each price band lower_band_data = cage_data[cage_data['UnitPrice'] <= lower_band] median_band_data = cage_data[(cage_data['UnitPrice'] > lower_band) & (cage_data['UnitPrice'] <= median_band)] higher_band_data = cage_data[cage_data['UnitPrice'] > higher_band] # print(lower_band_data['Containercode'].unique()) # Aggregate SalesVolume and average UnitPrice for each container code in each pricing band lower_band_agg = lower_band_data.groupby('Containercode',observed=True).agg( total_sales_volume=('SalesVolume', 'sum'), avg_unit_price=('UnitPrice', 'mean') ).reset_index() median_band_agg = median_band_data.groupby('Containercode',observed=True).agg( total_sales_volume=('SalesVolume', 'sum'), avg_unit_price=('UnitPrice', 'mean') ).reset_index() higher_band_agg = higher_band_data.groupby('Containercode',observed=True).agg( total_sales_volume=('SalesVolume', 'sum'), avg_unit_price=('UnitPrice', 'mean') ).reset_index() # Add the price band labels lower_band_agg['PriceBand'] = 'Lower Band' median_band_agg['PriceBand'] = 'Median Band' higher_band_agg['PriceBand'] = 'Higher Band' # Combine the data for plotting combined_data = pd.concat([lower_band_agg, median_band_agg, higher_band_agg]) combined_data = combined_data[combined_data['Containercode'].isin(relevant_container_codes)] # Plot Total Sales Volume for each price band in the current subplot sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=axes[i]) # Set the title and customize x-axis for each subplot axes[i].set_title(f"Total Sales Volume by Container Code and Price Band for {year}") axes[i].set_xlabel('Container Code') axes[i].set_ylabel('Total Sales Volume') axes[i].tick_params(axis='x', rotation=45) # Adjust the layout so titles and labels don't overlap plt.tight_layout() # Display the plot in Streamlit st.pyplot(fig) ###############################################################################################