Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import matplotlib.dates as mdates | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import re | |
from datetime import datetime, timedelta | |
import warnings | |
import time | |
import dask.dataframe as dd | |
state_to_region = { | |
# WEST | |
'AK': 'WEST', 'CA': 'WEST', 'CO': 'WEST', 'HI': 'WEST', 'ID': 'WEST', | |
'MT': 'WEST', 'NV': 'WEST', 'OR': 'WEST', 'UT': 'WEST', 'WA': 'WEST', 'WY': 'WEST', | |
# SOUTHWEST | |
'AZ': 'SOUTHWEST', 'NM': 'SOUTHWEST', 'OK': 'SOUTHWEST', 'TX': 'SOUTHWEST', | |
# MIDWEST | |
'IL': 'MIDWEST', 'IN': 'MIDWEST', 'IA': 'MIDWEST', 'KS': 'MIDWEST', 'MI': 'MIDWEST', | |
'MN': 'MIDWEST', 'MO': 'MIDWEST', 'NE': 'MIDWEST', 'ND': 'MIDWEST', 'OH': 'MIDWEST', | |
'SD': 'MIDWEST', 'WI': 'MIDWEST', | |
# SOUTHEAST | |
'AL': 'SOUTHEAST', 'AR': 'SOUTHEAST', 'DE': 'SOUTHEAST', 'FL': 'SOUTHEAST', | |
'GA': 'SOUTHEAST', 'KY': 'SOUTHEAST', 'LA': 'SOUTHEAST', 'MD': 'SOUTHEAST', | |
'MS': 'SOUTHEAST', 'NC': 'SOUTHEAST', 'SC': 'SOUTHEAST', 'TN': 'SOUTHEAST', | |
'VA': 'SOUTHEAST', 'WV': 'SOUTHEAST', | |
# NORTHEAST | |
'CT': 'NORTHEAST', 'ME': 'NORTHEAST', 'MA': 'NORTHEAST', 'NH': 'NORTHEAST', | |
'NJ': 'NORTHEAST', 'NY': 'NORTHEAST', 'PA': 'NORTHEAST', 'RI': 'NORTHEAST', | |
'VT': 'NORTHEAST' | |
} | |
def date_from_week(year, week): | |
# Assuming the fiscal year starts in August and the week starts from August 1st | |
base_date = pd.to_datetime((year - 1).astype(str) + '-08-01') | |
dates = base_date + pd.to_timedelta((week - 1) * 7, unit='days') | |
return dates | |
def load_data(active_card): | |
# st.write(f"{active_card}") | |
# Define columns common to multiple cards if there are any | |
common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales'] | |
# Columns specific to cards | |
card_specific_cols = { | |
'card1': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'], | |
# 'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'], | |
'card3': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'] # Added for PE calculation card | |
} | |
# Choose columns based on the active card | |
required_columns = card_specific_cols.get(active_card, common_cols) | |
# Define the data types for efficient memory usage | |
dtype_spec = { | |
'FyWeek': 'string', | |
'Fy': 'category', # Add data type for 'Fy' if it's used | |
'Itemtype': 'category', | |
'Chaincode': 'category', | |
'State': 'category', | |
"Store": "category", | |
'Containercode': 'category', | |
"Address": "string", | |
"Zipcode": "float", | |
"City": "category", | |
'SalesVolume': 'float', | |
'UnitPrice': 'float', | |
'Sales': 'float' | |
} | |
# Read only the necessary columns | |
# st.write(required_columns) | |
ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec) | |
df = ddf.compute() | |
# st.write("+++++++++++++++++++++++") | |
if active_card in ['card1','card2', 'card3',]: | |
df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean', | |
'Sales': 'sum' | |
}).reset_index() | |
df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True) | |
df['Week'] = df['Week'].astype(int) # Convert 'Week' to int | |
df['Year'] = df['FY'].str[2:].astype(int) # Extract year part and convert to int | |
df['Dt'] = date_from_week(df['Year'], df['Week']) | |
# Add the region column based on state | |
df['Region'] = df['State'].map(state_to_region) | |
return df | |
# Display logo | |
st.image("bonnie.png", width=150) # Adjust width as needed | |
# Display title | |
# st.title("Price vs. Sales Volume Tracker Dashboard") | |
# Initialize session state for storing which card was clicked and item type | |
if 'active_card' not in st.session_state: | |
st.session_state['active_card'] = None | |
if 'selected_item_type' not in st.session_state: | |
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE' | |
if 'selected_feature' not in st.session_state: | |
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code' | |
# Card selection buttons with logic to reset session state on switch | |
col1, col3 = st.columns(2) | |
with col1: | |
if st.button("Sales Volume Trend"): | |
st.session_state['active_card'] = 'card1' | |
# Reset other selections when switching cards | |
st.session_state['selected_state'] = None | |
st.session_state['selected_chaincode'] = None | |
st.session_state['selected_itemtype'] = None | |
st.session_state['selected_containercode'] = None | |
# with col2: | |
# if st.button("Sales Volume vs Median Unit Price Trend"): | |
# st.session_state['active_card'] = 'card2' | |
# # Reset selections when switching cards | |
# st.session_state['selected_state'] = None | |
# st.session_state['selected_chaincode'] = None | |
# st.session_state['selected_itemtype'] = None | |
# st.session_state['selected_containercode'] = None | |
with col3: | |
if st.button("Price Elasticity Coefficient Trend YoY"): | |
st.session_state['active_card'] = 'card3' | |
# Reset selections when switching cards | |
st.session_state['selected_state'] = None | |
st.session_state['selected_chaincode'] = None | |
st.session_state['selected_itemtype'] = None | |
st.session_state['selected_containercode'] = None | |
# Load data for the current card | |
start_time = time.time() | |
df = load_data(st.session_state['active_card']) | |
time_taken = time.time() - start_time | |
st.write(f"Data loaded in {time_taken:.2f} seconds") | |
############################################ CARD #1 #################################################### | |
if st.session_state['active_card'] == 'card1': | |
# Step 1: Sales Volume vs FyWeek for the whole dataset (no filter) | |
st.subheader("Total Sales Volume by Fiscal Week") | |
df['FY_Week'] = df['FY'].astype(str) + '_' + df['Week'].astype(str) | |
# Split FY_Week again for correct sorting | |
if not df.empty and 'FY_Week' in df.columns: | |
total_sales_df = df.groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index() | |
total_sales_df[['FY', 'Week']] = total_sales_df['FY_Week'].str.split('_', expand=True) | |
total_sales_df['Week'] = total_sales_df['Week'].astype(int) | |
total_sales_df = total_sales_df.sort_values(by=['FY', 'Week']) | |
# Create a line chart using Plotly | |
fig = px.line(total_sales_df, x='FY_Week', y='SalesVolume', | |
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'}) | |
st.plotly_chart(fig) | |
# Step 2: Top 3 states based on sales volume as buttons/cards | |
top_states = df.groupby('State', observed=True)['SalesVolume'].sum().nlargest(3).index | |
st.write("### Top 3 Selling States in the last 4 years (drill down by state)") | |
col1, col2, col3 = st.columns(3) | |
if len(top_states) > 0 and col1.button(top_states[0]): | |
st.session_state['selected_state'] = top_states[0] | |
if len(top_states) > 1 and col2.button(top_states[1]): | |
st.session_state['selected_state'] = top_states[1] | |
if len(top_states) > 2 and col3.button(top_states[2]): | |
st.session_state['selected_state'] = top_states[2] | |
# If a state is selected, show the corresponding plot | |
if 'selected_state' in st.session_state and st.session_state['selected_state']: | |
selected_state = st.session_state['selected_state'] | |
# Step 3: Sales volume vs FyWeek for the selected state | |
st.subheader(f"Sales Volume by Fiscal Week for {selected_state} (drill down by Chaincode) ") | |
state_sales_df = df[df['State'] == selected_state].groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index() | |
if not state_sales_df.empty and 'FY_Week' in state_sales_df.columns: | |
state_sales_df[['FY', 'Week']] = state_sales_df['FY_Week'].str.split('_', expand=True) | |
state_sales_df['Week'] = state_sales_df['Week'].astype(int) | |
state_sales_df = state_sales_df.sort_values(by=['FY', 'Week']) | |
fig = px.line(state_sales_df, x='FY_Week', y='SalesVolume', | |
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'}) | |
st.plotly_chart(fig) | |
# Step 4: Top 3 chaincodes based on sales volume as buttons/cards | |
top_chaincodes = df[df['State'] == selected_state].groupby('Chaincode', observed=True)['SalesVolume'].sum().nlargest(3).index | |
st.write(f"### Top 3 selling Chaincode in {selected_state}:") | |
# Add a check to ensure top_chaincodes has values before accessing | |
col1, col2, col3 = st.columns(3) | |
if len(top_chaincodes) > 0 and col1.button(top_chaincodes[0]): | |
st.session_state['selected_chaincode'] = top_chaincodes[0] | |
if len(top_chaincodes) > 1 and col2.button(top_chaincodes[1]): | |
st.session_state['selected_chaincode'] = top_chaincodes[1] | |
if len(top_chaincodes) > 2 and col3.button(top_chaincodes[2]): | |
st.session_state['selected_chaincode'] = top_chaincodes[2] | |
# If a chaincode is selected, show the corresponding plot | |
if 'selected_chaincode' in st.session_state: | |
selected_chaincode = st.session_state['selected_chaincode'] | |
# Step 5: Sales volume vs FyWeek for the selected chaincode in the selected state | |
st.subheader(f"Sales Volume by Fiscal Week for {selected_chaincode} in {selected_state}") | |
chain_sales_df = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode)].groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index() | |
if not chain_sales_df.empty and 'FY_Week' in chain_sales_df.columns: | |
chain_sales_df[['FY', 'Week']] = chain_sales_df['FY_Week'].str.split('_', expand=True) | |
chain_sales_df['Week'] = chain_sales_df['Week'].astype(int) | |
chain_sales_df = chain_sales_df.sort_values(by=['FY', 'Week']) | |
fig = px.line(chain_sales_df, x='FY_Week', y='SalesVolume', | |
# title=f'Sales Volume vs Fiscal Week in {selected_chaincode}, {selected_state}', | |
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'}) | |
st.plotly_chart(fig) | |
# Step 6: Top 3 itemtypes based on sales volume as buttons/cards | |
top_itemtypes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode)].groupby('Itemtype', observed=True)['SalesVolume'].sum().nlargest(3).index | |
st.write(f"### Top Item Type in {selected_chaincode}, {selected_state} (drill down by ItemType) :") | |
col1, col2, col3 = st.columns(3) | |
if len(top_itemtypes) > 0 and col1.button(top_itemtypes[0]): | |
st.session_state['selected_itemtype'] = top_itemtypes[0] | |
if len(top_itemtypes) > 1 and col2.button(top_itemtypes[1]): | |
st.session_state['selected_itemtype'] = top_itemtypes[1] | |
if len(top_itemtypes) > 2 and col3.button(top_itemtypes[2]): | |
st.session_state['selected_itemtype'] = top_itemtypes[2] | |
# If an itemtype is selected, show the corresponding dual-axis plot for Sales Volume & Unit Price | |
if 'selected_itemtype' in st.session_state: | |
selected_itemtype = st.session_state['selected_itemtype'] | |
# Step 7: Dual-axis plot for Sales volume and UnitPrice vs FyWeek for the selected itemtype | |
# st.subheader(f"Sales Volume & Unit Price vs Fiscal Week for {selected_itemtype} in {selected_chaincode}, {selected_state}") | |
item_sales_df = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('FY_Week', observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean' | |
}).reset_index() | |
if not item_sales_df.empty and 'FY_Week' in item_sales_df.columns: | |
item_sales_df[['FY', 'Week']] = item_sales_df['FY_Week'].str.split('_', expand=True) | |
item_sales_df['Week'] = item_sales_df['Week'].astype(int) | |
item_sales_df = item_sales_df.sort_values(by=['FY', 'Week']) | |
# Dual-axis plot using Plotly Graph Objects | |
fig = go.Figure() | |
# Add SalesVolume trace | |
fig.add_trace(go.Scatter( | |
x=item_sales_df['FY_Week'], | |
y=item_sales_df['SalesVolume'], | |
mode='lines+markers', | |
name='SalesVolume', | |
line=dict(color='blue'), | |
hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}' | |
)) | |
# Add UnitPrice trace with secondary Y-axis | |
fig.add_trace(go.Scatter( | |
x=item_sales_df['FY_Week'], | |
y=item_sales_df['UnitPrice'], | |
mode='lines+markers', | |
name='UnitPrice', | |
line=dict(color='green'), | |
yaxis='y2', | |
hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}' | |
)) | |
# Update layout for dual axes | |
fig.update_layout( | |
title=f"Sales Volume vs Unit Price by Fiscal Week for {selected_itemtype}, {selected_chaincode}, {selected_state}", | |
xaxis_title='Fiscal Week', | |
yaxis_title='Sales Volume', | |
yaxis2=dict(title='Unit Price', overlaying='y', side='right'), | |
legend=dict(x=0.9, y=1.15), | |
hovermode="x unified", # Show both values in a tooltip | |
height=600, | |
margin=dict(l=50, r=50, t=50, b=50) | |
) | |
# Rotate X-axis labels | |
fig.update_xaxes(tickangle=90) | |
# Display the Plotly figure in Streamlit | |
st.plotly_chart(fig, use_container_width=True) | |
# Step 8: Display Top/Bottom Container Codes and Stores | |
st.subheader("Top & Bottom 3 Container Codes and Stores") | |
# Get top and bottom 3 container codes based on SalesVolume | |
top_containercodes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Containercode', observed=True)['SalesVolume'].sum().nlargest(3).reset_index() | |
bottom_containercodes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Containercode', observed=True)['SalesVolume'].sum().nsmallest(3).reset_index() | |
# Get top and bottom 3 stores based on SalesVolume | |
top_stores = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Store', observed=True)['SalesVolume'].sum().nlargest(3).reset_index() | |
bottom_stores = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Store', observed=True)['SalesVolume'].sum().nsmallest(3).reset_index() | |
# Display top and bottom container codes side by side | |
st.write("### Container Codes:") | |
col1, col2 = st.columns(2) | |
with col1: | |
st.write("#### Top 3 Container Codes") | |
st.dataframe(top_containercodes) | |
with col2: | |
st.write("#### Bottom 3 Container Codes") | |
st.dataframe(bottom_containercodes) | |
# Display top and bottom stores side by side | |
st.write("### Stores:") | |
col3, col4 = st.columns(2) | |
with col3: | |
st.write("#### Top 3 Stores") | |
st.dataframe(top_stores) | |
with col4: | |
st.write("#### Bottom 3 Stores") | |
st.dataframe(bottom_stores) | |
########################################################################################################## | |
########################################### CARD #2 #################################################### | |
# if st.session_state['active_card'] == 'card2': | |
# # Identify the top 10 Itemtypes based on total SalesVolume | |
# top_10_itemtypes = df.groupby('Itemtype')['SalesVolume'].sum().nlargest(10).index | |
# # Filter the DataFrame to include only the top 10 Itemtypes | |
# df = df[df['Itemtype'].isin(top_10_itemtypes)] | |
# # Dropdown to select item type (using session_state) | |
# st.session_state['selected_item_type'] = st.selectbox( | |
# 'Select Item Type', df['Itemtype'].unique(), | |
# index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
# # Dropdown to select the grouping category (container code, chain code, or state) | |
# group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State','Region']) | |
# # Multi-select checkbox to select multiple years | |
# selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021]) | |
# st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}") | |
# # Convert 'Dt' column to datetime | |
# df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') | |
# df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo') | |
# df["Promo"] = df["Promo"].astype("category") | |
# # Filter the dataframe based on the selected item type and selected years | |
# filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))] | |
# # Find the top 3 values based on total SalesVolume in the selected grouping category | |
# top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index | |
# # Filter the data for only the top 3 values | |
# top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)] | |
# # Aggregate data | |
# agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({ | |
# 'SalesVolume': 'sum', | |
# 'UnitPrice': 'mean' | |
# }).reset_index() | |
# # Create a new column 'week-year' for X-axis labels | |
# agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y') | |
# # Loop through the top 3 values and create separate plots using Plotly | |
# for value in top_3_values: | |
# value_data = agg_df[agg_df[group_by_option] == value] | |
# # Assuming you have 'value_data' from your previous code | |
# mean_sales_volume = value_data['SalesVolume'].mean() | |
# mean_unit_price = value_data['UnitPrice'].mean() | |
# # Create a Plotly figure | |
# fig = go.Figure() | |
# # Add SalesVolume trace | |
# fig.add_trace(go.Scatter( | |
# x=value_data['week-year'], | |
# y=value_data['SalesVolume'], | |
# mode='lines+markers', | |
# name='SalesVolume', | |
# line=dict(color='blue'), | |
# hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}' | |
# )) | |
# # Add UnitPrice trace on a secondary Y-axis | |
# fig.add_trace(go.Scatter( | |
# x=value_data['week-year'], | |
# y=value_data['UnitPrice'], | |
# mode='lines+markers', | |
# name='UnitPrice', | |
# line=dict(color='green'), | |
# yaxis='y2', | |
# hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}' | |
# )) | |
# # Add mean line for SalesVolume | |
# fig.add_shape(type="line", | |
# x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
# y0=mean_sales_volume, y1=mean_sales_volume, | |
# line=dict(color="blue", width=2, dash="dash"), | |
# xref='x', yref='y') | |
# # Add mean line for UnitPrice (on secondary Y-axis) | |
# fig.add_shape(type="line", | |
# x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
# y0=mean_unit_price, y1=mean_unit_price, | |
# line=dict(color="green", width=2, dash="dash"), | |
# xref='x', yref='y2') | |
# # Update layout for dual axes | |
# fig.update_layout( | |
# template='plotly_white', | |
# title=f"SalesVolume and UnitPrice - {value} ({group_by_option})", | |
# xaxis_title='Week-Year', | |
# yaxis_title='Sales Volume', | |
# yaxis2=dict(title='UnitPrice', overlaying='y', side='right'), | |
# legend=dict(x=0.9, y=1.15), | |
# hovermode="x unified", # Show both values in a tooltip | |
# height=600, | |
# margin=dict(l=50, r=50, t=50, b=50) | |
# ) | |
# # Rotate X-axis labels | |
# fig.update_xaxes(tickangle=90) | |
# # Display the Plotly figure in Streamlit | |
# st.plotly_chart(fig, use_container_width=True) | |
################################ | |
if st.session_state['active_card'] == 'card3': | |
# Dropdown for selecting the item type | |
item_type_options = df['Itemtype'].unique() | |
selected_item_type = st.selectbox("Select Item Type", item_type_options) | |
# Dropdown for selecting the region (multiple selection allowed) | |
region_options = df['Region'].dropna().unique() | |
selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options) | |
# Filter data based on selected item type and selected regions | |
filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))] | |
# Group by Year, Region, Itemtype and Promo, and aggregate SalesVolume and UnitPrice | |
agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype',]).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean' | |
}).reset_index() | |
# Sort values by Region, Itemtype, Fy, and Promo for YOY calculation | |
agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy',]) | |
# Calculate YOY percentage changes in Sales Volume and Unit Price | |
agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype',])['SalesVolume'].pct_change().round(3) * 100 | |
agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype', ])['UnitPrice'].pct_change().round(3) * 100 | |
# Calculate Price Elasticity Coefficient (PE) | |
agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2) | |
# Exclude FY 2025 but keep FY 2021 even with NaN values | |
agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025'] | |
# Drop rows where PE_Coeff is NaN (optional) | |
agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff']) | |
agg_df_filtered = agg_df_filtered.rename(columns={ | |
'SalesVolume_pct_change': 'SlVol%change', | |
'UnitPrice_pct_change': 'UnPr%change', | |
}) | |
agg_df_filtered = agg_df_filtered.reset_index(drop=True) | |
st.dataframe(agg_df_filtered) | |
st.write(agg_df_filtered.shape) | |
# Extract values for the current and previous years from row 1 and row 2 of the dataframe | |
current_year_row = agg_df_filtered.iloc[1] # Row 1 - Current Year | |
previous_year_row = agg_df_filtered.iloc[0] # Row 2 - Previous Year | |
# Extract values for Unit Price and Sales Volume | |
unit_price_current_year = current_year_row['UnitPrice'] | |
unit_price_previous_year = previous_year_row['UnitPrice'] | |
sales_volume_current_year = current_year_row['SalesVolume'] | |
sales_volume_previous_year = previous_year_row['SalesVolume'] | |
# Calculate percentage changes for Unit Price and Sales Volume | |
unit_price_pct = ((unit_price_current_year - unit_price_previous_year) / unit_price_previous_year) * 100 | |
sales_volume_pct = ((sales_volume_current_year - sales_volume_previous_year) / sales_volume_previous_year) * 100 | |
# Calculate PE Coefficient | |
pe_coeff = sales_volume_pct / unit_price_pct | |
st.markdown(f'''### Calculations for Price Elasticity Coefficient''') | |
st.latex(rf""" | |
\text{{Unit Price \% Change}} = \frac{{{unit_price_current_year:.2f} - {unit_price_previous_year:.2f}}}{{{unit_price_previous_year:.2f}}} \times 100 = {unit_price_pct:.2f}\% | |
""") | |
# Sales Volume % Change | |
st.latex(rf""" | |
\text{{Sales Volume \% Change}} = \frac{{{sales_volume_current_year:.2f} - {sales_volume_previous_year:.2f}}}{{{sales_volume_previous_year:.2f}}} \times 100 = {sales_volume_pct:.2f}\% | |
""") | |
# PE Coefficient | |
st.latex(rf""" | |
\text{{PE Coefficient}} = \frac{{{sales_volume_pct:.2f}}}{{{unit_price_pct:.2f}}} = {pe_coeff:.2f} | |
""") | |
# Explanation for PE Coefficient Conditions | |
st.markdown(f""" | |
### Interpretation of Price Elasticity (PE) Coefficient: | |
The Price Elasticity (PE) coefficient reflects how sensitive sales volume is to changes in unit price. | |
- If the **PE coefficient is positive**: | |
1. When the price increases, sales volume increases. | |
2. When the price decreases, sales volume decreases. | |
- If the **PE coefficient is negative**: | |
1. When the price increases, sales volume decreases. | |
2. When the price decreases, sales volume increases. | |
""") | |
# Dynamic analysis based on the calculated PE coefficient and signs of changes | |
if unit_price_pct > 0 and sales_volume_pct > 0: | |
st.warning(f""" | |
Both unit price and sales volume increased (refer first and second row of the table). The PE coefficient of **{pe_coeff:.2f}** indicates that for every 1% increase in unit price, sales volume increased by approximately **{pe_coeff:.2f}%**. | |
""") | |
elif unit_price_pct < 0 and sales_volume_pct < 0: | |
st.warning(f""" | |
Both unit price and sales volume decreased (refer first and second row of the table). The PE coefficient of **{pe_coeff:.2f}** suggests that for every 1% decrease in unit price, sales volume decreased by approximately **{pe_coeff:.2f}%**. | |
""") | |
elif unit_price_pct > 0 and sales_volume_pct < 0: | |
st.warning(f""" | |
The unit price increased while sales volume decreased (refer first and second row of the table). The negative PE coefficient of **{pe_coeff:.2f}** means that for every 1% increase in unit price, sales volume fell by approximately **{abs(pe_coeff):.2f}%**. | |
""") | |
elif unit_price_pct < 0 and sales_volume_pct > 0: | |
st.warning(f""" | |
The unit price decreased while sales volume increased (refer first and second row of the table). The negative PE coefficient of **{pe_coeff:.2f}** implies that for every 1% decrease in unit price, sales volume increased by approximately **{abs(pe_coeff):.2f}%**. | |
""") | |
# Plot the PE Coefficient with Plotly | |
fig = px.line( | |
agg_df_filtered, | |
x='Fy', | |
y='PE_Coeff', # Differentiate between Promo and NoPromo | |
color='Region', # Differentiate lines by Region | |
title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}", | |
labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'}, | |
markers=True | |
) | |
# Customize layout and show plot | |
fig.update_layout( | |
height=600, | |
width=1000, | |
) | |
st.plotly_chart(fig, use_container_width=True) | |
#################### CARD-3 MONTHLY IMPLEMENTATION ######################### | |
# Ensure 'Dt' column is in datetime format | |
df['Dt'] = pd.to_datetime(df['Dt']) | |
# Extract fiscal year and month from 'Dt' column | |
df['FY'] = df['Dt'].dt.year.astype(str) | |
df['Month'] = df['Dt'].dt.month.astype(str) | |
# Create FY_Month column | |
df['FY_Month'] = df['FY'] + '_' + df['Month'] | |
# Filter data based on selected item type and selected regions | |
filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))] | |
# Group by Year, Region, Itemtype and aggregate SalesVolume and UnitPrice | |
agg_df = filtered_df.groupby(['FY_Month', 'Region', 'Itemtype']).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean' | |
}).reset_index() | |
# Split FY_Month again for correct sorting | |
agg_df[['FY', 'Month']] = agg_df['FY_Month'].str.split('_', expand=True) | |
agg_df['Month'] = agg_df['Month'].astype(int) | |
agg_df['FY'] = agg_df['FY'].astype(int) | |
# Combine FY and Month back into a datetime-like format for proper sorting | |
agg_df['FY_Month_dt'] = pd.to_datetime(agg_df['FY'].astype(str) + agg_df['Month'].astype(str).str.zfill(2), format='%Y%m') | |
# Sort values by Region, Itemtype, and FY_Month_dt | |
agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'FY_Month_dt']) | |
# Calculate YOY percentage changes in Sales Volume and Unit Price | |
agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['SalesVolume'].pct_change().round(3) * 100 | |
agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['UnitPrice'].pct_change().round(3) * 100 | |
# Calculate Price Elasticity Coefficient (PE) | |
agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2) | |
# Exclude FY 2021 and FY 2025 | |
agg_df_filtered = agg_df[~agg_df['FY'].astype(str).str.contains('2020|2021|2025')] | |
# Drop rows where PE_Coeff is NaN (optional) | |
agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff']) | |
agg_df_filtered = agg_df_filtered[(agg_df_filtered['PE_Coeff'] < 1000) & (agg_df_filtered['PE_Coeff'] > -1000)] | |
# Plot the PE Coefficient with Plotly | |
fig = go.Figure() | |
# Iterate through each selected region and plot separately | |
for region in selected_regions: | |
# Filter the DataFrame for the current region | |
region_df = agg_df_filtered[agg_df_filtered['Region'] == region] | |
# Add a line trace for the region | |
fig.add_trace(go.Scatter( | |
x=region_df['FY_Month_dt'], # Use the datetime-like column for correct sorting | |
y=region_df['PE_Coeff'], | |
mode='lines+markers', | |
name=region, # Set the name to the region to appear in the legend | |
line=dict(width=2), | |
marker=dict(size=6), | |
)) | |
# Customize layout | |
fig.update_layout( | |
title=f"Price Elasticity Coefficient (PE) by Year-Month for {selected_item_type}", | |
xaxis_title="Fiscal Year_Month", | |
yaxis_title="Price Elasticity Coefficient (PE)", | |
height=600, | |
width=1000, | |
legend_title="Region", | |
xaxis=dict( | |
tickformat='%Y-%m', # Format X-axis ticks as Year-Month | |
) | |
) | |
# Show the plot in Streamlit | |
st.plotly_chart(fig, use_container_width=True) | |