tave-st committed on
Commit
d6a157a
1 Parent(s): 9032cfe

correct histogram vertical bars

Browse files
Files changed (1) hide show
  1. pages/clustering.py +17 -11
pages/clustering.py CHANGED
@@ -38,9 +38,9 @@ The **frequency** denotes how frequently a customer has ordered.
38
 
39
  There 3 available clusters for this metric:
40
 
41
- - cluster 0: denotes a customer that purchases one or few times (range [{}, {}])
42
- - cluster 1: these customer have a discrete amount of orders (range [{}, {}])
43
- - cluster 2: these customer purchases lots of times (range [{}, {}])
44
 
45
  -------
46
  """.lstrip()
@@ -50,9 +50,9 @@ The **recency** refers to how recently a customer has bought;
50
 
51
  There 3 available clusters for this metric:
52
 
53
- - cluster 0: the last order of these client is long time ago (range [{}, {}])
54
- - cluster 1: these are clients that purchases something not very recently (range [{}, {}])
55
- - cluster 2: the last order of these client is a few days/weeks ago (range [{}, {}])
56
 
57
  -------
58
  """.lstrip()
@@ -63,9 +63,9 @@ from your business.
63
 
64
  There 3 available clusters for this metric:
65
 
66
- - cluster 0: these clients spent little money (range [{}, {}])
67
- - cluster 1: these clients spent a considerable amount of money (range [{}, {}])
68
- - cluster 2: these clients spent lots of money (range [{}, {}])
69
 
70
  -------
71
  """.lstrip()
@@ -234,7 +234,7 @@ def categorize_user(recency_cluster, frequency_cluster, monetary_cluster):
234
  def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int]]):
235
  """Plots 3 histograms for the RFM metrics."""
236
 
237
- for x in ("Revenue", "Frequency", "Recency"):
238
  fig = px.histogram(
239
  df_rfm,
240
  x=x,
@@ -244,8 +244,14 @@ def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int
244
  # Get the max value in the cluster info. The cluster info is a list of min - max
245
  # values per cluster.
246
  values = cluster_info[f"{x}_cluster"]
 
247
  # Add vertical bar on each cluster end. But skip the last cluster.
248
- for n_cluster, i in enumerate(range(1, len(values)-1, 2)):
 
 
 
 
 
249
  fig.add_vline(
250
  x=values[i],
251
  annotation_text=f"End of cluster {n_cluster+1}",
 
38
 
39
  There 3 available clusters for this metric:
40
 
41
+ - cluster 1: denotes a customer that purchases one or few times (range [{}, {}])
42
+ - cluster 2: these customer have a discrete amount of orders (range [{}, {}])
43
+ - cluster 3: these customer purchases lots of times (range [{}, {}])
44
 
45
  -------
46
  """.lstrip()
 
50
 
51
  There 3 available clusters for this metric:
52
 
53
+ - cluster 1: the last order of these client is long time ago (range [{}, {}])
54
+ - cluster 2: these are clients that purchases something not very recently (range [{}, {}])
55
+ - cluster 3: the last order of these client is a few days/weeks ago (range [{}, {}])
56
 
57
  -------
58
  """.lstrip()
 
63
 
64
  There 3 available clusters for this metric:
65
 
66
+ - cluster 1: these clients spent little money (range [{}, {}])
67
+ - cluster 2: these clients spent a considerable amount of money (range [{}, {}])
68
+ - cluster 3: these clients spent lots of money (range [{}, {}])
69
 
70
  -------
71
  """.lstrip()
 
234
  def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int]]):
235
  """Plots 3 histograms for the RFM metrics."""
236
 
237
+ for x, to_reverse in zip(("Revenue", "Frequency", "Recency"), (False, False, True)):
238
  fig = px.histogram(
239
  df_rfm,
240
  x=x,
 
244
  # Get the max value in the cluster info. The cluster info is a list of min - max
245
  # values per cluster.
246
  values = cluster_info[f"{x}_cluster"]
247
+ print(values)
248
  # Add vertical bar on each cluster end. But skip the last cluster.
249
+ loop_range = list(enumerate(range(1, len(values)-1, 2)))
250
+ if to_reverse:
251
+ loop_range = zip((2, 1), range(len(values)-1, 1, -2))
252
+ for n_cluster, i in loop_range:
253
+ print(x)
254
+ print(values[i])
255
  fig.add_vline(
256
  x=values[i],
257
  annotation_text=f"End of cluster {n_cluster+1}",