Spaces:
Runtime error
Runtime error
correct histogram vertical bars
Browse files- pages/clustering.py +17 -11
pages/clustering.py
CHANGED
@@ -38,9 +38,9 @@ The **frequency** denotes how frequently a customer has ordered.
|
|
38 |
|
39 |
There are 3 available clusters for this metric:
|
40 |
|
41 |
-
- cluster
|
42 |
-
- cluster
|
43 |
-
- cluster
|
44 |
|
45 |
-------
|
46 |
""".lstrip()
|
@@ -50,9 +50,9 @@ The **recency** refers to how recently a customer has bought;
|
|
50 |
|
51 |
There are 3 available clusters for this metric:
|
52 |
|
53 |
-
- cluster
|
54 |
-
- cluster
|
55 |
-
- cluster
|
56 |
|
57 |
-------
|
58 |
""".lstrip()
|
@@ -63,9 +63,9 @@ from your business.
|
|
63 |
|
64 |
There are 3 available clusters for this metric:
|
65 |
|
66 |
-
- cluster
|
67 |
-
- cluster
|
68 |
-
- cluster
|
69 |
|
70 |
-------
|
71 |
""".lstrip()
|
@@ -234,7 +234,7 @@ def categorize_user(recency_cluster, frequency_cluster, monetary_cluster):
|
|
234 |
def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int]]):
|
235 |
"""Plots 3 histograms for the RFM metrics."""
|
236 |
|
237 |
-
for x in ("Revenue", "Frequency", "Recency"):
|
238 |
fig = px.histogram(
|
239 |
df_rfm,
|
240 |
x=x,
|
@@ -244,8 +244,14 @@ def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int
|
|
244 |
# Get the max value in the cluster info. The cluster info is a list of min - max
|
245 |
# values per cluster.
|
246 |
values = cluster_info[f"{x}_cluster"]
|
|
|
247 |
# Add vertical bar on each cluster end. But skip the last cluster.
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
249 |
fig.add_vline(
|
250 |
x=values[i],
|
251 |
annotation_text=f"End of cluster {n_cluster+1}",
|
|
|
38 |
|
39 |
There are 3 available clusters for this metric:
|
40 |
|
41 |
+
- cluster 1: these customers purchased once or only a few times (range [{}, {}])
|
42 |
+
- cluster 2: these customers have a moderate number of orders (range [{}, {}])
|
43 |
+
- cluster 3: these customers purchase many times (range [{}, {}])
|
44 |
|
45 |
-------
|
46 |
""".lstrip()
|
|
|
50 |
|
51 |
There are 3 available clusters for this metric:
|
52 |
|
53 |
+
- cluster 1: the last order of these clients was a long time ago (range [{}, {}])
|
54 |
+
- cluster 2: these clients purchased something, but not very recently (range [{}, {}])
|
55 |
+
- cluster 3: the last order of these clients was a few days/weeks ago (range [{}, {}])
|
56 |
|
57 |
-------
|
58 |
""".lstrip()
|
|
|
63 |
|
64 |
There are 3 available clusters for this metric:
|
65 |
|
66 |
+
- cluster 1: these clients spent little money (range [{}, {}])
|
67 |
+
- cluster 2: these clients spent a considerable amount of money (range [{}, {}])
|
68 |
+
- cluster 3: these clients spent lots of money (range [{}, {}])
|
69 |
|
70 |
-------
|
71 |
""".lstrip()
|
|
|
234 |
def plot_rfm_distribution(df_rfm: pd.DataFrame, cluster_info: Dict[str, List[int]]):
|
235 |
"""Plots 3 histograms for the RFM metrics."""
|
236 |
|
237 |
+
for x, to_reverse in zip(("Revenue", "Frequency", "Recency"), (False, False, True)):
|
238 |
fig = px.histogram(
|
239 |
df_rfm,
|
240 |
x=x,
|
|
|
244 |
# Get the max value in the cluster info. The cluster info is a list of min - max
|
245 |
# values per cluster.
|
246 |
values = cluster_info[f"{x}_cluster"]
|
247 |
+
print(values)
|
248 |
# Add vertical bar on each cluster end. But skip the last cluster.
|
249 |
+
loop_range = list(enumerate(range(1, len(values)-1, 2)))
|
250 |
+
if to_reverse:
|
251 |
+
loop_range = zip((2, 1), range(len(values)-1, 1, -2))
|
252 |
+
for n_cluster, i in loop_range:
|
253 |
+
print(x)
|
254 |
+
print(values[i])
|
255 |
fig.add_vline(
|
256 |
x=values[i],
|
257 |
annotation_text=f"End of cluster {n_cluster+1}",
|