Spaces:
Sleeping
Sleeping
Update pages/Statistics.py
Browse files- pages/Statistics.py +86 -39
pages/Statistics.py
CHANGED
@@ -4,16 +4,15 @@ import streamlit as st
|
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
import plotly.graph_objects as go
|
|
|
|
|
7 |
import re
|
8 |
from datetime import datetime
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
)
|
15 |
-
|
16 |
-
import pycountry
|
17 |
|
18 |
# Definiowanie tłumaczeń dla zakładki "Statystyki"
|
19 |
page_translations = {
|
@@ -27,12 +26,11 @@ page_translations = {
|
|
27 |
'fraud_percentage': "Procent oszustw",
|
28 |
'history_title': "Historia analizowanych wiadomo艣ci",
|
29 |
'frauds_over_time': "Liczba wykrytych oszustw w czasie",
|
30 |
-
'risk_distribution': "Rozk艂ad ocen ryzyka oszustwa",
|
31 |
-
'fraud_country_distribution': "Rozk艂ad oszustw wed艂ug kraj贸w",
|
32 |
-
'fraud_trend_title': "Trendy oszustw w czasie",
|
33 |
'risk_distribution_title': "Rozk艂ad ocen ryzyka oszustwa",
|
34 |
'fraud_country_distribution_title': "Rozk艂ad oszustw wed艂ug kraj贸w",
|
35 |
-
'
|
|
|
|
|
36 |
'fraud_vs_nonfraud': "Procentowy podzia艂: Oszustwa vs Bezpieczne",
|
37 |
'no_data': "Brak dost臋pnych danych do wy艣wietlenia.",
|
38 |
'download_button': "馃摜 Pobierz dane jako CSV"
|
@@ -47,12 +45,11 @@ page_translations = {
|
|
47 |
'fraud_percentage': "Betrugsprozentsatz",
|
48 |
'history_title': "Analyseverlauf der Nachrichten",
|
49 |
'frauds_over_time': "Anzahl der erkannten Betr眉gereien im Laufe der Zeit",
|
50 |
-
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
51 |
-
'fraud_country_distribution': "Betrug nach L盲ndern",
|
52 |
-
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
53 |
'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
|
54 |
'fraud_country_distribution_title': "Betrug nach L盲ndern",
|
55 |
-
'
|
|
|
|
|
56 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
57 |
'no_data': "Keine Daten zur Anzeige verf眉gbar.",
|
58 |
'download_button': "馃摜 Daten als CSV herunterladen"
|
@@ -67,28 +64,66 @@ page_translations = {
|
|
67 |
'fraud_percentage': "Fraud Percentage",
|
68 |
'history_title': "History of Analyzed Messages",
|
69 |
'frauds_over_time': "Number of Detected Frauds Over Time",
|
70 |
-
'risk_distribution': "Distribution of Fraud Risk Scores",
|
71 |
-
'fraud_country_distribution': "Fraud Distribution by Countries",
|
72 |
-
'fraud_trend_title': "Fraud Trends Over Time",
|
73 |
'risk_distribution_title': "Distribution of Fraud Risk Scores",
|
74 |
'fraud_country_distribution_title': "Fraud Distribution by Countries",
|
75 |
-
'
|
|
|
|
|
76 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
77 |
'no_data': "No data available to display.",
|
78 |
'download_button': "馃摜 Download data as CSV"
|
79 |
}
|
80 |
}
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
def main(language):
|
83 |
translations = page_translations[language]
|
84 |
|
85 |
-
# Pobieranie danych z
|
86 |
-
stats =
|
87 |
-
history =
|
88 |
|
89 |
# Kluczowe metryki
|
90 |
-
total_analyses = stats
|
91 |
-
total_frauds_detected = stats
|
92 |
|
93 |
# Wy艣wietlenie metryk
|
94 |
st.title(translations['header'])
|
@@ -107,11 +142,15 @@ def main(language):
|
|
107 |
col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
|
108 |
|
109 |
# Wizualizacja procentowego podzia艂u oszustw
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
|
116 |
# Wy艣wietlenie historii analiz w tabeli
|
117 |
if history:
|
@@ -130,8 +169,6 @@ def main(language):
|
|
130 |
# Dodanie kolumny 'date' dla wizualizacji
|
131 |
df_history['date'] = df_history['timestamp'].dt.date
|
132 |
|
133 |
-
# Usuni臋cie sekcji wyszukiwania i filtrowania po dacie
|
134 |
-
|
135 |
# Wy艣wietlenie tabeli historii
|
136 |
st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)
|
137 |
|
@@ -148,8 +185,8 @@ def main(language):
|
|
148 |
# Trend oszustw w czasie
|
149 |
st.markdown(f"### {translations['fraud_trend_title']}")
|
150 |
fraud_over_time = df_history.groupby(df_history['timestamp'].dt.date)['phone_number'].count().reset_index()
|
151 |
-
fraud_over_time.rename(columns={'phone_number': 'frauds_detected'}, inplace=True)
|
152 |
-
fig_trend = px.line(fraud_over_time, x='
|
153 |
st.plotly_chart(fig_trend, use_container_width=True)
|
154 |
|
155 |
# Rozk艂ad ocen ryzyka
|
@@ -161,16 +198,23 @@ def main(language):
|
|
161 |
df_history['risk_score'] = df_history['risk_assessment'].apply(extract_risk_score)
|
162 |
risk_distribution = df_history['risk_score'].value_counts().sort_index().reset_index()
|
163 |
risk_distribution.columns = ['risk_score', 'count']
|
164 |
-
fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
|
|
|
|
|
165 |
st.plotly_chart(fig_risk, use_container_width=True)
|
166 |
|
167 |
# Rozk艂ad oszustw wed艂ug kraj贸w
|
168 |
st.markdown(f"### {translations['fraud_country_distribution_title']}")
|
169 |
-
def
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
174 |
fraud_countries = df_history['country'].value_counts().reset_index()
|
175 |
fraud_countries.columns = ['country', 'counts']
|
176 |
|
@@ -197,3 +241,6 @@ def main(language):
|
|
197 |
st.plotly_chart(fig_map, use_container_width=True)
|
198 |
else:
|
199 |
st.info(translations['no_data'])
|
|
|
|
|
|
|
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
import plotly.graph_objects as go
|
7 |
+
import json
|
8 |
+
import os
|
9 |
import re
|
10 |
from datetime import datetime
|
11 |
|
12 |
+
# Definiowanie ścieżek do plików JSON
|
13 |
+
FAKE_NUMBERS_FILE = 'data/fake_numbers.json'
|
14 |
+
HISTORY_FILE = 'data/history.json'
|
15 |
+
STATS_FILE = 'data/stats.json'
|
|
|
|
|
|
|
16 |
|
17 |
# Definiowanie tłumaczeń dla zakładki "Statystyki"
|
18 |
page_translations = {
|
|
|
26 |
'fraud_percentage': "Procent oszustw",
|
27 |
'history_title': "Historia analizowanych wiadomo艣ci",
|
28 |
'frauds_over_time': "Liczba wykrytych oszustw w czasie",
|
|
|
|
|
|
|
29 |
'risk_distribution_title': "Rozk艂ad ocen ryzyka oszustwa",
|
30 |
'fraud_country_distribution_title': "Rozk艂ad oszustw wed艂ug kraj贸w",
|
31 |
+
'fraud_trend_title': "Trendy oszustw w czasie",
|
32 |
+
'risk_distribution': "Rozk艂ad ocen ryzyka oszustwa",
|
33 |
+
'fraud_country_distribution': "Rozk艂ad oszustw wed艂ug kraj贸w",
|
34 |
'fraud_vs_nonfraud': "Procentowy podzia艂: Oszustwa vs Bezpieczne",
|
35 |
'no_data': "Brak dost臋pnych danych do wy艣wietlenia.",
|
36 |
'download_button': "馃摜 Pobierz dane jako CSV"
|
|
|
45 |
'fraud_percentage': "Betrugsprozentsatz",
|
46 |
'history_title': "Analyseverlauf der Nachrichten",
|
47 |
'frauds_over_time': "Anzahl der erkannten Betr眉gereien im Laufe der Zeit",
|
|
|
|
|
|
|
48 |
'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
|
49 |
'fraud_country_distribution_title': "Betrug nach L盲ndern",
|
50 |
+
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
51 |
+
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
52 |
+
'fraud_country_distribution': "Betrug nach L盲ndern",
|
53 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
54 |
'no_data': "Keine Daten zur Anzeige verf眉gbar.",
|
55 |
'download_button': "馃摜 Daten als CSV herunterladen"
|
|
|
64 |
'fraud_percentage': "Fraud Percentage",
|
65 |
'history_title': "History of Analyzed Messages",
|
66 |
'frauds_over_time': "Number of Detected Frauds Over Time",
|
|
|
|
|
|
|
67 |
'risk_distribution_title': "Distribution of Fraud Risk Scores",
|
68 |
'fraud_country_distribution_title': "Fraud Distribution by Countries",
|
69 |
+
'fraud_trend_title': "Fraud Trends Over Time",
|
70 |
+
'risk_distribution': "Distribution of Fraud Risk Scores",
|
71 |
+
'fraud_country_distribution': "Fraud Distribution by Countries",
|
72 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
73 |
'no_data': "No data available to display.",
|
74 |
'download_button': "馃摜 Download data as CSV"
|
75 |
}
|
76 |
}
|
77 |
|
78 |
+
def load_json(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The decoded JSON value, or an empty list when the file does not
        exist or cannot be decoded. In the latter case an error message is
        also shown through ``st.error``.
    """
    try:
        # Open directly rather than checking os.path.exists() first: the
        # file could disappear between the check and the open.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        # Same outcome the previous existence check produced for a missing path.
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Invalid JSON and invalid UTF-8 both mean the file is corrupted.
        st.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
        return []
|
89 |
+
|
90 |
+
def save_json(file_path, data):
    """Write ``data`` to ``file_path`` as indented UTF-8 JSON.

    Args:
        file_path: Destination path. Missing parent directories are created.
        data: Any JSON-serializable value.
    """
    # Without this, open() raises FileNotFoundError when the target
    # directory (e.g. "data/") has not been created yet.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(data, file, ensure_ascii=False, indent=4)
|
94 |
+
|
95 |
+
def get_stats_from_json():
    """Return the statistics stored in ``STATS_FILE``.

    When the file does not exist it is created with zeroed counters. When it
    exists but cannot be decoded, or does not hold a JSON object, an error is
    shown through ``st.error`` and zeroed counters are returned; the file
    itself is left untouched.

    Returns:
        dict: The decoded JSON object, or zeroed ``total_analyses`` and
        ``total_frauds_detected`` counters.
    """
    default_stats = {
        "total_analyses": 0,
        "total_frauds_detected": 0,
    }

    if not os.path.exists(STATS_FILE):
        # Initialise the statistics file on first use.
        save_json(STATS_FILE, default_stats)
        return default_stats

    try:
        with open(STATS_FILE, 'r', encoding='utf-8') as file:
            stats = json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        stats = None

    # main() reads the result with .get(), so anything other than a dict
    # is treated the same way as an undecodable file.
    if not isinstance(stats, dict):
        st.error(f"Nie można załadować danych z {STATS_FILE}. Plik jest uszkodzony.")
        return default_stats
    return stats
|
108 |
+
|
109 |
+
def get_history_from_json():
    """Return the analysis history read from ``HISTORY_FILE`` via ``load_json``."""
    history = load_json(HISTORY_FILE)
    return history
|
112 |
+
|
113 |
+
def get_fake_numbers_from_json():
    """Return the fake-number data read from ``FAKE_NUMBERS_FILE`` via ``load_json``."""
    fake_numbers = load_json(FAKE_NUMBERS_FILE)
    return fake_numbers
|
116 |
+
|
117 |
def main(language):
|
118 |
translations = page_translations[language]
|
119 |
|
120 |
+
# Pobieranie danych z plików JSON
|
121 |
+
stats = get_stats_from_json()
|
122 |
+
history = get_history_from_json()
|
123 |
|
124 |
# Kluczowe metryki
|
125 |
+
total_analyses = stats.get("total_analyses", 0)
|
126 |
+
total_frauds_detected = stats.get("total_frauds_detected", 0)
|
127 |
|
128 |
# Wyświetlenie metryk
|
129 |
st.title(translations['header'])
|
|
|
142 |
col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
|
143 |
|
144 |
# Wizualizacja procentowego podziału oszustw
|
145 |
+
if total_analyses > 0:
|
146 |
+
fraud_data = [total_frauds_detected, total_analyses - total_frauds_detected]
|
147 |
+
fraud_labels = ['Fraud', 'Non-Fraud']
|
148 |
+
fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
|
149 |
+
marker_colors=['#FF6347', '#4682B4'])])
|
150 |
+
fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
|
151 |
+
st.plotly_chart(fig_fraud_pie, use_container_width=True)
|
152 |
+
else:
|
153 |
+
st.info(translations['no_data'])
|
154 |
|
155 |
# Wyświetlenie historii analiz w tabeli
|
156 |
if history:
|
|
|
169 |
# Dodanie kolumny 'date' dla wizualizacji
|
170 |
df_history['date'] = df_history['timestamp'].dt.date
|
171 |
|
|
|
|
|
172 |
# Wyświetlenie tabeli historii
|
173 |
st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)
|
174 |
|
|
|
185 |
# Trend oszustw w czasie
|
186 |
st.markdown(f"### {translations['fraud_trend_title']}")
|
187 |
fraud_over_time = df_history.groupby(df_history['timestamp'].dt.date)['phone_number'].count().reset_index()
|
188 |
+
fraud_over_time.rename(columns={'timestamp': 'date', 'phone_number': 'frauds_detected'}, inplace=True)
|
189 |
+
fig_trend = px.line(fraud_over_time, x='date', y='frauds_detected', title=translations['frauds_over_time'])
|
190 |
st.plotly_chart(fig_trend, use_container_width=True)
|
191 |
|
192 |
# Rozkład ocen ryzyka
|
|
|
198 |
df_history['risk_score'] = df_history['risk_assessment'].apply(extract_risk_score)
|
199 |
risk_distribution = df_history['risk_score'].value_counts().sort_index().reset_index()
|
200 |
risk_distribution.columns = ['risk_score', 'count']
|
201 |
+
fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
|
202 |
+
labels={'risk_score': 'Risk Score', 'count': 'Number of Messages'},
|
203 |
+
color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
|
204 |
st.plotly_chart(fig_risk, use_container_width=True)
|
205 |
|
206 |
# Rozkład oszustw według krajów
|
207 |
st.markdown(f"### {translations['fraud_country_distribution_title']}")
|
208 |
+
def get_phone_info(phone_number):
    """Map a phone number to a country name by its calling-code prefix.

    Args:
        phone_number: Phone number, expected to be a string starting with a
            ``+`` country calling code. Surrounding whitespace is ignored.

    Returns:
        tuple: ``(country, None)`` where ``country`` is ``'Poland'`` for
        ``+48``, ``'Germany'`` for ``+49`` and ``'Unknown'`` otherwise,
        including for values that are not strings. The second element is
        always ``None``.
    """
    # Values taken from a DataFrame column are not guaranteed to be strings
    # (e.g. None or NaN for missing entries); report those as unknown
    # instead of raising AttributeError on .startswith().
    if not isinstance(phone_number, str):
        return 'Unknown', None

    prefix_to_country = (
        ('+48', 'Poland'),
        ('+49', 'Germany'),
    )
    normalized = phone_number.strip()
    for prefix, country in prefix_to_country:
        if normalized.startswith(prefix):
            return country, None
    return 'Unknown', None
|
216 |
+
|
217 |
+
df_history['country'] = df_history['phone_number'].apply(lambda x: get_phone_info(x)[0])
|
218 |
fraud_countries = df_history['country'].value_counts().reset_index()
|
219 |
fraud_countries.columns = ['country', 'counts']
|
220 |
|
|
|
241 |
st.plotly_chart(fig_map, use_container_width=True)
|
242 |
else:
|
243 |
st.info(translations['no_data'])
|
244 |
+
else:
|
245 |
+
st.info(translations['no_data'])
|
246 |
+
|