Spaces:

rafaldembski
/

ScamDetector

Sleeping

File size: 13,561 Bytes

# utils/functions.py

import phonenumbers
from phonenumbers import geocoder, carrier
import re
import requests
import os
import json
from datetime import datetime
import logging

# Logging configuration: records at ERROR level and above go to app.log,
# formatted as "<timestamp> <level>:<message>".
logging.basicConfig(filename='app.log', level=logging.ERROR, format='%(asctime)s %(levelname)s:%(message)s')

# Path to the JSON file that stores the reported fake phone numbers
FAKE_NUMBERS_FILE = 'fake_numbers.json'

# Inicjalizacja pliku JSON przechowującego fałszywe numery
def init_fake_numbers_file():
    """Ensure the fake-numbers store exists and holds a JSON list.

    Creates ``FAKE_NUMBERS_FILE`` containing an empty list when the file is
    missing. When the file exists but is unusable (empty, invalid JSON,
    bytes that cannot be decoded as text, or a JSON value that is not a
    list) it is reset to an empty list.

    Other I/O errors (for example missing permissions) are not handled here
    and propagate to the caller.
    """
    try:
        with open(FAKE_NUMBERS_FILE, 'r') as f:
            # The other helpers iterate over and append to this value, so
            # anything that is not a list counts as corrupted.
            usable = isinstance(json.load(f), list)
    except FileNotFoundError:
        usable = False
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError, so this covers broken JSON as well as binary garbage.
        usable = False

    if not usable:
        with open(FAKE_NUMBERS_FILE, 'w') as f:
            json.dump([], f)

# Dodanie numeru telefonu do pliku JSON
def add_fake_number(phone_number):
    """Record ``phone_number`` in the fake-numbers JSON file.

    The stored entries are loaded first; a missing file or invalid JSON is
    treated as an empty list. A new entry with the number and the current
    local timestamp (ISO 8601) is appended unless the number is already
    stored.

    Returns:
        True when the number was appended and the file was written.
        False when the number was already stored, or when writing the file
        failed (the failure is logged).
    """
    try:
        with open(FAKE_NUMBERS_FILE, 'r') as src:
            reported = json.load(src)
    except (json.JSONDecodeError, FileNotFoundError):
        reported = []

    # Guard clause: a number that is already stored is not added twice.
    for record in reported:
        if record["phone_number"] == phone_number:
            return False

    reported.append({
        "phone_number": phone_number,
        "reported_at": datetime.now().isoformat()
    })

    try:
        with open(FAKE_NUMBERS_FILE, 'w') as dst:
            json.dump(reported, dst, indent=4)
    except Exception as e:
        logging.error(f"Nie udało się zapisać numeru {phone_number}: {e}")
        return False
    return True

# Sprawdzenie, czy numer telefonu jest w pliku JSON
def is_fake_number(phone_number):
    """Return True when ``phone_number`` is stored in the fake-numbers file.

    A missing file or a file with invalid JSON yields False.
    """
    try:
        with open(FAKE_NUMBERS_FILE, 'r') as src:
            reported = json.load(src)
        for record in reported:
            if record["phone_number"] == phone_number:
                return True
        return False
    except (json.JSONDecodeError, FileNotFoundError):
        return False

# Read the API key from the SAMBANOVA_API_KEY environment variable
# (os.getenv returns None when the variable is not set)
API_KEY = os.getenv('SAMBANOVA_API_KEY')

# Funkcja do weryfikacji numeru telefonu
def get_phone_info(phone_number):
    """Look up the geographic description and carrier name of a number.

    The number is parsed with ``phonenumbers.parse`` without a default
    region (``None``), and both lookups are requested with the ``'pl'``
    language code.

    Returns:
        A ``(country, operator)`` tuple, or ``(None, None)`` when the number
        cannot be parsed.
    """
    try:
        number = phonenumbers.parse(phone_number, None)
        return (
            geocoder.description_for_number(number, 'pl'),
            carrier.name_for_number(number, 'pl'),
        )
    except phonenumbers.NumberParseException:
        return None, None

# Proste sprawdzenia heurystyczne wiadomości
def simple_checks(message):
    """Run cheap heuristic checks on a message and collect warnings.

    Three independent checks are applied:

    1. The lowercased message contains one of the scam-related keywords
       (substring match).
    2. The message contains an ``http://`` or ``https://`` link. The scheme
       is matched case-insensitively, so ``HTTPS://...`` is detected too.
    3. The lowercased message contains a request verb followed, later on the
       same line, by a kind of confidential data.

    Args:
        message: Text of the message to inspect.

    Returns:
        A list of warning strings (in Polish) in the order of the checks
        above; empty when nothing suspicious was found.
    """
    warnings = []
    lowered = message.lower()

    # Keywords frequently used in scam messages
    scam_keywords = ['pieniądze', 'przelew', 'hasło', 'kod', 'nagroda', 'wygrana', 'pilne', 'pomoc', 'opłata']
    if any(keyword in lowered for keyword in scam_keywords):
        warnings.append("Wiadomość zawiera słowa kluczowe związane z potencjalnym oszustwem.")

    # Presence of a link. URL schemes are case-insensitive, so an upper- or
    # mixed-case scheme must not slip past this check.
    if re.search(r'https?://', message, re.IGNORECASE):
        warnings.append("Wiadomość zawiera link.")

    # Sender asking for confidential information
    if re.search(r'\b(podaj|prześlij|udostępnij)\b.*\b(hasło|kod|dane osobowe|numer konta)\b', lowered):
        warnings.append("Wiadomość zawiera prośbę o poufne informacje.")

    return warnings

# Funkcja do analizy wiadomości za pomocą API SambaNova z głębszym procesem myślenia
def analyze_message(message, phone_number, additional_info, api_key, language, timeout=30):
    """Ask the SambaNova chat-completions API to assess a message for fraud.

    A language-specific system prompt instructs the model to answer in three
    tagged sections (``<analysis>``, ``<risk_assessment>``,
    ``<recommendations>``); the sections are then extracted from the reply.

    Args:
        message: Text of the message to analyse.
        phone_number: Sender's phone number, embedded in the user prompt.
        additional_info: Extra context appended to the user prompt.
        api_key: Bearer token for the API. When falsy, no request is made.
        language: ``'Polish'``, ``'German'`` or ``'English'``; any other
            value falls back to the English system prompt.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        A 3-tuple of strings ``(analysis, risk_assessment, recommendations)``.
        A section missing from the reply is replaced by a "No ... available."
        placeholder. On a missing API key, a non-200 response or any
        exception (including a timeout), the first element carries the error
        description and the problem is logged; nothing is raised.
    """
    if not api_key:
        logging.error("Brak klucza API.")
        return "Brak klucza API.", "Brak klucza API.", "Brak klucza API."

    url = "https://api.sambanova.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # System prompt per supported language
    system_prompts = {
        'Polish': """
Jesteś zaawansowanym asystentem AI specjalizującym się w identyfikacji fałszywych wiadomości SMS. Twoim zadaniem jest przeprowadzenie szczegółowej analizy wiadomości, wykorzystując głęboki proces myślenia i dostarczając kompleksową ocenę. Twoja odpowiedź powinna być podzielona na trzy sekcje:

<analysis>
**Analiza Treści Wiadomości:**
- Przeprowadź szczegółową analizę treści wiadomości, identyfikując potencjalne czerwone flagi, takie jak błędy językowe, prośby o dane osobowe, pilne prośby o kontakt itp.
- Opisz kontekst językowy i kulturowy wiadomości.
- Zidentyfikuj wszelkie elementy, które mogą sugerować, że wiadomość jest próbą wyłudzenia informacji lub pieniędzy.
</analysis>

<risk_assessment>
**Ocena Ryzyka Oszustwa:**
- Na podstawie analizy treści i dostępnych informacji oceń prawdopodobieństwo, że wiadomość jest oszustwem. Użyj skali od 1 do 10, gdzie 1 oznacza bardzo niskie ryzyko, a 10 bardzo wysokie ryzyko.
- Wyjaśnij, jakie czynniki wpływają na tę ocenę.
</risk_assessment>

<recommendations>
**Zalecenia dla Użytkownika:**
- Podaj jasne i konkretne zalecenia dotyczące dalszych kroków, które użytkownik powinien podjąć.
- Uwzględnij sugestie dotyczące bezpieczeństwa, takie jak blokowanie nadawcy, zgłaszanie wiadomości do odpowiednich instytucji, czy też ignorowanie wiadomości.
- Jeśli to możliwe, zasugeruj dodatkowe środki ostrożności, które użytkownik może podjąć, aby chronić swoje dane osobowe i finansowe.
</recommendations>

Twoja odpowiedź powinna być sformatowana dokładnie w powyższy sposób, używając znaczników <analysis>, <risk_assessment> i <recommendations>. Upewnij się, że każda sekcja jest wypełniona kompletnie i szczegółowo.
        """,
        'German': """
Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierung gefälschter SMS-Nachrichten. Deine Aufgabe ist es, eine detaillierte Analyse der Nachricht durchzuführen, indem du einen tiefgreifenden Denkprozess nutzt und eine umfassende Bewertung lieferst. Deine Antwort sollte in drei Abschnitte unterteilt sein:

<analysis>
**Nachrichteninhaltsanalyse:**
- Führe eine detaillierte Analyse des Nachrichteninhalts durch und identifiziere potenzielle rote Flaggen wie sprachliche Fehler, Aufforderungen zur Preisgabe persönlicher Daten, dringende Kontaktanfragen usw.
- Beschreibe den sprachlichen und kulturellen Kontext der Nachricht.
- Identifiziere alle Elemente, die darauf hindeuten könnten, dass die Nachricht ein Versuch ist, Informationen oder Geld zu erlangen.
</analysis>

<risk_assessment>
**Betrugsrisikobewertung:**
- Basierend auf der Inhaltsanalyse und den verfügbaren Informationen, bewerte die Wahrscheinlichkeit, dass die Nachricht ein Betrug ist. Verwende eine Skala von 1 bis 10, wobei 1 sehr geringes Risiko und 10 sehr hohes Risiko bedeutet.
- Erkläre, welche Faktoren diese Bewertung beeinflussen.
</risk_assessment>

<recommendations>
**Empfehlungen für den Benutzer:**
- Gib klare und konkrete Empfehlungen zu den nächsten Schritten, die der Benutzer unternehmen sollte.
- Berücksichtige Sicherheitsempfehlungen wie das Blockieren des Absenders, das Melden der Nachricht an entsprechende Behörden oder das Ignorieren der Nachricht.
- Wenn möglich, schlage zusätzliche Vorsichtsmaßnahmen vor, die der Benutzer ergreifen kann, um seine persönlichen und finanziellen Daten zu schützen.
</recommendations>

Deine Antwort sollte genau nach den oben genannten Richtlinien formatiert sein und die Markierungen <analysis>, <risk_assessment> und <recommendations> verwenden. Stelle sicher, dass jeder Abschnitt vollständig und detailliert ausgefüllt ist.
        """,
        'English': """
You are an advanced AI assistant specializing in identifying fake SMS messages. Your task is to conduct a detailed analysis of the message, utilizing a deep thinking process and providing a comprehensive assessment. Your response should be divided into three sections:

<analysis>
**Message Content Analysis:**
- Conduct a detailed analysis of the message content, identifying potential red flags such as language errors, requests for personal information, urgent contact requests, etc.
- Describe the linguistic and cultural context of the message.
- Identify any elements that may suggest the message is an attempt to solicit information or money.
</analysis>

<risk_assessment>
**Fraud Risk Assessment:**
- Based on the content analysis and available information, assess the likelihood that the message is fraudulent. Use a scale from 1 to 10, where 1 indicates very low risk and 10 indicates very high risk.
- Explain the factors that influence this assessment.
</risk_assessment>

<recommendations>
**User Recommendations:**
- Provide clear and concrete recommendations regarding the next steps the user should take.
- Include security suggestions such as blocking the sender, reporting the message to appropriate authorities, or ignoring the message.
- If possible, suggest additional precautionary measures the user can take to protect their personal and financial information.
</recommendations>

Your response should be formatted exactly as specified above, using the <analysis>, <risk_assessment>, and <recommendations> tags. Ensure that each section is thoroughly and comprehensively filled out.
        """
    }

    system_prompt = system_prompts.get(language, system_prompts['English'])  # Default to English if language not found

    user_prompt = f"""Analyze the following message for potential fraud:

Message: "{message}"
Sender's Phone Number: "{phone_number}"

Additional Information:
{additional_info}

Provide your analysis and conclusions following the guidelines above."""

    payload = {
        "model": "Meta-Llama-3.1-8B-Instruct",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "max_tokens": 1000,
        "temperature": 0.2,
        "top_p": 0.9,
        "stop": ["<|eot_id|>"]
    }

    try:
        # requests never times out on its own; bound the wait explicitly.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        if response.status_code == 200:
            data = response.json()
            ai_response = data['choices'][0]['message']['content']
            # Extract the three tagged sections; DOTALL lets a section span lines
            analysis = re.search(r'<analysis>(.*?)</analysis>', ai_response, re.DOTALL)
            risk_assessment = re.search(r'<risk_assessment>(.*?)</risk_assessment>', ai_response, re.DOTALL)
            recommendations = re.search(r'<recommendations>(.*?)</recommendations>', ai_response, re.DOTALL)

            analysis_text = analysis.group(1).strip() if analysis else "No analysis available."
            risk_text = risk_assessment.group(1).strip() if risk_assessment else "No risk assessment available."
            recommendations_text = recommendations.group(1).strip() if recommendations else "No recommendations available."

            return analysis_text, risk_text, recommendations_text
        else:
            logging.error(f"API Error: {response.status_code} - {response.text}")
            return f"API Error: {response.status_code} - {response.text}", "Analysis Error.", "Analysis Error."
    except Exception as e:
        # Boundary handler: network failures, timeouts and malformed replies
        # are all reported to the caller as text instead of being raised.
        logging.error(f"API Connection Error: {e}")
        return f"API Connection Error: {e}", "Analysis Error.", "Analysis Error."

# Inicjalizacja pliku statystyk
def init_stats_file():
    """Create ``stats.json`` with zeroed counters unless it already exists."""
    stats_path = 'stats.json'
    if os.path.exists(stats_path):
        return

    zeroed_counters = {"total_analyses": 0, "total_frauds_detected": 0}
    with open(stats_path, 'w') as out:
        json.dump(zeroed_counters, out)

# Pobranie statystyk
def get_stats():
    """Return the contents of ``stats.json``.

    When the file is missing or does not contain valid JSON, a dictionary
    with both counters set to zero is returned instead.
    """
    try:
        with open('stats.json', 'r') as src:
            return json.load(src)
    except (json.JSONDecodeError, FileNotFoundError):
        return {"total_analyses": 0, "total_frauds_detected": 0}

# Aktualizacja statystyk analizy
def update_stats(fraud_detected=False):
    """Increment the analysis counters stored in ``stats.json``.

    ``total_analyses`` is always increased by one; ``total_frauds_detected``
    is increased by one when ``fraud_detected`` is truthy. The result is
    written back to ``stats.json``.

    A missing file, invalid JSON, or JSON that is not an object makes the
    counters start from zero. A counter missing from an otherwise valid
    object also starts from zero instead of raising ``KeyError``.

    Args:
        fraud_detected: Whether the analysis being recorded found a fraud.
    """
    stats_file = 'stats.json'
    stats = {"total_analyses": 0, "total_frauds_detected": 0}
    try:
        with open(stats_file, 'r') as f:
            loaded = json.load(f)
    except (json.JSONDecodeError, FileNotFoundError):
        loaded = None

    # Stored values override the zero defaults; any extra keys are kept.
    # Anything that is not a JSON object is treated like a corrupt file.
    if isinstance(loaded, dict):
        stats.update(loaded)

    stats["total_analyses"] += 1
    if fraud_detected:
        stats["total_frauds_detected"] += 1

    with open(stats_file, 'w') as f:
        json.dump(stats, f, indent=4)

# Inicjalizacja pliku historii analiz
def init_history_file():
    """Create ``history.json`` holding an empty list unless it already exists."""
    history_path = 'history.json'
    if os.path.exists(history_path):
        return

    with open(history_path, 'w') as out:
        json.dump([], out)

# Dodanie wpisu do historii analiz
def add_to_history(message, phone_number, analysis, risk, recommendations):
    """Append one analysis record to ``history.json``.

    Existing records are loaded first; a missing file or invalid JSON is
    treated as an empty history. The new record stores the current local
    timestamp (ISO 8601) together with the given values, and the whole
    history is then written back.

    Args:
        message: The analysed message text.
        phone_number: The sender's phone number.
        analysis: Analysis text, stored under ``"analysis"``.
        risk: Risk assessment text, stored under ``"risk_assessment"``.
        recommendations: Recommendations text, stored under
            ``"recommendations"``.
    """
    history_path = 'history.json'
    try:
        with open(history_path, 'r') as src:
            entries = json.load(src)
    except (json.JSONDecodeError, FileNotFoundError):
        entries = []

    entries.append({
        "timestamp": datetime.now().isoformat(),
        "message": message,
        "phone_number": phone_number,
        "analysis": analysis,
        "risk_assessment": risk,
        "recommendations": recommendations
    })

    with open(history_path, 'w') as dst:
        json.dump(entries, dst, indent=4)

# Pobranie historii analiz
def get_history():
    """Return the contents of ``history.json``.

    When the file is missing or does not contain valid JSON, an empty list
    is returned instead.
    """
    try:
        with open('history.json', 'r') as src:
            return json.load(src)
    except (json.JSONDecodeError, FileNotFoundError):
        return []