enesmanan committed on
Commit
2667fe6
·
verified ·
1 Parent(s): 581c8a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -79
app.py CHANGED
@@ -1,13 +1,10 @@
1
  import os
2
- import time
3
- import requests
4
- import re
5
  import pandas as pd
6
  import plotly.express as px
7
  import gradio as gr
8
  from dotenv import load_dotenv
9
  from scripts.review_summarizer import analyze_reviews
10
- from scrape.trendyol_scraper_origin import scrape_comments as selenium_scrape
11
 
12
  load_dotenv()
13
  GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
@@ -48,83 +45,11 @@ def create_star_plot(df):
48
  )
49
  return fig
50
 
51
- def scrape_product_comments_v2(url):
52
- headers = {
53
- "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
54
- "accept-language": "en-US,en;q=0.9",
55
- "cache-control": "max-age=0",
56
- "upgrade-insecure-requests": "1",
57
- "user-agent": "Mozilla/5.0 (iPad; CPU OS 14_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/129.0 Mobile/15E148 Safari/605.1.15"
58
- }
59
-
60
- # Extract product_id using regex
61
- match = re.search(r"-p-(\d+)", url)
62
- if not match:
63
- raise ValueError("Product ID not found in URL")
64
-
65
- product_id = match.group(1)
66
- api_url = f"https://apigw.trendyol.com/discovery-web-websfxsocialreviewrating-santral/product-reviews-detailed?contentId={product_id}&page=1&order=DESC&orderBy=Score&channelId=1"
67
-
68
- def fetch_reviews(api_url, headers):
69
- all_reviews = []
70
- response = requests.get(api_url, headers=headers)
71
- if response.status_code != 200:
72
- raise ConnectionError(f"Initial request failed: {response.status_code}")
73
-
74
- data = response.json()
75
- total_pages = data["result"]["productReviews"]["totalPages"]
76
- all_reviews.extend(data["result"]["productReviews"]["content"])
77
-
78
- for page in range(2, total_pages + 1):
79
- paginated_url = api_url.replace("page=1", f"page={page}")
80
- response = requests.get(paginated_url, headers=headers)
81
- if response.status_code == 200:
82
- page_data = response.json()
83
- all_reviews.extend(page_data["result"]["productReviews"]["content"])
84
- else:
85
- print(f"Failed to fetch page {page}: {response.status_code}")
86
-
87
- return all_reviews
88
-
89
- reviews = fetch_reviews(api_url, headers)
90
- reviews_df = pd.DataFrame(reviews)
91
- reviews_df = reviews_df.rename(columns={
92
- "id": "Kullanıcı_id",
93
- "userFullName": "Kullanıcı Adı",
94
- "comment": "Yorum",
95
- "lastModifiedDate": "Tarih",
96
- "rate": "Yıldız Sayısı"
97
- })
98
- reviews_df = reviews_df[["Kullanıcı_id", "Kullanıcı Adı", "Yorum", "Tarih", "Yıldız Sayısı"]]
99
- return reviews_df
100
-
101
- def scrape_product_comments(url, use_selenium=False):
102
- """
103
- Trendyol yorumlarını çeker. Önce API ile dener,
104
- başarısız olursa Selenium'a geçer.
105
- """
106
- try:
107
- if use_selenium:
108
- return selenium_scrape(url)
109
-
110
- # Önce API ile deneyelim
111
- df = scrape_product_comments_v2(url)
112
- if df is not None and len(df) > 0:
113
- return df
114
-
115
- # API başarısız olursa Selenium'a geç
116
- print("API scraping başarısız oldu, Selenium'a geçiliyor...")
117
- return selenium_scrape(url)
118
-
119
- except Exception as e:
120
- print(f"Scraping hatası: {str(e)}")
121
- return None
122
-
123
  def analyze_product(url, progress=gr.Progress()):
124
  try:
125
  # Fetch reviews
126
  progress(0.1, desc="Yorumlar çekiliyor...")
127
- df = scrape_product_comments(url)
128
 
129
  if df is None or len(df) == 0:
130
  return None, None, None, None, None, None, None, "Yorumlar çekilemedi. URL'yi kontrol edin."
@@ -194,8 +119,6 @@ with gr.Blocks(title="Trendyol Yorum Analizi") as demo:
194
  avg_rating = gr.Textbox(label="Ortalama Puan")
195
  positive_ratio = gr.Textbox(label="Olumlu Yorum Oranı")
196
 
197
-
198
-
199
  summary = gr.Markdown(label="📝 Genel Değerlendirme")
200
  info_message = gr.Markdown()
201
 
 
1
  import os
 
 
 
2
  import pandas as pd
3
  import plotly.express as px
4
  import gradio as gr
5
  from dotenv import load_dotenv
6
  from scripts.review_summarizer import analyze_reviews
7
+ from scrape.trendyol_scraper_origin import scrape_comments
8
 
9
  load_dotenv()
10
  GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
 
45
  )
46
  return fig
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def analyze_product(url, progress=gr.Progress()):
49
  try:
50
  # Fetch reviews
51
  progress(0.1, desc="Yorumlar çekiliyor...")
52
+ df = scrape_comments(url)
53
 
54
  if df is None or len(df) == 0:
55
  return None, None, None, None, None, None, None, "Yorumlar çekilemedi. URL'yi kontrol edin."
 
119
  avg_rating = gr.Textbox(label="Ortalama Puan")
120
  positive_ratio = gr.Textbox(label="Olumlu Yorum Oranı")
121
 
 
 
122
  summary = gr.Markdown(label="📝 Genel Değerlendirme")
123
  info_message = gr.Markdown()
124