molokhovdmitry committed on
Commit
c988650
·
1 Parent(s): 23910e6

YouTube API is now encapsulated in YouTubeAPI class.

Browse files
Files changed (2) hide show
  1. src/main.py +6 -6
  2. src/yt_api.py +60 -55
src/main.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, Response
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
  import pandas as pd
4
 
5
- from src.yt_api import get_comments
6
  from src.models import init_emotions_model
7
 
8
 
@@ -17,6 +17,10 @@ settings = Settings()
17
  app = FastAPI(title='social-stat')
18
 
19
  emotions_clf = init_emotions_model()
 
 
 
 
20
 
21
 
22
  @app.get('/')
@@ -27,11 +31,7 @@ def home():
27
  @app.get('/predict')
28
  def predict(video_id):
29
  # Get comments
30
- comments = get_comments(
31
- video_id,
32
- settings.MAX_COMMENT_SIZE,
33
- settings.YT_API_KEY
34
- )
35
  comments_df = pd.DataFrame(comments)
36
 
37
  # Predict emotions in batches
 
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
  import pandas as pd
4
 
5
+ from src.yt_api import YouTubeAPI
6
  from src.models import init_emotions_model
7
 
8
 
 
17
  app = FastAPI(title='social-stat')
18
 
19
  emotions_clf = init_emotions_model()
20
+ yt_api = YouTubeAPI(
21
+ api_key=settings.YT_API_KEY,
22
+ max_comment_size=settings.MAX_COMMENT_SIZE
23
+ )
24
 
25
 
26
  @app.get('/')
 
31
  @app.get('/predict')
32
  def predict(video_id):
33
  # Get comments
34
+ comments = yt_api.get_comments(video_id)
 
 
 
 
35
  comments_df = pd.DataFrame(comments)
36
 
37
  # Predict emotions in batches
src/yt_api.py CHANGED
@@ -2,66 +2,71 @@ import requests
2
  from pprint import pprint
3
 
4
 
5
- def get_comments(video_id, max_comment_size, api_key):
6
- """Yields all `commentThreads` from a YouTube video in batches."""
 
 
 
7
 
8
- # Get comments from the first page
9
- response = get_response(video_id, api_key, max_results=100)
10
- comment_list = response_to_comments(response, max_comment_size)
11
 
12
- # Get comments from the other pages
13
- while 'nextPageToken' in response.keys():
14
- response = get_response(
15
- video_id, api_key, page_token=response['nextPageToken'])
16
- comment_list += (response_to_comments(response, max_comment_size))
17
 
18
- return comment_list
 
 
 
 
19
 
 
20
 
21
- def get_response(video_id, api_key, page_token=None, max_results=100):
22
- """Gets the response from YouTube API and converts it to JSON."""
23
- url = 'https://youtube.googleapis.com/youtube/v3/commentThreads'
24
- payload = {
25
- 'videoId': video_id,
26
- 'key': api_key,
27
- 'maxResults': max_results,
28
- 'part': 'snippet',
29
- 'pageToken': page_token,
30
- }
31
- response = requests.get(url, params=payload)
32
- return response.json()
33
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def response_to_comments(response, max_comment_size):
36
- """Converts JSON response to `comment_list` dict."""
37
- comment_list = []
38
- for full_comment in response['items']:
39
- comment = full_comment['snippet']
40
- can_reply = comment['canReply']
41
- total_reply_count = comment['totalReplyCount']
42
- comment = comment['topLevelComment']
43
- comment_id = comment['id']
44
- comment = comment['snippet']
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Skip if comment is too long
47
- if len(comment['textDisplay']) > max_comment_size:
48
- continue
49
- try:
50
- comment_list.append({
51
- 'comment_id': comment_id,
52
- 'video_id': comment['videoId'],
53
- 'channel_id': comment['authorChannelId']['value'],
54
- 'author_display_name': comment['authorDisplayName'],
55
- 'text_original': comment['textOriginal'],
56
- 'text_display': comment['textDisplay'],
57
- 'published_at': comment['publishedAt'].replace('T', ' ')[:-1],
58
- 'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
59
- 'like_count': comment['likeCount'],
60
- 'can_reply': can_reply,
61
- 'total_reply_count': total_reply_count,
62
- })
63
- except Exception as e:
64
- print(f"Error: {e}\nComment:")
65
- pprint(full_comment)
66
-
67
- return comment_list
 
2
  from pprint import pprint
3
 
4
 
5
class YouTubeAPI:
    """Minimal client for the YouTube Data API v3 `commentThreads` endpoint.

    Fetches every top-level comment thread of a video, following
    `nextPageToken` pagination, and flattens each thread into a plain dict.

    Args:
        api_key: YouTube Data API key sent as the `key` query parameter.
        max_comment_size: comments whose `textDisplay` is longer than this
            are silently skipped.
    """

    def __init__(self, api_key, max_comment_size):
        self.api_key = api_key
        self.url = 'https://youtube.googleapis.com/youtube/v3/commentThreads'
        self.max_comment_size = max_comment_size

    def get_comments(self, video_id):
        """Returns a list of all `commentThreads` comment dicts for a video."""
        # Get comments from the first page.
        response = self.get_response(video_id, max_results=100)
        comment_list = self.response_to_comments(response)

        # Follow pagination until the API stops returning a nextPageToken.
        while 'nextPageToken' in response:
            response = self.get_response(
                video_id, page_token=response['nextPageToken'])
            comment_list += self.response_to_comments(response)

        return comment_list

    def get_response(self, video_id, page_token=None, max_results=100):
        """Gets one page of commentThreads from the API as parsed JSON.

        Args:
            video_id: target video ID.
            page_token: `nextPageToken` from a previous page, or None for
                the first page.
            max_results: page size requested from the API.
        """
        payload = {
            'videoId': video_id,
            'key': self.api_key,
            'maxResults': max_results,
            'part': 'snippet',
            'pageToken': page_token,
        }
        # Fix: use the endpoint configured in __init__ instead of
        # re-declaring the URL locally (self.url was previously unused).
        response = requests.get(self.url, params=payload)
        return response.json()

    def response_to_comments(self, response):
        """Converts a JSON `commentThreads` response to a list of dicts.

        Comments whose `textDisplay` exceeds `self.max_comment_size` are
        skipped; items missing an expected key are logged and skipped.
        """
        comment_list = []
        for full_comment in response['items']:
            comment = full_comment['snippet']
            can_reply = comment['canReply']
            total_reply_count = comment['totalReplyCount']
            comment = comment['topLevelComment']
            comment_id = comment['id']
            comment = comment['snippet']

            # Skip if comment is too long.
            if len(comment['textDisplay']) > self.max_comment_size:
                continue
            try:
                published_at = comment['publishedAt']
                comment_list.append({
                    'comment_id': comment_id,
                    'video_id': comment['videoId'],
                    'channel_id': comment['authorChannelId']['value'],
                    'author_display_name': comment['authorDisplayName'],
                    'text_original': comment['textOriginal'],
                    'text_display': comment['textDisplay'],
                    # 'YYYY-MM-DDTHH:MM:SSZ' -> 'YYYY-MM-DD HH:MM:SS'
                    'published_at': published_at.replace('T', ' ')[:-1],
                    'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
                    'like_count': comment['likeCount'],
                    'can_reply': can_reply,
                    'total_reply_count': total_reply_count,
                })
            except KeyError as e:
                print(f"Error: {e}\nComment:")
                pprint(full_comment)

        return comment_list