molokhovdmitry committed on
Commit
c988650
·
1 Parent(s): 23910e6

YouTube API is now encapsulated in YouTubeAPI class.

Browse files
Files changed (2) hide show
  1. src/main.py +6 -6
  2. src/yt_api.py +60 -55
src/main.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, Response
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
  import pandas as pd
4
 
5
- from src.yt_api import get_comments
6
  from src.models import init_emotions_model
7
 
8
 
@@ -17,6 +17,10 @@ settings = Settings()
17
  app = FastAPI(title='social-stat')
18
 
19
  emotions_clf = init_emotions_model()
 
 
 
 
20
 
21
 
22
  @app.get('/')
@@ -27,11 +31,7 @@ def home():
27
  @app.get('/predict')
28
  def predict(video_id):
29
  # Get comments
30
- comments = get_comments(
31
- video_id,
32
- settings.MAX_COMMENT_SIZE,
33
- settings.YT_API_KEY
34
- )
35
  comments_df = pd.DataFrame(comments)
36
 
37
  # Predict emotions in batches
 
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
  import pandas as pd
4
 
5
+ from src.yt_api import YouTubeAPI
6
  from src.models import init_emotions_model
7
 
8
 
 
17
  app = FastAPI(title='social-stat')
18
 
19
  emotions_clf = init_emotions_model()
20
+ yt_api = YouTubeAPI(
21
+ api_key=settings.YT_API_KEY,
22
+ max_comment_size=settings.MAX_COMMENT_SIZE
23
+ )
24
 
25
 
26
  @app.get('/')
 
31
  @app.get('/predict')
32
  def predict(video_id):
33
  # Get comments
34
+ comments = yt_api.get_comments(video_id)
 
 
 
 
35
  comments_df = pd.DataFrame(comments)
36
 
37
  # Predict emotions in batches
src/yt_api.py CHANGED
@@ -2,66 +2,71 @@ import requests
2
  from pprint import pprint
3
 
4
 
5
- def get_comments(video_id, max_comment_size, api_key):
6
- """Yields all `commentThreads` from a YouTube video in batches."""
 
 
 
7
 
8
- # Get comments from the first page
9
- response = get_response(video_id, api_key, max_results=100)
10
- comment_list = response_to_comments(response, max_comment_size)
11
 
12
- # Get comments from the other pages
13
- while 'nextPageToken' in response.keys():
14
- response = get_response(
15
- video_id, api_key, page_token=response['nextPageToken'])
16
- comment_list += (response_to_comments(response, max_comment_size))
17
 
18
- return comment_list
 
 
 
 
19
 
 
20
 
21
- def get_response(video_id, api_key, page_token=None, max_results=100):
22
- """Gets the response from YouTube API and converts it to JSON."""
23
- url = 'https://youtube.googleapis.com/youtube/v3/commentThreads'
24
- payload = {
25
- 'videoId': video_id,
26
- 'key': api_key,
27
- 'maxResults': max_results,
28
- 'part': 'snippet',
29
- 'pageToken': page_token,
30
- }
31
- response = requests.get(url, params=payload)
32
- return response.json()
33
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def response_to_comments(response, max_comment_size):
36
- """Converts JSON response to `comment_list` dict."""
37
- comment_list = []
38
- for full_comment in response['items']:
39
- comment = full_comment['snippet']
40
- can_reply = comment['canReply']
41
- total_reply_count = comment['totalReplyCount']
42
- comment = comment['topLevelComment']
43
- comment_id = comment['id']
44
- comment = comment['snippet']
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Skip if comment is too long
47
- if len(comment['textDisplay']) > max_comment_size:
48
- continue
49
- try:
50
- comment_list.append({
51
- 'comment_id': comment_id,
52
- 'video_id': comment['videoId'],
53
- 'channel_id': comment['authorChannelId']['value'],
54
- 'author_display_name': comment['authorDisplayName'],
55
- 'text_original': comment['textOriginal'],
56
- 'text_display': comment['textDisplay'],
57
- 'published_at': comment['publishedAt'].replace('T', ' ')[:-1],
58
- 'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
59
- 'like_count': comment['likeCount'],
60
- 'can_reply': can_reply,
61
- 'total_reply_count': total_reply_count,
62
- })
63
- except Exception as e:
64
- print(f"Error: {e}\nComment:")
65
- pprint(full_comment)
66
-
67
- return comment_list
 
2
  from pprint import pprint
3
 
4
 
5
class YouTubeAPI:
    """Minimal client for the YouTube Data API v3 `commentThreads` endpoint.

    Fetches every top-level comment thread of a video, following
    `nextPageToken` pagination, and flattens each thread into a plain dict.

    Args:
        api_key: YouTube Data API key sent as the `key` query parameter.
        max_comment_size: comments whose `textDisplay` is longer than this
            are silently skipped.
    """

    def __init__(self, api_key, max_comment_size):
        self.api_key = api_key
        self.url = 'https://youtube.googleapis.com/youtube/v3/commentThreads'
        self.max_comment_size = max_comment_size

    def get_comments(self, video_id):
        """Returns a list of all `commentThreads` comment dicts for a video."""
        # Get comments from the first page.
        response = self.get_response(video_id, max_results=100)
        comment_list = self.response_to_comments(response)

        # Follow pagination until the API stops returning a nextPageToken.
        while 'nextPageToken' in response:
            response = self.get_response(
                video_id, page_token=response['nextPageToken'])
            comment_list += self.response_to_comments(response)

        return comment_list

    def get_response(self, video_id, page_token=None, max_results=100):
        """Gets one page of commentThreads from the API as parsed JSON.

        Args:
            video_id: target video ID.
            page_token: `nextPageToken` from a previous page, or None for
                the first page.
            max_results: page size requested from the API.
        """
        payload = {
            'videoId': video_id,
            'key': self.api_key,
            'maxResults': max_results,
            'part': 'snippet',
            'pageToken': page_token,
        }
        # Fix: use the endpoint configured in __init__ instead of
        # re-declaring the URL locally (self.url was previously unused).
        response = requests.get(self.url, params=payload)
        return response.json()

    def response_to_comments(self, response):
        """Converts a JSON `commentThreads` response to a list of dicts.

        Comments whose `textDisplay` exceeds `self.max_comment_size` are
        skipped; items missing an expected key are logged and skipped.
        """
        comment_list = []
        for full_comment in response['items']:
            comment = full_comment['snippet']
            can_reply = comment['canReply']
            total_reply_count = comment['totalReplyCount']
            comment = comment['topLevelComment']
            comment_id = comment['id']
            comment = comment['snippet']

            # Skip if comment is too long.
            if len(comment['textDisplay']) > self.max_comment_size:
                continue
            try:
                published_at = comment['publishedAt']
                comment_list.append({
                    'comment_id': comment_id,
                    'video_id': comment['videoId'],
                    'channel_id': comment['authorChannelId']['value'],
                    'author_display_name': comment['authorDisplayName'],
                    'text_original': comment['textOriginal'],
                    'text_display': comment['textDisplay'],
                    # 'YYYY-MM-DDTHH:MM:SSZ' -> 'YYYY-MM-DD HH:MM:SS'
                    'published_at': published_at.replace('T', ' ')[:-1],
                    'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
                    'like_count': comment['likeCount'],
                    'can_reply': can_reply,
                    'total_reply_count': total_reply_count,
                })
            except KeyError as e:
                print(f"Error: {e}\nComment:")
                pprint(full_comment)

        return comment_list