Spaces:
Sleeping
Sleeping
molokhovdmitry
committed on
Commit
·
c988650
1
Parent(s):
23910e6
YouTube API is now encapsulated in YouTubeAPI class.
Browse files
- src/main.py +6 -6
- src/yt_api.py +60 -55
src/main.py
CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, Response
|
|
2 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
3 |
import pandas as pd
|
4 |
|
5 |
-
from src.yt_api import
|
6 |
from src.models import init_emotions_model
|
7 |
|
8 |
|
@@ -17,6 +17,10 @@ settings = Settings()
|
|
17 |
app = FastAPI(title='social-stat')
|
18 |
|
19 |
emotions_clf = init_emotions_model()
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
@app.get('/')
|
@@ -27,11 +31,7 @@ def home():
|
|
27 |
@app.get('/predict')
|
28 |
def predict(video_id):
|
29 |
# Get comments
|
30 |
-
comments = get_comments(
|
31 |
-
video_id,
|
32 |
-
settings.MAX_COMMENT_SIZE,
|
33 |
-
settings.YT_API_KEY
|
34 |
-
)
|
35 |
comments_df = pd.DataFrame(comments)
|
36 |
|
37 |
# Predict emotions in batches
|
|
|
2 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
3 |
import pandas as pd
|
4 |
|
5 |
+
from src.yt_api import YouTubeAPI
|
6 |
from src.models import init_emotions_model
|
7 |
|
8 |
|
|
|
17 |
app = FastAPI(title='social-stat')
|
18 |
|
19 |
emotions_clf = init_emotions_model()
|
20 |
+
yt_api = YouTubeAPI(
|
21 |
+
api_key=settings.YT_API_KEY,
|
22 |
+
max_comment_size=settings.MAX_COMMENT_SIZE
|
23 |
+
)
|
24 |
|
25 |
|
26 |
@app.get('/')
|
|
|
31 |
@app.get('/predict')
|
32 |
def predict(video_id):
|
33 |
# Get comments
|
34 |
+
comments = yt_api.get_comments(video_id)
|
|
|
|
|
|
|
|
|
35 |
comments_df = pd.DataFrame(comments)
|
36 |
|
37 |
# Predict emotions in batches
|
src/yt_api.py
CHANGED
@@ -2,66 +2,71 @@ import requests
|
|
2 |
from pprint import pprint
|
3 |
|
4 |
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
comment_list = response_to_comments(response, max_comment_size)
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
video_id, api_key, page_token=response['nextPageToken'])
|
16 |
-
comment_list += (response_to_comments(response, max_comment_size))
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
|
|
|
20 |
|
21 |
-
def get_response(
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
if len(comment['textDisplay']) > max_comment_size:
|
48 |
-
continue
|
49 |
-
try:
|
50 |
-
comment_list.append({
|
51 |
-
'comment_id': comment_id,
|
52 |
-
'video_id': comment['videoId'],
|
53 |
-
'channel_id': comment['authorChannelId']['value'],
|
54 |
-
'author_display_name': comment['authorDisplayName'],
|
55 |
-
'text_original': comment['textOriginal'],
|
56 |
-
'text_display': comment['textDisplay'],
|
57 |
-
'published_at': comment['publishedAt'].replace('T', ' ')[:-1],
|
58 |
-
'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
|
59 |
-
'like_count': comment['likeCount'],
|
60 |
-
'can_reply': can_reply,
|
61 |
-
'total_reply_count': total_reply_count,
|
62 |
-
})
|
63 |
-
except Exception as e:
|
64 |
-
print(f"Error: {e}\nComment:")
|
65 |
-
pprint(full_comment)
|
66 |
-
|
67 |
-
return comment_list
|
|
|
2 |
from pprint import pprint
|
3 |
|
4 |
|
5 |
+
class YouTubeAPI:
    """Small client for the YouTube Data API v3 `commentThreads` endpoint.

    Holds the API key, the endpoint URL and the maximum accepted comment
    length so callers only have to pass a video id.
    """

    def __init__(self, api_key, max_comment_size, timeout=10):
        """Store the credentials and limits used by every request.

        Args:
            api_key: YouTube Data API key sent as the `key` query parameter.
            max_comment_size: Comments whose `textDisplay` is longer than
                this many characters are skipped.
            timeout: Seconds passed to `requests.get` as its `timeout`, so
                a stalled connection cannot block the caller forever.
        """
        self.api_key = api_key
        self.url = 'https://youtube.googleapis.com/youtube/v3/commentThreads'
        self.max_comment_size = max_comment_size
        self.timeout = timeout

    def get_comments(self, video_id):
        """Returns a list of all `commentThreads` from a YouTube video.

        Follows `nextPageToken` until the API stops returning one and
        concatenates the converted comments of every page.
        """
        # Get comments from the first page
        response = self.get_response(video_id, max_results=100)
        comment_list = self.response_to_comments(response)

        # Get comments from the other pages
        while 'nextPageToken' in response:
            response = self.get_response(
                video_id, page_token=response['nextPageToken'])
            comment_list.extend(self.response_to_comments(response))

        return comment_list

    def get_response(self, video_id, page_token=None, max_results=100):
        """Gets the response from YouTube API and converts it to JSON.

        Args:
            video_id: Id of the video whose comment threads are requested.
            page_token: Value of `nextPageToken` from a previous response,
                or None for the first page.
            max_results: Sent as the `maxResults` query parameter.

        Returns:
            The decoded JSON body of the response.
        """
        payload = {
            'videoId': video_id,
            'key': self.api_key,
            'maxResults': max_results,
            'part': 'snippet',
            # requests omits parameters whose value is None, so the first
            # page request carries no `pageToken` at all.
            'pageToken': page_token,
        }
        # Use the endpoint stored on the instance instead of a duplicate
        # hard-coded copy, and always bound the wait with a timeout.
        response = requests.get(
            self.url, params=payload, timeout=self.timeout)
        return response.json()

    def response_to_comments(self, response):
        """Converts JSON response to `comment_list` list.

        Each thread in `response['items']` is flattened into one dict
        describing its top-level comment. Threads whose `textDisplay` is
        longer than `self.max_comment_size` are skipped, and threads that
        lack one of the expected fields are reported on stdout and skipped.

        Raises:
            KeyError: If `response` has no `items` key, or a thread lacks
                the fields read before the guarded section.
        """
        comment_list = []
        for full_comment in response['items']:
            thread = full_comment['snippet']
            can_reply = thread['canReply']
            total_reply_count = thread['totalReplyCount']
            top_level = thread['topLevelComment']
            comment_id = top_level['id']
            comment = top_level['snippet']

            # Skip if comment is too long
            if len(comment['textDisplay']) > self.max_comment_size:
                continue
            try:
                published_at = comment['publishedAt']
                comment_list.append({
                    'comment_id': comment_id,
                    'video_id': comment['videoId'],
                    'channel_id': comment['authorChannelId']['value'],
                    'author_display_name': comment['authorDisplayName'],
                    'text_original': comment['textOriginal'],
                    'text_display': comment['textDisplay'],
                    # 'T' becomes a space and the last character is dropped
                    # (presumably the trailing 'Z' of the timestamp).
                    'published_at': published_at.replace('T', ' ')[:-1],
                    'updated_at': comment['updatedAt'].replace('T', ' ')[:-1],
                    'like_count': comment['likeCount'],
                    'can_reply': can_reply,
                    'total_reply_count': total_reply_count,
                })
            except KeyError as e:
                print(f"Error: {e}\nComment:")
                pprint(full_comment)

        return comment_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|