dailingx committed (verified)
Commit 790c71e · Parent(s): 40cbf4d

Update run.py

Files changed (1):
  1. run.py +83 -128
run.py CHANGED
@@ -1,135 +1,90 @@
- # app.py
  import gradio as gr
- from utils import VideoProcessor, AzureAPI, GoogleAPI, AnthropicAPI, OpenAIAPI
- from constraint import SYS_PROMPT, USER_PROMPT
- from datasets import load_dataset
- import tempfile
- import requests
-
- def load_hf_dataset(dataset_path, auth_token):
-     dataset = load_dataset(dataset_path, token=auth_token)
-
-     video_paths = dataset
-     print("load done")
-
-     return video_paths
-
- def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit):
-     print("begin caption")
-     if video_src:
-         video = video_src
-         processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
-         frames = processor._decode(video)
-
-         base64_list = processor.to_base64_list(frames)
-         debug_image = processor.concatenate(frames)
-
-         if not key or not endpoint:
-             return "", f"API key or endpoint is missing. Processed {len(frames)} frames.", debug_image
-
-         api = AzureAPI(key=key, endpoint=endpoint, model=model, temp=temp, top_p=top_p, max_tokens=max_tokens)
-         caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
-         return f"{caption}", f"Using model '{model}' with {len(frames)} frames extracted.", debug_image
-     elif video_hf and video_hf_auth:
-         print("begin video_hf")
-         # Handle Hugging Face dataset
-         video_paths = load_hf_dataset(video_hf, video_hf_auth)
-         video_paths = video_paths["train"]
-         # Process all videos in the dataset
-         all_captions = []
-         for video_path_url in video_paths:
-             print("video_path")
-             video_path_url = video_path_url["id"]
-             # Use requests to download the file to a temporary file
-             response = requests.get(video_path_url, stream=True)
-             if response.status_code == 200:
-                 with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
-                     temp_video_file.write(response.content)
-                     video_path = temp_video_file.name
-             else:
-                 raise Exception(f"Failed to download video, status code: {response.status_code}")
-
-             if video_path.endswith('.mp4'):  # Assume we only process .mp4 files
-                 processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
-                 frames = processor._decode(video_path)
-                 base64_list = processor.to_base64_list(frames)
-                 api = AzureAPI(key=key, endpoint=endpoint, model=model, temp=temp, top_p=top_p, max_tokens=max_tokens)
-                 caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
-                 all_captions.append(caption)
-         return "\n\n\n".join(all_captions), f"Processed {len(video_paths)} videos.", None
-     # ... (Handle other sources)
-     else:
-         return "", "No video source selected.", None
-
- with gr.Blocks() as Core:
      with gr.Row(variant="panel"):
          with gr.Column(scale=6):
-             with gr.Accordion("Debug", open=False):
-                 info = gr.Textbox(label="Info", interactive=False)
-                 frame = gr.Image(label="Frame", interactive=False)
-             with gr.Accordion("Configuration", open=False):
-                 with gr.Row():
-                     temp = gr.Slider(0, 1, 0.3, step=0.1, label="Temperature")
-                     top_p = gr.Slider(0, 1, 0.75, step=0.1, label="Top-P")
-                     max_tokens = gr.Slider(512, 4096, 1024, step=1, label="Max Tokens")
                  with gr.Row():
-                     frame_format = gr.Dropdown(label="Frame Format", value="JPEG", choices=["JPEG", "PNG"], interactive=False)
-                     frame_limit = gr.Slider(1, 100, 10, step=1, label="Frame Limits")
-             with gr.Tabs():
-                 with gr.Tab("User"):
-                     usr_prompt = gr.Textbox(USER_PROMPT, label="User Prompt", lines=10, max_lines=100, show_copy_button=True)
-                 with gr.Tab("System"):
-                     sys_prompt = gr.Textbox(SYS_PROMPT, label="System Prompt", lines=10, max_lines=100, show_copy_button=True)
-             with gr.Tabs():
-                 with gr.Tab("Azure"):
-                     result = gr.Textbox(label="Result", lines=15, max_lines=100, show_copy_button=True, interactive=False)
-                 with gr.Tab("Google"):
-                     result_gg = gr.Textbox(label="Result", lines=15, max_lines=100, show_copy_button=True, interactive=False)
-                 with gr.Tab("Anthropic"):
-                     result_ac = gr.Textbox(label="Result", lines=15, max_lines=100, show_copy_button=True, interactive=False)
-                 with gr.Tab("OpenAI"):
-                     result_oai = gr.Textbox(label="Result", lines=15, max_lines=100, show_copy_button=True, interactive=False)

-         with gr.Column(scale=2):
-             with gr.Column():
-                 with gr.Accordion("Model Provider", open=True):
-                     with gr.Tabs():
-                         with gr.Tab("Azure"):
-                             model = gr.Dropdown(label="Model", value="GPT-4o", choices=["GPT-4o", "GPT-4v"], interactive=False)
-                             key = gr.Textbox(label="Azure API Key")
-                             endpoint = gr.Textbox(label="Azure Endpoint")
-                         with gr.Tab("Google"):
-                             model_gg = gr.Dropdown(label="Model", value="Gemini-1.5-Flash", choices=["Gemini-1.5-Flash", "Gemini-1.5-Pro"], interactive=False)
-                             key_gg = gr.Textbox(label="Gemini API Key")
-                             endpoint_gg = gr.Textbox(label="Gemini API Endpoint")
-                         with gr.Tab("Anthropic"):
-                             model_ac = gr.Dropdown(label="Model", value="Claude-3-Opus", choices=["Claude-3-Opus", "Claude-3-Sonnet"], interactive=False)
-                             key_ac = gr.Textbox(label="Anthropic API Key")
-                             endpoint_ac = gr.Textbox(label="Anthropic Endpoint")
-                         with gr.Tab("OpenAI"):
-                             model_oai = gr.Dropdown(label="Model", value="GPT-4o", choices=["GPT-4o", "GPT-4v"], interactive=False)
-                             key_oai = gr.Textbox(label="OpenAI API Key")
-                             endpoint_oai = gr.Textbox(label="OpenAI Endpoint")
-                 with gr.Accordion("Data Source", open=True):
-                     with gr.Tabs():
-                         with gr.Tab("Upload"):
-                             video_src = gr.Video(sources="upload", show_label=False, show_share_button=False, mirror_webcam=False)
-                         with gr.Tab("HF"):
-                             video_hf = gr.Text(label="Huggingface File Path")
-                             video_hf_auth = gr.Text(label="Huggingface Token")
-                         with gr.Tab("Onedrive"):
-                             video_od = gr.Text("Microsoft Onedrive")
-                             video_od_auth = gr.Text(label="Microsoft Onedrive Token")
-                         with gr.Tab("Google Drive"):
-                             video_gd = gr.Text()
-                             video_gd_auth = gr.Text(label="Google Drive Access Token")
-                 caption_button = gr.Button("Caption", variant="primary", size="lg")
-                 caption_button.click(
-                     fast_caption,
-                     inputs=[sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit],
-                     outputs=[result, info, frame]
-                 )

- if __name__ == "__main__":
-     Core.launch()

  import gradio as gr
+ from huggingface.user import HFUser, GR_CONF
+
+ Theme = gr.Theme.load(GR_CONF["theme"])
+ GR_CONF["theme"] = Theme
+
+ def login(token):
+     u = HFUser.from_token(token)
+     return u, u.name, gr.Column(visible=False)
+
+ def show_time(u, name):
+     return u.ping(name), gr.Column(visible=True)
+
+ def list_dataset(u, repo):
+     files = u.list_dataset(repo)
+     return gr.Dropdown(value=files[0], choices=files), gr.Column(visible=True), gr.Column(visible=False)
+
+ def fetch_parquet(u, fname):
+     _cache = u.fetch_file(fname)
+     return _cache
+
+ def split_parquet(u, file, batch_size):
+     batch_size = int(batch_size)
+     file_slice = u.split_parquet(file, batch_size)
+     return file_slice[0][0], file_slice, gr.Slider(value=0, maximum=batch_size-1), gr.Column(visible=True)
+
+ def select_video(chunks, epoch_idx, batch_idx):
+     epoch_idx = int(epoch_idx)
+     batch_idx = int(batch_idx)
+     return chunks[epoch_idx][batch_idx]
+
+ def show_labels():
+     return gr.Column(visible=True)
+
+ def next_chunks(video_chunks, epoch_idx):
+     length = len(video_chunks)
+     return (epoch_idx+1)%length, gr.Slider(value=0)
+
+ with gr.Blocks(**GR_CONF) as Core:
+     user = gr.State()
+     epoch_idx = gr.State(0)
+     video_chunks = gr.State()
+
      with gr.Row(variant="panel"):
          with gr.Column(scale=6):
+             _video = gr.Video(height=720)
+         with gr.Column(scale=2):
+             with gr.Column() as Auth:
+                 _token = gr.Textbox(label="Huggingface Token")
+                 _auth = gr.Button("Auth", variant="primary", size="lg")
+
+             with gr.Row() as UUID:
+                 name = gr.Textbox(label="Name", interactive=False, scale=1)
+                 time = gr.Textbox(label="Time", interactive=False, scale=1)
+
+             with gr.Column(visible=False) as Repo:
+                 raw_dataset = gr.Textbox("OpenVideo/pexels-raw", label="Raw Dataset")
+                 _list = gr.Button("List", variant='secondary', size='sm')
+
+             with gr.Column(visible=False) as Batch:
+                 file = gr.Dropdown(label="Parquet")
                  with gr.Row():
+                     _cache = gr.Textbox("Downloading", label="Cache")
+                     batch_size = gr.Textbox("8", label="Batch")
+                     _fetch = gr.Button("Fetch", variant='primary', size='sm')

+             with gr.Column(visible=False) as Pick:
+                 _pick = gr.Slider(0, 7, value=0, step=1, label="Batch", info="Choose between 1 and $BATCH")
+                 gr.Label()

+     with gr.Row(variant="panel", visible=False) as Tag:
+         _human_tag = gr.Textbox(label="Tag", scale=2)
+         with gr.Column():
+             submit = gr.Button("Submit", variant="primary", size="sm", scale=1)
+             with gr.Row():
+                 rst = gr.Button("Reset", variant="stop", size="sm", scale=1)
+                 nxt = gr.Button("Next Batch", variant="secondary", size="sm", scale=1)

+     _auth.click(fn=login, inputs=_token, outputs=[user, name, Auth])
+     name.change(fn=show_time, inputs=[user, name], outputs=[time, Repo])
+     _list.click(fn=list_dataset, inputs=[user, raw_dataset], outputs=[file, Batch, Repo])
+     _fetch.click(fn=fetch_parquet, inputs=[user, file], outputs=[_cache])
+     file.change(fn=fetch_parquet, inputs=[user, file], outputs=[_cache])
+     _cache.change(fn=split_parquet, inputs=[user, _cache, batch_size], outputs=[_video, video_chunks, _pick, Pick])
+     _pick.change(fn=select_video, inputs=[video_chunks, epoch_idx, _pick], outputs=_video)
+     _video.change(fn=show_labels, outputs=Tag)
+     nxt.click(fn=next_chunks, inputs=[video_chunks, epoch_idx], outputs=[epoch_idx, _pick])
+
+ if __name__ == "__main__":
+     Core.launch()
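
Note: the rewritten run.py imports a local huggingface.user helper module that is not part of this commit. As a reading aid, here is a minimal sketch of the interface the UI code calls into, reconstructed purely from the call sites above (HFUser.from_token, .name, .ping, .list_dataset, .fetch_file, .split_parquet, and a GR_CONF dict unpacked into gr.Blocks). The theme path, the "url" column name, and the _repo caching are assumptions, not the committed implementation.

# Hypothetical sketch of huggingface/user.py, inferred from run.py's call sites.
import time
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, list_repo_files

# Assumed shape: keyword arguments for gr.Blocks(**GR_CONF); run.py swaps
# the "theme" entry for a loaded gr.Theme before building the UI.
GR_CONF = {"theme": "theme.json"}  # hypothetical theme file path

class HFUser:
    def __init__(self, token):
        self.token = token
        self.api = HfApi(token=token)
        self.name = self.api.whoami()["name"]  # run.py reads u.name after login
        self._repo = None

    @classmethod
    def from_token(cls, token):
        return cls(token)

    def ping(self, name):
        # Shown in the "Time" textbox once the name field changes.
        return time.strftime("%Y-%m-%d %H:%M:%S")

    def list_dataset(self, repo):
        # Parquet shards of a dataset repo such as OpenVideo/pexels-raw.
        self._repo = repo
        files = list_repo_files(repo, repo_type="dataset", token=self.token)
        return [f for f in files if f.endswith(".parquet")]

    def fetch_file(self, fname):
        # Local cache path; run.py routes it into the "Cache" textbox.
        return hf_hub_download(self._repo, fname, repo_type="dataset", token=self.token)

    def split_parquet(self, path, batch_size):
        # Chunk the shard's video URLs so chunks[epoch_idx][batch_idx] is one
        # playable video, matching select_video() above.
        urls = pd.read_parquet(path)["url"].tolist()  # assumed column name
        return [urls[i:i + batch_size] for i in range(0, len(urls), batch_size)]

With a module of roughly this shape on the import path, the Blocks app above runs end to end: Auth reveals the repo panel, List populates the Parquet dropdown, Fetch downloads and splits a shard, and the slider pages through one batch of videos at a time.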