Upload 11 files
Browse files- .gitattributes +8 -0
- README.md +36 -0
- VILD-a-VidLN-Kinetics-train.jsonl +3 -0
- VILD-a-VidLN-OVIS-train.jsonl +0 -0
- VILD-a-VidLN-Oops-train.jsonl +3 -0
- VILD-a-VidLN-Oops-val.jsonl +0 -0
- VILD-a-VidLN-UVO-all.jsonl +3 -0
- VILD-b-VideoChat.jsonl +3 -0
- VILD-b-VideoInstruct100K.jsonl +3 -0
- VILD-c-MiraData.jsonl +3 -0
- VILD-c-Open-Sora-Dataset.jsonl +3 -0
- VILD-d-Panda-70M.jsonl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
VILD-a-VidLN-Kinetics-train.jsonl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
VILD-a-VidLN-Oops-train.jsonl filter=lfs diff=lfs merge=lfs -text
|
38 |
+
VILD-a-VidLN-UVO-all.jsonl filter=lfs diff=lfs merge=lfs -text
|
39 |
+
VILD-b-VideoChat.jsonl filter=lfs diff=lfs merge=lfs -text
|
40 |
+
VILD-b-VideoInstruct100K.jsonl filter=lfs diff=lfs merge=lfs -text
|
41 |
+
VILD-c-MiraData.jsonl filter=lfs diff=lfs merge=lfs -text
|
42 |
+
VILD-c-Open-Sora-Dataset.jsonl filter=lfs diff=lfs merge=lfs -text
|
43 |
+
VILD-d-Panda-70M.jsonl filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# The VILD Dataset (VIdeo and Long-Description)
|
2 |
+
|
3 |
+
This dataset was introduced in [VideoCLIP-XL](https://arxiv.org/abs/2410.00741).
|
4 |
+
We built an automatic data collection system that aggregates a large number of high-quality (video, long description) pairs from multiple data sources.
|
5 |
+
In total, we collected over 2M (VIdeo, Long Description) pairs, which together form the VILD dataset.
|
6 |
+
|
7 |
+
# Format
|
8 |
+
```json
|
9 |
+
{
|
10 |
+
"short_captions": [
|
11 |
+
"...",
|
12 |
+
],
|
13 |
+
"long_captions": [
|
14 |
+
"...",
|
15 |
+
],
|
16 |
+
"video_id": "..."
|
17 |
+
}
|
18 |
+
{
|
19 |
+
.....
|
20 |
+
}
|
21 |
+
.....
|
22 |
+
```
|
23 |
+
|
24 |
+
|
25 |
+
# Source
|
26 |
+
~~~
|
27 |
+
@misc{wang2024videoclipxladvancinglongdescription,
|
28 |
+
title={VideoCLIP-XL: Advancing Long Description Understanding for Video CLIP Models},
|
29 |
+
author={Jiapeng Wang and Chengyu Wang and Kunzhe Huang and Jun Huang and Lianwen Jin},
|
30 |
+
year={2024},
|
31 |
+
eprint={2410.00741},
|
32 |
+
archivePrefix={arXiv},
|
33 |
+
primaryClass={cs.CL},
|
34 |
+
url={https://arxiv.org/abs/2410.00741},
|
35 |
+
}
|
36 |
+
~~~
|
VILD-a-VidLN-Kinetics-train.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82388a2a364b821f45c1693cf1dfe5f0c315dcc78f4e864ed43a755d145e5569
|
3 |
+
size 113156271
|
VILD-a-VidLN-OVIS-train.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
VILD-a-VidLN-Oops-train.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aec63ead07b2447036e30b1b20d7b121b8d9ae68d63fa409ea441e5a19af95f0
|
3 |
+
size 46049729
|
VILD-a-VidLN-Oops-val.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
VILD-a-VidLN-UVO-all.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7c39457f4c6f5bfe07d1169d8523114e7422ff3d552602b91052daab4059673
|
3 |
+
size 30377587
|
VILD-b-VideoChat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74bc42ac06af246314a5553492c2dde83da22167430bf354719223773ce2518c
|
3 |
+
size 46685938
|
VILD-b-VideoInstruct100K.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a19ff34523e73de27958821097914564575383daa3a1bf2bfc3efffdff0805e7
|
3 |
+
size 175620221
|
VILD-c-MiraData.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f630622ad36085d59855299352506d614a971fff1e1417331769979725231352
|
3 |
+
size 42743061
|
VILD-c-Open-Sora-Dataset.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a824d4395e972e3d9fd2a5ce578edc5031fd5f7e4112c9d7141e85e465f4d01
|
3 |
+
size 57968303
|
VILD-d-Panda-70M.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f25be88b2f005802364ab03c964b00a64f7663d785a451e597fb44cc58ec005c
|
3 |
+
size 1134381417
|