Delete xitong/xl_main.py
Browse files- xitong/xl_main.py +0 -83
xitong/xl_main.py
DELETED
@@ -1,83 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
-
import openai
|
4 |
-
import os
|
5 |
-
from openai import OpenAI
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
prompt = '''
|
10 |
-
A Referential Dialogue question-and-answer pair is like this:
|
11 |
-
|
12 |
-
"conversations": [
|
13 |
-
{{
|
14 |
-
"from": "human",
|
15 |
-
"value": "Is there an event "a man in sunglasses hugging a woman in a black and white blouse" in the video?"
|
16 |
-
}},
|
17 |
-
{{
|
18 |
-
"from": "gpt",
|
19 |
-
"value": "In the image, there is a man [100.0, 116.16000000000001, 477.0, 437.8] and a woman in the video. The man [100.0, 116.16000000000001, 477.0, 437.8] has his arm around the woman , which suggests that he is hugging her. Given the appearance of these individuals and their positioning, the event "a man in sunglasses hugging a woman in a black and white blouse" is present in the video. Therefore, the answer is yes."
|
20 |
-
}}
|
21 |
-
]
|
22 |
-
|
23 |
-
Vidstg is a spatio-temporal video grounding dataset. Each video is annotated with a question-and-answer pair, and a series of bounding boxes that contain the trajectories of the corresponding target object.
|
24 |
-
|
25 |
-
Generate a referential dialogue question-answer pair from a video's question "{}" and the description "{}"
|
26 |
-
|
27 |
-
In this case, the target object is "{}".
|
28 |
-
|
29 |
-
The target object is initially at {} in frame {} and stops appearing at position {} in frame {}. Mention the position and frame of the target person in the answer.
|
30 |
-
|
31 |
-
Generate the JSON only.
|
32 |
-
'''
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
def main():
    """Generate Referential Dialogue QA pairs for VidSTG samples via the OpenAI API.

    Loads the processed VidSTG training annotations, formats ``prompt`` for
    each sample, asks ``gpt-3.5-turbo`` for a referential-dialogue JSON pair,
    tags each result with the source video id, and writes everything to
    ``results.json``.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is not set.
    """
    with open("Processed_Dataset/vidstg_train_processed.json", 'r') as json_file:
        data = json.load(json_file)

    all_results = []

    # SECURITY: the API key that used to be hard-coded here was a leaked
    # secret and must be revoked. The OpenAI() client reads OPENAI_API_KEY
    # from the environment automatically; we only check it exists so the
    # script fails fast with a clear message instead of on the first request.
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")
    client = OpenAI()

    # NOTE(review): only the first two samples are processed — this looks
    # like a deliberate debug slice; widen it for a full run.
    for a_list in data[0:2]:
        vid = a_list['vid']
        question = a_list['question']
        answer = a_list['answer']
        target = a_list['target']
        start_bbox = a_list['start_bbox']['bbox']
        start_frame = a_list['start_frame']
        end_bbox = a_list['end_bbox']['bbox']
        end_frame = a_list['end_frame']

        my_prompt = prompt.format(question, answer, target, start_bbox, start_frame, end_bbox, end_frame)

        print(my_prompt)

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": my_prompt}
            ]
        )

        result_json = response.choices[0].message.content
        print(result_json)
        try:
            json_obj = json.loads(result_json)
        except json.JSONDecodeError:
            # The model occasionally returns non-JSON text; keep the raw
            # reply rather than crashing and losing all earlier results.
            json_obj = {"raw_response": result_json}
        json_obj["meta"] = {"vid": vid}

        all_results.append(json_obj)

    with open('results.json', 'w') as f:
        json.dump(all_results, f, indent=4)
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
# Script entry point: run the generation pipeline only when executed
# directly, not when imported as a module.
if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|