xhwang00 committed on
Commit
374db79
·
verified ·
1 Parent(s): 0c86cdd

Delete xitong/xl_main.py

Browse files
Files changed (1) hide show
  1. xitong/xl_main.py +0 -83
xitong/xl_main.py DELETED
@@ -1,83 +0,0 @@
1
- import json
2
-
3
- import openai
4
- import os
5
- from openai import OpenAI
6
-
7
-
8
-
9
# Prompt template sent to the chat model once per dataset record.
#
# It is filled with ``str.format`` using seven positional fields, in order:
#   1. the record's question
#   2. the record's answer (used here as the description)
#   3. the target object
#   4. the starting bounding box
#   5. the starting frame
#   6. the ending bounding box
#   7. the ending frame
#
# Literal braces of the JSON example are doubled ("{{" / "}}") so that
# ``str.format`` leaves them alone.  The example is a complete, valid JSON
# object (inner quotes are escaped as \") because the caller feeds the
# model's reply to ``json.loads`` and then adds a key to the resulting dict;
# showing the model an invalid fragment invites an unparsable reply.
prompt = '''
A Referential Dialogue question-and-answer pair is like this:

{{
    "conversations": [
        {{
            "from": "human",
            "value": "Is there an event \\"a man in sunglasses hugging a woman in a black and white blouse\\" in the video?"
        }},
        {{
            "from": "gpt",
            "value": "In the image, there is a man [100.0, 116.16000000000001, 477.0, 437.8] and a woman in the video. The man [100.0, 116.16000000000001, 477.0, 437.8] has his arm around the woman , which suggests that he is hugging her. Given the appearance of these individuals and their positioning, the event \\"a man in sunglasses hugging a woman in a black and white blouse\\" is present in the video. Therefore, the answer is yes."
        }}
    ]
}}

Vidstg is a spatio-temporal video grounding dataset. Each video is annotated with a question-and-answer pair, and a series of bounding boxes that contain the trajectories of the corresponding target object.

Generate a referential dialogue question-answer pair from a video's question "{}" and the description "{}"

In this case, the target object is "{}".

The target object is initially at {} in frame {} and stops appearing at position {} in frame {}. Mention the position and frame of the target object in the answer.

Generate the JSON only.
'''
33
-
34
-
35
-
36
def _parse_model_json(raw_text):
    """Return the model reply parsed as a dict, or None if it is unusable."""
    if not raw_text:
        return None
    text = raw_text.strip()
    # The reply may arrive wrapped in a Markdown code fence (``` or ```json);
    # drop the fence lines so only the JSON payload is parsed.
    if text.startswith("```"):
        lines = text.splitlines()[1:]
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]
        text = "\n".join(lines)
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None
    # The caller attaches a "meta" key, which only works on a JSON object.
    return parsed if isinstance(parsed, dict) else None


def main(input_path="Processed_Dataset/vidstg_train_processed.json",
         output_path='results.json', limit=2, model="gpt-3.5-turbo"):
    """Generate referential-dialogue JSON for dataset records via the chat API.

    For each selected record the module-level ``prompt`` template is filled
    with the record's question, answer, target, start/end bounding boxes and
    start/end frames, sent to the chat completion endpoint, and the reply is
    parsed as JSON.  Each parsed object gets a ``"meta": {"vid": ...}`` entry
    and all of them are written to ``output_path`` as a JSON list.

    Args:
        input_path: JSON file holding a list of records.  Each record is read
            for the keys ``vid``, ``question``, ``answer``, ``target``,
            ``start_bbox`` (with a ``bbox`` entry), ``start_frame``,
            ``end_bbox`` (with a ``bbox`` entry) and ``end_frame``.
        output_path: Destination file for the collected results.
        limit: Number of leading records to process; ``None`` processes all.
            The default of 2 matches the original hardcoded slice.
        model: Chat model name passed to the API.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset
            or empty.
    """
    # The API key must come from the environment.  A secret key used to be
    # embedded in this function and was committed to version control; that
    # key has to be treated as compromised and revoked, never restored here.
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before running this script."
        )

    with open(input_path, 'r', encoding="utf-8") as json_file:
        data = json.load(json_file)

    records = data if limit is None else data[:limit]

    # OpenAI() picks the key up from OPENAI_API_KEY on its own.
    client = OpenAI()

    all_results = []
    for record in records:
        vid = record['vid']
        my_prompt = prompt.format(
            record['question'],
            record['answer'],
            record['target'],
            record['start_bbox']['bbox'],
            record['start_frame'],
            record['end_bbox']['bbox'],
            record['end_frame'],
        )
        print(my_prompt)

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": my_prompt}
            ]
        )

        result_json = response.choices[0].message.content
        print(result_json)

        json_obj = _parse_model_json(result_json)
        if json_obj is None:
            # One bad reply should not discard the results gathered so far.
            print(f"Skipping vid {vid}: reply is not a JSON object")
            continue

        json_obj["meta"] = {"vid": vid}
        all_results.append(json_obj)

    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(all_results, f, indent=4)
79
-
80
-
81
-
82
# Run the generation pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()