Delete xitong/xl_main.py
Browse files- xitong/xl_main.py +0 -83
xitong/xl_main.py
DELETED
@@ -1,83 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
-
import openai
|
4 |
-
import os
|
5 |
-
from openai import OpenAI
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
prompt = '''
|
10 |
-
A Referential Dialogue question-and-answer pair is like this:
|
11 |
-
|
12 |
-
"conversations": [
|
13 |
-
{{
|
14 |
-
"from": "human",
|
15 |
-
"value": "Is there an event "a man in sunglasses hugging a woman in a black and white blouse" in the video?"
|
16 |
-
}},
|
17 |
-
{{
|
18 |
-
"from": "gpt",
|
19 |
-
"value": "In the image, there is a man [100.0, 116.16000000000001, 477.0, 437.8] and a woman in the video. The man [100.0, 116.16000000000001, 477.0, 437.8] has his arm around the woman , which suggests that he is hugging her. Given the appearance of these individuals and their positioning, the event "a man in sunglasses hugging a woman in a black and white blouse" is present in the video. Therefore, the answer is yes."
|
20 |
-
}}
|
21 |
-
]
|
22 |
-
|
23 |
-
Vidstg is a spatio-temporal video grounding dataset. Each video is annotated with a question-and-answer pair, and a series of bounding boxes that contain the trajectories of the corresponding target object.
|
24 |
-
|
25 |
-
Generate a referential dialogue question-answer pair from a video's question "{}" and the description "{}"
|
26 |
-
|
27 |
-
In this case, the target object is "{}".
|
28 |
-
|
29 |
-
The target object is initially at {} in frame {} and stops appearing at position {} in frame {}. Mention the position and frame of the target person in the answer.
|
30 |
-
|
31 |
-
Generate the JSON only.
|
32 |
-
'''
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
def main():
    """Generate Referential Dialogue QA pairs for VidSTG samples via the OpenAI API.

    Loads the processed VidSTG training annotations, formats ``prompt`` for
    each sample, asks ``gpt-3.5-turbo`` for a referential-dialogue JSON pair,
    tags each result with the source video id, and writes everything to
    ``results.json``.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is not set.
    """
    with open("Processed_Dataset/vidstg_train_processed.json", 'r') as json_file:
        data = json.load(json_file)

    all_results = []

    # SECURITY: the API key that used to be hard-coded here was a leaked
    # secret and must be revoked. The OpenAI() client reads OPENAI_API_KEY
    # from the environment automatically; we only check it exists so the
    # script fails fast with a clear message instead of on the first request.
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")
    client = OpenAI()

    # NOTE(review): only the first two samples are processed — this looks
    # like a deliberate debug slice; widen it for a full run.
    for a_list in data[0:2]:
        vid = a_list['vid']
        question = a_list['question']
        answer = a_list['answer']
        target = a_list['target']
        start_bbox = a_list['start_bbox']['bbox']
        start_frame = a_list['start_frame']
        end_bbox = a_list['end_bbox']['bbox']
        end_frame = a_list['end_frame']

        my_prompt = prompt.format(question, answer, target, start_bbox, start_frame, end_bbox, end_frame)

        print(my_prompt)

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": my_prompt}
            ]
        )

        result_json = response.choices[0].message.content
        print(result_json)
        try:
            json_obj = json.loads(result_json)
        except json.JSONDecodeError:
            # The model occasionally returns non-JSON text; keep the raw
            # reply rather than crashing and losing all earlier results.
            json_obj = {"raw_response": result_json}
        json_obj["meta"] = {"vid": vid}

        all_results.append(json_obj)

    with open('results.json', 'w') as f:
        json.dump(all_results, f, indent=4)
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
# Script entry point: run the generation pipeline only when executed
# directly, not when imported as a module.
if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|