# File size: 8,344 Bytes
# 1deb673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from datasets import load_dataset
import random
import os
# Load the "SushantGautam/kvasir-points" dataset through `datasets.load_dataset`.
# The record-building loop later in this script iterates its 'train' split.
ds = load_dataset("SushantGautam/kvasir-points")
# Author's note on the dataset columns (not verified by this script):
# features: ['image_data', 'image_sha256', 'points', 'count', 'label', 'collection_method', 'classification', 'organ'],
# Fixed seed: every random.choice() draw made below (label phrasing, prompt
# template) depends on it, so the draws repeat for the same dataset order.
random.seed(42)


# Prompt templates grouped by task style. Every template contains a "{label}"
# placeholder that is filled in with str.format(label=...).
#
# Only the "pointing" group is referenced by the code visible in this file;
# the other three groups are defined but not used here.
#
# NOTE(review): templates are drawn with random.choice() under a fixed seed, so
# the ORDER and MULTIPLICITY of entries affect which template each row gets.
# Several entries are exact duplicates (e.g. "Show me where the {label} are",
# "Count all the {label}", "How many {label} are there?") which makes those
# phrasings more likely, and one entry is misspelled ("how mmny {label}?").
# Confirm whether these are intentional before de-duplicating or correcting,
# because any edit changes the generated output.
GENERAL_PROMPTS_V1 = {
    # Ask for the locations of {label}; used for the user turn of each record.
    "pointing": [
        "Point to {label}\nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\"",
        "Point to any {label} in the image",
        "Point to any {label} in the image.",
        "Point: Where are the {label}",
        "Show me where the {label} are",
        "Can you show me where the {label} are?",
        "Show me where the {label} are",
        "Show me where a {label} is",
        "Show me where a {label} is.",
        "If there are any {label} in the image? Show me where they are.",
        "Where are the {label}?",
        "Generate a list of points showing where the {label} are.",
        "Find the \"{label}\".",
        "Find a \"{label}\".",
        "Locate all {label}.",
        "Locate an {label}.",
        "Locate a {label}.",
        "Locate every {label}.",
        "Locate {label}.",
        "Locate the {label}.",
        "Object: {label}\nInstruction: Point to the object.",
        "find {label}",
        "find {label}.",
        "Point to every {label}",
        "find any {label} in the picture",
        "Find the {label}",
        "Find any {label}",
        "Point to a {label}",
        "Point to an {label}",
        "Look for {label} in the image and show me where they are.",
        "Help me find an object in the image by pointing to them.\nObject: {label}.",
        "I am looking for {label}, where can they be found in the image?",
        "Can you see any {label} in the image? Point to them.",
        "Point out each {label} in the image.",
        "Point out every {label} in the image.",
        "Point to the {label} in the image.",
        "Locate each {label} in the image.",
        "Can you point out all {label} in this image?",
        "Please find {label} and show me where they are.",
        "If there are any {label} present, indicate their positions.",
        "If there is a {label} present, indicate its positions.",
        "show me all visible {label}",
    ],
    # Counting questions, some of which also ask for the points.
    "point_count": [
        "How many {label} are there?",
        "How many {label}?",
        "How many {label}.",
        "how many {label}.",
        "how many {label}?",
        "How many {label} are there in the image?",
        "Tell me how many {label} there are",
        "Tell me how many {label} there are and point to them.",
        "how many {label}",
        "Tell me where each {label} is.",
        "Tell me how many {label} are in the image",
        "count {label}",
        "count every {label}",
        "count each {label}",
        "count {label}.",
        "Count the {label}.",
        "How many {label} do you see?",
        "How many {label} are visible?",
        "Count all the {label}",
        "how mmny {label}?",
        "Count every {label} in the picture.",
        "Count all the {label}",
        "Count each {label}",
        "Point to and count the {label} in the picture.",
        "Point and count {label}",
        "Point to every {label}",
        "Locate the {label} and count them",
        "Locate every {label} and count them",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
        "Point at {label} and then tell me the count.",
        "What is the total number of {label} in the image?",
        "In all the picture, how many {label} are there?",
        "Point at the {label} and then count them.",
        "Point to all the visible {label} output the total count.",
        "Point to all the {label} visible and output the total count. \nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\" and output the total count.",
        "Show me where the {label} are and output the total count.",
        "Where are the {label}? How many are there?",
        "Generate list of points showing where the {label} are and output the total count.",
        "Object: {label}\nInstruction: Point to the object and output the total count.",
        "find any {label} in the picture and output the total count.",
        "Can you see any {label} in the image? Point to them and output the total count.",
        "Can you point out all {label} in this image? How many are there?",
        "If there are any {label} present, indicate their positions and output the total count.",
        "How many {label} are there in the image? Point to them and output the total count.",
        "How many {label} are there in the image?",
        "Give me the count of {label} in the image.",
        "How many {label} are visible in the image?",
        "How many {label} are there?",
        "In the image, how many {label} are there?",
        "Can you count the number of {label} in the image?",
        "Can you count every {label} in the picture?",
        "Can you see any {label} in the image? How many are there?",
        "Are there any {label} in the image? How many are there?",
        "If you see any {label} in the image, give me the count. Otherwise, say 'This isn't in the image.'",
        "Object: {label}\nInstruction: How many are there?",
    ],
    # Phrasings that ask for a count and for the points in the same request.
    "count_then_point": [
        "Count the {label} in the image, then point to them.",
        "How many {label} are there? Point to them.",
        "Count every {label} in the picture, then point to them.",
        "Locate the {label} and count them, then point to them.",
        "Find all the {label}. How many are there? Point to them.",
        "Find each {label}. How many are there? Point to them.",
        "Point to and count the {label} in the picture.",
    ],
    # Counting phrasings with no explicit request to point.
    "only_count": [
        "Count the {label} in the image.",
        "How many {label} are there?",
        "Count every {label} in the picture.",
        "Locate the {label} and count them.",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
    ],
}

# Records built by the loop below, one per dataset row; serialized as JSON
# Lines at the end of the script.
jsonl = []

import cv2
def molmo_coords(coords, w, h):
    """Scale a point to percentages of the given width and height.

    Args:
        coords: Indexable whose first two items are the x and y values;
            any further items are ignored.
        w: Divisor for the x value (presumably the image width).
        h: Divisor for the y value (presumably the image height).

    Returns:
        A ``(x_percent, y_percent)`` tuple, each computed as
        ``value / size * 100``.

    Raises:
        ZeroDivisionError: If ``w`` or ``h`` is zero.
    """
    x_percent = coords[0] / w * 100
    y_percent = coords[1] / h * 100
    return x_percent, y_percent

# Directory holding the dataset images, one file per row named
# "<image_sha256>.png".
_IMAGE_DIR = "/home/sushant/D1/MIUA/kvasir-format/images"

# Raw dataset label -> phrasings that may replace it in prompts and answers.
# Labels missing from this table are used verbatim.
# Single-option entries are still drawn through random.choice() on purpose:
# the draw advances the seeded RNG, so skipping it would change which prompt
# template every later row receives.
_LABEL_ALIASES = {
    "normal": ["normal sperms", "sperms"],
    "pinhead": ["pinhead sperms", "pinheads"],
    # Fixed: the second option used to be " sperm clusters" (leading space),
    # which leaked a stray space into both the prompt and the alt attribute.
    "cluster": ["clusters", "sperm clusters"],
    "instrument": ["instruments"],
    "polyps": ["polyps"],
}

# Build one single-turn chat record (user pointing prompt -> <points> answer)
# per row of the train split and collect it in `jsonl`.
for data in ds['train']:
    print(data)
    # Only the first element of the 'points' column is used; it is iterated
    # below as a sequence of (x, y) pairs.
    points = data['points'][0]

    sha = data['image_sha256']
    image_name = f"{_IMAGE_DIR}/{sha}.png"

    # cv2.imread reports failure by returning None instead of raising, which
    # previously surfaced as an opaque AttributeError on `.shape`.
    image = cv2.imread(image_name)
    if image is None:
        raise FileNotFoundError(
            f"Could not read image for sha256 {sha!r}: {image_name}"
        )
    # Height/width are not used by the current output; they are only needed
    # if the percentage conversion mentioned below is re-enabled.
    h, w = image.shape[:2]

    # Points are emitted exactly as stored in the dataset.
    # NOTE(review): a percentage conversion via molmo_coords() was disabled
    # here; its signature is (coords, w, h), whereas the disabled call passed
    # (p, h, w). Confirm the argument order before turning it back on.
    mol_points = points

    label = data['label']
    aliases = _LABEL_ALIASES.get(label)
    if aliases is not None:
        label = random.choice(aliases)

    # Attributes are numbered from 1: x1="..." y1="..." x2="..." y2="...".
    coord_attrs = ' '.join(
        f'x{n}="{x:.1f}" y{n}="{y:.1f}"'
        for n, (x, y) in enumerate(mol_points, start=1)
    )
    s = f'<points {coord_attrs} alt="{label}">{label}</points>'

    prompt = random.choice(GENERAL_PROMPTS_V1["pointing"]).format(label=label)
    entry = {
        "messages": [
            {"role": "user", "content": "<|image|> " + prompt},
            {"role": "assistant", "content": s},
        ],
        "images": [image_name],
    }
    print(entry)

    jsonl.append(entry)

import json
# Dump the collected records as JSON Lines: one JSON object per line, each
# terminated by a newline.
with open("kvasir_valid.jsonl", "w") as f:
    f.writelines(json.dumps(record) + "\n" for record in jsonl)