[{"image_path": "objects365_v1_00045970.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.691406272, 309.112609872, 147.685424832, 469.83563232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045970_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.691406272, 41.11260987200001, 147.685424832, 201.83563232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045970.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a handbag, a leather shoes, and two trolleys.", "boxes_value": [[1.691406272, 309.112609872, 147.685424832, 469.83563232], [75.67279052800001, 298.794799824, 149.386352512, 471.16534425599997], [23.259643584, 309.112609872, 75.91210937599999, 468.984741216], [26.97351072, 371.12567140799996, 46.481201152000004, 400.130493168], [128.070251456, 451.951232928, 147.685424832, 469.83563232], [53.753784192, 361.08978273599996, 102.785766592, 438.40948488000004], [1.691406272, 339.72369384, 40.024108864, 395.60626219200003]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00045970_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a handbag, a leather shoes, and two trolleys.", "boxes_value": [[1.691406272, 41.11260987200001, 147.685424832, 201.83563232], [75.67279052800001, 30.794799823999995, 149.386352512, 203.16534425599997], [23.259643584, 41.11260987200001, 75.91210937599999, 200.98474121599997], [26.97351072, 103.12567140799996, 46.481201152000004, 132.130493168], [128.070251456, 183.95123292800002, 147.685424832, 201.83563232], [53.753784192, 93.08978273599996, 102.785766592, 170.40948488000004], [1.691406272, 71.72369384000001, 40.024108864, 127.60626219200003]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00045971.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[543.929321264, 0.4305419776, 683.7407226521999, 510.5554809344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045971_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[35.92932126400001, 0.4305419776, 175, 510.5554809344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045971.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[543.929321264, 0.4305419776, 683.7407226521999, 510.5554809344], [567.4322509799, 0.4305419776, 610.1309814123, 24.9700927488], [543.929321264, 99.8298950144, 573.7324218576, 154.0548095488], [644.3708495961, 128.6458740224, 667.58081056, 214.4788818432], [615.0006103608999, 163.500427264, 634.5195312742, 228.0628662272], [671.7701415932, 396.8341675008, 683.7407226521999, 510.5554809344]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00045971_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[35.92932126400001, 0.4305419776, 175, 510.5554809344], [59.432250979900004, 0.4305419776, 102.1309814123, 24.9700927488], [35.92932126400001, 99.8298950144, 65.7324218576, 154.0548095488], [136.37084959610002, 128.6458740224, 159.58081056000003, 214.4788818432], [107.00061036089994, 163.500427264, 126.51953127419995, 228.0628662272], [163.77014159320004, 396.8341675008, 175, 510.5554809344]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00045973.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[170.82116701170003, 272.534423808, 457.06481930700005, 438.98736571200004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045973_crop.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[71.82116701170003, 42.534423807999985, 358.06481930700005, 208.98736571200004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045973.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, a slippers, two sneakers, and two handbags.", "boxes_value": [[170.82116701170003, 272.534423808, 457.06481930700005, 438.98736571200004], [398.6107177545, 285.542480448, 457.7319336114, 384.077880864], [170.82116701170003, 393.90203856, 193.0119628734, 417.20135496], [235.5706176798, 422.267883312, 259.38323973, 435.94744871999995], [258.3699340881, 402.001831056, 279.1425781281, 438.98736571200004], [440.11669920360004, 324.15533448, 457.06481930700005, 358.41992188800003], [283.0333862163, 272.534423808, 297.74664309509996, 304.997070336]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00045973_crop.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, a slippers, two sneakers, and two handbags.", "boxes_value": [[71.82116701170003, 42.534423807999985, 358.06481930700005, 208.98736571200004], [299.6107177545, 55.54248044799999, 358.7319336114, 154.077880864], [71.82116701170003, 163.90203856, 94.0119628734, 187.20135496], [136.5706176798, 192.26788331199998, 160.38323973, 205.94744871999995], [159.36993408810002, 172.00183105600001, 180.1425781281, 208.98736571200004], [341.11669920360004, 94.15533448000002, 358.06481930700005, 128.41992188800003], [184.03338621630002, 42.534423807999985, 198.74664309509996, 74.99707033599998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00045974.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe.", "boxes_value": [[310.4217529344, 310.8041992192, 411.1108398336, 470.509765632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045974_crop.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe.", "boxes_value": [[25.42175293439999, 40.80419921919997, 126.11083983359998, 200.50976563199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045974.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bench, two flowers, two vases, and a potted plant.", "boxes_value": [[310.4217529344, 310.8041992192, 411.1108398336, 470.509765632], [310.4217529344, 313.8441772544, 369.87475584000003, 383.4667968512], [380.8967284992, 430.92724608, 411.1108398336, 452.8730468864], [385.0517577984, 454.5017700352, 410.5678711296, 475.1318359552], [364.72790530559996, 423.8422851584, 392.7623291136, 436.1500854272], [370.1763916032, 437.524353024, 389.6928711168, 470.509765632], [382.2237548544, 310.8041992192, 408.172119168, 348.1051025408]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00045974_crop.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bench, two flowers, two vases, and a potted plant.", "boxes_value": [[25.42175293439999, 40.80419921919997, 126.11083983359998, 200.50976563199998], [25.42175293439999, 43.844177254399995, 84.87475584000003, 113.4667968512], [95.89672849919998, 160.92724607999997, 126.11083983359998, 182.8730468864], [100.05175779839999, 184.50177003520002, 125.56787112960001, 205.13183595520002], [79.72790530559996, 153.8422851584, 107.7623291136, 166.1500854272], [85.17639160319999, 167.524353024, 104.6928711168, 200.50976563199998], [97.2237548544, 40.80419921919997, 123.172119168, 78.10510254079998]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00045975.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[235.1429443293, 0, 602.1268310581, 68.6531982336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045975_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[92.14294432930001, 0, 459.12683105810004, 68.6531982336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045975.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a hanger, a cabinet, a helmet, a ladder, and a car.", "boxes_value": [[235.1429443293, 0, 602.1268310581, 68.6531982336], [532.1632080026, 26.5181274624, 556.7020263337, 116.0518188544], [407.4792480283, 29.1709594624, 443.9559326063, 53.0466308608], [558.7935790778, 10.5233154048, 602.1268310581, 68.6531982336], [300.3894758997, 14.8525337088, 334.85904531610004, 41.6250148352], [235.1429443293, 0, 266.44647216330003, 38.367736832], [13.8215942484, 34.3468627968, 652.7270507977, 457.9489135616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00045975_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a hanger, a cabinet, a helmet, a ladder, and a car.", "boxes_value": [[92.14294432930001, 0, 459.12683105810004, 68.6531982336], [389.16320800259996, 26.5181274624, 413.7020263337, 85], [264.4792480283, 29.1709594624, 300.9559326063, 53.0466308608], [415.7935790778, 10.5233154048, 459.12683105810004, 68.6531982336], [157.38947589970002, 14.8525337088, 191.85904531610004, 41.6250148352], [92.14294432930001, 0, 123.44647216330003, 38.367736832], [0, 34.3468627968, 509.72705079770003, 85]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00045976.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for all objects that you mention.", "boxes_value": [[105.5544433664, 476.27368166400004, 449.4509887488, 768.184326144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045976_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for all objects that you mention.", "boxes_value": [[86.5544433664, 73.27368166400004, 430.4509887488, 365]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045976.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, a glasses, and a sandals.", "boxes_value": [[105.5544433664, 476.27368166400004, 449.4509887488, 768.184326144], [287.3618774528, 296.9051513856, 426.7077026304, 766.6076659968], [138.4108886528, 227.4429931776, 355.5791625728, 699.9306640896], [105.5544433664, 697.2305908224, 211.984985344, 768.184326144], [336.3649902592, 664.9584960768, 449.4509887488, 767.0399169791999], [229.978698752, 476.27368166400004, 299.7086792192, 652.9544677632], [192.4057617408, 736.3205566464001, 207.1904907264, 756.8381347584], [304.6492309504, 715.501098624, 359.262329088, 765.2864990208]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00045976_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, a glasses, and a sandals.", "boxes_value": [[86.5544433664, 73.27368166400004, 430.4509887488, 365], [268.3618774528, 0, 407.7077026304, 363.60766599680005], [119.4108886528, 0, 336.5791625728, 296.9306640896], [86.5544433664, 294.2305908224, 192.984985344, 365], [317.3649902592, 261.9584960768, 430.4509887488, 364.0399169791999], [210.978698752, 73.27368166400004, 280.7086792192, 249.9544677632], [173.4057617408, 333.32055664640006, 188.1904907264, 353.8381347584], [285.6492309504, 312.50109862399995, 340.262329088, 362.28649902079997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00045977.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations.", "boxes_value": [[635.641235328, 122.427978496, 767.8519286784, 447.7568969728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045977_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations.", "boxes_value": [[33.64123532799999, 81.427978496, 165.8519286784, 406.7568969728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045977.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations. For your reference, objects involved in this region include a person, two sneakers, a handbag, and a cup.", "boxes_value": [[635.641235328, 122.427978496, 767.8519286784, 447.7568969728], [683.9249267712, 122.427978496, 767.8519286784, 447.7568969728], [745.5272216832, 421.997375488, 766.8666991872, 443.9465942528], [702.7526855424001, 425.4534301696, 724.8325195008, 447.9252319232], [647.1719970816, 392.639099136, 707.6452636416, 439.368469248], [635.641235328, 399.3007812608, 650.5944824064, 437.9918823424]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00045977_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations. For your reference, objects involved in this region include a person, two sneakers, a handbag, and a cup.", "boxes_value": [[33.64123532799999, 81.427978496, 165.8519286784, 406.7568969728], [81.92492677120003, 81.427978496, 165.8519286784, 406.7568969728], [143.52722168319997, 380.997375488, 164.86669918719997, 402.9465942528], [100.75268554240006, 384.4534301696, 122.8325195008, 406.9252319232], [45.17199708160001, 351.639099136, 105.64526364159997, 398.368469248], [33.64123532799999, 358.3007812608, 48.59448240639995, 396.9918823424]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00045979.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[0.26751712, 379.8771362304, 355.004211456, 513.444457984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045979_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[0.26751712, 33.877136230400026, 355.004211456, 166]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045979.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a flower, a carpet, a cup, and a tea pot.", "boxes_value": [[0.26751712, 379.8771362304, 355.004211456, 513.444457984], [0.26751712, 408.360595712, 66.720947264, 509.9214477312], [162.929199232, 418.693786624, 294.987915008, 513.444457984], [235.326477056, 389.0092773376, 355.004211456, 453.4867553792], [233.514587392, 379.8771362304, 247.381042496, 412.2322387456], [195.050231936, 398.8974609408, 217.771240256, 424.6567993344]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00045979_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a flower, a carpet, a cup, and a tea pot.", "boxes_value": [[0.26751712, 33.877136230400026, 355.004211456, 166], [0.26751712, 62.36059571200002, 66.720947264, 163.92144773119998], [162.929199232, 72.69378662399998, 294.987915008, 166], [235.326477056, 43.0092773376, 355.004211456, 107.48675537920002], [233.514587392, 33.877136230400026, 247.381042496, 66.2322387456], [195.050231936, 52.89746094079999, 217.771240256, 78.65679933439998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00045980.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[605.926391589, 226.3826904064, 739.8704834055, 510.8411254784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045980_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[33.92639158899999, 71.3826904064, 167.87048340549995, 355.8411254784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045980.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a person, a handbag, a hat, a sneakers, and a bottle.", "boxes_value": [[605.926391589, 226.3826904064, 739.8704834055, 510.8411254784], [615.3468017895, 307.70996096, 759.6473388345, 512.0782470656], [605.926391589, 226.3826904064, 739.8704834055, 510.8411254784], [618.183227556, 383.9110107648, 662.087036097, 471.7185058816], [631.581665067, 226.3775634944, 708.396972669, 264.2864379904], [606.9447021374999, 487.3071899648, 634.7470702859999, 509.627441408], [618.6937255965, 269.3755493376, 649.699462854, 306.3648071168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00045980_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a person, a handbag, a hat, a sneakers, and a bottle.", "boxes_value": [[33.92639158899999, 71.3826904064, 167.87048340549995, 355.8411254784], [43.34680178949998, 152.70996096, 187.64733883450003, 357], [33.92639158899999, 71.3826904064, 167.87048340549995, 355.8411254784], [46.18322755600002, 228.91101076479998, 90.08703609700001, 316.7185058816], [59.58166506700002, 71.3775634944, 136.39697266899998, 109.28643799039997], [34.94470213749992, 332.3071899648, 62.74707028599994, 354.627441408], [46.69372559650003, 114.37554933759998, 77.69946285399999, 151.36480711680002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00045981.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object.", "boxes_value": [[140.8957519248, 306.5411987456, 309.7742309776, 459.69335936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045981_crop.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object.", "boxes_value": [[42.89575192480001, 38.54119874560001, 211.77423097759998, 191.69335936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045981.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a cup, two bottles, and two keyboards.", "boxes_value": [[140.8957519248, 306.5411987456, 309.7742309776, 459.69335936], [1.2890625312, 176.4801025536, 477.74707031199995, 510.5093993984], [224.2073974288, 379.7070312448, 284.3521728384, 439.2317505024], [274.4313964656, 426.8307494912, 309.7742309776, 459.69335936], [206.225952168, 306.5411987456, 249.629455568, 398.9285278208], [109.1803588848, 393.426513664, 243.6796264768, 484.0466308608], [140.8957519248, 310.9638671872, 208.92010499039998, 346.4548339712]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00045981_crop.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a cup, two bottles, and two keyboards.", "boxes_value": [[42.89575192480001, 38.54119874560001, 211.77423097759998, 191.69335936], [0, 0, 253, 229], [126.2073974288, 111.70703124480002, 186.3521728384, 171.23175050240002], [176.43139646560002, 158.83074949119998, 211.77423097759998, 191.69335936], [108.22595216799999, 38.54119874560001, 151.629455568, 130.9285278208], [11.1803588848, 125.42651366400003, 145.6796264768, 216.0466308608], [42.89575192480001, 42.96386718719998, 110.92010499039998, 78.4548339712]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00045983.jpg", "text": "Detail the chosen region in the depicted scene . Specify the location of each mentioned object.", "boxes_value": [[638.9197998121, 238.789428736, 767.4814453014, 309.0878296064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045983_crop.jpg", "text": "Detail the chosen region in the depicted scene . Specify the location of each mentioned object.", "boxes_value": [[32.919799812099996, 17.78942873599999, 161.48144530139996, 88.08782960640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045983.jpg", "text": "Detail the chosen region in the depicted scene . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[638.9197998121, 238.789428736, 767.4814453014, 309.0878296064], [638.9197998121, 247.0175171072, 664.4460449122, 309.0878296064], [666.1282958938, 239.4202881024, 689.0484618751, 308.601196288], [689.4689941042, 246.3593750016, 714.9123535428, 306.9190063616], [716.3842773359, 238.789428736, 740.7763671929999, 305.8676147712], [737.2016601892, 240.6819458048, 767.4814453014, 306.9190063616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00045983_crop.jpg", "text": "Detail the chosen region in the depicted scene . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[32.919799812099996, 17.78942873599999, 161.48144530139996, 88.08782960640002], [32.919799812099996, 26.017517107200007, 58.44604491220002, 88.08782960640002], [60.12829589379999, 18.420288102400008, 83.04846187509997, 87.60119628799998], [83.46899410419996, 25.3593750016, 108.91235354280002, 85.9190063616], [110.38427733590004, 17.78942873599999, 134.77636719299994, 84.86761477120001], [131.20166018919997, 19.681945804799994, 161.48144530139996, 85.9190063616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00045984.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[23.5156250112, 515.9981689384, 468.8378906112, 689.2463378722]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045984_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[23.5156250112, 43.9981689384, 468.8378906112, 217.2463378722]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045984.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a leather shoes, a handbag, and a wine glass.", "boxes_value": [[23.5156250112, 515.9981689384, 468.8378906112, 689.2463378722], [418.678771968, 528.178955092, 468.8378906112, 592.669189471], [23.5156250112, 527.2211914348001, 50.7468261888, 547.0257568188], [187.9821167104, 674.8742675448, 216.9739379712, 689.2463378722], [260.062255872, 521.9045410094, 281.5444336128, 557.4047851186], [161.1688842752, 515.9981689384, 171.6118163968, 545.5865478374]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00045984_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a leather shoes, a handbag, and a wine glass.", "boxes_value": [[23.5156250112, 43.9981689384, 468.8378906112, 217.2463378722], [418.678771968, 56.178955092000024, 468.8378906112, 120.66918947099998], [23.5156250112, 55.22119143480006, 50.7468261888, 75.0257568188], [187.9821167104, 202.87426754479998, 216.9739379712, 217.2463378722], [260.062255872, 49.90454100939996, 281.5444336128, 85.40478511859999], [161.1688842752, 43.9981689384, 171.6118163968, 73.58654783739996]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00045986.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each object you identify.", "boxes_value": [[20.889465344, 395.90979004499997, 337.1530151424, 689.869140653]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045986_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each object you identify.", "boxes_value": [[20.889465344, 73.90979004499997, 337.1530151424, 367.86914065300004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045986.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, two sneakers, and two people.", "boxes_value": [[20.889465344, 395.90979004499997, 337.1530151424, 689.869140653], [62.122192384, 396.25402832400005, 100.4238281216, 414.251220691], [44.5864868352, 646.379028295, 73.4577636864, 667.134521515], [72.27441408, 661.815673827, 107.2871704064, 681.85437012], [20.889465344, 395.90979004499997, 128.3204345856, 689.869140653], [258.730102528, 399.187866231, 337.1530151424, 628.3851318359999]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00045986_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, two sneakers, and two people.", "boxes_value": [[20.889465344, 73.90979004499997, 337.1530151424, 367.86914065300004], [62.122192384, 74.25402832400005, 100.4238281216, 92.25122069100001], [44.5864868352, 324.379028295, 73.4577636864, 345.13452151499996], [72.27441408, 339.81567382699996, 107.2871704064, 359.85437012], [20.889465344, 73.90979004499997, 128.3204345856, 367.86914065300004], [258.730102528, 77.18786623099999, 337.1530151424, 306.3851318359999]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00045988.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[6.084472647699999, 249.4216308736, 345.9335937575, 487.6495361536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045988_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[6.084472647699999, 60.42163087360001, 345.9335937575, 298.6495361536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045988.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a glasses, two hats, a sneakers, and a handbag.", "boxes_value": [[6.084472647699999, 249.4216308736, 345.9335937575, 487.6495361536], [39.0570068066, 293.2209472512, 72.5216674633, 302.5714111488], [6.084472647699999, 249.4216308736, 47.423217783, 270.0909423616], [20.037719722200002, 451.7182006784, 33.2112426589, 475.8695678464], [321.4803466523, 294.542724608, 345.9335937575, 311.9564209152], [144.4750976408, 448.1657714688, 192.6011962561, 487.6495361536]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00045988_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a glasses, two hats, a sneakers, and a handbag.", "boxes_value": [[6.084472647699999, 60.42163087360001, 345.9335937575, 298.6495361536], [39.0570068066, 104.22094725120002, 72.5216674633, 113.5714111488], [6.084472647699999, 60.42163087360001, 47.423217783, 81.09094236160001], [20.037719722200002, 262.7182006784, 33.2112426589, 286.8695678464], [321.4803466523, 105.54272460800001, 345.9335937575, 122.9564209152], [144.4750976408, 259.1657714688, 192.6011962561, 298.6495361536]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00045992.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[432.35546877599995, 389.6972045824, 572.7082519620001, 442.5606078976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045992_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[35.35546877599995, 13.69720458239999, 175.70825196200008, 66.5606078976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045992.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two vans, and a car.", "boxes_value": [[432.35546877599995, 389.6972045824, 572.7082519620001, 442.5606078976], [466.419433557, 397.2041626112, 482.179931618, 439.0038452224], [432.35546877599995, 399.9357910016, 447.290893588, 442.5606078976], [441.691772445, 399.7938842624, 468.375854485, 423.3527832064], [480.39575199, 399.5534667776, 515.012939441, 429.8435668992], [514.051269501, 389.6972045824, 572.7082519620001, 431.7667236352]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00045992_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two vans, and a car.", "boxes_value": [[35.35546877599995, 13.69720458239999, 175.70825196200008, 66.5606078976], [69.41943355699999, 21.20416261119999, 85.17993161800001, 63.0038452224], [35.35546877599995, 23.935791001600023, 50.29089358800002, 66.5606078976], [44.691772445000026, 23.793884262400013, 71.37585448499999, 47.35278320639998], [83.39575199000001, 23.553466777600022, 118.01293944099996, 53.84356689920003], [117.05126950099998, 13.69720458239999, 175.70825196200008, 55.76672363519998]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00045993.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention.", "boxes_value": [[511.60989216239994, 58.0238036992, 594.5029301976, 457.2401366016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045993_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention.", "boxes_value": [[21.609892162399944, 58.0238036992, 104.5029301976, 457.2401366016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045993.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a swing, a person, and two sneakers.", "boxes_value": [[511.60989216239994, 58.0238036992, 594.5029301976, 457.2401366016], [518.2445068684, 58.0238036992, 573.5761718748, 113.3554077184], [471.442226348, 0.0343028736, 632.391684822, 328.20402048], [473.2836913808, 153.1422729728, 623.7818603292, 458.8447876096001], [511.60989216239994, 416.8988580864, 542.55662639, 447.84559232], [559.6878542784, 427.1223328256, 594.5029301976, 457.2401366016]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00045993_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a swing, a person, and two sneakers.", "boxes_value": [[21.609892162399944, 58.0238036992, 104.5029301976, 457.2401366016], [28.24450686839998, 58.0238036992, 83.57617187480002, 113.3554077184], [0, 0.0343028736, 125, 328.20402048], [0, 153.1422729728, 125, 458.8447876096001], [21.609892162399944, 416.8988580864, 52.55662639000002, 447.84559232], [69.68785427839998, 427.1223328256, 104.5029301976, 457.2401366016]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00045994.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference.", "boxes_value": [[0, 0.0300903424, 770.9793701217, 43.3628539904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045994_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference.", "boxes_value": [[0, 0.0300903424, 770.9793701217, 43.3628539904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045994.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference. For your reference, objects involved in this region include five breads.", "boxes_value": [[0, 0.0300903424, 770.9793701217, 43.3628539904], [0, 0.319641088, 168.97833255150002, 43.3628539904], [168.97833255150002, 0.4344482304, 353.7231445617, 38.9984131072], [348.6776123133, 0.0950317568, 483.5690917776, 36.2015991296], [481.13940432000004, 0.1459960832, 603.0812988542999, 20.6330566656], [615.0487060362, 0.0300903424, 770.9793701217, 27.6420898304]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00045994_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference. For your reference, objects involved in this region include five breads.", "boxes_value": [[0, 0.0300903424, 770.9793701217, 43.3628539904], [0, 0.319641088, 168.97833255150002, 43.3628539904], [168.97833255150002, 0.4344482304, 353.7231445617, 38.9984131072], [348.6776123133, 0.0950317568, 483.5690917776, 36.2015991296], [481.13940432000004, 0.1459960832, 603.0812988542999, 20.6330566656], [615.0487060362, 0.0300903424, 770.9793701217, 27.6420898304]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00045997.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[46.5001830912, 91.1621704192, 706.9199218944, 215.0795898368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045997_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[46.5001830912, 31.162170419199995, 706.9199218944, 155.0795898368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00045997.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, four hats, a gloves, and a pickup truck.", "boxes_value": [[46.5001830912, 91.1621704192, 706.9199218944, 215.0795898368], [218.3145141504, 91.6981811712, 321.7337646336, 350.5321655296], [46.5001830912, 103.7243652096, 104.11773680639999, 134.7156371968], [255.5821532928, 92.375488256, 297.485839872, 116.3828125184], [510.05981445120005, 192.3817748992, 599.1051025152, 247.8168334848], [650.1752929536, 170.1204223488, 706.9199218944, 215.0795898368], [656.7227783424, 160.0809936384, 705.1738281216, 179.2868652544], [305.4923096064, 91.1621704192, 393.4675292928, 159.9287719936]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00045997_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, four hats, a gloves, and a pickup truck.", "boxes_value": [[46.5001830912, 31.162170419199995, 706.9199218944, 155.0795898368], [218.3145141504, 31.698181171200005, 321.7337646336, 186], [46.5001830912, 43.724365209599995, 104.11773680639999, 74.7156371968], [255.5821532928, 32.375488256, 297.485839872, 56.3828125184], [510.05981445120005, 132.3817748992, 599.1051025152, 186], [650.1752929536, 110.12042234879999, 706.9199218944, 155.0795898368], [656.7227783424, 100.0809936384, 705.1738281216, 119.2868652544], [305.4923096064, 31.162170419199995, 393.4675292928, 99.92877199360001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046001.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[183.5262451426, 294.9923095552, 332.9079589493, 344.0354003968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046001_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[37.526245142600004, 12.992309555199995, 186.9079589493, 62.035400396800014]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046001.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a bed, two pillows, a vase, and a glasses.", "boxes_value": [[183.5262451426, 294.9923095552, 332.9079589493, 344.0354003968], [187.1268310273, 131.8483886592, 282.9047851723, 342.8375244288], [166.10717773870002, 197.7824707072, 607.8161621238, 426.4222412288], [288.0402832144, 283.6380004864, 351.8903808215, 311.2488403456], [237.9956054308, 295.7177734144, 332.9079589493, 339.7225341952], [183.5262451426, 294.9923095552, 231.8099365345, 341.3132934656], [224.5009155142, 328.3010864128, 257.7178955067, 344.0354003968]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046001_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a bed, two pillows, a vase, and a glasses.", "boxes_value": [[37.526245142600004, 12.992309555199995, 186.9079589493, 62.035400396800014], [41.1268310273, 0, 136.90478517230002, 60.83752442880001], [20.107177738700017, 0, 224, 74], [142.0402832144, 1.638000486400017, 205.8903808215, 29.2488403456], [91.9956054308, 13.717773414400028, 186.9079589493, 57.72253419520001], [37.526245142600004, 12.992309555199995, 85.8099365345, 59.3132934656], [78.50091551419999, 46.301086412799975, 111.71789550670002, 62.035400396800014]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046004.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[0, 201.25061033249997, 96.8966674944, 375.4924316695]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046004_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[0, 44.250610332499974, 96.8966674944, 218.49243166949998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046004.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a nightstand, a desk, and two storage boxes.", "boxes_value": [[0, 201.25061033249997, 96.8966674944, 375.4924316695], [2.341796864, 260.24194338300003, 508.0101318144, 694.0079345515], [42.6246338048, 312.65112304499996, 126.413085952, 386.918090793], [0, 201.25061033249997, 96.8966674944, 375.4924316695], [3.079772928, 269.6621704155, 72.9879150592, 338.3218994305], [26.1744384512, 262.1719970945, 95.4583740416, 320.2207031045]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046004_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a nightstand, a desk, and two storage boxes.", "boxes_value": [[0, 44.250610332499974, 96.8966674944, 218.49243166949998], [2.341796864, 103.24194338300003, 121, 262], [42.6246338048, 155.65112304499996, 121, 229.91809079299998], [0, 44.250610332499974, 96.8966674944, 218.49243166949998], [3.079772928, 112.66217041549999, 72.9879150592, 181.32189943050003], [26.1744384512, 105.1719970945, 95.4583740416, 163.2207031045]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046006.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[62.0076293921, 308.2729491968, 563.4414062352, 371.9733886464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046006_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[62.0076293921, 16.272949196800027, 563.4414062352, 79.9733886464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046006.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a nightstand, a chair, a telephone, and a trash bin can.", "boxes_value": [[62.0076293921, 308.2729491968, 563.4414062352, 371.9733886464], [304.7391967791, 244.049438464, 607.2550048757, 511.6327514624], [492.6530761838, 308.2729491968, 563.4414062352, 330.8557128704], [257.84631347010003, 238.3299560448, 346.68981934860005, 375.228637696], [544.63183594, 304.921691904, 568.231811555, 321.7216796672], [62.0076293921, 332.9082031104, 79.9479369815, 371.9733886464]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046006_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a nightstand, a chair, a telephone, and a trash bin can.", "boxes_value": [[62.0076293921, 16.272949196800027, 563.4414062352, 79.9733886464], [304.7391967791, 0, 607.2550048757, 95], [492.6530761838, 16.272949196800027, 563.4414062352, 38.8557128704], [257.84631347010003, 0, 346.68981934860005, 83.22863769600002], [544.63183594, 12.921691904, 568.231811555, 29.72167966720002], [62.0076293921, 40.908203110399995, 79.9479369815, 79.9733886464]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046008.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[280.513671908, 98.52142336, 714.7484130946, 368.8822021632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046008_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[109.51367190799999, 68.52142336, 543.7484130946, 338.8822021632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046008.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, and three people.", "boxes_value": [[280.513671908, 98.52142336, 714.7484130946, 368.8822021632], [679.0548095969999, 323.3559570432, 698.0863037286, 362.5383910912], [483.5158691558, 326.3413085696, 505.90588377639995, 368.8822021632], [280.513671908, 314.7731323392, 302.9036865286, 358.8067016704], [662.3442382452, 98.52142336, 714.7484130946, 204.6461791744], [505.195922881, 136.0413208064, 557.3535155896, 242.932189952], [323.1285400266, 167.1660156416, 378.098022449, 285.6439819264]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046008_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, and three people.", "boxes_value": [[109.51367190799999, 68.52142336, 543.7484130946, 338.8822021632], [508.0548095969999, 293.3559570432, 527.0863037286, 332.5383910912], [312.5158691558, 296.3413085696, 334.90588377639995, 338.8822021632], [109.51367190799999, 284.7731323392, 131.90368652860002, 328.8067016704], [491.3442382452, 68.52142336, 543.7484130946, 174.6461791744], [334.195922881, 106.0413208064, 386.35351558959997, 212.932189952], [152.12854002659998, 137.1660156416, 207.09802244899998, 255.6439819264]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046009.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference.", "boxes_value": [[321.4541015552, 478.3337402643, 473.0250244096, 593.9620361018999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046009_crop.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference.", "boxes_value": [[38.4541015552, 29.33374026429999, 190.0250244096, 144.9620361018999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046009.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference. For your reference, objects involved in this region include a vase, two flowers, a chair, a stool, a carpet, and two people.", "boxes_value": [[321.4541015552, 478.3337402643, 473.0250244096, 593.9620361018999], [369.42126464, 506.7612305037, 390.4776611328, 541.0853271477], [356.4996337664, 478.3337402643, 403.4481811456, 512.7913818342], [338.8401489408, 504.1768798827, 358.653259264, 520.1135253927], [451.278930688, 520.8649902513, 474.1395263488, 563.3865966609001], [321.4541015552, 561.6265869054, 372.8521118208, 593.9620361018999], [265.6127319552, 547.7421875085, 512.332763648, 572.2258300704], [412.3363647488, 560.9591064333, 475.129638656, 639.3377685735001], [452.4518432768, 522.620239293, 473.0250244096, 560.5852051134]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046009_crop.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference. For your reference, objects involved in this region include a vase, two flowers, a chair, a stool, a carpet, and two people.", "boxes_value": [[38.4541015552, 29.33374026429999, 190.0250244096, 144.9620361018999], [86.42126464, 57.761230503700006, 107.4776611328, 92.08532714770001], [73.4996337664, 29.33374026429999, 120.44818114560002, 63.791381834200024], [55.84014894080002, 55.17687988270001, 75.65325926399998, 71.11352539270001], [168.278930688, 71.86499025130001, 191.13952634880002, 114.38659666090007], [38.4541015552, 112.62658690540002, 89.85211182080002, 144.9620361018999], [0, 98.74218750850002, 227, 123.22583007039998], [129.3363647488, 111.95910643330001, 192.129638656, 173], [169.4518432768, 73.62023929300005, 190.0250244096, 111.58520511339998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046011.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[449.9163818111, 321.9765624832, 560.9804687184, 399.8704834048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046011_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[27.916381811099996, 19.976562483199984, 138.98046871839995, 97.87048340479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046011.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two cars, and a bus.", "boxes_value": [[449.9163818111, 321.9765624832, 560.9804687184, 399.8704834048], [458.30407716720003, 354.776611328, 471.83227537880003, 399.8704834048], [542.8741454801, 321.9765624832, 560.9804687184, 359.9137573376], [420.38073732410004, 364.011230464, 478.2214355559, 398.9970092544], [449.9163818111, 354.8692626944, 487.1876220768, 384.5807495168], [484.2420654616, 262.6121826304, 683.2036132963, 459.9202881024]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046011_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two cars, and a bus.", "boxes_value": [[27.916381811099996, 19.976562483199984, 138.98046871839995, 97.87048340479998], [36.304077167200035, 52.776611328, 49.832275378800034, 97.87048340479998], [120.87414548009997, 19.976562483199984, 138.98046871839995, 57.913757337599975], [0, 62.01123046399999, 56.22143555589997, 96.99700925439998], [27.916381811099996, 52.86926269439999, 65.18762207679998, 82.58074951679998], [62.24206546160002, 0, 166, 117]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046013.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[530.3159179434, 72.7843017728, 733.5710449530001, 171.059936512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046013_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[51.315917943399995, 24.784301772800006, 254.57104495300007, 123.05993651200001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046013.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[530.3159179434, 72.7843017728, 733.5710449530001, 171.059936512], [637.3656005551001, 144.0867919872, 679.5112304565, 169.3741454848], [677.2634277689, 151.9539794944, 722.2187499711999, 171.059936512], [530.3159179434, 106.0353393664, 548.6191406365, 143.6051025408], [707.4420166292, 74.3442382848, 733.5710449530001, 114.512695296], [685.9929199046001, 72.7843017728, 711.3419189228, 112.952758784]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046013_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[51.315917943399995, 24.784301772800006, 254.57104495300007, 123.05993651200001], [158.36560055510006, 96.0867919872, 200.51123045650002, 121.37414548480001], [198.2634277689, 103.95397949439999, 243.21874997119994, 123.05993651200001], [51.315917943399995, 58.035339366399995, 69.61914063649999, 95.6051025408], [228.4420166292, 26.3442382848, 254.57104495300007, 66.512695296], [206.99291990460006, 24.784301772800006, 232.3419189228, 64.952758784]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046017.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[258.893310522, 96.0312500224, 362.704711908, 285.73870848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046017_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[26.89331052199998, 48.0312500224, 130.70471190799998, 237.73870848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046017.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a handbag, a glasses, a plate, and two chairs.", "boxes_value": [[258.893310522, 96.0312500224, 362.704711908, 285.73870848], [258.893310522, 255.4713134592, 298.997558629, 285.73870848], [329.075805674, 185.36993408, 361.829345724, 202.8817748992], [338.142578158, 246.864379904, 387.59509280800006, 275.3106079232], [260.079711892, 108.2850341888, 302.201904312, 153.4706420736], [325.177612298, 96.0312500224, 362.704711908, 139.6852417024]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046017_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a handbag, a glasses, a plate, and two chairs.", "boxes_value": [[26.89331052199998, 48.0312500224, 130.70471190799998, 237.73870848], [26.89331052199998, 207.4713134592, 66.99755862900003, 237.73870848], [97.07580567399998, 137.36993408, 129.829345724, 154.8817748992], [106.14257815799999, 198.864379904, 155.59509280800006, 227.3106079232], [28.079711891999978, 60.2850341888, 70.20190431200001, 105.4706420736], [93.17761229799999, 48.0312500224, 130.70471190799998, 91.68524170239999]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046018.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[487.81579591679997, 331.4125976576, 621.47790528, 497.376342784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046018_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[33.81579591679997, 42.412597657599974, 167.47790527999996, 208.37634278399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046018.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, two towels, two plates, and a wine glass.", "boxes_value": [[487.81579591679997, 331.4125976576, 621.47790528, 497.376342784], [429.6480712704, 261.3637695488, 556.8745117440001, 511.8564453376], [487.81579591679997, 426.0899047936, 621.47790528, 497.376342784], [498.9543456768, 331.4125976576, 594.745483392, 378.1943359488], [489.16760256, 425.0624999936, 625.5784912128, 509.3891601408], [532.6140136704, 331.6481933824, 579.383422848, 421.4667358208], [498.33288575999995, 346.7120361472, 594.9088134912, 382.24468992]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00046018_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, two towels, two plates, and a wine glass.", "boxes_value": [[33.81579591679997, 42.412597657599974, 167.47790527999996, 208.37634278399997], [0, 0, 102.87451174400007, 222.8564453376], [33.81579591679997, 137.0899047936, 167.47790527999996, 208.37634278399997], [44.95434567680002, 42.412597657599974, 140.74548339199998, 89.19433594880002], [35.16760255999998, 136.0624999936, 171.57849121280003, 220.38916014080002], [78.61401367040003, 42.648193382399995, 125.38342284800001, 132.4667358208], [44.332885759999954, 57.712036147200024, 140.90881349120002, 93.24468991999998]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00046019.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[212.1489868096, 181.4038086144, 404.52038575439997, 318.6836548096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046019_crop.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[48.14898680959999, 34.40380861439999, 240.52038575439997, 171.6836548096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046019.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two desks, a potted plant, two umbrellas, and two stuffed toys.", "boxes_value": [[212.1489868096, 181.4038086144, 404.52038575439997, 318.6836548096], [252.1947021704, 275.232421888, 282.9071044584, 312.4712524288], [355.08239746159995, 276.7446288896, 404.52038575439997, 318.6836548096], [360.91491698, 258.4136963072, 378.690429656, 277.5778808832], [212.1489868096, 181.4038086144, 316.3702392648, 254.8637085184], [291.1184082104, 150.6425170944, 471.0949706928, 264.5053100544], [211.03124998959998, 288.513427712, 240.77343748080003, 311.4259643392], [339.4735107744, 275.735351552, 357.9797363232, 304.3759765504]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00046019_crop.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two desks, a potted plant, two umbrellas, and two stuffed toys.", "boxes_value": [[48.14898680959999, 34.40380861439999, 240.52038575439997, 171.6836548096], [88.1947021704, 128.23242188799998, 118.9071044584, 165.47125242880003], [191.08239746159995, 129.7446288896, 240.52038575439997, 171.6836548096], [196.91491698, 111.41369630719998, 214.690429656, 130.5778808832], [48.14898680959999, 34.40380861439999, 152.37023926479998, 107.86370851839999], [127.11840821039999, 3.642517094400006, 288, 117.50531005440001], [47.031249989599985, 141.513427712, 76.77343748080003, 164.4259643392], [175.47351077439998, 128.735351552, 193.97973632319997, 157.37597655040003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00046021.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object.", "boxes_value": [[216.718625536, 217.58148192000002, 339.0975952, 426.44332488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046021_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object.", "boxes_value": [[30.71862553599999, 52.581481920000016, 153.0975952, 261.44332488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046021.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a leather shoes.", "boxes_value": [[216.718625536, 217.58148192000002, 339.0975952, 426.44332488], [217.05767820799997, 221.394531264, 294.85388185600004, 427.420410144], [209.069580096, 230.134582512, 243.53833004799998, 280.105041504], [272.024291968, 221.512451184, 320.47570803200006, 380.820739728], [304.99426272, 217.58148192000002, 339.0975952, 415.760742192], [216.718625536, 410.868892464, 239.431339456, 426.44332488]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046021_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a leather shoes.", "boxes_value": [[30.71862553599999, 52.581481920000016, 153.0975952, 261.44332488], [31.05767820799997, 56.394531263999994, 108.85388185600004, 262.420410144], [23.06958009600001, 65.13458251200001, 57.53833004799998, 115.10504150399998], [86.024291968, 56.512451184000014, 134.47570803200006, 215.82073972799998], [118.99426272, 52.581481920000016, 153.0975952, 250.760742192], [30.71862553599999, 245.868892464, 53.43133945599999, 261.44332488]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046022.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[10.9399414272, 269.2558593536, 768.6278075903999, 510.5162353664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046022_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[10.9399414272, 61.25585935359999, 768, 302.5162353664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046022.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a storage box, a telephone, a keyboard, a mouse, and a computer box.", "boxes_value": [[10.9399414272, 269.2558593536, 768.6278075903999, 510.5162353664], [10.9399414272, 269.2558593536, 768.6278075903999, 510.5162353664], [466.0971679488, 339.6658324992, 534.3780517632, 403.0191650304], [0, 234.8287353344, 114.0792846336, 303.249633792], [410.0721435648, 277.56677248, 595.1407470336, 310.1444091904], [658.6369628928, 300.832031232, 695.1958007808, 321.6769409024], [237.51379392, 375.745117184, 367.5590820096, 511.0828857344]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046022_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a storage box, a telephone, a keyboard, a mouse, and a computer box.", "boxes_value": [[10.9399414272, 61.25585935359999, 768, 302.5162353664], [10.9399414272, 61.25585935359999, 768, 302.5162353664], [466.0971679488, 131.6658324992, 534.3780517632, 195.01916503040002], [0, 26.828735334399994, 114.0792846336, 95.249633792], [410.0721435648, 69.56677248, 595.1407470336, 102.14440919039998], [658.6369628928, 92.83203123200002, 695.1958007808, 113.67694090240002], [237.51379392, 167.74511718399998, 367.5590820096, 303.0828857344]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046024.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[45.7274169688, 5.2599487488, 218.5216675056, 93.0505371136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046024_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[43.7274169688, 5.2599487488, 216.5216675056, 93.0505371136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046024.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two hats, a gloves, a helmet, and a person.", "boxes_value": [[45.7274169688, 5.2599487488, 218.5216675056, 93.0505371136], [47.8176879516, 5.2599487488, 70.1137084748, 21.9819336192], [196.2256469824, 20.5884399616, 218.5216675056, 38.0072021504], [45.7274169688, 56.8195190272, 74.990966776, 93.0505371136], [94.4999999808, 23.3754272256, 163.4783325332, 86.7797851648], [1.9945068532, 22.6301879808, 353.77185061, 499.161560064]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046024_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two hats, a gloves, a helmet, and a person.", "boxes_value": [[43.7274169688, 5.2599487488, 216.5216675056, 93.0505371136], [45.8176879516, 5.2599487488, 68.1137084748, 21.9819336192], [194.2256469824, 20.5884399616, 216.5216675056, 38.0072021504], [43.7274169688, 56.8195190272, 72.990966776, 93.0505371136], [92.4999999808, 23.3754272256, 161.4783325332, 86.7797851648], [0, 22.6301879808, 259, 114]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046030.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[334.5665283072, 0, 759.5953369343999, 461.8666992128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046030_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[106.56652830719997, 0, 531.5953369343999, 461.8666992128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046030.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, three people, and three cups.", "boxes_value": [[334.5665283072, 0, 759.5953369343999, 461.8666992128], [546.8498535168, 0, 595.8593750016, 160.9934692352], [718.3829346048, 19.534423808, 759.5953369343999, 173.24584959999999], [648.3994140672, 184.1059570176, 768.7088623104, 511.80590822399995], [214.9961548032, 187.571289088, 700.1297607168, 509.62084961279993], [181.6661376768, 198.3062744064, 514.5678710784, 449.8116454912], [334.5665283072, 361.0463257088, 397.45458984960004, 461.8666992128], [367.8708496128, 61.8539428864, 397.93969727999996, 113.9244384768], [396.1063232256, 54.8867798016, 445.97656250880004, 116.1246338048]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6, 7, 8]]}, {"image_path": "objects365_v1_00046030_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, three people, and three cups.", "boxes_value": [[106.56652830719997, 0, 531.5953369343999, 461.8666992128], [318.84985351679995, 0, 367.85937500160003, 160.9934692352], [490.38293460479997, 19.534423808, 531.5953369343999, 173.24584959999999], [420.39941406720004, 184.1059570176, 540, 511.80590822399995], [0, 187.571289088, 472.1297607168, 509.62084961279993], [0, 198.3062744064, 286.5678710784, 449.8116454912], [106.56652830719997, 361.0463257088, 169.45458984960004, 461.8666992128], [139.8708496128, 61.8539428864, 169.93969727999996, 113.9244384768], [168.10632322560002, 54.8867798016, 217.97656250880004, 116.1246338048]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6, 7, 8]]}, {"image_path": "objects365_v1_00046031.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify.", "boxes_value": [[0, 25.972290048, 146.4484252705, 341.7008667136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046031_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify.", "boxes_value": [[0, 25.972290048, 146.4484252705, 341.7008667136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046031.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify. For your reference, objects involved in this region include a mirror, two pictures, a chair, a barrel, and a tea pot.", "boxes_value": [[0, 25.972290048, 146.4484252705, 341.7008667136], [92.2234496741, 50.3726806528, 153.3381958299, 174.773681664], [119.29211427989999, 153.2472534016, 146.4484252705, 170.9152221696], [0, 25.972290048, 36.187133762900004, 102.2063598592], [0, 222.7124023296, 26.9732665808, 341.7008667136], [0, 265.1607666176, 17.0558471774, 350.1275024384], [42.3527221543, 108.333190912, 65.6049194284, 138.0960082944]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046031_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify. For your reference, objects involved in this region include a mirror, two pictures, a chair, a barrel, and a tea pot.", "boxes_value": [[0, 25.972290048, 146.4484252705, 341.7008667136], [92.2234496741, 50.3726806528, 153.3381958299, 174.773681664], [119.29211427989999, 153.2472534016, 146.4484252705, 170.9152221696], [0, 25.972290048, 36.187133762900004, 102.2063598592], [0, 222.7124023296, 26.9732665808, 341.7008667136], [0, 265.1607666176, 17.0558471774, 350.1275024384], [42.3527221543, 108.333190912, 65.6049194284, 138.0960082944]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046033.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.5306396672, 367.356811514, 149.9802856448, 667.0191650429999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046033_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.5306396672, 75.35681151400001, 149.9802856448, 375.01916504299993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046033.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a hat, a glasses, and a microphone.", "boxes_value": [[0.5306396672, 367.356811514, 149.9802856448, 667.0191650429999], [1.399536128, 561.943359362, 142.7234497024, 769.21838378], [0.7714233344, 370.370971663, 135.1861572096, 573.8774413680001], [0.5306396672, 367.356811514, 33.7561645568, 455.95825198], [0.0068359168, 370.771118179, 90.923889152, 438.771972639], [16.4466552832, 407.885131808, 73.2386474496, 422.83044434400006], [119.0620727296, 537.808715829, 149.9802856448, 667.0191650429999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046033_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a hat, a glasses, and a microphone.", "boxes_value": [[0.5306396672, 75.35681151400001, 149.9802856448, 375.01916504299993], [1.399536128, 269.943359362, 142.7234497024, 449], [0.7714233344, 78.37097166299998, 135.1861572096, 281.87744136800006], [0.5306396672, 75.35681151400001, 33.7561645568, 163.95825198], [0.0068359168, 78.77111817899998, 90.923889152, 146.771972639], [16.4466552832, 115.88513180799998, 73.2386474496, 130.83044434400006], [119.0620727296, 245.808715829, 149.9802856448, 375.01916504299993]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046034.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[181.34124752, 364.5465088, 276.71362303999996, 460.7539673088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046034_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[24.341247519999996, 24.546508800000026, 119.71362303999996, 120.75396730879999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046034.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a handbag, and a backpack.", "boxes_value": [[181.34124752, 364.5465088, 276.71362303999996, 460.7539673088], [249.82226559999998, 367.2189330944, 276.2125244, 460.7539673088], [233.45361327999998, 364.5465088, 256.16931152, 458.0815429632], [181.34124752, 364.5465088, 204.72503664, 438.3723755008], [253.53527832, 404.9240722432, 273.06689456000004, 420.90448], [263.80334472000004, 386.3442993152, 276.71362303999996, 412.0181274624]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046034_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a handbag, and a backpack.", "boxes_value": [[24.341247519999996, 24.546508800000026, 119.71362303999996, 120.75396730879999], [92.82226559999998, 27.218933094399972, 119.2125244, 120.75396730879999], [76.45361327999998, 24.546508800000026, 99.16931152000001, 118.08154296319998], [24.341247519999996, 24.546508800000026, 47.72503664000001, 98.37237550079999], [96.53527832, 64.92407224319999, 116.06689456000004, 80.90447999999998], [106.80334472000004, 46.344299315199976, 119.71362303999996, 72.0181274624]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046035.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[540.3383789182, 219.7614746112, 633.2960204878, 422.0598754816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046035_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[23.338378918199965, 50.761474611199986, 116.29602048779998, 253.0598754816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046035.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, three people, and a cup.", "boxes_value": [[540.3383789182, 219.7614746112, 633.2960204878, 422.0598754816], [580.3833007596, 361.4960937472, 633.2960204878, 413.7390747136], [564.0803222746999, 200.3745117184, 663.7312011936, 376.1437988352], [502.68835446890006, 245.6107788288, 589.2271728787, 403.516174336], [540.3383789182, 219.7614746112, 620.1339111517, 306.3004150272], [593.1369628973999, 399.4514770432, 630.6464843564, 422.0598754816]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046035_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, three people, and a cup.", "boxes_value": [[23.338378918199965, 50.761474611199986, 116.29602048779998, 253.0598754816], [63.38330075960005, 192.4960937472, 116.29602048779998, 244.73907471360002], [47.08032227469994, 31.374511718399987, 139, 207.14379883520002], [0, 76.6107788288, 72.2271728787, 234.516174336], [23.338378918199965, 50.761474611199986, 103.13391115169998, 137.30041502720002], [76.13696289739994, 230.4514770432, 113.64648435640004, 253.0598754816]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046038.jpg", "text": "Can you discuss the entities within the region of image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[268.1757812412, 223.8840942592, 597.436523418, 253.898803712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046038_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[83.17578124120001, 7.884094259199998, 412.436523418, 37.89880371199999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046038.jpg", "text": "Can you discuss the entities within the region of image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three benches, and two desks.", "boxes_value": [[268.1757812412, 223.8840942592, 597.436523418, 253.898803712], [497.8522949101, 238.316711424, 540.6475830332, 273.9274292224], [515.1011962721, 219.2083129856, 596.229370094, 272.758239744], [578.7486572343, 227.1282958848, 597.436523418, 244.6358032384], [268.1757812412, 223.8840942592, 303.7194824235, 251.529174784], [271.3352660919, 237.8383178752, 302.1397704941, 253.898803712]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00046038_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three benches, and two desks.", "boxes_value": [[83.17578124120001, 7.884094259199998, 412.436523418, 37.89880371199999], [312.8522949101, 22.316711424000005, 355.6475830332, 45], [330.10119627209997, 3.2083129856000028, 411.22937009400005, 45], [393.7486572343, 11.128295884799996, 412.436523418, 28.6358032384], [83.17578124120001, 7.884094259199998, 118.71948242349998, 35.52917478399999], [86.3352660919, 21.838317875200005, 117.13977049409999, 37.89880371199999]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00046039.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[393.016601593, 247.919799808, 752.363769542, 449.9747314688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046039_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[90.01660159300002, 50.91979980799999, 449.363769542, 252.97473146879997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046039.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a person, and three street lights.", "boxes_value": [[393.016601593, 247.919799808, 752.363769542, 449.9747314688], [680.089355484, 367.276184064, 752.363769542, 449.9747314688], [458.391235379, 322.141723648, 485.920898463, 400.044921856], [393.016601593, 261.2590942208, 415.67150879099995, 336.6676025344], [643.876586892, 247.919799808, 663.754272489, 336.5269165056], [514.657348667, 260.5504150528, 533.206665068, 323.0686034944]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046039_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a person, and three street lights.", "boxes_value": [[90.01660159300002, 50.91979980799999, 449.363769542, 252.97473146879997], [377.08935548399995, 170.276184064, 449.363769542, 252.97473146879997], [155.39123537900002, 125.14172364799998, 182.92089846300001, 203.04492185599997], [90.01660159300002, 64.25909422080002, 112.67150879099995, 139.66760253439998], [340.876586892, 50.91979980799999, 360.75427248899996, 139.5269165056], [211.65734866699995, 63.55041505280002, 230.206665068, 126.06860349440001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046040.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[132.2573852672, 328.0796508625, 204.5067748864, 417.708129911]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046040_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[18.257385267199993, 23.07965086249999, 90.5067748864, 112.70812991100001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046040.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include six street lights.", "boxes_value": [[132.2573852672, 328.0796508625, 204.5067748864, 417.708129911], [132.2573852672, 357.2071533135, 140.6452636672, 412.84655759199995], [142.3228149248, 277.2431030565, 157.9801025536, 454.22656252549996], [166.4140624896, 328.0796508625, 174.1122436608, 417.708129911], [179.2443237376, 341.459777836, 187.8588867072, 415.5388183365], [189.5085449216, 349.15795897249996, 196.5349731328, 414.0450439555], [198.7182617088, 354.7894897465, 204.5067748864, 414.14465329399997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046040_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include six street lights.", "boxes_value": [[18.257385267199993, 23.07965086249999, 90.5067748864, 112.70812991100001], [18.257385267199993, 52.20715331349999, 26.6452636672, 107.84655759199995], [28.322814924800014, 0, 43.980102553600005, 135], [52.41406248960001, 23.07965086249999, 60.112243660800004, 112.70812991100001], [65.24432373760001, 36.459777836, 73.85888670720001, 110.53881833650001], [75.50854492159999, 44.157958972499955, 82.53497313279999, 109.04504395549998], [84.71826170879999, 49.78948974650001, 90.5067748864, 109.14465329399997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046041.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe.", "boxes_value": [[0, 252.5347290112, 313.7138061243, 511.7145996288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046041_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe.", "boxes_value": [[0, 65.5347290112, 313.7138061243, 324.7145996288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046041.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a cabinet, a chair, a pillow, and a lamp.", "boxes_value": [[0, 252.5347290112, 313.7138061243, 511.7145996288], [214.2499389499, 252.5347290112, 253.0484619477, 304.2660522496], [155.2176513932, 296.8049926656, 285.2177734221, 492.3090820096], [0, 375.9310913024, 313.7138061243, 511.6696777216], [111.587341335, 386.2965698048, 186.73693844910002, 511.7145996288], [36.437805144500004, 129.7514648576, 150.4578247347, 388.8879394304]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046041_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a cabinet, a chair, a pillow, and a lamp.", "boxes_value": [[0, 65.5347290112, 313.7138061243, 324.7145996288], [214.2499389499, 65.5347290112, 253.0484619477, 117.26605224960002], [155.2176513932, 109.80499266560003, 285.2177734221, 305.3090820096], [0, 188.93109130239998, 313.7138061243, 324.6696777216], [111.587341335, 199.29656980480001, 186.73693844910002, 324.7145996288], [36.437805144500004, 0, 150.4578247347, 201.88793943040002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046042.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[61.423644992, 176.702819808, 206.983154304, 324.727050768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046042_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[36.423644992, 37.70281980799999, 181.983154304, 185.72705076800003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046042.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a desk, a person, a projector, a router, and a handbag.", "boxes_value": [[61.423644992, 176.702819808, 206.983154304, 324.727050768], [161.364746112, 159.68237304, 197.096618624, 220.667724624], [50.999572736000005, 183.376403808, 82.738342272, 286.196838384], [170.88684083200002, 176.702819808, 191.17828371200002, 206.294555664], [0.710144064, 240.56707761599998, 393.985229504, 480.844787616], [62.766845696000004, 124.839355488, 175.30572511999998, 273.440429664], [79.83435059199999, 247.34460451200002, 206.983154304, 302.57666015999996], [61.423644992, 295.960327152, 116.368041984, 324.727050768], [59.425170879999996, 297.47399904, 115.76403808, 327.96667478399996]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046042_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a desk, a person, a projector, a router, and a handbag.", "boxes_value": [[36.423644992, 37.70281980799999, 181.983154304, 185.72705076800003], [136.364746112, 20.682373039999987, 172.096618624, 81.66772462399999], [25.999572736000005, 44.37640380799999, 57.738342272, 147.196838384], [145.88684083200002, 37.70281980799999, 166.17828371200002, 67.294555664], [0, 101.56707761599998, 218, 222], [37.766845696000004, 0, 150.30572511999998, 134.44042966400002], [54.83435059199999, 108.34460451200002, 181.983154304, 163.57666015999996], [36.423644992, 156.960327152, 91.368041984, 185.72705076800003], [34.425170879999996, 158.47399904000002, 90.76403808, 188.96667478399996]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046044.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[67.6245116928, 255.7116088832, 741.4006348032, 511.4414062592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046044_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[67.6245116928, 64.7116088832, 741.4006348032, 320.4414062592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046044.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a guitar, a drum, a cymbal, a person, two bottles, and a speaker.", "boxes_value": [[67.6245116928, 255.7116088832, 741.4006348032, 511.4414062592], [166.23120115199998, 46.3464355328, 767.8652343552001, 511.9332885504], [222.03302000639997, 139.928833024, 717.5296631040001, 511.55133056], [678.9251708928, 367.6838989312, 741.4006348032, 463.0455932416], [651.3387451392, 255.7116088832, 689.7504882432, 304.1627807744], [225.956787072, 21.843383808, 579.835449216, 511.9097900544], [133.5230102784, 329.1058349568, 189.5119018752, 511.4414062592], [67.6245116928, 361.8073120256, 103.7943725568, 487.163024896], [441.50207516160003, 246.983947776, 714.2183837952, 510.6790771712]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046044_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a guitar, a drum, a cymbal, a person, two bottles, and a speaker.", "boxes_value": [[67.6245116928, 64.7116088832, 741.4006348032, 320.4414062592], [166.23120115199998, 0, 767.8652343552001, 320.9332885504], [222.03302000639997, 0, 717.5296631040001, 320.55133056], [678.9251708928, 176.68389893120002, 741.4006348032, 272.0455932416], [651.3387451392, 64.7116088832, 689.7504882432, 113.1627807744], [225.956787072, 0, 579.835449216, 320.9097900544], [133.5230102784, 138.10583495679998, 189.5119018752, 320.4414062592], [67.6245116928, 170.80731202560003, 103.7943725568, 296.163024896], [441.50207516160003, 55.98394777600001, 714.2183837952, 319.6790771712]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046045.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[66.40881345, 260.7511596544, 185.992736832, 512.0920410112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046045_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[30.408813449999997, 63.7511596544, 149.992736832, 315]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046045.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two chairs, a bottle, and a storage box.", "boxes_value": [[66.40881345, 260.7511596544, 185.992736832, 512.0920410112], [66.40881345, 260.7511596544, 156.965454072, 512.0920410112], [92.685852063, 495.3380737536, 117.02886961499999, 512.1633300992], [119.53479006, 467.7731933696, 140.655944835, 488.8943481344], [74.677490244, 261.341796864, 133.291137672, 296.5099487232], [155.16021729599998, 360.35083008, 185.992736832, 389.3033447424]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046045_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two chairs, a bottle, and a storage box.", "boxes_value": [[30.408813449999997, 63.7511596544, 149.992736832, 315], [30.408813449999997, 63.7511596544, 120.965454072, 315], [56.685852063, 298.3380737536, 81.02886961499999, 315], [83.53479006, 270.7731933696, 104.65594483500001, 291.8943481344], [38.677490244, 64.341796864, 97.29113767199999, 99.50994872320001], [119.16021729599998, 163.35083007999998, 149.992736832, 192.3033447424]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046047.jpg", "text": "Please elucidate the area of the image . Give coordinates for the items you reference.", "boxes_value": [[234.979248064, 298.0825805824, 277.04052732400004, 425.0599975424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046047_crop.jpg", "text": "Please elucidate the area of the image . Give coordinates for the items you reference.", "boxes_value": [[10.97924806399999, 32.0825805824, 53.04052732400004, 159.05999754240003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046047.jpg", "text": "Please elucidate the area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a sneakers, a handbag, and a stroller.", "boxes_value": [[234.979248064, 298.0825805824, 277.04052732400004, 425.0599975424], [113.2126464678, 159.9678955008, 334.4464721994, 423.7697754112], [225.205200187, 197.6287231488, 375.2891845494, 486.67932129279995], [252.96813966379997, 397.8015747072, 277.04052732400004, 425.0599975424], [234.979248064, 298.0825805824, 271.4783935292, 399.6883545088], [103.83245850120001, 193.9159545856, 301.5032958762, 512.1336669696]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046047_crop.jpg", "text": "Please elucidate the area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a sneakers, a handbag, and a stroller.", "boxes_value": [[10.97924806399999, 32.0825805824, 53.04052732400004, 159.05999754240003], [0, 0, 63, 157.7697754112], [1.2052001870000026, 0, 63, 190], [28.968139663799974, 131.80157470720002, 53.04052732400004, 159.05999754240003], [10.97924806399999, 32.0825805824, 47.47839352919999, 133.6883545088], [0, 0, 63, 190]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046048.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[19.001464820800003, 213.7313842688, 322.4609375028, 380.9789428736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046048_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[19.001464820800003, 42.731384268800014, 322.4609375028, 209.97894287359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046048.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, two pillows, a couch, a desk, and a chair.", "boxes_value": [[19.001464820800003, 213.7313842688, 322.4609375028, 380.9789428736], [186.2490234324, 213.7313842688, 234.5266113156, 296.4930419712], [277.05688477719997, 265.4573974528, 322.4609375028, 306.8382568448], [241.4234619152, 244.7670287872, 432.23510740600005, 375.8063354368], [166.1333007492, 287.8720702976, 273.0338134416, 380.9789428736], [19.001464820800003, 278.6763305472, 153.4891967932, 359.1390381056], [1.1847534283999999, 279.2510375936, 238.5498047044, 483.2815552]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4], [6]]}, {"image_path": "objects365_v1_00046048_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, two pillows, a couch, a desk, and a chair.", "boxes_value": [[19.001464820800003, 42.731384268800014, 322.4609375028, 209.97894287359998], [186.2490234324, 42.731384268800014, 234.5266113156, 125.4930419712], [277.05688477719997, 94.45739745280002, 322.4609375028, 135.83825684480001], [241.4234619152, 73.76702878719999, 398, 204.8063354368], [166.1333007492, 116.87207029759998, 273.0338134416, 209.97894287359998], [19.001464820800003, 107.6763305472, 153.4891967932, 188.13903810559998], [1.1847534283999999, 108.25103759360002, 238.5498047044, 251]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4], [6]]}, {"image_path": "objects365_v1_00046050.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.0886230205, 299.1624145408, 180.7711181595, 374.0997924864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046050_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.0886230205, 19.1624145408, 180.7711181595, 94.09979248640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046050.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four bowls, and a cutting.", "boxes_value": [[0.0886230205, 299.1624145408, 180.7711181595, 374.0997924864], [80.3231201404, 340.6171264512, 141.9737548613, 374.0997924864], [76.78491207520001, 299.1624145408, 154.1975707699, 329.225280768], [130.9020995854, 310.3233032192, 180.7711181595, 361.8759765504], [0.0886230205, 304.477111808, 25.581603984999997, 346.9948120064], [0.3000488045, 281.4304199168, 82.6331177024, 318.9153442304]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046050_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four bowls, and a cutting.", "boxes_value": [[0.0886230205, 19.1624145408, 180.7711181595, 94.09979248640002], [80.3231201404, 60.61712645120002, 141.9737548613, 94.09979248640002], [76.78491207520001, 19.1624145408, 154.1975707699, 49.225280768000005], [130.9020995854, 30.3233032192, 180.7711181595, 81.87597655040003], [0.0886230205, 24.477111808000018, 25.581603984999997, 66.99481200640002], [0.3000488045, 1.4304199168000196, 82.6331177024, 38.915344230400024]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046052.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[39.6281127936, 543.9185791083, 94.922729472, 650.4842529627]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046052_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[14.628112793600003, 26.918579108300037, 69.922729472, 133.4842529627]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046052.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, five people, and a wheelchair.", "boxes_value": [[39.6281127936, 543.9185791083, 94.922729472, 650.4842529627], [32.7109374976, 621.2335204914, 83.9233398272, 650.4978027265], [39.6281127936, 605.575317387, 72.6081543168, 650.4842529627], [34.716247552, 599.2600097950001, 62.7843017728, 648.3791503865], [70.1414184448, 580.6505126705, 94.113891584, 611.5827636852], [58.5418701312, 543.9185791083, 71.301391616, 578.330688486], [72.0747070464, 546.6251220590999, 83.2875976704, 576.0107421811], [69.1046142464, 590.6499023189, 94.922729472, 619.8779296846]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046052_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, five people, and a wheelchair.", "boxes_value": [[14.628112793600003, 26.918579108300037, 69.922729472, 133.4842529627], [7.7109374976, 104.23352049139999, 58.923339827199996, 133.49780272650003], [14.628112793600003, 88.57531738700004, 47.6081543168, 133.4842529627], [9.716247551999999, 82.26000979500009, 37.7843017728, 131.3791503865], [45.141418444799996, 63.65051267050001, 69.113891584, 94.5827636852], [33.5418701312, 26.918579108300037, 46.301391616000004, 61.330688485999985], [47.07470704639999, 29.62512205909991, 58.287597670400004, 59.0107421811], [44.104614246400004, 73.64990231889999, 69.922729472, 102.87792968459996]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046053.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[351.4376220653, 117.9703369216, 428.59851074939996, 285.883789056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046053_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[19.43762206529999, 42.970336921599994, 96.59851074939996, 210.883789056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046053.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, a flower, a desk, a person, and a bakset.", "boxes_value": [[351.4376220653, 117.9703369216, 428.59851074939996, 285.883789056], [360.60412599769995, 107.6316528128, 425.6950683923, 198.805053696], [359.28051755380005, 238.9911499264, 399.4429931908, 270.4484253184], [351.4376220653, 211.1531372032, 428.59851074939996, 285.883789056], [370.5168456781, 117.9703369216, 417.47216795919996, 189.659240704], [367.2736816481, 253.549133312, 392.59045412250003, 277.8060302848]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046053_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, a flower, a desk, a person, and a bakset.", "boxes_value": [[19.43762206529999, 42.970336921599994, 96.59851074939996, 210.883789056], [28.604125997699953, 32.6316528128, 93.69506839230002, 123.80505369599999], [27.280517553800053, 163.9911499264, 67.4429931908, 195.44842531839998], [19.43762206529999, 136.1531372032, 96.59851074939996, 210.883789056], [38.516845678100026, 42.970336921599994, 85.47216795919996, 114.65924070400001], [35.27368164810002, 178.549133312, 60.590454122500034, 202.8060302848]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046054.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify.", "boxes_value": [[377.1756592128, 328.4423827968, 534.7854004224, 496.216674816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046054_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify.", "boxes_value": [[40.175659212799985, 42.44238279680002, 197.78540042240002, 210.21667481600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046054.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify. For your reference, objects involved in this region include seven benches.", "boxes_value": [[377.1756592128, 328.4423827968, 534.7854004224, 496.216674816], [398.550292992, 424.2287597568, 533.8002929664001, 496.216674816], [328.1776123392, 377.3884887552, 434.51269532159995, 426.419067392], [435.7385253888, 375.8562621952, 533.7996826368001, 426.1126098432], [377.1756592128, 351.482788096, 462.56799313920004, 380.2721557504], [465.0076904448, 345.1393432576, 534.7854004224, 379.2962646528], [335.0949706752, 331.526977536, 407.06823728640006, 355.3809814528], [407.47949222399996, 328.4423827968, 473.283691392, 353.94152832]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00046054_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify. For your reference, objects involved in this region include seven benches.", "boxes_value": [[40.175659212799985, 42.44238279680002, 197.78540042240002, 210.21667481600002], [61.55029299199998, 138.2287597568, 196.8002929664001, 210.21667481600002], [0, 91.3884887552, 97.51269532159995, 140.419067392], [98.73852538879999, 89.85626219519997, 196.79968263680007, 140.1126098432], [40.175659212799985, 65.48278809599998, 125.56799313920004, 94.27215575039997], [128.0076904448, 59.13934325759999, 197.78540042240002, 93.2962646528], [0, 45.526977536000004, 70.06823728640006, 69.38098145279997], [70.47949222399996, 42.44238279680002, 136.28369139199998, 67.94152831999997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00046057.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[256.1417236224, 390.1854247936, 409.86279298560004, 504.171325696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046057_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[39.14172362239998, 29.18542479360002, 192.86279298560004, 143.171325696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046057.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a bracelet, a belt, and two sneakers.", "boxes_value": [[256.1417236224, 390.1854247936, 409.86279298560004, 504.171325696], [323.4295654656, 163.1985473536, 435.7183838208, 498.67871093760004], [126.06835937279999, 32.5627441152, 341.47961425920005, 511.8878173696], [315.70788572159995, 404.7813720576, 340.6345214976, 420.284790016], [256.1417236224, 390.1854247936, 315.01904294400003, 413.8309326336], [343.2839355648, 474.618408192, 363.7830810624, 504.171325696], [374.2564697088, 463.024780288, 409.86279298560004, 483.208068864]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046057_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a bracelet, a belt, and two sneakers.", "boxes_value": [[39.14172362239998, 29.18542479360002, 192.86279298560004, 143.171325696], [106.42956546559998, 0, 218.7183838208, 137.67871093760004], [0, 0, 124.47961425920005, 150.8878173696], [98.70788572159995, 43.78137205759998, 123.63452149760002, 59.28479001599999], [39.14172362239998, 29.18542479360002, 98.01904294400003, 52.830932633600014], [126.2839355648, 113.618408192, 146.78308106240002, 143.171325696], [157.2564697088, 102.02478028799999, 192.86279298560004, 122.20806886399998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046058.jpg", "text": "What can you share about the area in the presented image ? Give coordinates for the items you reference.", "boxes_value": [[114.2142333952, 453.99011228160003, 511.0164184576, 685.2895507968001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046058_crop.jpg", "text": "What can you share about the area in the presented image ? Give coordinates for the items you reference.", "boxes_value": [[99.2142333952, 57.990112281600034, 496.0164184576, 289.2895507968001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046058.jpg", "text": "What can you share about the area in the presented image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, a cabinet, two storage boxes, a desk, and a person.", "boxes_value": [[114.2142333952, 453.99011228160003, 511.0164184576, 685.2895507968001], [295.6190795776, 563.9320068096, 400.030334464, 685.2895507968001], [2.4110107648, 457.97802731519994, 410.0479126016, 656.1523437312001], [401.2680664064, 523.1999511552, 511.0164184576, 653.0167236096], [410.1891479552, 453.99011228160003, 510.5012817408, 537.715209984], [114.2142333952, 596.0179443456, 278.7306518528, 676.7528076287999], [112.3773193216, 478.3157958912, 126.330993664, 503.20617676800003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046058_crop.jpg", "text": "What can you share about the area in the presented image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, a cabinet, two storage boxes, a desk, and a person.", "boxes_value": [[99.2142333952, 57.990112281600034, 496.0164184576, 289.2895507968001], [280.6190795776, 167.9320068096, 385.030334464, 289.2895507968001], [0, 61.97802731519994, 395.0479126016, 260.15234373120006], [386.2680664064, 127.19995115519998, 496.0164184576, 257.01672360960004], [395.1891479552, 57.990112281600034, 495.5012817408, 141.715209984], [99.2142333952, 200.01794434559997, 263.7306518528, 280.7528076287999], [97.3773193216, 82.31579589120003, 111.330993664, 107.20617676800003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046062.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[468.13610838870005, 92.8070068224, 682.9215088204, 323.3000488448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046062_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[54.13610838870005, 57.8070068224, 268.92150882040005, 288.3000488448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046062.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a mouse, a speaker, a moniter, and a router.", "boxes_value": [[468.13610838870005, 92.8070068224, 682.9215088204, 323.3000488448], [489.5483398618, 225.9891968, 682.2829590068, 310.419799808], [525.5927734101999, 293.2292480512, 580.9016113369, 323.3000488448], [468.13610838870005, 183.6856689664, 518.6120605129, 271.7501220864], [527.4473877171, 92.8070068224, 682.9215088204, 269.1249389568], [620.4970703457, 224.184448256, 665.6032714840001, 244.6492919808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046062_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a mouse, a speaker, a moniter, and a router.", "boxes_value": [[54.13610838870005, 57.8070068224, 268.92150882040005, 288.3000488448], [75.5483398618, 190.9891968, 268.28295900679996, 275.419799808], [111.59277341019992, 258.2292480512, 166.90161133690003, 288.3000488448], [54.13610838870005, 148.6856689664, 104.61206051290003, 236.75012208639998], [113.44738771710001, 57.8070068224, 268.92150882040005, 234.1249389568], [206.49707034569997, 189.184448256, 251.60327148400006, 209.6492919808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046063.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates.", "boxes_value": [[279.2887573224, 36.1243286016, 551.2016601476, 398.1065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046063_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates.", "boxes_value": [[68.28875732239999, 36.1243286016, 340.2016601476, 398.1065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046063.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, a person, a glasses, and a tie.", "boxes_value": [[279.2887573224, 36.1243286016, 551.2016601476, 398.1065673728], [370.428955085, 36.1243286016, 551.2016601476, 398.1065673728], [162.657958974, 41.9327392768, 372.9870605314, 392.9903564288], [203.1188964959, 189.1654052864, 382.82763669220003, 441.5452270592], [256.621948268, 242.5069580288, 302.27484131389997, 261.3427734528], [279.2887573224, 295.8218383872, 295.57055665269996, 338.9207153152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046063_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, a person, a glasses, and a tie.", "boxes_value": [[68.28875732239999, 36.1243286016, 340.2016601476, 398.1065673728], [159.42895508499998, 36.1243286016, 340.2016601476, 398.1065673728], [0, 41.9327392768, 161.9870605314, 392.9903564288], [0, 189.1654052864, 171.82763669220003, 441.5452270592], [45.62194826799998, 242.5069580288, 91.27484131389997, 261.3427734528], [68.28875732239999, 295.8218383872, 84.57055665269996, 338.9207153152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046064.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates.", "boxes_value": [[121.3657836935, 260.7480468992, 653.242797826, 348.3044433408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046064_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates.", "boxes_value": [[121.3657836935, 22.748046899200006, 653.242797826, 110.30444334079999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046064.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three potted plants, and two people.", "boxes_value": [[121.3657836935, 260.7480468992, 653.242797826, 348.3044433408], [121.3657836935, 281.1209106432, 174.4407958781, 348.3044433408], [510.178222633, 261.6201782272, 542.5493164173, 282.6871337984], [460.08288571390005, 260.7480468992, 484.856201167, 281.0170898432], [405.07934571370004, 303.4111328256, 423.2154541039, 322.5410766848], [603.3972167953, 281.9318237184, 653.242797826, 344.9311523328]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046064_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three potted plants, and two people.", "boxes_value": [[121.3657836935, 22.748046899200006, 653.242797826, 110.30444334079999], [121.3657836935, 43.12091064319998, 174.4407958781, 110.30444334079999], [510.178222633, 23.620178227199972, 542.5493164173, 44.6871337984], [460.08288571390005, 22.748046899200006, 484.856201167, 43.01708984319998], [405.07934571370004, 65.41113282560002, 423.2154541039, 84.54107668479998], [603.3972167953, 43.93182371839998, 653.242797826, 106.93115233280002]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046066.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[70.88665772600001, 324.9044189696, 332.35156249159996, 465.9004516352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046066_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[65.88665772600001, 35.90441896959999, 327.35156249159996, 176.9004516352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046066.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[70.88665772600001, 324.9044189696, 332.35156249159996, 465.9004516352], [114.8997192304, 33.6561279488, 361.54882814719997, 466.3197021696], [70.88665772600001, 343.089233408, 111.50421140399999, 372.4241333248], [93.879516638, 324.9044189696, 121.5466308928, 360.3377685504], [282.2221679408, 435.3337402368, 332.35156249159996, 465.9004516352], [307.5925293136, 414.2427367936, 351.60864261119997, 457.9531250176]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046066_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[65.88665772600001, 35.90441896959999, 327.35156249159996, 176.9004516352], [109.8997192304, 0, 356.54882814719997, 177.31970216960002], [65.88665772600001, 54.089233407999984, 106.50421140399999, 83.42413332479998], [88.879516638, 35.90441896959999, 116.5466308928, 71.33776855040003], [277.2221679408, 146.3337402368, 327.35156249159996, 176.9004516352], [302.5925293136, 125.24273679359999, 346.60864261119997, 168.95312501759997]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046067.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[119.488865859, 411.0755821568, 675.8120873196, 495.8238956544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046067_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[119.488865859, 22.07558215680001, 675.8120873196, 106.82389565440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046067.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four sneakers, and a gloves.", "boxes_value": [[119.488865859, 411.0755821568, 675.8120873196, 495.8238956544], [119.488865859, 433.95242752, 150.6845640826, 492.184397568], [200.59768119440002, 446.9506351104, 242.71187378860003, 486.4651862016], [455.88254179240005, 442.271280384, 486.03838337279996, 495.8238956544], [623.2993286764, 411.0755821568, 675.8120873196, 441.751352064], [401.8388671924, 433.3522338816, 432.0600586074, 460.6105956864]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046067_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four sneakers, and a gloves.", "boxes_value": [[119.488865859, 22.07558215680001, 675.8120873196, 106.82389565440002], [119.488865859, 44.952427520000015, 150.6845640826, 103.18439756800001], [200.59768119440002, 57.950635110400015, 242.71187378860003, 97.46518620159998], [455.88254179240005, 53.27128038400002, 486.03838337279996, 106.82389565440002], [623.2993286764, 22.07558215680001, 675.8120873196, 52.751352064], [401.8388671924, 44.3522338816, 432.0600586074, 71.61059568640002]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046073.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference.", "boxes_value": [[293.5952758911, 94.0072631808, 559.2821044581, 208.8366088704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046073_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference.", "boxes_value": [[66.59527589110002, 29.007263180799995, 332.2821044581, 143.8366088704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046073.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a soccer, and four dogs.", "boxes_value": [[293.5952758911, 94.0072631808, 559.2821044581, 208.8366088704], [293.5952758911, 94.0072631808, 349.1427001974, 148.7193603584], [373.136474625, 126.96893312, 428.5028076283, 198.2451782144], [411.74450680549995, 143.3030395392, 473.47473144469996, 204.821228032], [444.9127197131, 143.1969604608, 507.27941893869996, 200.1088867328], [490.3695068687, 133.1964721664, 559.2821044581, 208.8366088704]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046073_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a soccer, and four dogs.", "boxes_value": [[66.59527589110002, 29.007263180799995, 332.2821044581, 143.8366088704], [66.59527589110002, 29.007263180799995, 122.14270019740002, 83.7193603584], [146.136474625, 61.96893312, 201.5028076283, 133.2451782144], [184.74450680549995, 78.3030395392, 246.47473144469996, 139.821228032], [217.9127197131, 78.1969604608, 280.27941893869996, 135.1088867328], [263.3695068687, 68.19647216640001, 332.2821044581, 143.8366088704]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046074.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[342.146972672, 1.448211648, 545.434570304, 262.59271238400004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046074_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[51.146972672000004, 1.448211648, 254.43457030399998, 262.59271238400004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046074.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a fan, two lamps, a desk, and a pillow.", "boxes_value": [[342.146972672, 1.448211648, 545.434570304, 262.59271238400004], [342.146972672, 1.448211648, 545.434570304, 74.392578144], [417.08435059199996, 31.343444832, 450.567016576, 64.42749024], [483.99145510399995, 196.75494384, 517.233032256, 249.037902816], [465.59558105599996, 242.26049803200002, 510.45568844800005, 262.59271238400004], [479.069946304, 245.67626952, 547.3598632960001, 280.068054192]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046074_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a fan, two lamps, a desk, and a pillow.", "boxes_value": [[51.146972672000004, 1.448211648, 254.43457030399998, 262.59271238400004], [51.146972672000004, 1.448211648, 254.43457030399998, 74.392578144], [126.08435059199996, 31.343444832, 159.56701657600001, 64.42749024], [192.99145510399995, 196.75494384, 226.233032256, 249.037902816], [174.59558105599996, 242.26049803200002, 219.45568844800005, 262.59271238400004], [188.06994630399998, 245.67626952, 256.35986329600007, 280.068054192]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046076.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object.", "boxes_value": [[7.342651392, 377.3216552777, 353.3728027136, 594.9858398478]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046076_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object.", "boxes_value": [[7.342651392, 55.32165527770002, 353.3728027136, 272.9858398478]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046076.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include a storage box, a towel, a desk, a vase, a cabinet, a person, a barrel, a bathtub, a cup, a plate, and a bottle.", "boxes_value": [[7.342651392, 377.3216552777, 353.3728027136, 594.9858398478], [159.3396606464, 334.67950439080005, 284.5949096448, 437.9555664157], [151.2835082752, 448.8183593645, 228.8244018688, 520.3171386884], [1.236816384, 433.7130126656, 119.3464355328, 636.4127197362], [30.040100096, 396.643188483, 69.8401489408, 449.9296874842], [0, 268.53735350930003, 155.801391616, 537.7170410251], [16.682861312, 399.9627685769, 41.8663330304, 450.3295898688], [243.2957763584, 531.5075683737, 320.6423950336, 646.7381592091999], [7.342651392, 377.3216552777, 353.3728027136, 594.9858398478], [60.694213888, 423.7166747945, 101.606872576, 445.0755615008], [56.1817627136, 440.8638915849, 103.1110229504, 452.5961914032], [319.2490234368, 518.7617187671, 375.3462524416, 631.7143554614]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11]]}, {"image_path": "objects365_v1_00046076_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include a storage box, a towel, a desk, a vase, a cabinet, a person, a barrel, a bathtub, a cup, a plate, and a bottle.", "boxes_value": [[7.342651392, 55.32165527770002, 353.3728027136, 272.9858398478], [159.3396606464, 12.679504390800048, 284.5949096448, 115.95556641569999], [151.2835082752, 126.8183593645, 228.8244018688, 198.31713868839995], [1.236816384, 111.71301266559999, 119.3464355328, 314.41271973619996], [30.040100096, 74.64318848300002, 69.8401489408, 127.92968748419997], [0, 0, 155.801391616, 215.7170410251], [16.682861312, 77.96276857689998, 41.8663330304, 128.32958986879999], [243.2957763584, 209.5075683737, 320.6423950336, 324.7381592091999], [7.342651392, 55.32165527770002, 353.3728027136, 272.9858398478], [60.694213888, 101.71667479450002, 101.606872576, 123.0755615008], [56.1817627136, 118.86389158489999, 103.1110229504, 130.5961914032], [319.2490234368, 196.76171876709998, 375.3462524416, 309.71435546140003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11]]}, {"image_path": "objects365_v1_00046077.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations.", "boxes_value": [[288.44909670640004, 1.0016479744, 489.3787842084, 93.8554687488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046077_crop.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations.", "boxes_value": [[50.44909670640004, 1.0016479744, 251.37878420840002, 93.8554687488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046077.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include five lamps.", "boxes_value": [[288.44909670640004, 1.0016479744, 489.3787842084, 93.8554687488], [288.44909670640004, 32.5871582208, 301.76831052800003, 62.6505126912], [338.6815185436, 50.8535156224, 349.3369140516, 93.8554687488], [354.6645507554, 1.0016479744, 372.5504150572, 34.489929216], [395.383300774, 29.5427856384, 409.08300778399996, 77.4918823424], [474.537353509, 2.1432495104, 489.3787842084, 56.942260736]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046077_crop.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include five lamps.", "boxes_value": [[50.44909670640004, 1.0016479744, 251.37878420840002, 93.8554687488], [50.44909670640004, 32.5871582208, 63.76831052800003, 62.6505126912], [100.68151854360002, 50.8535156224, 111.33691405159999, 93.8554687488], [116.6645507554, 1.0016479744, 134.55041505719998, 34.489929216], [157.38330077400002, 29.5427856384, 171.08300778399996, 77.4918823424], [236.537353509, 2.1432495104, 251.37878420840002, 56.942260736]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046078.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates.", "boxes_value": [[285.48986817599996, 37.565246592, 445.460021952, 362.815063488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046078_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates.", "boxes_value": [[40.48986817599996, 37.565246592, 200.46002195199998, 362.815063488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046078.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four pots, a bowl, two bottles, and a cutting.", "boxes_value": [[285.48986817599996, 37.565246592, 445.460021952, 362.815063488], [285.48986817599996, 37.565246592, 346.91107175999997, 161.782714816], [348.286193856, 45.815856960000005, 388.164123552, 140.697875968], [388.622497536, 50.399536127999994, 445.460021952, 158.11578368], [357.45349123200003, 140.23950195199998, 416.58288576, 197.07702636800002], [323.072204592, 336.215270976, 386.57110593600004, 370.88421632], [380.00225832, 308.439575168, 397.88409422399997, 366.099548352], [399.70880126400004, 306.249938944, 417.95562744, 365.73455808], [317.598144528, 283.988830592, 415.765991232, 362.815063488]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046078_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four pots, a bowl, two bottles, and a cutting.", "boxes_value": [[40.48986817599996, 37.565246592, 200.46002195199998, 362.815063488], [40.48986817599996, 37.565246592, 101.91107175999997, 161.782714816], [103.28619385600001, 45.815856960000005, 143.16412355199998, 140.697875968], [143.62249753600003, 50.399536127999994, 200.46002195199998, 158.11578368], [112.45349123200003, 140.23950195199998, 171.58288576, 197.07702636800002], [78.07220459199999, 336.215270976, 141.57110593600004, 370.88421632], [135.00225832, 308.439575168, 152.88409422399997, 366.099548352], [154.70880126400004, 306.249938944, 172.95562744, 365.73455808], [72.59814452799998, 283.988830592, 170.76599123199998, 362.815063488]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046079.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations.", "boxes_value": [[286.300964352, 207.7727051032, 357.9686889472, 371.29626465150005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046079_crop.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations.", "boxes_value": [[18.300964351999994, 41.772705103199996, 89.96868894720001, 205.29626465150005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046079.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations. For your reference, objects involved in this region include a hat, two helmets, a gloves, and a person.", "boxes_value": [[286.300964352, 207.7727051032, 357.9686889472, 371.29626465150005], [322.6395263488, 207.7727051032, 345.8558349824, 228.4654541313], [286.300964352, 276.9169311463, 329.2006835712, 317.7977905545], [322.6395263488, 279.9451293955, 357.9686889472, 324.358947753], [310.6708374016, 353.992187476, 336.1943969792, 371.29626465150005], [293.9144897536, 206.3115234526, 368.8181152256, 394.74096677330004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046079_crop.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations. For your reference, objects involved in this region include a hat, two helmets, a gloves, and a person.", "boxes_value": [[18.300964351999994, 41.772705103199996, 89.96868894720001, 205.29626465150005], [54.63952634880002, 41.772705103199996, 77.85583498239998, 62.46545413129999], [18.300964351999994, 110.91693114629999, 61.20068357119999, 151.7977905545], [54.63952634880002, 113.94512939549998, 89.96868894720001, 158.358947753], [42.67083740160001, 187.99218747600003, 68.19439697920001, 205.29626465150005], [25.91448975359998, 40.3115234526, 100.81811522560002, 228.74096677330004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046080.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[19.332397423, 193.571899392, 256.4936523576, 327.5626830848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046080_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[19.332397423, 33.571899392000006, 256.4936523576, 167.56268308480003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046080.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three bottles, a moniter, and a keyboard.", "boxes_value": [[19.332397423, 193.571899392, 256.4936523576, 327.5626830848], [19.332397423, 193.571899392, 51.4533691444, 242.0563964928], [53.8776245392, 197.8142700032, 84.1804199326, 239.6321411072], [84.7864379442, 194.1779174912, 112.0589599312, 239.6321411072], [112.8817138488, 203.9060668928, 217.5646972532, 298.4477538816], [142.65100095280002, 311.5331420672, 256.4936523576, 327.5626830848]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046080_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three bottles, a moniter, and a keyboard.", "boxes_value": [[19.332397423, 33.571899392000006, 256.4936523576, 167.56268308480003], [19.332397423, 33.571899392000006, 51.4533691444, 82.05639649279999], [53.8776245392, 37.814270003199994, 84.1804199326, 79.6321411072], [84.7864379442, 34.17791749119999, 112.0589599312, 79.6321411072], [112.8817138488, 43.9060668928, 217.5646972532, 138.44775388160002], [142.65100095280002, 151.5331420672, 256.4936523576, 167.56268308480003]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046081.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify.", "boxes_value": [[484.8647460872, 94.3502892544, 618.0288273176, 388.8277609984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046081_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify.", "boxes_value": [[33.86474608719999, 74.3502892544, 167.02882731759996, 368.8277609984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046081.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, a handbag, a sneakers, and three people.", "boxes_value": [[484.8647460872, 94.3502892544, 618.0288273176, 388.8277609984], [584.180836802, 94.3502892544, 618.0288273176, 115.7873499136], [527.7675191988, 172.952845056, 575.5307947376, 259.82930816], [547.3241360132, 343.697106944, 566.8807527504, 388.8277609984], [548.1224365091999, 94.4551391744, 640.2373046619999, 404.9747924992], [484.8647460872, 115.043457024, 550.1429443224, 317.095214848], [539.7196045164, 144.5922851328, 558.1102295096, 178.8797607424]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046081_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, a handbag, a sneakers, and three people.", "boxes_value": [[33.86474608719999, 74.3502892544, 167.02882731759996, 368.8277609984], [133.18083680200004, 74.3502892544, 167.02882731759996, 95.7873499136], [76.76751919879996, 152.952845056, 124.53079473759999, 239.82930815999998], [96.32413601320002, 323.697106944, 115.8807527504, 368.8277609984], [97.12243650919993, 74.4551391744, 189.23730466199993, 384.9747924992], [33.86474608719999, 95.043457024, 99.14294432240001, 297.095214848], [88.71960451639995, 124.59228513279999, 107.1102295096, 158.8797607424]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046082.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[223.34466552439997, 427.7947387904, 391.24536129890004, 461.4092407296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046082_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[42.34466552439997, 8.794738790400004, 210.24536129890004, 42.409240729600015]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046082.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three cars, and a suv.", "boxes_value": [[223.34466552439997, 427.7947387904, 391.24536129890004, 461.4092407296], [265.1530761561, 427.7947387904, 277.9401855592, 451.9638671872], [241.0642699964, 438.7816772608, 387.0042724574, 505.273681664], [223.34466552439997, 438.8331298816, 254.603881821, 461.4092407296], [275.84515381800003, 434.2747802624, 324.7717895618, 449.765686016], [335.0787963998, 428.7693481472, 391.24536129890004, 456.550598144]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046082_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three cars, and a suv.", "boxes_value": [[42.34466552439997, 8.794738790400004, 210.24536129890004, 42.409240729600015], [84.15307615609998, 8.794738790400004, 96.94018555920002, 32.96386718719998], [60.064269996399986, 19.781677260799995, 206.00427245740002, 50], [42.34466552439997, 19.833129881599973, 73.60388182099999, 42.409240729600015], [94.84515381800003, 15.274780262399986, 143.7717895618, 30.765686016000018], [154.0787963998, 9.769348147199992, 210.24536129890004, 37.55059814399999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046083.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[220.73040768950003, 211.9416503808, 514.3160400304, 317.1451416064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046083_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[73.73040768950003, 26.941650380800013, 367.3160400304, 132.1451416064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046083.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a person, a printer, a keyboard, and a moniter.", "boxes_value": [[220.73040768950003, 211.9416503808, 514.3160400304, 317.1451416064], [349.8789062389, 211.9416503808, 437.5017089613, 271.6831665152], [384.0351562353, 237.6361083904, 406.7416992088, 267.4000244224], [417.3018798721, 224.103515648, 514.3160400304, 311.4519653376], [220.73040768950003, 282.6473388544, 315.2798461695, 317.1451416064], [196.4541626297, 194.805786112, 308.89135744969997, 296.0631713792]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046083_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a person, a printer, a keyboard, and a moniter.", "boxes_value": [[73.73040768950003, 26.941650380800013, 367.3160400304, 132.1451416064], [202.87890623890002, 26.941650380800013, 290.5017089613, 86.68316651520001], [237.0351562353, 52.63610839040001, 259.7416992088, 82.40002442240001], [270.3018798721, 39.10351564800001, 367.3160400304, 126.45196533759997], [73.73040768950003, 97.6473388544, 168.2798461695, 132.1451416064], [49.454162629699994, 9.805786111999993, 161.89135744969997, 111.06317137920001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046087.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify.", "boxes_value": [[200.5939941376, 418.3873901347, 481.3780517376, 503.7918701283]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046087_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify.", "boxes_value": [[70.59399413759999, 21.387390134700013, 351.3780517376, 106.79187012829999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046087.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a potted plant, three people, and a luggage.", "boxes_value": [[200.5939941376, 418.3873901347, 481.3780517376, 503.7918701283], [395.4848022528, 466.1554565241, 425.2294921728, 499.20520019879996], [434.363525376, 422.3652343552, 448.8826904064, 466.7183227739], [445.3026123264, 418.3873901347, 473.3464965632, 485.6131591704], [200.5939941376, 457.52526853, 222.9234619392, 503.7918701283], [460.2209472512, 457.1552123974, 481.3780517376, 485.9427490283]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046087_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a potted plant, three people, and a luggage.", "boxes_value": [[70.59399413759999, 21.387390134700013, 351.3780517376, 106.79187012829999], [265.4848022528, 69.15545652409998, 295.2294921728, 102.20520019879996], [304.363525376, 25.365234355200016, 318.8826904064, 69.71832277390001], [315.3026123264, 21.387390134700013, 343.3464965632, 88.61315917040002], [70.59399413759999, 60.525268530000005, 92.9234619392, 106.79187012829999], [330.2209472512, 60.15521239740002, 351.3780517376, 88.9427490283]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046088.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[8.6134032926, 151.0986938368, 375.7891845403, 357.003051776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046088_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[8.6134032926, 52.09869383680001, 375.7891845403, 258.003051776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046088.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a sneakers, a bus, a car, and a van.", "boxes_value": [[8.6134032926, 151.0986938368, 375.7891845403, 357.003051776], [139.4072265762, 151.0986938368, 183.4729614284, 289.8055419904], [184.0708007538, 185.1494751232, 207.6161498922, 285.9528808448], [301.7039184836, 333.4841918976, 326.4135742387, 357.003051776], [8.6134032926, 256.5817260544, 61.8992920004, 272.922729472], [62.064208975499994, 235.48455808, 196.24072267089997, 271.0780639744], [350.67590331009995, 258.6685180416, 375.7891845403, 272.4274292224]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046088_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a sneakers, a bus, a car, and a van.", "boxes_value": [[8.6134032926, 52.09869383680001, 375.7891845403, 258.003051776], [139.4072265762, 52.09869383680001, 183.4729614284, 190.8055419904], [184.0708007538, 86.1494751232, 207.6161498922, 186.9528808448], [301.7039184836, 234.4841918976, 326.4135742387, 258.003051776], [8.6134032926, 157.5817260544, 61.8992920004, 173.92272947200001], [62.064208975499994, 136.48455808, 196.24072267089997, 172.07806397439998], [350.67590331009995, 159.6685180416, 375.7891845403, 173.4274292224]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046089.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[140.7981567592, 299.688110336, 670.2122802681, 511.4537353728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046089_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[132.7981567592, 53.68811033600002, 662.2122802681, 265.4537353728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046089.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two desks, a chair, three people, a pen, and two baksets.", "boxes_value": [[140.7981567592, 299.688110336, 670.2122802681, 511.4537353728], [518.4156493833, 207.6806030336, 682.6231689564, 414.8116454912], [633.7103271489, 350.9254150144, 683.1223144403, 473.2076416], [140.7981567592, 299.688110336, 670.2122802681, 511.4537353728], [149.0235595677, 247.3154296832, 363.16064451520003, 511.62982179840003], [465.24829102260003, 250.1377563648, 644.7396240400001, 460.836975104], [642.3635253682, 390.5938720768, 682.8150634753, 511.743225088], [472.37219241109995, 367.3758544896, 483.4440917687, 427.7664795136], [366.2039794787, 404.6810302976, 479.92456053110004, 496.24816896], [442.06738282969997, 457.5136718848, 594.0172119122, 512.0926513664]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5, 6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046089_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two desks, a chair, three people, a pen, and two baksets.", "boxes_value": [[132.7981567592, 53.68811033600002, 662.2122802681, 265.4537353728], [510.4156493833, 0, 674.6231689564, 168.8116454912], [625.7103271489, 104.92541501440002, 675, 227.2076416], [132.7981567592, 53.68811033600002, 662.2122802681, 265.4537353728], [141.0235595677, 1.3154296831999943, 355.16064451520003, 265.62982179840003], [457.24829102260003, 4.137756364799998, 636.7396240400001, 214.83697510399998], [634.3635253682, 144.59387207679998, 674.8150634753, 265.743225088], [464.37219241109995, 121.37585448959999, 475.4440917687, 181.76647951360002], [358.2039794787, 158.6810302976, 471.92456053110004, 250.24816896], [434.06738282969997, 211.51367188479998, 586.0172119122, 266]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5, 6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046090.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[16.3103637504, 349.48583984640004, 512.595214848, 447.1904296704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046090_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[16.3103637504, 24.48583984640004, 512, 122.19042967040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046090.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, a vase, and a cup.", "boxes_value": [[16.3103637504, 349.48583984640004, 512.595214848, 447.1904296704], [478.2249755648, 411.8693847552, 512.595214848, 446.23962401280005], [381.8040771584, 414.20874024960005, 414.2704467968, 447.1904296704], [273.0675659264, 427.60754396159996, 291.6197509632, 446.675048832], [16.3103637504, 349.48583984640004, 33.5017090048, 381.626220672], [17.7659301888, 350.8183593984, 32.056274432, 380.0748291072]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046090_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, a vase, and a cup.", "boxes_value": [[16.3103637504, 24.48583984640004, 512, 122.19042967040002], [478.2249755648, 86.86938475519997, 512, 121.23962401280005], [381.8040771584, 89.20874024960005, 414.2704467968, 122.19042967040002], [273.0675659264, 102.60754396159996, 291.6197509632, 121.67504883200002], [16.3103637504, 24.48583984640004, 33.5017090048, 56.62622067199999], [17.7659301888, 25.81835939839999, 32.056274432, 55.0748291072]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046094.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[104.8806762496, 570.1141357076, 498.18621824, 680.168701136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046094_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[98.8806762496, 28.114135707599985, 492.18621824, 138.16870113599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046094.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two high heels, and three leather shoes.", "boxes_value": [[104.8806762496, 570.1141357076, 498.18621824, 680.168701136], [360.2557983232, 592.481201198, 378.362487808, 630.2922363582001], [384.2205810688, 591.948730469, 406.587646464, 623.9016113432], [484.339904768, 570.1141357076, 498.18621824, 598.8718261364], [104.8806762496, 654.9669189786, 153.9094848512, 679.7104492194], [169.0305786368, 656.3415527046, 216.684753408, 680.168701136]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046094_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two high heels, and three leather shoes.", "boxes_value": [[98.8806762496, 28.114135707599985, 492.18621824, 138.16870113599998], [354.2557983232, 50.48120119800001, 372.362487808, 88.29223635820006], [378.2205810688, 49.948730469, 400.587646464, 81.90161134319999], [478.339904768, 28.114135707599985, 492.18621824, 56.871826136400045], [98.8806762496, 112.9669189786, 147.9094848512, 137.71044921939995], [163.0305786368, 114.34155270459996, 210.684753408, 138.16870113599998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046097.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[104.5106201088, 647.8922118905, 442.9965820416, 755.9096679331001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046097_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[85.5106201088, 27.892211890499993, 423.9965820416, 135.90966793310008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046097.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three cars, a van, and a stroller.", "boxes_value": [[104.5106201088, 647.8922118905, 442.9965820416, 755.9096679331001], [417.6005859328, 647.8922118905, 442.9965820416, 755.9096679331001], [189.6468506112, 652.2963867003, 221.8465575936, 671.442138689], [238.8166503936, 655.3422851914, 260.5731811328, 671.2246094047], [132.7855224832, 651.1679687431, 188.7308959744, 673.359619145], [76.0942382592, 647.0653075971, 132.9719848448, 676.7163085667], [104.5106201088, 665.853149438, 126.172485376, 693.704101553]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00046097_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three cars, a van, and a stroller.", "boxes_value": [[85.5106201088, 27.892211890499993, 423.9965820416, 135.90966793310008], [398.6005859328, 27.892211890499993, 423.9965820416, 135.90966793310008], [170.6468506112, 32.29638670029999, 202.8465575936, 51.44213868899999], [219.8166503936, 35.34228519140004, 241.57318113280002, 51.224609404700004], [113.7855224832, 31.167968743100005, 169.7308959744, 53.35961914500001], [57.0942382592, 27.065307597099945, 113.9719848448, 56.716308566700036], [85.5106201088, 45.853149437999946, 107.172485376, 73.70410155299999]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00046098.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[152.6176147456, 496.2822265309, 275.5180053504, 683.1525878688999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046098_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[31.617614745600008, 47.282226530900004, 154.51800535040002, 234]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046098.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a storage box, a flower, two people, and a trash bin can.", "boxes_value": [[152.6176147456, 496.2822265309, 275.5180053504, 683.1525878688999], [206.2796020736, 608.1657715108, 225.1284179456, 637.9414062838999], [152.6176147456, 496.2822265309, 191.271789568, 537.3231201472], [263.9753417728, 579.3048095864, 282.4399414272, 643.5048828434], [219.0921020416, 583.8499755602, 246.3629150208, 677.0252685821999], [251.672790528, 624.4857177428, 275.5180053504, 683.1525878688999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046098_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a storage box, a flower, two people, and a trash bin can.", "boxes_value": [[31.617614745600008, 47.282226530900004, 154.51800535040002, 234], [85.27960207359999, 159.16577151080003, 104.1284179456, 188.94140628389994], [31.617614745600008, 47.282226530900004, 70.271789568, 88.32312014720003], [142.9753417728, 130.30480958639998, 161.43994142719998, 194.50488284339997], [98.09210204159999, 134.84997556020005, 125.36291502079999, 228.02526858219994], [130.672790528, 175.48571774280003, 154.51800535040002, 234]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046099.jpg", "text": "Please describe the area in the image for me. Please point out the objects and their coordinates.", "boxes_value": [[147.431457536, 35.682434064, 640.0091552639999, 207.80603025599999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046099_crop.jpg", "text": "Please describe the area in the image for me. Please point out the objects and their coordinates.", "boxes_value": [[123.43145753600001, 35.682434064, 616, 207.80603025599999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046099.jpg", "text": "Please describe the area in the image for me. Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a van, and three street lights.", "boxes_value": [[147.431457536, 35.682434064, 640.0091552639999, 207.80603025599999], [147.431457536, 179.14166260800002, 158.557739264, 207.80603025599999], [584.8592529279999, 159.637878432, 640.0091552639999, 184.489379904], [281.969238272, 70.88317872, 341.42864992, 178.824829104], [389.301025408, 63.26019288, 428.025878912, 169.372314432], [597.517211904, 35.682434064, 621.375366208, 191.26794432]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046099_crop.jpg", "text": "Please describe the area in the image for me. Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a van, and three street lights.", "boxes_value": [[123.43145753600001, 35.682434064, 616, 207.80603025599999], [123.43145753600001, 179.14166260800002, 134.557739264, 207.80603025599999], [560.8592529279999, 159.637878432, 616, 184.489379904], [257.969238272, 70.88317872, 317.42864992, 178.824829104], [365.301025408, 63.26019288, 404.025878912, 169.372314432], [573.517211904, 35.682434064, 597.375366208, 191.26794432]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046101.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention.", "boxes_value": [[387.4210815488, 400.160888673, 451.0039672832, 570.9020996434]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046101_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention.", "boxes_value": [[16.421081548799975, 43.16088867299999, 80.00396728319998, 213.9020996434]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046101.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a bottle, two cups, and a plate.", "boxes_value": [[387.4210815488, 400.160888673, 451.0039672832, 570.9020996434], [0, 94.06219484600001, 512.9326171648, 723.1433105732], [387.4210815488, 483.96765133400004, 410.6220703232, 502.4166259913], [389.8471679488, 544.2142333893, 416.784484864, 570.9020996434], [409.5087890432, 527.2896728385, 444.455383296, 559.3240966983], [431.0153808384, 400.160888673, 451.0039672832, 430.9433593953]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046101_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a bottle, two cups, and a plate.", "boxes_value": [[16.421081548799975, 43.16088867299999, 80.00396728319998, 213.9020996434], [0, 0, 95, 256], [16.421081548799975, 126.96765133400004, 39.62207032319998, 145.4166259913], [18.847167948800006, 187.21423338930003, 45.78448486399998, 213.9020996434], [38.50878904320001, 170.2896728385, 73.45538329599998, 202.32409669829997], [60.015380838400006, 43.16088867299999, 80.00396728319998, 73.94335939529998]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046102.jpg", "text": "Please provide details for the area marked as in this photographic . Give coordinates for the items you reference.", "boxes_value": [[243.1510009628, 306.8674926592, 514.3101806752001, 399.5277709824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046102_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give coordinates for the items you reference.", "boxes_value": [[68.1510009628, 23.867492659200025, 339.31018067520006, 116.5277709824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046102.jpg", "text": "Please provide details for the area marked as in this photographic . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[243.1510009628, 306.8674926592, 514.3101806752001, 399.5277709824], [273.5460204932, 69.9356079104, 514.2666015326, 398.9375610368], [242.7501830922, 38.6265259008, 413.6669921832, 371.2213134848], [243.1510009628, 342.680969216, 280.1013793802, 369.3989868032], [273.84826660799996, 348.9341430784, 330.1265869356, 391.000732416], [351.15985106159997, 306.8674926592, 371.0562744406, 363.7142944256], [484.18139649880004, 349.5025634816, 514.3101806752001, 399.5277709824]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046102_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[68.1510009628, 23.867492659200025, 339.31018067520006, 116.5277709824], [98.54602049319999, 0, 339.2666015326, 115.93756103679999], [67.75018309219999, 0, 238.66699218320002, 88.22131348480002], [68.1510009628, 59.680969215999994, 105.1013793802, 86.39898680319999], [98.84826660799996, 65.93414307839998, 155.12658693560002, 108.000732416], [176.15985106159997, 23.867492659200025, 196.05627444060002, 80.71429442559997], [309.18139649880004, 66.50256348160002, 339.31018067520006, 116.5277709824]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046103.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[6.647155776, 155.343200688, 376.790710464, 355.19342040000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046103_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[6.647155776, 50.343200687999996, 376.790710464, 250.19342040000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046103.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two lamps, and three people.", "boxes_value": [[6.647155776, 155.343200688, 376.790710464, 355.19342040000004], [329.61645510399995, 223.069458, 376.790710464, 293.830749504], [120.645141632, 198.53540040000001, 140.943664576, 224.67736814399998], [6.647155776, 155.343200688, 25.097167999999996, 191.648132304], [336.352539072, 284.8631592, 359.197082496, 355.19342040000004], [319.524291968, 289.630065936, 334.98797606399995, 354.12481689599997], [271.247558592, 276.617980944, 292.368652352, 292.081604016]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046103_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two lamps, and three people.", "boxes_value": [[6.647155776, 50.343200687999996, 376.790710464, 250.19342040000004], [329.61645510399995, 118.069458, 376.790710464, 188.83074950399998], [120.645141632, 93.53540040000001, 140.943664576, 119.67736814399998], [6.647155776, 50.343200687999996, 25.097167999999996, 86.648132304], [336.352539072, 179.86315919999998, 359.197082496, 250.19342040000004], [319.524291968, 184.630065936, 334.98797606399995, 249.12481689599997], [271.247558592, 171.617980944, 292.368652352, 187.08160401599997]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046104.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[83.111450191, 158.4244384768, 294.506225552, 198.4086303744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046104_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[53.111450191, 10.424438476799992, 264.506225552, 50.40863037439999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046104.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include a mirror, and four lamps.", "boxes_value": [[83.111450191, 158.4244384768, 294.506225552, 198.4086303744], [145.120910627, 84.5565185536, 253.32775876099998, 415.925353984], [129.75054931300002, 158.4244384768, 179.862121593, 198.4086303744], [83.111450191, 166.1523437568, 113.14562990499999, 193.3708495872], [217.537109405, 166.1433715712, 245.324707012, 188.0291137536], [257.37414549, 161.9628906496, 294.506225552, 192.7013549568]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046104_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include a mirror, and four lamps.", "boxes_value": [[53.111450191, 10.424438476799992, 264.506225552, 50.40863037439999], [115.120910627, 0, 223.32775876099998, 60], [99.75054931300002, 10.424438476799992, 149.862121593, 50.40863037439999], [53.111450191, 18.152343756800008, 83.14562990499999, 45.37084958720001], [187.537109405, 18.143371571199992, 215.324707012, 40.0291137536], [227.37414549, 13.962890649600013, 264.506225552, 44.7013549568]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046105.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[116.00616457950001, 272.36834715000003, 154.6432495389, 296.34320069999995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046105_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[10.006164579500009, 6.368347150000034, 48.643249538899994, 30.343200699999954]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046105.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a picture, a couch, a desk, and a vase.", "boxes_value": [[116.00616457950001, 272.36834715000003, 154.6432495389, 296.34320069999995], [125.8259887851, 229.01324465, 189.7282714527, 287.34454345], [134.90075683950002, 276.11920165, 154.6432495389, 296.34320069999995], [10.5278320038, 260.4906616, 210.4979248257, 325.49645995], [43.9593505548, 284.32611084999996, 234.02380367670003, 333.2352295], [116.00616457950001, 272.36834715000003, 148.7845459152, 292.82403565]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046105_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a picture, a couch, a desk, and a vase.", "boxes_value": [[10.006164579500009, 6.368347150000034, 48.643249538899994, 30.343200699999954], [19.825988785099995, 0, 58, 21.344543450000003], [28.90075683950002, 10.11920164999998, 48.643249538899994, 30.343200699999954], [0, 0, 58, 36], [0, 18.326110849999964, 58, 36], [10.006164579500009, 6.368347150000034, 42.7845459152, 26.824035649999985]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046106.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[460.26696780559996, 237.038451968, 605.4813232685, 441.985045248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046106_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.26696780559996, 52.038451968000004, 182.48132326849998, 256.985045248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046106.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, two desks, a glasses, and a sneakers.", "boxes_value": [[460.26696780559996, 237.038451968, 605.4813232685, 441.985045248], [473.22033689349996, 334.30548096, 494.354858381, 404.5264892416], [460.26696780559996, 286.58239744, 605.4813232685, 333.6237182464], [545.4866943124, 350.6676635648, 598.6638183437, 412.7075805696], [475.0443219971, 237.038451968, 489.14347665409997, 265.2367612416], [460.3966262034, 394.9310377984, 483.5035048459, 441.985045248]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046106_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, two desks, a glasses, and a sneakers.", "boxes_value": [[37.26696780559996, 52.038451968000004, 182.48132326849998, 256.985045248], [50.22033689349996, 149.30548096, 71.35485838099999, 219.52648924160002], [37.26696780559996, 101.58239744000002, 182.48132326849998, 148.62371824640002], [122.4866943124, 165.66766356480002, 175.66381834369997, 227.7075805696], [52.044321997099985, 52.038451968000004, 66.14347665409997, 80.2367612416], [37.39662620339999, 209.93103779839998, 60.50350484590001, 256.985045248]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046107.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[194.03027342340002, 296.8375244288, 556.7221679354, 408.3255004672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046107_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[91.03027342340002, 28.83752442880001, 453.72216793539997, 140.32550046720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046107.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, three people, and a handbag.", "boxes_value": [[194.03027342340002, 296.8375244288, 556.7221679354, 408.3255004672], [432.3898925839, 300.4449462784, 501.83178710199996, 357.2610473472], [198.8124389629, 296.8375244288, 279.07647701850004, 356.3591919104], [234.98315429369998, 337.4835815424, 261.7823486259, 455.8872680448], [499.8178711045, 297.1903076352, 541.3442382522, 372.7252197376], [541.0704345485, 308.7678833152, 556.7221679354, 336.2383422976], [194.03027342340002, 374.7046508544, 215.7770385697, 408.3255004672]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046107_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, three people, and a handbag.", "boxes_value": [[91.03027342340002, 28.83752442880001, 453.72216793539997, 140.32550046720002], [329.3898925839, 32.44494627839998, 398.83178710199996, 89.26104734720002], [95.81243896289999, 28.83752442880001, 176.07647701850004, 88.3591919104], [131.98315429369998, 69.48358154239997, 158.7823486259, 168], [396.8178711045, 29.190307635199986, 438.34423825219994, 104.72521973760001], [438.0704345485, 40.76788331519998, 453.72216793539997, 68.23834229760001], [91.03027342340002, 106.70465085439997, 112.77703856970001, 140.32550046720002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046108.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[161.1057129113, 224.2832031232, 269.8178710825, 428.7197876224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046108_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[28.105712911300003, 51.283203123199996, 136.8178710825, 255.71978762240002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046108.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, two people, two hats, and a backpack.", "boxes_value": [[161.1057129113, 224.2832031232, 269.8178710825, 428.7197876224], [84.11065676380001, 327.4701538304, 266.8759765767, 411.9559936512], [161.1057129113, 224.2832031232, 269.8178710825, 428.7197876224], [216.5956420727, 250.3273925632, 268.958862324, 397.521728512], [222.7224773173, 253.7011372032, 250.56845145100002, 265.4257579008], [186.0830375958, 224.756024576, 213.92901172950002, 244.9076716544], [161.0553092871, 253.4613725184, 194.4075679508, 316.0819397632]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046108_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, two people, two hats, and a backpack.", "boxes_value": [[28.105712911300003, 51.283203123199996, 136.8178710825, 255.71978762240002], [0, 154.47015383040002, 133.8759765767, 238.9559936512], [28.105712911300003, 51.283203123199996, 136.8178710825, 255.71978762240002], [83.5956420727, 77.3273925632, 135.958862324, 224.52172851199998], [89.72247731729999, 80.7011372032, 117.56845145100002, 92.42575790080002], [53.08303759579999, 51.75602457599999, 80.92901172950002, 71.90767165439999], [28.055309287099988, 80.4613725184, 61.4075679508, 143.0819397632]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046112.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe.", "boxes_value": [[270.78686524340003, 320.524780288, 678.4289550878999, 418.778930688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046112_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe.", "boxes_value": [[102.78686524340003, 25.524780287999988, 510.4289550878999, 123.778930688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046112.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, two vans, and two traffic cones.", "boxes_value": [[270.78686524340003, 320.524780288, 678.4289550878999, 418.778930688], [270.78686524340003, 376.5220947456, 289.4212646162, 409.4700317184], [471.2504882609, 320.524780288, 665.5222168005, 405.2805786112], [655.9356689609, 326.8042602496, 678.4289550878999, 381.9000244224], [396.9018554918, 393.6080932864, 409.85388187, 417.557006848], [353.8914794675, 394.8299560448, 369.77600097379997, 418.778930688]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046112_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, two vans, and two traffic cones.", "boxes_value": [[102.78686524340003, 25.524780287999988, 510.4289550878999, 123.778930688], [102.78686524340003, 81.52209474559999, 121.4212646162, 114.47003171839998], [303.2504882609, 25.524780287999988, 497.5222168005, 110.28057861119999], [487.9356689609, 31.80426024960002, 510.4289550878999, 86.90002442240001], [228.9018554918, 98.60809328639999, 241.85388187, 122.55700684800001], [185.8914794675, 99.82995604479999, 201.77600097379997, 123.778930688]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046113.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object.", "boxes_value": [[114.910766592, 358.0471801856, 430.0068359168, 512.0482177536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046113_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object.", "boxes_value": [[78.910766592, 39.047180185599984, 394.0068359168, 193]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046113.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[114.910766592, 358.0471801856, 430.0068359168, 512.0482177536], [145.5156860416, 433.0852050944, 284.4215088128, 512.0482177536], [114.910766592, 404.0319824384, 198.88305664, 438.4206542848], [307.6472167936, 358.0471801856, 339.6366577152, 398.0339355648], [123.7078857216, 388.4371337728, 212.8784790016, 426.8244628992], [374.4251708928, 366.0444946432, 430.0068359168, 415.2282714624]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046113_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[78.910766592, 39.047180185599984, 394.0068359168, 193], [109.51568604159999, 114.08520509440001, 248.42150881280003, 193], [78.910766592, 85.03198243840001, 162.88305664, 119.42065428479998], [271.6472167936, 39.047180185599984, 303.6366577152, 79.0339355648], [87.7078857216, 69.4371337728, 176.8784790016, 107.8244628992], [338.4251708928, 47.04449464319998, 394.0068359168, 96.22827146240002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046115.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[253.0004882944, 363.9759521334, 379.7087402496, 502.2585448992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046115_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.000488294399986, 34.97595213340003, 158.7087402496, 173.2585448992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046115.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, and five storage boxes.", "boxes_value": [[253.0004882944, 363.9759521334, 379.7087402496, 502.2585448992], [305.517639168, 301.9744872944, 476.233154304, 682.4777831859001], [302.5466918912, 401.93090818310003, 357.5077514752, 541.7025146446], [353.2435302912, 438.8874511446, 388.7786865152, 521.3291015835], [260.919799808, 387.1245117388, 318.7913207808, 502.2585448992], [253.0004882944, 363.9759521334, 337.0665893376, 416.3649902628], [334.0206908928, 386.5153808525, 379.7087402496, 427.330078119]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046115_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, and five storage boxes.", "boxes_value": [[32.000488294399986, 34.97595213340003, 158.7087402496, 173.2585448992], [84.51763916800002, 0, 190, 207], [81.5466918912, 72.93090818310003, 136.5077514752, 207], [132.2435302912, 109.88745114459999, 167.77868651519998, 192.3291015835], [39.91979980799999, 58.12451173879998, 97.79132078079999, 173.2585448992], [32.000488294399986, 34.97595213340003, 116.06658933760002, 87.3649902628], [113.02069089280002, 57.515380852500016, 158.7087402496, 98.330078119]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046117.jpg", "text": "What does the area look like in the context of the image ? Please point out the objects and their coordinates.", "boxes_value": [[0.3483276095, 139.701232896, 263.1195068336, 501.5869750784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046117_crop.jpg", "text": "What does the area look like in the context of the image ? Please point out the objects and their coordinates.", "boxes_value": [[0.3483276095, 90.701232896, 263.1195068336, 452.5869750784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046117.jpg", "text": "What does the area look like in the context of the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, a desk, a stool, a person, a hat, a mask, two scissors, a blackboard, and a moniter.", "boxes_value": [[0.3483276095, 139.701232896, 263.1195068336, 501.5869750784], [70.5959472558, 246.9389648384, 215.4954834307, 441.1659546112], [0, 429.342956544, 293.99420164320003, 510.601196288], [15.1910400517, 335.6668701184, 89.541076659, 441.0407104512], [155.94238282979998, 203.0274048, 365.08105467089996, 452.4912109568], [193.95232775589997, 201.9474097664, 274.775493175, 254.3839024128], [244.3290405265, 247.3678588928, 275.1697524662, 280.4050190848], [0.3483276095, 485.6873168896, 25.453002903100003, 501.5869750784], [0.35473633509999997, 152.1531982336, 66.8575439555, 247.3621215744], [70.5755004897, 470.9454345728, 112.5908203025, 492.5026855424], [149.9960937176, 139.701232896, 263.1195068336, 225.2597656064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 9], [8], [10]]}, {"image_path": "objects365_v1_00046117_crop.jpg", "text": "What does the area look like in the context of the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, a desk, a stool, a person, a hat, a mask, two scissors, a blackboard, and a moniter.", "boxes_value": [[0.3483276095, 90.701232896, 263.1195068336, 452.5869750784], [70.5959472558, 197.9389648384, 215.4954834307, 392.1659546112], [0, 380.342956544, 293.99420164320003, 461.601196288], [15.1910400517, 286.6668701184, 89.541076659, 392.0407104512], [155.94238282979998, 154.0274048, 328, 403.4912109568], [193.95232775589997, 152.9474097664, 274.775493175, 205.3839024128], [244.3290405265, 198.3678588928, 275.1697524662, 231.40501908480002], [0.3483276095, 436.6873168896, 25.453002903100003, 452.5869750784], [0.35473633509999997, 103.1531982336, 66.8575439555, 198.3621215744], [70.5755004897, 421.9454345728, 112.5908203025, 443.5026855424], [149.9960937176, 90.701232896, 263.1195068336, 176.2597656064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 9], [8], [10]]}, {"image_path": "objects365_v1_00046119.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[11.8071288832, 359.1268920684, 212.627868672, 494.903686518]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046119_crop.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[11.8071288832, 34.126892068400025, 212.627868672, 169.90368651799997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046119.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a vase, a scissors, two cups, and a laptop.", "boxes_value": [[11.8071288832, 359.1268920684, 212.627868672, 494.903686518], [11.8071288832, 359.1268920684, 75.59899904, 478.06091307990005], [36.675109888, 435.89343260519996, 66.9492187648, 484.5482177634], [31.0423583744, 436.8634032942, 51.1154174976, 462.7073974509], [14.7677001728, 460.93237302690005, 39.5576172032, 494.903686518], [182.7881469952, 448.5883788936, 212.627868672, 475.2145995966], [89.5021362176, 409.62377926979997, 214.3506469888, 499.6712646234]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046119_crop.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a vase, a scissors, two cups, and a laptop.", "boxes_value": [[11.8071288832, 34.126892068400025, 212.627868672, 169.90368651799997], [11.8071288832, 34.126892068400025, 75.59899904, 153.06091307990005], [36.675109888, 110.89343260519996, 66.9492187648, 159.54821776339998], [31.0423583744, 111.8634032942, 51.1154174976, 137.70739745089998], [14.7677001728, 135.93237302690005, 39.5576172032, 169.90368651799997], [182.7881469952, 123.58837889360001, 212.627868672, 150.2145995966], [89.5021362176, 84.62377926979997, 214.3506469888, 174.67126462340002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046121.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[398.053222656, 340.4776611328, 690.4477539072, 471.303283712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046121_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[74.053222656, 33.477661132799994, 366.44775390719997, 164.303283712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046121.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two boots, and three sneakers.", "boxes_value": [[398.053222656, 340.4776611328, 690.4477539072, 471.303283712], [398.053222656, 435.427673344, 430.3411865088, 471.303283712], [462.4403076096, 435.2388916224, 484.72094730239996, 468.4709472768], [505.8472900608, 328.4135741952, 556.3876952832, 357.0599975424], [533.6563720704, 340.4776611328, 585.3702392832, 364.9815673856], [635.2349853696, 439.7491454976, 690.4477539072, 455.0490112512]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046121_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two boots, and three sneakers.", "boxes_value": [[74.053222656, 33.477661132799994, 366.44775390719997, 164.303283712], [74.053222656, 128.42767334400003, 106.34118650879998, 164.303283712], [138.44030760959998, 128.2388916224, 160.72094730239996, 161.47094727680002], [181.8472900608, 21.4135741952, 232.38769528319995, 50.059997542400026], [209.65637207040004, 33.477661132799994, 261.3702392832, 57.98156738559999], [311.23498536960005, 132.7491454976, 366.44775390719997, 148.0490112512]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046122.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[400.45007325399996, 219.9493408256, 518.936645535, 262.7346801664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046122_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[30.45007325399996, 10.94934082559999, 148.93664553500003, 53.73468016639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046122.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three bottles, two chairs, and a desk.", "boxes_value": [[400.45007325399996, 219.9493408256, 518.936645535, 262.7346801664], [395.88659664399995, 223.8593750016, 409.27404787499995, 274.0624389632], [441.35180666499997, 222.9000854528, 452.171020532, 262.7346801664], [478.48156741, 219.9493408256, 488.809082069, 253.3907470848], [400.45007325399996, 228.546081536, 422.540771499, 246.954956032], [486.135253912, 230.21960448, 518.936645535, 246.620300288], [1.049072255, 240.9700927488, 561.362426779, 511.683288576]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046122_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three bottles, two chairs, and a desk.", "boxes_value": [[30.45007325399996, 10.94934082559999, 148.93664553500003, 53.73468016639998], [25.88659664399995, 14.8593750016, 39.27404787499995, 64], [71.35180666499997, 13.900085452799999, 82.171020532, 53.73468016639998], [108.48156741000003, 10.94934082559999, 118.809082069, 44.39074708480001], [30.45007325399996, 19.546081536000003, 52.540771499000016, 37.95495603200001], [116.135253912, 21.219604479999987, 148.93664553500003, 37.62030028800001], [0, 31.9700927488, 178, 64]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046126.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[24.4266967779, 185.7838745088, 623.0233154346, 473.14434816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046126_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[24.4266967779, 72.78387450880001, 623.0233154346, 360.14434816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046126.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a chair, and nine pillows.", "boxes_value": [[24.4266967779, 185.7838745088, 623.0233154346, 473.14434816], [24.4266967779, 185.7838745088, 623.0233154346, 473.14434816], [0.1856079137, 383.367004416, 130.64898685, 511.6377563648], [232.95312502849998, 160.093750016, 396.89831543450003, 221.8885497856], [403.83447269019996, 216.8440551936, 601.1993408265, 323.4084472832], [469.4125976871, 178.380004864, 633.9884032908, 339.1724242944], [347.08422849, 216.8440551936, 458.0624999989, 332.2362671104], [249.3475952346, 228.194091776, 381.7648925798, 334.1279907328], [171.15832519810002, 225.0413208064, 256.9143066278, 336.0196533248], [133.95538329320001, 229.4552002048, 230.4308471451, 322.7778930688], [0.9075317422, 181.5327758848, 178.7250365913, 354.3058471424], [232.54632565240001, 147.1002197504, 372.5303955193, 234.958007808]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11]]}, {"image_path": "objects365_v1_00046126_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a chair, and nine pillows.", "boxes_value": [[24.4266967779, 72.78387450880001, 623.0233154346, 360.14434816], [24.4266967779, 72.78387450880001, 623.0233154346, 360.14434816], [0.1856079137, 270.367004416, 130.64898685, 398.6377563648], [232.95312502849998, 47.093750016, 396.89831543450003, 108.88854978559999], [403.83447269019996, 103.84405519360001, 601.1993408265, 210.40844728320002], [469.4125976871, 65.380004864, 633.9884032908, 226.1724242944], [347.08422849, 103.84405519360001, 458.0624999989, 219.23626711039998], [249.3475952346, 115.194091776, 381.7648925798, 221.12799073280001], [171.15832519810002, 112.0413208064, 256.9143066278, 223.0196533248], [133.95538329320001, 116.45520020480001, 230.4308471451, 209.77789306879998], [0.9075317422, 68.5327758848, 178.7250365913, 241.3058471424], [232.54632565240001, 34.10021975039999, 372.5303955193, 121.95800780799999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11]]}, {"image_path": "objects365_v1_00046128.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[508.4957275034, 332.475219712, 764.5588378831, 463.8376464896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046128_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[64.49572750340002, 33.47521971200001, 320.55883788309995, 164.8376464896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046128.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[508.4957275034, 332.475219712, 764.5588378831, 463.8376464896], [517.0529785304, 339.7554931712, 612.3706054595999, 510.6504516608], [646.2113036989999, 384.8762817536, 713.3283691130999, 508.958435072], [687.3839110996, 349.9076537856, 764.0893554957, 463.8376464896], [488.7713622961, 322.6130981376, 525.2612304446, 450.327636736], [508.4957275034, 332.475219712, 764.5588378831, 426.746582016]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046128_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[64.49572750340002, 33.47521971200001, 320.55883788309995, 164.8376464896], [73.05297853039997, 40.75549317119999, 168.37060545959991, 197], [202.21130369899993, 85.87628175359998, 269.32836911309994, 197], [243.38391109960003, 50.90765378560002, 320.08935549570003, 164.8376464896], [44.771362296099994, 23.61309813759999, 81.26123044459996, 151.327636736], [64.49572750340002, 33.47521971200001, 320.55883788309995, 127.74658201599999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046129.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each object you identify.", "boxes_value": [[519.1597900608, 161.5716552744, 672.767700192, 422.03314208160003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046129_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each object you identify.", "boxes_value": [[39.15979006079999, 65.5716552744, 192, 326.03314208160003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046129.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three cars.", "boxes_value": [[519.1597900608, 161.5716552744, 672.767700192, 422.03314208160003], [565.773681648, 288.9451904472, 667.6326904128, 422.03314208160003], [639.6141357312, 279.6057129096, 672.0106200864, 347.025268548], [480.13513181760004, 150.77758787279998, 579.7727050752001, 189.8023071168], [519.1597900608, 176.51727293759998, 671.1070556736, 233.808898932], [650.3492431968, 161.5716552744, 672.767700192, 197.2751464656]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046129_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three cars.", "boxes_value": [[39.15979006079999, 65.5716552744, 192, 326.03314208160003], [85.77368164799998, 192.9451904472, 187.6326904128, 326.03314208160003], [159.6141357312, 183.60571290960002, 192, 251.02526854799999], [0.13513181760004045, 54.777587872799984, 99.77270507520007, 93.8023071168], [39.15979006079999, 80.51727293759998, 191.10705567360003, 137.808898932], [170.3492431968, 65.5716552744, 192, 101.27514646559999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046134.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[167.4701538304, 237.7459106716, 414.1610717696, 464.7470703115]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046134_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[62.470153830399994, 57.7459106716, 309.1610717696, 284.7470703115]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046134.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a candle, and four people.", "boxes_value": [[167.4701538304, 237.7459106716, 414.1610717696, 464.7470703115], [404.951599104, 399.5850830082, 414.1610717696, 435.65563966739995], [236.9696045056, 317.777221668, 268.927673344, 373.6408691688], [298.3004760576, 315.5386352793, 360.2850952192, 444.3330078224], [175.4447631872, 359.33605959190004, 257.166809088, 464.7470703115], [167.4701538304, 237.7459106716, 239.7358398464, 275.3386841153]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046134_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a candle, and four people.", "boxes_value": [[62.470153830399994, 57.7459106716, 309.1610717696, 284.7470703115], [299.951599104, 219.5850830082, 309.1610717696, 255.65563966739995], [131.9696045056, 137.77722166799998, 163.92767334400003, 193.6408691688], [193.3004760576, 135.53863527930002, 255.2850952192, 264.3330078224], [70.44476318720001, 179.33605959190004, 152.16680908799998, 284.7470703115], [62.470153830399994, 57.7459106716, 134.7358398464, 95.3386841153]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046137.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[380.7666015744, 0.3262329344, 511.7826537984, 240.9984741376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046137_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[32.7666015744, 0.3262329344, 163.78265379840002, 240.9984741376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046137.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a lamp, two chairs, and a flower.", "boxes_value": [[380.7666015744, 0.3262329344, 511.7826537984, 240.9984741376], [328.0715332096, 78.0651245056, 429.3571777536, 197.8223266816], [380.7666015744, 0.3262329344, 511.7826537984, 126.5924682752], [463.4893798912, 212.8132934656, 479.603393536, 246.5906372096], [469.9927978496, 211.7504882688, 484.186645504, 240.9984741376], [495.7352294912, 221.1966552576, 511.5556640768, 241.4946289152]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046137_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a lamp, two chairs, and a flower.", "boxes_value": [[32.7666015744, 0.3262329344, 163.78265379840002, 240.9984741376], [0, 78.0651245056, 81.35717775360001, 197.8223266816], [32.7666015744, 0.3262329344, 163.78265379840002, 126.5924682752], [115.48937989119997, 212.8132934656, 131.603393536, 246.5906372096], [121.99279784959998, 211.7504882688, 136.186645504, 240.9984741376], [147.73522949120002, 221.1966552576, 163.55566407679999, 241.4946289152]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046138.jpg", "text": "Can you analyze the content of the area within the photograph ? Give coordinates for the items you reference.", "boxes_value": [[263.8330688297, 233.9505005056, 554.7540283054, 495.9401245184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046138_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Give coordinates for the items you reference.", "boxes_value": [[72.83306882969998, 65.95050050559999, 363.75402830539997, 327.9401245184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046138.jpg", "text": "Can you analyze the content of the area within the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a carpet, a chair, and a moniter.", "boxes_value": [[263.8330688297, 233.9505005056, 554.7540283054, 495.9401245184], [363.74938962730005, 246.7199096832, 554.7540283054, 495.9401245184], [204.8058471523, 449.373718272, 354.9664306968, 512.3326415872], [353.6739501828, 391.3970947072, 681.7937011773, 511.9179687424], [246.40405271810002, 318.2011108352, 366.92492675380004, 438.0910034432], [263.8330688297, 233.9505005056, 366.85388184510003, 348.7360229376]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00046138_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a carpet, a chair, and a moniter.", "boxes_value": [[72.83306882969998, 65.95050050559999, 363.75402830539997, 327.9401245184], [172.74938962730005, 78.7199096832, 363.75402830539997, 327.9401245184], [13.805847152300004, 281.373718272, 163.9664306968, 344], [162.6739501828, 223.39709470719998, 436, 343.9179687424], [55.40405271810002, 150.20111083519998, 175.92492675380004, 270.0910034432], [72.83306882969998, 65.95050050559999, 175.85388184510003, 180.7360229376]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00046140.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention.", "boxes_value": [[121.6732788052, 121.6173095936, 354.87377932360005, 512.4946289152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046140_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention.", "boxes_value": [[58.6732788052, 98.6173095936, 291.87377932360005, 489]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046140.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a watch, a hat, and a bottle.", "boxes_value": [[121.6732788052, 121.6173095936, 354.87377932360005, 512.4946289152], [172.536071812, 63.7869873152, 412.9151611648, 511.1010742272], [121.6732788052, 121.6173095936, 236.63720705120002, 512.4946289152], [335.4104004232, 350.4796752896, 354.87377932360005, 373.9700317184], [299.5316623636, 389.0017660928, 382.1557141372, 490.8064013312], [284.1750488628, 346.6434326016, 318.2777099808, 416.0889892352]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046140_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a watch, a hat, and a bottle.", "boxes_value": [[58.6732788052, 98.6173095936, 291.87377932360005, 489], [109.53607181199999, 40.7869873152, 349.9151611648, 488.1010742272], [58.6732788052, 98.6173095936, 173.63720705120002, 489], [272.4104004232, 327.4796752896, 291.87377932360005, 350.9700317184], [236.5316623636, 366.0017660928, 319.1557141372, 467.8064013312], [221.1750488628, 323.6434326016, 255.2777099808, 393.0889892352]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046141.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[131.08953859599998, 313.577270528, 577.9309082256, 359.5382690304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046141_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[112.08953859599998, 11.577270527999985, 558.9309082256, 57.53826903039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046141.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a bed, two pillows, and two books.", "boxes_value": [[131.08953859599998, 313.577270528, 577.9309082256, 359.5382690304], [111.52661135919999, 321.0814819328, 238.11212161519998, 392.3411254784], [89.2028198216, 331.2374877696, 465.3553467112, 504.233459456], [246.5620117336, 332.5075073024, 341.476806628, 356.46661376], [333.490478492, 337.115051264, 435.16284181919997, 359.5382690304], [544.3408202879999, 325.0449828864, 577.9309082256, 339.2474975744], [131.08953859599998, 313.577270528, 180.70672609919998, 329.1225586176]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046141_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a bed, two pillows, and two books.", "boxes_value": [[112.08953859599998, 11.577270527999985, 558.9309082256, 57.53826903039999], [92.52661135919999, 19.08148193279999, 219.11212161519998, 69], [70.2028198216, 29.237487769600023, 446.3553467112, 69], [227.5620117336, 30.507507302399972, 322.476806628, 54.46661375999997], [314.490478492, 35.11505126399999, 416.16284181919997, 57.53826903039999], [525.3408202879999, 23.04498288640002, 558.9309082256, 37.24749757439997], [112.08953859599998, 11.577270527999985, 161.70672609919998, 27.12255861760002]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046143.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates.", "boxes_value": [[287.5666503354, 270.3992920064, 500.16552738629997, 396.200927744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046143_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates.", "boxes_value": [[53.56665033540003, 32.3992920064, 266.16552738629997, 158.200927744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046143.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bottle, a camera, two chairs, and a desk.", "boxes_value": [[287.5666503354, 270.3992920064, 500.16552738629997, 396.200927744], [461.55151373940004, 281.9935302656, 473.1513672267, 318.79351808], [456.9865723059, 374.6771240448, 476.9235840243, 396.200927744], [411.8515625862, 282.0920410112, 450.3764647977, 309.4503173632], [287.5666503354, 270.3992920064, 310.9001465736, 297.8505248768], [351.9572753766, 309.959594752, 500.16552738629997, 364.8988647424]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046143_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bottle, a camera, two chairs, and a desk.", "boxes_value": [[53.56665033540003, 32.3992920064, 266.16552738629997, 158.200927744], [227.55151373940004, 43.993530265599986, 239.15136722670002, 80.79351808000001], [222.98657230589998, 136.6771240448, 242.9235840243, 158.200927744], [177.85156258619998, 44.092041011200024, 216.3764647977, 71.45031736319999], [53.56665033540003, 32.3992920064, 76.90014657360001, 59.85052487680002], [117.95727537660002, 71.95959475199999, 266.16552738629997, 126.89886474240001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046145.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[429.79748535330003, 217.6474609152, 572.2611083800999, 464.203552256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046145_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[35.79748535330003, 61.647460915200014, 178.26110838009993, 308.203552256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046145.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a desk, a bottle, a briefcase, and a recorder.", "boxes_value": [[429.79748535330003, 217.6474609152, 572.2611083800999, 464.203552256], [472.04821778129997, 90.697143552, 671.3037109214, 377.4517822464], [28.718383762, 279.8984374784, 593.2097167895, 472.4266967552], [429.79748535330003, 249.1784057856, 443.91943356440004, 307.488342272], [503.1925049166, 380.9150390784, 572.2611083800999, 464.203552256], [491.26586917040004, 217.6474609152, 520.1566161784, 237.5748291072]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046145_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a desk, a bottle, a briefcase, and a recorder.", "boxes_value": [[35.79748535330003, 61.647460915200014, 178.26110838009993, 308.203552256], [78.04821778129997, 0, 213, 221.4517822464], [0, 123.89843747840001, 199.20971678950002, 316.4266967552], [35.79748535330003, 93.1784057856, 49.91943356440004, 151.488342272], [109.1925049166, 224.9150390784, 178.26110838009993, 308.203552256], [97.26586917040004, 61.647460915200014, 126.15661617839999, 81.5748291072]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046146.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[457.2812499708, 250.8057250816, 667.1435547113, 299.7069702144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046146_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object.", "boxes_value": [[53.28124997079999, 12.805725081600002, 263.1435547113, 61.70697021439997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046146.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, three bowls, and a cup.", "boxes_value": [[457.2812499708, 250.8057250816, 667.1435547113, 299.7069702144], [636.113159192, 250.8057250816, 667.1435547113, 297.351135232], [497.0185546928, 268.8825683456, 545.0683594078, 286.4705810432], [496.47460939480004, 286.6519164928, 547.4255371355, 299.7069702144], [571.7446289026, 265.6297607168, 590.9030761442, 293.1562500096], [457.2812499708, 268.5748291072, 490.8591308545, 284.1814575104]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046146_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, three bowls, and a cup.", "boxes_value": [[53.28124997079999, 12.805725081600002, 263.1435547113, 61.70697021439997], [232.113159192, 12.805725081600002, 263.1435547113, 59.35113523199999], [93.01855469280002, 30.88256834560002, 141.06835940780002, 48.47058104320001], [92.47460939480004, 48.65191649280001, 143.4255371355, 61.70697021439997], [167.74462890259997, 27.62976071679998, 186.90307614419999, 55.1562500096], [53.28124997079999, 30.574829107200003, 86.8591308545, 46.18145751039998]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046147.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[462.07177737, 233.2373657088, 570.8768310665, 375.823852544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046147_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[28.071777370000007, 36.23736570880001, 136.87683106650002, 178.82385254399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046147.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a backpack, a bottle, and a cup.", "boxes_value": [[462.07177737, 233.2373657088, 570.8768310665, 375.823852544], [372.53833010799997, 124.981628416, 602.606811506, 419.0610351616], [224.107116695, 148.1740112384, 623.9437255975, 506.2644042752], [484.4233398165, 233.2373657088, 570.8768310665, 321.5703124992], [462.07177737, 290.6087035904, 475.5496826255, 331.042419456], [526.852661132, 341.9116821504, 546.8521728425, 375.823852544]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046147_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a backpack, a bottle, and a cup.", "boxes_value": [[28.071777370000007, 36.23736570880001, 136.87683106650002, 178.82385254399998], [0, 0, 164, 214], [0, 0, 164, 214], [50.423339816500004, 36.23736570880001, 136.87683106650002, 124.57031249919999], [28.071777370000007, 93.60870359040001, 41.5496826255, 134.042419456], [92.85266113199998, 144.9116821504, 112.85217284249995, 178.82385254399998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046149.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[80.2109985295, 200.497802752, 245.8787841917, 367.4364013568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046149_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[42.2109985295, 42.49780275200001, 207.8787841917, 209.4364013568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046149.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include three paddles, and two helmets.", "boxes_value": [[80.2109985295, 200.497802752, 245.8787841917, 367.4364013568], [80.2109985295, 229.055114752, 245.8787841917, 367.4364013568], [98.23956296909999, 261.2953491456, 296.0595702846, 304.5800781312], [159.4378051919, 217.5233154048, 282.7953491142, 315.0155639808], [162.77288818149998, 200.497802752, 199.34594726959998, 235.8097533952], [215.1102294945, 189.462829568, 251.0527343768, 222.2524413952]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046149_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include three paddles, and two helmets.", "boxes_value": [[42.2109985295, 42.49780275200001, 207.8787841917, 209.4364013568], [42.2109985295, 71.05511475200001, 207.8787841917, 209.4364013568], [60.23956296909999, 103.29534914560003, 249, 146.58007813120003], [121.4378051919, 59.5233154048, 244.7953491142, 157.01556398079998], [124.77288818149998, 42.49780275200001, 161.34594726959998, 77.8097533952], [177.1102294945, 31.46282956799999, 213.0527343768, 64.25244139520001]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046150.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[144.7461547966, 159.6575927808, 277.33142089750004, 386.3817138688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046150_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[33.746154796599996, 57.6575927808, 166.33142089750004, 284.3817138688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046150.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a hat, a handbag, and a backpack.", "boxes_value": [[144.7461547966, 159.6575927808, 277.33142089750004, 386.3817138688], [144.7461547966, 164.3094482432, 198.2079468077, 386.3817138688], [193.5080566093, 163.1344604672, 249.90734863589998, 325.2824707072], [242.4677734581, 159.6394653184, 285.19885254630003, 323.2262573056], [242.4677734581, 168.2720336896, 299.01092529659996, 369.4103393792], [207.236267068, 159.6575927808, 233.03265382170002, 177.6281127936], [251.46887207039998, 256.7798461952, 277.33142089750004, 281.607910144], [207.5411987016, 311.4519653376, 257.9501953113, 377.2846679552]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046150_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a hat, a handbag, and a backpack.", "boxes_value": [[33.746154796599996, 57.6575927808, 166.33142089750004, 284.3817138688], [33.746154796599996, 62.309448243199995, 87.20794680770001, 284.3817138688], [82.50805660930001, 61.1344604672, 138.90734863589998, 223.2824707072], [131.4677734581, 57.6394653184, 174.19885254630003, 221.22625730559997], [131.4677734581, 66.2720336896, 188.01092529659996, 267.4103393792], [96.23626706799999, 57.6575927808, 122.03265382170002, 75.6281127936], [140.46887207039998, 154.77984619519998, 166.33142089750004, 179.60791014400002], [96.5411987016, 209.45196533759997, 146.95019531129998, 275.2846679552]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046151.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference.", "boxes_value": [[473.8079834205, 172.6661377024, 692.153198262, 373.114318848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046151_crop.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference.", "boxes_value": [[54.80798342050002, 50.66613770239999, 273.15319826200005, 251.11431884799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046151.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, and four desks.", "boxes_value": [[473.8079834205, 172.6661377024, 692.153198262, 373.114318848], [473.8079834205, 315.3892822016, 545.574218772, 373.114318848], [498.853027341, 226.1400756736, 692.153198262, 318.187805184], [582.5187988050001, 189.7216186368, 684.9251708595, 238.026550272], [483.49365232799994, 180.5437011968, 526.0019530875, 222.568969728], [493.1640625125, 172.6661377024, 553.850830044, 226.9647826944]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046151_crop.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, and four desks.", "boxes_value": [[54.80798342050002, 50.66613770239999, 273.15319826200005, 251.11431884799998], [54.80798342050002, 193.3892822016, 126.574218772, 251.11431884799998], [79.85302734099997, 104.1400756736, 273.15319826200005, 196.187805184], [163.51879880500007, 67.7216186368, 265.9251708595, 116.02655027200001], [64.49365232799994, 58.54370119679999, 107.00195308750006, 100.56896972800001], [74.16406251249998, 50.66613770239999, 134.85083004399996, 104.96478269439999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046152.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[152.11096191200002, 211.60266112, 250.23632816, 258.7329101312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046152_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[25.110961912000022, 12.602661119999993, 123.23632816, 59.732910131200015]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046152.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, and four chairs.", "boxes_value": [[152.11096191200002, 211.60266112, 250.23632816, 258.7329101312], [201.89855959, 215.6594238464, 302.052001979, 267.9346313728], [175.83068848800002, 211.60266112, 208.912719743, 246.3598022656], [173.73681643499998, 220.3966674944, 201.375000008, 251.3848877056], [152.11096191200002, 212.4327392768, 172.21142575, 229.3027954176], [215.42675781900002, 223.527709952, 250.23632816, 258.7329101312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046152_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, and four chairs.", "boxes_value": [[25.110961912000022, 12.602661119999993, 123.23632816, 59.732910131200015], [74.89855958999999, 16.65942384639999, 147, 68.93463137280003], [48.83068848800002, 12.602661119999993, 81.912719743, 47.359802265599996], [46.73681643499998, 21.396667494399992, 74.375000008, 52.38488770559999], [25.110961912000022, 13.432739276799992, 45.21142574999999, 30.30279541760001], [88.42675781900002, 24.52770995200001, 123.23632816, 59.732910131200015]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046153.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[363.4454345728, 122.9671630848, 523.9874267528, 207.2014160384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046153_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[40.445434572800025, 21.967163084800006, 200.98742675280005, 106.2014160384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046153.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two helmets, a gloves, and two hockey sticks.", "boxes_value": [[363.4454345728, 122.9671630848, 523.9874267528, 207.2014160384], [391.8687743966, 128.3342285312, 455.0463867329, 194.43670656], [460.0095214995, 122.9671630848, 523.9874267528, 168.6017456128], [363.4454345728, 159.567504896, 391.52441404340004, 207.2014160384], [414.7730712763, 56.1976318464, 473.6676025379, 231.09655761919998], [428.15808102989996, 95.4606933504, 566.4710693738, 511.2917480448]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046153_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two helmets, a gloves, and two hockey sticks.", "boxes_value": [[40.445434572800025, 21.967163084800006, 200.98742675280005, 106.2014160384], [68.86877439659997, 27.33422853120001, 132.04638673289998, 93.43670656], [137.0095214995, 21.967163084800006, 200.98742675280005, 67.60174561279999], [40.445434572800025, 58.567504896, 68.52441404340004, 106.2014160384], [91.77307127630002, 0, 150.6676025379, 127], [105.15808102989996, 0, 241, 127]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046154.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[81.674682624, 361.88446042260006, 239.0777587712, 449.977783175]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046154_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[39.674682624, 22.884460422600057, 197.0777587712, 110.97778317500001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046154.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, a chair, a carpet, a desk, and a pillow.", "boxes_value": [[81.674682624, 361.88446042260006, 239.0777587712, 449.977783175], [76.7598266368, 379.10180663759996, 140.0386962944, 443.9963378808], [136.9407348736, 361.88446042260006, 188.4923705856, 421.6065673972], [134.40930176, 422.1182861054, 185.702087424, 449.977783175], [174.766540544, 398.16430663740005, 239.0777587712, 438.5214843904], [81.674682624, 365.13659670759995, 119.3839111168, 398.09509279400004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046154_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, a chair, a carpet, a desk, and a pillow.", "boxes_value": [[39.674682624, 22.884460422600057, 197.0777587712, 110.97778317500001], [34.7598266368, 40.101806637599964, 98.03869629440001, 104.99633788080001], [94.94073487360001, 22.884460422600057, 146.4923705856, 82.60656739720002], [92.40930176, 83.11828610539999, 143.702087424, 110.97778317500001], [132.766540544, 59.164306637400045, 197.0777587712, 99.52148439040002], [39.674682624, 26.13659670759995, 77.3839111168, 59.09509279400004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046155.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[372.317138688, 103.7385253888, 592.2205810176, 246.4214477312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046155_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[55.317138688, 35.7385253888, 275.2205810176, 178.4214477312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046155.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a bed, and three pillows.", "boxes_value": [[372.317138688, 103.7385253888, 592.2205810176, 246.4214477312], [541.8051757824, 103.7385253888, 592.2205810176, 201.0045776384], [153.32824704, 74.566650368, 767.011596672, 512.3105468928], [409.1228027136, 166.9030151168, 506.81689451520003, 246.4214477312], [372.317138688, 144.6378784256, 522.2661133055999, 244.1494751232], [355.050292992, 143.72906496, 512.7239990016, 244.6038818304]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046155_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a bed, and three pillows.", "boxes_value": [[55.317138688, 35.7385253888, 275.2205810176, 178.4214477312], [224.80517578240006, 35.7385253888, 275.2205810176, 133.0045776384], [0, 6.566650367999998, 330, 214], [92.12280271359998, 98.90301511679999, 189.81689451520003, 178.4214477312], [55.317138688, 76.63787842560001, 205.2661133055999, 176.1494751232], [38.05029299199998, 75.72906495999999, 195.72399900159996, 176.6038818304]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046156.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[30.503784146599997, 213.0146484224, 146.2324218761, 344.6898193408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046156_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[29.503784146599997, 33.0146484224, 145.2324218761, 164.6898193408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046156.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two cars, and three street lights.", "boxes_value": [[30.503784146599997, 213.0146484224, 146.2324218761, 344.6898193408], [30.503784146599997, 284.3621826048, 51.9829101423, 301.5455322112], [20.4036254978, 306.9663696384, 50.07348631479999, 323.4496459776], [100.7137450853, 297.2269897216, 124.66217043019999, 310.5728759808], [12.323791503799999, 213.3741454848, 60.5474242949, 379.7213134848], [56.650573737, 214.5919189504, 80.0317382812, 298.8615112192], [105.40771483479999, 213.0146484224, 146.2324218761, 344.6898193408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046156_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two cars, and three street lights.", "boxes_value": [[29.503784146599997, 33.0146484224, 145.2324218761, 164.6898193408], [29.503784146599997, 104.36218260480001, 50.9829101423, 121.5455322112], [19.4036254978, 126.96636963840001, 49.07348631479999, 143.44964597760003], [99.7137450853, 117.22698972159998, 123.66217043019999, 130.5728759808], [11.323791503799999, 33.37414548480001, 59.5474242949, 197], [55.650573737, 34.591918950399986, 79.0317382812, 118.8615112192], [104.40771483479999, 33.0146484224, 145.2324218761, 164.6898193408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046157.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[69.5044555776, 191.2707519488, 236.02138613760002, 259.6782226432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046157_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[42.5044555776, 17.27075194880001, 209.02138613760002, 85.6782226432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046157.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a basketball, a person, and three sneakers.", "boxes_value": [[69.5044555776, 191.2707519488, 236.02138613760002, 259.6782226432], [131.42962644480002, 191.2707519488, 191.039733888, 248.1082763776], [31.555847193600002, 61.1139526144, 271.0609130496, 504.3915405312], [203.2248012288, 215.4627252224, 236.02138613760002, 255.9351066624], [69.5044555776, 230.1072997888, 114.3826294272, 259.6782226432], [95.596435584, 224.1931152384, 118.2094726656, 249.2413940224]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046157_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a basketball, a person, and three sneakers.", "boxes_value": [[42.5044555776, 17.27075194880001, 209.02138613760002, 85.6782226432], [104.42962644480002, 17.27075194880001, 164.039733888, 74.10827637759999], [4.555847193600002, 0, 244.0609130496, 102], [176.2248012288, 41.462725222399996, 209.02138613760002, 81.93510666239999], [42.5044555776, 56.10729978879999, 87.3826294272, 85.6782226432], [68.596435584, 50.1931152384, 91.2094726656, 75.24139402239999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046158.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object.", "boxes_value": [[31.316650370999998, 86.2626953216, 399.1125488094, 246.7432861184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046158_crop.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object.", "boxes_value": [[31.316650370999998, 40.262695321600006, 399.1125488094, 200.7432861184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046158.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a nightstand, two pillows, a power outlet, a telephone, and two bottles.", "boxes_value": [[31.316650370999998, 86.2626953216, 399.1125488094, 246.7432861184], [308.4303588582, 178.4649658368, 399.1125488094, 246.7432861184], [216.68133546180002, 188.0665893376, 319.0988769534, 244.6096191488], [207.0796508772, 205.1361694208, 245.48626711379998, 249.94384768], [47.8839721512, 86.2626953216, 62.9768066556, 109.345886208], [303.9716186358, 174.128173824, 336.3572998266, 188.6782226432], [31.316650370999998, 167.9909057536, 48.1718749848, 200.7902221824], [334.96807858560004, 105.2598266368, 350.689575168, 146.3602905088]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046158_crop.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a nightstand, two pillows, a power outlet, a telephone, and two bottles.", "boxes_value": [[31.316650370999998, 40.262695321600006, 399.1125488094, 200.7432861184], [308.4303588582, 132.4649658368, 399.1125488094, 200.7432861184], [216.68133546180002, 142.0665893376, 319.0988769534, 198.6096191488], [207.0796508772, 159.1361694208, 245.48626711379998, 203.94384768], [47.8839721512, 40.262695321600006, 62.9768066556, 63.345886207999996], [303.9716186358, 128.128173824, 336.3572998266, 142.6782226432], [31.316650370999998, 121.9909057536, 48.1718749848, 154.7902221824], [334.96807858560004, 59.2598266368, 350.689575168, 100.3602905088]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046159.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[233.4321899709, 202.2304077312, 396.948120136, 481.6659546112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046159_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[41.4321899709, 70.23040773119999, 204.948120136, 349.6659546112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046159.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a backpack, two sneakers, four wine glasses, a plate, a tea pot, a cup, a person, a chair, and a desk.", "boxes_value": [[233.4321899709, 202.2304077312, 396.948120136, 481.6659546112], [252.66448972930002, 337.494445824, 336.43310548880004, 463.5416869888], [360.4180907992, 392.3025512448, 385.8350830054, 431.3229980672], [362.9240722343, 376.5512084992, 404.45031739819996, 408.411926272], [229.5486450043, 238.4994506752, 247.11651610869998, 290.074951168], [326.0696411195, 297.0120239104, 370.7573242409, 316.9979858432], [326.0696411195, 257.6889038336, 346.50463865139994, 305.2958373888], [332.58190917800005, 235.9064331264, 360.7032470557, 281.8635864064], [314.1927490146, 227.3450927616, 324.7379150157, 259.467346176], [375.52392578129997, 231.6429443584, 390.1334228193, 260.555542016], [314.4517211684, 255.1722412032, 329.6801757552, 280.9435424768], [233.4321899709, 202.2304077312, 396.948120136, 481.6659546112], [169.8604125802, 328.121032704, 352.6195068481, 512.269897472], [163.7982788135, 246.7839965696, 520.591552764, 425.5165405184]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6, 8, 9], [5], [7], [10], [11], [12], [13]]}, {"image_path": "objects365_v1_00046159_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a backpack, two sneakers, four wine glasses, a plate, a tea pot, a cup, a person, a chair, and a desk.", "boxes_value": [[41.4321899709, 70.23040773119999, 204.948120136, 349.6659546112], [60.664489729300016, 205.49444582400002, 144.43310548880004, 331.5416869888], [168.4180907992, 260.3025512448, 193.8350830054, 299.3229980672], [170.9240722343, 244.55120849920002, 212.45031739819996, 276.411926272], [37.5486450043, 106.4994506752, 55.11651610869998, 158.07495116799998], [134.06964111949998, 165.0120239104, 178.7573242409, 184.9979858432], [134.06964111949998, 125.68890383360002, 154.50463865139994, 173.2958373888], [140.58190917800005, 103.9064331264, 168.7032470557, 149.86358640639997], [122.1927490146, 95.3450927616, 132.73791501570003, 127.46734617599998], [183.52392578129997, 99.6429443584, 198.1334228193, 128.555542016], [122.45172116840001, 123.1722412032, 137.68017575520003, 148.94354247680002], [41.4321899709, 70.23040773119999, 204.948120136, 349.6659546112], [0, 196.12103270400002, 160.6195068481, 380], [0, 114.78399656959999, 245, 293.5165405184]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6, 8, 9], [5], [7], [10], [11], [12], [13]]}, {"image_path": "objects365_v1_00046161.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[515.3948974422, 222.1472167936, 710.1577148104, 373.3963622912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046161_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[49.39489744219998, 38.147216793599995, 244.15771481039997, 189.39636229119998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046161.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, two faucets, an extractor, and two toilet papers.", "boxes_value": [[515.3948974422, 222.1472167936, 710.1577148104, 373.3963622912], [515.3948974422, 316.5843505664, 595.9916992276001, 352.7576904192], [646.3300780914, 319.0246582272, 673.5158691122, 373.1008300544], [694.2008056498, 261.4025268736, 710.1577148104, 373.3963622912], [532.6263427426001, 222.1472167936, 648.1693114994, 248.4245605376], [674.4023437538, 265.5394897408, 718.7271728224, 337.345581056], [607.1228027198, 247.8518066176, 621.639648426, 269.8403930624]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046161_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, two faucets, an extractor, and two toilet papers.", "boxes_value": [[49.39489744219998, 38.147216793599995, 244.15771481039997, 189.39636229119998], [49.39489744219998, 132.58435056640002, 129.99169922760007, 168.7576904192], [180.33007809139997, 135.0246582272, 207.5158691122, 189.10083005439998], [228.20080564980003, 77.40252687359998, 244.15771481039997, 189.39636229119998], [66.62634274260006, 38.147216793599995, 182.1693114994, 64.4245605376], [208.40234375379998, 81.53948974079998, 252.7271728224, 153.34558105600001], [141.1228027198, 63.851806617600005, 155.639648426, 85.84039306239998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046163.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[175.4510497958, 378.6275634688, 366.2562255637, 512.408569344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046163_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[48.4510497958, 33.62756346880002, 239.2562255637, 167]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046163.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four benches, and a storage box.", "boxes_value": [[175.4510497958, 378.6275634688, 366.2562255637, 512.408569344], [138.1243896807, 439.181091328, 238.10809327759998, 512.408569344], [200.0861205849, 378.6275634688, 366.2562255637, 512.408569344], [283.0508422709, 350.3505859584, 364.3123779215, 453.5088500736], [257.7478638011, 346.9443969536, 295.2156982404, 406.7956542976], [175.4510497958, 381.4748535296, 236.05511473430002, 451.831298816]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046163_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four benches, and a storage box.", "boxes_value": [[48.4510497958, 33.62756346880002, 239.2562255637, 167], [11.124389680700006, 94.18109132799998, 111.10809327759998, 167], [73.08612058489999, 33.62756346880002, 239.2562255637, 167], [156.05084227089998, 5.350585958400018, 237.3123779215, 108.5088500736], [130.74786380109998, 1.9443969536000054, 168.2156982404, 61.79565429759998], [48.4510497958, 36.47485352960001, 109.05511473430002, 106.83129881600001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046165.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please mention the objects and their locations.", "boxes_value": [[213.19018556940003, 476.2361449984, 504.48474118779995, 511.9975585792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046165_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please mention the objects and their locations.", "boxes_value": [[73.19018556940003, 9.236144998399993, 364.48474118779995, 44.99755857920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046165.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[213.19018556940003, 476.2361449984, 504.48474118779995, 511.9975585792], [211.9636840474, 466.8177490432, 226.886291526, 505.2286987264], [290.9541015358, 491.6458740224, 306.7061767244, 511.988769536], [476.87402342, 476.2361449984, 491.89428710299995, 511.99285888], [490.56896973, 480.6538696192, 504.48474118779995, 511.9975585792], [213.19018556940003, 486.6264648192, 226.0686034884, 511.7701415936], [230.7702636828, 478.8585205248, 242.8310547064, 511.9745483264]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046165_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[73.19018556940003, 9.236144998399993, 364.48474118779995, 44.99755857920002], [71.9636840474, 0, 86.88629152600001, 38.22869872640001], [150.95410153580002, 24.645874022399994, 166.7061767244, 44.98876953600001], [336.87402342, 9.236144998399993, 351.89428710299995, 44.99285888000003], [350.56896973, 13.653869619200009, 364.48474118779995, 44.99755857920002], [73.19018556940003, 19.62646481920001, 86.06860348839999, 44.77014159359999], [90.77026368279999, 11.858520524799985, 102.83105470640001, 44.9745483264]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046166.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference.", "boxes_value": [[431.8676757525, 310.7921142784, 537.8157958836, 346.9242553856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046166_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference.", "boxes_value": [[26.86767575250002, 9.792114278399993, 132.8157958836, 45.92425538560002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046166.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, a book, and two cups.", "boxes_value": [[431.8676757525, 310.7921142784, 537.8157958836, 346.9242553856], [404.5174560492, 300.215515136, 627.9466553022, 511.2725830144], [394.9863281013, 232.9181518336, 482.3139648366, 343.5076904448], [431.8676757525, 328.1528320512, 503.87158207230004, 346.9242553856], [502.8214111596, 296.0238647296, 520.158081066, 325.5604248064], [519.1949463323999, 310.7921142784, 537.8157958836, 344.50231936]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046166_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, a book, and two cups.", "boxes_value": [[26.86767575250002, 9.792114278399993, 132.8157958836, 45.92425538560002], [0, 0, 159, 54], [0, 0, 77.31396483660001, 42.507690444800005], [26.86767575250002, 27.152832051199994, 98.87158207230004, 45.92425538560002], [97.82141115960002, 0, 115.15808106600002, 24.56042480640002], [114.19494633239992, 9.792114278399993, 132.8157958836, 43.50231936]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046167.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[646.5953368794, 300.4473876992, 760.243652385, 479.3544922112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046167_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[28.59533687939995, 45.44738769920002, 142.24365238500002, 224.3544922112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046167.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a truck.", "boxes_value": [[646.5953368794, 300.4473876992, 760.243652385, 479.3544922112], [646.5953368794, 372.7152710144, 699.6645507746999, 457.32574464], [672.6292724842, 376.7205200384, 710.1783447461, 478.8538208256], [724.1965331602, 373.7166137856, 760.243652385, 479.3544922112], [707.3857422067, 300.4473876992, 732.1418457148, 329.4232177664], [101.29742435109999, 242.5163574272, 808.9414062702, 486.4547119104]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046167_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a truck.", "boxes_value": [[28.59533687939995, 45.44738769920002, 142.24365238500002, 224.3544922112], [28.59533687939995, 117.7152710144, 81.66455077469993, 202.32574463999998], [54.62927248419999, 121.72052003840002, 92.17834474610004, 223.8538208256], [106.19653316020003, 118.71661378559998, 142.24365238500002, 224.3544922112], [89.3857422067, 45.44738769920002, 114.14184571479996, 74.4232177664], [0, 0, 170, 231.45471191040002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046169.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each object you identify.", "boxes_value": [[510.81250000840004, 356.3267211776, 681.7320556463, 511.9685668864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046169_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each object you identify.", "boxes_value": [[42.812500008400036, 39.32672117760001, 213.73205564629995, 194.96856688640003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046169.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include five cars.", "boxes_value": [[510.81250000840004, 356.3267211776, 681.7320556463, 511.9685668864], [510.81250000840004, 421.7414550528, 539.6097412429, 457.1380004864], [512.3674316733, 495.9658203136, 540.1361083748001, 511.9685668864], [638.5020751784, 360.866882304, 681.7320556463, 387.3181152256], [608.3002929369001, 356.9189453312, 625.4738769520001, 380.4091796992], [585.0074463031, 356.3267211776, 604.1550292686001, 379.6196289024]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046169_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include five cars.", "boxes_value": [[42.812500008400036, 39.32672117760001, 213.73205564629995, 194.96856688640003], [42.812500008400036, 104.7414550528, 71.60974124289999, 140.13800048640002], [44.36743167329996, 178.96582031359998, 72.13610837480007, 194.96856688640003], [170.5020751784, 43.866882304, 213.73205564629995, 70.31811522560002], [140.30029293690006, 39.91894533120001, 157.47387695200007, 63.409179699200024], [117.00744630309998, 39.32672117760001, 136.15502926860006, 62.619628902399995]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046173.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.4325561468, 62.5850829824, 339.93054198880003, 402.0357665792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046173_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.4325561468, 62.5850829824, 339.93054198880003, 402.0357665792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046173.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two pillows, a bed, a flower, and a vase.", "boxes_value": [[37.4325561468, 62.5850829824, 339.93054198880003, 402.0357665792], [37.4325561468, 276.9159545856, 170.5145263688, 400.3295898624], [119.3292236184, 260.4229126144, 286.5347900552, 402.0357665792], [1.8756103816, 243.5939941376, 576.3009033184, 512.4738769408], [296.0649414064, 62.5850829824, 339.93054198880003, 145.6717529088], [298.1291503796, 117.2881469952, 315.6754150196, 173.53936768]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046173_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two pillows, a bed, a flower, and a vase.", "boxes_value": [[37.4325561468, 62.5850829824, 339.93054198880003, 402.0357665792], [37.4325561468, 276.9159545856, 170.5145263688, 400.3295898624], [119.3292236184, 260.4229126144, 286.5347900552, 402.0357665792], [1.8756103816, 243.5939941376, 415, 486], [296.0649414064, 62.5850829824, 339.93054198880003, 145.6717529088], [298.1291503796, 117.2881469952, 315.6754150196, 173.53936768]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046174.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[48.269043002299995, 438.5385742336, 177.7094116299, 511.6196899328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046174_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[33.269043002299995, 18.538574233600002, 162.7094116299, 91.61968993279999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046174.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a cup, a canned, a desk, and a wallet.", "boxes_value": [[48.269043002299995, 438.5385742336, 177.7094116299, 511.6196899328], [112.3970947289, 438.5385742336, 177.7094116299, 487.426208512], [48.269043002299995, 449.1901855232, 70.4259033412, 477.489501952], [100.1286620826, 439.3901367296, 116.83465576020001, 473.6373901312], [39.0172119482, 397.6209716736, 339.9478149513, 513.1568603648], [101.4760742376, 494.5504150528, 143.37341310850002, 511.6196899328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046174_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a cup, a canned, a desk, and a wallet.", "boxes_value": [[33.269043002299995, 18.538574233600002, 162.7094116299, 91.61968993279999], [97.3970947289, 18.538574233600002, 162.7094116299, 67.42620851200002], [33.269043002299995, 29.1901855232, 55.4259033412, 57.48950195200001], [85.1286620826, 19.390136729599988, 101.83465576020001, 53.63739013119999], [24.0172119482, 0, 195, 92], [86.4760742376, 74.55041505280002, 128.37341310850002, 91.61968993279999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046175.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[209.5080566272, 31.7610473588, 449.8751220736, 219.1067504848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046175_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[60.508056627200006, 31.7610473588, 300.8751220736, 219.1067504848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046175.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, and five lanterns.", "boxes_value": [[209.5080566272, 31.7610473588, 449.8751220736, 219.1067504848], [209.5080566272, 31.7610473588, 449.8751220736, 219.1067504848], [210.891662592, 78.4492797775, 278.9721069568, 150.6309203776], [268.3088989184, 33.7458495757, 376.9915771392, 145.2993164058], [324.8682250752, 127.9695434337, 384.8299560448, 185.8220215131], [275.4525756928, 159.0050048999, 342.3444824064, 219.5693359685], [360.4234008576, 89.4012451132, 451.11926272, 182.2062377864]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046175_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, and five lanterns.", "boxes_value": [[60.508056627200006, 31.7610473588, 300.8751220736, 219.1067504848], [60.508056627200006, 31.7610473588, 300.8751220736, 219.1067504848], [61.89166259199999, 78.4492797775, 129.97210695680002, 150.6309203776], [119.30889891840002, 33.7458495757, 227.9915771392, 145.2993164058], [175.86822507519997, 127.9695434337, 235.82995604479999, 185.8220215131], [126.4525756928, 159.0050048999, 193.3444824064, 219.5693359685], [211.42340085759997, 89.4012451132, 302.11926272, 182.2062377864]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046176.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each object you identify.", "boxes_value": [[64.4466552576, 231.6439819264, 208.66949460479998, 311.6557006848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046176_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each object you identify.", "boxes_value": [[36.4466552576, 20.643981926399988, 180.66949460479998, 100.65570068480002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046176.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three suvs.", "boxes_value": [[64.4466552576, 231.6439819264, 208.66949460479998, 311.6557006848], [123.844543488, 227.5687866368, 165.9009399552, 347.1409301504], [66.5324097024, 237.0521240064, 101.9916992256, 350.8518066176], [136.7261352192, 231.6439819264, 208.66949460479998, 310.9833374208], [64.4466552576, 233.6610717696, 192.19647214079998, 311.6557006848], [0, 225.5926513664, 136.38995358719998, 339.8951415808]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046176_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three suvs.", "boxes_value": [[36.4466552576, 20.643981926399988, 180.66949460479998, 100.65570068480002], [95.844543488, 16.568786636800013, 137.9009399552, 120], [38.5324097024, 26.052124006399993, 73.9916992256, 120], [108.72613521919999, 20.643981926399988, 180.66949460479998, 99.98333742080001], [36.4466552576, 22.6610717696, 164.19647214079998, 100.65570068480002], [0, 14.592651366399991, 108.38995358719998, 120]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046177.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[81.210693376, 105.88012692480001, 257.9132080128, 219.0721435392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046177_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[44.210693375999995, 28.88012692480001, 220.9132080128, 142.0721435392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046177.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, three people, a hat, a glasses, and a boat.", "boxes_value": [[81.210693376, 105.88012692480001, 257.9132080128, 219.0721435392], [119.202087424, 0.9844360704, 513.1502685696, 516.3005371392001], [69.1253662208, 111.32244871680001, 385.1503296, 450.6512451072], [0.9057617408, 33.005615232000004, 297.2636718592, 559.8796386816], [171.0744628736, 0.3202514688, 403.9835815424, 198.4387817472], [140.379272448, 123.88800046079999, 257.9132080128, 219.0721435392], [81.210693376, 105.88012692480001, 131.0675659264, 173.17504880639999], [0, 131.2144775424, 512.2395019776, 309.18237304319996]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046177_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, three people, a hat, a glasses, and a boat.", "boxes_value": [[44.210693375999995, 28.88012692480001, 220.9132080128, 142.0721435392], [82.202087424, 0, 265, 170], [32.125366220800004, 34.32244871680001, 265, 170], [0, 0, 260.2636718592, 170], [134.0744628736, 0, 265, 121.43878174720001], [103.379272448, 46.88800046079999, 220.9132080128, 142.0721435392], [44.210693375999995, 28.88012692480001, 94.0675659264, 96.17504880639999], [0, 54.214477542400004, 265, 170]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046180.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[175.697326656, 280.9596557824, 468.49426272000005, 319.520996096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046180_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[73.697326656, 9.959655782399977, 366.49426272000005, 48.520996095999976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046180.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five helmets.", "boxes_value": [[175.697326656, 280.9596557824, 468.49426272000005, 319.520996096], [443.898559552, 299.9891357184, 468.49426272000005, 319.520996096], [363.60095212799996, 292.3934326272, 384.760498048, 307.9465332224], [320.739379904, 283.3508911104, 338.46276857600003, 302.5210571264], [249.84600832, 293.659362816, 272.63317868800004, 314.276306176], [175.697326656, 280.9596557824, 201.378112768, 299.9489135616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046180_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five helmets.", "boxes_value": [[73.697326656, 9.959655782399977, 366.49426272000005, 48.520996095999976], [341.898559552, 28.989135718400007, 366.49426272000005, 48.520996095999976], [261.60095212799996, 21.393432627200013, 282.760498048, 36.94653322239998], [218.73937990399997, 12.350891110400028, 236.46276857600003, 31.521057126400024], [147.84600832, 22.659362815999998, 170.63317868800004, 43.27630617599999], [73.697326656, 9.959655782399977, 99.378112768, 28.948913561600023]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046186.jpg", "text": "Please give me some details about the rectangle in the image . Please point out the objects and their coordinates.", "boxes_value": [[226.4878540288, 231.6973266687, 511.71582028800003, 682.9812011762999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046186_crop.jpg", "text": "Please give me some details about the rectangle in the image . Please point out the objects and their coordinates.", "boxes_value": [[71.4878540288, 113.69732666869999, 356.71582028800003, 564.9812011762999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046186.jpg", "text": "Please give me some details about the rectangle in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two bracelets, a glasses, and two hats.", "boxes_value": [[226.4878540288, 231.6973266687, 511.71582028800003, 682.9812011762999], [110.8245239296, 214.85314938730002, 471.2897949184, 683.1000976797], [226.4878540288, 231.6973266687, 511.71582028800003, 682.9812011762999], [402.7590942208, 528.7047118989, 452.9674682368, 552.7481689759001], [365.6331787264, 674.4808349301, 406.8505248768, 682.3605956851], [256.621887232, 278.5412597775, 340.5220947456, 350.6429443255], [226.4702758912, 232.6583252218, 328.7236327936, 311.314758311], [317.054687488, 460.8793945084, 433.359008768, 594.2282714794]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046186_crop.jpg", "text": "Please give me some details about the rectangle in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two bracelets, a glasses, and two hats.", "boxes_value": [[71.4878540288, 113.69732666869999, 356.71582028800003, 564.9812011762999], [0, 96.85314938730002, 316.2897949184, 565], [71.4878540288, 113.69732666869999, 356.71582028800003, 564.9812011762999], [247.75909422080002, 410.7047118989, 297.9674682368, 434.74816897590006], [210.6331787264, 556.4808349301, 251.85052487680002, 564.3605956851], [101.621887232, 160.5412597775, 185.5220947456, 232.64294432550003], [71.4702758912, 114.6583252218, 173.72363279360002, 193.314758311], [162.054687488, 342.8793945084, 278.359008768, 476.22827147939995]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046188.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[23.6657714638, 86.5010376192, 225.9362793139, 182.4879760896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046188_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[23.6657714638, 24.501037619200005, 225.9362793139, 120.48797608960001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046188.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a lamp, a hat, and three plates.", "boxes_value": [[23.6657714638, 86.5010376192, 225.9362793139, 182.4879760896], [198.4057007153, 112.9057006592, 225.9362793139, 154.0085449216], [148.3267211681, 160.6392211968, 176.6835937814, 182.4879760896], [23.6657714638, 86.5010376192, 38.4755859133, 115.2745361408], [41.8607177769, 90.7324829184, 57.5168457224, 122.8909912064], [55.4011841028, 102.5803222528, 69.3647461165, 125.8529662976]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046188_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a lamp, a hat, and three plates.", "boxes_value": [[23.6657714638, 24.501037619200005, 225.9362793139, 120.48797608960001], [198.4057007153, 50.905700659199994, 225.9362793139, 92.00854492159999], [148.3267211681, 98.63922119680001, 176.6835937814, 120.48797608960001], [23.6657714638, 24.501037619200005, 38.4755859133, 53.274536140799995], [41.8607177769, 28.732482918399995, 57.5168457224, 60.8909912064], [55.4011841028, 40.5803222528, 69.3647461165, 63.852966297600005]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046190.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[179.409667968, 448.3026733568, 239.6280517632, 511.3517455872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046190_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[15.409667968000008, 16.302673356800028, 75.6280517632, 79.35174558720001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046190.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a desk, and two people.", "boxes_value": [[179.409667968, 448.3026733568, 239.6280517632, 511.3517455872], [199.13952637440002, 448.3026733568, 214.0757446656, 473.1254272512], [179.409667968, 494.5695800832, 239.6280517632, 511.3517455872], [214.0757446656, 448.7257690624, 231.7137451008, 473.4190063616], [207.58477785600002, 484.4432373248, 237.1065063168, 511.1798706176], [195.88751224319998, 459.0991211008, 225.1307373312, 495.3049926656]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00046190_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a desk, and two people.", "boxes_value": [[15.409667968000008, 16.302673356800028, 75.6280517632, 79.35174558720001], [35.13952637440002, 16.302673356800028, 50.07574466560001, 41.125427251199994], [15.409667968000008, 62.56958008319998, 75.6280517632, 79.35174558720001], [50.07574466560001, 16.72576906239999, 67.7137451008, 41.4190063616], [43.58477785600002, 52.44323732480001, 73.1065063168, 79.17987061759999], [31.88751224319998, 27.099121100800005, 61.13073733120001, 63.304992665600025]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00046192.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object.", "boxes_value": [[458.04626467599996, 288.413085952, 769.605590831, 421.0428466688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046192_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object.", "boxes_value": [[78.04626467599996, 33.41308595200002, 389.605590831, 166.0428466688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046192.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a sink, three traffic lights, and a radiator.", "boxes_value": [[458.04626467599996, 288.413085952, 769.605590831, 421.0428466688], [527.813964832, 280.3606567424, 770.0440674079999, 512.087646464], [541.000122047, 313.974487296, 622.507202163, 367.5086669824], [686.169433565, 364.1326294016, 769.605590831, 421.0428466688], [458.04626467599996, 288.413085952, 557.010986301, 318.2087402496], [506.80334474, 371.0770263552, 552.563232452, 381.4033813504]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046192_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a sink, three traffic lights, and a radiator.", "boxes_value": [[78.04626467599996, 33.41308595200002, 389.605590831, 166.0428466688], [147.813964832, 25.36065674240001, 390, 199], [161.000122047, 58.97448729600001, 242.507202163, 112.50866698239997], [306.16943356499996, 109.13262940160001, 389.605590831, 166.0428466688], [78.04626467599996, 33.41308595200002, 177.010986301, 63.2087402496], [126.80334474, 116.07702635520002, 172.56323245199997, 126.40338135040002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046193.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[136.159973152, 269.0335693312, 245.4423828443, 403.5357055488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046193_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[28.159973151999992, 34.0335693312, 137.4423828443, 168.53570554880002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046193.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three chairs, a desk, and a person.", "boxes_value": [[136.159973152, 269.0335693312, 245.4423828443, 403.5357055488], [168.4235229174, 302.2907715072, 235.7018432293, 459.004943872], [136.159973152, 290.6077270528, 175.675048798, 403.5357055488], [177.13287350360002, 269.0335693312, 245.4423828443, 365.7584838656], [137.7623291268, 269.862487808, 186.85247800749997, 301.5127563264], [188.05364989070003, 218.4301757952, 251.5704955721, 366.1759033344]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046193_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three chairs, a desk, and a person.", "boxes_value": [[28.159973151999992, 34.0335693312, 137.4423828443, 168.53570554880002], [60.42352291739999, 67.29077150720002, 127.70184322930001, 202], [28.159973151999992, 55.60772705279999, 67.675048798, 168.53570554880002], [69.13287350360002, 34.0335693312, 137.4423828443, 130.7584838656], [29.76232912680001, 34.862487808000026, 78.85247800749997, 66.5127563264], [80.05364989070003, 0, 143.5704955721, 131.1759033344]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046195.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[212.6444701828, 108.5669555712, 282.7441406096, 164.7569580032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046195_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[17.644470182800006, 14.566955571199998, 87.74414060959998, 70.7569580032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046195.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, and four storage boxes.", "boxes_value": [[212.6444701828, 108.5669555712, 282.7441406096, 164.7569580032], [201.876464878, 83.9537353728, 574.5386963024, 471.8449096704], [216.34027103239998, 108.5669555712, 249.99383543239998, 138.7689208832], [247.8366088896, 107.2725830144, 283.215942388, 138.3374633984], [212.6444701828, 136.2280273408, 249.0527953788, 164.7569580032], [247.150878886, 136.7714233344, 282.7441406096, 164.4852295168]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046195_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, and four storage boxes.", "boxes_value": [[17.644470182800006, 14.566955571199998, 87.74414060959998, 70.7569580032], [6.876464878000007, 0, 105, 84], [21.340271032399983, 14.566955571199998, 54.993835432399976, 44.768920883199996], [52.8366088896, 13.272583014399999, 88.21594238799997, 44.33746339839999], [17.644470182800006, 42.2280273408, 54.0527953788, 70.7569580032], [52.15087888599999, 42.7714233344, 87.74414060959998, 70.48522951679999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046206.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify.", "boxes_value": [[218.96228025599999, 180.886779792, 296.38568115199996, 398.325683616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046206_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify.", "boxes_value": [[19.962280255999985, 54.886779792, 97.38568115199996, 272.325683616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046206.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a handbag, two sneakers, and a bottle.", "boxes_value": [[218.96228025599999, 180.886779792, 296.38568115199996, 398.325683616], [218.96228025599999, 180.886779792, 290.908935552, 398.325683616], [270.480834944, 233.507446272, 287.756286592, 270.610412592], [231.2184448, 372.888916032, 246.13812256, 393.697998048], [264.310485824, 375.372558576, 290.059814464, 397.307189952], [280.709533696, 315.388549824, 296.38568115199996, 354.9055176]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046206_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a handbag, two sneakers, and a bottle.", "boxes_value": [[19.962280255999985, 54.886779792, 97.38568115199996, 272.325683616], [19.962280255999985, 54.886779792, 91.908935552, 272.325683616], [71.48083494399998, 107.50744627200001, 88.75628659199998, 144.610412592], [32.218444799999986, 246.888916032, 47.13812256, 267.697998048], [65.31048582400001, 249.37255857600002, 91.059814464, 271.307189952], [81.709533696, 189.388549824, 97.38568115199996, 228.9055176]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046208.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.7595214598000001, 261.7124023296, 462.3037109592, 395.2304077312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046208_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.7595214598000001, 33.712402329600025, 462.3037109592, 167.2304077312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046208.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pillow, two stools, a vase, a carpet, and a cabinet.", "boxes_value": [[0.7595214598000001, 261.7124023296, 462.3037109592, 395.2304077312], [427.8651123315, 265.7969970688, 462.3037109592, 297.7401122816], [235.2082519647, 328.6849364992, 384.94152834790003, 406.0471801856], [335.03051760600005, 338.6671752704, 454.81713868180003, 428.50714112], [203.99182131959998, 271.099304192, 234.2440185523, 290.9007568384], [110.70452882880001, 309.7426757632, 668.2080078255, 492.4220580864], [0.7595214598000001, 261.7124023296, 400.2705078125, 395.2304077312]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046208_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pillow, two stools, a vase, a carpet, and a cabinet.", "boxes_value": [[0.7595214598000001, 33.712402329600025, 462.3037109592, 167.2304077312], [427.8651123315, 37.79699706880001, 462.3037109592, 69.74011228159998], [235.2082519647, 100.68493649919998, 384.94152834790003, 178.04718018559998], [335.03051760600005, 110.66717527039998, 454.81713868180003, 200], [203.99182131959998, 43.09930419199998, 234.2440185523, 62.900756838400014], [110.70452882880001, 81.7426757632, 577, 200], [0.7595214598000001, 33.712402329600025, 400.2705078125, 167.2304077312]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046211.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[186.7557372928, 69.56853070919999, 444.2956425216, 418.93597779379996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046211_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[64.7557372928, 69.56853070919999, 322.2956425216, 418.93597779379996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046211.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cymbal, a person, a glasses, a hat, and a microphone.", "boxes_value": [[186.7557372928, 69.56853070919999, 444.2956425216, 418.93597779379996], [390.6031936512, 377.80984673370006, 444.2956425216, 418.93597779379996], [181.895751936, 68.777221662, 437.5783691264, 681.9704589819], [264.1100131328, 108.8708034305, 316.1440079872, 131.5664820596], [225.361293568, 69.56853070919999, 401.9447441408, 164.77967024400002], [186.7557372928, 136.2796630965, 275.4923705856, 191.9575805632]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046211_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cymbal, a person, a glasses, a hat, and a microphone.", "boxes_value": [[64.7557372928, 69.56853070919999, 322.2956425216, 418.93597779379996], [268.6031936512, 377.80984673370006, 322.2956425216, 418.93597779379996], [59.89575193600001, 68.777221662, 315.5783691264, 506], [142.11001313280002, 108.8708034305, 194.14400798719998, 131.5664820596], [103.36129356800001, 69.56853070919999, 279.9447441408, 164.77967024400002], [64.7557372928, 136.2796630965, 153.49237058559999, 191.9575805632]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046212.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[627.2315673600001, 436.7910156288, 748.9698486528, 511.75897216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046212_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[31.2315673600001, 18.791015628799983, 152.9698486528, 93.75897215999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046212.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[627.2315673600001, 436.7910156288, 748.9698486528, 511.75897216], [602.0928955392, 53.5969848832, 739.4141846016, 498.7650146304], [634.2397460736, 8.169738752, 767.8676757503999, 510.5688476672], [704.1848144639999, 486.5839233536, 748.9698486528, 511.75897216], [710.8328857344, 436.7910156288, 737.181518592, 490.3967895552], [627.2315673600001, 470.0095825408, 675.5156249856, 499.6031494144]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046212_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[31.2315673600001, 18.791015628799983, 152.9698486528, 93.75897215999998], [6.092895539199958, 0, 143.41418460160003, 80.7650146304], [38.23974607360003, 0, 171.86767575039994, 92.56884766719998], [108.18481446399994, 68.58392335360003, 152.9698486528, 93.75897215999998], [114.83288573439995, 18.791015628799983, 141.18151859199997, 72.39678955519997], [31.2315673600001, 52.00958254080001, 79.51562498559997, 81.60314941439998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046215.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please mention the objects and their locations.", "boxes_value": [[270.4409179815, 123.4260421632, 566.3741455134, 458.0676879872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046215_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please mention the objects and their locations.", "boxes_value": [[74.44091798149998, 84.4260421632, 370.3741455134, 419.0676879872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046215.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please mention the objects and their locations. For your reference, objects involved in this region include two guns, two people, two gloves, and a helmet.", "boxes_value": [[270.4409179815, 123.4260421632, 566.3741455134, 458.0676879872], [270.4409179815, 153.0180663808, 566.3741455134, 458.0676879872], [465.3920898522, 218.2355346432, 649.8244629177, 401.2653198336], [181.3804321392, 2.9476318208, 506.76672361350006, 512.7662353408], [436.6403808417, 122.8637085184, 584.6069335743, 512.0649414144], [309.7989635496, 274.8717363712, 397.2188358033, 341.9757228032], [393.5250384093, 311.1940776448, 442.775670666, 369.0635705344], [436.0037086734, 123.4260421632, 530.8111006569, 185.604965376]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046215_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please mention the objects and their locations. For your reference, objects involved in this region include two guns, two people, two gloves, and a helmet.", "boxes_value": [[74.44091798149998, 84.4260421632, 370.3741455134, 419.0676879872], [74.44091798149998, 114.01806638080001, 370.3741455134, 419.0676879872], [269.3920898522, 179.2355346432, 444, 362.2653198336], [0, 0, 310.76672361350006, 473], [240.6403808417, 83.8637085184, 388.6069335743, 473], [113.79896354959999, 235.87173637119997, 201.2188358033, 302.9757228032], [197.5250384093, 272.1940776448, 246.775670666, 330.0635705344], [240.0037086734, 84.4260421632, 334.81110065689995, 146.604965376]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046216.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[203.89178464, 28.211425776000002, 476.83288575999995, 302.008239744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046216_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[68.89178464, 28.211425776000002, 341.83288575999995, 302.008239744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046216.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a suv.", "boxes_value": [[203.89178464, 28.211425776000002, 476.83288575999995, 302.008239744], [429.74365235199997, 29.49810792, 524.54052736, 176.509155264], [376.008605952, 48.761596656, 437.347656256, 122.77410888], [257.892822272, 5.165252688, 279.691040064, 93.37188719999999], [235.58770752, 0.051177983999999996, 259.413635264, 78.670776384], [203.89178464, 28.211425776000002, 476.83288575999995, 302.008239744]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046216_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a suv.", "boxes_value": [[68.89178464, 28.211425776000002, 341.83288575999995, 302.008239744], [294.74365235199997, 29.49810792, 389.54052736000006, 176.509155264], [241.00860595199998, 48.761596656, 302.347656256, 122.77410888], [122.89282227199999, 5.165252688, 144.691040064, 93.37188719999999], [100.58770752000001, 0.051177983999999996, 124.41363526399999, 78.670776384], [68.89178464, 28.211425776000002, 341.83288575999995, 302.008239744]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046219.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[21.776184064, 522.478759803, 379.2028808704, 662.310668943]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046219_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[21.776184064, 35.478759803, 379.2028808704, 175.310668943]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046219.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[21.776184064, 522.478759803, 379.2028808704, 662.310668943], [247.9640502784, 539.904052765, 270.172790528, 596.28015139], [160.837280256, 536.1456298659999, 178.9459838976, 594.230102543], [81.5690307584, 523.845458983, 90.794250496, 587.738403329], [21.776184064, 522.478759803, 39.8848877056, 581.92993166], [362.296447744, 633.718994139, 379.2028808704, 662.310668943]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046219_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[21.776184064, 35.478759803, 379.2028808704, 175.310668943], [247.9640502784, 52.90405276499996, 270.172790528, 109.28015139000001], [160.837280256, 49.14562986599992, 178.9459838976, 107.23010254300004], [81.5690307584, 36.84545898299996, 90.794250496, 100.738403329], [21.776184064, 35.478759803, 39.8848877056, 94.92993165999997], [362.296447744, 146.71899413899996, 379.2028808704, 175.310668943]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046220.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[222.37371824640002, 89.7070312448, 767.4799805184, 198.864379904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046220_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[136.37371824640002, 27.707031244800007, 681.4799805184, 136.864379904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046220.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include six flowers.", "boxes_value": [[222.37371824640002, 89.7070312448, 767.4799805184, 198.864379904], [459.96948241920006, 89.7070312448, 598.0870361088, 197.7505492992], [703.9906006271999, 95.276306176, 767.4799805184, 198.864379904], [222.37371824640002, 109.8996581888, 294.0058593792, 196.6367187456], [459.96948241920006, 89.7070312448, 598.0870361088, 197.7505492992], [703.9906006271999, 95.276306176, 767.4799805184, 198.864379904], [222.37371824640002, 109.8996581888, 294.0058593792, 196.6367187456]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046220_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include six flowers.", "boxes_value": [[136.37371824640002, 27.707031244800007, 681.4799805184, 136.864379904], [373.96948241920006, 27.707031244800007, 512.0870361088, 135.7505492992], [617.9906006271999, 33.276306176000006, 681.4799805184, 136.864379904], [136.37371824640002, 47.899658188800004, 208.0058593792, 134.6367187456], [373.96948241920006, 27.707031244800007, 512.0870361088, 135.7505492992], [617.9906006271999, 33.276306176000006, 681.4799805184, 136.864379904], [136.37371824640002, 47.899658188800004, 208.0058593792, 134.6367187456]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046222.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[345.15942382080004, 187.053222656, 767.9682617087999, 460.60980224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046222_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[106.15942382080004, 69.053222656, 528.9682617087999, 342.60980224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046222.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a paddle, two people, and three boats.", "boxes_value": [[345.15942382080004, 187.053222656, 767.9682617087999, 460.60980224], [322.1910400512, 227.382629376, 395.95007324159997, 287.993286144], [330.6614990592, 248.8551635968, 394.7584228608, 291.1413574144], [440.5805664, 187.053222656, 469.4521484544, 224.4743042048], [345.15942382080004, 189.0988158976, 450.26489256959997, 242.1232300032], [269.4664306944, 274.0635376128, 475.86926269439994, 316.3891601408], [729.822998016, 410.4461059584, 767.9682617087999, 460.60980224]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046222_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a paddle, two people, and three boats.", "boxes_value": [[106.15942382080004, 69.053222656, 528.9682617087999, 342.60980224], [83.19104005119999, 109.38262937600001, 156.95007324159997, 169.99328614400002], [91.66149905920003, 130.8551635968, 155.7584228608, 173.14135741439998], [201.5805664, 69.053222656, 230.4521484544, 106.4743042048], [106.15942382080004, 71.09881589759999, 211.26489256959997, 124.1232300032], [30.466430694400003, 156.06353761280002, 236.86926269439994, 198.38916014080002], [490.82299801600004, 292.4461059584, 528.9682617087999, 342.60980224]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046223.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[327.0107421837, 331.1124877824, 732.0054931441, 405.239135744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046223_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[102.01074218370002, 19.112487782400024, 507.0054931441, 93.23913574400001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046223.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, and two dogs.", "boxes_value": [[327.0107421837, 331.1124877824, 732.0054931441, 405.239135744], [620.0855712734, 334.7141113344, 635.3232422157, 373.7777099776], [639.2019042679, 331.1124877824, 665.2442626774, 400.0971679744], [692.1177978897999, 331.9436035072, 714.2816162105, 402.3135375872], [712.0616454784999, 376.4938964992, 732.0054931441, 405.239135744], [327.0107421837, 375.6152343552, 342.8323974935, 396.3980102656]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046223_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, and two dogs.", "boxes_value": [[102.01074218370002, 19.112487782400024, 507.0054931441, 93.23913574400001], [395.0855712734, 22.714111334400002, 410.3232422157, 61.77770997760001], [414.2019042679, 19.112487782400024, 440.2442626774, 88.09716797440001], [467.11779788979993, 19.94360350720001, 489.28161621050003, 90.31353758720002], [487.06164547849994, 64.49389649919999, 507.0054931441, 93.23913574400001], [102.01074218370002, 63.615234355200016, 117.83239749350003, 84.39801026560002]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046224.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[135.021545428, 16.2460326912, 652.39819337, 502.1512451072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046224_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[130.021545428, 16.2460326912, 647.39819337, 502.1512451072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046224.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a soccer, four people, and a sneakers.", "boxes_value": [[135.021545428, 16.2460326912, 652.39819337, 502.1512451072], [569.686279316, 433.5053100544, 643.583862306, 502.1512451072], [8.701782243, 4.3361206272, 266.686767578, 407.396606464], [119.641845708, 27.311889664, 342.83496093, 429.0595092992], [135.021545428, 51.6943359488, 546.897216789, 494.7982788096], [297.071044886, 16.2460326912, 652.39819337, 467.79003904], [412.33459474899996, 400.9028320256, 455.602905296, 450.234191872]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046224_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a soccer, four people, and a sneakers.", "boxes_value": [[130.021545428, 16.2460326912, 647.39819337, 502.1512451072], [564.686279316, 433.5053100544, 638.583862306, 502.1512451072], [3.7017822430000002, 4.3361206272, 261.686767578, 407.396606464], [114.641845708, 27.311889664, 337.83496093, 429.0595092992], [130.021545428, 51.6943359488, 541.897216789, 494.7982788096], [292.071044886, 16.2460326912, 647.39819337, 467.79003904], [407.33459474899996, 400.9028320256, 450.602905296, 450.234191872]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046225.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[202.1253051955, 482.300537088, 505.14886475850005, 605.597521664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046225_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[76.1253051955, 31.300537088, 379, 154.59752166400006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046225.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a sneakers, a car, and two suvs.", "boxes_value": [[202.1253051955, 482.300537088, 505.14886475850005, 605.597521664], [132.1221923685, 11.796142592, 483.57269289450005, 607.367919936], [351.733357086, 563.492555904, 448.5749472615, 605.597521664], [202.1253051955, 495.37451174399996, 246.85119630050002, 523.938964864], [425.6134033425, 509.75671385600003, 502.7998657425, 545.972168], [475.4354247835, 482.300537088, 505.14886475850005, 546.28637696]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046225_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a sneakers, a car, and two suvs.", "boxes_value": [[76.1253051955, 31.300537088, 379, 154.59752166400006], [6.122192368500009, 0, 357.57269289450005, 156.36791993600002], [225.733357086, 112.49255590400003, 322.5749472615, 154.59752166400006], [76.1253051955, 44.37451174399996, 120.85119630050002, 72.93896486400001], [299.6134033425, 58.75671385600003, 376.7998657425, 94.97216800000001], [349.4354247835, 31.300537088, 379, 95.28637695999998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046227.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[84.18658443950001, 224.2904052736, 211.2482299678, 307.9346924032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046227_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[32.18658443950001, 21.290405273599987, 159.2482299678, 104.93469240320002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046227.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, five people, and a trolley.", "boxes_value": [[84.18658443950001, 224.2904052736, 211.2482299678, 307.9346924032], [174.74102785699998, 263.5473022464, 213.4757690364, 290.8252563456], [84.18658443950001, 225.1134033408, 111.28057864670001, 307.9346924032], [155.4799194318, 224.2904052736, 171.184753405, 249.7707519488], [177.7551879974, 249.7707519488, 211.2482299678, 290.795715328], [200.3509521274, 249.610534656, 224.5493164047, 288.5521850368], [204.357299811, 227.815979008, 220.06219484439998, 254.5783691264], [106.56445312849999, 269.7669067264, 167.4346923875, 290.9315795968]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046227_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, five people, and a trolley.", "boxes_value": [[32.18658443950001, 21.290405273599987, 159.2482299678, 104.93469240320002], [122.74102785699998, 60.54730224640002, 161.4757690364, 87.8252563456], [32.18658443950001, 22.113403340800005, 59.28057864670001, 104.93469240320002], [103.4799194318, 21.290405273599987, 119.18475340500001, 46.77075194880001], [125.75518799739999, 46.77075194880001, 159.2482299678, 87.79571532800003], [148.3509521274, 46.610534656, 172.5493164047, 85.55218503679998], [152.357299811, 24.815979008, 168.06219484439998, 51.57836912639999], [54.56445312849999, 66.76690672640001, 115.43469238750001, 87.93157959680002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046228.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[113.35784910950001, 362.5919189504, 183.40942385079998, 459.7390136832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046228_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[18.35784910950001, 24.591918950399986, 88.40942385079998, 121.73901368320003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046228.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a plate, two watermelons, a mango, and two avocados.", "boxes_value": [[113.35784910950001, 362.5919189504, 183.40942385079998, 459.7390136832], [0.9300537355, 386.074523904, 327.30780032129996, 481.1885376], [125.2625121993, 362.5919189504, 172.40319822159998, 381.9532470784], [106.7429809276, 364.544921856, 261.0783691198, 472.078308096], [123.1996460016, 365.5741577216, 174.9463500876, 386.1770019328], [113.35784910950001, 429.1134033408, 152.0798339662, 459.7390136832], [145.743469211, 375.6066894336, 183.40942385079998, 399.8959350784]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6]]}, {"image_path": "objects365_v1_00046228_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a plate, two watermelons, a mango, and two avocados.", "boxes_value": [[18.35784910950001, 24.591918950399986, 88.40942385079998, 121.73901368320003], [0, 48.07452390399999, 105, 143.18853760000002], [30.262512199300005, 24.591918950399986, 77.40319822159998, 43.95324707840001], [11.742980927600001, 26.544921855999974, 105, 134.078308096], [28.1996460016, 27.57415772159999, 79.9463500876, 48.17700193280001], [18.35784910950001, 91.1134033408, 57.0798339662, 121.73901368320003], [50.74346921099999, 37.606689433600025, 88.40942385079998, 61.89593507839999]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6]]}, {"image_path": "objects365_v1_00046229.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[180.37329103919998, 160.7601928704, 403.23034665200004, 285.9833984512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046229_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[56.373291039199984, 31.760192870400004, 279.23034665200004, 156.9833984512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046229.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include two pillows, a bench, a chair, and a cabinet.", "boxes_value": [[180.37329103919998, 160.7601928704, 403.23034665200004, 285.9833984512], [304.8933105812, 193.3746948096, 403.23034665200004, 285.0286865408], [221.83184813559998, 201.353759744, 308.7121582022, 285.9833984512], [141.6346435712, 185.7368774656, 543.575561498, 422.5097046016], [317.9445801058, 137.2756958208, 385.453979474, 195.509033216], [180.37329103919998, 160.7601928704, 277.922363264, 239.3676147712]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046229_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include two pillows, a bench, a chair, and a cabinet.", "boxes_value": [[56.373291039199984, 31.760192870400004, 279.23034665200004, 156.9833984512], [180.8933105812, 64.37469480959999, 279.23034665200004, 156.02868654079998], [97.83184813559998, 72.353759744, 184.7121582022, 156.9833984512], [17.6346435712, 56.7368774656, 334, 188], [193.94458010580001, 8.275695820799996, 261.453979474, 66.509033216], [56.373291039199984, 31.760192870400004, 153.922363264, 110.36761477120001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046230.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0, 396.2801513472, 570.286499013, 511.9938964992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046230_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0, 29.28015134719999, 570.286499013, 144.9938964992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046230.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, and four people.", "boxes_value": [[0, 396.2801513472, 570.286499013, 511.9938964992], [532.08020019, 431.6981201408, 570.286499013, 451.2828368896], [0, 402.3233642496, 67.439086946, 511.9808349696], [199.396057091, 396.2801513472, 251.23370364400003, 511.9938964992], [445.679565408, 411.9821777408, 473.03576658800006, 489.09069824], [472.703247093, 411.1311645696, 498.913696281, 487.6932983296]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046230_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, and four people.", "boxes_value": [[0, 29.28015134719999, 570.286499013, 144.9938964992], [532.08020019, 64.69812014079997, 570.286499013, 84.28283688959999], [0, 35.32336424959999, 67.439086946, 144.98083496959998], [199.396057091, 29.28015134719999, 251.23370364400003, 144.9938964992], [445.679565408, 44.98217774080001, 473.03576658800006, 122.09069824], [472.703247093, 44.1311645696, 498.913696281, 120.6932983296]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046234.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[0.0861816105, 440.2858276352, 229.198730454, 512.05017088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046234_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[0.0861816105, 18.285827635200008, 229.198730454, 90]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046234.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and two cars.", "boxes_value": [[0.0861816105, 440.2858276352, 229.198730454, 512.05017088], [82.4579467605, 446.8939209216, 117.997680663, 512.05017088], [121.981750479, 444.5697631744, 136.320068367, 478.3592529408], [198.62750240999998, 440.2858276352, 229.198730454, 502.5440673792], [15.541137715500001, 456.4573974528, 84.455566398, 502.1422118912], [0.0861816105, 490.504089344, 52.1116943535, 511.6459350528]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046234_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and two cars.", "boxes_value": [[0.0861816105, 18.285827635200008, 229.198730454, 90], [82.4579467605, 24.8939209216, 117.997680663, 90], [121.981750479, 22.56976317440001, 136.320068367, 56.35925294079999], [198.62750240999998, 18.285827635200008, 229.198730454, 80.54406737919999], [15.541137715500001, 34.45739745280002, 84.455566398, 80.14221189120002], [0.0861816105, 68.50408934400002, 52.1116943535, 89.64593505279998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046235.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[333.8884165632, 260.307546368, 484.2983560704, 327.0030852608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046235_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[37.888416563199996, 17.307546367999976, 188.29835607040002, 84.00308526079999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046235.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three gloves.", "boxes_value": [[333.8884165632, 260.307546368, 484.2983560704, 327.0030852608], [231.8359985664, 225.4821777408, 510.7756347648, 511.55133056], [417.20166013439996, 204.9850463744, 562.4641113600001, 510.66015626239994], [333.8884165632, 274.5665926144, 364.24638604800003, 320.1035467776], [417.1428479232, 260.307546368, 449.34069427199995, 291.125485056], [438.30143262720003, 301.7047774208, 484.2983560704, 327.0030852608]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046235_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three gloves.", "boxes_value": [[37.888416563199996, 17.307546367999976, 188.29835607040002, 84.00308526079999], [0, 0, 214.77563476479997, 100], [121.20166013439996, 0, 225, 100], [37.888416563199996, 31.566592614400008, 68.24638604800003, 77.1035467776], [121.14284792320001, 17.307546367999976, 153.34069427199995, 48.125485056], [142.30143262720003, 58.704777420799985, 188.29835607040002, 84.00308526079999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046238.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.0376587008, 227.5416869888, 134.8821411072, 488.5044555776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046238_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[24.0376587008, 65.5416869888, 118.8821411072, 326.5044555776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046238.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a gloves, two sneakers, and a skiboard.", "boxes_value": [[40.0376587008, 227.5416869888, 134.8821411072, 488.5044555776], [40.0376587008, 227.5416869888, 134.8821411072, 488.5044555776], [56.87396236800001, 234.8374633984, 178.6565551872, 452.5869750784], [101.89458048, 336.8215542272, 121.26643776, 356.8852635648], [44.4709208832, 465.3330719744, 85.3890276864, 484.6060932608], [85.0925196288, 467.112120064, 134.0163429888, 489.943237632], [0.2491454976, 447.9238281216, 199.36450199040002, 460.6549682688]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046238_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a gloves, two sneakers, and a skiboard.", "boxes_value": [[24.0376587008, 65.5416869888, 118.8821411072, 326.5044555776], [24.0376587008, 65.5416869888, 118.8821411072, 326.5044555776], [40.87396236800001, 72.83746339839999, 142, 290.5869750784], [85.89458048, 174.8215542272, 105.26643776, 194.88526356480003], [28.4709208832, 303.3330719744, 69.3890276864, 322.6060932608], [69.0925196288, 305.112120064, 118.0163429888, 327.943237632], [0, 285.9238281216, 142, 298.6549682688]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046239.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations.", "boxes_value": [[184.74707033520002, 242.1015625216, 214.8646850524, 422.535278336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046239_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations.", "boxes_value": [[7.747070335200021, 46.10156252159999, 37.864685052400006, 226.53527833599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046239.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, a person, a leather shoes, a chair, and a desk.", "boxes_value": [[184.74707033520002, 242.1015625216, 214.8646850524, 422.535278336], [184.74707033520002, 242.1015625216, 214.8646850524, 264.6897583104], [196.79412840679998, 261.6779785216, 214.8646850524, 287.2780151296], [160.9376830954, 270.3536376832, 243.44982910610003, 425.5447997952], [192.05718996619999, 407.6909179904, 213.1323852541, 422.535278336], [159.6391601487, 315.5744018432, 272.1656494325, 444.12408448], [111.64605713750001, 324.2819213824, 211.72637941699998, 427.286682112]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046239_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, a person, a leather shoes, a chair, and a desk.", "boxes_value": [[7.747070335200021, 46.10156252159999, 37.864685052400006, 226.53527833599998], [7.747070335200021, 46.10156252159999, 37.864685052400006, 68.68975831040001], [19.794128406799985, 65.67797852159998, 37.864685052400006, 91.27801512960002], [0, 74.35363768320002, 45, 229.5447997952], [15.057189966199985, 211.6909179904, 36.1323852541, 226.53527833599998], [0, 119.57440184320001, 45, 248.12408448000002], [0, 128.28192138240001, 34.726379416999976, 231.286682112]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046240.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[36.5964355144, 172.2801513472, 641.3022460914, 267.2236327936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046240_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[36.5964355144, 24.28015134719999, 641.3022460914, 119.22363279360002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046240.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a pillow, a book, and two speakers.", "boxes_value": [[36.5964355144, 172.2801513472, 641.3022460914, 267.2236327936], [503.2990722455, 187.6182251008, 683.1907959134, 338.7732544], [570.6497802441, 233.6028442624, 641.3022460914, 267.2236327936], [36.5964355144, 232.0175171072, 70.1359253096, 243.4458008064], [36.6458739913, 172.2801513472, 84.4949951385, 221.242065408], [78.9311523544, 168.2742309376, 119.6585693241, 219.4616699392]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046240_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a pillow, a book, and two speakers.", "boxes_value": [[36.5964355144, 24.28015134719999, 641.3022460914, 119.22363279360002], [503.2990722455, 39.618225100800004, 683, 142], [570.6497802441, 85.6028442624, 641.3022460914, 119.22363279360002], [36.5964355144, 84.0175171072, 70.1359253096, 95.4458008064], [36.6458739913, 24.28015134719999, 84.4949951385, 73.242065408], [78.9311523544, 20.27423093760001, 119.6585693241, 71.4616699392]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046244.jpg", "text": "In the image , please describe the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[350.4185790987, 103.8922729472, 474.92602537259995, 369.629333504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046244_crop.jpg", "text": "In the image , please describe the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[31.41857909869998, 66.8922729472, 155.92602537259995, 332.629333504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046244.jpg", "text": "In the image , please describe the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two sandals, a belt, two hats, and a handbag.", "boxes_value": [[350.4185790987, 103.8922729472, 474.92602537259995, 369.629333504], [397.4437255626, 102.7570190336, 489.7608642591, 381.0178832896], [271.0805663763, 103.4117431808, 467.5000000269, 382.3273925632], [350.4185790987, 346.3602905088, 376.90954591250005, 369.629333504], [375.477539085, 356.0258788864, 394.09277340949996, 382.1588134912], [355.2790527055, 219.4324340736, 395.5263671875, 230.9316406272], [355.0052490018, 103.8922729472, 382.6582031009, 121.4149170176], [407.0256347394, 103.8922729472, 437.4165038917, 130.1762695168], [439.3331298859, 205.195251456, 474.92602537259995, 235.5861206016]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046244_crop.jpg", "text": "In the image , please describe the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two sandals, a belt, two hats, and a handbag.", "boxes_value": [[31.41857909869998, 66.8922729472, 155.92602537259995, 332.629333504], [78.44372556259998, 65.7570190336, 170.7608642591, 344.0178832896], [0, 66.4117431808, 148.5000000269, 345.3273925632], [31.41857909869998, 309.3602905088, 57.90954591250005, 332.629333504], [56.47753908499999, 319.0258788864, 75.09277340949996, 345.1588134912], [36.27905270550002, 182.4324340736, 76.5263671875, 193.9316406272], [36.005249001799996, 66.8922729472, 63.65820310089998, 84.4149170176], [88.0256347394, 66.8922729472, 118.41650389170002, 93.1762695168], [120.33312988590001, 168.195251456, 155.92602537259995, 198.5861206016]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046247.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[254.74621581660003, 155.5667724288, 394.6307373283, 291.317138688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046247_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[35.74621581660003, 34.56677242879999, 175.63073732829997, 170.317138688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046247.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, two boots, a ladder, a bottle, and a cup.", "boxes_value": [[254.74621581660003, 155.5667724288, 394.6307373283, 291.317138688], [253.3148803461, 47.0385742336, 355.6860351681, 286.2944335872], [254.74621581660003, 233.1279297024, 280.11376949749996, 281.9364623872], [328.60131832919996, 254.9633178624, 344.97790526340003, 285.4686889472], [342.6900635142, 210.6934814208, 388.5332031375, 281.750366208], [311.2664794953, 238.9307861504, 324.1143798916, 291.317138688], [381.3414307013, 155.5667724288, 394.6307373283, 189.9184570368]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046247_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, two boots, a ladder, a bottle, and a cup.", "boxes_value": [[35.74621581660003, 34.56677242879999, 175.63073732829997, 170.317138688], [34.31488034610001, 0, 136.68603516809998, 165.2944335872], [35.74621581660003, 112.1279297024, 61.11376949749996, 160.93646238719998], [109.60131832919996, 133.9633178624, 125.97790526340003, 164.4686889472], [123.6900635142, 89.6934814208, 169.53320313749998, 160.750366208], [92.26647949530002, 117.9307861504, 105.11437989159998, 170.317138688], [162.3414307013, 34.56677242879999, 175.63073732829997, 68.91845703679999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046249.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[174.5989380096, 99.5003662336, 473.2280273408, 256.414550784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046249_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[75.5989380096, 39.500366233600005, 374.2280273408, 196.41455078400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046249.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a picture, a storage box, two books, and a laptop.", "boxes_value": [[174.5989380096, 99.5003662336, 473.2280273408, 256.414550784], [174.5989380096, 99.5003662336, 254.3911132672, 243.7132568576], [242.3306274304, 33.5075073024, 378.4850463744, 244.8710327296], [334.2543334912, 219.5477905408, 473.2280273408, 256.3959961088], [261.098266624, 239.8660278272, 331.8572998144, 256.414550784], [380.9320678912, 215.8992920064, 439.1370849792, 224.4588622848], [189.2253417984, 190.606750464, 315.9522094592, 250.7482910208]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046249_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a picture, a storage box, two books, and a laptop.", "boxes_value": [[75.5989380096, 39.500366233600005, 374.2280273408, 196.41455078400003], [75.5989380096, 39.500366233600005, 155.3911132672, 183.7132568576], [143.3306274304, 0, 279.4850463744, 184.8710327296], [235.2543334912, 159.5477905408, 374.2280273408, 196.39599610879998], [162.09826662400002, 179.8660278272, 232.8572998144, 196.41455078400003], [281.9320678912, 155.8992920064, 340.1370849792, 164.4588622848], [90.2253417984, 130.606750464, 216.9522094592, 190.7482910208]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046250.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[164.7845459078, 240.3950805504, 401.8493652229, 482.051025408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046250_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[59.78454590780001, 61.395080550399996, 296.8493652229, 303.051025408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046250.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three chairs, two desks, a laptop, and two converters.", "boxes_value": [[164.7845459078, 240.3950805504, 401.8493652229, 482.051025408], [59.675964378100005, 251.8341675008, 261.5304565184, 446.7580566528], [303.6976318192, 238.9630127104, 439.74658202349997, 466.6658935296], [164.7845459078, 240.3950805504, 310.8581543017, 456.6412353536], [146.16735840820002, 277.6295165952, 531.400512694, 511.94750976], [0.09381100619999999, 370.7156371968, 379.5986328176, 513.0245361152], [174.351135258, 264.9904174592, 269.7798462186, 303.6528320512], [322.4910888467, 438.5103149568, 342.27600096820004, 455.9675903488], [372.53344723159995, 466.90448, 401.8493652229, 482.051025408]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046250_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three chairs, two desks, a laptop, and two converters.", "boxes_value": [[59.78454590780001, 61.395080550399996, 296.8493652229, 303.051025408], [0, 72.83416750079999, 156.5304565184, 267.7580566528], [198.6976318192, 59.963012710399994, 334.74658202349997, 287.6658935296], [59.78454590780001, 61.395080550399996, 205.85815430169998, 277.6412353536], [41.167358408200016, 98.62951659520002, 356, 332.94750976], [0, 191.7156371968, 274.5986328176, 333], [69.351135258, 85.99041745919999, 164.77984621860003, 124.6528320512], [217.4910888467, 259.5103149568, 237.27600096820004, 276.9675903488], [267.53344723159995, 287.90448, 296.8493652229, 303.051025408]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046251.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[188.3298950144, 275.5160522392, 305.901855488, 393.08966062999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046251_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[30.329895014399995, 29.516052239200008, 147.90185548800002, 147.08966062999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046251.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a cabinet, and two books.", "boxes_value": [[188.3298950144, 275.5160522392, 305.901855488, 393.08966062999997], [188.3298950144, 275.5160522392, 305.901855488, 360.25378415319994], [191.5059204096, 357.07617184519995, 304.842651392, 393.08966062999997], [84.9074706944, 308.4986572316, 414.0875854336, 355.784118675], [211.0808105472, 355.2305297852, 280.5400390656, 369.8390502674], [211.8679199232, 366.27746583540005, 275.0474853376, 374.14758303260004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046251_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a cabinet, and two books.", "boxes_value": [[30.329895014399995, 29.516052239200008, 147.90185548800002, 147.08966062999997], [30.329895014399995, 29.516052239200008, 147.90185548800002, 114.25378415319994], [33.50592040960001, 111.07617184519995, 146.842651392, 147.08966062999997], [0, 62.49865723160002, 177, 109.784118675], [53.0808105472, 109.23052978520002, 122.54003906560001, 123.83905026740001], [53.86791992319999, 120.27746583540005, 117.0474853376, 128.14758303260004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046252.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[368.0241699328, 331.6351318644, 462.0724487168, 478.9051513673]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046252_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[24.02416993280002, 37.63513186440002, 118.07244871680001, 184.9051513673]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046252.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two boots, and two gloves.", "boxes_value": [[368.0241699328, 331.6351318644, 462.0724487168, 478.9051513673], [365.601196288, 203.2261963256, 462.97973632, 480.52331542599995], [397.8054199296, 431.36975096950005, 430.1639404544, 474.32348635], [368.0241699328, 432.5151367152, 393.7963867136, 478.9051513673], [375.5512695296, 342.3122558891, 400.2189941248, 372.502563517], [446.9772338688, 331.6351318644, 462.0724487168, 372.502563517]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046252_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two boots, and two gloves.", "boxes_value": [[24.02416993280002, 37.63513186440002, 118.07244871680001, 184.9051513673], [21.601196287999983, 0, 118.97973631999997, 186.52331542599995], [53.80541992960002, 137.36975096950005, 86.1639404544, 180.32348635], [24.02416993280002, 138.51513671520001, 49.796386713599986, 184.9051513673], [31.551269529600006, 48.312255889100015, 56.21899412480002, 78.502563517], [102.9772338688, 37.63513186440002, 118.07244871680001, 78.502563517]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046253.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[231.8340454424, 295.763061504, 400.8251953168, 365.8669433344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046253_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[42.8340454424, 17.763061504000007, 211.82519531679998, 87.86694333439999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046253.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a cabinet, a tea pot, an oven, and an induction cooker.", "boxes_value": [[231.8340454424, 295.763061504, 400.8251953168, 365.8669433344], [287.5683593802, 320.622619648, 338.6506957745, 358.5694580224], [340.4020996292, 327.0443725824, 400.8251953168, 365.8669433344], [385.0166015545, 323.1395263488, 404.996215803, 365.7432861184], [329.95410157929996, 295.763061504, 349.5871582128, 316.7434692608], [283.4194336025, 341.2514038272, 332.5195312661, 361.526550272], [231.8340454424, 321.5791015424, 330.539855971, 341.3038940672]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046253_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a cabinet, a tea pot, an oven, and an induction cooker.", "boxes_value": [[42.8340454424, 17.763061504000007, 211.82519531679998, 87.86694333439999], [98.56835938019998, 42.62261964800001, 149.65069577449998, 80.5694580224], [151.4020996292, 49.0443725824, 211.82519531679998, 87.86694333439999], [196.01660155450003, 45.13952634880002, 215.99621580299998, 87.74328611840002], [140.95410157929996, 17.763061504000007, 160.58715821279998, 38.7434692608], [94.41943360250002, 63.25140382720002, 143.5195312661, 83.52655027200001], [42.8340454424, 43.5791015424, 141.539855971, 63.30389406720002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046254.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[371.711303733, 215.510498048, 619.434326203, 511.8492431872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046254_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[62.71130373300002, 74.51049804799999, 310.43432620299996, 370.8492431872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046254.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a hat, and a bottle.", "boxes_value": [[371.711303733, 215.510498048, 619.434326203, 511.8492431872], [334.913085969, 254.6312866304, 402.73083496799995, 511.2008056832], [370.9560547, 222.3822631936, 492.364257847, 511.2008056832], [507.356445288, 215.510498048, 619.434326203, 511.8492431872], [514.381243376, 218.006716416, 572.4770954210001, 245.2494041088], [371.711303733, 384.7092284928, 395.28356937300003, 426.1639404544]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046254_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a hat, and a bottle.", "boxes_value": [[62.71130373300002, 74.51049804799999, 310.43432620299996, 370.8492431872], [25.913085969000008, 113.63128663040001, 93.73083496799995, 370.2008056832], [61.95605469999998, 81.38226319360001, 183.36425784699998, 370.2008056832], [198.35644528799997, 74.51049804799999, 310.43432620299996, 370.8492431872], [205.38124337600004, 77.00671641599999, 263.47709542100006, 104.24940410880001], [62.71130373300002, 243.70922849279998, 86.28356937300003, 285.1639404544]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046256.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[297.677001984, 251.5122680832, 607.1398925568, 391.6395263488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046256_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[77.67700198400001, 35.51226808320001, 387.13989255679996, 175.63952634880002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046256.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, four people, and a pickup truck.", "boxes_value": [[297.677001984, 251.5122680832, 607.1398925568, 391.6395263488], [297.677001984, 302.2775878656, 314.7471923712, 330.5090942464], [349.5679931904, 241.1489868288, 404.95275878399997, 411.2165527552], [426.31262208, 262.1320800768, 464.7813720576, 390.361328128], [549.9394531584, 251.5122680832, 598.3662109439999, 391.6395263488], [529.3885498367999, 297.283752448, 607.1398925568, 323.7393798656], [417.0766601472, 274.7808838144, 432.6243896832, 336.2380370944]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00046256_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, four people, and a pickup truck.", "boxes_value": [[77.67700198400001, 35.51226808320001, 387.13989255679996, 175.63952634880002], [77.67700198400001, 86.27758786560003, 94.74719237120001, 114.50909424640002], [129.5679931904, 25.14898682879999, 184.95275878399997, 195.21655275519998], [206.31262207999998, 46.13208007679998, 244.78137205759998, 174.36132812800003], [329.93945315840006, 35.51226808320001, 378.36621094399993, 175.63952634880002], [309.38854983679994, 81.28375244799997, 387.13989255679996, 107.73937986559997], [197.07666014720002, 58.7808838144, 212.62438968319998, 120.2380370944]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00046257.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[118.659973152, 429.84143063999994, 315.831054672, 521.175292992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046257_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[49.659973152000006, 22.841430639999942, 246.831054672, 114.17529299199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046257.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a wine glass, a bottle, and three chairs.", "boxes_value": [[118.659973152, 429.84143063999994, 315.831054672, 521.175292992], [118.659973152, 461.891723616, 132.025817856, 493.764160176], [186.295166016, 482.08642581600003, 206.165344224, 521.175292992], [274.46160888, 466.50976560000004, 313.95062256, 513.5205078], [244.374755856, 455.22717285600004, 288.564819312, 500.357543976], [261.298584, 429.84143063999994, 315.831054672, 489.07495116]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046257_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a wine glass, a bottle, and three chairs.", "boxes_value": [[49.659973152000006, 22.841430639999942, 246.831054672, 114.17529299199998], [49.659973152000006, 54.89172361599998, 63.025817856, 86.76416017600002], [117.295166016, 75.08642581600003, 137.165344224, 114.17529299199998], [205.46160888000003, 59.50976560000004, 244.95062256, 106.52050780000002], [175.374755856, 48.22717285600004, 219.564819312, 93.35754397599999], [192.298584, 22.841430639999942, 246.831054672, 82.07495116000001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046258.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[561.2128906388, 154.68371584, 709.2907714968, 334.9915161088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046258_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[37.21289063879999, 45.68371583999999, 185.29077149679995, 225.9915161088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046258.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, two nightstands, and two pictures.", "boxes_value": [[561.2128906388, 154.68371584, 709.2907714968, 334.9915161088], [624.6748047056, 197.4905395712, 686.0214843976, 280.4141845504], [561.2128906388, 181.83654784, 709.2907714968, 334.9915161088], [608.0368652084001, 154.68371584, 650.9320068684, 194.3817138688], [624.6748047056, 197.4905395712, 686.0214843976, 280.4141845504], [561.2128906388, 181.83654784, 709.2907714968, 334.9915161088], [608.0368652084001, 154.68371584, 650.9320068684, 194.3817138688]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3, 6]]}, {"image_path": "objects365_v1_00046258_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, two nightstands, and two pictures.", "boxes_value": [[37.21289063879999, 45.68371583999999, 185.29077149679995, 225.9915161088], [100.67480470559997, 88.4905395712, 162.02148439760003, 171.41418455040002], [37.21289063879999, 72.83654784000001, 185.29077149679995, 225.9915161088], [84.03686520840006, 45.68371583999999, 126.93200686839998, 85.3817138688], [100.67480470559997, 88.4905395712, 162.02148439760003, 171.41418455040002], [37.21289063879999, 72.83654784000001, 185.29077149679995, 225.9915161088], [84.03686520840006, 45.68371583999999, 126.93200686839998, 85.3817138688]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3, 6]]}, {"image_path": "objects365_v1_00046262.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations.", "boxes_value": [[254.28204341929998, 245.8438110208, 552.0001220539, 355.4005127168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046262_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations.", "boxes_value": [[75.28204341929998, 27.84381102079999, 373.0001220539, 137.4005127168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046262.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two horses, and a cow.", "boxes_value": [[254.28204341929998, 245.8438110208, 552.0001220539, 355.4005127168], [447.847412077, 245.8438110208, 476.29785153079996, 326.5274658304], [365.0203857469, 251.8609618944, 405.7908935509, 329.210571264], [411.3477783311, 263.812927232, 552.0001220539, 355.4005127168], [338.39306640620003, 270.8222045696, 490.4938964952, 356.335083008], [254.28204341929998, 296.7564697088, 360.8227538994, 352.1295776256]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046262_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two horses, and a cow.", "boxes_value": [[75.28204341929998, 27.84381102079999, 373.0001220539, 137.4005127168], [268.847412077, 27.84381102079999, 297.29785153079996, 108.52746583039999], [186.0203857469, 33.86096189439999, 226.79089355090002, 111.21057126400001], [232.34777833110002, 45.81292723199999, 373.0001220539, 137.4005127168], [159.39306640620003, 52.82220456959999, 311.4938964952, 138.33508300800003], [75.28204341929998, 78.75646970880001, 181.82275389940003, 134.1295776256]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046266.jpg", "text": "Tell me what you see in the area within the context of the image . Specify the location of each mentioned object.", "boxes_value": [[0.0884399616, 522.6975097732, 289.833618176, 682.9216308725]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046266_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Specify the location of each mentioned object.", "boxes_value": [[0.0884399616, 40.697509773199954, 289.833618176, 200.92163087250003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046266.jpg", "text": "Tell me what you see in the area within the context of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a gloves, and two sneakers.", "boxes_value": [[0.0884399616, 522.6975097732, 289.833618176, 682.9216308725], [86.5688476672, 358.6146240401, 238.7149047808, 683.3405761742], [0.0884399616, 554.2875976336, 76.347045888, 682.9216308725], [265.7342529536, 522.6975097732, 289.833618176, 560.39135745], [85.3628539904, 657.1042480158001, 143.587341312, 681.6197509808001], [164.0585327104, 653.1662597531999, 194.8280639488, 682.4348144265]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046266_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a gloves, and two sneakers.", "boxes_value": [[0.0884399616, 40.697509773199954, 289.833618176, 200.92163087250003], [86.5688476672, 0, 238.7149047808, 201], [0.0884399616, 72.28759763359994, 76.347045888, 200.92163087250003], [265.7342529536, 40.697509773199954, 289.833618176, 78.39135744999999], [85.3628539904, 175.10424801580007, 143.587341312, 199.61975098080006], [164.0585327104, 171.16625975319994, 194.8280639488, 200.4348144265]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046269.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe.", "boxes_value": [[394.1579589632, 335.8374633744, 512.1870117376, 504.7399902423]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046269_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe.", "boxes_value": [[30.157958963199974, 42.83746337439999, 148, 211.7399902423]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046269.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, a stool, a person, two bracelets, and a handbag.", "boxes_value": [[394.1579589632, 335.8374633744, 512.1870117376, 504.7399902423], [488.4086914048, 335.8374633744, 512.1870117376, 402.5996093732], [465.0012207104, 358.6196289275, 492.3652343808, 400.57238771609997], [141.1988525568, 177.5971679563, 470.3128051712, 683.5999756128], [394.1579589632, 475.8698730444, 417.3571167232, 504.7399902423], [404.4686889472, 457.5683594024, 430.5032958976, 480.2519530978], [419.6940166144, 472.4239108103, 498.8602727936, 548.263853726]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046269_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, a stool, a person, two bracelets, and a handbag.", "boxes_value": [[30.157958963199974, 42.83746337439999, 148, 211.7399902423], [124.40869140479998, 42.83746337439999, 148, 109.5996093732], [101.00122071039999, 65.6196289275, 128.36523438080002, 107.57238771609997], [0, 0, 106.31280517120001, 253], [30.157958963199974, 182.86987304439998, 53.35711672320002, 211.7399902423], [40.46868894720001, 164.5683594024, 66.50329589760003, 187.25195309780003], [55.69401661440003, 179.42391081030001, 134.86027279360002, 253]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046271.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[261.1212157952, 684.739990272, 380.2463378944, 727.8623046911999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046271_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[30.121215795199987, 11.739990272, 149.24633789440003, 54.86230469119994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046271.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a high heels, and two leather shoes.", "boxes_value": [[261.1212157952, 684.739990272, 380.2463378944, 727.8623046911999], [54.2141723648, 114.82373045759999, 478.8335571456, 729.0905761536001], [150.8270263808, 70.6919555328, 500.3031005696, 719.5485839616], [261.1212157952, 687.435180672, 313.9459228672, 727.8623046911999], [296.6970825216, 684.739990272, 336.585144064, 706.3011474432], [342.5144043008, 688.5131835648, 380.2463378944, 716.5426025472]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046271_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a high heels, and two leather shoes.", "boxes_value": [[30.121215795199987, 11.739990272, 149.24633789440003, 54.86230469119994], [0, 0, 179, 56.09057615360007], [0, 0, 179, 46.5485839616], [30.121215795199987, 14.435180672000001, 82.94592286720001, 54.86230469119994], [65.69708252160001, 11.739990272, 105.58514406400002, 33.301147443199966], [111.51440430079998, 15.513183564799988, 149.24633789440003, 43.54260254719998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046272.jpg", "text": "Please provide details for the area within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[415.2453613056, 193.54614260169998, 511.3173217792, 516.0992431357]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046272_crop.jpg", "text": "Please provide details for the area within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[24.2453613056, 81.54614260169998, 120.31732177919997, 404.0992431357]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046272.jpg", "text": "Please provide details for the area within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two flowers, two vases, and a lamp.", "boxes_value": [[415.2453613056, 193.54614260169998, 511.3173217792, 516.0992431357], [412.6359863296, 371.8341064566, 512.407836928, 475.0238037209], [479.2584228352, 468.01086428819997, 511.3173217792, 516.0992431357], [413.7636108288, 325.02209471879996, 452.6190795776, 348.2365722934], [415.2453613056, 347.7426758011, 448.3383789056, 359.7614746111], [488.1268310528, 193.54614260169998, 510.2781371904, 212.53295897819999]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00046272_crop.jpg", "text": "Please provide details for the area within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two flowers, two vases, and a lamp.", "boxes_value": [[24.2453613056, 81.54614260169998, 120.31732177919997, 404.0992431357], [21.635986329599973, 259.8341064566, 121, 363.0238037209], [88.25842283520001, 356.01086428819997, 120.31732177919997, 404.0992431357], [22.763610828799983, 213.02209471879996, 61.61907957760002, 236.2365722934], [24.2453613056, 235.7426758011, 57.338378905599996, 247.76147461110003], [97.12683105280001, 81.54614260169998, 119.2781371904, 100.53295897819999]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00046274.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[312.39538574700003, 1.5343017472, 718.6175537385999, 278.3759765504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046274_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[102.39538574700003, 1.5343017472, 508, 278.3759765504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046274.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a lamp, a flower, a vase, a desk, a cabinet, a clock, and two trophies.", "boxes_value": [[312.39538574700003, 1.5343017472, 718.6175537385999, 278.3759765504], [295.9704589602, 192.8217163264, 415.8725585688, 319.2937622016], [312.39538574700003, 125.479431168, 390.68762209119996, 192.8217163264], [333.2003173792, 150.6643676672, 402.1850586216, 180.7767333888], [345.52148436379997, 175.794555648, 389.6341552592, 195.97375488], [299.9514770214, 194.7070922752, 505.93774412659997, 294.423156736], [569.4736328392, 139.3831787008, 718.6175537385999, 278.3759765504], [553.6899414414, 1.5343017472, 649.608154294, 99.0512695296], [676.9999999762, 66.9741211136, 718.119140612, 139.950134272], [632.0756835718, 144.6039428608, 667.8742675796, 192.9319457792]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046274_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a lamp, a flower, a vase, a desk, a cabinet, a clock, and two trophies.", "boxes_value": [[102.39538574700003, 1.5343017472, 508, 278.3759765504], [85.9704589602, 192.8217163264, 205.8725585688, 319.2937622016], [102.39538574700003, 125.479431168, 180.68762209119996, 192.8217163264], [123.20031737919999, 150.6643676672, 192.18505862159998, 180.7767333888], [135.52148436379997, 175.794555648, 179.63415525919999, 195.97375488], [89.95147702140002, 194.7070922752, 295.93774412659997, 294.423156736], [359.47363283920004, 139.3831787008, 508, 278.3759765504], [343.68994144140004, 1.5343017472, 439.608154294, 99.0512695296], [466.9999999762, 66.9741211136, 508, 139.950134272], [422.0756835718, 144.6039428608, 457.8742675796, 192.9319457792]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046275.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations.", "boxes_value": [[76.00665283410001, 199.14916992, 391.7171630827, 512.1284179456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046275_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations.", "boxes_value": [[76.00665283410001, 79.14916991999999, 391.7171630827, 392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046275.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations. For your reference, objects involved in this region include a bench, a desk, two lamps, a stool, five people, a slippers, a glasses, two hats, and two cups.", "boxes_value": [[76.00665283410001, 199.14916992, 391.7171630827, 512.1284179456], [190.6005249263, 433.6685791232, 375.4893799145, 513.1556396544], [0, 320.5680542208, 175.0238037219, 442.4727783424], [57.4696045248, 234.530334464, 137.50531007979998, 258.1650390528], [275.4733276618, 182.061096192, 381.8758544673, 223.759399424], [76.30065919970001, 388.2985229312, 139.9128417776, 508.0095825408], [296.0418701347, 150.9617309696, 408.39575193170003, 511.087646464], [76.00665283410001, 199.14916992, 391.7171630827, 512.1284179456], [164.5932617177, 174.5115966976, 295.128540064, 358.2651367424], [138.7923584216, 238.611694336, 203.89776614150003, 370.3383789056], [82.6356201167, 252.5788574208, 145.912292468, 334.7391967744], [77.14224722360001, 493.2100954112, 158.2446175104, 511.9967544832], [303.119479559, 227.2776397824, 328.10207393, 240.90450944], [298.9439994243, 202.2259355136, 359.03969100250004, 235.1161450496], [106.7854976093, 255.1367240704, 139.33460755739998, 269.843812352], [104.4130571538, 289.7241665024, 134.4829781176, 340.9862222848], [209.77403962600002, 241.785788416, 233.49405255929997, 285.5195622912]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7, 8, 9, 10], [11], [12], [13, 14], [15, 16]]}, {"image_path": "objects365_v1_00046275_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations. For your reference, objects involved in this region include a bench, a desk, two lamps, a stool, five people, a slippers, a glasses, two hats, and two cups.", "boxes_value": [[76.00665283410001, 79.14916991999999, 391.7171630827, 392], [190.6005249263, 313.6685791232, 375.4893799145, 392], [0, 200.56805422079998, 175.0238037219, 322.4727783424], [57.4696045248, 114.53033446399999, 137.50531007979998, 138.1650390528], [275.4733276618, 62.06109619200001, 381.8758544673, 103.75939942400001], [76.30065919970001, 268.2985229312, 139.9128417776, 388.0095825408], [296.0418701347, 30.961730969600012, 408.39575193170003, 391.087646464], [76.00665283410001, 79.14916991999999, 391.7171630827, 392], [164.5932617177, 54.5115966976, 295.128540064, 238.2651367424], [138.7923584216, 118.611694336, 203.89776614150003, 250.3383789056], [82.6356201167, 132.5788574208, 145.912292468, 214.7391967744], [77.14224722360001, 373.2100954112, 158.2446175104, 391.9967544832], [303.119479559, 107.2776397824, 328.10207393, 120.90450944], [298.9439994243, 82.2259355136, 359.03969100250004, 115.1161450496], [106.7854976093, 135.1367240704, 139.33460755739998, 149.843812352], [104.4130571538, 169.7241665024, 134.4829781176, 220.98622228480002], [209.77403962600002, 121.785788416, 233.49405255929997, 165.51956229119997]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7, 8, 9, 10], [11], [12], [13, 14], [15, 16]]}, {"image_path": "objects365_v1_00046276.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify.", "boxes_value": [[285.7173461708, 74.6103515574, 569.4797363004, 216.0727538919]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046276_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify.", "boxes_value": [[71.7173461708, 35.610351557399994, 355.47973630039996, 177.0727538919]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046276.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, and three pictures.", "boxes_value": [[285.7173461708, 74.6103515574, 569.4797363004, 216.0727538919], [285.7173461708, 75.62969968440001, 356.82714843720004, 110.5715332131], [309.62493898360003, 169.4210205096, 326.7893676536, 230.1095581152], [331.8070068348, 173.9945068248, 341.07189943439994, 216.0727538919], [382.76416018640003, 169.7481079287, 407.4705810664, 190.5941772495], [555.5316162080001, 74.6103515574, 569.4797363004, 136.9663085955]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00046276_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, and three pictures.", "boxes_value": [[71.7173461708, 35.610351557399994, 355.47973630039996, 177.0727538919], [71.7173461708, 36.62969968440001, 142.82714843720004, 71.5715332131], [95.62493898360003, 130.4210205096, 112.78936765359998, 191.1095581152], [117.8070068348, 134.9945068248, 127.07189943439994, 177.0727538919], [168.76416018640003, 130.7481079287, 193.47058106639997, 151.5941772495], [341.53161620800006, 35.610351557399994, 355.47973630039996, 97.96630859550001]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00046277.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for each element you describe.", "boxes_value": [[242.45147704320001, 271.9977417216, 328.106445312, 397.1872558592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046277_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for each element you describe.", "boxes_value": [[21.451477043200015, 31.997741721599994, 107.106445312, 157.18725585919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046277.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a mirror, a picture, and two books.", "boxes_value": [[242.45147704320001, 271.9977417216, 328.106445312, 397.1872558592], [242.45147704320001, 278.1344604672, 264.26977536000004, 296.4876708864], [213.51336668160002, 7.9442748928, 296.0506591488, 356.918640128], [259.859741184, 271.9977417216, 272.3034667776, 328.6857299968], [295.5526123008, 370.37823488, 313.72814937600003, 397.1872558592], [312.9167480832, 356.3569946112, 328.106445312, 376.2203979264]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046277_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a mirror, a picture, and two books.", "boxes_value": [[21.451477043200015, 31.997741721599994, 107.106445312, 157.18725585919998], [21.451477043200015, 38.13446046719997, 43.26977536000004, 56.4876708864], [0, 0, 75.05065914879998, 116.91864012799999], [38.85974118399997, 31.997741721599994, 51.30346677760002, 88.68572999679998], [74.55261230079998, 130.37823487999998, 92.72814937600003, 157.18725585919998], [91.91674808319999, 116.35699461119998, 107.106445312, 136.22039792639998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046278.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[21.6123046912, 228.9573364224, 290.4127197184, 692.5236816383999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046278_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[21.6123046912, 115.9573364224, 290.4127197184, 579.5236816383999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046278.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two leather shoes, and a belt.", "boxes_value": [[21.6123046912, 228.9573364224, 290.4127197184, 692.5236816383999], [21.6123046912, 228.9573364224, 290.4127197184, 692.5236816383999], [165.4609110016, 623.1569799936001, 226.4319297024, 661.9200117504], [27.1368781312, 651.2774406912, 62.7850233856, 691.0787678976], [122.8055634944, 400.4531622144, 171.6183870464, 421.4931724032], [21.6123046912, 228.9573364224, 290.4127197184, 692.5236816383999]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00046278_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two leather shoes, and a belt.", "boxes_value": [[21.6123046912, 115.9573364224, 290.4127197184, 579.5236816383999], [21.6123046912, 115.9573364224, 290.4127197184, 579.5236816383999], [165.4609110016, 510.1569799936001, 226.4319297024, 548.9200117504], [27.1368781312, 538.2774406912, 62.7850233856, 578.0787678976], [122.8055634944, 287.4531622144, 171.6183870464, 308.4931724032], [21.6123046912, 115.9573364224, 290.4127197184, 579.5236816383999]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00046280.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[165.2921142843, 78.0321655296, 450.39184570429995, 154.5107421696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046280_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[71.2921142843, 20.032165529599993, 356.39184570429995, 96.51074216960001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046280.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a suv, and three pillows.", "boxes_value": [[165.2921142843, 78.0321655296, 450.39184570429995, 154.5107421696], [323.9144897376, 77.5855102464, 512.9300537429, 382.0], [165.2921142843, 130.191162112, 182.7318114914, 154.5107421696], [223.8167724726, 89.726318336, 259.38665768929997, 110.191162112], [400.204223601, 78.0321655296, 450.39184570429995, 116.525512704], [357.8127441336, 96.060668928, 385.5865478429, 121.3981323264]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046280_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a suv, and three pillows.", "boxes_value": [[71.2921142843, 20.032165529599993, 356.39184570429995, 96.51074216960001], [229.91448973759998, 19.585510246400005, 418.9300537429, 115], [71.2921142843, 72.191162112, 88.73181149140001, 96.51074216960001], [129.8167724726, 31.726318336000006, 165.38665768929997, 52.191162112], [306.204223601, 20.032165529599993, 356.39184570429995, 58.52551270399999], [263.8127441336, 38.060668928, 291.5865478429, 63.3981323264]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046282.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[251.453247045, 72.0983269376, 480.374017266, 153.1765136896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046282_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[57.45324704500001, 21.098326937600007, 286.374017266, 102.1765136896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046282.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two helmets, and two head phones.", "boxes_value": [[251.453247045, 72.0983269376, 480.374017266, 153.1765136896], [306.7539062445, 70.9533691392, 605.1994628680001, 511.3264160256], [420.619455542, 72.0983269376, 480.374017266, 139.8557317632], [242.42281613550003, 97.1739019776, 318.1830639845, 172.4006269952], [251.453247045, 106.9963989504, 287.9677734505, 153.1765136896], [431.874389661, 84.7381591552, 455.72460934000003, 125.3582153216]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046282_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two helmets, and two head phones.", "boxes_value": [[57.45324704500001, 21.098326937600007, 286.374017266, 102.1765136896], [112.7539062445, 19.953369139200007, 343, 122], [226.61945554200003, 21.098326937600007, 286.374017266, 88.8557317632], [48.42281613550003, 46.173901977599996, 124.1830639845, 121.40062699520001], [57.45324704500001, 55.99639895040001, 93.96777345049998, 102.1765136896], [237.874389661, 33.738159155199995, 261.72460934000003, 74.3582153216]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046285.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates.", "boxes_value": [[429.2362047435, 161.9482661376, 505.5926740238, 210.6690160128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046285_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates.", "boxes_value": [[19.236204743500025, 12.948266137600001, 95.59267402379999, 61.6690160128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046285.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, and two ties.", "boxes_value": [[429.2362047435, 161.9482661376, 505.5926740238, 210.6690160128], [396.7532958829, 112.0362548736, 481.84240724510005, 422.3974609408], [460.08605955179996, 109.7625122304, 557.3995361461999, 440.9993896448], [467.4586181716, 140.8912964096, 590.6243896179001, 471.0679931392], [429.2362047435, 161.9482661376, 442.5422918389, 210.6690160128], [492.4912959318, 170.5460455424, 505.5926740238, 205.141872128]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046285_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, and two ties.", "boxes_value": [[19.236204743500025, 12.948266137600001, 95.59267402379999, 61.6690160128], [0, 0, 71.84240724510005, 73], [50.08605955179996, 0, 114, 73], [57.458618171599994, 0, 114, 73], [19.236204743500025, 12.948266137600001, 32.542291838899985, 61.6690160128], [82.49129593179998, 21.54604554240001, 95.59267402379999, 56.14187212799999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046286.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object.", "boxes_value": [[69.56689453919999, 144.3798828032, 338.2769775156, 471.16784665599994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046286_crop.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object.", "boxes_value": [[67.56689453919999, 82.37988280319999, 336.2769775156, 409.16784665599994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046286.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two hats, a sneakers, and a belt.", "boxes_value": [[69.56689453919999, 144.3798828032, 338.2769775156, 471.16784665599994], [45.698730504, 161.7075195392, 127.13830570020001, 391.3757324288], [178.6637572947, 144.3798828032, 338.2769775156, 471.16784665599994], [178.7239989942, 202.8211059712, 287.4774170241, 378.8037719552], [69.56689453919999, 161.2269897216, 104.6230468989, 177.4404296704], [305.6727295182, 431.0073242112, 336.9013671798, 468.0308837888], [228.67114257959997, 293.8195190272, 271.6810302441, 305.5494994944], [220.789855986, 145.8757324288, 267.69665528760004, 171.6359863296]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 7], [5], [6]]}, {"image_path": "objects365_v1_00046286_crop.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two hats, a sneakers, and a belt.", "boxes_value": [[67.56689453919999, 82.37988280319999, 336.2769775156, 409.16784665599994], [43.698730504, 99.7075195392, 125.13830570020001, 329.3757324288], [176.6637572947, 82.37988280319999, 336.2769775156, 409.16784665599994], [176.7239989942, 140.8211059712, 285.4774170241, 316.8037719552], [67.56689453919999, 99.2269897216, 102.6230468989, 115.4404296704], [303.6727295182, 369.0073242112, 334.9013671798, 406.0308837888], [226.67114257959997, 231.8195190272, 269.6810302441, 243.54949949439998], [218.789855986, 83.8757324288, 265.69665528760004, 109.6359863296]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 7], [5], [6]]}, {"image_path": "objects365_v1_00046292.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[485.42675780400003, 79.246582016, 685.2912598004, 511.498535168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046292_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[50.42675780400003, 79.246582016, 250.29125980039998, 511.498535168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046292.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a glasses, a tie, and a belt.", "boxes_value": [[485.42675780400003, 79.246582016, 685.2912598004, 511.498535168], [485.42675780400003, 79.246582016, 685.2912598004, 511.498535168], [399.77050778779994, 80.2662963712, 708.7447509806, 511.8353271296], [479.7058105104, 136.8961792, 525.418579108, 152.1337890816], [579.8623790934, 213.374450688, 612.917287622, 430.8368530432], [505.2446679804, 399.0803656192, 593.2846396778, 421.17039488]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046292_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a glasses, a tie, and a belt.", "boxes_value": [[50.42675780400003, 79.246582016, 250.29125980039998, 511.498535168], [50.42675780400003, 79.246582016, 250.29125980039998, 511.498535168], [0, 80.2662963712, 273.7447509806, 511.8353271296], [44.7058105104, 136.8961792, 90.41857910800002, 152.1337890816], [144.86237909340002, 213.374450688, 177.917287622, 430.8368530432], [70.24466798039998, 399.0803656192, 158.2846396778, 421.17039488]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046295.jpg", "text": "Help me grasp the context of the region within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.8065795783999999, 302.950134272, 627.1708984096, 513.1257324032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046295_crop.jpg", "text": "Help me grasp the context of the region within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.8065795783999999, 52.950134272000014, 627.1708984096, 262]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046295.jpg", "text": "Help me grasp the context of the region within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four chairs, and a nightstand.", "boxes_value": [[0.8065795783999999, 302.950134272, 627.1708984096, 513.1257324032], [0.8065795783999999, 367.6976318464, 245.479186998, 511.3098144768], [227.6860961808, 338.64434816, 423.4827880796, 513.1257324032], [406.4305419672, 314.5884399616, 545.589355436, 512.212158208], [485.3010253664, 302.950134272, 627.1708984096, 471.2703857664], [558.8122558928, 293.6753540096, 676.6364746328, 471.2703857664]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046295_crop.jpg", "text": "Help me grasp the context of the region within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four chairs, and a nightstand.", "boxes_value": [[0.8065795783999999, 52.950134272000014, 627.1708984096, 262], [0.8065795783999999, 117.69763184639999, 245.479186998, 261.3098144768], [227.6860961808, 88.64434815999999, 423.4827880796, 262], [406.4305419672, 64.58843996159999, 545.589355436, 262], [485.3010253664, 52.950134272000014, 627.1708984096, 221.2703857664], [558.8122558928, 43.67535400960003, 676.6364746328, 221.2703857664]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046296.jpg", "text": "Can you generate a description for the selected region in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[177.1504516697, 283.7794189312, 261.9120483452, 436.3054199296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046296_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[22.150451669700004, 38.779418931199984, 106.91204834519999, 191.30541992960002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046296.jpg", "text": "Can you generate a description for the selected region in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a couch, two cups, a wine glass, a bread, and two plates.", "boxes_value": [[177.1504516697, 283.7794189312, 261.9120483452, 436.3054199296], [5.440917946699999, 294.4572143616, 566.8094482094, 511.041992192], [144.7877197178, 198.8166503936, 681.4720458789, 358.070129408], [186.7140502995, 360.8460693504, 227.52667233879998, 436.3054199296], [231.856262203, 283.7794189312, 261.9120483452, 336.677612288], [210.09956234359998, 327.3829321728, 247.0711160059, 417.010940928], [184.5886230776, 342.6129760768, 215.01098631609997, 364.3244018688], [227.7621460142, 336.9719848448, 308.3707275178, 392.564147968], [177.1504516697, 301.4045410304, 248.49365235539997, 327.9652099584]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046296_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a couch, two cups, a wine glass, a bread, and two plates.", "boxes_value": [[22.150451669700004, 38.779418931199984, 106.91204834519999, 191.30541992960002], [0, 49.457214361599995, 128, 229], [0, 0, 128, 113.07012940800001], [31.714050299500002, 115.8460693504, 72.52667233879998, 191.30541992960002], [76.856262203, 38.779418931199984, 106.91204834519999, 91.67761228799998], [55.099562343599985, 82.3829321728, 92.0711160059, 172.01094092800002], [29.588623077600005, 97.61297607680001, 60.010986316099974, 119.32440186880001], [72.7621460142, 91.97198484479998, 128, 147.564147968], [22.150451669700004, 56.404541030400026, 93.49365235539997, 82.96520995840001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00046297.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[0.3421630833, 380.461059584, 42.372314503, 499.4882812416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046297_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[0.3421630833, 30.461059583999997, 42.372314503, 149.48828124160002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046297.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a bottle, a cup, and a laptop.", "boxes_value": [[0.3421630833, 380.461059584, 42.372314503, 499.4882812416], [0.5711669825, 366.4249267712, 225.5244140097, 511.9828491264], [2.0338135392, 460.3023681536, 42.372314503, 499.4882812416], [13.518310506999999, 388.881591808, 51.2711181186, 466.48455808], [0.3421630833, 380.461059584, 33.8023681415, 428.8902587904], [0, 368.4484863488, 182.496582077, 511.9934082048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046297_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a bottle, a cup, and a laptop.", "boxes_value": [[0.3421630833, 30.461059583999997, 42.372314503, 149.48828124160002], [0.5711669825, 16.424926771199978, 52, 161.98284912640003], [2.0338135392, 110.30236815360001, 42.372314503, 149.48828124160002], [13.518310506999999, 38.881591807999996, 51.2711181186, 116.48455808], [0.3421630833, 30.461059583999997, 33.8023681415, 78.89025879040003], [0, 18.448486348799975, 52, 161.9934082048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046299.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[139.1301269788, 271.591430656, 251.85876463300002, 382.8840331776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046299_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[29.130126978800007, 28.591430656, 141.85876463300002, 139.88403317759997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046299.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, two carpets, a cabinet, and a desk.", "boxes_value": [[139.1301269788, 271.591430656, 251.85876463300002, 382.8840331776], [139.1301269788, 271.591430656, 186.51928709679999, 382.8840331776], [139.1301269788, 329.750793472, 251.85876463300002, 382.8840331776], [168.6085205016, 244.0695800832, 230.11309811159998, 325.4350585856], [124.2321167308, 310.130798336, 373.4669189164, 402.6230468608], [135.5400390342, 273.0274658304, 218.11199951280003, 380.01196288]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00046299_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, two carpets, a cabinet, and a desk.", "boxes_value": [[29.130126978800007, 28.591430656, 141.85876463300002, 139.88403317759997], [29.130126978800007, 28.591430656, 76.51928709679999, 139.88403317759997], [29.130126978800007, 86.750793472, 141.85876463300002, 139.88403317759997], [58.6085205016, 1.0695800832000089, 120.11309811159998, 82.43505858560002], [14.232116730800001, 67.130798336, 170, 159.6230468608], [25.540039034199992, 30.02746583039999, 108.11199951280003, 137.01196288]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00046302.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[488.2551269844, 57.3699951104, 772.0491943104, 512.3060302848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046302_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[71.25512698440002, 57.3699951104, 355, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046302.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two ties, a camera, and three people.", "boxes_value": [[488.2551269844, 57.3699951104, 772.0491943104, 512.3060302848], [488.2551269844, 173.1707763712, 500.96777342359997, 227.746582016], [648.128051762, 111.5165405184, 665.9632568004, 146.1960449024], [584.6520996472, 57.3699951104, 604.8354492216, 76.6759033344], [427.8123779236, 319.6624145408, 632.7659911836, 512.2836914176], [571.4287109004, 254.0675659264, 718.2702636348, 512.3060302848], [734.1267089988, 380.2390136832, 772.0491943104, 512.1135254016]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046302_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two ties, a camera, and three people.", "boxes_value": [[71.25512698440002, 57.3699951104, 355, 512], [71.25512698440002, 173.1707763712, 83.96777342359997, 227.746582016], [231.12805176200004, 111.5165405184, 248.96325680040002, 146.1960449024], [167.65209964719998, 57.3699951104, 187.83544922160002, 76.6759033344], [10.812377923600025, 319.6624145408, 215.76599118360002, 512], [154.4287109004, 254.0675659264, 301.2702636348, 512], [317.12670899880004, 380.2390136832, 355, 512]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046304.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[292.93957515899996, 227.2116088832, 682.118774414, 343.4521484288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046304_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[97.93957515899996, 29.2116088832, 487.118774414, 145.4521484288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046304.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a lantern, a van, and a motorcycle.", "boxes_value": [[292.93957515899996, 227.2116088832, 682.118774414, 343.4521484288], [501.737426729, 257.802856448, 543.575805696, 332.1079711744], [524.903930628, 243.9364013568, 542.404296896, 283.2406616064], [655.687988263, 227.2116088832, 682.118774414, 286.7929077248], [292.93957515899996, 247.1470947328, 326.109008764, 282.7239990272], [479.965209955, 286.3380737536, 571.568481407, 343.4521484288]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046304_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a lantern, a van, and a motorcycle.", "boxes_value": [[97.93957515899996, 29.2116088832, 487.118774414, 145.4521484288], [306.737426729, 59.802856448, 348.575805696, 134.1079711744], [329.903930628, 45.93640135679999, 347.404296896, 85.24066160640001], [460.68798826299997, 29.2116088832, 487.118774414, 88.79290772479999], [97.93957515899996, 49.147094732800014, 131.109008764, 84.72399902720002], [284.965209955, 88.33807375359999, 376.56848140700004, 145.4521484288]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046308.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[49.799194368, 316.0046386688, 212.20275878400003, 348.5601806848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046308_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.799194368, 9.004638668799998, 203.20275878400003, 41.5601806848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046308.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a pickup truck, two vans, and two cars.", "boxes_value": [[49.799194368, 316.0046386688, 212.20275878400003, 348.5601806848], [193.869873024, 323.6798706176, 212.20275878400003, 348.5601806848], [0.595947264, 317.1041870336, 122.0922241536, 408.3638305792], [49.799194368, 316.0046386688, 92.4053344512, 341.2934570496], [194.8372192512, 326.2277221888, 230.57244871679998, 349.079467776], [155.97753907199998, 321.5411377152, 193.4702758656, 342.43554688], [108.52575682560001, 322.5175171072, 165.3507080448, 342.0449828864]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4, 6]]}, {"image_path": "objects365_v1_00046308_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a pickup truck, two vans, and two cars.", "boxes_value": [[40.799194368, 9.004638668799998, 203.20275878400003, 41.5601806848], [184.869873024, 16.67987061759999, 203.20275878400003, 41.5601806848], [0, 10.104187033599999, 113.0922241536, 49], [40.799194368, 9.004638668799998, 83.4053344512, 34.29345704960002], [185.8372192512, 19.227722188799987, 221.57244871679998, 42.079467776], [146.97753907199998, 14.541137715200023, 184.4702758656, 35.435546880000004], [99.52575682560001, 15.517517107199978, 156.3507080448, 35.04498288640002]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4, 6]]}, {"image_path": "objects365_v1_00046310.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[533.0133056601001, 214.6801147392, 683.3695068247999, 352.1580200448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046310_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[38.01330566010006, 34.68011473920001, 188, 172.15802004480003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046310.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, two people, and a book.", "boxes_value": [[533.0133056601001, 214.6801147392, 683.3695068247999, 352.1580200448], [647.1282959222, 214.6801147392, 681.4837646563, 320.6580200448], [566.7713622853, 227.490661632, 594.7215576428999, 296.7838745088], [514.4843750107, 317.2907715072, 582.0332031414999, 352.7752685568], [666.1560058309, 321.1738281472, 683.3695068247999, 352.1580200448], [533.0133056601001, 252.3177490432, 547.1096191737, 274.343261696]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046310_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, two people, and a book.", "boxes_value": [[38.01330566010006, 34.68011473920001, 188, 172.15802004480003], [152.12829592219998, 34.68011473920001, 186.48376465629997, 140.65802004480003], [71.77136228530003, 47.49066163200001, 99.72155764289994, 116.78387450880001], [19.484375010699978, 137.29077150720002, 87.03320314149994, 172.77526855679997], [171.15600583089997, 141.17382814720003, 188, 172.15802004480003], [38.01330566010006, 72.3177490432, 52.109619173700025, 94.34326169600001]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046311.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[160.24505614179998, 160.6798706176, 357.3110351678, 272.170898432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046311_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[50.24505614179998, 28.67987061759999, 247.31103516780001, 140.170898432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046311.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, a tape measur, a bowl, and a bottle.", "boxes_value": [[160.24505614179998, 160.6798706176, 357.3110351678, 272.170898432], [215.5725097946, 160.6798706176, 357.3110351678, 206.3038329856], [217.3974609142, 229.4199828992, 355.48596194019996, 279.3021850624], [160.24505614179998, 244.2659301888, 186.3355712792, 263.9373169152], [272.0386962818, 225.091430656, 302.0121460062, 236.4450073088], [331.38006593160003, 249.3123779072, 344.5501708698, 272.170898432]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046311_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, a tape measur, a bowl, and a bottle.", "boxes_value": [[50.24505614179998, 28.67987061759999, 247.31103516780001, 140.170898432], [105.5725097946, 28.67987061759999, 247.31103516780001, 74.3038329856], [107.3974609142, 97.4199828992, 245.48596194019996, 147.30218506239999], [50.24505614179998, 112.26593018880001, 76.3355712792, 131.93731691519997], [162.03869628180001, 93.091430656, 192.0121460062, 104.4450073088], [221.38006593160003, 117.31237790719999, 234.5501708698, 140.170898432]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046312.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.11926267639999999, 10.734619136, 265.31494142779997, 511.4404907008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046312_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.11926267639999999, 10.734619136, 265.31494142779997, 511.4404907008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046312.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three storage boxes, a desk, and three lamps.", "boxes_value": [[0.11926267639999999, 10.734619136, 265.31494142779997, 511.4404907008], [0.1602782996, 413.8831176704, 51.4915161064, 511.4124755968], [71.32897950520001, 460.9505005056, 164.4199218816, 511.4404907008], [33.497802758, 343.6295165952, 424.282348613, 512.7836914176], [174.8593139688, 450.471862784, 341.9273681692, 512.7836914176], [0.11926267639999999, 19.3631591936, 66.2611694652, 74.5884399616], [3.9722290416, 118.2549438464, 34.795593279, 141.3724975616], [198.5908203296, 10.734619136, 265.31494142779997, 127.19854735359999]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5, 6, 7]]}, {"image_path": "objects365_v1_00046312_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three storage boxes, a desk, and three lamps.", "boxes_value": [[0.11926267639999999, 10.734619136, 265.31494142779997, 511.4404907008], [0.1602782996, 413.8831176704, 51.4915161064, 511.4124755968], [71.32897950520001, 460.9505005056, 164.4199218816, 511.4404907008], [33.497802758, 343.6295165952, 331, 512], [174.8593139688, 450.471862784, 331, 512], [0.11926267639999999, 19.3631591936, 66.2611694652, 74.5884399616], [3.9722290416, 118.2549438464, 34.795593279, 141.3724975616], [198.5908203296, 10.734619136, 265.31494142779997, 127.19854735359999]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5, 6, 7]]}, {"image_path": "objects365_v1_00046314.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[345.5721435648, 456.681823744, 532.5288086016, 511.8145752064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046314_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[47.5721435648, 14.681823743999985, 234.52880860159996, 69.81457520639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046314.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a leather shoes, two slippers, and a bicycle.", "boxes_value": [[345.5721435648, 456.681823744, 532.5288086016, 511.8145752064], [450.1870116864, 187.0623779328, 560.3352050688, 511.59545896960003], [329.8164062208, 130.0267944448, 467.0582275584, 511.5954589695999], [345.5721435648, 499.0916137472, 369.25097656319997, 511.8145752064], [444.528442368, 456.681823744, 463.6127929344, 474.3525390848], [494.713378944, 484.24816896, 532.5288086016, 511.4611816448], [191.7766113024, 209.4934082048, 582.1138916352, 511.2218017792]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046314_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a leather shoes, two slippers, and a bicycle.", "boxes_value": [[47.5721435648, 14.681823743999985, 234.52880860159996, 69.81457520639998], [152.1870116864, 0, 262.33520506879995, 69.59545896960003], [31.81640622079999, 0, 169.0582275584, 69.59545896959992], [47.5721435648, 57.09161374719997, 71.25097656319997, 69.81457520639998], [146.52844236800001, 14.681823743999985, 165.61279293439998, 32.352539084800014], [196.713378944, 42.24816895999999, 234.52880860159996, 69.46118164479998], [0, 0, 281, 69.2218017792]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046316.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[452.1397704864, 192.5150756864, 575.8761849936, 332.3356582912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046316_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[31.139770486399982, 35.515075686399996, 154.87618499359996, 175.3356582912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046316.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, two sneakers, and two cameras.", "boxes_value": [[452.1397704864, 192.5150756864, 575.8761849936, 332.3356582912], [534.80918724, 209.4247737856, 569.51709828, 237.2718187008], [521.9365959792, 307.2610385408, 544.3871276112, 332.3356582912], [558.673829592, 270.81537024, 575.8761849936, 307.5526038528], [452.1397704864, 240.498535168, 472.37768551199997, 262.6950073344], [523.6253661696, 192.5150756864, 546.8010253535999, 232.664489728]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046316_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, two sneakers, and two cameras.", "boxes_value": [[31.139770486399982, 35.515075686399996, 154.87618499359996, 175.3356582912], [113.80918724000003, 52.424773785599996, 148.51709828000003, 80.2718187008], [100.93659597919998, 150.26103854079997, 123.38712761119996, 175.3356582912], [137.67382959199995, 113.81537024, 154.87618499359996, 150.55260385280002], [31.139770486399982, 83.49853516799999, 51.37768551199997, 105.69500733439997], [102.62536616960006, 35.515075686399996, 125.80102535359993, 75.664489728]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046317.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates.", "boxes_value": [[549.7987060788, 107.9404907008, 680.2397460658, 229.655578624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046317_crop.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates.", "boxes_value": [[32.79870607880002, 30.940490700799998, 163.23974606579998, 152.655578624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046317.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a sneakers, and two hats.", "boxes_value": [[549.7987060788, 107.9404907008, 680.2397460658, 229.655578624], [488.2896728337, 31.580200192, 605.7188721028, 230.3815307776], [543.8719482394999, 176.4350585856, 682.9493407972, 355.1892089856], [603.9239502201, 107.9404907008, 677.3110351675, 229.655578624], [549.7987060788, 131.5136718848, 572.8604736641, 175.2349853696], [626.8726806907, 108.5900878848, 655.2193603486, 124.0103759872], [631.2982177802, 175.6965331968, 680.2397460658, 215.7868652544]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046317_crop.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a sneakers, and two hats.", "boxes_value": [[32.79870607880002, 30.940490700799998, 163.23974606579998, 152.655578624], [0, 0, 88.71887210279999, 153.3815307776], [26.871948239499943, 99.43505858559999, 165.94934079719997, 183], [86.92395022009998, 30.940490700799998, 160.3110351675, 152.655578624], [32.79870607880002, 54.513671884800004, 55.86047366410003, 98.2349853696], [109.87268069070001, 31.5900878848, 138.21936034860005, 47.01037598720001], [114.29821778020005, 98.6965331968, 163.23974606579998, 138.7868652544]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046318.jpg", "text": "I need details about the area located within image . Please mention the objects and their locations.", "boxes_value": [[133.6093139385, 212.0781250048, 253.58898921839997, 316.9483032064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046318_crop.jpg", "text": "I need details about the area located within image . Please mention the objects and their locations.", "boxes_value": [[30.60931393850001, 27.0781250048, 150.58898921839997, 131.9483032064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046318.jpg", "text": "I need details about the area located within image . Please mention the objects and their locations. For your reference, objects involved in this region include a flag, and six chairs.", "boxes_value": [[133.6093139385, 212.0781250048, 253.58898921839997, 316.9483032064], [165.17468259, 212.0781250048, 201.54333497039997, 266.1396484608], [135.4267577871, 235.2557983232, 165.52917483209998, 268.368469248], [241.28686524149998, 232.7472534016, 266.3721923535, 262.3479614464], [213.2271728145, 269.9516601344, 253.58898921839997, 316.9483032064], [196.0872192603, 263.3168334848, 233.1315917964, 307.5489502208], [149.09057615700002, 274.9277954048, 191.1110839767, 320.81860352], [133.6093139385, 267.1871337984, 171.7595215227, 313.078002944]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00046318_crop.jpg", "text": "I need details about the area located within image . Please mention the objects and their locations. For your reference, objects involved in this region include a flag, and six chairs.", "boxes_value": [[30.60931393850001, 27.0781250048, 150.58898921839997, 131.9483032064], [62.17468259, 27.0781250048, 98.54333497039997, 81.1396484608], [32.4267577871, 50.2557983232, 62.52917483209998, 83.368469248], [138.28686524149998, 47.747253401600005, 163.3721923535, 77.3479614464], [110.2271728145, 84.95166013440002, 150.58898921839997, 131.9483032064], [93.0872192603, 78.31683348479999, 130.1315917964, 122.54895022080001], [46.090576157000015, 89.92779540480001, 88.1110839767, 135.81860352], [30.60931393850001, 82.1871337984, 68.7595215227, 128.078002944]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00046319.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates.", "boxes_value": [[226.6619262464, 119.116699185, 394.094787584, 450.65905763840004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046319_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates.", "boxes_value": [[42.661926246399986, 83.116699185, 210.09478758400002, 414.65905763840004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046319.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a tent, a hat, and two bottles.", "boxes_value": [[226.6619262464, 119.116699185, 394.094787584, 450.65905763840004], [99.4078979584, 169.5610351709, 341.3608398336, 595.531372097], [143.5770873856, 116.4082641876, 402.6030273536, 374.68554690120004], [311.3569946112, 154.74462888349998, 394.094787584, 304.7636718792], [0, 0, 511.9198608384, 682.644042939], [226.6619262464, 119.116699185, 299.1552123904, 178.73284915509998], [349.0779419136, 405.7696533356, 379.6409301504, 450.65905763840004], [318.9924926976, 390.96569827480005, 358.62890624, 442.5407714957]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046319_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a tent, a hat, and two bottles.", "boxes_value": [[42.661926246399986, 83.116699185, 210.09478758400002, 414.65905763840004], [0, 133.5610351709, 157.36083983359998, 497], [0, 80.4082641876, 218.6030273536, 338.68554690120004], [127.35699461119998, 118.74462888349998, 210.09478758400002, 268.7636718792], [0, 0, 251, 497], [42.661926246399986, 83.116699185, 115.15521239039998, 142.73284915509998], [165.0779419136, 369.7696533356, 195.64093015039998, 414.65905763840004], [134.99249269760003, 354.96569827480005, 174.62890624, 406.5407714957]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046321.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[138.8783569336, 157.176879872, 374.4835205074, 403.6965331968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046321_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[59.8783569336, 62.176879872, 295.4835205074, 308.6965331968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046321.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include six people, and a sneakers.", "boxes_value": [[138.8783569336, 157.176879872, 374.4835205074, 403.6965331968], [304.9200439426, 236.1791382016, 339.701782225, 300.5252685312], [250.1388549586, 247.048400896, 374.4835205074, 349.6544799744], [251.8779907562, 246.6136474624, 429.6994629068, 405.3051757568], [99.708007834, 255.7438354432, 249.2693481278, 394.0010986496], [243.04992674029998, 157.9196167168, 261.2474975458, 185.9586791936], [138.8783569336, 157.176879872, 157.0758666711, 185.587341312], [315.169921851, 383.8001709056, 358.3734130822, 403.6965331968]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046321_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include six people, and a sneakers.", "boxes_value": [[59.8783569336, 62.176879872, 295.4835205074, 308.6965331968], [225.9200439426, 141.1791382016, 260.701782225, 205.52526853120003], [171.1388549586, 152.048400896, 295.4835205074, 254.65447997439998], [172.8779907562, 151.6136474624, 350.6994629068, 310.3051757568], [20.708007834, 160.7438354432, 170.2693481278, 299.0010986496], [164.04992674029998, 62.91961671679999, 182.2474975458, 90.9586791936], [59.8783569336, 62.176879872, 78.07586667109999, 90.587341312], [236.16992185100003, 288.8001709056, 279.3734130822, 308.6965331968]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046322.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[15.4497680896, 41.49261472, 230.4802246144, 474.56994632000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046322_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[15.4497680896, 41.49261472, 230.4802246144, 474.56994632000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046322.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two traffic lights, two street lights, and a train.", "boxes_value": [[15.4497680896, 41.49261472, 230.4802246144, 474.56994632000004], [89.2553711104, 41.49261472, 149.8814087168, 137.70343015999998], [15.4497680896, 145.61120608, 141.9736328192, 283.99670408], [192.3259277312, 398.78393552, 201.2111816192, 468.2979736], [218.4589843968, 386.24011232000004, 230.4802246144, 471.95666504], [85.1802978304, 408.19189456, 176.646118144, 474.56994632000004]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046322_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two traffic lights, two street lights, and a train.", "boxes_value": [[15.4497680896, 41.49261472, 230.4802246144, 474.56994632000004], [89.2553711104, 41.49261472, 149.8814087168, 137.70343015999998], [15.4497680896, 145.61120608, 141.9736328192, 283.99670408], [192.3259277312, 398.78393552, 201.2111816192, 468.2979736], [218.4589843968, 386.24011232000004, 230.4802246144, 471.95666504], [85.1802978304, 408.19189456, 176.646118144, 474.56994632000004]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046323.jpg", "text": "What can you share about the area in the presented image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[360.998535128, 310.904357888, 578.5804443650001, 370.2318725632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046323_crop.jpg", "text": "What can you share about the area in the presented image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[54.998535128000015, 14.904357887999993, 272.58044436500006, 74.2318725632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046323.jpg", "text": "What can you share about the area in the presented image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, two flowers, a person, and a moniter.", "boxes_value": [[360.998535128, 310.904357888, 578.5804443650001, 370.2318725632], [565.838623042, 310.904357888, 578.5804443650001, 352.1557617152], [402.46862794599997, 324.0986328064, 433.735839845, 369.1891479552], [360.998535128, 322.4529418752, 392.265747104, 367.8726806528], [477.287719755, 330.6841430528, 522.6229248239999, 370.2318725632], [462.09301753999995, 341.5125732352, 480.990966764, 359.7018432512]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046323_crop.jpg", "text": "What can you share about the area in the presented image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, two flowers, a person, and a moniter.", "boxes_value": [[54.998535128000015, 14.904357887999993, 272.58044436500006, 74.2318725632], [259.838623042, 14.904357887999993, 272.58044436500006, 56.155761715200015], [96.46862794599997, 28.09863280640002, 127.73583984499999, 73.18914795519999], [54.998535128000015, 26.452941875199997, 86.26574710400001, 71.8726806528], [171.287719755, 34.68414305279998, 216.62292482399994, 74.2318725632], [156.09301753999995, 45.51257323520002, 174.990966764, 63.70184325119999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046324.jpg", "text": "Kindly give an overview of the section in photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[177.5592651264, 0.0722655936, 511.4819336192, 251.855468775]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046324_crop.jpg", "text": "Kindly give an overview of the section in photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[83.55926512639999, 0.0722655936, 417.4819336192, 251.855468775]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046324.jpg", "text": "Kindly give an overview of the section in photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a sneakers, and a tripod.", "boxes_value": [[177.5592651264, 0.0722655936, 511.4819336192, 251.855468775], [442.0602417152, 134.7710571072, 511.4819336192, 251.855468775], [370.5662841856, 0.0722655936, 441.7958984192, 157.5662842044], [444.3733887488, 228.473944263, 483.5566217728, 247.85026823520002], [136.95581056, 41.566955598, 274.413818368, 263.4481201068], [177.5592651264, 10.571533178400001, 259.5894164992, 248.3550414684], [204.1380004864, 0.43566896520000004, 273.2598266368, 133.5571288974]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00046324_crop.jpg", "text": "Kindly give an overview of the section in photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a sneakers, and a tripod.", "boxes_value": [[83.55926512639999, 0.0722655936, 417.4819336192, 251.855468775], [348.0602417152, 134.7710571072, 417.4819336192, 251.855468775], [276.5662841856, 0.0722655936, 347.7958984192, 157.5662842044], [350.3733887488, 228.473944263, 389.5566217728, 247.85026823520002], [42.95581056, 41.566955598, 180.41381836800002, 263.4481201068], [83.55926512639999, 10.571533178400001, 165.5894164992, 248.3550414684], [110.13800048639999, 0.43566896520000004, 179.25982663680003, 133.5571288974]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00046325.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[116.2300415232, 319.9473876992, 550.5322265856, 482.1423340032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046325_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[109.2300415232, 40.94738769920002, 543.5322265856, 203.1423340032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046325.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[116.2300415232, 319.9473876992, 550.5322265856, 482.1423340032], [272.1372070656, 68.9313965056, 549.393554688, 434.909729024], [116.2300415232, 454.8128051712, 196.4362792704, 482.1423340032], [274.26611328, 386.4888915968, 311.6956786944, 435.2067871232], [432.89636229120003, 319.9473876992, 454.8787842048, 368.6652832256], [467.35534671360006, 372.2300414976, 504.78491212800003, 387.0830077952], [512.508544896, 363.318176256, 550.5322265856, 407.2831420928]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046325_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[109.2300415232, 40.94738769920002, 543.5322265856, 203.1423340032], [265.1372070656, 0, 542.393554688, 155.909729024], [109.2300415232, 175.8128051712, 189.4362792704, 203.1423340032], [267.26611328, 107.48889159679999, 304.6956786944, 156.2067871232], [425.89636229120003, 40.94738769920002, 447.8787842048, 89.66528322559998], [460.35534671360006, 93.23004149759998, 497.78491212800003, 108.08300779519999], [505.508544896, 84.31817625600002, 543.5322265856, 128.2831420928]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046326.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention.", "boxes_value": [[374.257934592, 327.230102507, 474.1373901312, 440.20031740300004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046326_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention.", "boxes_value": [[25.257934592000026, 29.230102507000026, 125.1373901312, 142.20031740300004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046326.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three stools, a vase, and a flower.", "boxes_value": [[374.257934592, 327.230102507, 474.1373901312, 440.20031740300004], [383.5893554688, 383.808837874, 413.3314208768, 440.20031740300004], [417.1666870272, 373.555786142, 432.5461425664, 424.82080078900003], [448.9509887488, 376.631713843, 474.1373901312, 418.66906735500004], [380.3569335808, 342.477661141, 400.4837646336, 363.824340803], [374.257934592, 327.230102507, 413.9016723456, 349.1866455]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046326_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three stools, a vase, and a flower.", "boxes_value": [[25.257934592000026, 29.230102507000026, 125.1373901312, 142.20031740300004], [34.58935546880002, 85.808837874, 64.3314208768, 142.20031740300004], [68.1666870272, 75.55578614199999, 83.54614256640002, 126.82080078900003], [99.9509887488, 78.631713843, 125.1373901312, 120.66906735500004], [31.35693358079999, 44.477661141, 51.4837646336, 65.82434080299998], [25.257934592000026, 29.230102507000026, 64.90167234559999, 51.1866455]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046329.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify.", "boxes_value": [[192.9338989429, 153.3959961088, 454.4162597624, 294.760864256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046329_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify.", "boxes_value": [[65.93389894289999, 35.395996108800006, 327.4162597624, 176.760864256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046329.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify. For your reference, objects involved in this region include a picture, a cup, two bottles, and a moniter.", "boxes_value": [[192.9338989429, 153.3959961088, 454.4162597624, 294.760864256], [435.9948730636, 153.3959961088, 454.4162597624, 178.2163696128], [391.3697509664, 275.9072876032, 410.8270263487, 294.760864256], [192.9338989429, 227.95465088, 208.7048949895, 275.7247924736], [210.5334472458, 232.7545165824, 221.96166988899998, 272.753417984], [261.3718261485, 194.8745727488, 302.79815674500003, 236.0283813376]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046329_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify. For your reference, objects involved in this region include a picture, a cup, two bottles, and a moniter.", "boxes_value": [[65.93389894289999, 35.395996108800006, 327.4162597624, 176.760864256], [308.9948730636, 35.395996108800006, 327.4162597624, 60.21636961280001], [264.3697509664, 157.90728760320002, 283.8270263487, 176.760864256], [65.93389894289999, 109.95465088, 81.70489498949999, 157.72479247360002], [83.5334472458, 114.75451658239999, 94.96166988899998, 154.753417984], [134.37182614850002, 76.8745727488, 175.79815674500003, 118.0283813376]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046331.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify.", "boxes_value": [[54.6818237099, 128.3229980672, 349.978637694, 313.402587904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046331_crop.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify.", "boxes_value": [[54.6818237099, 46.32299806719999, 349.978637694, 231.40258790399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046331.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two hats, and a cow.", "boxes_value": [[54.6818237099, 128.3229980672, 349.978637694, 313.402587904], [137.135131866, 160.3210449408, 290.7109375221, 357.9290771456], [240.14117432339998, 162.2484130816, 284.771484352, 186.7092895744], [192.5258789092, 128.3229980672, 216.1569824541, 142.1078491136], [54.6818237099, 173.6318359552, 349.978637694, 313.402587904], [44.8755493063, 112.6422118912, 78.5217284825, 187.32940672]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00046331_crop.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two hats, and a cow.", "boxes_value": [[54.6818237099, 46.32299806719999, 349.978637694, 231.40258790399997], [137.135131866, 78.3210449408, 290.7109375221, 275.9290771456], [240.14117432339998, 80.2484130816, 284.771484352, 104.7092895744], [192.5258789092, 46.32299806719999, 216.1569824541, 60.1078491136], [54.6818237099, 91.63183595519999, 349.978637694, 231.40258790399997], [44.8755493063, 30.642211891200006, 78.5217284825, 105.32940672000001]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00046332.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[154.492553694, 49.9906006016, 441.9396972936, 465.2565307392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046332_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[72.49255369400001, 49.9906006016, 359.9396972936, 465.2565307392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046332.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, a desk, and a chair.", "boxes_value": [[154.492553694, 49.9906006016, 441.9396972936, 465.2565307392], [404.57568358500004, 49.9906006016, 441.9396972936, 230.6964721664], [406.6137695304, 227.903625472, 434.4669189222, 356.2999267328], [407.84606933460003, 375.4165649408, 426.3132323976, 465.2565307392], [141.385559055, 383.6574096896, 165.364196781, 408.898071296], [135.8189697192, 244.42840576, 177.076293945, 258.7617797632], [154.492553694, 232.2896728576, 184.9805908188, 259.9547729408]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046332_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, a desk, and a chair.", "boxes_value": [[72.49255369400001, 49.9906006016, 359.9396972936, 465.2565307392], [322.57568358500004, 49.9906006016, 359.9396972936, 230.6964721664], [324.6137695304, 227.903625472, 352.4669189222, 356.2999267328], [325.84606933460003, 375.4165649408, 344.3132323976, 465.2565307392], [59.38555905499999, 383.6574096896, 83.364196781, 408.898071296], [53.8189697192, 244.42840576, 95.076293945, 258.7617797632], [72.49255369400001, 232.2896728576, 102.98059081880001, 259.9547729408]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046334.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for all objects that you mention.", "boxes_value": [[0.9112549118, 0.0204467712, 274.083312972, 195.3648681472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046334_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for all objects that you mention.", "boxes_value": [[0.9112549118, 0.0204467712, 274.083312972, 195.3648681472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046334.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three flowers, and two flags.", "boxes_value": [[0.9112549118, 0.0204467712, 274.083312972, 195.3648681472], [0.9112549118, 0.0204467712, 85.81091307070001, 154.7933349376], [93.32415772130001, 78.158203136, 230.816589358, 195.3648681472], [233.8218993893, 112.9868164096, 274.083312972, 171.3224487424], [178.905761738, 0.674987776, 229.0202637054, 83.2499999744], [234.14562991489998, 0.1054687744, 284.8296508859, 128.239135744]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046334_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three flowers, and two flags.", "boxes_value": [[0.9112549118, 0.0204467712, 274.083312972, 195.3648681472], [0.9112549118, 0.0204467712, 85.81091307070001, 154.7933349376], [93.32415772130001, 78.158203136, 230.816589358, 195.3648681472], [233.8218993893, 112.9868164096, 274.083312972, 171.3224487424], [178.905761738, 0.674987776, 229.0202637054, 83.2499999744], [234.14562991489998, 0.1054687744, 284.8296508859, 128.239135744]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046335.jpg", "text": "Can you provide some context for the area within the picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[546.6966552445, 185.6992797696, 679.8750000242, 264.3422851584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046335_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[33.69665524449999, 19.699279769599997, 166.87500002419995, 98.34228515839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046335.jpg", "text": "Can you provide some context for the area within the picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a bed, and four pillows.", "boxes_value": [[546.6966552445, 185.6992797696, 679.8750000242, 264.3422851584], [205.39245604529998, 87.7915649536, 681.6815185639, 510.7845458944], [589.0150146443, 194.6414794752, 679.8750000242, 261.8529662976], [546.6966552445, 200.8647460864, 600.2169189539, 264.3422851584], [573.1114502101001, 185.6992797696, 613.3469237943, 207.6458740224], [654.4968261913, 179.2981567488, 681.0157471023, 243.3091430912]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046335_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a bed, and four pillows.", "boxes_value": [[33.69665524449999, 19.699279769599997, 166.87500002419995, 98.34228515839999], [0, 0, 168.6815185639, 118], [76.01501464429998, 28.641479475199986, 166.87500002419995, 95.8529662976], [33.69665524449999, 34.864746086400004, 87.21691895389995, 98.34228515839999], [60.11145021010009, 19.699279769599997, 100.34692379429998, 41.645874022399994], [141.4968261913, 13.298156748800011, 168.0157471023, 77.30914309120001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046337.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[368.728698752, 249.776672352, 593.3299560319999, 291.509155296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046337_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[56.728698752000014, 10.776672351999991, 281.3299560319999, 52.50915529600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046337.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four pillows, and a nightstand.", "boxes_value": [[368.728698752, 249.776672352, 593.3299560319999, 291.509155296], [459.31982419199994, 249.776672352, 593.3299560319999, 289.573608384], [458.507568384, 274.14215088000003, 584.395996096, 314.75128176000004], [368.728698752, 266.625488304, 418.007934592, 280.394714352], [407.44812012799997, 252.641784672, 481.45581056, 282.45782472], [409.577880832, 272.341674816, 466.015380864, 291.509155296]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046337_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four pillows, and a nightstand.", "boxes_value": [[56.728698752000014, 10.776672351999991, 281.3299560319999, 52.50915529600002], [147.31982419199994, 10.776672351999991, 281.3299560319999, 50.57360838400001], [146.50756838400002, 35.14215088000003, 272.395996096, 62], [56.728698752000014, 27.625488303999987, 106.00793459200003, 41.394714351999994], [95.44812012799997, 13.641784672, 169.45581055999997, 43.45782472000002], [97.577880832, 33.34167481600002, 154.015380864, 52.50915529600002]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046343.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates.", "boxes_value": [[119.0765990939, 0, 441.39782713830004, 325.8068237312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046343_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates.", "boxes_value": [[81.0765990939, 0, 403.39782713830004, 325.8068237312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046343.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a fan, a lamp, two pillows, and a towel.", "boxes_value": [[119.0765990939, 0, 441.39782713830004, 325.8068237312], [119.0765990939, 0, 441.39782713830004, 38.2664795136], [244.42376709419997, 0.7476196352, 331.3993530062, 67.7240600576], [160.2835082908, 258.0273437696, 254.51300047499998, 295.8843383808], [86.9859619114, 275.7476196352, 207.0006103486, 333.7412719616], [396.06445309680004, 262.2918090752, 432.9031982287, 325.8068237312]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046343_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a fan, a lamp, two pillows, and a towel.", "boxes_value": [[81.0765990939, 0, 403.39782713830004, 325.8068237312], [81.0765990939, 0, 403.39782713830004, 38.2664795136], [206.42376709419997, 0.7476196352, 293.3993530062, 67.7240600576], [122.2835082908, 258.0273437696, 216.51300047499998, 295.8843383808], [48.9859619114, 275.7476196352, 169.0006103486, 333.7412719616], [358.06445309680004, 262.2918090752, 394.9031982287, 325.8068237312]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046345.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[0.1621704272, 128.4503783936, 546.1520996177, 511.8049926656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046345_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[0.1621704272, 96.4503783936, 546.1520996177, 479.8049926656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046345.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a pillow, a lamp, a clock, and a cabinet.", "boxes_value": [[0.1621704272, 128.4503783936, 546.1520996177, 511.8049926656], [0.1621704272, 128.4503783936, 546.1520996177, 511.8049926656], [118.2663574233, 216.5444946432, 217.00921633069999, 280.4368896512], [401.87731932009996, 92.1686401536, 450.9523925467, 157.2464599552], [441.3508300449, 132.7089233408, 456.28674316449997, 160.4470214656], [372.0056152013, 151.912231424, 557.6374511811999, 414.35723878399995]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046345_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, a pillow, a lamp, a clock, and a cabinet.", "boxes_value": [[0.1621704272, 96.4503783936, 546.1520996177, 479.8049926656], [0.1621704272, 96.4503783936, 546.1520996177, 479.8049926656], [118.2663574233, 184.5444946432, 217.00921633069999, 248.43688965119998], [401.87731932009996, 60.168640153599995, 450.9523925467, 125.24645995520001], [441.3508300449, 100.7089233408, 456.28674316449997, 128.4470214656], [372.0056152013, 119.912231424, 557.6374511811999, 382.35723878399995]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046346.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[222.60827635200002, 140.2114868145, 490.669677757, 289.21362304720003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046346_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[67.60827635200002, 38.211486814500006, 335, 187.21362304720003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046346.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, and five breads.", "boxes_value": [[222.60827635200002, 140.2114868145, 490.669677757, 289.21362304720003], [0, 187.1149902559, 490.372802731, 729.2813720698], [368.298400855, 201.03881838540002, 488.702636702, 289.21362304720003], [158.50311279599998, 211.3765258671, 352.487731918, 298.943237287], [222.60827635200002, 170.3684081849, 272.86981202600003, 217.949340803], [294.31469725700003, 160.9862671196, 424.994628926, 223.980712884], [446.439575211, 140.2114868145, 490.669677757, 196.5043945372]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046346_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, and five breads.", "boxes_value": [[67.60827635200002, 38.211486814500006, 335, 187.21362304720003], [0, 85.11499025590001, 335, 224], [213.298400855, 99.03881838540002, 333.702636702, 187.21362304720003], [3.503112795999982, 109.37652586710001, 197.487731918, 196.943237287], [67.60827635200002, 68.3684081849, 117.86981202600003, 115.94934080300001], [139.31469725700003, 58.9862671196, 269.994628926, 121.98071288400001], [291.439575211, 38.211486814500006, 335, 94.5043945372]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046347.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[328.37335206399996, 286.567199712, 594.003051776, 419.68353273599996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046347_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[67.37335206399996, 33.56719971199999, 333.003051776, 166.68353273599996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046347.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a watch, and four books.", "boxes_value": [[328.37335206399996, 286.567199712, 594.003051776, 419.68353273599996], [46.72998048, 358.079345712, 637.698608384, 478.476379392], [328.37335206399996, 401.513427744, 352.263671872, 419.68353273599996], [370.14178463999997, 286.567199712, 418.707519552, 359.93255616], [414.621948224, 291.685607904, 466.67932127999995, 367.602600096], [471.668212864, 302.74780272, 537.390625024, 378.881713872], [534.1370849919999, 298.409667984, 594.003051776, 389.293151856]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046347_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a watch, and four books.", "boxes_value": [[67.37335206399996, 33.56719971199999, 333.003051776, 166.68353273599996], [0, 105.07934571200002, 376.69860838399995, 199], [67.37335206399996, 148.513427744, 91.26367187199997, 166.68353273599996], [109.14178463999997, 33.56719971199999, 157.707519552, 106.93255615999999], [153.621948224, 38.685607903999994, 205.67932127999995, 114.602600096], [210.668212864, 49.74780271999998, 276.390625024, 125.88171387199998], [273.1370849919999, 45.40966798400001, 333.003051776, 136.293151856]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046348.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 72.918090816, 172.25921631999998, 241.9416504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046348_crop.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 42.918090816, 172.25921631999998, 211.9416504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046348.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a desk, a sports car, and a car.", "boxes_value": [[0, 72.918090816, 172.25921631999998, 241.9416504], [0.831176768, 123.50042726400001, 85.610168448, 241.9416504], [88.72705075200001, 115.39654540800001, 172.25921631999998, 180.22753905599998], [0.810119616, 110.276733408, 119.115905792, 223.536560064], [25.835327168000003, 69.07238769599999, 628.421997056, 427.779174816], [0, 72.918090816, 48.037231424, 122.11279296]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046348_crop.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a desk, a sports car, and a car.", "boxes_value": [[0, 42.918090816, 172.25921631999998, 211.9416504], [0.831176768, 93.50042726400001, 85.610168448, 211.9416504], [88.72705075200001, 85.39654540800001, 172.25921631999998, 150.22753905599998], [0.810119616, 80.276733408, 119.115905792, 193.536560064], [25.835327168000003, 39.07238769599999, 215, 254], [0, 42.918090816, 48.037231424, 92.11279296]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046350.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference.", "boxes_value": [[47.4237060854, 225.1619262464, 180.61450194489998, 359.4385375744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046350_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference.", "boxes_value": [[33.4237060854, 34.161926246399986, 166.61450194489998, 168.43853757440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046350.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference. For your reference, objects involved in this region include two pillows, two lamps, a couch, and a bed.", "boxes_value": [[47.4237060854, 225.1619262464, 180.61450194489998, 359.4385375744], [45.614074686, 271.851135232, 99.5418700861, 310.215820288], [47.4237060854, 275.470397952, 64.4345092542, 298.27209472], [105.6947021606, 274.746582016, 118.7242431428, 299.7198486528], [152.0219116041, 225.1619262464, 180.61450194489998, 336.6368408064], [1.09649657, 278.7277832192, 100.6276855516, 338.4465332224], [72.3969726562, 293.5670166016, 159.2605590722, 359.4385375744]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046350_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference. For your reference, objects involved in this region include two pillows, two lamps, a couch, and a bed.", "boxes_value": [[33.4237060854, 34.161926246399986, 166.61450194489998, 168.43853757440002], [31.614074686000002, 80.85113523199999, 85.5418700861, 119.21582028799997], [33.4237060854, 84.47039795199998, 50.434509254199995, 107.27209471999998], [91.6947021606, 83.74658201599999, 104.7242431428, 108.71984865280001], [138.0219116041, 34.161926246399986, 166.61450194489998, 145.63684080640002], [0, 87.72778321919998, 86.6276855516, 147.44653322239998], [58.396972656200006, 102.56701660160002, 145.2605590722, 168.43853757440002]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046351.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object.", "boxes_value": [[466.0076904464, 335.0066528256, 574.4193115426, 511.8516845568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046351_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object.", "boxes_value": [[28.007690446399977, 45.006652825599986, 136.41931154259998, 221.85168455680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046351.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a towel, two wine glasses, two plates, and a cake.", "boxes_value": [[466.0076904464, 335.0066528256, 574.4193115426, 511.8516845568], [513.8774414037, 335.0066528256, 574.4193115426, 410.0444336128], [493.0803222577, 367.0565185536, 543.0416259591, 463.8109741056], [466.0076904464, 428.5791625728, 570.9799804872, 483.3708495872], [497.4722900338, 437.8015136768, 568.5386962954, 511.8516845568], [475.77258304110006, 479.5734253056, 591.5946044919999, 511.8516845568], [495.736206074, 386.041992192, 540.3311767264, 431.1770019328]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00046351_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a towel, two wine glasses, two plates, and a cake.", "boxes_value": [[28.007690446399977, 45.006652825599986, 136.41931154259998, 221.85168455680002], [75.8774414037, 45.006652825599986, 136.41931154259998, 120.04443361279999], [55.080322257700004, 77.05651855359997, 105.04162595909997, 173.8109741056], [28.007690446399977, 138.5791625728, 132.9799804872, 193.37084958719998], [59.47229003379999, 147.80151367680003, 130.53869629539997, 221.85168455680002], [37.77258304110006, 189.57342530559998, 153.59460449199992, 221.85168455680002], [57.736206073999995, 96.04199219200001, 102.33117672640003, 141.1770019328]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00046352.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[458.21948243199995, 120.28515624, 611.39318848, 235.368042]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046352_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[39.21948243199995, 29.285156240000006, 192.39318848000005, 144.368042]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046352.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a picture, a cabinet, and two lamps.", "boxes_value": [[458.21948243199995, 120.28515624, 611.39318848, 235.368042], [458.21948243199995, 194.03546140799997, 492.25805664, 215.10693360000002], [507.656494144, 174.17956540799997, 529.943725568, 195.656311056], [495.499877952, 200.11376952, 551.0152588159999, 235.368042], [593.158203136, 120.28515624, 611.39318848, 144.193176288], [509.020996096, 140.021057136, 527.479248064, 160.265625024]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046352_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a picture, a cabinet, and two lamps.", "boxes_value": [[39.21948243199995, 29.285156240000006, 192.39318848000005, 144.368042], [39.21948243199995, 103.03546140799997, 73.25805664, 124.10693360000002], [88.65649414400002, 83.17956540799997, 110.94372556799999, 104.65631105599999], [76.49987795200002, 109.11376952, 132.0152588159999, 144.368042], [174.158203136, 29.285156240000006, 192.39318848000005, 53.19317628799999], [90.02099609599998, 49.021057135999996, 108.47924806399999, 69.265625024]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046353.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[208.7671509032, 101.4115600384, 432.57238772119996, 409.3754272256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046353_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[56.76715090319999, 77.4115600384, 280.57238772119996, 385.3754272256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046353.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a glasses, a leather shoes, a hammer, and a trolley.", "boxes_value": [[208.7671509032, 101.4115600384, 432.57238772119996, 409.3754272256], [208.7671509032, 101.4115600384, 347.4205321892, 409.3754272256], [282.4096679812, 140.4196167168, 318.7003173716, 154.6456299008], [211.1807861292, 388.389465344, 242.4618530216, 407.1580810752], [255.2868652188, 276.8235473408, 286.8165283088, 329.3557739008], [318.0533447016, 215.128601088, 432.57238772119996, 318.929138176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046353_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a glasses, a leather shoes, a hammer, and a trolley.", "boxes_value": [[56.76715090319999, 77.4115600384, 280.57238772119996, 385.3754272256], [56.76715090319999, 77.4115600384, 195.4205321892, 385.3754272256], [130.40966798120002, 116.4196167168, 166.70031737160002, 130.6456299008], [59.18078612919999, 364.389465344, 90.4618530216, 383.1580810752], [103.28686521879999, 252.82354734080002, 134.8165283088, 305.3557739008], [166.0533447016, 191.128601088, 280.57238772119996, 294.929138176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046354.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[309.9623838878, 159.6009521664, 415.01598931319995, 276.9708942848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046354_crop.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[26.9623838878, 29.600952166399992, 132.01598931319995, 146.9708942848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046354.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two handbags, and a street lights.", "boxes_value": [[309.9623838878, 159.6009521664, 415.01598931319995, 276.9708942848], [367.68652345690003, 214.2072753664, 412.1217040737, 342.2072753664], [320.48626706249996, 181.9080200192, 372.9567870919, 366.0148925952], [399.119758933, 240.3821736448, 415.01598931319995, 273.9810242048], [309.9623838878, 252.306005504, 331.20159363460004, 276.9708942848], [393.5594482151, 159.6009521664, 409.9479980406, 185.7596435456]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046354_crop.jpg", "text": "What does the area look like in the context of the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two handbags, and a street lights.", "boxes_value": [[26.9623838878, 29.600952166399992, 132.01598931319995, 146.9708942848], [84.68652345690003, 84.20727536640001, 129.12170407370002, 176], [37.48626706249996, 51.908020019199995, 89.95678709190003, 176], [116.11975893300001, 110.38217364479999, 132.01598931319995, 143.9810242048], [26.9623838878, 122.30600550400001, 48.201593634600044, 146.9708942848], [110.55944821510002, 29.600952166399992, 126.9479980406, 55.7596435456]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046358.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[40.940124479299996, 112.8608398336, 413.6512451453, 194.9226074112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046358_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[40.940124479299996, 20.860839833599996, 413.6512451453, 102.9226074112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046358.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two flags, and a glasses.", "boxes_value": [[40.940124479299996, 112.8608398336, 413.6512451453, 194.9226074112], [0.09893801399999999, 64.094482432, 371.463500958, 511.980468736], [40.940124479299996, 113.6591796736, 59.7017822562, 174.3352660992], [61.2985229465, 112.8608398336, 94.0316772164, 147.989074688], [219.05090331309998, 175.4614867968, 244.87426761060001, 194.9226074112], [379.7633056885, 130.4678344704, 413.6512451453, 168.2847289856], [375.209106425, 92.2011108352, 413.8020019595, 147.9169311744]], "boxes_seq": [[0], [0], [1, 4, 5], [2, 3], [6]]}, {"image_path": "objects365_v1_00046358_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two flags, and a glasses.", "boxes_value": [[40.940124479299996, 20.860839833599996, 413.6512451453, 102.9226074112], [0.09893801399999999, 0, 371.463500958, 123], [40.940124479299996, 21.659179673599994, 59.7017822562, 82.3352660992], [61.2985229465, 20.860839833599996, 94.0316772164, 55.98907468799999], [219.05090331309998, 83.46148679679999, 244.87426761060001, 102.9226074112], [379.7633056885, 38.46783447039999, 413.6512451453, 76.2847289856], [375.209106425, 0.20111083519999795, 413.8020019595, 55.91693117439999]], "boxes_seq": [[0], [0], [1, 4, 5], [2, 3], [6]]}, {"image_path": "objects365_v1_00046359.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[370.54833984, 90.4630737408, 768.3275146752001, 250.5563354624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046359_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[99.54833983999998, 40.4630737408, 497, 200.5563354624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046359.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five boats.", "boxes_value": [[370.54833984, 90.4630737408, 768.3275146752001, 250.5563354624], [370.54833984, 90.4630737408, 448.15673825280004, 136.2388916224], [482.2548827904, 111.8313598464, 602.6699218944, 171.0242309632], [698.1772461312, 117.363525376, 767.1376952832, 201.607177728], [736.7718505728, 205.9404907008, 768.3275146752001, 250.5563354624], [304.07067870720005, 77.9240722432, 462.55041500159996, 210.1388549632]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046359_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five boats.", "boxes_value": [[99.54833983999998, 40.4630737408, 497, 200.5563354624], [99.54833983999998, 40.4630737408, 177.15673825280004, 86.23889162239999], [211.25488279040002, 61.831359846400005, 331.6699218944, 121.02423096320001], [427.17724613120004, 67.363525376, 496.13769528319995, 151.607177728], [465.77185057279996, 155.9404907008, 497, 200.5563354624], [33.070678707200045, 27.9240722432, 191.55041500159996, 160.1388549632]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046360.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations.", "boxes_value": [[319.674560541, 37.7973022208, 642.9085693458, 179.7133789184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046360_crop.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations.", "boxes_value": [[81.674560541, 35.7973022208, 404.90856934579995, 177.7133789184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046360.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include five hats.", "boxes_value": [[319.674560541, 37.7973022208, 642.9085693458, 179.7133789184], [410.529174803, 47.7168579072, 434.9981689586, 76.0241089024], [615.9381103468, 37.7973022208, 642.9085693458, 63.4834594816], [475.3063965206, 119.9930419712, 498.42395020460003, 142.4684448256], [515.7620849808001, 157.880126976, 541.4482421826, 179.7133789184], [319.674560541, 91.1486205952, 341.42480468459996, 113.2186889728]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046360_crop.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include five hats.", "boxes_value": [[81.674560541, 35.7973022208, 404.90856934579995, 177.7133789184], [172.529174803, 45.7168579072, 196.9981689586, 74.0241089024], [377.93811034680004, 35.7973022208, 404.90856934579995, 61.4834594816], [237.3063965206, 117.9930419712, 260.42395020460003, 140.4684448256], [277.76208498080007, 155.880126976, 303.4482421826, 177.7133789184], [81.674560541, 89.1486205952, 103.42480468459996, 111.2186889728]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046361.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify.", "boxes_value": [[603.2315674, 119.3832397312, 769.628784155, 512.7679443456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046361_crop.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify.", "boxes_value": [[42.23156740000002, 98.3832397312, 208.62878415499995, 491]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046361.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a desk, two chairs, and a book.", "boxes_value": [[603.2315674, 119.3832397312, 769.628784155, 512.7679443456], [691.021362339, 119.3832397312, 767.684448223, 409.60784911359997], [638.109619147, 392.87475584, 769.628784155, 510.5881347584], [608.317993129, 382.7020263424, 643.195922831, 481.5230712832], [603.2315674, 432.1125488128, 647.555664063, 512.7679443456], [719.29809567, 440.632995584, 770.0666503530001, 472.5595703296]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046361_crop.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a desk, two chairs, and a book.", "boxes_value": [[42.23156740000002, 98.3832397312, 208.62878415499995, 491], [130.021362339, 98.3832397312, 206.684448223, 388.60784911359997], [77.10961914699999, 371.87475584, 208.62878415499995, 489.5881347584], [47.317993129, 361.7020263424, 82.19592283099996, 460.5230712832], [42.23156740000002, 411.1125488128, 86.555664063, 491], [158.29809566999995, 419.632995584, 209, 451.5595703296]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046365.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[643.6326903930001, 94.8246459904, 770.7926025029999, 432.114013696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046365_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[32.632690393000075, 84.8246459904, 159, 422.114013696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046365.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, three cabinets, and a chair.", "boxes_value": [[643.6326903930001, 94.8246459904, 770.7926025029999, 432.114013696], [608.245361339, 315.9424438272, 713.937377923, 451.0896606208], [643.6326903930001, 94.8246459904, 749.148315454, 285.113586432], [736.522583027, 97.5302124032, 770.7926025029999, 262.567504896], [726.602294957, 256.2545776128, 769.8907470869999, 432.114013696], [598.316284181, 286.8770141696, 752.234008757, 400.7193603584]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046365_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, three cabinets, and a chair.", "boxes_value": [[32.632690393000075, 84.8246459904, 159, 422.114013696], [0, 305.9424438272, 102.93737792299999, 441.0896606208], [32.632690393000075, 84.8246459904, 138.148315454, 275.113586432], [125.522583027, 87.5302124032, 159, 252.567504896], [115.60229495700003, 246.2545776128, 158.8907470869999, 422.114013696], [0, 276.8770141696, 141.23400875699997, 390.7193603584]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046366.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[314.3989257724, 311.9070434816, 683.6948242187, 393.447937024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046366_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[92.39892577239999, 20.9070434816, 461.6948242187, 102.447937024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046366.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two suvs, two cars, and a van.", "boxes_value": [[314.3989257724, 311.9070434816, 683.6948242187, 393.447937024], [314.3989257724, 314.9937744384, 358.8647460769, 366.1040038912], [334.8428954955, 312.4382324224, 431.1856689651, 393.447937024], [436.8834228682, 311.9070434816, 494.1745605762, 365.1749267456], [418.6704101552, 312.1763305472, 444.09082033889996, 346.9372558848], [645.8021240307, 318.3687133696, 683.6948242187, 349.9721069568]], "boxes_seq": [[0], [0], [1, 5], [2, 4], [3]]}, {"image_path": "objects365_v1_00046366_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two suvs, two cars, and a van.", "boxes_value": [[92.39892577239999, 20.9070434816, 461.6948242187, 102.447937024], [92.39892577239999, 23.99377443840001, 136.86474607690002, 75.10400389120002], [112.84289549549999, 21.438232422400006, 209.18566896509998, 102.447937024], [214.8834228682, 20.9070434816, 272.1745605762, 74.17492674559998], [196.6704101552, 21.176330547199996, 222.09082033889996, 55.93725588479998], [423.80212403070004, 27.368713369600016, 461.6948242187, 58.97210695680002]], "boxes_seq": [[0], [0], [1, 5], [2, 4], [3]]}, {"image_path": "objects365_v1_00046367.jpg", "text": "Please share details about the rectangular region within the image . Please mention the objects and their locations.", "boxes_value": [[50.900634792, 214.6527710208, 266.954406768, 509.4225463808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046367_crop.jpg", "text": "Please share details about the rectangular region within the image . Please mention the objects and their locations.", "boxes_value": [[50.900634792, 74.6527710208, 266.954406768, 369.4225463808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046367.jpg", "text": "Please share details about the rectangular region within the image . Please mention the objects and their locations. For your reference, objects involved in this region include two street lights, a car, and two buses.", "boxes_value": [[50.900634792, 214.6527710208, 266.954406768, 509.4225463808], [50.900634792, 214.6527710208, 114.874694856, 485.3489990144], [175.402893096, 325.370666496, 203.663818368, 440.819641088], [112.706054712, 441.0290527232, 210.756835944, 509.4225463808], [197.22729492000002, 407.8693237248, 219.48962400000002, 422.1508178944], [230.830749504, 400.121887232, 266.954406768, 441.2860717568]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046367_crop.jpg", "text": "Please share details about the rectangular region within the image . Please mention the objects and their locations. For your reference, objects involved in this region include two street lights, a car, and two buses.", "boxes_value": [[50.900634792, 74.6527710208, 266.954406768, 369.4225463808], [50.900634792, 74.6527710208, 114.874694856, 345.3489990144], [175.402893096, 185.370666496, 203.663818368, 300.819641088], [112.706054712, 301.0290527232, 210.756835944, 369.4225463808], [197.22729492000002, 267.8693237248, 219.48962400000002, 282.1508178944], [230.830749504, 260.121887232, 266.954406768, 301.2860717568]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046368.jpg", "text": "What's inside the area of the provided graphic ? Give coordinates for the items you reference.", "boxes_value": [[790.3876953600001, 271.707336432, 1112.8403320319999, 480.323425296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046368_crop.jpg", "text": "What's inside the area of the provided graphic ? Give coordinates for the items you reference.", "boxes_value": [[81.38769536000007, 52.70733643199998, 403.8403320319999, 261]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046368.jpg", "text": "What's inside the area of the provided graphic ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a towel, an oven, and an induction cooker.", "boxes_value": [[790.3876953600001, 271.707336432, 1112.8403320319999, 480.323425296], [790.3876953600001, 312.982666032, 860.388915968, 479.04943848000005], [1027.945068416, 317.450805648, 1112.8403320319999, 479.04943848000005], [956.5590819839999, 320.000366208, 1013.049316352, 413.523010272], [862.249023488, 324.862609872, 1026.85449216, 480.323425296], [859.108886656, 271.707336432, 1028.587646464, 321.056640624]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046368_crop.jpg", "text": "What's inside the area of the provided graphic ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a towel, an oven, and an induction cooker.", "boxes_value": [[81.38769536000007, 52.70733643199998, 403.8403320319999, 261], [81.38769536000007, 93.982666032, 151.388915968, 260.04943848000005], [318.9450684159999, 98.45080564800003, 403.8403320319999, 260.04943848000005], [247.55908198399993, 101.000366208, 304.049316352, 194.52301027200002], [153.24902348800003, 105.86260987200001, 317.85449216000006, 261], [150.10888665599998, 52.70733643199998, 319.58764646400004, 102.05664062400001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046369.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[294.444213888, 283.942382832, 632.1679687679999, 480.39892579199994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046369_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[84.44421388799998, 49.94238283200002, 422.1679687679999, 246]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046369.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a storage box, a desk, a stool, and a helmet.", "boxes_value": [[294.444213888, 283.942382832, 632.1679687679999, 480.39892579199994], [294.444213888, 309.45257568, 459.34436032, 479.465881344], [473.75866700800003, 371.378540016, 592.640258816, 480.39892579199994], [444.723144512, 308.376831072, 639.754516608, 460.676635728], [326.999633792, 283.942382832, 467.499267584, 468.687744144], [580.527832, 300.888793968, 632.1679687679999, 348.40307616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046369_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a storage box, a desk, a stool, and a helmet.", "boxes_value": [[84.44421388799998, 49.94238283200002, 422.1679687679999, 246], [84.44421388799998, 75.45257568, 249.34436032000002, 245.46588134400002], [263.75866700800003, 137.378540016, 382.640258816, 246], [234.72314451199998, 74.37683107200002, 429.754516608, 226.676635728], [116.999633792, 49.94238283200002, 257.499267584, 234.68774414400002], [370.527832, 66.88879396800002, 422.1679687679999, 114.40307616000001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046370.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each mentioned object.", "boxes_value": [[559.784423843, 314.5466308608, 682.8273925814999, 492.2191772672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046370_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each mentioned object.", "boxes_value": [[30.784423843000013, 44.54663086080001, 153.8273925814999, 222.2191772672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046370.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[559.784423843, 314.5466308608, 682.8273925814999, 492.2191772672], [559.784423843, 314.5466308608, 580.5457763963, 355.150634752], [599.4697265842, 325.5703735296, 614.7192382659, 352.210998528], [620.2967529342001, 315.3602905088, 633.0527344074, 356.6204833792], [624.2318115366, 436.8125000192, 682.8273925814999, 492.2191772672], [624.8482665734, 437.9802246144, 682.7086181302, 511.8676147712]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046370_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[30.784423843000013, 44.54663086080001, 153.8273925814999, 222.2191772672], [30.784423843000013, 44.54663086080001, 51.54577639629997, 85.15063475199997], [70.46972658419998, 55.570373529599976, 85.7192382659, 82.210998528], [91.29675293420007, 45.360290508800006, 104.05273440739995, 86.62048337919998], [95.23181153660005, 166.8125000192, 153.8273925814999, 222.2191772672], [95.8482665734, 167.98022461440002, 153.70861813019997, 241.8676147712]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046372.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[8.250366225, 159.78393555, 167.37786862500002, 306.44171145]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046372_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[8.250366225, 36.783935549999995, 167.37786862500002, 183.44171145]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046372.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two backpacks, a helmet, a glasses, and a handbag.", "boxes_value": [[8.250366225, 159.78393555, 167.37786862500002, 306.44171145], [8.250366225, 204.0725708, 26.84521485, 229.42926025], [34.95935055, 179.56115724999998, 78.9108276, 204.0725708], [42.9664917, 214.40844725, 110.5911255, 300.19665525], [151.7664795, 159.78393555, 167.37786862500002, 183.08795165], [133.652587875, 252.02258300000003, 159.095275875, 306.44171145]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046372_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two backpacks, a helmet, a glasses, and a handbag.", "boxes_value": [[8.250366225, 36.783935549999995, 167.37786862500002, 183.44171145], [8.250366225, 81.0725708, 26.84521485, 106.42926025], [34.95935055, 56.56115724999998, 78.9108276, 81.0725708], [42.9664917, 91.40844725, 110.5911255, 177.19665525], [151.7664795, 36.783935549999995, 167.37786862500002, 60.08795165000001], [133.652587875, 129.02258300000003, 159.095275875, 183.44171145]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046373.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[608.2961425664, 172.4837036032, 710.301635708, 267.5654907392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046373_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[26.29614256640002, 24.483703603200013, 128.30163570800005, 119.56549073920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046373.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a person, and three bottles.", "boxes_value": [[608.2961425664, 172.4837036032, 710.301635708, 267.5654907392], [686.1422119192, 172.4837036032, 701.0332031156, 196.2160644608], [650.8057861036, 200.399963392, 696.4927978276, 253.2534790144], [699.4722900624, 230.885376, 710.301635708, 267.5654907392], [696.6776123116, 215.8640747008, 706.4589844008, 259.1814575104], [608.2961425664, 217.2613525504, 623.66687008, 266.1681518592]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046373_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a person, and three bottles.", "boxes_value": [[26.29614256640002, 24.483703603200013, 128.30163570800005, 119.56549073920002], [104.14221191920001, 24.483703603200013, 119.03320311560003, 48.2160644608], [68.80578610359998, 52.39996339199999, 114.49279782760004, 105.2534790144], [117.47229006240002, 82.88537600000001, 128.30163570800005, 119.56549073920002], [114.6776123116, 67.86407470079999, 124.45898440079998, 111.18145751039998], [26.29614256640002, 69.2613525504, 41.66687007999997, 118.16815185920001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046374.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[624.1533202887, 241.0219116032, 683.0330810367001, 401.4039917056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046374_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[15.153320288700002, 41.02191160320001, 74, 201.4039917056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046374.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a flower, a vase, and three people.", "boxes_value": [[624.1533202887, 241.0219116032, 683.0330810367001, 401.4039917056], [617.7830810557, 342.5094604288, 682.7158202969, 419.0372924928], [635.6220703078001, 301.27691648, 662.7421874892, 318.8683471872], [637.0880126733, 315.2034301952, 663.475219698, 364.3128051712], [624.1533202887, 339.1513061376, 683.0330810367001, 401.4039917056], [659.6057129085, 241.0219116032, 682.8519287418001, 311.6460571136], [629.7177734699, 245.4497680896, 655.1778564193, 308.9893188608]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046374_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a flower, a vase, and three people.", "boxes_value": [[15.153320288700002, 41.02191160320001, 74, 201.4039917056], [8.783081055699995, 142.50946042880003, 73.7158202969, 219.03729249280002], [26.622070307800072, 101.27691648000001, 53.74218748919998, 118.86834718720002], [28.0880126733, 115.20343019519999, 54.475219698000046, 164.3128051712], [15.153320288700002, 139.1513061376, 74, 201.4039917056], [50.60571290849998, 41.02191160320001, 73.85192874180007, 111.64605711360002], [20.717773469899953, 45.44976808960001, 46.17785641930004, 108.98931886079998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046375.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[174.07971194590002, 379.0894775296, 544.9089355285, 497.0001830912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046375_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[93.07971194590002, 30.089477529600003, 463.9089355285, 148.00018309119997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046375.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[174.07971194590002, 379.0894775296, 544.9089355285, 497.0001830912], [174.07971194590002, 379.0894775296, 206.50518796600002, 435.0970459136], [228.90820314409999, 380.2685547008, 275.482971212, 448.6567993344], [294.34869387410004, 396.7760620032, 327.9532470689, 459.2687988224], [362.7369384986, 399.7238769664, 419.3341064166, 475.7762450944], [440.5579834281, 423.3059692544, 485.3640136477, 485.7987060736], [494.79699706499997, 415.6417846784, 544.9089355285, 497.0001830912]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046375_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[93.07971194590002, 30.089477529600003, 463.9089355285, 148.00018309119997], [93.07971194590002, 30.089477529600003, 125.50518796600002, 86.09704591360003], [147.90820314409999, 31.268554700799996, 194.482971212, 99.65679933439998], [213.34869387410004, 47.776062003200025, 246.9532470689, 110.26879882240002], [281.7369384986, 50.723876966399985, 338.3341064166, 126.7762450944], [359.5579834281, 74.3059692544, 404.3640136477, 136.7987060736], [413.79699706499997, 66.64178467839997, 463.9089355285, 148.00018309119997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046379.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each object you identify.", "boxes_value": [[71.613464351, 291.0366821376, 313.7292480465, 424.6099243008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046379_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each object you identify.", "boxes_value": [[60.613464351000005, 34.036682137599996, 302.7292480465, 167.6099243008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046379.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and a carriage.", "boxes_value": [[71.613464351, 291.0366821376, 313.7292480465, 424.6099243008], [278.22900390999996, 307.6680297984, 313.7292480465, 424.6099243008], [218.48620608099998, 318.0885620224, 247.943298317, 408.82690432], [184.0138549535, 322.9398803456, 220.39776608850002, 406.5376586752], [71.613464351, 291.0366821376, 103.59796139550001, 332.2184448], [30.564941407, 327.8911132672, 124.85412596799998, 446.9718017536]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046379_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and a carriage.", "boxes_value": [[60.613464351000005, 34.036682137599996, 302.7292480465, 167.6099243008], [267.22900390999996, 50.66802979840003, 302.7292480465, 167.6099243008], [207.48620608099998, 61.088562022400026, 236.943298317, 151.82690431999998], [173.0138549535, 65.93988034559999, 209.39776608850002, 149.53765867520002], [60.613464351000005, 34.036682137599996, 92.59796139550001, 75.21844479999999], [19.564941407, 70.89111326720001, 113.85412596799998, 189.9718017536]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046381.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for all objects that you mention.", "boxes_value": [[452.889160185, 106.1116333056, 648.6750488499, 183.8681030144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046381_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for all objects that you mention.", "boxes_value": [[49.88916018499998, 20.111633305599995, 245.6750488499, 97.86810301439999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046381.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three umbrellas, a person, a suv, and a van.", "boxes_value": [[452.889160185, 106.1116333056, 648.6750488499, 183.8681030144], [603.2740478355, 106.1116333056, 648.6750488499, 126.3322754048], [536.8892821995, 119.6556396544, 575.4229736615999, 147.506713856], [493.5865478376, 122.8985595904, 535.7447509785001, 140.6392822272], [452.889160185, 133.3150635008, 476.5485839511, 183.8681030144], [48.5325317445, 90.8605346816, 538.2066650244, 411.47338864640005], [489.7286377134, 134.3598022656, 571.7709961209, 158.3956909056]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046381_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three umbrellas, a person, a suv, and a van.", "boxes_value": [[49.88916018499998, 20.111633305599995, 245.6750488499, 97.86810301439999], [200.27404783550003, 20.111633305599995, 245.6750488499, 40.3322754048], [133.88928219950003, 33.655639654400005, 172.42297366159994, 61.506713856000005], [90.5865478376, 36.8985595904, 132.74475097850006, 54.6392822272], [49.88916018499998, 47.315063500799994, 73.5485839511, 97.86810301439999], [0, 4.860534681600001, 135.20666502439997, 117], [86.72863771340002, 48.359802265599996, 168.7709961209, 72.39569090559999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046382.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[313.6691894784, 402.3032226816, 512.2663574016, 511.6187133952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046382_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[49.66918947840003, 28.303222681600005, 248.26635740159998, 137.61871339520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046382.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three cups, a spoon, a fork, a wine glass, and a plate.", "boxes_value": [[313.6691894784, 402.3032226816, 512.2663574016, 511.6187133952], [443.72326663679996, 460.8506469888, 492.9212646144, 510.9042968576], [391.1026611456, 490.7973022208, 412.9210205184, 511.332153344], [313.6691894784, 481.8132934656, 332.0649414144, 510.4765014528], [464.96276858880003, 425.4049682432, 512.2663574016, 487.0094604288], [403.724975616, 378.1014404096, 456.52893066239994, 487.0094604288], [363.38867189760003, 402.3032226816, 408.85864258559997, 472.3417358336], [367.42224122880003, 480.8164673024, 385.02355960320006, 511.6187133952]], "boxes_seq": [[0], [0], [1, 4, 6], [2], [3], [5], [7]]}, {"image_path": "objects365_v1_00046382_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three cups, a spoon, a fork, a wine glass, and a plate.", "boxes_value": [[49.66918947840003, 28.303222681600005, 248.26635740159998, 137.61871339520002], [179.72326663679996, 86.85064698880001, 228.9212646144, 136.9042968576], [127.1026611456, 116.79730222080002, 148.92102051839998, 137.332153344], [49.66918947840003, 107.8132934656, 68.06494141439998, 136.4765014528], [200.96276858880003, 51.40496824320002, 248.26635740159998, 113.00946042880003], [139.724975616, 4.101440409600002, 192.52893066239994, 113.00946042880003], [99.38867189760003, 28.303222681600005, 144.85864258559997, 98.34173583360001], [103.42224122880003, 106.81646730239999, 121.02355960320006, 137.61871339520002]], "boxes_seq": [[0], [0], [1, 4, 6], [2], [3], [5], [7]]}, {"image_path": "objects365_v1_00046383.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify.", "boxes_value": [[269.5951538176, 471.910888648, 483.752258304, 636.50305178]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046383_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify.", "boxes_value": [[53.59515381760002, 41.910888648000025, 267.752258304, 206.50305177999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046383.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a storage box, a chair, and two boots.", "boxes_value": [[269.5951538176, 471.910888648, 483.752258304, 636.50305178], [269.5951538176, 497.963501004, 328.0883789312, 567.119506824], [380.2345581056, 531.448608372, 444.864379904, 587.46118164], [320.3051147264, 471.910888648, 384.9349365248, 592.161498996], [439.2460326912, 592.387207064, 459.9375000064, 634.551025372], [456.423889152, 595.9008788880001, 483.752258304, 636.50305178]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046383_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a storage box, a chair, and two boots.", "boxes_value": [[53.59515381760002, 41.910888648000025, 267.752258304, 206.50305177999996], [53.59515381760002, 67.96350100400002, 112.0883789312, 137.11950682400004], [164.2345581056, 101.44860837199997, 228.86437990399997, 157.46118163999995], [104.3051147264, 41.910888648000025, 168.93493652479998, 162.16149899599998], [223.2460326912, 162.387207064, 243.9375000064, 204.55102537200003], [240.42388915200002, 165.90087888800008, 267.752258304, 206.50305177999996]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046384.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[218.3425903086, 198.4554443264, 309.4502563497, 274.4967041024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046384_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[23.342590308599995, 19.4554443264, 114.4502563497, 95.49670410239997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046384.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a suv, and four street lights.", "boxes_value": [[218.3425903086, 198.4554443264, 309.4502563497, 274.4967041024], [241.0070800842, 258.2579956224, 260.1191406295, 273.9578857472], [218.3425903086, 198.4554443264, 237.2857055448, 274.4967041024], [248.7595825034, 206.5593261568, 264.4606323151, 257.889770496], [275.0286865341, 209.5787963904, 294.9569701908, 270.269470208], [293.74914547779997, 212.2962646528, 309.4502563497, 269.0617065472]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046384_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a suv, and four street lights.", "boxes_value": [[23.342590308599995, 19.4554443264, 114.4502563497, 95.49670410239997], [46.0070800842, 79.25799562240002, 65.11914062950001, 94.95788574720001], [23.342590308599995, 19.4554443264, 42.28570554480001, 95.49670410239997], [53.7595825034, 27.55932615680001, 69.46063231509999, 78.88977049599998], [80.02868653410002, 30.578796390399987, 99.9569701908, 91.26947020799997], [98.74914547779997, 33.296264652800005, 114.4502563497, 90.0617065472]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046385.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[257.50134278400003, 244.751464848, 362.985351552, 337.965576192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046385_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[26.50134278400003, 23.751464848000012, 131.985351552, 116.96557619200001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046385.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pillow, a chair, a couch, a desk, a carpet, and two stools.", "boxes_value": [[257.50134278400003, 244.751464848, 362.985351552, 337.965576192], [257.50134278400003, 253.97015380800002, 279.671691904, 273.5100708], [246.979858368, 249.46093752, 290.568908672, 290.043884256], [302.593505856, 255.84899904000002, 349.188720704, 310.335327168], [200.59936524800003, 284.276977536, 295.637939456, 325.27404787200004], [150.637268096, 291.446105952, 349.762268096, 346.41931151999995], [304.06146240000004, 244.751464848, 347.61944582399997, 337.965576192], [338.29827878400005, 245.26898193600002, 362.985351552, 262.27563475200003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046385_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pillow, a chair, a couch, a desk, a carpet, and two stools.", "boxes_value": [[26.50134278400003, 23.751464848000012, 131.985351552, 116.96557619200001], [26.50134278400003, 32.97015380800002, 48.671691904, 52.510070799999994], [15.97985836800001, 28.460937519999987, 59.56890867200002, 69.04388425600001], [71.59350585599998, 34.848999040000024, 118.18872070399999, 89.33532716799999], [0, 63.276977536000004, 64.63793945600003, 104.27404787200004], [0, 70.44610595199998, 118.76226809600001, 125.41931151999995], [73.06146240000004, 23.751464848000012, 116.61944582399997, 116.96557619200001], [107.29827878400005, 24.268981936000017, 131.985351552, 41.27563475200003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046390.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[251.26409914969997, 2.8681030144, 541.6374511565, 375.085449216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046390_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[73.26409914969997, 2.8681030144, 363.6374511565, 375.085449216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046390.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a bicycle, and two street lights.", "boxes_value": [[251.26409914969997, 2.8681030144, 541.6374511565, 375.085449216], [266.6235351669, 156.2114868224, 352.0113525191, 324.2850341888], [289.8619384769, 208.0927123968, 413.62036134690004, 375.085449216], [451.45043945619994, 151.3475952128, 595.7451172163001, 341.5787963904], [251.26409914969997, 185.611999488, 272.33789065350004, 206.0167236096], [302.66662599480003, 46.2007446528, 329.2868652188, 181.63671874559998], [508.09301758339996, 2.8681030144, 541.6374511565, 179.2031860224]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046390_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a bicycle, and two street lights.", "boxes_value": [[73.26409914969997, 2.8681030144, 363.6374511565, 375.085449216], [88.62353516690001, 156.2114868224, 174.0113525191, 324.2850341888], [111.86193847689998, 208.0927123968, 235.62036134690004, 375.085449216], [273.45043945619994, 151.3475952128, 417.74511721630006, 341.5787963904], [73.26409914969997, 185.611999488, 94.33789065350004, 206.0167236096], [124.66662599480003, 46.2007446528, 151.28686521880002, 181.63671874559998], [330.09301758339996, 2.8681030144, 363.6374511565, 179.2031860224]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046391.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[253.7835693312, 433.1646728704, 376.7773437696, 511.6816406016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046391_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[30.7835693312, 20.16467287040001, 153.7773437696, 98.68164060160001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046391.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three bottles, and a bowl.", "boxes_value": [[253.7835693312, 433.1646728704, 376.7773437696, 511.6816406016], [129.3501586944, 38.3341675008, 622.1733398783999, 511.55133056], [353.78027343360003, 445.4247436288, 376.7773437696, 488.0534667776], [360.56970216959996, 488.2451782144, 378.4600829952, 511.3309326336], [277.1539306752, 433.1646728704, 361.0662842112, 511.05017088], [253.7835693312, 446.9662475776, 282.053955072, 511.6816406016]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046391_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three bottles, and a bowl.", "boxes_value": [[30.7835693312, 20.16467287040001, 153.7773437696, 98.68164060160001], [0, 0, 184, 98.55133056], [130.78027343360003, 32.4247436288, 153.7773437696, 75.05346677760002], [137.56970216959996, 75.24517821440003, 155.46008299520003, 98.33093263360001], [54.1539306752, 20.16467287040001, 138.0662842112, 98.05017088], [30.7835693312, 33.96624757759997, 59.05395507200001, 98.68164060160001]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00046393.jpg", "text": "What information can you give me about the coordinates in image ? Include the coordinates for each mentioned object.", "boxes_value": [[291.3606567424, 190.63281251840002, 372.0149536256, 297.1376953216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046393_crop.jpg", "text": "What information can you give me about the coordinates in image ? Include the coordinates for each mentioned object.", "boxes_value": [[20.36065674240001, 26.632812518400016, 101.01495362560001, 133.1376953216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046393.jpg", "text": "What information can you give me about the coordinates in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two chairs, a binoculars, and a tripod.", "boxes_value": [[291.3606567424, 190.63281251840002, 372.0149536256, 297.1376953216], [350.233459456, 190.63281251840002, 372.0149536256, 234.19586181600002], [292.9791869952, 235.4404907456, 330.318908672, 285.2268066032], [330.318908672, 234.81817624159999, 365.1693115392, 279.0035400432], [300.2218017792, 217.23413087039998, 327.0181274624, 233.36663818719998], [291.3606567424, 234.68865967680003, 333.4486084096, 297.1376953216]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046393_crop.jpg", "text": "What information can you give me about the coordinates in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two chairs, a binoculars, and a tripod.", "boxes_value": [[20.36065674240001, 26.632812518400016, 101.01495362560001, 133.1376953216], [79.23345945599999, 26.632812518400016, 101.01495362560001, 70.19586181600002], [21.979186995199996, 71.44049074559999, 59.31890867200002, 121.22680660319998], [59.31890867200002, 70.81817624159999, 94.16931153920001, 115.00354004320002], [29.221801779200007, 53.23413087039998, 56.0181274624, 69.36663818719998], [20.36065674240001, 70.68865967680003, 62.44860840960001, 133.1376953216]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046394.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[359.3652343848, 116.0632324096, 537.3823242508, 259.5051269632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046394_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[45.365234384799976, 36.063232409600005, 223.38232425080002, 179.50512696319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046394.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include three umbrellas, two trash bin cans, and a desk.", "boxes_value": [[359.3652343848, 116.0632324096, 537.3823242508, 259.5051269632], [232.338562043, 54.8289184768, 511.2604980154, 229.7304687616], [322.84680176, 108.9619750912, 473.3175048938, 196.5442504704], [392.50683591980004, 116.0632324096, 513.9046630634, 176.9312133632], [514.0321045076, 167.6036987392, 533.1365967116, 196.7263794176], [408.580200217, 167.7531738112, 431.7382812386, 193.3029174784], [359.3652343848, 227.02026368, 537.3823242508, 259.5051269632]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046394_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include three umbrellas, two trash bin cans, and a desk.", "boxes_value": [[45.365234384799976, 36.063232409600005, 223.38232425080002, 179.50512696319998], [0, 0, 197.2604980154, 149.7304687616], [8.846801760000005, 28.961975091200003, 159.3175048938, 116.54425047039999], [78.50683591980004, 36.063232409600005, 199.9046630634, 96.93121336319999], [200.03210450760002, 87.60369873920001, 219.13659671159996, 116.72637941759999], [94.58020021700003, 87.75317381120001, 117.7382812386, 113.30291747839999], [45.365234384799976, 147.02026368, 223.38232425080002, 179.50512696319998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046397.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates.", "boxes_value": [[331.4119724032, 464.356821148, 407.0809413632, 497.93340546]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046397_crop.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates.", "boxes_value": [[19.411972403200025, 9.356821147999995, 95.0809413632, 42.93340546000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046397.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two sneakers, and a hurdle.", "boxes_value": [[331.4119724032, 464.356821148, 407.0809413632, 497.93340546], [35.9362182656, 49.55603028, 463.2591552512, 564.003051736], [323.0308837888, 207.20910643599998, 396.049133312, 487.6657714879999], [331.4119724032, 464.356821148, 353.7963619328, 486.254593532], [386.6430204928, 476.765558888, 407.0809413632, 497.93340546], [104.5574951424, 355.220092764, 496.3798828032, 637.611206032]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046397_crop.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two sneakers, and a hurdle.", "boxes_value": [[19.411972403200025, 9.356821147999995, 95.0809413632, 42.93340546000002], [0, 0, 113, 51], [11.030883788799997, 0, 84.04913331199998, 32.665771487999905], [19.411972403200025, 9.356821147999995, 41.79636193279998, 31.254593532], [74.64302049280002, 21.765558887999987, 95.0809413632, 42.93340546000002], [0, 0, 113, 51]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046399.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[110.3522338816, 301.5662841503, 303.9613037056, 706.8887939645999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046399_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[49.3522338816, 101.56628415030002, 242.96130370560002, 506.8887939645999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046399.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three stools, and two couches.", "boxes_value": [[110.3522338816, 301.5662841503, 303.9613037056, 706.8887939645999], [166.5578613248, 462.78613278690005, 303.9613037056, 706.8887939645999], [136.9851074048, 434.26953122279997, 249.9953613312, 635.0711669796], [110.3522338816, 407.2366943034, 202.309997568, 577.6170653916], [241.6378173952, 311.9416503853, 355.339599616, 344.614624029], [172.3831787008, 301.5662841503, 289.0452880896, 342.7412109589]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046399_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three stools, and two couches.", "boxes_value": [[49.3522338816, 101.56628415030002, 242.96130370560002, 506.8887939645999], [105.5578613248, 262.78613278690005, 242.96130370560002, 506.8887939645999], [75.9851074048, 234.26953122279997, 188.9953613312, 435.0711669796], [49.3522338816, 207.2366943034, 141.309997568, 377.6170653916], [180.6378173952, 111.94165038530002, 291, 144.61462402900003], [111.38317870079999, 101.56628415030002, 228.04528808959998, 142.7412109589]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046400.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[48.3151855104, 222.6087035904, 622.394409216, 465.9359130624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046400_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[48.3151855104, 61.60870359040001, 622.394409216, 304.9359130624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046400.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two mirrors, a cabinet, two desks, two pictures, and a chair.", "boxes_value": [[48.3151855104, 222.6087035904, 622.394409216, 465.9359130624], [438.07055662080006, 222.6087035904, 498.305908224, 321.2440185344], [257.9766845952, 242.8268432384, 352.4174804736, 333.3189697024], [599.564819328, 338.0781250048, 622.394409216, 406.010192896], [184.0691528448, 399.201232896, 631.846435584, 512.232421888], [175.3090209792, 270.2308959744, 193.3479003648, 289.9534301696], [48.3151855104, 284.635314944, 74.7722168064, 307.7250976768], [166.8671874816, 357.8116454912, 237.9683227392, 465.9359130624], [0.26721192959999995, 393.393249536, 149.466979968, 511.93554688]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 8], [5, 6], [7]]}, {"image_path": "objects365_v1_00046400_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two mirrors, a cabinet, two desks, two pictures, and a chair.", "boxes_value": [[48.3151855104, 61.60870359040001, 622.394409216, 304.9359130624], [438.07055662080006, 61.60870359040001, 498.305908224, 160.24401853440003], [257.9766845952, 81.82684323839999, 352.4174804736, 172.3189697024], [599.564819328, 177.07812500479997, 622.394409216, 245.01019289599998], [184.0691528448, 238.20123289600002, 631.846435584, 351], [175.3090209792, 109.23089597440003, 193.3479003648, 128.95343016959998], [48.3151855104, 123.63531494400002, 74.7722168064, 146.72509767679998], [166.8671874816, 196.8116454912, 237.9683227392, 304.9359130624], [0.26721192959999995, 232.39324953599998, 149.466979968, 350.93554688]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 8], [5, 6], [7]]}, {"image_path": "objects365_v1_00046401.jpg", "text": "Please tell me about the area in the image . What does it contain? Please point out the objects and their coordinates.", "boxes_value": [[431.12670901340005, 209.7791748096, 681.9204101762, 281.3368530432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046401_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Please point out the objects and their coordinates.", "boxes_value": [[63.126709013400045, 18.779174809599994, 313.9204101762, 90.3368530432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046401.jpg", "text": "Please tell me about the area in the image . What does it contain? Please point out the objects and their coordinates. For your reference, objects involved in this region include two desks, a person, a handbag, a pickup truck, and a stroller.", "boxes_value": [[431.12670901340005, 209.7791748096, 681.9204101762, 281.3368530432], [557.3090820611, 255.5009765376, 681.9204101762, 281.3368530432], [437.70397947960004, 198.6743163904, 466.9996338171, 278.226013184], [431.12670901340005, 209.7791748096, 455.6079101475, 241.2075805696], [487.7310790981, 175.7397460992, 681.6002197079, 251.7435913216], [455.7458495788, 234.066650368, 520.3830566324, 279.109069824], [557.3090820611, 255.5009765376, 681.9204101762, 281.3368530432]], "boxes_seq": [[0], [0], [1, 6], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046401_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Please point out the objects and their coordinates. For your reference, objects involved in this region include two desks, a person, a handbag, a pickup truck, and a stroller.", "boxes_value": [[63.126709013400045, 18.779174809599994, 313.9204101762, 90.3368530432], [189.30908206109996, 64.5009765376, 313.9204101762, 90.3368530432], [69.70397947960004, 7.6743163904000085, 98.9996338171, 87.22601318400001], [63.126709013400045, 18.779174809599994, 87.60791014749998, 50.2075805696], [119.7310790981, 0, 313.6002197079, 60.74359132160001], [87.7458495788, 43.06665036800001, 152.38305663239998, 88.10906982400002], [189.30908206109996, 64.5009765376, 313.9204101762, 90.3368530432]], "boxes_seq": [[0], [0], [1, 6], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046403.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[27.656494144699998, 256.6348266496, 240.5559082121, 312.2524413952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046403_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[27.656494144699998, 14.634826649599972, 240.5559082121, 70.25244139519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046403.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, two people, two bicycles, and a car.", "boxes_value": [[27.656494144699998, 256.6348266496, 240.5559082121, 312.2524413952], [27.656494144699998, 272.9009399296, 82.0296020821, 311.6845092864], [84.032165502, 274.3993530368, 100.97320558970002, 312.2524413952], [46.4647827446, 246.2459106304, 72.2894286918, 306.2629394432], [142.2792968697, 279.843261696, 168.3295898163, 309.3341675008], [161.2026367007, 287.2159424, 181.3547363537, 307.1223144448], [169.178649891, 256.6348266496, 240.5559082121, 304.9871215616]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046403_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, two people, two bicycles, and a car.", "boxes_value": [[27.656494144699998, 14.634826649599972, 240.5559082121, 70.25244139519998], [27.656494144699998, 30.900939929599986, 82.0296020821, 69.68450928639999], [84.032165502, 32.399353036799994, 100.97320558970002, 70.25244139519998], [46.4647827446, 4.2459106304000045, 72.2894286918, 64.26293944320003], [142.2792968697, 37.84326169600001, 168.3295898163, 67.33416750079999], [161.2026367007, 45.21594240000002, 181.3547363537, 65.1223144448], [169.178649891, 14.634826649599972, 240.5559082121, 62.98712156160002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046406.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 338.2061157376, 172.4305420032, 468.8736572416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046406_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 33.20611573759999, 172.4305420032, 163.87365724159997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046406.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, two handbags, a backpack, and a luggage.", "boxes_value": [[0, 338.2061157376, 172.4305420032, 468.8736572416], [23.5471802112, 365.496643072, 68.7003783936, 468.8736572416], [91.4080200192, 413.9114379776, 133.0897216512, 511.6867065344], [137.0493774336, 351.1806030336, 174.8412475392, 439.103515648], [157.3463134464, 366.11578368, 172.4305420032, 392.5504760832], [55.3051757568, 410.54302976, 80.4915161088, 447.2342529536], [0, 338.2061157376, 17.3314209024, 362.1005249024], [5.9464721663999995, 359.5142212096, 27.0297241344, 388.5248413184]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046406_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, two handbags, a backpack, and a luggage.", "boxes_value": [[0, 33.20611573759999, 172.4305420032, 163.87365724159997], [23.5471802112, 60.49664307199998, 68.7003783936, 163.87365724159997], [91.4080200192, 108.91143797759997, 133.0897216512, 196], [137.0493774336, 46.18060303359999, 174.8412475392, 134.10351564799998], [157.3463134464, 61.11578367999999, 172.4305420032, 87.55047608320001], [55.3051757568, 105.54302976000002, 80.4915161088, 142.2342529536], [0, 33.20611573759999, 17.3314209024, 57.100524902400025], [5.9464721663999995, 54.51422120960001, 27.0297241344, 83.52484131839998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046407.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[60.9188232192, 257.5485839872, 527.9982910464, 429.135314944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046407_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[60.9188232192, 43.548583987200004, 527.9982910464, 215.13531494400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046407.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a couch, a pillow, a picture, a book, and a slippers.", "boxes_value": [[60.9188232192, 257.5485839872, 527.9982910464, 429.135314944], [0, 154.143615744, 420.88488767999996, 483.1170043904], [60.9188232192, 257.5485839872, 178.4104003584, 369.5111694336], [385.8936767232, 372.9674682368, 440.03112791039996, 410.7468261888], [296.40405273600004, 378.1137084928, 354.43627929599995, 429.135314944], [511.2307128576, 318.0137939456, 527.9982910464, 341.1035766784]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046407_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a couch, a pillow, a picture, a book, and a slippers.", "boxes_value": [[60.9188232192, 43.548583987200004, 527.9982910464, 215.13531494400002], [0, 0, 420.88488767999996, 258], [60.9188232192, 43.548583987200004, 178.4104003584, 155.5111694336], [385.8936767232, 158.96746823680002, 440.03112791039996, 196.74682618880001], [296.40405273600004, 164.11370849280001, 354.43627929599995, 215.13531494400002], [511.2307128576, 104.01379394560001, 527.9982910464, 127.10357667839997]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046408.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[240.0819702528, 259.0195922944, 575.9394531072, 511.825317376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046408_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[84.0819702528, 64.01959229440001, 419.93945310720005, 316.825317376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046408.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two people, two sandals, a sneakers, a bottle, and two canneds.", "boxes_value": [[240.0819702528, 259.0195922944, 575.9394531072, 511.825317376], [240.0819702528, 259.0195922944, 575.9394531072, 511.825317376], [334.3237304832, 160.2321777152, 466.55187985919997, 462.3367309824], [397.9483642368, 126.9350585856, 527.370361344, 512.024047872], [484.9420165632, 465.7187500032, 523.796875008, 510.9732055552], [405.8610839808, 437.8347167744, 476.25683596799996, 476.6895141376], [340.4935302912, 430.5208740352, 379.3483886592, 458.8619995136], [331.1566162176, 262.0275268608, 351.883911168, 322.6152343552], [370.2092285184, 279.29718016, 388.39550784, 312.9989623808], [292.17492672, 247.1190185472, 305.1807861504, 272.2553711104]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046408_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two people, two sandals, a sneakers, a bottle, and two canneds.", "boxes_value": [[84.0819702528, 64.01959229440001, 419.93945310720005, 316.825317376], [84.0819702528, 64.01959229440001, 419.93945310720005, 316.825317376], [178.3237304832, 0, 310.55187985919997, 267.3367309824], [241.9483642368, 0, 371.370361344, 317], [328.9420165632, 270.7187500032, 367.79687500800003, 315.9732055552], [249.8610839808, 242.83471677440002, 320.25683596799996, 281.6895141376], [184.4935302912, 235.5208740352, 223.3483886592, 263.8619995136], [175.1566162176, 67.02752686079998, 195.883911168, 127.61523435520002], [214.20922851839998, 84.29718015999998, 232.39550784, 117.99896238079998], [136.17492671999997, 52.1190185472, 149.18078615040002, 77.2553711104]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00046409.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[327.654595971, 234.5693987328, 527.402156913, 348.94152832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046409_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[50.65459597099999, 29.56939873280001, 250.402156913, 143.94152831999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046409.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a carpet, a flower, a person, and two hats.", "boxes_value": [[327.654595971, 234.5693987328, 527.402156913, 348.94152832], [137.2467651165, 276.050292992, 753.4129638375, 510.6456909312], [384.539672835, 307.289733888, 446.39575198200004, 348.94152832], [426.85766598149996, 255.6767578112, 534.5651855295, 412.5037231616], [327.654595971, 234.5693987328, 352.614869703, 286.231803392], [485.7088999455, 256.5256974336, 527.402156913, 299.7490289152]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046409_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a carpet, a flower, a person, and two hats.", "boxes_value": [[50.65459597099999, 29.56939873280001, 250.402156913, 143.94152831999997], [0, 71.05029299199998, 300, 172], [107.53967283499998, 102.289733888, 169.39575198200004, 143.94152831999997], [149.85766598149996, 50.67675781119999, 257.5651855295, 172], [50.65459597099999, 29.56939873280001, 75.61486970300001, 81.23180339200002], [208.70889994549998, 51.52569743359999, 250.402156913, 94.74902891519997]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046410.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[29.6519775744, 307.158203136, 335.1981037056, 401.9706420736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046410_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[29.6519775744, 24.158203135999997, 335.1981037056, 118.9706420736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046410.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, an umbrella, two people, and a backpack.", "boxes_value": [[29.6519775744, 307.158203136, 335.1981037056, 401.9706420736], [29.6519775744, 317.9346924032, 113.6879272704, 401.9706420736], [111.4500122112, 321.4859619328, 166.00646976000002, 367.2108764672], [194.5261230336, 307.158203136, 247.83612057599998, 354.317016576], [159.9369507072, 337.1561279488, 182.2819213824, 398.6676635648], [60.012084940799994, 330.8794555904, 95.4125976576, 424.0254516736], [321.35530306559997, 331.4876791808, 335.1981037056, 362.613159168]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046410_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, an umbrella, two people, and a backpack.", "boxes_value": [[29.6519775744, 24.158203135999997, 335.1981037056, 118.9706420736], [29.6519775744, 34.93469240320002, 113.6879272704, 118.9706420736], [111.4500122112, 38.48596193280002, 166.00646976000002, 84.21087646720002], [194.5261230336, 24.158203135999997, 247.83612057599998, 71.31701657600001], [159.9369507072, 54.15612794880002, 182.2819213824, 115.66766356480002], [60.012084940799994, 47.87945559040003, 95.4125976576, 141.0254516736], [321.35530306559997, 48.4876791808, 335.1981037056, 79.61315916799998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046412.jpg", "text": "Please describe the area in the image for me. Please mention the objects and their locations.", "boxes_value": [[370.034668006, 158.9296875008, 761.0063476887, 271.6055908352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046412_crop.jpg", "text": "Please describe the area in the image for me. Please mention the objects and their locations.", "boxes_value": [[98.034668006, 28.929687500799986, 489.0063476887, 141.60559083520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046412.jpg", "text": "Please describe the area in the image for me. Please mention the objects and their locations. For your reference, objects involved in this region include four people, two hats, and a boat.", "boxes_value": [[370.034668006, 158.9296875008, 761.0063476887, 271.6055908352], [350.9919433594, 194.431762688, 433.6236572254, 278.6473998848], [369.7359619469, 190.9997558784, 409.8637695683, 255.9434814464], [514.1459960503, 191.888732928, 594.4447021397, 271.6055908352], [648.1579589636001, 158.9296875008, 761.0063476887, 258.1809081856], [370.034668006, 190.9984741376, 393.9088134653, 208.904052736], [718.2589111021, 160.1495361536, 741.6198730435, 175.455078144], [245.4930420327, 97.4400024576, 898.7913818108, 320.3406982656]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046412_crop.jpg", "text": "Please describe the area in the image for me. Please mention the objects and their locations. For your reference, objects involved in this region include four people, two hats, and a boat.", "boxes_value": [[98.034668006, 28.929687500799986, 489.0063476887, 141.60559083520002], [78.99194335940001, 64.43176268799999, 161.6236572254, 148.6473998848], [97.73596194689998, 60.99975587840001, 137.86376956829997, 125.9434814464], [242.14599605030003, 61.888732927999996, 322.4447021397, 141.60559083520002], [376.1579589636001, 28.929687500799986, 489.0063476887, 128.1809081856], [98.034668006, 60.9984741376, 121.9088134653, 78.90405273600001], [446.2589111021, 30.149536153599996, 469.6198730435, 45.455078144], [0, 0, 586, 169]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046413.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 211.8140258816, 307.9096069661, 395.429138176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046413_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 46.8140258816, 307.9096069661, 230.42913817599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046413.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two desks, a book, a cup, and a moniter.", "boxes_value": [[0, 211.8140258816, 307.9096069661, 395.429138176], [0, 274.519531264, 133.2164306355, 395.429138176], [171.82849121829997, 256.3308105216, 332.5712890744, 398.3404540928], [245.35662843779997, 274.2324829184, 307.9096069661, 289.3494262784], [78.16815188140001, 219.857543936, 96.1799926562, 240.0466308608], [0.1066894493, 211.8140258816, 101.22692867800001, 304.5322875904]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046413_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two desks, a book, a cup, and a moniter.", "boxes_value": [[0, 46.8140258816, 307.9096069661, 230.42913817599998], [0, 109.51953126400002, 133.2164306355, 230.42913817599998], [171.82849121829997, 91.33081052159997, 332.5712890744, 233.34045409279997], [245.35662843779997, 109.23248291840002, 307.9096069661, 124.34942627840002], [78.16815188140001, 54.85754393600001, 96.1799926562, 75.04663086080001], [0.1066894493, 46.8140258816, 101.22692867800001, 139.53228759040002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046414.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe.", "boxes_value": [[27.4332886038, 309.7734374912, 588.2756347818, 469.9124755968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046414_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe.", "boxes_value": [[27.4332886038, 40.77343749120001, 588.2756347818, 200.9124755968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046414.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four cabinets, and a handbag.", "boxes_value": [[27.4332886038, 309.7734374912, 588.2756347818, 469.9124755968], [27.4332886038, 333.097106944, 152.4060669006, 469.9124755968], [153.5015868975, 331.4528198144, 310.27886960669997, 468.8349609472], [308.6625976641, 329.6608886784, 468.6723633006, 464.5847778304], [467.85302734379997, 325.4666747904, 588.2756347818, 461.7713012736], [171.7847900484, 309.7734374912, 235.5098266338, 326.543151872]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046414_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four cabinets, and a handbag.", "boxes_value": [[27.4332886038, 40.77343749120001, 588.2756347818, 200.9124755968], [27.4332886038, 64.09710694400002, 152.4060669006, 200.9124755968], [153.5015868975, 62.452819814400016, 310.27886960669997, 199.8349609472], [308.6625976641, 60.6608886784, 468.6723633006, 195.58477783040001], [467.85302734379997, 56.46667479040002, 588.2756347818, 192.7713012736], [171.7847900484, 40.77343749120001, 235.5098266338, 57.54315187200001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046416.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[458.47741702, 267.5567016448, 940.0330810439999, 505.7749023232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046416_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[120.47741702000002, 59.5567016448, 602, 297.7749023232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046416.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two pictures, and four people.", "boxes_value": [[458.47741702, 267.5567016448, 940.0330810439999, 505.7749023232], [447.860961892, 226.1979369984, 504.38281249399995, 358.4748535296], [809.421630822, 309.2947387904, 940.0330810439999, 505.7749023232], [759.581176798, 391.3555297792, 781.786132782, 425.6724242944], [783.228027358, 377.8018188288, 808.316894506, 448.1658324992], [688.687011674, 330.4204711936, 723.609497062, 400.4998169088], [458.47741702, 267.5567016448, 494.00524906000004, 314.3038940672]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046416_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two pictures, and four people.", "boxes_value": [[120.47741702000002, 59.5567016448, 602, 297.7749023232], [109.86096189199998, 18.197936998399996, 166.38281249399995, 150.4748535296], [471.421630822, 101.2947387904, 602, 297.7749023232], [421.581176798, 183.35552977920003, 443.786132782, 217.6724242944], [445.22802735799996, 169.80181882879998, 470.31689450600004, 240.1658324992], [350.687011674, 122.42047119360001, 385.60949706199995, 192.49981690880003], [120.47741702000002, 59.5567016448, 156.00524906000004, 106.30389406720002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046418.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[290.4320068272, 340.5656128, 402.5617675488, 401.5857544192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046418_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[28.432006827199984, 15.565612799999997, 140.5617675488, 76.58575441919999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046418.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, and three chairs.", "boxes_value": [[290.4320068272, 340.5656128, 402.5617675488, 401.5857544192], [321.9317016768, 340.5656128, 340.2391968072, 395.2188110336], [290.4320068272, 342.7194213888, 311.4318237624, 400.603393536], [388.1967773184, 364.015747072, 412.23046875840004, 402.690734848], [382.67175293279996, 356.8332519424, 402.5617675488, 393.0219726336], [340.4055785832, 362.910766592, 361.9530639936, 401.5857544192]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046418_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, and three chairs.", "boxes_value": [[28.432006827199984, 15.565612799999997, 140.5617675488, 76.58575441919999], [59.9317016768, 15.565612799999997, 78.23919680720002, 70.21881103359999], [28.432006827199984, 17.719421388800015, 49.43182376239997, 75.603393536], [126.1967773184, 39.01574707200001, 150.23046875840004, 77.69073484799998], [120.67175293279996, 31.83325194240001, 140.5617675488, 68.0219726336], [78.4055785832, 37.910766592000016, 99.95306399359998, 76.58575441919999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046422.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each object you identify.", "boxes_value": [[197.42498777699998, 81.4566650368, 611.6229248364999, 129.8546753024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046422_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each object you identify.", "boxes_value": [[104.42498777699998, 12.456665036800004, 518.6229248364999, 60.85467530240001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046422.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a person, a glasses, and two routers.", "boxes_value": [[197.42498777699998, 81.4566650368, 611.6229248364999, 129.8546753024], [341.3253173543, 82.8770141696, 681.745971703, 510.912658688], [89.98999022489998, 0.6528320512, 406.34228518000003, 512.2739257856], [197.42498777699998, 81.4566650368, 301.1512451286, 123.1849975808], [468.80871579240005, 88.4306640384, 611.6229248364999, 129.8546753024], [379.2537842052, 84.0910034432, 488.13989257289995, 117.6247558656]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046422_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a person, a glasses, and two routers.", "boxes_value": [[104.42498777699998, 12.456665036800004, 518.6229248364999, 60.85467530240001], [248.3253173543, 13.877014169600002, 588.745971703, 72], [0, 0, 313.34228518000003, 72], [104.42498777699998, 12.456665036800004, 208.1512451286, 54.1849975808], [375.80871579240005, 19.430664038399996, 518.6229248364999, 60.85467530240001], [286.2537842052, 15.091003443199995, 395.13989257289995, 48.624755865599994]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046423.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object.", "boxes_value": [[247.98944094719997, 220.432189952, 475.84069824, 487.568237312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046423_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object.", "boxes_value": [[56.98944094719997, 67.43218995199999, 284.84069824, 334.568237312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046423.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, a gloves, and two hockey sticks.", "boxes_value": [[247.98944094719997, 220.432189952, 475.84069824, 487.568237312], [322.0574951424, 0, 767.7752685312, 512.6123046912], [417.1591796736, 229.5986328064, 526.0106201088, 417.510498048], [247.98944094719997, 257.752685568, 475.84069824, 487.568237312], [276.47082516480003, 220.432189952, 437.5382079744, 397.2133789184], [314.90722659840003, 433.8071899648, 515.2167969024, 511.1044311552], [274.801269504, 138.2872314368, 404.05895992319995, 512.1285400576], [270.7508544768, 143.0974731264, 334.3990478592, 511.15521239040004]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046423_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, a gloves, and two hockey sticks.", "boxes_value": [[56.98944094719997, 67.43218995199999, 284.84069824, 334.568237312], [131.0574951424, 0, 341, 359], [226.15917967360002, 76.59863280639999, 335.0106201088, 264.510498048], [56.98944094719997, 104.752685568, 284.84069824, 334.568237312], [85.47082516480003, 67.43218995199999, 246.5382079744, 244.2133789184], [123.90722659840003, 280.8071899648, 324.2167969024, 358.1044311552], [83.801269504, 0, 213.05895992319995, 359], [79.75085447679999, 0, 143.39904785919998, 358.15521239040004]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046425.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each mentioned object.", "boxes_value": [[120.3100934144, 268.998092478, 272.1311035392, 398.66467282289995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046425_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each mentioned object.", "boxes_value": [[38.3100934144, 32.99809247799999, 190.1311035392, 162.66467282289995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046425.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[120.3100934144, 268.998092478, 272.1311035392, 398.66467282289995], [121.1182251008, 269.8774413863, 249.8825073152, 510.9362792788], [251.13720704, 368.8914795016, 272.1311035392, 398.66467282289995], [164.9133609472, 268.998092478, 198.7255153152, 290.5803186214], [120.3100934144, 373.3121858004, 151.9640251904, 394.8944119438], [187.5746984448, 364.6792952884, 214.5524811776, 393.0958931099]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046425_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[38.3100934144, 32.99809247799999, 190.1311035392, 162.66467282289995], [39.118225100800004, 33.877441386300006, 167.8825073152, 195], [169.13720704, 132.89147950159997, 190.1311035392, 162.66467282289995], [82.9133609472, 32.99809247799999, 116.7255153152, 54.58031862140001], [38.3100934144, 137.31218580040002, 69.96402519040001, 158.89441194379998], [105.57469844479999, 128.6792952884, 132.5524811776, 157.09589310989998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046426.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates.", "boxes_value": [[145.5103759955, 111.40930176, 228.4119262602, 277.8756713984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046426_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates.", "boxes_value": [[21.5103759955, 42.409301760000005, 104.4119262602, 208.87567139840002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046426.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a hat, a glasses, a mask, and a gloves.", "boxes_value": [[145.5103759955, 111.40930176, 228.4119262602, 277.8756713984], [48.018127448200005, 87.533691392, 230.40155026989999, 497.3989868032], [115.6658325328, 88.1968994304, 179.9974364955, 212.880859392], [163.4171142715, 111.40930176, 186.62951661889997, 136.611389184], [145.5103759955, 133.9585571328, 181.9870605735, 160.4870605312], [195.9144897699, 246.0414428672, 228.4119262602, 277.8756713984]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046426_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a hat, a glasses, a mask, and a gloves.", "boxes_value": [[21.5103759955, 42.409301760000005, 104.4119262602, 208.87567139840002], [0, 18.533691391999994, 106.40155026989999, 250], [0, 19.196899430399995, 55.99743649550001, 143.880859392], [39.4171142715, 42.409301760000005, 62.62951661889997, 67.61138918399999], [21.5103759955, 64.9585571328, 57.9870605735, 91.4870605312], [71.9144897699, 177.0414428672, 104.4119262602, 208.87567139840002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046427.jpg", "text": "Tell me what you see within the designated area in the picture . Give coordinates for the items you reference.", "boxes_value": [[0.5884399104, 204.1204833792, 224.1728515584, 430.4829101568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046427_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Give coordinates for the items you reference.", "boxes_value": [[0.5884399104, 57.12048337920001, 224.1728515584, 283.4829101568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046427.jpg", "text": "Tell me what you see within the designated area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two breads, and three chairs.", "boxes_value": [[0.5884399104, 204.1204833792, 224.1728515584, 430.4829101568], [163.7824707072, 215.004943872, 189.0958252032, 234.351562496], [200.66766359039997, 223.6837768704, 224.1728515584, 238.8717651456], [76.6610717952, 229.1688232448, 237.1557616896, 452.7481079296], [0.5884399104, 204.1204833792, 85.0104980736, 299.675109888], [2.4439087103999997, 231.9519653376, 97.99853514239999, 430.4829101568]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046427_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two breads, and three chairs.", "boxes_value": [[0.5884399104, 57.12048337920001, 224.1728515584, 283.4829101568], [163.7824707072, 68.00494387200001, 189.0958252032, 87.35156249600001], [200.66766359039997, 76.68377687040001, 224.1728515584, 91.8717651456], [76.6610717952, 82.1688232448, 237.1557616896, 305.7481079296], [0.5884399104, 57.12048337920001, 85.0104980736, 152.675109888], [2.4439087103999997, 84.9519653376, 97.99853514239999, 283.4829101568]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046428.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object.", "boxes_value": [[0.0289306259, 216.068969728, 615.7757568365, 509.40216064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046428_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object.", "boxes_value": [[0.0289306259, 74.06896972800001, 615.7757568365, 367.40216064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046428.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bed, four pillows, two chairs, three lamps, two desks, a bench, and a stool.", "boxes_value": [[0.0289306259, 216.068969728, 615.7757568365, 509.40216064], [0.0289306259, 216.068969728, 615.7757568365, 509.40216064], [24.322875947500002, 219.3201904128, 178.2144164771, 338.5319824384], [271.1619872996, 244.2463378944, 298.802124001, 278.0287475712], [24.447631817199998, 188.9659423744, 79.7280273372, 241.1751709184], [182.0991211006, 250.3885497856, 202.57336429210002, 287.24218752], [217.9290161372, 263.6968384, 259.9011230342, 285.1947631616], [375.8343505519, 264.2770996224, 589.3887939163, 311.9259033088], [420.5928954974, 257.646240256, 479.7182617185, 283.0646362112], [473.28466799119997, 200.8100585984, 488.91467281340005, 229.7017822208], [462.86474605970005, 202.230957056, 473.28466799119997, 235.859008768], [444.39294434010003, 234.4381103616, 518.7535400053999, 283.6961059328], [71.2065429343, 227.1980590592, 170.9993896347, 302.042724608], [500.5238037213, 278.2539673088, 549.0187988568999, 295.7559814656], [393.68884275069996, 267.6798706176, 439.63146974959994, 282.2648315392]], "boxes_seq": [[0], [0], [1], [2, 12, 13, 14], [3, 5], [4, 9, 10], [6, 11], [7], [8]]}, {"image_path": "objects365_v1_00046428_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bed, four pillows, two chairs, three lamps, two desks, a bench, and a stool.", "boxes_value": [[0.0289306259, 74.06896972800001, 615.7757568365, 367.40216064], [0.0289306259, 74.06896972800001, 615.7757568365, 367.40216064], [24.322875947500002, 77.3201904128, 178.2144164771, 196.5319824384], [271.1619872996, 102.2463378944, 298.802124001, 136.02874757119997], [24.447631817199998, 46.96594237439999, 79.7280273372, 99.1751709184], [182.0991211006, 108.38854978559999, 202.57336429210002, 145.24218752000002], [217.9290161372, 121.69683839999999, 259.9011230342, 143.1947631616], [375.8343505519, 122.27709962239999, 589.3887939163, 169.9259033088], [420.5928954974, 115.646240256, 479.7182617185, 141.06463621120002], [473.28466799119997, 58.81005859839999, 488.91467281340005, 87.7017822208], [462.86474605970005, 60.230957055999994, 473.28466799119997, 93.859008768], [444.39294434010003, 92.4381103616, 518.7535400053999, 141.69610593279998], [71.2065429343, 85.1980590592, 170.9993896347, 160.04272460800001], [500.5238037213, 136.2539673088, 549.0187988568999, 153.75598146559997], [393.68884275069996, 125.67987061759999, 439.63146974959994, 140.26483153919997]], "boxes_seq": [[0], [0], [1], [2, 12, 13, 14], [3, 5], [4, 9, 10], [6, 11], [7], [8]]}, {"image_path": "objects365_v1_00046429.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.1015625042000001, 0, 254.9206542969, 82.1683349504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046429_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.1015625042000001, 0, 254.9206542969, 82.1683349504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046429.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four flags, and a street lights.", "boxes_value": [[1.1015625042000001, 0, 254.9206542969, 82.1683349504], [195.6798095859, 0, 254.9206542969, 71.4855346688], [129.64080808440002, 0, 184.02581784059998, 76.3413696512], [54.861328106100004, 0, 103.41943360920001, 77.3125610496], [2.4186401109, 50.119995136, 35.4381103467, 82.1683349504], [1.1015625042000001, 0, 67.1442871284, 65.5454711808]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046429_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four flags, and a street lights.", "boxes_value": [[1.1015625042000001, 0, 254.9206542969, 82.1683349504], [195.6798095859, 0, 254.9206542969, 71.4855346688], [129.64080808440002, 0, 184.02581784059998, 76.3413696512], [54.861328106100004, 0, 103.41943360920001, 77.3125610496], [2.4186401109, 50.119995136, 35.4381103467, 82.1683349504], [1.1015625042000001, 0, 67.1442871284, 65.5454711808]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046431.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[109.1995849728, 72.8497924608, 390.7414550784, 374.9645385728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046431_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[71.1995849728, 72.8497924608, 352.7414550784, 374.9645385728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046431.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a tent, a hat, a glasses, and a sneakers.", "boxes_value": [[109.1995849728, 72.8497924608, 390.7414550784, 374.9645385728], [109.1995849728, 72.8497924608, 209.5211181312, 326.4544067584], [242.949462912, 211.0010376192, 406.19934082559996, 368.5763549696], [192.73199462399998, 44.4514160128, 766.9874267903999, 512.9473876992], [126.75744629760001, 73.2380370944, 166.3530883584, 113.1555175936], [369.5969238528, 238.7109374976, 390.7414550784, 261.2011108352], [269.2269287424, 339.4604492288, 305.39257812479997, 374.9645385728]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046431_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a tent, a hat, a glasses, and a sneakers.", "boxes_value": [[71.1995849728, 72.8497924608, 352.7414550784, 374.9645385728], [71.1995849728, 72.8497924608, 171.5211181312, 326.4544067584], [204.949462912, 211.0010376192, 368.19934082559996, 368.5763549696], [154.73199462399998, 44.4514160128, 423, 450], [88.75744629760001, 73.2380370944, 128.3530883584, 113.1555175936], [331.5969238528, 238.7109374976, 352.7414550784, 261.2011108352], [231.2269287424, 339.4604492288, 267.39257812479997, 374.9645385728]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046432.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[257.5727539389, 1.7128295936, 464.25122071239997, 407.202087424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046432_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[52.57275393890001, 1.7128295936, 259.25122071239997, 407.202087424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046432.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, a cabinet, a bed, two handbags, and a remote.", "boxes_value": [[257.5727539389, 1.7128295936, 464.25122071239997, 407.202087424], [187.02624509170002, 253.5668335104, 351.6285400308, 313.3435058688], [284.92132567299996, 1.7128295936, 464.25122071239997, 338.0957031424], [46.5573120222, 243.4183960064, 595.6856689685, 510.7425537024001], [278.1528320086, 364.7564697088, 363.70727536920003, 417.8134765568], [374.981811538, 350.8290405376, 436.6606445252, 407.202087424], [257.5727539389, 326.0328979456, 301.62957762179997, 334.728332544]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046432_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, a cabinet, a bed, two handbags, and a remote.", "boxes_value": [[52.57275393890001, 1.7128295936, 259.25122071239997, 407.202087424], [0, 253.5668335104, 146.62854003080002, 313.3435058688], [79.92132567299996, 1.7128295936, 259.25122071239997, 338.0957031424], [0, 243.4183960064, 310, 508], [73.1528320086, 364.7564697088, 158.70727536920003, 417.8134765568], [169.981811538, 350.8290405376, 231.66064452519998, 407.202087424], [52.57275393890001, 326.0328979456, 96.62957762179997, 334.728332544]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046436.jpg", "text": "What's going on in the section of contained within the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[208.74633792, 275.7257690624, 467.39733886740004, 373.106445312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046436_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[64.74633792, 24.72576906239999, 323.39733886740004, 122.106445312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046436.jpg", "text": "What's going on in the section of contained within the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three desks, a chair, and a person.", "boxes_value": [[208.74633792, 275.7257690624, 467.39733886740004, 373.106445312], [202.2005005035, 273.6823730688, 355.6082763384, 374.3926391808], [316.3579101927, 275.7257690624, 467.39733886740004, 373.106445312], [208.74633792, 297.0563964928, 284.4554443596, 348.716674816], [255.50781251099997, 301.5098876928, 303.160034196, 350.0527343616], [300.8218994418, 236.9304199168, 359.6138915739, 348.8909912064]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046436_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three desks, a chair, and a person.", "boxes_value": [[64.74633792, 24.72576906239999, 323.39733886740004, 122.106445312], [58.200500503499995, 22.68237306880002, 211.6082763384, 123.39263918080002], [172.3579101927, 24.72576906239999, 323.39733886740004, 122.106445312], [64.74633792, 46.05639649279999, 140.4554443596, 97.71667481600002], [111.50781251099997, 50.50988769280002, 159.16003419600003, 99.05273436160002], [156.82189944179999, 0, 215.6138915739, 97.89099120639997]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046439.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[356.36633303039997, 246.1635742208, 428.54614256639996, 440.8348388864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046439_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[18.36633303039997, 49.1635742208, 90.54614256639996, 243.8348388864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046439.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[356.36633303039997, 246.1635742208, 428.54614256639996, 440.8348388864], [397.49047848960004, 256.7224731648, 428.05578616319997, 303.4039917056], [356.36633303039997, 246.1635742208, 381.929931648, 313.9628906496], [369.8918456832, 320.5529785344, 428.54614256639996, 440.8348388864], [397.49047848960004, 256.7224731648, 428.05578616319997, 303.4039917056], [356.36633303039997, 246.1635742208, 381.929931648, 313.9628906496]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046439_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[18.36633303039997, 49.1635742208, 90.54614256639996, 243.8348388864], [59.49047848960004, 59.72247316480002, 90.05578616319997, 106.40399170559999], [18.36633303039997, 49.1635742208, 43.92993164799998, 116.96289064960001], [31.891845683200017, 123.55297853439998, 90.54614256639996, 243.8348388864], [59.49047848960004, 59.72247316480002, 90.05578616319997, 106.40399170559999], [18.36633303039997, 49.1635742208, 43.92993164799998, 116.96289064960001]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046440.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[140.56530762239998, 232.1127929856, 377.003480832, 418.4345702912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046440_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[59.565307622399985, 47.11279298560001, 296.003480832, 233.43457029119998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046440.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two paddles, two people, a hat, and a boat.", "boxes_value": [[140.56530762239998, 232.1127929856, 377.003480832, 418.4345702912], [121.0324707072, 307.371826176, 421.1624755968, 429.3646240256], [150.8375244288, 273.4079589888, 203.5162353408, 385.0036010496], [258.4194336, 242.2145996288, 440.2515869184, 415.6285400576], [140.56530762239998, 232.1127929856, 292.6531982592, 418.4345702912], [328.5632956416, 247.0378256384, 377.003480832, 272.713770752], [95.50183104, 296.9747314688, 697.148071296, 502.8375244288]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046440_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two paddles, two people, a hat, and a boat.", "boxes_value": [[59.565307622399985, 47.11279298560001, 296.003480832, 233.43457029119998], [40.032470707200005, 122.37182617600001, 340.1624755968, 244.3646240256], [69.83752442880001, 88.40795898879998, 122.5162353408, 200.00360104959998], [177.4194336, 57.21459962879999, 355, 230.6285400576], [59.565307622399985, 47.11279298560001, 211.6531982592, 233.43457029119998], [247.5632956416, 62.037825638399994, 296.003480832, 87.71377075200002], [14.501831039999999, 111.97473146879997, 355, 280]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046442.jpg", "text": "In the photo , can you delve into the details of the region ? Specify the location of each mentioned object.", "boxes_value": [[123.4599609344, 0, 511.0189819392, 393.5319823946]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046442_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Specify the location of each mentioned object.", "boxes_value": [[97.4599609344, 0, 485.0189819392, 393.5319823946]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046442.jpg", "text": "In the photo , can you delve into the details of the region ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a cabinet, a faucet, a sink, and a gas stove.", "boxes_value": [[123.4599609344, 0, 511.0189819392, 393.5319823946], [154.2774047744, 38.4730834696, 207.6455077888, 69.89544674679999], [420.530334464, 0, 511.0189819392, 159.34527590349998], [180.4364623872, 248.19970704999997, 202.6635742208, 278.5663452064], [123.4599609344, 294.6480712834, 234.2824096768, 302.5901489584], [324.8475952128, 335.2630615355, 496.1633911296, 393.5319823946]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046442_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a cabinet, a faucet, a sink, and a gas stove.", "boxes_value": [[97.4599609344, 0, 485.0189819392, 393.5319823946], [128.2774047744, 38.4730834696, 181.6455077888, 69.89544674679999], [394.530334464, 0, 485.0189819392, 159.34527590349998], [154.4364623872, 248.19970704999997, 176.6635742208, 278.5663452064], [97.4599609344, 294.6480712834, 208.2824096768, 302.5901489584], [298.8475952128, 335.2630615355, 470.1633911296, 393.5319823946]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046443.jpg", "text": "Please tell me more about the rectangular section in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[157.3366088976, 313.5531005952, 274.604064956, 512.1601562624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046443_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[29.3366088976, 50.55310059520002, 146.604064956, 249]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046443.jpg", "text": "Please tell me more about the rectangular section in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two hats, and a street lights.", "boxes_value": [[157.3366088976, 313.5531005952, 274.604064956, 512.1601562624], [157.3366088976, 410.9850463744, 224.7918701264, 512.0004882944], [195.4260864417, 407.3333740032, 237.7192382922, 512.1601562624], [176.697753874, 411.099304192, 196.2782592581, 428.5042114048], [241.53100583260002, 418.9315185664, 262.4168701055, 433.7256469504], [260.2479247873, 313.5531005952, 274.604064956, 369.541992192]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046443_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two hats, and a street lights.", "boxes_value": [[29.3366088976, 50.55310059520002, 146.604064956, 249], [29.3366088976, 147.98504637439999, 96.7918701264, 249], [67.4260864417, 144.3333740032, 109.7192382922, 249], [48.697753874, 148.09930419199998, 68.27825925810001, 165.5042114048], [113.53100583260002, 155.93151856639997, 134.41687010549998, 170.7256469504], [132.2479247873, 50.55310059520002, 146.604064956, 106.54199219200001]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046444.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[339.5881347584, 628.1444092152, 490.6537475584, 796.7956543008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046444_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[38.588134758399974, 43.14440921519997, 189.65374755840003, 211.79565430080004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046444.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a backpack, a suv, a car, and a street lights.", "boxes_value": [[339.5881347584, 628.1444092152, 490.6537475584, 796.7956543008], [343.3252563456, 711.0699463212, 426.9455566336, 796.7956543008], [351.4754028544, 746.7475586301, 398.0982055424, 797.28063964], [339.5881347584, 696.4539794676, 391.1798095872, 725.080078088], [360.774719232, 705.3670654232, 465.8988647424, 750.651367204], [455.6890869248, 628.1444092152, 490.6537475584, 687.0192871386]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046444_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a backpack, a suv, a car, and a street lights.", "boxes_value": [[38.588134758399974, 43.14440921519997, 189.65374755840003, 211.79565430080004], [42.325256345599996, 126.06994632119995, 125.9455566336, 211.79565430080004], [50.47540285439999, 161.7475586301, 97.09820554240002, 212], [38.588134758399974, 111.45397946759999, 90.1798095872, 140.08007808800005], [59.774719231999995, 120.36706542319996, 164.8988647424, 165.65136720400005], [154.6890869248, 43.14440921519997, 189.65374755840003, 102.01928713860002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046445.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[192.3906860576, 307.3914795008, 571.9459228628, 469.364685056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046445_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[95.39068605759999, 41.39147950080002, 474.9459228628, 203.36468505599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046445.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six people.", "boxes_value": [[192.3906860576, 307.3914795008, 571.9459228628, 469.364685056], [529.7633056417, 307.8091430912, 571.9459228628, 469.0219726336], [483.821777374, 307.3914795008, 532.2990722668, 468.6043090944], [445.3084716982, 331.3168945152, 485.7608642696, 469.141296384], [264.4929199292, 391.6264648192, 317.88391111569996, 468.425353984], [192.3906860576, 355.0681152512, 259.3454589734, 469.364685056], [170.2173461623, 321.7356567552, 281.9535522292, 380.8645629952]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046445_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six people.", "boxes_value": [[95.39068605759999, 41.39147950080002, 474.9459228628, 203.36468505599998], [432.76330564169996, 41.809143091199985, 474.9459228628, 203.0219726336], [386.821777374, 41.39147950080002, 435.29907226679995, 202.60430909439998], [348.3084716982, 65.31689451519998, 388.7608642696, 203.141296384], [167.4929199292, 125.62646481920001, 220.88391111569996, 202.42535398400003], [95.39068605759999, 89.06811525120003, 162.3454589734, 203.36468505599998], [73.2173461623, 55.73565675520001, 184.9535522292, 114.8645629952]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046447.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference.", "boxes_value": [[232.717529275, 167.1217040896, 757.302246085, 483.3397827072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046447_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference.", "boxes_value": [[131.717529275, 79.1217040896, 656.302246085, 395.3397827072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046447.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two hats, a sneakers, a gloves, and a horse.", "boxes_value": [[232.717529275, 167.1217040896, 757.302246085, 483.3397827072], [350.071289029, 27.7550048768, 504.626098669, 376.5374145536], [308.545410173, 314.3436279296, 329.856445303, 333.9267578368], [607.649780276, 159.369628928, 629.95666503, 172.9713134592], [391.14916995, 349.8714599424, 431.254882828, 375.9868164096], [350.424926775, 186.6581421056, 365.405029297, 206.9880981504], [232.717529275, 167.1217040896, 757.302246085, 483.3397827072]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046447_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two hats, a sneakers, a gloves, and a horse.", "boxes_value": [[131.717529275, 79.1217040896, 656.302246085, 395.3397827072], [249.071289029, 0, 403.626098669, 288.5374145536], [207.545410173, 226.34362792960002, 228.856445303, 245.9267578368], [506.649780276, 71.369628928, 528.95666503, 84.97131345919999], [290.14916995, 261.8714599424, 330.254882828, 287.9868164096], [249.42492677500002, 98.6581421056, 264.405029297, 118.98809815039999], [131.717529275, 79.1217040896, 656.302246085, 395.3397827072]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046449.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[58.029968254900005, 107.2801513472, 300.7356567719, 233.4053344768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046449_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[58.029968254900005, 32.280151347200004, 300.7356567719, 158.4053344768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046449.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a cabinet, and two potted plants.", "boxes_value": [[58.029968254900005, 107.2801513472, 300.7356567719, 233.4053344768], [138.477355953, 107.2801513472, 181.42810061330002, 150.912658688], [90.7543334759, 110.0072021504, 130.2962646819, 148.8673705984], [58.029968254900005, 185.0005493248, 205.97137449489998, 233.4053344768], [184.1551513904, 150.2308959744, 201.88079836339998, 187.7275390464], [252.33087154929999, 148.1856078848, 300.7356567719, 198.6356811776]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046449_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a cabinet, and two potted plants.", "boxes_value": [[58.029968254900005, 32.280151347200004, 300.7356567719, 158.4053344768], [138.477355953, 32.280151347200004, 181.42810061330002, 75.912658688], [90.7543334759, 35.007202150400005, 130.2962646819, 73.86737059839999], [58.029968254900005, 110.0005493248, 205.97137449489998, 158.4053344768], [184.1551513904, 75.2308959744, 201.88079836339998, 112.72753904640001], [252.33087154929999, 73.18560788479999, 300.7356567719, 123.63568117759999]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046451.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for all objects that you mention.", "boxes_value": [[573.3616943616, 133.4884033024, 638.4151611648, 367.386108416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046451_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for all objects that you mention.", "boxes_value": [[16.36169436160003, 58.4884033024, 81.4151611648, 292.386108416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046451.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a helmet, a sneakers, and a gloves.", "boxes_value": [[573.3616943616, 133.4884033024, 638.4151611648, 367.386108416], [431.95568847359993, 176.170166016, 642.27441408, 407.8772582912], [501.46777344000003, 132.50231936, 639.6008300544, 378.4682617344], [573.3616943616, 133.4884033024, 606.1070556672, 166.9780273664], [581.209594752, 346.3275757056, 611.0833740288, 367.386108416], [608.1448974335999, 281.1929321472, 638.4151611648, 304.848937984]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046451_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a helmet, a sneakers, and a gloves.", "boxes_value": [[16.36169436160003, 58.4884033024, 81.4151611648, 292.386108416], [0, 101.170166016, 85.27441408000004, 332.8772582912], [0, 57.50231936, 82.60083005440003, 303.4682617344], [16.36169436160003, 58.4884033024, 49.10705566720003, 91.9780273664], [24.209594752000044, 271.3275757056, 54.083374028799994, 292.386108416], [51.14489743359991, 206.1929321472, 81.4151611648, 229.84893798399997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046453.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[256.2530517504, 30.917480448, 489.507080064, 284.8462524416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046453_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[59.253051750400004, 30.917480448, 292.507080064, 284.8462524416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046453.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a plate, a spoon, and a bowl.", "boxes_value": [[256.2530517504, 30.917480448, 489.507080064, 284.8462524416], [256.2530517504, 30.917480448, 489.507080064, 284.8462524416], [0.9863891712000001, 0, 374.73022463999996, 512.1695556608], [356.3834228736, 242.7421264896, 437.5156250112, 270.8952026112], [417.0406493952, 253.747436544, 454.6633300992, 286.2514648576], [431.50061038079997, 195.1185302528, 472.80065917440004, 237.8265380864]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046453_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a plate, a spoon, and a bowl.", "boxes_value": [[59.253051750400004, 30.917480448, 292.507080064, 284.8462524416], [59.253051750400004, 30.917480448, 292.507080064, 284.8462524416], [0, 0, 177.73022463999996, 348], [159.3834228736, 242.7421264896, 240.51562501119997, 270.8952026112], [220.04064939519998, 253.747436544, 257.6633300992, 286.2514648576], [234.50061038079997, 195.1185302528, 275.80065917440004, 237.8265380864]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046454.jpg", "text": "I'd like a thorough description of the area in the image . Give coordinates for the items you reference.", "boxes_value": [[275.5178222592, 165.7403564544, 419.36560058879996, 328.6567993344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046454_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give coordinates for the items you reference.", "boxes_value": [[36.51782225919999, 40.7403564544, 180.36560058879996, 203.65679933439998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046454.jpg", "text": "I'd like a thorough description of the area in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[275.5178222592, 165.7403564544, 419.36560058879996, 328.6567993344], [318.4949951232, 177.5611572224, 404.4492187392, 328.6567993344], [317.1435546624, 185.3997802496, 354.4444580352, 314.3311157248], [290.92480465920005, 185.3997802496, 326.063354496, 317.574646016], [275.5178222592, 180.804748544, 319.3059081984, 321.088500992], [398.4101562624, 165.7403564544, 419.36560058879996, 216.0334472704]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046454_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[36.51782225919999, 40.7403564544, 180.36560058879996, 203.65679933439998], [79.4949951232, 52.5611572224, 165.44921873919998, 203.65679933439998], [78.14355466239999, 60.39978024960001, 115.4444580352, 189.3311157248], [51.92480465920005, 60.39978024960001, 87.06335449599999, 192.57464601599997], [36.51782225919999, 55.804748544000006, 80.3059081984, 196.08850099199998], [159.4101562624, 40.7403564544, 180.36560058879996, 91.03344727039999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046456.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference.", "boxes_value": [[381.30444334879996, 82.9724121088, 682.9631347335, 511.45892331519997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046456_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference.", "boxes_value": [[76.30444334879996, 82.9724121088, 377.96313473350006, 511.45892331519997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046456.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, two people, a ballon, and a cell phone.", "boxes_value": [[381.30444334879996, 82.9724121088, 682.9631347335, 511.45892331519997], [38.3435668626, 0, 544.2482910064, 510.778808576], [606.3781738469, 82.9724121088, 682.4948730753, 511.45892331519997], [146.4175415131, 59.5096435712, 543.6940918294, 512.8986816512], [381.30444334879996, 212.1955566592, 605.7551269722001, 390.9952392704], [665.4160156279, 457.9478759936, 682.9631347335, 489.7482910208]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046456_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, two people, a ballon, and a cell phone.", "boxes_value": [[76.30444334879996, 82.9724121088, 377.96313473350006, 511.45892331519997], [0, 0, 239.2482910064, 510.778808576], [301.37817384690004, 82.9724121088, 377.4948730753, 511.45892331519997], [0, 59.5096435712, 238.6940918294, 512], [76.30444334879996, 212.1955566592, 300.75512697220006, 390.9952392704], [360.41601562790004, 457.9478759936, 377.96313473350006, 489.7482910208]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046457.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each object you identify.", "boxes_value": [[641.1684570059999, 0.59899904, 770.619995145, 468.569702144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046457_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each object you identify.", "boxes_value": [[33.168457005999926, 0.59899904, 162, 468.569702144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046457.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a barrel, a cleaning products, and a bottle.", "boxes_value": [[641.1684570059999, 0.59899904, 770.619995145, 468.569702144], [378.544555697, 0, 733.862915016, 361.486572288], [727.179809587, 0.59899904, 770.619995145, 308.021728512], [641.1684570059999, 233.614746112, 693.684448226, 301.5104370176], [694.736206087, 263.9199218688, 741.173583997, 307.9315795968], [728.306274388, 359.9956664832, 769.342163051, 468.569702144]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046457_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a barrel, a cleaning products, and a bottle.", "boxes_value": [[33.168457005999926, 0.59899904, 162, 468.569702144], [0, 0, 125.86291501599999, 361.486572288], [119.17980958700002, 0.59899904, 162, 308.021728512], [33.168457005999926, 233.614746112, 85.68444822599997, 301.5104370176], [86.73620608700003, 263.9199218688, 133.17358399700004, 307.9315795968], [120.30627438800002, 359.9956664832, 161.34216305100006, 468.569702144]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046462.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[670.9056396795, 61.2421264896, 915.2104491765001, 512.2252197376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046462_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[61.90563967950004, 61.2421264896, 306, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046462.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a soccer, two people, and two sneakers.", "boxes_value": [[670.9056396795, 61.2421264896, 915.2104491765001, 512.2252197376], [670.9056396795, 61.2421264896, 727.9620361005, 120.0815429632], [708.5595703515, 150.8424682496, 915.2104491765001, 512.2252197376], [847.5877685415, 430.0728759808, 910.643432643, 482.3857421824], [779.9705810175001, 493.2704467968, 834.9190674165, 511.586608896], [847.550903283, 438.3220214784, 911.341674816, 481.270202624]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046462_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a soccer, two people, and two sneakers.", "boxes_value": [[61.90563967950004, 61.2421264896, 306, 512], [61.90563967950004, 61.2421264896, 118.96203610049997, 120.0815429632], [99.55957035150004, 150.8424682496, 306, 512], [238.58776854150005, 430.0728759808, 301.643432643, 482.3857421824], [170.97058101750008, 493.2704467968, 225.91906741649996, 511.586608896], [238.55090328300003, 438.3220214784, 302.341674816, 481.270202624]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046463.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[407.35644531130004, 227.1472167936, 599.0982665843, 315.4534301696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046463_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[48.356445311300035, 22.147216793599995, 240.09826658429995, 110.45343016959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046463.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, three people, a cup, and a laptop.", "boxes_value": [[407.35644531130004, 227.1472167936, 599.0982665843, 315.4534301696], [261.6699829002, 151.0784301568, 576.5683593633, 456.0195922944], [0.10729977809999999, 0, 489.6972656213, 512.0031738368], [331.0490112297, 126.7404174848, 661.7509765677, 400.8294677504], [407.35644531130004, 227.1472167936, 504.2788085621, 292.4465332224], [557.1724853395, 295.1162719744, 599.0982665843, 315.4534301696], [489.2220459006, 201.9956054528, 604.6662597545001, 322.2160644608]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046463_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, three people, a cup, and a laptop.", "boxes_value": [[48.356445311300035, 22.147216793599995, 240.09826658429995, 110.45343016959998], [0, 0, 217.56835936330003, 132], [0, 0, 130.6972656213, 132], [0, 0, 288, 132], [48.356445311300035, 22.147216793599995, 145.27880856209998, 87.44653322239998], [198.17248533949999, 90.11627197439998, 240.09826658429995, 110.45343016959998], [130.2220459006, 0, 245.66625975450006, 117.2160644608]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046464.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify.", "boxes_value": [[139.4895019776, 301.169860864, 345.089843712, 467.4071655424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046464_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify.", "boxes_value": [[51.489501977600014, 42.169860863999986, 257.089843712, 208.40716554239998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046464.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, two people, a sneakers, and a bowl.", "boxes_value": [[139.4895019776, 301.169860864, 345.089843712, 467.4071655424], [156.2252807424, 332.4492187648, 197.4570312192, 404.6047973888], [275.53369144320004, 334.8255004672, 392.04333496320004, 504.4153442304], [139.4895019776, 301.169860864, 169.5030517248, 405.4669799936], [274.8569336064, 442.769042944, 303.1301269248, 467.4071655424], [314.03833006080004, 365.8968505856, 345.089843712, 376.3802490368]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046464_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, two people, a sneakers, and a bowl.", "boxes_value": [[51.489501977600014, 42.169860863999986, 257.089843712, 208.40716554239998], [68.2252807424, 73.44921876479998, 109.45703121919999, 145.60479738880002], [187.53369144320004, 75.82550046720002, 304.04333496320004, 245.41534423040002], [51.489501977600014, 42.169860863999986, 81.5030517248, 146.46697999359998], [186.8569336064, 183.76904294399998, 215.13012692479998, 208.40716554239998], [226.03833006080004, 106.89685058560002, 257.089843712, 117.38024903680002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046466.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify.", "boxes_value": [[309.4934082048, 147.3902587904, 701.1708984576, 229.8881225728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046466_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify.", "boxes_value": [[98.4934082048, 21.390258790399997, 490.17089845759995, 103.8881225728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046466.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[309.4934082048, 147.3902587904, 701.1708984576, 229.8881225728], [335.70275880959997, 147.3902587904, 359.8348388352, 183.8688964608], [460.29138186240004, 175.4507446272, 481.0561523712, 225.9596557824], [521.4633788927999, 177.1343994368, 541.1057128704, 229.8881225728], [687.2207031552, 179.8903198208, 701.1708984576, 227.0319824384], [309.4934082048, 166.5830688256, 328.1644286976, 215.5142212096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046466_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[98.4934082048, 21.390258790399997, 490.17089845759995, 103.8881225728], [124.70275880959997, 21.390258790399997, 148.8348388352, 57.86889646079999], [249.29138186240004, 49.45074462720001, 270.0561523712, 99.9596557824], [310.46337889279994, 51.13439943680001, 330.1057128704, 103.8881225728], [476.2207031552, 53.89031982079999, 490.17089845759995, 101.03198243840001], [98.4934082048, 40.58306882560001, 117.16442869759999, 89.51422120960001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046468.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[247.02062991359998, 175.7518920704, 578.6181640704001, 242.2827148288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046468_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[83.02062991359998, 16.751892070400004, 414.6181640704001, 83.28271482880001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046468.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two gloves, and two sneakers.", "boxes_value": [[247.02062991359998, 175.7518920704, 578.6181640704001, 242.2827148288], [422.492919936, 85.8693237248, 639.5781249792001, 239.251525888], [247.02062991359998, 217.6396484608, 280.55578613759997, 238.5303954944], [548.671630848, 175.7518920704, 578.6181640704001, 199.1206054912], [479.2993164288, 215.625122048, 494.44567871999993, 242.2827148288], [422.34899903999997, 204.113891584, 446.5832519424, 240.2632446464]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046468_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two gloves, and two sneakers.", "boxes_value": [[83.02062991359998, 16.751892070400004, 414.6181640704001, 83.28271482880001], [258.492919936, 0, 475.5781249792001, 80.251525888], [83.02062991359998, 58.639648460800004, 116.55578613759997, 79.53039549440001], [384.67163084799995, 16.751892070400004, 414.6181640704001, 40.120605491199996], [315.2993164288, 56.62512204800001, 330.44567871999993, 83.28271482880001], [258.34899903999997, 45.11389158399999, 282.5832519424, 81.26324464640001]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046469.jpg", "text": "In the image , please describe the bounding box . Please mention the objects and their locations.", "boxes_value": [[183.393493632, 67.0527343616, 262.4185790976, 152.5658569216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046469_crop.jpg", "text": "In the image , please describe the bounding box . Please mention the objects and their locations.", "boxes_value": [[20.393493632000002, 22.052734361600002, 99.4185790976, 107.56585692159999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046469.jpg", "text": "In the image , please describe the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[183.393493632, 67.0527343616, 262.4185790976, 152.5658569216], [250.08624268799997, 67.0527343616, 262.4185790976, 101.2037353472], [183.393493632, 95.6474609152, 202.095275904, 120.2443237376], [224.8626098688, 115.9754638848, 240.10858152959997, 152.5658569216], [250.08624268799997, 67.0527343616, 262.4185790976, 101.2037353472], [183.393493632, 95.6474609152, 202.095275904, 120.2443237376], [224.8626098688, 115.9754638848, 240.10858152959997, 152.5658569216]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046469_crop.jpg", "text": "In the image , please describe the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[20.393493632000002, 22.052734361600002, 99.4185790976, 107.56585692159999], [87.08624268799997, 22.052734361600002, 99.4185790976, 56.203735347199995], [20.393493632000002, 50.6474609152, 39.095275904000005, 75.2443237376], [61.86260986880001, 70.9754638848, 77.10858152959997, 107.56585692159999], [87.08624268799997, 22.052734361600002, 99.4185790976, 56.203735347199995], [20.393493632000002, 50.6474609152, 39.095275904000005, 75.2443237376], [61.86260986880001, 70.9754638848, 77.10858152959997, 107.56585692159999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046472.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object.", "boxes_value": [[0, 393.9378051584, 335.6827392512, 511.3225708032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046472_crop.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object.", "boxes_value": [[0, 29.93780515840001, 335.6827392512, 147.3225708032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046472.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a storage box, two barrels, and a bowl.", "boxes_value": [[0, 393.9378051584, 335.6827392512, 511.3225708032], [0, 399.5686034944, 252.6310424576, 510.0], [180.5311889408, 418.7343750144, 335.6827392512, 511.3225708032], [28.6518554624, 330.6296996864, 138.7716064256, 446.4257202176], [207.4545288192, 340.2793579008, 332.3325805568, 432.8026733568], [69.8114013696, 393.9378051584, 180.2897949184, 471.0623169024]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046472_crop.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a storage box, two barrels, and a bowl.", "boxes_value": [[0, 29.93780515840001, 335.6827392512, 147.3225708032], [0, 35.56860349440001, 252.6310424576, 146.0], [180.5311889408, 54.73437501439997, 335.6827392512, 147.3225708032], [28.6518554624, 0, 138.7716064256, 82.42572021759997], [207.4545288192, 0, 332.3325805568, 68.80267335680003], [69.8114013696, 29.93780515840001, 180.2897949184, 107.06231690240003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046476.jpg", "text": "What does the area within the given visual contain? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[225.7254638592, 364.9750976287, 511.752685568, 656.5352782857999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046476_crop.jpg", "text": "What does the area within the given visual contain? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[71.7254638592, 72.97509762869998, 357.752685568, 364.5352782857999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046476.jpg", "text": "What does the area within the given visual contain? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a car, a truck, a bus, and two street lights.", "boxes_value": [[225.7254638592, 364.9750976287, 511.752685568, 656.5352782857999], [442.7403564544, 546.3288574302, 511.752685568, 656.5352782857999], [358.509033216, 521.5661621083, 392.4220580864, 554.5526123065], [225.7254638592, 504.8134765568, 253.9061889536, 537.5679931697], [393.1387329024, 364.9750976287, 476.5540771328, 553.0130614794999], [353.1358032384, 408.1958008106, 399.1403198464, 570.2573241797]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046476_crop.jpg", "text": "What does the area within the given visual contain? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a car, a truck, a bus, and two street lights.", "boxes_value": [[71.7254638592, 72.97509762869998, 357.752685568, 364.5352782857999], [288.7403564544, 254.32885743019995, 357.752685568, 364.5352782857999], [204.50903321599998, 229.56616210829998, 238.4220580864, 262.5526123065], [71.7254638592, 212.81347655680003, 99.90618895360001, 245.5679931697], [239.13873290240002, 72.97509762869998, 322.5540771328, 261.0130614794999], [199.1358032384, 116.19580081060002, 245.14031984640002, 278.25732417970005]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046478.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[10.3259888094, 209.1376953344, 439.90771485640005, 453.7882080256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046478_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[10.3259888094, 62.13769533440001, 439.90771485640005, 306.7882080256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046478.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two boats, and three street lights.", "boxes_value": [[10.3259888094, 209.1376953344, 439.90771485640005, 453.7882080256], [40.491149898399996, 381.4309082112, 169.73620605760004, 453.7882080256], [380.0257568118, 378.4166259712, 439.90771485640005, 453.2689819136], [10.3259888094, 209.1376953344, 24.4497680236, 279.2697143808], [108.0139770462, 219.81628416, 119.7472534052, 270.6605224448], [228.38061520020003, 226.95697024, 236.84143068, 263.8330078208]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046478_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two boats, and three street lights.", "boxes_value": [[10.3259888094, 62.13769533440001, 439.90771485640005, 306.7882080256], [40.491149898399996, 234.4309082112, 169.73620605760004, 306.7882080256], [380.0257568118, 231.4166259712, 439.90771485640005, 306.2689819136], [10.3259888094, 62.13769533440001, 24.4497680236, 132.2697143808], [108.0139770462, 72.81628416000001, 119.7472534052, 123.6605224448], [228.38061520020003, 79.95697024, 236.84143068, 116.83300782079999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046479.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[103.81713866519999, 28.1429443584, 364.0606689708, 480.6903686656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046479_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[65.81713866519999, 28.1429443584, 326.0606689708, 480.6903686656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046479.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two gloves, two sneakers, a helmet, and a person.", "boxes_value": [[103.81713866519999, 28.1429443584, 364.0606689708, 480.6903686656], [105.4575409524, 206.293021696, 128.515892592, 238.7188286976], [327.1995087168, 249.8890627072, 360.5163741378, 292.5762965504], [210.9430950342, 447.591125504, 280.8955540416, 478.8136772608], [304.9159318818, 347.9997451264, 346.61133905639997, 432.5650779648], [209.48116926359998, 30.2295287296, 272.430883575, 88.9638604288], [103.81713866519999, 28.1429443584, 364.0606689708, 480.6903686656]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046479_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two gloves, two sneakers, a helmet, and a person.", "boxes_value": [[65.81713866519999, 28.1429443584, 326.0606689708, 480.6903686656], [67.4575409524, 206.293021696, 90.515892592, 238.7188286976], [289.1995087168, 249.8890627072, 322.5163741378, 292.5762965504], [172.9430950342, 447.591125504, 242.8955540416, 478.8136772608], [266.9159318818, 347.9997451264, 308.61133905639997, 432.5650779648], [171.48116926359998, 30.2295287296, 234.430883575, 88.9638604288], [65.81713866519999, 28.1429443584, 326.0606689708, 480.6903686656]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046481.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates.", "boxes_value": [[145.9777221605, 78.3750000128, 267.60357668160003, 142.1527099392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046481_crop.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates.", "boxes_value": [[30.977722160500008, 16.3750000128, 152.60357668160003, 80.15270993920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046481.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates. For your reference, objects involved in this region include four pictures, and a person.", "boxes_value": [[145.9777221605, 78.3750000128, 267.60357668160003, 142.1527099392], [232.7008666964, 93.5924072448, 267.60357668160003, 142.1527099392], [146.4783935347, 120.1814575104, 169.759887684, 159.9852295168], [172.0128784061, 78.3750000128, 219.07647701870002, 106.1625366016], [145.9777221605, 81.6293945344, 171.2619018749, 117.6781005824], [245.03515625170002, 105.6779174912, 258.27215577410004, 130.0339965952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046481_crop.jpg", "text": "Can you break down the region in the image for me? Please point out the objects and their coordinates. For your reference, objects involved in this region include four pictures, and a person.", "boxes_value": [[30.977722160500008, 16.3750000128, 152.60357668160003, 80.15270993920001], [117.7008666964, 31.5924072448, 152.60357668160003, 80.15270993920001], [31.47839353469999, 58.181457510399994, 54.759887684000006, 96], [57.012878406099986, 16.3750000128, 104.07647701870002, 44.162536601599996], [30.977722160500008, 19.629394534400006, 56.26190187489999, 55.678100582400006], [130.03515625170002, 43.677917491200006, 143.27215577410004, 68.0339965952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046483.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[0.7243041792, 55.6555175938, 199.0906372096, 576.2852783339999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046483_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[0.7243041792, 55.6555175938, 199.0906372096, 576.2852783339999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046483.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two storage boxes, and two bottles.", "boxes_value": [[0.7243041792, 55.6555175938, 199.0906372096, 576.2852783339999], [25.1158447104, 175.7504272734, 496.276306176, 620.1265868992], [119.6682128896, 135.9530639709, 199.0906372096, 179.88885499960003], [0.7243041792, 416.1300048641, 50.3848266752, 576.2852783339999], [55.5383910912, 62.8240356158, 93.7704467968, 183.89227295470002], [86.601928704, 55.6555175938, 130.4095458816, 183.0957641602]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046483_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two storage boxes, and two bottles.", "boxes_value": [[0.7243041792, 55.6555175938, 199.0906372096, 576.2852783339999], [25.1158447104, 175.7504272734, 248, 620.1265868992], [119.6682128896, 135.9530639709, 199.0906372096, 179.88885499960003], [0.7243041792, 416.1300048641, 50.3848266752, 576.2852783339999], [55.5383910912, 62.8240356158, 93.7704467968, 183.89227295470002], [86.601928704, 55.6555175938, 130.4095458816, 183.0957641602]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046487.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe.", "boxes_value": [[593.9299316736001, 307.2550659072, 736.2451171584, 457.3095092736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046487_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe.", "boxes_value": [[35.929931673600095, 38.25506590719999, 178.24511715840003, 188.30950927359999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046487.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a laptop, and four microphones.", "boxes_value": [[593.9299316736001, 307.2550659072, 736.2451171584, 457.3095092736], [643.7783202816, 423.8360595456, 678.4714355712, 457.3095092736], [659.9080810752, 385.135742208, 673.0781250048, 413.5244140544], [715.1544189696, 307.2550659072, 736.2451171584, 337.7019042816], [650.9489746176, 315.6741943296, 665.6409911808, 339.1114502144], [593.9299316736001, 317.7730713088, 608.6219482368, 341.9099731456]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046487_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a laptop, and four microphones.", "boxes_value": [[35.929931673600095, 38.25506590719999, 178.24511715840003, 188.30950927359999], [85.77832028160003, 154.8360595456, 120.4714355712, 188.30950927359999], [101.90808107520002, 116.13574220800001, 115.07812500479997, 144.52441405439998], [157.1544189696, 38.25506590719999, 178.24511715840003, 68.70190428159998], [92.94897461760002, 46.67419432960003, 107.64099118080003, 70.11145021440001], [35.929931673600095, 48.77307130880001, 50.621948236799994, 72.90997314560002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046493.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[383.1191406336, 250.7199096832, 435.4121093376, 300.4277343744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046493_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[13.119140633600011, 12.719909683200001, 65.41210933759999, 62.42773437440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046493.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[383.1191406336, 250.7199096832, 435.4121093376, 300.4277343744], [407.76977541119993, 220.6731567616, 443.20861816319996, 309.9788208128], [402.2130127104, 232.1468506112, 436.19079590399997, 330.7326660096], [366.0114746112, 211.2723999232, 407.11474606080003, 347.7096557568], [383.1191406336, 276.0098876928, 408.62707522560004, 300.4277343744], [423.2341308672, 250.7199096832, 435.4121093376, 284.279968256]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046493_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[13.119140633600011, 12.719909683200001, 65.41210933759999, 62.42773437440002], [37.76977541119993, 0, 73.20861816319996, 71.9788208128], [32.213012710399994, 0, 66.19079590399997, 74], [0, 0, 37.11474606080003, 74], [13.119140633600011, 38.00988769280002, 38.62707522560004, 62.42773437440002], [53.23413086720001, 12.719909683200001, 65.41210933759999, 46.27996825600002]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046494.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference.", "boxes_value": [[254.6977539072, 351.8524169809, 330.10266112, 551.2641601676]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046494_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference.", "boxes_value": [[19.697753907199996, 50.85241698089999, 95.10266112, 250.26416016760004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046494.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a handbag, two sneakers, and a person.", "boxes_value": [[254.6977539072, 351.8524169809, 330.10266112, 551.2641601676], [164.2116699136, 422.6019287273, 511.9027099648, 508.569580053], [305.018371584, 351.8524169809, 330.10266112, 406.02380371429996], [276.8004760576, 519.318725575, 302.702087424, 551.2641601676], [254.6977539072, 512.2390136718, 284.3982543872, 542.4575195098], [239.6643676672, 240.11791994920003, 332.9326782464, 551.7487792659999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046494_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a handbag, two sneakers, and a person.", "boxes_value": [[19.697753907199996, 50.85241698089999, 95.10266112, 250.26416016760004], [0, 121.60192872729999, 113, 207.56958005299998], [70.01837158400002, 50.85241698089999, 95.10266112, 105.02380371429996], [41.80047605760001, 218.31872557500003, 67.70208742400001, 250.26416016760004], [19.697753907199996, 211.23901367179997, 49.398254387199984, 241.45751950980002], [4.664367667199997, 0, 97.93267824639997, 250.74877926599993]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046496.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[378.1381836288, 88.2403564544, 603.904052736, 456.0010376192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046496_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[57.13818362879999, 88.2403564544, 282.90405273600004, 456.0010376192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046496.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people, and a tie.", "boxes_value": [[378.1381836288, 88.2403564544, 603.904052736, 456.0010376192], [431.60913085439995, 218.3527832064, 567.6627197184, 458.9716186624], [517.7565917951999, 106.6580810752, 603.904052736, 456.0010376192], [453.59155276800004, 88.2403564544, 593.8040771328, 284.3001709056], [378.1381836288, 105.4698486272, 463.69165040639996, 412.0360717824], [320.508544896, 77.5461425664, 457.15625003519995, 365.1005859328001], [467.8288574208, 281.1092529152, 480.49084469760004, 337.7715453952]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046496_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people, and a tie.", "boxes_value": [[57.13818362879999, 88.2403564544, 282.90405273600004, 456.0010376192], [110.60913085439995, 218.3527832064, 246.6627197184, 458.9716186624], [196.75659179519994, 106.6580810752, 282.90405273600004, 456.0010376192], [132.59155276800004, 88.2403564544, 272.8040771328, 284.3001709056], [57.13818362879999, 105.4698486272, 142.69165040639996, 412.0360717824], [0, 77.5461425664, 136.15625003519995, 365.1005859328001], [146.82885742079998, 281.1092529152, 159.49084469760004, 337.7715453952]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046499.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each object you identify.", "boxes_value": [[34.4544067584, 319.8706054668, 145.6775512576, 365.6740722895]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046499_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each object you identify.", "boxes_value": [[28.454406758399998, 11.870605466799987, 139.6775512576, 57.674072289499975]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046499.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[34.4544067584, 319.8706054668, 145.6775512576, 365.6740722895], [124.0676879872, 319.8706054668, 145.6775512576, 365.6740722895], [104.0014038016, 327.4541625769, 123.530761728, 360.0703125266], [92.1226806784, 323.8876953061, 110.9330444288, 364.09692379570004], [76.3486938624, 323.0248413259, 88.1535034368, 364.34094241580004], [34.4544067584, 321.3056640749, 44.7242431488, 363.03686524479997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046499_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[28.454406758399998, 11.870605466799987, 139.6775512576, 57.674072289499975], [118.0676879872, 11.870605466799987, 139.6775512576, 57.674072289499975], [98.0014038016, 19.45416257689999, 117.530761728, 52.07031252659999], [86.1226806784, 15.8876953061, 104.9330444288, 56.096923795700036], [70.3486938624, 15.024841325900013, 82.1535034368, 56.34094241580004], [28.454406758399998, 13.305664074899994, 38.7242431488, 55.03686524479997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046502.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[211.7285766723, 176.2621459968, 273.6888427951, 453.5375366144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046502_crop.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[15.728576672299994, 70.2621459968, 77.68884279510002, 347.5375366144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046502.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a boots.", "boxes_value": [[211.7285766723, 176.2621459968, 273.6888427951, 453.5375366144], [174.74725339559998, 257.6127319552, 251.21374512, 456.7895507968], [217.19915773440002, 189.6702270464, 240.20220946229998, 257.0362548736], [247.82122799709998, 180.2285156352, 273.6888427951, 247.3119506944], [217.1249389897, 176.2621459968, 244.37219236080003, 240.9312743936], [211.7285766723, 427.29351808, 226.6442871197, 453.5375366144]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046502_crop.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a boots.", "boxes_value": [[15.728576672299994, 70.2621459968, 77.68884279510002, 347.5375366144], [0, 151.6127319552, 55.21374512, 350.7895507968], [21.199157734400018, 83.67022704639999, 44.202209462299976, 151.0362548736], [51.82122799709998, 74.22851563520001, 77.68884279510002, 141.3119506944], [21.124938989700013, 70.2621459968, 48.37219236080003, 134.9312743936], [15.728576672299994, 321.29351808, 30.64428711970001, 347.5375366144]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046505.jpg", "text": "I'd like some information about the specific region in the image . Give coordinates for the items you reference.", "boxes_value": [[246.0458373888, 278.0777587712, 741.7011718655999, 412.7359619072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046505_crop.jpg", "text": "I'd like some information about the specific region in the image . Give coordinates for the items you reference.", "boxes_value": [[124.04583738880001, 34.077758771200024, 619.7011718655999, 168.73596190720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046505.jpg", "text": "I'd like some information about the specific region in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a barrel, two helmets, and an airplane.", "boxes_value": [[246.0458373888, 278.0777587712, 741.7011718655999, 412.7359619072], [227.1883544832, 286.3746337792, 298.86291502079996, 441.2686157312], [475.42810060799997, 278.2362671104, 517.2320556288, 412.7359619072], [621.742065408, 284.5977172992, 664.9091797248, 378.2022705152], [716.7097168128, 288.2328491008, 741.7011718655999, 362.75299072], [684.6973876991999, 327.8442993152, 711.266479488, 342.4144287232], [246.0458373888, 286.7360229376, 269.85620113920004, 311.911926272], [485.683959936, 278.0777587712, 506.04846190079996, 296.4840698368], [6.2505493248, 104.4489135616, 714.4582519296, 392.891235328]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046505_crop.jpg", "text": "I'd like some information about the specific region in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a barrel, two helmets, and an airplane.", "boxes_value": [[124.04583738880001, 34.077758771200024, 619.7011718655999, 168.73596190720002], [105.18835448319999, 42.374633779199996, 176.86291502079996, 197.2686157312], [353.42810060799997, 34.23626711039998, 395.2320556288, 168.73596190720002], [499.74206540800003, 40.597717299199985, 542.9091797248, 134.20227051519998], [594.7097168128, 44.232849100800024, 619.7011718655999, 118.75299072000001], [562.6973876991999, 83.84429931519998, 589.266479488, 98.41442872319999], [124.04583738880001, 42.73602293760001, 147.85620113920004, 67.91192627200002], [363.683959936, 34.077758771200024, 384.04846190079996, 52.48406983680002], [0, 0, 592.4582519296, 148.891235328]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046506.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[297.6469726504, 190.4671630848, 371.5314941039, 312.6291504128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046506_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[18.646972650400016, 31.467163084800006, 92.5314941039, 153.62915041280002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046506.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a person, two peaches, an apple, and an orange.", "boxes_value": [[297.6469726504, 190.4671630848, 371.5314941039, 312.6291504128], [230.99035648229997, 140.8937988096, 448.7855224789, 279.7777099776], [309.34338375100003, 190.4671630848, 371.5314941039, 289.3687744], [297.6469726504, 295.0216674816, 320.8737792866, 312.6291504128], [289.7702636799, 287.3364868096, 307.4495849842, 305.5358276608], [331.2105713184, 277.7782593024, 363.5388183954, 308.1691894784], [316.175415065, 281.64434816, 333.2209472554, 301.093750016]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046506_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a person, two peaches, an apple, and an orange.", "boxes_value": [[18.646972650400016, 31.467163084800006, 92.5314941039, 153.62915041280002], [0, 0, 111, 120.77770997760001], [30.343383751000033, 31.467163084800006, 92.5314941039, 130.3687744], [18.646972650400016, 136.0216674816, 41.873779286599984, 153.62915041280002], [10.770263679899983, 128.3364868096, 28.449584984199987, 146.5358276608], [52.210571318400014, 118.77825930239999, 84.53881839540003, 149.16918947840003], [37.17541506499998, 122.64434815999999, 54.220947255400006, 142.093750016]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046512.jpg", "text": "Can you provide a description of the area in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[673.2391357116, 181.2418823168, 785.303100567, 478.4409179648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046512_crop.jpg", "text": "Can you provide a description of the area in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[28.239135711600056, 75.2418823168, 140.303100567, 372.4409179648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046512.jpg", "text": "Can you provide a description of the area in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a nightstand, two lamps, a mirror, a potted plant, and a telephone.", "boxes_value": [[673.2391357116, 181.2418823168, 785.303100567, 478.4409179648], [700.6505126760001, 321.3879394304, 785.303100567, 478.4409179648], [649.3168945698, 259.7405395456, 785.303100567, 397.6867065344], [671.9241943392001, 176.6669311488, 755.0440673748, 277.6734008832], [695.0715331722, 105.120727552, 785.5563964554, 273.4647826944], [730.8507080304, 181.2418823168, 763.7092284828, 221.6109008896], [753.8378906388, 161.2888794112, 784.5875244378, 257.0520019456], [673.2391357116, 257.2415771648, 704.7601318326, 272.9525146624]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00046512_crop.jpg", "text": "Can you provide a description of the area in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a nightstand, two lamps, a mirror, a potted plant, and a telephone.", "boxes_value": [[28.239135711600056, 75.2418823168, 140.303100567, 372.4409179648], [55.65051267600006, 215.38793943040002, 140.303100567, 372.4409179648], [4.316894569799956, 153.74053954559997, 140.303100567, 291.6867065344], [26.924194339200085, 70.66693114879999, 110.04406737479997, 171.67340088319997], [50.07153317220002, 0, 140.55639645539998, 167.46478269440001], [85.85070803040003, 75.2418823168, 118.70922848279997, 115.6109008896], [108.83789063879999, 55.288879411200014, 139.58752443779997, 151.0520019456], [28.239135711600056, 151.2415771648, 59.760131832599996, 166.9525146624]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00046514.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[188.622192384, 180.5955810536, 377.6564941312, 335.04241941]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046514_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[47.62219238399999, 39.5955810536, 236.65649413120002, 194.04241940999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046514.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four storage boxes, a picture, a flower, a cabinet, and a stuffed toy.", "boxes_value": [[188.622192384, 180.5955810536, 377.6564941312, 335.04241941], [154.6125488128, 173.6615600853, 219.3299560448, 226.8223266764], [287.5144043008, 180.5955810536, 377.6564941312, 231.4450073447], [188.622192384, 247.8720092506, 321.3589477376, 334.0518188173], [290.6511840768, 282.5420532368, 349.0949707264, 335.04241941], [319.377807616, 276.5986327929, 374.8499145728, 310.2780761864], [256.7879638528, 293.5365600886, 278.427673344, 316.61889649209996], [132.9590454272, 99.81066896189999, 392.815246592, 253.88476560059996], [248.3588867072, 150.0485839751, 292.7938842624, 225.2278442573]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6], [7], [8]]}, {"image_path": "objects365_v1_00046514_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four storage boxes, a picture, a flower, a cabinet, and a stuffed toy.", "boxes_value": [[47.62219238399999, 39.5955810536, 236.65649413120002, 194.04241940999998], [13.612548812799986, 32.66156008530001, 78.32995604480001, 85.8223266764], [146.51440430079998, 39.5955810536, 236.65649413120002, 90.4450073447], [47.62219238399999, 106.8720092506, 180.35894773759998, 193.0518188173], [149.6511840768, 141.54205323679997, 208.0949707264, 194.04241940999998], [178.37780761599998, 135.5986327929, 233.8499145728, 169.27807618640003], [115.78796385279998, 152.53656008860003, 137.42767334400003, 175.61889649209996], [0, 0, 251.815246592, 112.88476560059996], [107.35888670720001, 9.048583975100001, 151.7938842624, 84.2278442573]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6], [7], [8]]}, {"image_path": "objects365_v1_00046517.jpg", "text": "Can you generate a description of the contents within the selected region in ? Specify the location of each mentioned object.", "boxes_value": [[139.4080810775, 228.0626830848, 454.1770019255, 417.660400384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046517_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Specify the location of each mentioned object.", "boxes_value": [[79.40808107749999, 48.0626830848, 394.1770019255, 237.660400384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046517.jpg", "text": "Can you generate a description of the contents within the selected region in ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a desk, three cabinets, and a person.", "boxes_value": [[139.4080810775, 228.0626830848, 454.1770019255, 417.660400384], [30.9799194288, 268.3588256768, 276.98968505759996, 475.929870592], [139.4080810775, 260.6044921856, 454.1770019255, 417.660400384], [341.2728271651, 209.2472534016, 438.1076660149, 282.9079589888], [240.7135619953, 214.627014144, 342.1004638895, 274.631530752], [133.1192626843, 216.6961059328, 242.3688354441, 274.631530752], [436.3118896309, 228.0626830848, 453.8863525732, 277.5435180544]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046517_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a desk, three cabinets, and a person.", "boxes_value": [[79.40808107749999, 48.0626830848, 394.1770019255, 237.660400384], [0, 88.3588256768, 216.98968505759996, 285], [79.40808107749999, 80.60449218560001, 394.1770019255, 237.660400384], [281.2728271651, 29.247253401600005, 378.1076660149, 102.90795898879998], [180.7135619953, 34.627014143999986, 282.1004638895, 94.631530752], [73.1192626843, 36.69610593280001, 182.3688354441, 94.631530752], [376.3118896309, 48.0626830848, 393.8863525732, 97.54351805440001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046520.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[86.88519288, 272.396728512, 358.93176271199997, 307.88134766400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046520_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[68.88519288, 9.396728511999981, 340.93176271199997, 44.88134766400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046520.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a pillow, a couch, and a bowl.", "boxes_value": [[86.88519288, 272.396728512, 358.93176271199997, 307.88134766400003], [286.8763428, 286.140502944, 358.93176271199997, 307.88134766400003], [215.575134264, 282.45782472, 256.039794936, 301.09289548799995], [146.359374984, 283.522705056, 210.25085450400002, 298.96313476800003], [137.163696264, 275.536254864, 278.401794408, 303.2225952], [86.88519288, 272.396728512, 137.565307584, 299.43963624]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046520_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a pillow, a couch, and a bowl.", "boxes_value": [[68.88519288, 9.396728511999981, 340.93176271199997, 44.88134766400003], [268.8763428, 23.14050294399999, 340.93176271199997, 44.88134766400003], [197.575134264, 19.45782472000002, 238.03979493600002, 38.092895487999954], [128.359374984, 20.522705056000007, 192.25085450400002, 35.96313476800003], [119.16369626400001, 12.536254864, 260.401794408, 40.2225952], [68.88519288, 9.396728511999981, 119.56530758400001, 36.43963624000003]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046523.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[626.1304931328, 271.0318603264, 767.1756592128, 398.5147094528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046523_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[36.130493132799984, 32.03186032640002, 177.17565921280004, 159.5147094528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046523.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, three people, and a speaker.", "boxes_value": [[626.1304931328, 271.0318603264, 767.1756592128, 398.5147094528], [737.097045888, 287.8370361344, 767.1756592128, 316.0551757824], [620.8291015679999, 265.0676879872, 634.7454833664001, 322.2241821184], [626.1304931328, 271.0318603264, 647.9908447488, 323.5043335168], [688.3070068224, 275.2683715584, 704.0335693055999, 326.1174316544], [683.7777099264, 365.9567260672, 711.2578125312, 398.5147094528]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046523_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, three people, and a speaker.", "boxes_value": [[36.130493132799984, 32.03186032640002, 177.17565921280004, 159.5147094528], [147.09704588800003, 48.83703613440002, 177.17565921280004, 77.0551757824], [30.829101567999942, 26.067687987199974, 44.745483366400094, 83.2241821184], [36.130493132799984, 32.03186032640002, 57.99084474879999, 84.50433351679999], [98.30700682240001, 36.26837155840002, 114.03356930559994, 87.11743165439998], [93.77770992640001, 126.95672606720001, 121.25781253119999, 159.5147094528]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046524.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[156.837402337, 316.5908203008, 619.3220214720001, 384.453918464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046524_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[115.83740233699999, 17.590820300799976, 578.3220214720001, 85.45391846400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046524.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a nightstand, a pillow, a bed, and a desk.", "boxes_value": [[156.837402337, 316.5908203008, 619.3220214720001, 384.453918464], [479.853271513, 293.933349632, 519.414184597, 350.9277954048], [439.62194822699996, 350.2572632064, 526.119384791, 381.7718505984], [539.529785179, 341.5404663296, 619.3220214720001, 384.453918464], [286.646972689, 288.7197265408, 769.916015638, 510.4027099648], [156.837402337, 316.5908203008, 246.360656773, 380.7907715072]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046524_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a nightstand, a pillow, a bed, and a desk.", "boxes_value": [[115.83740233699999, 17.590820300799976, 578.3220214720001, 85.45391846400003], [438.853271513, 0, 478.41418459700003, 51.92779540480001], [398.62194822699996, 51.25726320640001, 485.119384791, 82.77185059840002], [498.529785179, 42.54046632960001, 578.3220214720001, 85.45391846400003], [245.646972689, 0, 693, 102], [115.83740233699999, 17.590820300799976, 205.360656773, 81.79077150720002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046529.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[413.227416978, 238.983093248, 495.416931159, 324.9774780416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046529_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[21.227416978000008, 21.98309324799999, 103.416931159, 107.97747804160002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046529.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, and four pictures.", "boxes_value": [[413.227416978, 238.983093248, 495.416931159, 324.9774780416], [351.7201537944, 220.42010496, 466.2651367038, 330.39849856], [469.0092773223, 238.983093248, 494.98535157090004, 269.0338134528], [466.6031494149, 271.497375488, 485.5940551863, 294.4174194176], [470.3140258839, 295.5088500736, 495.416931159, 324.9774780416], [413.227416978, 258.4965820416, 440.97015378450004, 313.4995117056]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046529_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, and four pictures.", "boxes_value": [[21.227416978000008, 21.98309324799999, 103.416931159, 107.97747804160002], [0, 3.4201049600000033, 74.26513670380001, 113.39849856000001], [77.00927732230002, 21.98309324799999, 102.98535157090004, 52.03381345280002], [74.60314941489997, 54.49737548799999, 93.59405518630001, 77.4174194176], [78.3140258839, 78.5088500736, 103.416931159, 107.97747804160002], [21.227416978000008, 41.49658204159999, 48.97015378450004, 96.49951170560001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046530.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[134.8894042902, 198.1256713728, 396.36865230899997, 508.2597656064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046530_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[65.88940429019999, 78.12567137280001, 327.36865230899997, 388.2597656064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046530.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a storage box, two people, a sneakers, and two slippers.", "boxes_value": [[134.8894042902, 198.1256713728, 396.36865230899997, 508.2597656064], [134.8894042902, 363.621032704, 213.29150390100003, 411.3440551936], [314.90368654919996, 183.9622802944, 409.54479981239996, 509.9484253184], [292.4308471392, 198.1256713728, 352.0299072612, 410.9795532288], [292.7914428432, 395.494323712, 312.421020513, 411.7827148288], [332.0505371358, 491.9714355712, 367.1330566554, 508.2597656064], [369.63903807599996, 462.3182983168, 396.36865230899997, 490.3008422912]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046530_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a storage box, two people, a sneakers, and two slippers.", "boxes_value": [[65.88940429019999, 78.12567137280001, 327.36865230899997, 388.2597656064], [65.88940429019999, 243.62103270400002, 144.29150390100003, 291.3440551936], [245.90368654919996, 63.96228029439999, 340.54479981239996, 389.9484253184], [223.4308471392, 78.12567137280001, 283.0299072612, 290.9795532288], [223.7914428432, 275.494323712, 243.42102051299997, 291.7827148288], [263.0505371358, 371.9714355712, 298.1330566554, 388.2597656064], [300.63903807599996, 342.3182983168, 327.36865230899997, 370.3008422912]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046531.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object.", "boxes_value": [[194.5678100641, 58.3917846528, 370.8210449308, 287.718139648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046531_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object.", "boxes_value": [[44.5678100641, 57.3917846528, 220.82104493079999, 286.718139648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046531.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a clock, two handbags, two traffic lights, and a person.", "boxes_value": [[194.5678100641, 58.3917846528, 370.8210449308, 287.718139648], [331.9458007479, 58.3917846528, 367.6114502142, 97.2277221888], [350.73376467960003, 258.9136963072, 370.8210449308, 287.718139648], [335.1945190705, 266.6833496064, 352.4393310383, 297.003784192], [239.395507844, 77.3554077184, 274.9653930607, 141.18621824], [194.5678100641, 81.2534790144, 223.80328370069998, 141.18621824], [227.1099243164, 180.88305664, 280.20629882860004, 331.9844360192]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046531_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a clock, two handbags, two traffic lights, and a person.", "boxes_value": [[44.5678100641, 57.3917846528, 220.82104493079999, 286.718139648], [181.94580074790002, 57.3917846528, 217.61145021419998, 96.2277221888], [200.73376467960003, 257.9136963072, 220.82104493079999, 286.718139648], [185.1945190705, 265.6833496064, 202.4393310383, 296.003784192], [89.39550784400001, 76.3554077184, 124.96539306070002, 140.18621824], [44.5678100641, 80.2534790144, 73.80328370069998, 140.18621824], [77.1099243164, 179.88305664, 130.20629882860004, 330.9844360192]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046532.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates.", "boxes_value": [[21.2180175872, 456.59484863079996, 250.1722412032, 588.6406249842]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046532_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates.", "boxes_value": [[21.2180175872, 33.59484863079996, 250.1722412032, 165.64062498420003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046532.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include two bottles, a knife, two chairs, and a desk.", "boxes_value": [[21.2180175872, 456.59484863079996, 250.1722412032, 588.6406249842], [21.2180175872, 483.28369140430004, 33.8466797056, 507.7756347624], [226.1200561664, 534.809814425, 250.1722412032, 551.4614257984999], [29.6269531136, 465.1738281087, 42.5270995968, 512.5358887057], [167.2415771648, 470.3337402533, 279.4423827968, 622.9879150665], [63.4367675904, 456.59484863079996, 173.3477172736, 588.6406249842], [0.0853271552, 486.3624267359, 435.9128417792, 594.7468261841]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6]]}, {"image_path": "objects365_v1_00046532_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include two bottles, a knife, two chairs, and a desk.", "boxes_value": [[21.2180175872, 33.59484863079996, 250.1722412032, 165.64062498420003], [21.2180175872, 60.28369140430004, 33.8466797056, 84.77563476239999], [226.1200561664, 111.80981442500001, 250.1722412032, 128.46142579849993], [29.6269531136, 42.17382810869998, 42.5270995968, 89.53588870570002], [167.2415771648, 47.33374025329999, 279.4423827968, 198], [63.4367675904, 33.59484863079996, 173.3477172736, 165.64062498420003], [0.0853271552, 63.36242673589999, 307, 171.74682618409997]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6]]}, {"image_path": "objects365_v1_00046536.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention.", "boxes_value": [[5.3687133696, 120.8304443392, 220.6510619904, 248.8024902144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046536_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention.", "boxes_value": [[5.3687133696, 32.8304443392, 220.6510619904, 160.8024902144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046536.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two hats, a helmet, a bottle, and a bakset.", "boxes_value": [[5.3687133696, 120.8304443392, 220.6510619904, 248.8024902144], [177.3988037376, 155.6655883776, 220.6510619904, 199.3297729536], [170.5747070208, 120.8304443392, 197.20703124480002, 152.3049926656], [160.8052368384, 206.8176880128, 220.55767825919997, 246.4611206144], [52.828430208, 209.9787597824, 87.8334350592, 248.8024902144], [5.3687133696, 159.0844726784, 63.5468140032, 190.0097046016]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046536_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two hats, a helmet, a bottle, and a bakset.", "boxes_value": [[5.3687133696, 32.8304443392, 220.6510619904, 160.8024902144], [177.3988037376, 67.66558837759999, 220.6510619904, 111.32977295360001], [170.5747070208, 32.8304443392, 197.20703124480002, 64.3049926656], [160.8052368384, 118.8176880128, 220.55767825919997, 158.4611206144], [52.828430208, 121.9787597824, 87.8334350592, 160.8024902144], [5.3687133696, 71.0844726784, 63.5468140032, 102.00970460159999]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046539.jpg", "text": "Kindly give an overview of the section in photo . Give coordinates for the items you reference.", "boxes_value": [[320.4918823, 196.9168701, 429.43231199999997, 547.246215825]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046539_crop.jpg", "text": "Kindly give an overview of the section in photo . Give coordinates for the items you reference.", "boxes_value": [[27.491882299999986, 87.91687010000001, 136.43231199999997, 438.24621582500004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046539.jpg", "text": "Kindly give an overview of the section in photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, three sneakers, and two people.", "boxes_value": [[320.4918823, 196.9168701, 429.43231199999997, 547.246215825], [314.53448485, 321.6154785, 369.6364746, 448.026000975], [323.48327635000004, 470.09387205, 345.828125, 526.296264675], [339.2965698, 460.98034665, 373.3295288, 511.51428225], [371.61071775, 519.7646484, 398.08081055, 548.6411133], [339.9944458, 234.323364225, 499.98077395, 621.4121094], [320.4918823, 196.9168701, 429.43231199999997, 547.246215825]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046539_crop.jpg", "text": "Kindly give an overview of the section in photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, three sneakers, and two people.", "boxes_value": [[27.491882299999986, 87.91687010000001, 136.43231199999997, 438.24621582500004], [21.534484850000013, 212.6154785, 76.63647459999999, 339.026000975], [30.48327635000004, 361.09387205, 52.828125, 417.296264675], [46.296569799999986, 351.98034665, 80.32952879999999, 402.51428225], [78.61071774999999, 410.76464840000006, 105.08081055000002, 439.64111330000003], [46.994445799999994, 125.323364225, 163, 512.4121094], [27.491882299999986, 87.91687010000001, 136.43231199999997, 438.24621582500004]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046540.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations.", "boxes_value": [[353.9899902496, 51.8031005696, 771.9558105656, 511.6384277504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046540_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations.", "boxes_value": [[104.9899902496, 51.8031005696, 522.9558105656, 511.6384277504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046540.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a sneakers, and a cabinet.", "boxes_value": [[353.9899902496, 51.8031005696, 771.9558105656, 511.6384277504], [551.4536132668001, 51.8031005696, 771.9558105656, 511.6384277504], [528.9101562444, 190.338439936, 699.4764404116, 511.8845825024], [481.3430175732, 174.8925781504, 598.7764892852, 443.6531372032], [544.067504888, 420.784912128, 574.9068603232, 444.854614272], [353.9899902496, 67.9892578304, 414.01000978919996, 112.7803344896]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046540_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a sneakers, and a cabinet.", "boxes_value": [[104.9899902496, 51.8031005696, 522.9558105656, 511.6384277504], [302.45361326680006, 51.8031005696, 522.9558105656, 511.6384277504], [279.9101562444, 190.338439936, 450.47644041160004, 511.8845825024], [232.34301757319997, 174.8925781504, 349.7764892852, 443.6531372032], [295.06750488800003, 420.784912128, 325.9068603232, 444.854614272], [104.9899902496, 67.9892578304, 165.01000978919996, 112.7803344896]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046541.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[25.611938465999998, 430.3453979648, 224.0699462686, 500.9927368192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046541_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[25.611938465999998, 18.345397964799986, 224.0699462686, 88.99273681919999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046541.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include five people, a car, and a street lights.", "boxes_value": [[25.611938465999998, 430.3453979648, 224.0699462686, 500.9927368192], [211.0422363514, 431.8791503872, 224.0699462686, 478.5364990464], [141.701660175, 435.3128051712, 161.0192870956, 499.8888549888], [119.8083495986, 430.3453979648, 140.5978393912, 500.9927368192], [59.095825225400006, 419.8586425856, 125.69561770620001, 511.8473510912], [25.611938465999998, 437.8884887552, 47.137268080999995, 496.0253296128], [46.0749890858, 444.4299059712, 80.3990388928, 468.715790336], [195.38531492820002, 398.4982299648, 215.4432983136, 455.6497192448]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046541_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include five people, a car, and a street lights.", "boxes_value": [[25.611938465999998, 18.345397964799986, 224.0699462686, 88.99273681919999], [211.0422363514, 19.879150387200013, 224.0699462686, 66.53649904640002], [141.701660175, 23.31280517120001, 161.0192870956, 87.8888549888], [119.8083495986, 18.345397964799986, 140.5978393912, 88.99273681919999], [59.095825225400006, 7.858642585600023, 125.69561770620001, 99.84735109119998], [25.611938465999998, 25.8884887552, 47.137268080999995, 84.02532961280002], [46.0749890858, 32.429905971200014, 80.3990388928, 56.715790336], [195.38531492820002, 0, 215.4432983136, 43.649719244799996]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046544.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 372.6815795712, 80.350341799, 509.3146972672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046544_crop.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 34.68157957120002, 80.350341799, 171.31469726720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046544.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, and four cups.", "boxes_value": [[0, 372.6815795712, 80.350341799, 509.3146972672], [0.0103149392, 304.9690551808, 251.10369874399998, 512.6223144448], [26.761657745, 372.6815795712, 73.65173337760001, 432.2990112256], [41.1636352731, 424.2607421952, 80.350341799, 484.5480346624], [22.0726318227, 442.3469238272, 66.1846313191, 509.3146972672], [0, 412.715209984, 27.3162231466, 485.46893312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046544_crop.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, and four cups.", "boxes_value": [[0, 34.68157957120002, 80.350341799, 171.31469726720002], [0.0103149392, 0, 100, 174], [26.761657745, 34.68157957120002, 73.65173337760001, 94.2990112256], [41.1636352731, 86.26074219520001, 80.350341799, 146.54803466240003], [22.0726318227, 104.34692382719999, 66.1846313191, 171.31469726720002], [0, 74.71520998400001, 27.3162231466, 147.46893311999997]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046546.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 289.6448364019, 223.501892096, 635.7331543157001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046546_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 86.64483640190002, 223.501892096, 432.73315431570006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046546.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, and four storage boxes.", "boxes_value": [[0, 289.6448364019, 223.501892096, 635.7331543157001], [134.0793457152, 230.536560041, 360.5285033984, 683.4349365172999], [0.2984008704, 289.6448364019, 128.5571289088, 432.89477540440004], [110.7896728576, 292.4209594996, 223.501892096, 435.11572263510004], [0, 409.43627927800003, 136.4890136576, 561.4906005988], [0.1397705216, 525.0832519269, 154.3357543936, 635.7331543157001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046546_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, and four storage boxes.", "boxes_value": [[0, 86.64483640190002, 223.501892096, 432.73315431570006], [134.0793457152, 27.536560041, 279, 480], [0.2984008704, 86.64483640190002, 128.5571289088, 229.89477540440004], [110.7896728576, 89.42095949959997, 223.501892096, 232.11572263510004], [0, 206.43627927800003, 136.4890136576, 358.49060059880003], [0.1397705216, 322.08325192689995, 154.3357543936, 432.73315431570006]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046547.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[251.002258325, 211.234497088, 357.382690405, 373.3405761784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046547_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[27.002258325000014, 41.23449708800001, 133.382690405, 203.3405761784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046547.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a flower, a vase, a stool, and a radiator.", "boxes_value": [[251.002258325, 211.234497088, 357.382690405, 373.3405761784], [319.936767605, 211.234497088, 339.45202635500004, 253.6801147416], [251.002258325, 329.6417846752, 299.070922865, 372.7579345808], [268.190429715, 351.4912109512, 286.25262451500004, 373.3405761784], [277.94647218, 294.8762206928, 336.225769035, 355.0725708248], [298.78265378, 273.3015136752, 357.382690405, 313.30151366800004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046547_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a flower, a vase, a stool, and a radiator.", "boxes_value": [[27.002258325000014, 41.23449708800001, 133.382690405, 203.3405761784], [95.936767605, 41.23449708800001, 115.45202635500004, 83.6801147416], [27.002258325000014, 159.64178467519997, 75.070922865, 202.7579345808], [44.19042971499999, 181.4912109512, 62.252624515000036, 203.3405761784], [53.94647218, 124.87622069280002, 112.22576903499998, 185.07257082479998], [74.78265377999998, 103.3015136752, 133.382690405, 143.30151366800004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046549.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention.", "boxes_value": [[1.0639037952, 303.38757325800003, 162.3227538944, 665.244018593]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046549_crop.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention.", "boxes_value": [[1.0639037952, 91.38757325800003, 162.3227538944, 453.24401859299996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046549.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cymbals, a guitar, a person, and a bracelet.", "boxes_value": [[1.0639037952, 303.38757325800003, 162.3227538944, 665.244018593], [69.1012573184, 303.38757325800003, 136.4656372224, 397.603515617], [1.0639037952, 453.258178681, 117.9589233152, 665.244018593], [86.6623535104, 380.027954075, 162.3227538944, 424.79370117800005], [0, 193.903259242, 407.1870117376, 769.8529052829999], [61.297912576, 491.94360353599996, 83.396484352, 527.661010723]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046549_crop.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cymbals, a guitar, a person, and a bracelet.", "boxes_value": [[1.0639037952, 91.38757325800003, 162.3227538944, 453.24401859299996], [69.1012573184, 91.38757325800003, 136.4656372224, 185.60351561700003], [1.0639037952, 241.258178681, 117.9589233152, 453.24401859299996], [86.6623535104, 168.02795407500003, 162.3227538944, 212.79370117800005], [0, 0, 202, 543], [61.297912576, 279.94360353599996, 83.396484352, 315.661010723]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00046551.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[379.7014160008, 198.2911376896, 529.1705322388, 350.9138793984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046551_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[37.70141600080001, 38.29113768959999, 187.17053223879998, 190.9138793984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046551.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a tea pot, a plate, and three chairs.", "boxes_value": [[379.7014160008, 198.2911376896, 529.1705322388, 350.9138793984], [492.8570556648, 320.0668945408, 529.1705322388, 350.9138793984], [444.1656494404, 198.2911376896, 483.80249025919994, 244.9227905024], [379.7014160008, 250.0042724864, 423.8857422036, 282.2468261888], [506.28332518159993, 283.4409789952, 526.5842285136, 322.2514648576], [424.4827881104, 275.0817870848, 502.70092773, 394.4987182592]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046551_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a tea pot, a plate, and three chairs.", "boxes_value": [[37.70141600080001, 38.29113768959999, 187.17053223879998, 190.9138793984], [150.8570556648, 160.06689454079998, 187.17053223879998, 190.9138793984], [102.16564944039999, 38.29113768959999, 141.80249025919994, 84.9227905024], [37.70141600080001, 90.0042724864, 81.88574220359999, 122.24682618880001], [164.28332518159993, 123.4409789952, 184.5842285136, 162.2514648576], [82.48278811040001, 115.0817870848, 160.70092773, 229]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046552.jpg", "text": "Please describe the region in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[554.3363037268, 342.0513915904, 771.590820314, 487.7767333888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046552_crop.jpg", "text": "Please describe the region in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[54.3363037268, 37.05139159039999, 271.590820314, 182.77673338879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046552.jpg", "text": "Please describe the region in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two drums, two speakers, and a tripod.", "boxes_value": [[554.3363037268, 342.0513915904, 771.590820314, 487.7767333888], [554.3363037268, 373.2271118336, 613.57031247, 420.05017088], [643.4693603252, 388.4586791936, 698.1903076344, 414.4088134656], [494.0712890608, 429.877380352, 664.194946256, 493.5986328064], [707.0058593964, 384.0523071488, 771.590820314, 487.7767333888], [653.9102783396, 342.0513915904, 686.0845947028, 483.5073242112]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046552_crop.jpg", "text": "Please describe the region in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two drums, two speakers, and a tripod.", "boxes_value": [[54.3363037268, 37.05139159039999, 271.590820314, 182.77673338879998], [54.3363037268, 68.22711183360002, 113.57031246999998, 115.05017088], [143.4693603252, 83.4586791936, 198.19030763440003, 109.40881346560002], [0, 124.87738035199999, 164.19494625599998, 188.59863280640002], [207.00585939639996, 79.05230714880003, 271.590820314, 182.77673338879998], [153.91027833960004, 37.05139159039999, 186.08459470280002, 178.5073242112]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046553.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[122.4687271997, 413.518984192, 623.00520487, 511.9953256448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046553_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[122.4687271997, 25.518984192000005, 623.00520487, 123.9953256448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046553.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, three sneakers, and a belt.", "boxes_value": [[122.4687271997, 413.518984192, 623.00520487, 511.9953256448], [480.1068114998, 292.7168579072, 635.2161864932, 511.752258304], [122.4687271997, 468.0150229504, 159.8372973559, 495.9134759936], [319.03003060509997, 470.3654479872, 344.6736848305, 499.1411515392], [567.6269357587, 474.427702272, 605.4924601299, 511.9953256448], [587.3686849439, 413.518984192, 623.00520487, 425.3978241536]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046553_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, three sneakers, and a belt.", "boxes_value": [[122.4687271997, 25.518984192000005, 623.00520487, 123.9953256448], [480.1068114998, 0, 635.2161864932, 123.75225830400001], [122.4687271997, 80.01502295040001, 159.8372973559, 107.9134759936], [319.03003060509997, 82.36544798720001, 344.6736848305, 111.1411515392], [567.6269357587, 86.42770227199998, 605.4924601299, 123.9953256448], [587.3686849439, 25.518984192000005, 623.00520487, 37.39782415360003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046554.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[494.19262698, 1.4910888448, 764.6643066725001, 508.0386962944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046554_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[68.19262698, 1.4910888448, 338.66430667250006, 508.0386962944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046554.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five cabinets.", "boxes_value": [[494.19262698, 1.4910888448, 764.6643066725001, 508.0386962944], [494.19262698, 168.6195068416, 511.08154298240004, 322.6673584128], [507.82812502760004, 177.3838500864, 538.096557583, 308.5474853376], [563.9783935747, 199.7562866176, 577.577270517, 269.0668335104], [705.0704345437999, 1.4910888448, 764.6643066725001, 508.0386962944], [667.9449462855, 113.1377563648, 710.2183837632, 399.2385253888]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046554_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five cabinets.", "boxes_value": [[68.19262698, 1.4910888448, 338.66430667250006, 508.0386962944], [68.19262698, 168.6195068416, 85.08154298240004, 322.6673584128], [81.82812502760004, 177.3838500864, 112.09655758300005, 308.5474853376], [137.97839357470002, 199.7562866176, 151.577270517, 269.0668335104], [279.07043454379993, 1.4910888448, 338.66430667250006, 508.0386962944], [241.9449462855, 113.1377563648, 284.2183837632, 399.2385253888]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046556.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify.", "boxes_value": [[528.6467284992, 335.5614624256, 617.1038818559999, 484.6188964864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046556_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify.", "boxes_value": [[22.64672849919998, 37.561462425599984, 111.10388185599993, 186.6188964864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046556.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two backpacks, and a boots.", "boxes_value": [[528.6467284992, 335.5614624256, 617.1038818559999, 484.6188964864], [582.3804931584, 335.1502685696, 635.9360351232, 484.9132080128], [528.6467284992, 335.5614624256, 566.4881591808, 452.8700561408], [537.9967041024, 350.4077148672, 563.0794677504, 392.2123412992], [586.2327880704, 357.1607666176, 617.1038818559999, 422.4403076096], [590.5510253568, 452.369628928, 603.2302245888001, 484.6188964864]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046556_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two backpacks, and a boots.", "boxes_value": [[22.64672849919998, 37.561462425599984, 111.10388185599993, 186.6188964864], [76.38049315839999, 37.15026856959997, 129.93603512319999, 186.9132080128], [22.64672849919998, 37.561462425599984, 60.48815918080004, 154.8700561408], [31.99670410240003, 52.40771486720001, 57.0794677504, 94.21234129919998], [80.23278807040003, 59.16076661760002, 111.10388185599993, 124.44030760959998], [84.55102535679998, 154.369628928, 97.23022458880007, 186.6188964864]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046558.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations.", "boxes_value": [[300.8387451037, 270.1738281472, 533.896118198, 511.7387695104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046558_crop.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations.", "boxes_value": [[58.83874510369998, 61.17382814720003, 291.89611819799995, 302.7387695104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046558.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include an umbrella, a bottle, a bowl, and two chairs.", "boxes_value": [[300.8387451037, 270.1738281472, 533.896118198, 511.7387695104], [164.9342041165, 23.728942848, 536.212158221, 450.1213989376], [310.30499267000005, 270.1738281472, 322.8093872001, 303.7382202368], [499.8126220446, 335.5454711808, 533.896118198, 348.9199828992], [373.9816894472, 306.9387817472, 658.8905029017, 513.131958016], [300.8387451037, 288.827209472, 408.1149902338, 511.7387695104]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046558_crop.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include an umbrella, a bottle, a bowl, and two chairs.", "boxes_value": [[58.83874510369998, 61.17382814720003, 291.89611819799995, 302.7387695104], [0, 0, 294.212158221, 241.12139893760002], [68.30499267000005, 61.17382814720003, 80.80938720009999, 94.73822023679998], [257.8126220446, 126.5454711808, 291.89611819799995, 139.91998289920002], [131.98168944719998, 97.93878174719998, 350, 303], [58.83874510369998, 79.82720947199999, 166.11499023379997, 302.7387695104]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046563.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[185.4171752766, 311.3585205248, 733.3214111342, 365.8878173696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046563_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[137.4171752766, 14.358520524799985, 685.3214111342, 68.88781736959999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046563.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three helmets, a hat, and a sports car.", "boxes_value": [[185.4171752766, 311.3585205248, 733.3214111342, 365.8878173696], [185.4171752766, 318.1115722752, 209.7995605582, 342.1644286976], [233.1934203784, 338.869506816, 253.95141600140002, 365.8878173696], [440.131713842, 311.3585205248, 463.3477783192, 329.1575317504], [708.7242431416, 320.4964599808, 733.3214111342, 337.2886962688], [164.32250978460002, 263.1631469568, 432.886718764, 424.4189453312]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046563_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three helmets, a hat, and a sports car.", "boxes_value": [[137.4171752766, 14.358520524799985, 685.3214111342, 68.88781736959999], [137.4171752766, 21.11157227519999, 161.7995605582, 45.16442869759999], [185.1934203784, 41.86950681600001, 205.95141600140002, 68.88781736959999], [392.131713842, 14.358520524799985, 415.3477783192, 32.15753175039998], [660.7242431416, 23.49645998080001, 685.3214111342, 40.28869626879998], [116.32250978460002, 0, 384.886718764, 82]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046564.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference.", "boxes_value": [[53.788513151800004, 227.2987060736, 197.6281738218, 332.4341430784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046564_crop.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference.", "boxes_value": [[36.788513151800004, 26.298706073599988, 180.6281738218, 131.43414307839998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046564.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five umbrellas.", "boxes_value": [[53.788513151800004, 227.2987060736, 197.6281738218, 332.4341430784], [95.7868041708, 272.4578247168, 121.4566650088, 332.4341430784], [109.1917724732, 255.9834594816, 127.7742920046, 293.6016845824], [185.3348388514, 253.5324706816, 197.6281738218, 289.6785278464], [77.2995605478, 249.0069580288, 96.01800536420001, 284.7247314432], [53.788513151800004, 227.2987060736, 66.5585327224, 259.1535644672]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046564_crop.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five umbrellas.", "boxes_value": [[36.788513151800004, 26.298706073599988, 180.6281738218, 131.43414307839998], [78.7868041708, 71.45782471680002, 104.4566650088, 131.43414307839998], [92.1917724732, 54.98345948159999, 110.7742920046, 92.60168458240003], [168.3348388514, 52.53247068159999, 180.6281738218, 88.67852784640002], [60.2995605478, 48.0069580288, 79.01800536420001, 83.72473144320003], [36.788513151800004, 26.298706073599988, 49.5585327224, 58.1535644672]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046565.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[290.239190175, 175.1030537728, 335.67209541299997, 296.013249536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046565_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[12.239190174999976, 31.103053772799996, 57.672095412999965, 152.013249536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046565.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two people, a glasses, and a tie.", "boxes_value": [[290.239190175, 175.1030537728, 335.67209541299997, 296.013249536], [137.596374495, 4.4912719872, 368.836059591, 232.5633544704], [16.529663115, 39.5413818368, 318.665954592, 511.9075927552], [226.01586911399997, 138.0961303552, 373.182617163, 511.843383808], [290.239190175, 175.1030537728, 335.67209541299997, 190.8580128256], [303.638756679, 239.5099993088, 331.275362628, 296.013249536]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046565_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two people, a glasses, and a tie.", "boxes_value": [[12.239190174999976, 31.103053772799996, 57.672095412999965, 152.013249536], [0, 0, 69, 88.56335447039999], [0, 0, 40.66595459199999, 182], [0, 0, 69, 182], [12.239190174999976, 31.103053772799996, 57.672095412999965, 46.858012825600014], [25.638756678999982, 95.50999930879999, 53.27536262799998, 152.013249536]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046567.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[33.384033221, 197.54052736, 395.86486816800004, 317.229370112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046567_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[33.384033221, 30.54052736, 395.86486816800004, 150.22937011200003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046567.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[33.384033221, 197.54052736, 395.86486816800004, 317.229370112], [33.384033221, 257.6895141376, 48.459289571, 317.229370112], [139.270141626, 197.54052736, 167.332397463, 259.4080200192], [175.816345243, 251.4461669888, 189.651672364, 295.3016357376], [373.37536621000004, 242.9608764416, 395.86486816800004, 307.2467040768], [381.437622104, 243.1730346496, 414.95959474200004, 316.7941284352]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046567_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[33.384033221, 30.54052736, 395.86486816800004, 150.22937011200003], [33.384033221, 90.68951413759999, 48.459289571, 150.22937011200003], [139.270141626, 30.54052736, 167.332397463, 92.40802001920002], [175.816345243, 84.4461669888, 189.651672364, 128.3016357376], [373.37536621000004, 75.96087644159999, 395.86486816800004, 140.24670407679997], [381.437622104, 76.1730346496, 414.95959474200004, 149.79412843519998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046569.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations.", "boxes_value": [[27.3275756994, 200.9141845504, 551.4700927952999, 376.5913085952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046569_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations.", "boxes_value": [[27.3275756994, 44.914184550399995, 551.4700927952999, 220.59130859520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046569.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, two glasses, a tie, and a cup.", "boxes_value": [[27.3275756994, 200.9141845504, 551.4700927952999, 376.5913085952], [113.3416137897, 253.3109130752, 281.66674803, 376.5913085952], [157.11785888699998, 200.9141845504, 188.8184814184, 239.5820312576], [27.3275756994, 224.0600585728, 78.9497680658, 242.3263549952], [512.1176757849, 252.6101074432, 551.4700927952999, 266.5739135488], [143.4555053373, 238.374023424, 191.6997680751, 289.9920654336], [281.79479980689996, 316.574218752, 308.39624023150003, 354.0025634816]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046569_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, two glasses, a tie, and a cup.", "boxes_value": [[27.3275756994, 44.914184550399995, 551.4700927952999, 220.59130859520002], [113.3416137897, 97.3109130752, 281.66674803, 220.59130859520002], [157.11785888699998, 44.914184550399995, 188.8184814184, 83.5820312576], [27.3275756994, 68.06005857279999, 78.9497680658, 86.3263549952], [512.1176757849, 96.61010744320001, 551.4700927952999, 110.57391354880002], [143.4555053373, 82.374023424, 191.6997680751, 133.99206543359998], [281.79479980689996, 160.57421875199998, 308.39624023150003, 198.00256348160002]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046570.jpg", "text": "Could you describe the content of the bbox in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[358.50354003200005, 334.685852064, 412.654296896, 452.76232910399995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046570_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[14.503540032000046, 29.685852064000017, 68.654296896, 147.76232910399995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046570.jpg", "text": "Could you describe the content of the bbox in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three chairs, a desk, and a lamp.", "boxes_value": [[358.50354003200005, 334.685852064, 412.654296896, 452.76232910399995], [371.149536128, 415.070068368, 398.431762688, 452.76232910399995], [385.95581056000003, 410.57873534400005, 412.654296896, 449.00451662399996], [358.50354003200005, 408.9468384, 381.41845702399996, 448.91046144], [355.93707276799995, 418.112792976, 401.766967744, 448.91046144], [393.932373056, 334.685852064, 406.67956544, 374.086303728]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046570_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three chairs, a desk, and a lamp.", "boxes_value": [[14.503540032000046, 29.685852064000017, 68.654296896, 147.76232910399995], [27.149536128000022, 110.07006836800002, 54.43176268799999, 147.76232910399995], [41.95581056000003, 105.57873534400005, 68.654296896, 144.00451662399996], [14.503540032000046, 103.94683839999999, 37.41845702399996, 143.91046144], [11.93707276799995, 113.11279297599998, 57.766967744, 143.91046144], [49.93237305600002, 29.685852064000017, 62.679565439999976, 69.08630372800002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046571.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[181.2459716608, 640.8323974656, 386.061828608, 738.700561536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046571_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[51.245971660799995, 24.832397465600025, 256.061828608, 122.70056153600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046571.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two high heels, and a leather shoes.", "boxes_value": [[181.2459716608, 640.8323974656, 386.061828608, 738.700561536], [144.8726196224, 37.188598656, 431.3325805568, 708.2580566784001], [159.7092284928, 32.623474099199996, 397.0943603712, 748.2027588096], [181.2459716608, 690.9599609088, 232.9649048064, 738.700561536], [242.5130615296, 640.8323974656, 275.135742208, 721.1956786943999], [353.4924926976, 692.2856445696, 386.061828608, 706.8166503936]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046571_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two high heels, and a leather shoes.", "boxes_value": [[51.245971660799995, 24.832397465600025, 256.061828608, 122.70056153600001], [14.87261962240001, 0, 301.3325805568, 92.25805667840007], [29.70922849280001, 0, 267.0943603712, 132.20275880960003], [51.245971660799995, 74.95996090879999, 102.9649048064, 122.70056153600001], [112.51306152960001, 24.832397465600025, 145.135742208, 105.19567869439993], [223.49249269760003, 76.28564456959998, 256.061828608, 90.81665039359996]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046572.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention.", "boxes_value": [[163.01092529250002, 312.3385009664, 530.5303954869, 413.9409179648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046572_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention.", "boxes_value": [[92.01092529250002, 26.338500966399977, 459.53039548690003, 127.94091796480001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046572.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a stool, a pillow, and four people.", "boxes_value": [[163.01092529250002, 312.3385009664, 530.5303954869, 413.9409179648], [151.51116946390002, 358.0365600768, 206.2899170208, 414.3161010688], [204.4139404546, 356.5357665792, 256.9415283441, 413.9409179648], [451.6176757999, 376.6124877824, 530.5303954869, 411.9080200192], [513.7239990335, 350.7473755136, 536.0714111133, 372.0406494208], [163.01092529250002, 312.598144512, 190.01416013750003, 359.5941161984], [187.417663581, 313.3770752, 218.8348999187, 371.0186157056], [226.62426760440002, 312.3385009664, 247.3959960888, 356.4783935488], [320.94598391259996, 322.0695190528, 344.24609377329995, 337.0306396672]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00046572_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a stool, a pillow, and four people.", "boxes_value": [[92.01092529250002, 26.338500966399977, 459.53039548690003, 127.94091796480001], [80.51116946390002, 72.03656007680001, 135.2899170208, 128.31610106879998], [133.4139404546, 70.53576657920001, 185.9415283441, 127.94091796480001], [380.6176757999, 90.61248778240002, 459.53039548690003, 125.90802001920002], [442.72399903350004, 64.74737551359999, 465.0714111133, 86.04064942079998], [92.01092529250002, 26.598144511999976, 119.01416013750003, 73.5941161984], [116.417663581, 27.37707519999998, 147.8348999187, 85.01861570559998], [155.62426760440002, 26.338500966399977, 176.3959960888, 70.4783935488], [249.94598391259996, 36.06951905279999, 273.24609377329995, 51.03063966719998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00046584.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[169.6160278038, 69.9672241152, 359.34228513690005, 253.8985595904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046584_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[47.616027803799994, 46.9672241152, 237.34228513690005, 230.8985595904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046584.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a cell phone, a speaker, a moniter, and a router.", "boxes_value": [[169.6160278038, 69.9672241152, 359.34228513690005, 253.8985595904], [0, 93.671630848, 652.9609375226, 512.1805420032], [241.72100830489998, 171.5890503168, 289.0437011917, 189.4207763456], [169.6160278038, 201.0908203008, 207.76544187630003, 253.8985595904], [176.41217040100003, 0, 337.0750732393, 207.3856201216], [322.1713867234, 69.9672241152, 359.34228513690005, 162.685485824]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046584_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a cell phone, a speaker, a moniter, and a router.", "boxes_value": [[47.616027803799994, 46.9672241152, 237.34228513690005, 230.8985595904], [0, 70.671630848, 284, 276], [119.72100830489998, 148.5890503168, 167.0437011917, 166.4207763456], [47.616027803799994, 178.0908203008, 85.76544187630003, 230.8985595904], [54.412170401000026, 0, 215.07507323930002, 184.3856201216], [200.17138672340002, 46.9672241152, 237.34228513690005, 139.685485824]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046585.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[450.467529281, 101.5615844864, 698.697143529, 333.8790283264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046585_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[62.467529281, 58.561584486399994, 310.69714352899996, 290.8790283264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046585.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a chair, a lamp, a picture, and a hat.", "boxes_value": [[450.467529281, 101.5615844864, 698.697143529, 333.8790283264], [481.76147457900004, 305.2371215872, 589.186157252, 373.986450176], [450.467529281, 253.0452880896, 538.302612309, 333.8790283264], [535.120117224, 230.7682494976, 568.853881827, 311.6019897344], [606.4066162, 101.5615844864, 698.697143529, 195.1250610176], [654.1912842080001, 122.1442260992, 682.629882781, 146.3696899584]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046585_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a chair, a lamp, a picture, and a hat.", "boxes_value": [[62.467529281, 58.561584486399994, 310.69714352899996, 290.8790283264], [93.76147457900004, 262.2371215872, 201.18615725200004, 330.986450176], [62.467529281, 210.0452880896, 150.30261230899998, 290.8790283264], [147.12011722399996, 187.7682494976, 180.853881827, 268.6019897344], [218.40661620000003, 58.561584486399994, 310.69714352899996, 152.1250610176], [266.19128420800007, 79.1442260992, 294.629882781, 103.36968995839999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046587.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.1701660408, 71.6313476608, 193.0836181478, 438.3730468864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046587_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.1701660408, 71.6313476608, 193.0836181478, 438.3730468864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046587.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cabinets, a lamp, and a person.", "boxes_value": [[0.1701660408, 71.6313476608, 193.0836181478, 438.3730468864], [0.1701660408, 137.6163940352, 85.1486816644, 248.6910400512], [85.3555297872, 136.7903442432, 192.3249511866, 248.6910400512], [84.21752927920001, 249.4496459776, 193.0836181478, 348.954223616], [14.2100830164, 248.6910400512, 84.409728997, 364.4720458752], [1.2638549829999999, 71.6313476608, 149.97979733620002, 144.6302490112], [24.253906237200002, 295.7824096768, 140.9792480504, 438.3730468864]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046587_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cabinets, a lamp, and a person.", "boxes_value": [[0.1701660408, 71.6313476608, 193.0836181478, 438.3730468864], [0.1701660408, 137.6163940352, 85.1486816644, 248.6910400512], [85.3555297872, 136.7903442432, 192.3249511866, 248.6910400512], [84.21752927920001, 249.4496459776, 193.0836181478, 348.954223616], [14.2100830164, 248.6910400512, 84.409728997, 364.4720458752], [1.2638549829999999, 71.6313476608, 149.97979733620002, 144.6302490112], [24.253906237200002, 295.7824096768, 140.9792480504, 438.3730468864]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046588.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference.", "boxes_value": [[489.2950439161, 0.0706787328, 681.5950927684, 258.5220277248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046588_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference.", "boxes_value": [[48.295043916099985, 0.0706787328, 240.59509276840004, 258.5220277248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046588.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a helmet, and a crane.", "boxes_value": [[489.2950439161, 0.0706787328, 681.5950927684, 258.5220277248], [631.0416259497, 197.3750610432, 683.1070556739, 266.52026368], [485.17333985470003, 185.5879516672, 521.0299072182, 290.048706048], [519.7398681559999, 203.6542358528, 532.5119628969001, 237.7581787136], [662.4716796703, 238.1768188416, 683.1903076366999, 262.7501220864], [663.6254388546, 237.7842658816, 681.5950927684, 258.5220277248], [489.2950439161, 0.0706787328, 668.9194335978, 68.2131347456]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046588_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a helmet, and a crane.", "boxes_value": [[48.295043916099985, 0.0706787328, 240.59509276840004, 258.5220277248], [190.0416259497, 197.3750610432, 242, 266.52026368], [44.17333985470003, 185.5879516672, 80.02990721820004, 290.048706048], [78.73986815599994, 203.6542358528, 91.51196289690006, 237.7581787136], [221.47167967029998, 238.1768188416, 242, 262.7501220864], [222.62543885460002, 237.7842658816, 240.59509276840004, 258.5220277248], [48.295043916099985, 0.0706787328, 227.91943359779998, 68.2131347456]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046591.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please mention the objects and their locations.", "boxes_value": [[397.4708252268, 207.3176880128, 599.6444092044, 404.1743163904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046591_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please mention the objects and their locations.", "boxes_value": [[51.4708252268, 49.317688012800005, 253.64440920439995, 246.1743163904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046591.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two hats, and a glasses.", "boxes_value": [[397.4708252268, 207.3176880128, 599.6444092044, 404.1743163904], [397.4708252268, 266.2118530048, 495.75781253559995, 404.1743163904], [361.6597900232, 207.3427124224, 677.0025634404, 512.2513427968], [426.15344240919995, 267.2382812672, 467.26171873640004, 291.6245727744], [465.8682861424, 207.3176880128, 599.6444092044, 276.9927978496], [474.9260254212, 265.8447876096, 532.0595703112, 297.1985473536]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046591_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two hats, and a glasses.", "boxes_value": [[51.4708252268, 49.317688012800005, 253.64440920439995, 246.1743163904], [51.4708252268, 108.21185300479999, 149.75781253559995, 246.1743163904], [15.659790023200003, 49.34271242240001, 304, 295], [80.15344240919995, 109.23828126720002, 121.26171873640004, 133.6245727744], [119.8682861424, 49.317688012800005, 253.64440920439995, 118.99279784959998], [128.9260254212, 107.84478760960002, 186.0595703112, 139.19854735360002]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046592.jpg", "text": "Please describe the content within the area displayed in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[132.23480227230002, 300.4394531328, 441.2038573922, 511.5281982464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046592_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[78.23480227230002, 53.4394531328, 387.2038573922, 264.5281982464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046592.jpg", "text": "Please describe the content within the area displayed in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a bowl, a wine glass, a plate, an apple, and a desk.", "boxes_value": [[132.23480227230002, 300.4394531328, 441.2038573922, 511.5281982464], [253.9565429585, 330.4709472768, 335.51892091679997, 510.501892096], [399.4213866907, 327.1939086848, 441.2038573922, 511.5281982464], [182.31256102530003, 300.4394531328, 235.264709501, 323.359008768], [150.6727905155, 258.26727296, 182.29510497959998, 331.4559326208], [291.9977416914, 311.15747072, 327.3067627077, 321.1505737216], [132.23480227230002, 380.476806656, 159.7615356492, 406.0957031424], [0, 328.8952636928, 267.92938229780003, 400.5272827392]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046592_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a bowl, a wine glass, a plate, an apple, and a desk.", "boxes_value": [[78.23480227230002, 53.4394531328, 387.2038573922, 264.5281982464], [199.9565429585, 83.47094727680002, 281.51892091679997, 263.501892096], [345.4213866907, 80.19390868480002, 387.2038573922, 264.5281982464], [128.31256102530003, 53.4394531328, 181.264709501, 76.35900876800002], [96.6727905155, 11.267272960000014, 128.29510497959998, 84.45593262080001], [237.9977416914, 64.15747071999999, 273.3067627077, 74.15057372159998], [78.23480227230002, 133.476806656, 105.7615356492, 159.0957031424], [0, 81.89526369279997, 213.92938229780003, 153.52728273920002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046594.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please mention the objects and their locations.", "boxes_value": [[40.7904052736, 210.60986327039998, 231.6464843776, 333.5751952896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046594_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please mention the objects and their locations.", "boxes_value": [[40.7904052736, 31.609863270399984, 231.6464843776, 154.5751952896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046594.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please mention the objects and their locations. For your reference, objects involved in this region include six traffic signs.", "boxes_value": [[40.7904052736, 210.60986327039998, 231.6464843776, 333.5751952896], [162.400268544, 317.2041015552, 231.7053833216, 356.2569580032], [166.8731689472, 248.7117920256, 231.6464843776, 288.8919677952], [80.6242065408, 210.60986327039998, 154.7498168832, 259.79602053120004], [51.8745727488, 268.10913085439995, 156.1353759744, 310.71398922239996], [82.009765632, 235.89569088000002, 153.710693376, 282.31079101439997], [40.7904052736, 301.361694336, 150.939636224, 333.5751952896]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046594_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please mention the objects and their locations. For your reference, objects involved in this region include six traffic signs.", "boxes_value": [[40.7904052736, 31.609863270399984, 231.6464843776, 154.5751952896], [162.400268544, 138.2041015552, 231.7053833216, 177.2569580032], [166.8731689472, 69.7117920256, 231.6464843776, 109.8919677952], [80.6242065408, 31.609863270399984, 154.7498168832, 80.79602053120004], [51.8745727488, 89.10913085439995, 156.1353759744, 131.71398922239996], [82.009765632, 56.89569088000002, 153.710693376, 103.31079101439997], [40.7904052736, 122.36169433600003, 150.939636224, 154.5751952896]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046595.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[572.858276352, 259.237976064, 701.8430175743999, 468.7564086784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046595_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[32.85827635199996, 53.23797606400001, 161.84301757439994, 262.7564086784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046595.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, a chair, a desk, and a pillow.", "boxes_value": [[572.858276352, 259.237976064, 701.8430175743999, 468.7564086784], [666.486816384, 259.237976064, 701.8430175743999, 275.6065673728], [671.7247314432, 275.6065673728, 695.9503173887999, 291.320495616], [619.9998779136, 313.5817870848, 701.1883544832, 392.1512451072], [572.858276352, 388.877502464, 699.8787841536, 468.7564086784], [623.1657715199999, 395.0384521728, 700.9117431552, 434.4859619328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046595_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, a chair, a desk, and a pillow.", "boxes_value": [[32.85827635199996, 53.23797606400001, 161.84301757439994, 262.7564086784], [126.48681638400001, 53.23797606400001, 161.84301757439994, 69.60656737279999], [131.72473144319997, 69.60656737279999, 155.95031738879993, 85.32049561600002], [79.99987791360002, 107.5817870848, 161.1883544832, 186.1512451072], [32.85827635199996, 182.87750246399997, 159.8787841536, 262.7564086784], [83.16577151999991, 189.03845217280002, 160.91174315520004, 228.48596193280002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046596.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[400.5339355648, 48.046508787, 511.9713134592, 255.78857424260002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046596_crop.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[28.533935564800004, 48.046508787, 139.97131345920002, 255.78857424260002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046596.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three helmets, and two boots.", "boxes_value": [[400.5339355648, 48.046508787, 511.9713134592, 255.78857424260002], [400.5339355648, 115.56103517, 423.9280395264, 142.44366455], [413.6674804736, 48.046508787, 433.367736832, 67.13116455000001], [488.4879760896, 126.13812255540002, 511.9713134592, 162.7763061334], [451.9761963008, 204.4782104568, 470.5322875904, 255.78857424260002], [435.9505005056, 229.6414184442, 463.9251708928, 280.6705932628]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046596_crop.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three helmets, and two boots.", "boxes_value": [[28.533935564800004, 48.046508787, 139.97131345920002, 255.78857424260002], [28.533935564800004, 115.56103517, 51.92803952640003, 142.44366455], [41.667480473599994, 48.046508787, 61.36773683199999, 67.13116455000001], [116.48797608960001, 126.13812255540002, 139.97131345920002, 162.7763061334], [79.97619630079998, 204.4782104568, 98.53228759040002, 255.78857424260002], [63.95050050560002, 229.6414184442, 91.9251708928, 280.6705932628]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046597.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[129.4819336192, 876.8754882445, 297.3304443392, 910.9765625207999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046597_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[42.48193361919999, 8.875488244500048, 210.33044433920003, 42.97656252079992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046597.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[129.4819336192, 876.8754882445, 297.3304443392, 910.9765625207999], [129.4819336192, 879.6927490031001, 165.9083251712, 910.548095717], [171.4793701376, 879.0499267234001, 208.7628173824, 910.9765625207999], [197.4063720448, 878.1928711329, 223.2437133824, 910.7623291189001], [243.5924682752, 881.6522216361, 272.8497924608, 910.7104492176001], [271.4566040064, 876.8754882445, 297.3304443392, 910.7104492176001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046597_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[42.48193361919999, 8.875488244500048, 210.33044433920003, 42.97656252079992], [42.48193361919999, 11.692749003100062, 78.9083251712, 42.548095717000024], [84.4793701376, 11.049926723400063, 121.76281738239999, 42.97656252079992], [110.40637204480001, 10.192871132899995, 136.2437133824, 42.762329118900084], [156.5924682752, 13.652221636099966, 185.84979246080002, 42.71044921760006], [184.45660400640003, 8.875488244500048, 210.33044433920003, 42.71044921760006]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046600.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[532.6126709337, 275.2850341888, 770.3911133127, 511.7124023296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046600_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[59.612670933699974, 59.28503418880001, 297.3911133127, 295.7124023296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046600.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a cabinet, a picture, a chair, and a stool.", "boxes_value": [[532.6126709337, 275.2850341888, 770.3911133127, 511.7124023296], [653.6794433829, 275.2850341888, 725.7009277545001, 373.529602048], [748.5999755526, 365.0347290112, 770.3911133127, 480.6383056896], [712.9168701251999, 238.8997802496, 729.9716797128, 310.7921142784], [532.6126709337, 359.6499633664, 734.0168457062999, 511.7124023296], [496.27917480959997, 468.6505126912, 680.6380615362, 512.1610107392]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046600_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a cabinet, a picture, a chair, and a stool.", "boxes_value": [[59.612670933699974, 59.28503418880001, 297.3911133127, 295.7124023296], [180.6794433829, 59.28503418880001, 252.70092775450007, 157.52960204800002], [275.5999755526, 149.0347290112, 297.3911133127, 264.6383056896], [239.91687012519992, 22.899780249600013, 256.9716797128, 94.79211427839999], [59.612670933699974, 143.64996336640002, 261.01684570629993, 295.7124023296], [23.279174809599965, 252.6505126912, 207.63806153619998, 296]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046601.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[365.4003905876, 321.94982912, 423.646972678, 373.2389526528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046601_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[15.400390587599986, 12.949829120000004, 73.646972678, 64.23895265279998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046601.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two people, a bracelet, and two bottles.", "boxes_value": [[365.4003905876, 321.94982912, 423.646972678, 373.2389526528], [232.9971313362, 310.5015868928, 497.2983398614, 512.1055908352], [315.4827880862, 194.9070434816, 472.2829589782, 346.0668945408], [389.93469237119996, 243.4135131648, 608.7780761722, 510.763488768], [405.9890136752, 354.3630981632, 423.646972678, 373.2389526528], [365.4003905876, 321.94982912, 390.70361328240006, 371.262023936], [412.4193115154, 313.9690551808, 433.2099609554, 354.9106445312]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046601_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two people, a bracelet, and two bottles.", "boxes_value": [[15.400390587599986, 12.949829120000004, 73.646972678, 64.23895265279998], [0, 1.5015868927999918, 88, 77], [0, 0, 88, 37.06689454079998], [39.93469237119996, 0, 88, 77], [55.9890136752, 45.36309816319999, 73.646972678, 64.23895265279998], [15.400390587599986, 12.949829120000004, 40.703613282400056, 62.26202393599999], [62.41931151540001, 4.969055180800012, 83.20996095539999, 45.91064453119998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046603.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[377.27636719000003, 114.6658324992, 420.9673257448, 281.48803712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046603_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[11.27636719000003, 42.66583249919999, 54.96732574480001, 209.48803712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046603.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[377.27636719000003, 114.6658324992, 420.9673257448, 281.48803712], [400.00292972, 130.6227417088, 447.8736572368, 285.8399657984], [377.27636719000003, 124.3366699008, 406.2889404148, 281.48803712], [391.29919436079996, 114.6658324992, 411.1243896664, 147.0631713792], [361.319457992, 119.5012207104, 391.29919436079996, 254.4096679936], [379.38946243000004, 176.0183484928, 420.9673257448, 217.266228736], [400.56849113, 273.462973952, 423.011186172, 286.8662501376]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046603_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[11.27636719000003, 42.66583249919999, 54.96732574480001, 209.48803712], [34.00292972, 58.62274170879999, 65, 213.8399657984], [11.27636719000003, 52.336669900800004, 40.28894041479998, 209.48803712], [25.29919436079996, 42.66583249919999, 45.12438966640002, 75.06317137920001], [0, 47.501220710400005, 25.29919436079996, 182.4096679936], [13.389462430000037, 104.01834849279999, 54.96732574480001, 145.266228736], [34.568491129999984, 201.46297395200003, 57.01118617200001, 214.8662501376]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046604.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each object you identify.", "boxes_value": [[125.87603757589999, 174.5845947392, 258.1744994779, 255.1267700224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046604_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each object you identify.", "boxes_value": [[33.876037575899986, 20.584594739200014, 166.17449947789999, 101.1267700224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046604.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a vase, a picture, and a cabinet.", "boxes_value": [[125.87603757589999, 174.5845947392, 258.1744994779, 255.1267700224], [200.7243041739, 183.459167488, 258.1744994779, 255.1267700224], [229.1592407503, 164.8894043136, 266.8790893492, 190.7129516544], [125.87603757589999, 217.3399657984, 142.7906494372, 251.7733764608], [174.60632325030002, 211.7017822208, 201.1864624075, 240.2955932672], [141.17675779729998, 174.5845947392, 174.0123901385, 244.8164673024]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046604_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a vase, a picture, and a cabinet.", "boxes_value": [[33.876037575899986, 20.584594739200014, 166.17449947789999, 101.1267700224], [108.72430417390001, 29.45916748799999, 166.17449947789999, 101.1267700224], [137.1592407503, 10.88940431360001, 174.8790893492, 36.7129516544], [33.876037575899986, 63.33996579839999, 50.79064943719999, 97.7733764608], [82.60632325030002, 57.7017822208, 109.18646240749999, 86.29559326719999], [49.176757797299985, 20.584594739200014, 82.0123901385, 90.81646730240001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046605.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[259.721435535, 144.1713867264, 442.6529541091, 498.2655029248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046605_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[46.72143553500001, 89.17138672639999, 229.6529541091, 443.2655029248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046605.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, two cabinets, a handbag, and a street lights.", "boxes_value": [[259.721435535, 144.1713867264, 442.6529541091, 498.2655029248], [378.3839111023, 165.3561401344, 399.01464844310004, 197.5915527168], [325.0053100621, 389.4060668928, 365.1561279136, 461.677612288], [395.6320800834, 379.89733888, 422.3233642404, 439.4394531328], [259.721435535, 473.2959594496, 278.2471923517, 498.2655029248], [415.8806152003, 144.1713867264, 442.6529541091, 454.4158325248]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046605_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, two cabinets, a handbag, and a street lights.", "boxes_value": [[46.72143553500001, 89.17138672639999, 229.6529541091, 443.2655029248], [165.38391110229998, 110.3561401344, 186.01464844310004, 142.5915527168], [112.00531006210002, 334.4060668928, 152.15612791360002, 406.677612288], [182.6320800834, 324.89733888, 209.3233642404, 384.4394531328], [46.72143553500001, 418.2959594496, 65.2471923517, 443.2655029248], [202.88061520029999, 89.17138672639999, 229.6529541091, 399.4158325248]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046606.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[147.73352049300001, 157.100891136, 324.554565408, 442.5615234559999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046606_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[44.733520493000015, 72.100891136, 221.55456540799997, 357.5615234559999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046606.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[147.73352049300001, 157.100891136, 324.554565408, 442.5615234559999], [147.73352049300001, 157.100891136, 242.699462898, 442.5615234559999], [260.128051767, 44.3294677504, 358.26556394700003, 439.3476562432], [310.048095675, 396.9547119104, 333.983764644, 438.5399170048], [295.29980469599997, 343.7643432448, 324.554565408, 389.2179565568], [204.020080563, 361.1005249024, 216.543029811, 402.6660156416], [187.50048830699998, 407.1956176896, 206.950988757, 441.0341796864]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046606_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[44.733520493000015, 72.100891136, 221.55456540799997, 357.5615234559999], [44.733520493000015, 72.100891136, 139.699462898, 357.5615234559999], [157.128051767, 0, 255.26556394700003, 354.3476562432], [207.048095675, 311.9547119104, 230.98376464400002, 353.5399170048], [192.29980469599997, 258.7643432448, 221.55456540799997, 304.2179565568], [101.020080563, 276.1005249024, 113.543029811, 317.6660156416], [84.50048830699998, 322.1956176896, 103.950988757, 356.0341796864]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046607.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations.", "boxes_value": [[318.593139654, 164.0935668736, 507.5385742335, 376.7149047808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046607_crop.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations.", "boxes_value": [[47.593139654000026, 54.0935668736, 236.53857423350001, 266.7149047808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046607.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations. For your reference, objects involved in this region include two stools, two people, and a stuffed toy.", "boxes_value": [[318.593139654, 164.0935668736, 507.5385742335, 376.7149047808], [455.8309326405, 259.2005615104, 521.0690917809001, 407.411499008], [318.593139654, 276.8145752064, 369.2276611191, 376.7149047808], [443.1228027057, 164.0935668736, 507.5385742335, 277.9712524288], [354.5512694937, 161.7929687552, 473.03015137619997, 481.57080079360003], [320.1798095436, 193.9663696384, 350.4157715211, 255.3145752064]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046607_crop.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations. For your reference, objects involved in this region include two stools, two people, and a stuffed toy.", "boxes_value": [[47.593139654000026, 54.0935668736, 236.53857423350001, 266.7149047808], [184.8309326405, 149.2005615104, 250.06909178090007, 297.411499008], [47.593139654000026, 166.81457520639998, 98.22766111909999, 266.7149047808], [172.1228027057, 54.0935668736, 236.53857423350001, 167.97125242880003], [83.55126949369998, 51.79296875520001, 202.03015137619997, 319], [49.179809543600015, 83.96636963840001, 79.4157715211, 145.3145752064]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046609.jpg", "text": "Tell me what you see within the designated area in the picture . Specify the location of each mentioned object.", "boxes_value": [[0, 184.1857299968, 176.9655761688, 511.9299316224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046609_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Specify the location of each mentioned object.", "boxes_value": [[0, 82.1857299968, 176.9655761688, 409.9299316224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046609.jpg", "text": "Tell me what you see within the designated area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, two cabinets, a person, and a refrigerator.", "boxes_value": [[0, 184.1857299968, 176.9655761688, 511.9299316224], [49.3889770808, 349.5849609216, 86.67010499579999, 420.841491712], [158.6793823078, 223.6942749184, 176.9655761688, 366.3820801024], [0, 235.8972778496, 54.426391596, 379.5793457152], [0.1676025594, 445.9147949056, 66.7669678005, 511.9299316224], [0.1430664308, 184.1857299968, 102.3398437497, 380.2666626048]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046609_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, two cabinets, a person, and a refrigerator.", "boxes_value": [[0, 82.1857299968, 176.9655761688, 409.9299316224], [49.3889770808, 247.5849609216, 86.67010499579999, 318.841491712], [158.6793823078, 121.6942749184, 176.9655761688, 264.3820801024], [0, 133.8972778496, 54.426391596, 277.5793457152], [0.1676025594, 343.9147949056, 66.7669678005, 409.9299316224], [0.1430664308, 82.1857299968, 102.3398437497, 278.2666626048]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046612.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[26.247497530599997, 121.8638916096, 88.65875246610001, 356.4246825984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046612_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[16.247497530599997, 58.8638916096, 78.65875246610001, 293.4246825984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046612.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, four storage boxes, and a plate.", "boxes_value": [[26.247497530599997, 121.8638916096, 88.65875246610001, 356.4246825984], [0.2211913941, 138.6121216, 102.3507690473, 303.2621459968], [61.9647216929, 222.697143552, 88.65875246610001, 243.5881347584], [26.247497530599997, 330.0183105536, 53.1784668093, 356.4246825984], [71.0123291219, 125.1072997888, 89.96185301540001, 141.053955072], [40.3352051076, 121.8638916096, 71.2825927683, 140.6485595648], [35.6055908175, 149.6802368, 59.4321899305, 170.8593750016]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046612_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, four storage boxes, and a plate.", "boxes_value": [[16.247497530599997, 58.8638916096, 78.65875246610001, 293.4246825984], [0, 75.6121216, 92.3507690473, 240.26214599679997], [51.9647216929, 159.697143552, 78.65875246610001, 180.5881347584], [16.247497530599997, 267.0183105536, 43.1784668093, 293.4246825984], [61.012329121899995, 62.107299788800006, 79.96185301540001, 78.05395507200001], [30.335205107599997, 58.8638916096, 61.282592768300006, 77.6485595648], [25.6055908175, 86.68023679999999, 49.4321899305, 107.8593750016]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046613.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Include the coordinates for each mentioned object.", "boxes_value": [[21.300305950000002, 182.3959350784, 309.932128891, 445.699768064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046613_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Include the coordinates for each mentioned object.", "boxes_value": [[21.300305950000002, 66.39593507839999, 309.932128891, 329.699768064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046613.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two lamps, a tie, and a wine glass.", "boxes_value": [[21.300305950000002, 182.3959350784, 309.932128891, 445.699768064], [68.956298796, 350.234191872, 101.655151367, 396.934082048], [287.499633806, 199.76843264, 309.932128891, 218.8837280256], [56.379821805999995, 182.3959350784, 76.519409197, 208.2621459968], [21.300305950000002, 357.1598510592, 39.066695514, 401.6386328576], [45.207641633, 398.0147704832, 68.823059074, 445.699768064]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046613_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two lamps, a tie, and a wine glass.", "boxes_value": [[21.300305950000002, 66.39593507839999, 309.932128891, 329.699768064], [68.956298796, 234.234191872, 101.655151367, 280.934082048], [287.499633806, 83.76843263999999, 309.932128891, 102.88372802559999], [56.379821805999995, 66.39593507839999, 76.519409197, 92.2621459968], [21.300305950000002, 241.15985105919998, 39.066695514, 285.6386328576], [45.207641633, 282.0147704832, 68.823059074, 329.699768064]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046614.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[198.466308608, 145.8977050624, 482.1802368, 411.4536742912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046614_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[71.46630860799999, 66.89770506240001, 355.1802368, 332.4536742912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046614.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a blackboard, a tape measur, a scissors, and a cup.", "boxes_value": [[198.466308608, 145.8977050624, 482.1802368, 411.4536742912], [321.8922119168, 287.8928833024, 482.1802368, 379.5468749824], [219.475219712, 145.8977050624, 375.4532470784, 303.2133789184], [225.9757080064, 272.8206787072, 252.1912231424, 323.7295532032], [239.39501952, 362.2993774592, 267.5720825344, 394.1121826304], [198.466308608, 350.3345336832, 238.8580932608, 411.4536742912]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046614_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a blackboard, a tape measur, a scissors, and a cup.", "boxes_value": [[71.46630860799999, 66.89770506240001, 355.1802368, 332.4536742912], [194.89221191680002, 208.89288330239998, 355.1802368, 300.5468749824], [92.47521971200001, 66.89770506240001, 248.4532470784, 224.2133789184], [98.9757080064, 193.8206787072, 125.19122314239999, 244.7295532032], [112.39501952, 283.2993774592, 140.5720825344, 315.1121826304], [71.46630860799999, 271.3345336832, 111.85809326079999, 332.4536742912]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046616.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[96.3416748032, 173.39135740199998, 316.9977417216, 368.207824734]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046616_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[55.34167480319999, 49.39135740199998, 275.9977417216, 244.20782473399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046616.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two tents, a barrel, and two boots.", "boxes_value": [[96.3416748032, 173.39135740199998, 316.9977417216, 368.207824734], [96.3416748032, 173.39135740199998, 166.8724365312, 340.35229490399996], [0, 177.535095192, 201.8941040128, 286.761596706], [288.444457984, 219.89373777, 316.9977417216, 253.07733155399998], [131.714050304, 338.75152587, 164.6807250944, 368.207824734], [163.3739623936, 305.37542727, 198.0257568256, 333.096862788], [275.7034912256, 301.91027832, 302.2698364416, 337.13958742200003]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046616_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two tents, a barrel, and two boots.", "boxes_value": [[55.34167480319999, 49.39135740199998, 275.9977417216, 244.20782473399998], [55.34167480319999, 49.39135740199998, 125.87243653120001, 216.35229490399996], [0, 53.535095192, 160.8941040128, 162.76159670599998], [247.444457984, 95.89373777, 275.9977417216, 129.07733155399998], [90.71405030400001, 214.75152587000002, 123.6807250944, 244.20782473399998], [122.37396239360001, 181.37542727, 157.0257568256, 209.096862788], [234.70349122559998, 177.91027831999997, 261.2698364416, 213.13958742200003]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046618.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[177.3934936576, 159.5488891315, 393.521728512, 409.48645020410004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046618_crop.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[54.393493657600004, 62.548889131500005, 270.521728512, 312.48645020410004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046618.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a baseball glove, a person, two sneakers, and a helmet.", "boxes_value": [[177.3934936576, 159.5488891315, 393.521728512, 409.48645020410004], [177.3934936576, 318.3287353573, 210.264648448, 368.4711914219], [195.7790527488, 160.6585693399, 395.2345581056, 409.6994628781], [196.5440063488, 365.88061524600005, 224.2110595584, 409.48645020410004], [343.6006469632, 347.5361328446, 393.521728512, 375.6877441509], [254.679199232, 159.5488891315, 312.1425170944, 197.2592163006]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046618_crop.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a baseball glove, a person, two sneakers, and a helmet.", "boxes_value": [[54.393493657600004, 62.548889131500005, 270.521728512, 312.48645020410004], [54.393493657600004, 221.3287353573, 87.264648448, 271.4711914219], [72.77905274880001, 63.6585693399, 272.2345581056, 312.6994628781], [73.5440063488, 268.88061524600005, 101.2110595584, 312.48645020410004], [220.6006469632, 250.5361328446, 270.521728512, 278.6877441509], [131.679199232, 62.548889131500005, 189.14251709439998, 100.2592163006]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046619.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[239.17822266000002, 269.0369872896, 520.8687743999999, 505.1733398528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046619_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[71.17822266000002, 59.036987289600006, 352, 295.1733398528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046619.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[239.17822266000002, 269.0369872896, 520.8687743999999, 505.1733398528], [259.11383056, 269.0369872896, 388.320495628, 416.7813110272], [311.464843768, 271.2647094784, 466.29003907199996, 445.0253906432], [374.954284692, 281.2893676544, 520.8687743999999, 475.0993652224], [475.200866712, 313.5910034432, 519.7548828160001, 505.1733398528], [239.17822266000002, 312.511230464, 270.275085444, 394.345153792]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046619_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[71.17822266000002, 59.036987289600006, 352, 295.1733398528], [91.11383056, 59.036987289600006, 220.320495628, 206.7813110272], [143.46484376799998, 61.26470947839999, 298.29003907199996, 235.0253906432], [206.954284692, 71.2893676544, 352, 265.0993652224], [307.200866712, 103.59100344320001, 351.7548828160001, 295.1733398528], [71.17822266000002, 102.511230464, 102.27508544400001, 184.34515379200002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046620.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe.", "boxes_value": [[590.6225586254, 343.9594116096, 686.356567365, 511.3510131712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046620_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe.", "boxes_value": [[24.62255862539996, 41.95941160960001, 120, 209.3510131712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046620.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four chairs, and a blackboard.", "boxes_value": [[590.6225586254, 343.9594116096, 686.356567365, 511.3510131712], [590.6225586254, 430.8563232256, 646.1092529094, 511.3510131712], [627.3531494412, 445.3141479424, 686.356567365, 511.3510131712], [596.4768066166, 343.9594116096, 667.3928222612, 436.1309203968], [590.6225586254, 430.8563232256, 646.1092529094, 511.3510131712], [627.3531494412, 445.3141479424, 686.356567365, 511.3510131712]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046620_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four chairs, and a blackboard.", "boxes_value": [[24.62255862539996, 41.95941160960001, 120, 209.3510131712], [24.62255862539996, 128.85632322560002, 80.10925290939997, 209.3510131712], [61.3531494412, 143.31414794239998, 120, 209.3510131712], [30.476806616600015, 41.95941160960001, 101.39282226119997, 134.13092039679998], [24.62255862539996, 128.85632322560002, 80.10925290939997, 209.3510131712], [61.3531494412, 143.31414794239998, 120, 209.3510131712]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00046623.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations.", "boxes_value": [[284.7991332864, 490.89367678959997, 511.7120971776, 581.4500732456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046623_crop.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations.", "boxes_value": [[56.79913328639998, 22.893676789599965, 283.7120971776, 113.45007324560004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046623.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations. For your reference, objects involved in this region include three people, two suvs, and a truck.", "boxes_value": [[284.7991332864, 490.89367678959997, 511.7120971776, 581.4500732456], [387.8621826048, 505.7314453363, 420.5879516672, 581.4500732456], [284.7991332864, 521.2421875113, 312.277343744, 573.7741698911001], [267.0190429696, 511.13989257509996, 297.3259277312, 575.7946777265], [420.324157696, 517.0368652625, 438.9044189696, 534.5183105714], [426.210266112, 513.0721435603, 463.6398925824, 535.8234863479], [484.1445312512, 490.89367678959997, 511.7120971776, 524.3176269581]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046623_crop.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations. For your reference, objects involved in this region include three people, two suvs, and a truck.", "boxes_value": [[56.79913328639998, 22.893676789599965, 283.7120971776, 113.45007324560004], [159.8621826048, 37.7314453363, 192.5879516672, 113.45007324560004], [56.79913328639998, 53.24218751130002, 84.277343744, 105.77416989110009], [39.01904296959998, 43.139892575099964, 69.32592773120001, 107.79467772650003], [192.324157696, 49.03686526249999, 210.9044189696, 66.51831057139998], [198.210266112, 45.07214356029999, 235.63989258240002, 67.82348634790003], [256.1445312512, 22.893676789599965, 283.7120971776, 56.31762695810005]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046625.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.9946289279999999, 198.213928224, 617.974731456, 479.511047376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046625_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.9946289279999999, 71.213928224, 617.974731456, 352.511047376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046625.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a chair, a stool, a carpet, and a dog.", "boxes_value": [[0.9946289279999999, 198.213928224, 617.974731456, 479.511047376], [0.9946289279999999, 198.213928224, 151.338439936, 479.466796896], [431.857910144, 60.337646496, 640.872436544, 339.75708009600004], [303.375061056, 241.850280768, 475.86096192, 394.760864256], [113.55895993600001, 234.199462896, 617.974731456, 479.511047376], [227.752807616, 247.203186048, 311.543090816, 409.69506835199996]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046625_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a chair, a stool, a carpet, and a dog.", "boxes_value": [[0.9946289279999999, 71.213928224, 617.974731456, 352.511047376], [0.9946289279999999, 71.213928224, 151.338439936, 352.466796896], [431.857910144, 0, 640, 212.75708009600004], [303.375061056, 114.850280768, 475.86096192, 267.760864256], [113.55895993600001, 107.199462896, 617.974731456, 352.511047376], [227.752807616, 120.20318604799999, 311.543090816, 282.69506835199996]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046626.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[158.5513305664, 165.910766592, 773.0339355084, 392.4876708864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046626_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[154.5513305664, 56.91076659199999, 768, 283.4876708864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046626.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, and three candies.", "boxes_value": [[158.5513305664, 165.910766592, 773.0339355084, 392.4876708864], [623.7528076532001, 205.1798095872, 704.004516628, 336.8604736512], [696.6574706772, 211.396484352, 773.0339355084, 392.4876708864], [158.5513305664, 165.910766592, 277.3970947552, 342.089233408], [361.007202134, 165.910766592, 466.7142333976, 359.4084472832], [235.49194335279998, 10.7175903232, 293.0335693728, 330.5810546688]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046626_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, and three candies.", "boxes_value": [[154.5513305664, 56.91076659199999, 768, 283.4876708864], [619.7528076532001, 96.1798095872, 700.004516628, 227.86047365119998], [692.6574706772, 102.39648435199999, 768, 283.4876708864], [154.5513305664, 56.91076659199999, 273.3970947552, 233.08923340799998], [357.007202134, 56.91076659199999, 462.7142333976, 250.40844728320002], [231.49194335279998, 0, 289.0335693728, 221.5810546688]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046628.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[340.973266575, 226.93316650120002, 407.184936525, 274.0382690324]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046628_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[16.973266575000025, 11.93316650120002, 83.18493652500001, 59.038269032400024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046628.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[340.973266575, 226.93316650120002, 407.184936525, 274.0382690324], [260.32580565, 214.8892212054, 375.19140622500004, 391.751770024], [374.023193325, 225.1586303862, 468.567260775, 347.4138793914], [372.69396975, 226.93316650120002, 407.184936525, 270.8680419958], [340.973266575, 230.05212402680002, 364.7615967, 257.3733520408], [348.812622075, 237.7511596756, 373.3309326, 274.0382690324]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046628_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[16.973266575000025, 11.93316650120002, 83.18493652500001, 59.038269032400024], [0, 0, 51.19140622500004, 70], [50.02319332500002, 10.158630386200002, 99, 70], [48.69396975000001, 11.93316650120002, 83.18493652500001, 55.86804199580001], [16.973266575000025, 15.052124026800016, 40.761596699999984, 42.3733520408], [24.81262207499998, 22.751159675600007, 49.33093259999998, 59.038269032400024]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046632.jpg", "text": "In the image , could you provide a description for the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[2.313598661, 260.4329833984, 406.2260741792, 509.4256591872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046632_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[2.313598661, 62.43298339839998, 406.2260741792, 311.4256591872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046632.jpg", "text": "In the image , could you provide a description for the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three street lights, and three potted plants.", "boxes_value": [[2.313598661, 260.4329833984, 406.2260741792, 509.4256591872], [26.4558715421, 252.1177368064, 40.7631225628, 331.118835456], [290.84436037800003, 260.4329833984, 299.2991943788, 308.8771362304], [326.2634277056, 249.4644775424, 336.5463867392, 312.3048095744], [340.3452148514, 383.9382934528, 406.2260741792, 470.2108764672], [81.5275268137, 375.311035136, 290.93457031210005, 509.4256591872], [2.313598661, 354.1350708224, 80.7432251153, 453.7406616064]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046632_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three street lights, and three potted plants.", "boxes_value": [[2.313598661, 62.43298339839998, 406.2260741792, 311.4256591872], [26.4558715421, 54.11773680639999, 40.7631225628, 133.118835456], [290.84436037800003, 62.43298339839998, 299.2991943788, 110.87713623040003], [326.2634277056, 51.464477542400004, 336.5463867392, 114.3048095744], [340.3452148514, 185.9382934528, 406.2260741792, 272.2108764672], [81.5275268137, 177.311035136, 290.93457031210005, 311.4256591872], [2.313598661, 156.1350708224, 80.7432251153, 255.7406616064]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046634.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations.", "boxes_value": [[368.970214848, 22.751708976, 640.322387712, 330.039733872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046634_crop.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations.", "boxes_value": [[67.97021484800001, 22.751708976, 339, 330.039733872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046634.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a cabinet, and three pillows.", "boxes_value": [[368.970214848, 22.751708976, 640.322387712, 330.039733872], [449.642456064, 22.751708976, 546.449218752, 113.691345216], [597.7861328, 127.62567139199999, 640.322387712, 280.16961672], [368.970214848, 275.035888656, 496.579101568, 330.039733872], [456.242919936, 268.43542478399996, 588.985473664, 317.572204608], [349.168823232, 295.570678704, 395.372070336, 325.639404288]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046634_crop.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a cabinet, and three pillows.", "boxes_value": [[67.97021484800001, 22.751708976, 339, 330.039733872], [148.642456064, 22.751708976, 245.44921875199998, 113.691345216], [296.7861328, 127.62567139199999, 339, 280.16961672], [67.97021484800001, 275.035888656, 195.579101568, 330.039733872], [155.24291993600002, 268.43542478399996, 287.985473664, 317.572204608], [48.16882323200002, 295.570678704, 94.37207033599998, 325.639404288]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046635.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[377.2893066408, 327.8247680512, 509.1500243979, 387.3311157248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046635_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[33.28930664080002, 15.82476805120001, 165.15002439789998, 75.33111572479999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046635.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[377.2893066408, 327.8247680512, 509.1500243979, 387.3311157248], [377.2893066408, 329.756774912, 398.9279785203, 382.5010376192], [428.5201416138, 327.8247680512, 454.0229492328, 384.23986816], [451.9620361584, 327.8247680512, 479.3968505763, 387.3311157248], [484.93530270540003, 331.1735839744, 506.0588378463, 379.4742431744], [496.14099119910003, 328.5975952384, 509.1500243979, 372.0037231616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046635_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[33.28930664080002, 15.82476805120001, 165.15002439789998, 75.33111572479999], [33.28930664080002, 17.756774912000026, 54.92797852029997, 70.50103761920002], [84.52014161379998, 15.82476805120001, 110.02294923279999, 72.23986816000001], [107.9620361584, 15.82476805120001, 135.39685057629998, 75.33111572479999], [140.93530270540003, 19.173583974400003, 162.05883784629998, 67.47424317439999], [152.14099119910003, 16.597595238400004, 165.15002439789998, 60.00372316160002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046637.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[567.716796878, 226.0077514752, 771.4154052990001, 378.895874048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046637_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[51.71679687799997, 39.007751475199996, 254, 191.895874048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046637.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bed, a lamp, a desk, and two chairs.", "boxes_value": [[567.716796878, 226.0077514752, 771.4154052990001, 378.895874048], [588.68823244, 226.0077514752, 729.642578126, 293.3855590912], [728.889038108, 237.9683837952, 766.562866223, 278.9435424768], [303.89721677700004, 312.181823744, 769.701293977, 511.4072876032], [567.716796878, 288.6731567616, 657.009399431, 350.9919433728], [653.288818336, 302.625122048, 771.4154052990001, 378.895874048]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046637_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bed, a lamp, a desk, and two chairs.", "boxes_value": [[51.71679687799997, 39.007751475199996, 254, 191.895874048], [72.68823243999998, 39.007751475199996, 213.642578126, 106.38555909119998], [212.88903810800002, 50.9683837952, 250.56286622300001, 91.94354247680002], [0, 125.18182374399998, 253.70129397699998, 230], [51.71679687799997, 101.67315676160001, 141.00939943100002, 163.9919433728], [137.28881833599996, 115.62512204799998, 254, 191.895874048]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046638.jpg", "text": "Regarding the coordinates in image , can you provide a description? Give coordinates for the items you reference.", "boxes_value": [[404.7578125056, 302.6406860288, 619.9298095872, 512.0278320128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046638_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Give coordinates for the items you reference.", "boxes_value": [[54.757812505599986, 52.64068602880002, 269.9298095872, 262]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046638.jpg", "text": "Regarding the coordinates in image , can you provide a description? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a handbag, and two hats.", "boxes_value": [[404.7578125056, 302.6406860288, 619.9298095872, 512.0278320128], [370.37707522560004, 302.1833496064, 434.96936033279997, 499.1907958784], [447.10937502720003, 308.6362914816, 521.948364288, 512.0278320128], [595.4906006016, 359.2775878656, 619.9298095872, 378.198303232], [478.95410158080006, 308.7223510528, 502.87536622080006, 333.4544677888], [404.7578125056, 302.6406860288, 424.01635745280004, 323.1156616192]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046638_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a handbag, and two hats.", "boxes_value": [[54.757812505599986, 52.64068602880002, 269.9298095872, 262], [20.377075225600038, 52.183349606399986, 84.96936033279997, 249.19079587840002], [97.10937502720003, 58.63629148159998, 171.948364288, 262], [245.49060060160002, 109.27758786560003, 269.9298095872, 128.198303232], [128.95410158080006, 58.72235105279998, 152.87536622080006, 83.4544677888], [54.757812505599986, 52.64068602880002, 74.01635745280004, 73.11566161920001]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046640.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[386.5679931621, 85.2762450944, 635.7476806328999, 133.6008300544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046640_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[62.56799316209998, 12.276245094399997, 311.7476806328999, 60.60083005440001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046640.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[386.5679931621, 85.2762450944, 635.7476806328999, 133.6008300544], [386.5679931621, 85.2762450944, 466.3350830131, 96.8787231232], [583.0037841846, 80.3240966656, 642.6737060617, 92.0449828864], [413.0509033079, 124.5437622272, 453.5412597883, 133.6008300544], [487.1055908399, 121.8799438336, 542.5134277042, 131.4697265664], [586.2003174141, 120.2816162304, 635.7476806328999, 129.8714599424]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046640_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[62.56799316209998, 12.276245094399997, 311.7476806328999, 60.60083005440001], [62.56799316209998, 12.276245094399997, 142.33508301310002, 23.878723123200004], [259.0037841846, 7.3240966655999955, 318.67370606170005, 19.044982886400007], [89.0509033079, 51.543762227200006, 129.5412597883, 60.60083005440001], [163.1055908399, 48.879943833599995, 218.5134277042, 58.4697265664], [262.2003174141, 47.281616230400004, 311.7476806328999, 56.87145994240001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046646.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[104.4987182592, 30.7272605172, 242.2609196544, 574.1907959297]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046646_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[34.498718259200004, 30.7272605172, 172.2609196544, 574.1907959297]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046646.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a ring, a handbag, a sneakers, and a slippers.", "boxes_value": [[104.4987182592, 30.7272605172, 242.2609196544, 574.1907959297], [79.3951415808, 151.8140258703, 470.6920165888, 681.8847656355999], [104.4987182592, 549.8885498111999, 120.4208374272, 574.1907959297], [227.0862905344, 30.7272605172, 242.2609196544, 118.5496054461], [127.7277807104, 352.807198185, 164.7448624128, 380.57000948740006], [163.6103926272, 245.7030047163, 180.7571617792, 267.9213534962]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046646_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a ring, a handbag, a sneakers, and a slippers.", "boxes_value": [[34.498718259200004, 30.7272605172, 172.2609196544, 574.1907959297], [9.3951415808, 151.8140258703, 206, 681.8847656355999], [34.498718259200004, 549.8885498111999, 50.4208374272, 574.1907959297], [157.0862905344, 30.7272605172, 172.2609196544, 118.5496054461], [57.7277807104, 352.807198185, 94.74486241279999, 380.57000948740006], [93.61039262720001, 245.7030047163, 110.7571617792, 267.9213534962]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046647.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[397.30358885199996, 147.8978882048, 494.0969238408, 264.7027588096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046647_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[24.30358885199996, 29.89788820480001, 121.0969238408, 146.70275880960003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046647.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a vase, a flower, a cabinet, a picture, and a plate.", "boxes_value": [[397.30358885199996, 147.8978882048, 494.0969238408, 264.7027588096], [423.84289553720004, 147.8978882048, 451.5019531292, 194.918334976], [445.9700927763, 159.5147094528, 494.0969238408, 197.6842040832], [335.8870239034, 193.2587890688, 497.41601561479996, 325.4691162112], [440.59533690300003, 224.873718272, 480.4243164121, 264.7027588096], [397.30358885199996, 230.4249267712, 428.37121579409995, 261.4926757888]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046647_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a vase, a flower, a cabinet, a picture, and a plate.", "boxes_value": [[24.30358885199996, 29.89788820480001, 121.0969238408, 146.70275880960003], [50.842895537200036, 29.89788820480001, 78.50195312919999, 76.91833497600001], [72.97009277630002, 41.51470945279999, 121.0969238408, 79.6842040832], [0, 75.25878906880001, 124.41601561479996, 175], [67.59533690300003, 106.87371827199999, 107.42431641209998, 146.70275880960003], [24.30358885199996, 112.4249267712, 55.37121579409995, 143.4926757888]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046653.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[605.0279541043001, 139.3956298752, 723.3104247686, 358.5776977408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046653_crop.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[30.02795410430008, 55.3956298752, 148.3104247686, 274.5776977408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046653.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pillows, and four plates.", "boxes_value": [[605.0279541043001, 139.3956298752, 723.3104247686, 358.5776977408], [590.219116182, 318.0297241088, 647.3388672185, 356.462097152], [605.0279541043001, 340.2429809664, 650.8647460907999, 358.5776977408], [693.3837890575, 139.3956298752, 723.3104247686, 169.7498168832], [653.1965332189, 168.0397338624, 679.27551266, 192.8361206272], [624.9799804972, 185.9957275136, 648.4937743953, 207.7994384896], [605.7413330137, 200.5315551744, 627.1175537045, 218.9150390784]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046653_crop.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pillows, and four plates.", "boxes_value": [[30.02795410430008, 55.3956298752, 148.3104247686, 274.5776977408], [15.219116181999993, 234.0297241088, 72.33886721850001, 272.462097152], [30.02795410430008, 256.2429809664, 75.86474609079994, 274.5776977408], [118.38378905750005, 55.3956298752, 148.3104247686, 85.7498168832], [78.19653321889996, 84.0397338624, 104.27551266, 108.83612062719999], [49.97998049720002, 101.9957275136, 73.49377439529997, 123.79943848959999], [30.7413330137, 116.53155517440001, 52.11755370449998, 134.9150390784]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046654.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.5189819648, 275.6542968832, 476.3913573888, 471.6906738176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046654_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.5189819648, 49.654296883200004, 476.3913573888, 245.6906738176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046654.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a street lights, a desk, and three chairs.", "boxes_value": [[1.5189819648, 275.6542968832, 476.3913573888, 471.6906738176], [460.42272952319996, 275.6542968832, 476.3913573888, 399.118835456], [60.1233520128, 318.2536621056, 196.51184079360002, 423.2088623104], [138.9729614592, 300.672363264, 249.7885741824, 433.3314208768], [10.0432739328, 306.0000610304, 122.989929216, 426.9382324224], [1.5189819648, 335.3022460928, 136.84185792, 471.6906738176]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046654_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a street lights, a desk, and three chairs.", "boxes_value": [[1.5189819648, 49.654296883200004, 476.3913573888, 245.6906738176], [460.42272952319996, 49.654296883200004, 476.3913573888, 173.118835456], [60.1233520128, 92.25366210559997, 196.51184079360002, 197.20886231039998], [138.9729614592, 74.67236326400001, 249.7885741824, 207.3314208768], [10.0432739328, 80.00006103039999, 122.989929216, 200.9382324224], [1.5189819648, 109.30224609279998, 136.84185792, 245.6906738176]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046661.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[486.1075439739, 293.6680297984, 769.5534667620001, 510.912658688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046661_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[71.10754397390002, 54.66802979840003, 354.5534667620001, 271.912658688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046661.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two lamps, a picture, a traffic light, and two traffic cones.", "boxes_value": [[486.1075439739, 293.6680297984, 769.5534667620001, 510.912658688], [537.7388915703, 355.761169408, 769.5534667620001, 510.912658688], [563.2932128679, 266.3208617984, 770.4661865078999, 413.2584838656], [486.1075439739, 296.0583496192, 576.8920898196, 340.3949584896], [649.3388671974, 293.6680297984, 729.1196288775, 349.7123412992], [442.6975097766, 352.0252685312, 517.9271239917, 369.4158325248], [448.0148926158, 312.9850463744, 519.7353515253, 354.2075195392]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046661_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two lamps, a picture, a traffic light, and two traffic cones.", "boxes_value": [[71.10754397390002, 54.66802979840003, 354.5534667620001, 271.912658688], [122.73889157029998, 116.761169408, 354.5534667620001, 271.912658688], [148.29321286790002, 27.320861798400017, 355.4661865078999, 174.2584838656], [71.10754397390002, 57.05834961919999, 161.89208981959996, 101.39495848960001], [234.33886719739996, 54.66802979840003, 314.11962887749996, 110.71234129919998], [27.697509776599986, 113.02526853120003, 102.9271239917, 130.4158325248], [33.01489261580002, 73.98504637439999, 104.73535152529996, 115.2075195392]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046662.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[270.8183593979, 213.1056518656, 405.5194091705, 346.2039794688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046662_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[33.8183593979, 34.10565186560001, 168.5194091705, 167.20397946880001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046662.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a machinery vehicle, and a street lights.", "boxes_value": [[270.8183593979, 213.1056518656, 405.5194091705, 346.2039794688], [356.71081542880006, 302.0844115968, 375.32861325719995, 336.347473152], [305.2379150465, 306.7779540992, 325.5767822408, 346.2039794688], [270.8183593979, 297.3908081152, 282.08294678379997, 330.8716430848], [364.7214355688, 268.1384277504, 405.5194091705, 296.172790528], [326.4383545032, 213.1056518656, 334.8291626318, 277.7149658112]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046662_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a machinery vehicle, and a street lights.", "boxes_value": [[33.8183593979, 34.10565186560001, 168.5194091705, 167.20397946880001], [119.71081542880006, 123.08441159680001, 138.32861325719995, 157.34747315200002], [68.2379150465, 127.77795409919997, 88.57678224080001, 167.20397946880001], [33.8183593979, 118.3908081152, 45.08294678379997, 151.87164308479998], [127.72143556880002, 89.13842775040001, 168.5194091705, 117.17279052800001], [89.4383545032, 34.10565186560001, 97.82916263179999, 98.71496581119999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046664.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[111.93707278080001, 77.7648315392, 361.0797119328, 269.4049682432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046664_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[62.93707278080001, 48.7648315392, 312.0797119328, 240.40496824320002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046664.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, a faucet, and a sink.", "boxes_value": [[111.93707278080001, 77.7648315392, 361.0797119328, 269.4049682432], [240.37689212, 77.7648315392, 352.9570312336, 210.3357543936], [167.77850342399998, 77.7648315392, 260.3677368352, 206.127136256], [280.5764160224, 253.503051776, 361.0797119328, 269.4049682432], [111.93707278080001, 226.6918334976, 142.6284179488, 260.303283712], [35.558471678400004, 257.9674682368, 165.37493899039998, 266.161865216]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046664_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, a faucet, and a sink.", "boxes_value": [[62.93707278080001, 48.7648315392, 312.0797119328, 240.40496824320002], [191.37689212, 48.7648315392, 303.9570312336, 181.3357543936], [118.77850342399998, 48.7648315392, 211.3677368352, 177.127136256], [231.5764160224, 224.503051776, 312.0797119328, 240.40496824320002], [62.93707278080001, 197.6918334976, 93.6284179488, 231.303283712], [0, 228.96746823680002, 116.37493899039998, 237.16186521600002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046665.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[142.8446655167, 350.5718383616, 298.3943481394, 456.7921753088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046665_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[39.84466551669999, 26.571838361599987, 195.3943481394, 132.79217530879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046665.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two sneakers, a pot, and a bottle.", "boxes_value": [[142.8446655167, 350.5718383616, 298.3943481394, 456.7921753088], [138.0681152617, 104.2810669056, 255.8534545675, 459.3187255808], [161.4535522441, 417.9507446272, 200.29498294110002, 456.7921753088], [209.0656127654, 421.291931136, 243.3129272201, 459.7157592576], [252.5921020426, 350.5718383616, 298.3943481394, 386.141723648], [142.8446655167, 376.7644653568, 156.80609131129998, 408.6251831296]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046665_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two sneakers, a pot, and a bottle.", "boxes_value": [[39.84466551669999, 26.571838361599987, 195.3943481394, 132.79217530879998], [35.0681152617, 0, 152.8534545675, 135.3187255808], [58.453552244099996, 93.95074462719998, 97.29498294110002, 132.79217530879998], [106.0656127654, 97.29193113600002, 140.3129272201, 135.71575925759998], [149.5921020426, 26.571838361599987, 195.3943481394, 62.14172364799998], [39.84466551669999, 52.764465356799974, 53.80609131129998, 84.62518312959998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046666.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each mentioned object.", "boxes_value": [[109.12933347839999, 131.3014526464, 514.9754638848, 349.30664064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046666_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each mentioned object.", "boxes_value": [[102.12933347839999, 55.30145264640001, 507.9754638848, 273.30664064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046666.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each mentioned object. For your reference, objects involved in this region include six boats.", "boxes_value": [[109.12933347839999, 131.3014526464, 514.9754638848, 349.30664064], [109.12933347839999, 141.5847168, 242.1261596928, 254.7006225408], [107.0726928384, 249.2161865216, 217.4463501312, 353.4199218688], [235.95623777279997, 131.3014526464, 366.2109374976, 261.55615232], [239.3839721472, 255.3861694464, 364.83984376319995, 367.8165283328], [403.91613772799997, 140.21362304, 513.6043701504, 251.2728881664], [403.91613772799997, 240.3040771584, 514.9754638848, 349.30664064]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046666_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each mentioned object. For your reference, objects involved in this region include six boats.", "boxes_value": [[102.12933347839999, 55.30145264640001, 507.9754638848, 273.30664064], [102.12933347839999, 65.5847168, 235.1261596928, 178.7006225408], [100.0726928384, 173.2161865216, 210.4463501312, 277.4199218688], [228.95623777279997, 55.30145264640001, 359.2109374976, 185.55615232000002], [232.3839721472, 179.3861694464, 357.83984376319995, 291.8165283328], [396.91613772799997, 64.21362303999999, 506.6043701504, 175.2728881664], [396.91613772799997, 164.3040771584, 507.9754638848, 273.30664064]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046667.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[109.3365478278, 410.8334350336, 734.339233413, 511.5596923904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046667_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[109.3365478278, 25.833435033599983, 734.339233413, 126.55969239040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046667.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four boots, and an airplane.", "boxes_value": [[109.3365478278, 410.8334350336, 734.339233413, 511.5596923904], [221.07415774319998, 410.8334350336, 302.5876464558, 448.6898803712], [109.3365478278, 447.773986816, 203.06176759259998, 510.9698486272], [630.097778358, 490.584289536, 677.4512939478, 511.2418823168], [677.7691650666, 469.9266967552, 734.339233413, 511.5596923904], [50.2249755582, 155.5341796864, 773.7821045262, 512.6077880832]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046667_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four boots, and an airplane.", "boxes_value": [[109.3365478278, 25.833435033599983, 734.339233413, 126.55969239040002], [221.07415774319998, 25.833435033599983, 302.5876464558, 63.68988037119999], [109.3365478278, 62.77398681599999, 203.06176759259998, 125.96984862720001], [630.097778358, 105.58428953600003, 677.4512939478, 126.2418823168], [677.7691650666, 84.9266967552, 734.339233413, 126.55969239040002], [50.2249755582, 0, 773.7821045262, 127]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046668.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[218.24298097390002, 120.9152832, 561.6403808596, 207.7144164864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046668_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[86.24298097390002, 21.915283200000005, 429.6403808596, 108.71441648640001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046668.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a tea pot, a dinning table, a flag, a vase, and a glasses.", "boxes_value": [[218.24298097390002, 120.9152832, 561.6403808596, 207.7144164864], [243.468017601, 121.5970459136, 383.2283935618, 198.8900756992], [379.81958010750003, 120.9152832, 445.2683105495, 202.0444946432], [218.24298097390002, 170.683593728, 233.9234618991, 195.9086303744], [214.83422851150002, 148.8673705984, 237.3322143615, 172.0471191552], [546.4468994244, 167.9777221632, 561.6403808596, 207.7144164864]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046668_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a tea pot, a dinning table, a flag, a vase, and a glasses.", "boxes_value": [[86.24298097390002, 21.915283200000005, 429.6403808596, 108.71441648640001], [111.46801760100001, 22.5970459136, 251.22839356179998, 99.8900756992], [247.81958010750003, 21.915283200000005, 313.2683105495, 103.04449464320001], [86.24298097390002, 71.683593728, 101.9234618991, 96.90863037439999], [82.83422851150002, 49.86737059839999, 105.33221436150001, 73.0471191552], [414.44689942440004, 68.97772216320001, 429.6403808596, 108.71441648640001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046671.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe.", "boxes_value": [[427.70507810800007, 325.275146496, 600.6210937650001, 479.6342773248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046671_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe.", "boxes_value": [[43.705078108000066, 39.27514649599999, 216.62109376500007, 193.6342773248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046671.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a pickup truck, two cars, and two street lights.", "boxes_value": [[427.70507810800007, 325.275146496, 600.6210937650001, 479.6342773248], [446.261962916, 408.7808838144, 532.2982177590001, 479.6342773248], [523.019775355, 421.4332885504, 575.316284158, 465.2949218816], [561.82043456, 419.746276864, 600.6210937650001, 452.6425170944], [427.70507810800007, 325.275146496, 453.85339353200004, 412.154846208], [537.527587905, 356.3029174784, 561.3331298960001, 420.6948242432]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046671_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a pickup truck, two cars, and two street lights.", "boxes_value": [[43.705078108000066, 39.27514649599999, 216.62109376500007, 193.6342773248], [62.261962916000016, 122.7808838144, 148.29821775900007, 193.6342773248], [139.01977535499998, 135.4332885504, 191.31628415800003, 179.29492188159998], [177.82043455999997, 133.74627686399998, 216.62109376500007, 166.64251709439998], [43.705078108000066, 39.27514649599999, 69.85339353200004, 126.15484620799998], [153.52758790500002, 70.30291747839999, 177.33312989600006, 134.6948242432]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046674.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 211.2882080256, 330.59460452549996, 512.7830810624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046674_crop.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 76.2882080256, 330.59460452549996, 377]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046674.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a pillow, a couch, two benches, a stool, and a desk.", "boxes_value": [[0, 211.2882080256, 330.59460452549996, 512.7830810624], [0.6292724865, 288.6694946304, 168.53198243909998, 490.882751488], [0, 274.7992553472, 311.61425784930003, 512.7830810624], [59.0302123761, 189.3878784, 168.53198243909998, 284.289428736], [215.9827270629, 211.2882080256, 330.59460452549996, 309.8398437376], [253.4554443687, 177.824157696, 326.93334960090004, 234.675720192], [141.8977051107, 135.9900512768, 308.161621131, 284.0185546752]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00046674_crop.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a pillow, a couch, two benches, a stool, and a desk.", "boxes_value": [[0, 76.2882080256, 330.59460452549996, 377], [0.6292724865, 153.66949463039998, 168.53198243909998, 355.882751488], [0, 139.79925534720002, 311.61425784930003, 377], [59.0302123761, 54.387878400000005, 168.53198243909998, 149.289428736], [215.9827270629, 76.2882080256, 330.59460452549996, 174.8398437376], [253.4554443687, 42.824157695999986, 326.93334960090004, 99.675720192], [141.8977051107, 0.9900512767999885, 308.161621131, 149.0185546752]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00046675.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify.", "boxes_value": [[294.6885986593, 190.4736938496, 389.2003173715, 287.1562500096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046675_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify.", "boxes_value": [[23.6885986593, 24.47369384960001, 118.20031737149998, 121.1562500096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046675.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two paddles, three people, a helmet, and a boat.", "boxes_value": [[294.6885986593, 190.4736938496, 389.2003173715, 287.1562500096], [294.6885986593, 221.5169677824, 379.0097656534, 287.1562500096], [376.48522949880004, 219.4973144576, 385.06872558739997, 329.0643310592], [248.1917114422, 196.440795904, 343.90588375539994, 309.7526244864], [323.7329712181, 190.8610229248, 388.5439452876, 297.30548096], [298.8386841027, 172.4049072128, 360.2159424013, 255.5405883904], [354.48315430810004, 190.4736938496, 389.2003173715, 218.247436544], [83.0782470774, 227.4299926528, 606.0437011655999, 355.64086912]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046675_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two paddles, three people, a helmet, and a boat.", "boxes_value": [[23.6885986593, 24.47369384960001, 118.20031737149998, 121.1562500096], [23.6885986593, 55.5169677824, 108.0097656534, 121.1562500096], [105.48522949880004, 53.4973144576, 114.06872558739997, 145], [0, 30.440795903999998, 72.90588375539994, 143.7526244864], [52.732971218099976, 24.86102292480001, 117.54394528760002, 131.30548096], [27.838684102699972, 6.404907212799998, 89.21594240130003, 89.54058839039999], [83.48315430810004, 24.47369384960001, 118.20031737149998, 52.24743654400001], [0, 61.429992652799996, 141, 145]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046676.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[433.26977541119993, 68.8794555904, 712.3499755775999, 347.2274169855999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046676_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[70.26977541119993, 68.8794555904, 349.34997557759993, 347.2274169855999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046676.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a towel, two people, a bracelet, a glasses, two bottles, a plate, and a stuffed toy.", "boxes_value": [[433.26977541119993, 68.8794555904, 712.3499755775999, 347.2274169855999], [642.7854004224, 183.7387695104, 765.2037353472, 319.0886230528], [458.6418457344, 304.9916381696, 529.3341064704, 353.8440551936], [433.26977541119993, 68.8794555904, 712.3499755775999, 347.2274169855999], [591.6478271231999, 231.882629376, 620.9511718655999, 267.0263671808], [408.130371072, 41.7445068288, 462.9686279424, 176.91601561600004], [446.1165771264, 110.652099584, 522.8464355328, 159.0588379136], [612.2724609024, 69.9177856512, 633.4376220672, 141.5612792832], [543.7451172096, 97.8518676992, 582.6723632640001, 114.3056640512], [590.545776384, 83.4302368256, 612.2724609024, 139.3446655488], [549.6936035328, 336.3410034176, 597.639770496, 356.1283569152]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7, 9], [8], [10]]}, {"image_path": "objects365_v1_00046676_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a towel, two people, a bracelet, a glasses, two bottles, a plate, and a stuffed toy.", "boxes_value": [[70.26977541119993, 68.8794555904, 349.34997557759993, 347.2274169855999], [279.7854004224, 183.7387695104, 402.2037353472, 319.0886230528], [95.64184573440002, 304.9916381696, 166.3341064704, 353.8440551936], [70.26977541119993, 68.8794555904, 349.34997557759993, 347.2274169855999], [228.64782712319993, 231.882629376, 257.9511718655999, 267.0263671808], [45.130371072, 41.7445068288, 99.96862794240002, 176.91601561600004], [83.11657712639999, 110.652099584, 159.8464355328, 159.0588379136], [249.27246090239998, 69.9177856512, 270.4376220672, 141.5612792832], [180.74511720960004, 97.8518676992, 219.67236326400007, 114.3056640512], [227.54577638399996, 83.4302368256, 249.27246090239998, 139.3446655488], [186.6936035328, 336.3410034176, 234.63977049599998, 356.1283569152]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7, 9], [8], [10]]}, {"image_path": "objects365_v1_00046677.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[204.0650024448, 328.6193847808, 512.1760253952, 512.0679931392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046677_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[77.0650024448, 46.619384780799976, 385, 230]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046677.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a desk, two carpets, a picture, a lamp, and a cabinet.", "boxes_value": [[204.0650024448, 328.6193847808, 512.1760253952, 512.0679931392], [344.477111808, 231.51794432, 462.136779776, 410.0361328128], [389.1066284032, 382.2365722624, 478.3657226752, 510.03936768], [442.5268554752, 407.2561645568, 512.1760253952, 485.0197753856], [376.9349365248, 437.0092163072, 511.4997558784, 512.0679931392], [204.0650024448, 328.6193847808, 226.820495616, 357.580932608], [214.60150144, 291.8928833024, 266.1254272512, 357.9257202176], [337.4594116096, 376.957275392, 500.5137329152, 409.6916504064], [210.7539673088, 366.9802246144, 264.1030273536, 438.5897217024]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 7], [5], [6], [8]]}, {"image_path": "objects365_v1_00046677_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a desk, two carpets, a picture, a lamp, and a cabinet.", "boxes_value": [[77.0650024448, 46.619384780799976, 385, 230], [217.47711180800002, 0, 335.136779776, 128.03613281280002], [262.1066284032, 100.23657226239999, 351.3657226752, 228.03936768], [315.5268554752, 125.2561645568, 385, 203.01977538559998], [249.93493652479998, 155.0092163072, 384.4997558784, 230], [77.0650024448, 46.619384780799976, 99.82049561599999, 75.58093260800001], [87.60150143999999, 9.89288330239998, 139.1254272512, 75.92572021759997], [210.4594116096, 94.95727539199999, 373.5137329152, 127.69165040640002], [83.75396730879999, 84.98022461440002, 137.1030273536, 156.58972170240003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 7], [5], [6], [8]]}, {"image_path": "objects365_v1_00046678.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[200.7299194671, 304.0335693312, 770.1887207241, 445.0847778304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046678_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[142.7299194671, 36.0335693312, 712.1887207241, 177.08477783040001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046678.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two desks, three chairs, and a trash bin can.", "boxes_value": [[200.7299194671, 304.0335693312, 770.1887207241, 445.0847778304], [185.8015746792, 303.1999511552, 282.6909179541, 370.7832641536], [229.7606200893, 283.7622680576, 279.7005615324, 354.3359985152], [548.2668456843, 304.0335693312, 621.7337646189, 370.9631347712], [632.9405517737999, 336.4088134656, 683.0599365278999, 393.3767699968], [684.765380895, 338.5628662272, 770.1887207241, 445.0847778304], [200.7299194671, 329.4916381696, 219.0906372126, 352.6031494144]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5], [6]]}, {"image_path": "objects365_v1_00046678_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two desks, three chairs, and a trash bin can.", "boxes_value": [[142.7299194671, 36.0335693312, 712.1887207241, 177.08477783040001], [127.8015746792, 35.19995115519998, 224.69091795409997, 102.78326415359999], [171.7606200893, 15.76226805760001, 221.7005615324, 86.3359985152], [490.26684568430005, 36.0335693312, 563.7337646189, 102.96313477119998], [574.9405517737999, 68.40881346560002, 625.0599365278999, 125.37676999680002], [626.765380895, 70.5628662272, 712.1887207241, 177.08477783040001], [142.7299194671, 61.49163816959998, 161.0906372126, 84.60314941439998]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5], [6]]}, {"image_path": "objects365_v1_00046679.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please mention the objects and their locations.", "boxes_value": [[37.0824585165, 444.2390136832, 469.747070321, 492.7150268416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046679_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please mention the objects and their locations.", "boxes_value": [[37.0824585165, 12.239013683200028, 469.747070321, 60.71502684159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046679.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please mention the objects and their locations. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[37.0824585165, 444.2390136832, 469.747070321, 492.7150268416], [37.0824585165, 462.2205810688, 70.965148956, 492.7150268416], [107.10675052699999, 458.7399902208, 142.68359377299998, 489.8914795008], [141.83471683099998, 454.7750854656, 169.7897949105, 483.6796264448], [197.4606933295, 453.4544677888, 225.69628903000003, 483.1149292032], [443.51940919149996, 444.2390136832, 469.747070321, 473.5307006976]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046679_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please mention the objects and their locations. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[37.0824585165, 12.239013683200028, 469.747070321, 60.71502684159998], [37.0824585165, 30.220581068800016, 70.965148956, 60.71502684159998], [107.10675052699999, 26.739990220799996, 142.68359377299998, 57.89147950080002], [141.83471683099998, 22.7750854656, 169.7897949105, 51.67962644480002], [197.4606933295, 21.454467788800002, 225.69628903000003, 51.114929203200006], [443.51940919149996, 12.239013683200028, 469.747070321, 41.530700697600025]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046680.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object.", "boxes_value": [[432.68139650579997, 265.9378051584, 567.9766845546, 408.5285033984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046680_crop.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object.", "boxes_value": [[34.68139650579997, 35.93780515840001, 169.97668455459996, 178.5285033984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046680.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[432.68139650579997, 265.9378051584, 567.9766845546, 408.5285033984], [432.68139650579997, 268.59069824, 457.88342286290003, 355.4714966016], [443.9559326063, 275.885986304, 507.6243896715, 456.2797851648], [511.60363769090003, 269.2539062272, 567.9766845546, 408.5285033984], [482.422241194, 265.9378051584, 499.00256348629995, 313.0258788864], [428.4776611392, 371.4677124096, 475.0222167704, 414.179199232]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046680_crop.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[34.68139650579997, 35.93780515840001, 169.97668455459996, 178.5285033984], [34.68139650579997, 38.590698239999995, 59.883422862900034, 125.4714966016], [45.955932606299996, 45.88598630400003, 109.62438967150001, 214], [113.60363769090003, 39.25390622719999, 169.97668455459996, 178.5285033984], [84.42224119399998, 35.93780515840001, 101.00256348629995, 83.0258788864], [30.477661139199995, 141.46771240959998, 77.02221677040001, 184.17919923199997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046682.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[422.4843750019, 269.1312256, 532.6988525284, 480.8310546944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046682_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[28.48437500189999, 53.13122559999999, 138.69885252840004, 264.8310546944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046682.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[422.4843750019, 269.1312256, 532.6988525284, 480.8310546944], [409.6787109476, 247.7476196352, 501.52270508230004, 388.104248064], [320.8476562274, 377.0122680832, 531.9688721002, 504.2693481472], [372.21960449700003, 269.3860473856, 531.386230481, 444.6773681664], [422.4843750019, 377.7902832128, 532.6988525284, 480.8310546944], [429.05053708779997, 269.1312256, 483.43457033889996, 321.3613891584], [86.249206533, 271.681091328, 573.6795654438, 511.8297119232]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046682_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[28.48437500189999, 53.13122559999999, 138.69885252840004, 264.8310546944], [15.678710947599996, 31.74761963520001, 107.52270508230004, 172.104248064], [0, 161.0122680832, 137.96887210019997, 288.2693481472], [0, 53.38604738560002, 137.38623048099998, 228.67736816640002], [28.48437500189999, 161.79028321279998, 138.69885252840004, 264.8310546944], [35.05053708779997, 53.13122559999999, 89.43457033889996, 105.36138915840002], [0, 55.68109132799998, 166, 295.8297119232]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046683.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[417.65332033109996, 214.561828608, 572.9018554685999, 334.03704832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046683_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[39.65332033109996, 30.561828608000013, 194.90185546859993, 150.03704832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046683.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, two umbrellas, and two street lights.", "boxes_value": [[417.65332033109996, 214.561828608, 572.9018554685999, 334.03704832], [456.7026366903, 312.0050659328, 483.037719717, 334.03704832], [514.9079590046, 276.8018798592, 572.9018554685999, 313.3526611456], [482.90563968640004, 276.639404288, 514.7454834193001, 320.3379516416], [417.65332033109996, 236.9363403264, 432.3404541323, 325.4909057536], [517.7327881152, 214.561828608, 537.012939435, 317.3891601408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046683_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, two umbrellas, and two street lights.", "boxes_value": [[39.65332033109996, 30.561828608000013, 194.90185546859993, 150.03704832], [78.70263669029998, 128.0050659328, 105.03771971700002, 150.03704832], [136.9079590046, 92.80187985920003, 194.90185546859993, 129.3526611456], [104.90563968640004, 92.63940428799998, 136.74548341930006, 136.3379516416], [39.65332033109996, 52.9363403264, 54.34045413230001, 141.49090575359998], [139.73278811520004, 30.561828608000013, 159.01293943500002, 133.38916014080002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046687.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe.", "boxes_value": [[269.2966308444, 267.4213867008, 434.62585453040003, 378.1452026368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046687_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe.", "boxes_value": [[42.2966308444, 28.421386700799985, 207.62585453040003, 139.14520263679998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046687.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two handbags, a hat, a backpack, and a street lights.", "boxes_value": [[269.2966308444, 267.4213867008, 434.62585453040003, 378.1452026368], [257.1027832276, 325.1770019328, 279.4421386584, 368.098571776], [269.2966308444, 276.132263168, 316.85205078719997, 306.7036132864], [383.4284667648, 309.5999145472, 423.4132080284, 354.6621093888], [417.27795411119996, 347.0459594752, 434.62585453040003, 378.1452026368], [335.015625022, 267.4213867008, 351.8615722764, 302.4792480256]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00046687_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two handbags, a hat, a backpack, and a street lights.", "boxes_value": [[42.2966308444, 28.421386700799985, 207.62585453040003, 139.14520263679998], [30.102783227600014, 86.17700193280001, 52.44213865839998, 129.09857177600003], [42.2966308444, 37.13226316800001, 89.85205078719997, 67.70361328640001], [156.42846676480002, 70.5999145472, 196.4132080284, 115.66210938879999], [190.27795411119996, 108.0459594752, 207.62585453040003, 139.14520263679998], [108.015625022, 28.421386700799985, 124.86157227640001, 63.479248025599986]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00046690.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[171.124084455, 311.13208005120003, 269.930969253, 395.5871582208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046690_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[25.124084455000002, 21.132080051200035, 123.930969253, 105.5871582208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046690.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a vase, a flower, a desk, a bed, and a plate.", "boxes_value": [[171.124084455, 311.13208005120003, 269.930969253, 395.5871582208], [198.171813942, 339.8358153984, 216.387573243, 356.9476318464], [183.133667016, 311.13208005120003, 222.459533691, 340.38781739520005], [171.124084455, 345.90771486719996, 269.930969253, 395.5871582208], [0, 1.417968768, 505.533264144, 767.3660888832001], [218.07867433799998, 344.4765625344, 262.078979469, 357.2899170048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046690_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a vase, a flower, a desk, a bed, and a plate.", "boxes_value": [[25.124084455000002, 21.132080051200035, 123.930969253, 105.5871582208], [52.171813942, 49.83581539839997, 70.38757324299999, 66.94763184639999], [37.133667016000004, 21.132080051200035, 76.45953369099999, 50.387817395200045], [25.124084455000002, 55.90771486719996, 123.930969253, 105.5871582208], [0, 0, 148, 126], [72.07867433799998, 54.47656253439999, 116.07897946899999, 67.28991700479997]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046691.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[287.7591552936, 374.0414428672, 680.5971679569, 511.608337408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046691_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[98.75915529359997, 35.041442867199976, 491.59716795689997, 172.608337408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046691.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, and four cups.", "boxes_value": [[287.7591552936, 374.0414428672, 680.5971679569, 511.608337408], [207.2801513889, 82.5645751808, 572.0206298856, 512.0058593792], [287.7591552936, 376.2484130816, 403.9921874986, 509.4013671936], [427.5329589661, 374.0414428672, 531.2598877067, 510.8726806528], [530.524291994, 388.754455552, 644.5502929567, 510.8726806528], [541.5590820492, 416.7092284928, 680.5971679569, 511.608337408]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046691_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, and four cups.", "boxes_value": [[98.75915529359997, 35.041442867199976, 491.59716795689997, 172.608337408], [18.28015138890001, 0, 383.0206298856, 173], [98.75915529359997, 37.248413081600006, 214.9921874986, 170.40136719359998], [238.5329589661, 35.041442867199976, 342.25988770670006, 171.8726806528], [341.524291994, 49.754455552000024, 455.5502929567, 171.8726806528], [352.5590820492, 77.70922849279998, 491.59716795689997, 172.608337408]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046693.jpg", "text": "Kindly share your observations about the rectangular region within . Specify the location of each mentioned object.", "boxes_value": [[75.640441872, 0, 401.137268064, 241.941650368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046693_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Specify the location of each mentioned object.", "boxes_value": [[75.640441872, 0, 401.137268064, 241.941650368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046693.jpg", "text": "Kindly share your observations about the rectangular region within . Specify the location of each mentioned object. For your reference, objects involved in this region include four lamps, and three people.", "boxes_value": [[75.640441872, 0, 401.137268064, 241.941650368], [233.43670656, 0, 401.137268064, 118.926940928], [168.409973136, 53.90020748799999, 248.267395008, 145.165771456], [331.165222176, 57.383239744, 455.054931648, 219.95080569599997], [75.640441872, 73.899658176, 98.783691408, 110.598205568], [247.342590336, 224.251037568, 271.48791504, 254.611999488], [222.48016358400002, 208.71203616, 246.864501936, 247.679138176], [182.317687968, 189.347961408, 219.61138915200002, 241.941650368]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046693_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Specify the location of each mentioned object. For your reference, objects involved in this region include four lamps, and three people.", "boxes_value": [[75.640441872, 0, 401.137268064, 241.941650368], [233.43670656, 0, 401.137268064, 118.926940928], [168.409973136, 53.90020748799999, 248.267395008, 145.165771456], [331.165222176, 57.383239744, 455.054931648, 219.95080569599997], [75.640441872, 73.899658176, 98.783691408, 110.598205568], [247.342590336, 224.251037568, 271.48791504, 254.611999488], [222.48016358400002, 208.71203616, 246.864501936, 247.679138176], [182.317687968, 189.347961408, 219.61138915200002, 241.941650368]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046694.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[20.8569335808, 123.679504384, 87.9154052352, 200.478271488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046694_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[16.8569335808, 19.679504383999998, 83.9154052352, 96.47827148799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046694.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, and three people.", "boxes_value": [[20.8569335808, 123.679504384, 87.9154052352, 200.478271488], [51.557678208, 123.679504384, 72.3495483648, 151.1544799744], [33.7360229376, 140.7585449472, 54.5278930944, 170.4611816448], [20.8569335808, 144.7048339968, 39.2673339648, 170.7332763648], [67.7263793664, 169.2951660032, 84.7171630848, 209.2734985216], [61.3298340096, 142.7096557568, 87.9154052352, 200.478271488]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046694_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, and three people.", "boxes_value": [[16.8569335808, 19.679504383999998, 83.9154052352, 96.47827148799999], [47.557678208, 19.679504383999998, 68.3495483648, 47.154479974400004], [29.736022937599998, 36.758544947199994, 50.5278930944, 66.4611816448], [16.8569335808, 40.704833996800005, 35.2673339648, 66.73327636479999], [63.726379366399996, 65.2951660032, 80.7171630848, 105.2734985216], [57.3298340096, 38.709655756800004, 83.9154052352, 96.47827148799999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046696.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[395.091308605, 146.2416992256, 661.4606933594999, 416.1602783232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046696_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[67.091308605, 68.2416992256, 333.46069335949994, 338.1602783232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046696.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a vase, a storage box, a bracelet, and a dog.", "boxes_value": [[395.091308605, 146.2416992256, 661.4606933594999, 416.1602783232], [458.236083958, 146.2416992256, 661.4606933594999, 360.49401856], [395.091308605, 172.3867187712, 442.5457763855, 289.286682112], [279.00469973500003, 288.2024536064, 498.03845215300004, 418.79351808], [508.15478512349995, 357.9975585792, 573.531494127, 416.1602783232], [509.6904297, 169.716247552, 603.176635742, 330.3489990144]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046696_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a vase, a storage box, a bracelet, and a dog.", "boxes_value": [[67.091308605, 68.2416992256, 333.46069335949994, 338.1602783232], [130.236083958, 68.2416992256, 333.46069335949994, 282.49401856], [67.091308605, 94.38671877120001, 114.5457763855, 211.286682112], [0, 210.2024536064, 170.03845215300004, 340.79351808], [180.15478512349995, 279.9975585792, 245.53149412699997, 338.1602783232], [181.69042969999998, 91.716247552, 275.17663574200003, 252.34899901440002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046697.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[135.3743286336, 354.928833024, 335.181335463, 512.0002441216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046697_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.37432863359999, 39.92883302400003, 250.18133546299998, 197]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046697.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three barrels, a bottle, a bowl, and a cup.", "boxes_value": [[135.3743286336, 354.928833024, 335.181335463, 512.0002441216], [132.5153197977, 352.26843264, 160.3413086001, 376.3635864064], [200.76422122099999, 478.1798706176, 249.0839233578, 511.8809814528], [155.3102416735, 486.4283447296, 203.90454102340001, 512.0002441216], [269.1808471592, 377.4326171648, 293.4777221657, 448.1474609152], [262.6533203343, 446.3745117184, 335.181335463, 496.6099243008], [135.3743286336, 354.928833024, 156.482360854, 376.0368652288]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046697_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three barrels, a bottle, a bowl, and a cup.", "boxes_value": [[50.37432863359999, 39.92883302400003, 250.18133546299998, 197], [47.51531979769999, 37.268432640000015, 75.3413086001, 61.363586406399975], [115.76422122099999, 163.1798706176, 164.0839233578, 196.88098145279997], [70.3102416735, 171.42834472959998, 118.90454102340001, 197], [184.18084715920003, 62.43261716479998, 208.47772216570002, 133.14746091519999], [177.65332033430002, 131.37451171840002, 250.18133546299998, 181.6099243008], [50.37432863359999, 39.92883302400003, 71.482360854, 61.036865228800025]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046698.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give coordinates for the items you reference.", "boxes_value": [[175.09820559, 212.2890625024, 365.361694313, 501.240600576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046698_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give coordinates for the items you reference.", "boxes_value": [[48.09820558999999, 72.28906250239999, 238.361694313, 361.240600576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046698.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a hat, a helmet, and two gloves.", "boxes_value": [[175.09820559, 212.2890625024, 365.361694313, 501.240600576], [175.09820559, 248.7617797632, 365.361694313, 501.240600576], [281.70812985699996, 212.2890625024, 310.810668938, 243.944580096], [237.331359881, 248.9877929472, 310.498168981, 319.3217163264], [320.459716808, 264.7425537024, 379.21911619900004, 312.2948608512], [263.665527356, 319.3217163264, 317.812866217, 384.7781982208]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046698_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a hat, a helmet, and two gloves.", "boxes_value": [[48.09820558999999, 72.28906250239999, 238.361694313, 361.240600576], [48.09820558999999, 108.7617797632, 238.361694313, 361.240600576], [154.70812985699996, 72.28906250239999, 183.810668938, 103.94458009600001], [110.331359881, 108.9877929472, 183.498168981, 179.3217163264], [193.459716808, 124.74255370240002, 252.21911619900004, 172.29486085119999], [136.66552735599998, 179.3217163264, 190.81286621700002, 244.7781982208]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046701.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[209.6804199281, 383.7698364416, 444.7249755803, 492.148864768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046701_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[59.68041992810001, 27.769836441599978, 294.7249755803, 136.148864768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046701.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, an umbrella, two handbags, and a car.", "boxes_value": [[209.6804199281, 383.7698364416, 444.7249755803, 492.148864768], [420.3218994012, 402.7725830144, 443.47741700079996, 492.148864768], [391.9185790997, 400.191162112, 420.3991699232, 492.7517089792], [389.9644775102, 383.7698364416, 444.7249755803, 414.2711792128], [400.3959960678, 413.4923705856, 429.00671389539997, 511.8822631936], [209.6804199281, 451.9756929536, 237.31689453490003, 473.4384155136], [287.56169937889996, 455.1315201024, 301.5384073668, 477.5560302592], [360.7788085751, 390.5465698304, 419.3280029237, 438.0816039936]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00046701_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, an umbrella, two handbags, and a car.", "boxes_value": [[59.68041992810001, 27.769836441599978, 294.7249755803, 136.148864768], [270.3218994012, 46.77258301440003, 293.47741700079996, 136.148864768], [241.91857909970003, 44.19116211199997, 270.3991699232, 136.75170897919998], [239.96447751020003, 27.769836441599978, 294.7249755803, 58.27117921280001], [250.3959960678, 57.492370585599986, 279.00671389539997, 155.8822631936], [59.68041992810001, 95.9756929536, 87.31689453490003, 117.43841551359998], [137.56169937889996, 99.13152010239997, 151.53840736680002, 121.55603025919999], [210.77880857510002, 34.54656983040002, 269.3280029237, 82.08160399360003]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00046702.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[235.1611328, 0, 430.2651977728, 297.41815188009997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046702_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[49.16113279999999, 0, 244.26519777279998, 297.41815188009997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046702.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two lamps, a mirror, and two towels.", "boxes_value": [[235.1611328, 0, 430.2651977728, 297.41815188009997], [235.1611328, 0, 411.5902099456, 51.4683227797], [266.122192384, 24.9302368204, 430.2651977728, 67.6860351812], [194.3999023616, 14.6433105239, 486.41320801279994, 392.405761702], [237.3017577984, 220.28161624400002, 268.8271484416, 298.7597045781], [266.1441650176, 220.28161624400002, 300.0171508736, 297.41815188009997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046702_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two lamps, a mirror, and two towels.", "boxes_value": [[49.16113279999999, 0, 244.26519777279998, 297.41815188009997], [49.16113279999999, 0, 225.5902099456, 51.4683227797], [80.12219238400002, 24.9302368204, 244.26519777279998, 67.6860351812], [8.399902361599999, 14.6433105239, 293, 371], [51.30175779839999, 220.28161624400002, 82.8271484416, 298.7597045781], [80.14416501760002, 220.28161624400002, 114.01715087359997, 297.41815188009997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046704.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[182.9953613017, 200.3353271296, 551.5848388907999, 251.4432373248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046704_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[92.9953613017, 13.335327129599989, 461.5848388907999, 64.44323732480001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046704.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and five cars.", "boxes_value": [[182.9953613017, 200.3353271296, 551.5848388907999, 251.4432373248], [182.9953613017, 221.1471557632, 219.84051513080001, 245.3656005632], [521.8029785147, 214.239135744, 586.0200195442001, 235.3345947136], [477.1625976478, 211.1368408064, 551.5848388907999, 231.11926272], [367.2343750111, 200.3353271296, 420.98681643950005, 220.178466816], [383.699218768, 215.6572265472, 465.6966553089, 251.4432373248], [91.0914917364, 204.988342272, 438.62609860530006, 351.2625122304]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046704_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and five cars.", "boxes_value": [[92.9953613017, 13.335327129599989, 461.5848388907999, 64.44323732480001], [92.9953613017, 34.147155763200004, 129.84051513080001, 58.36560056319999], [431.80297851470004, 27.23913574400001, 496.02001954420007, 48.33459471360001], [387.1625976478, 24.136840806399988, 461.5848388907999, 44.119262719999995], [277.2343750111, 13.335327129599989, 330.98681643950005, 33.178466816], [293.699218768, 28.657226547199997, 375.6966553089, 64.44323732480001], [1.0914917364000019, 17.98834227200001, 348.62609860530006, 77]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046705.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[89.28338624999999, 111.34313962200001, 223.2980957, 189.3230590878]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046705_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[34.28338624999999, 20.34313962200001, 168.2980957, 98.3230590878]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046705.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a picture, a bowl, a cup, and a tea pot.", "boxes_value": [[89.28338624999999, 111.34313962200001, 223.2980957, 189.3230590878], [160.41143799999998, 127.44726560279999, 214.85247805, 161.6766967728], [175.56793215, 111.34313962200001, 223.2980957, 149.5272827268], [89.28338624999999, 115.1994628818, 105.43756105, 142.3262329038], [181.10614015000002, 166.0256958096, 217.7163086, 189.3230590878], [181.43896485, 149.3847046098, 201.4081421, 166.35852048419997], [97.23565674999999, 148.7747192382, 151.4852295, 198.5113525428]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046705_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a picture, a bowl, a cup, and a tea pot.", "boxes_value": [[34.28338624999999, 20.34313962200001, 168.2980957, 98.3230590878], [105.41143799999998, 36.44726560279999, 159.85247805, 70.6766967728], [120.56793214999999, 20.34313962200001, 168.2980957, 58.527282726799996], [34.28338624999999, 24.199462881800002, 50.43756105, 51.32623290379999], [126.10614015000002, 75.0256958096, 162.7163086, 98.3230590878], [126.43896484999999, 58.384704609799996, 146.4081421, 75.35852048419997], [42.23565674999999, 57.77471923819999, 96.4852295, 107.51135254280001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046706.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[0, 500.7714843786, 196.2401122816, 674.6472168036]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046706_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[0, 43.77148437860001, 196.2401122816, 217.64721680360003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046706.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five high heels.", "boxes_value": [[0, 500.7714843786, 196.2401122816, 674.6472168036], [140.5391845888, 489.5760497865, 254.0650024448, 562.7242431899], [84.9466552832, 526.4427490156, 247.0427856384, 632.9464111144], [0, 571.2615966864, 73.3037719552, 674.6472168036], [0, 500.7714843786, 64.6882934784, 590.0589599275], [109.6327514624, 508.9576415973, 196.2401122816, 559.8687744113]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046706_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five high heels.", "boxes_value": [[0, 43.77148437860001, 196.2401122816, 217.64721680360003], [140.5391845888, 32.57604978649999, 245, 105.7242431899], [84.9466552832, 69.44274901560004, 245, 175.9464111144], [0, 114.26159668640003, 73.3037719552, 217.64721680360003], [0, 43.77148437860001, 64.6882934784, 133.05895992750004], [109.6327514624, 51.95764159729998, 196.2401122816, 102.86877441130002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046707.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[97.0456543128, 246.8862304768, 281.34765623550004, 423.1501465088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046707_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[47.045654312799996, 44.886230476799994, 231.34765623550004, 221.1501465088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046707.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three cabinets, a vase, a flower, a moniter, and a printer.", "boxes_value": [[97.0456543128, 246.8862304768, 281.34765623550004, 423.1501465088], [37.7651366897, 343.9244995072, 152.8250732291, 434.1578979328], [97.0456543128, 313.0916137472, 126.2763671662, 349.8657226752], [76.3012695285, 242.3720702976, 156.9239502011, 317.806213376], [224.44134523760002, 337.383361792, 280.07385256140003, 409.7056274432], [145.6286621018, 375.3988647424, 236.0314941355, 423.1501465088], [149.4668579174, 299.0562134016, 229.29760739809998, 387.4786376704], [213.5233153984, 246.8862304768, 281.34765623550004, 307.6300048896]], "boxes_seq": [[0], [0], [1, 4, 5], [2], [3], [6], [7]]}, {"image_path": "objects365_v1_00046707_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three cabinets, a vase, a flower, a moniter, and a printer.", "boxes_value": [[47.045654312799996, 44.886230476799994, 231.34765623550004, 221.1501465088], [0, 141.92449950719998, 102.82507322910001, 232.15789793279998], [47.045654312799996, 111.09161374719997, 76.2763671662, 147.8657226752], [26.3012695285, 40.372070297600004, 106.92395020110001, 115.80621337600002], [174.44134523760002, 135.38336179200002, 230.07385256140003, 207.7056274432], [95.62866210179999, 173.3988647424, 186.0314941355, 221.1501465088], [99.4668579174, 97.05621340160002, 179.29760739809998, 185.47863767040002], [163.5233153984, 44.886230476799994, 231.34765623550004, 105.6300048896]], "boxes_seq": [[0], [0], [1, 4, 5], [2], [3], [6], [7]]}, {"image_path": "objects365_v1_00046713.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.7623901184, 246.30810545330002, 327.745544448, 648.6717529314001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046713_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.7623901184, 101.30810545330002, 327.745544448, 503.6717529314001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046713.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[37.7623901184, 246.30810545330002, 327.745544448, 648.6717529314001], [298.6068115456, 528.1596679931, 327.745544448, 553.1342773275], [198.180664064, 518.7064208997, 231.8535156224, 648.6717529314001], [270.4259033088, 510.2882079834, 298.5132446208, 601.0606689736], [81.0788574208, 246.30810545330002, 101.7894897664, 294.9615478205], [37.7623901184, 305.0954590024, 55.1520385536, 344.463745109]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046713_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[37.7623901184, 101.30810545330002, 327.745544448, 503.6717529314001], [298.6068115456, 383.1596679931, 327.745544448, 408.13427732749994], [198.180664064, 373.7064208997, 231.8535156224, 503.6717529314001], [270.4259033088, 365.2882079834, 298.5132446208, 456.06066897359995], [81.0788574208, 101.30810545330002, 101.7894897664, 149.96154782050002], [37.7623901184, 160.0954590024, 55.1520385536, 199.463745109]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046714.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[186.771362304, 328.834350592, 342.2662353408, 356.6565551616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046714_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[39.77136230400001, 7.834350592000021, 195.2662353408, 35.65655516160001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046714.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three cups, and two plates.", "boxes_value": [[186.771362304, 328.834350592, 342.2662353408, 356.6565551616], [264.1297607424, 328.8919677952, 284.860595712, 356.6565551616], [315.6898193664, 337.9262695424, 335.971801728, 364.7357177856], [210.78332520959998, 328.834350592, 231.9977416704, 356.603088384], [293.3098144512, 339.09185792, 342.2662353408, 351.2144164864], [186.771362304, 337.2268676608, 236.42712399360002, 351.680664064]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046714_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three cups, and two plates.", "boxes_value": [[39.77136230400001, 7.834350592000021, 195.2662353408, 35.65655516160001], [117.12976074239998, 7.891967795200003, 137.86059571200002, 35.65655516160001], [168.6898193664, 16.926269542400007, 188.971801728, 42], [63.78332520959998, 7.834350592000021, 84.99774167039999, 35.60308838399999], [146.3098144512, 18.091857919999995, 195.2662353408, 30.21441648640001], [39.77136230400001, 16.226867660799996, 89.42712399360002, 30.680664063999984]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046716.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046716_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046716.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, four people, a high heels, and a leather shoes.", "boxes_value": [[64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352], [0, 183.3220214784, 510.659667968, 257.82836912640005], [64.4458618368, 55.2186889728, 174.8035278336, 332.4328613376], [64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352], [242.8262939648, 62.5975951872, 494.5206908928, 766.7966308608001], [105.6591186432, 86.75524899839999, 211.6152343552, 312.1851806976], [250.5177001984, 702.0148925952, 335.0866699264, 759.9855956736], [64.6167602688, 309.182251008, 93.9332885504, 332.1755371008]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046716_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, four people, a high heels, and a leather shoes.", "boxes_value": [[64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352], [0, 183.3220214784, 486, 257.82836912640005], [64.4458618368, 55.2186889728, 174.8035278336, 332.4328613376], [64.5751952896, 124.3579101696, 402.3724365312, 759.8007812352], [242.8262939648, 62.5975951872, 486, 766.7966308608001], [105.6591186432, 86.75524899839999, 211.6152343552, 312.1851806976], [250.5177001984, 702.0148925952, 335.0866699264, 759.9855956736], [64.6167602688, 309.182251008, 93.9332885504, 332.1755371008]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046717.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[444.4901123328, 263.2742309376, 599.2467041279999, 371.026550272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046717_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[39.49011233279998, 27.27423093760001, 194.24670412799992, 135.026550272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046717.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five people.", "boxes_value": [[444.4901123328, 263.2742309376, 599.2467041279999, 371.026550272], [444.4901123328, 278.3925170688, 465.10595704319996, 354.8087768576], [456.30981442559994, 290.487182592, 490.669677696, 371.026550272], [482.6982422016, 271.2456665088, 504.96337889279994, 336.3919067136], [538.4985351936, 263.2742309376, 564.3371582208, 323.4725952], [577.5312499967999, 284.7147216896, 599.2467041279999, 340.5150756864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046717_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five people.", "boxes_value": [[39.49011233279998, 27.27423093760001, 194.24670412799992, 135.026550272], [39.49011233279998, 42.392517068799975, 60.105957043199965, 118.80877685759998], [51.30981442559994, 54.48718259200001, 85.66967769600001, 135.026550272], [77.69824220160001, 35.245666508800014, 99.96337889279994, 100.39190671360001], [133.4985351936, 27.27423093760001, 159.3371582208, 87.4725952], [172.53124999679994, 48.714721689600026, 194.24670412799992, 104.5150756864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046718.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference.", "boxes_value": [[312.87902828, 346.1743774208, 341.896484384, 380.50482176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046718_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference.", "boxes_value": [[7.87902828, 9.174377420799999, 36.89648438400002, 43.50482176000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046718.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two wine glasses, two chairs, and a desk.", "boxes_value": [[312.87902828, 346.1743774208, 341.896484384, 380.50482176], [312.87902828, 346.1743774208, 325.548583997, 376.4178466816], [327.183349584, 351.07873536, 341.896484384, 380.50482176], [302.809448214, 347.7966919168, 414.531372073, 485.300537088], [184.361816366, 365.7319335936, 367.824829099, 511.082519552], [290.10522459099997, 365.7319335936, 427.609130858, 511.829833984]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046718_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two wine glasses, two chairs, and a desk.", "boxes_value": [[7.87902828, 9.174377420799999, 36.89648438400002, 43.50482176000003], [7.87902828, 9.174377420799999, 20.54858399699998, 39.4178466816], [22.183349583999984, 14.078735359999996, 36.89648438400002, 43.50482176000003], [0, 10.7966919168, 44, 52], [0, 28.73193359359999, 44, 52], [0, 28.73193359359999, 44, 52]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046719.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations.", "boxes_value": [[255.2745361224, 0.0062866432, 686.1612548904, 236.753417984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046719_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations.", "boxes_value": [[108.27453612240001, 0.0062866432, 539.1612548904, 236.753417984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046719.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations. For your reference, objects involved in this region include three lamps, two pictures, and a hat.", "boxes_value": [[255.2745361224, 0.0062866432, 686.1612548904, 236.753417984], [255.2745361224, 0.0062866432, 325.0236205716, 116.2546997248], [655.1563720698, 55.5501098496, 686.1612548904, 145.7965087744], [644.6368407936, 113.1306152448, 681.7319336157, 172.925781248], [382.4100341541, 115.7526244864, 466.3833008145, 236.753417984], [283.2290649531, 106.4957275136, 374.4755859117, 221.545654272], [206.5362548928, 168.445617664, 359.7382812708, 275.2227783168]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046719_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations. For your reference, objects involved in this region include three lamps, two pictures, and a hat.", "boxes_value": [[108.27453612240001, 0.0062866432, 539.1612548904, 236.753417984], [108.27453612240001, 0.0062866432, 178.02362057160002, 116.2546997248], [508.1563720698, 55.5501098496, 539.1612548904, 145.7965087744], [497.63684079359996, 113.1306152448, 534.7319336157, 172.925781248], [235.4100341541, 115.7526244864, 319.3833008145, 236.753417984], [136.22906495310002, 106.4957275136, 227.47558591170002, 221.545654272], [59.5362548928, 168.445617664, 212.73828127079997, 275.2227783168]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046720.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[231.8754883115, 299.276184064, 680.9730224471, 511.0421752832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046720_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[112.87548831149999, 53.276184064000006, 561.9730224471, 265.0421752832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046720.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, three stools, and a chair.", "boxes_value": [[231.8754883115, 299.276184064, 680.9730224471, 511.0421752832], [584.3521728247, 343.234619136, 680.9730224471, 507.601196288], [231.8754883115, 306.24218752, 342.6346435852, 510.345581056], [378.1612548781, 300.6693725696, 498.67272946779997, 511.0421752832], [520.963989249, 299.276184064, 629.6333007812, 464.3700561408], [480.56127929, 460.8870849536, 641.4754638387, 511.0421752832]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046720_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, three stools, and a chair.", "boxes_value": [[112.87548831149999, 53.276184064000006, 561.9730224471, 265.0421752832], [465.3521728247, 97.23461913599999, 561.9730224471, 261.601196288], [112.87548831149999, 60.242187520000016, 223.63464358520002, 264.345581056], [259.1612548781, 54.6693725696, 379.67272946779997, 265.0421752832], [401.96398924899995, 53.276184064000006, 510.6333007812, 218.3700561408], [361.56127929, 214.88708495359998, 522.4754638387, 265.0421752832]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046721.jpg", "text": "What is taking place within the specified area in this capture ? Please point out the objects and their coordinates.", "boxes_value": [[17.574951202399998, 159.0831909376, 130.2729492464, 400.606445312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046721_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please point out the objects and their coordinates.", "boxes_value": [[17.574951202399998, 61.083190937599994, 130.2729492464, 302.606445312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046721.jpg", "text": "What is taking place within the specified area in this capture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, a carpet, a desk, a lamp, a flower, and a bowl.", "boxes_value": [[17.574951202399998, 159.0831909376, 130.2729492464, 400.606445312], [0.8181762537999999, 212.8527832064, 121.9915771476, 373.114318848], [3.0517578572, 308.8980102656, 228.646362334, 388.749633792], [79.8051757956, 224.6975097856, 130.2729492464, 303.5390624768], [17.574951202399998, 159.0831909376, 86.4445190016, 212.0086669824], [54.5563354288, 184.327941888, 107.7032470752, 214.8874511872], [18.3350830384, 361.9450683392, 72.8504638726, 400.606445312]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046721_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, a carpet, a desk, a lamp, a flower, and a bowl.", "boxes_value": [[17.574951202399998, 61.083190937599994, 130.2729492464, 302.606445312], [0.8181762537999999, 114.8527832064, 121.9915771476, 275.114318848], [3.0517578572, 210.89801026560002, 158, 290.749633792], [79.8051757956, 126.6975097856, 130.2729492464, 205.53906247679998], [17.574951202399998, 61.083190937599994, 86.4445190016, 114.0086669824], [54.5563354288, 86.327941888, 107.7032470752, 116.88745118720001], [18.3350830384, 263.9450683392, 72.8504638726, 302.606445312]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046722.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference.", "boxes_value": [[408.1928711168, 215.43072507850002, 511.9625854464, 568.2116699295]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046722_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference.", "boxes_value": [[26.192871116800006, 88.43072507850002, 129.9625854464, 441.2116699295]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046722.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, a chair, a desk, a person, and a bowl.", "boxes_value": [[408.1928711168, 215.43072507850002, 511.9625854464, 568.2116699295], [433.4788208128, 173.4667358615, 511.3255004672, 395.6146240555], [408.1928711168, 411.97583005750005, 470.9661865472, 551.4720458795], [441.6719970816, 474.7491455115, 511.42010496, 568.2116699295], [459.7135009792, 215.43072507850002, 511.9625854464, 375.790039079], [473.2711181824, 459.1038818055, 511.7122192384, 478.868408218]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046722_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, a chair, a desk, a person, and a bowl.", "boxes_value": [[26.192871116800006, 88.43072507850002, 129.9625854464, 441.2116699295], [51.478820812799995, 46.46673586150001, 129.32550046720002, 268.6146240555], [26.192871116800006, 284.97583005750005, 88.96618654719998, 424.4720458795], [59.67199708160001, 347.7491455115, 129.42010496, 441.2116699295], [77.71350097919998, 88.43072507850002, 129.9625854464, 248.790039079], [91.27111818240002, 332.1038818055, 129.7122192384, 351.868408218]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046723.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[173.3407592882, 178.6718139904, 600.0131836055, 334.98205568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046723_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[107.34075928819999, 39.67181399040001, 534.0131836055, 195.98205567999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046723.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a carpet, two cabinets, and a person.", "boxes_value": [[173.3407592882, 178.6718139904, 600.0131836055, 334.98205568], [249.49322510929997, 178.6718139904, 323.8098144553, 254.6774291968], [235.9811401654, 243.6988525568, 542.959350618, 310.8370971648], [543.8972167807, 199.6096801792, 600.0131836055, 334.98205568], [230.1011963072, 137.7503051776, 250.0608520663, 248.9048461824], [173.3407592882, 246.0119018496, 228.83319089329999, 320.2420043776]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046723_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a carpet, two cabinets, and a person.", "boxes_value": [[107.34075928819999, 39.67181399040001, 534.0131836055, 195.98205567999997], [183.49322510929997, 39.67181399040001, 257.8098144553, 115.6774291968], [169.9811401654, 104.6988525568, 476.95935061800003, 171.8370971648], [477.8972167807, 60.60968017920001, 534.0131836055, 195.98205567999997], [164.1011963072, 0, 184.0608520663, 109.90484618240001], [107.34075928819999, 107.01190184960001, 162.83319089329999, 181.24200437759998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046725.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[17.0715332096, 397.6055908023, 448.2313842688, 439.6791992001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046725_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[17.0715332096, 10.605590802300014, 448.2313842688, 52.679199200100015]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046725.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two potted plants, three flowers, and an umbrella.", "boxes_value": [[17.0715332096, 397.6055908023, 448.2313842688, 439.6791992001], [375.4638671872, 387.72778323359995, 420.4179687424, 434.004150381], [396.3223266816, 397.6055908023, 448.2313842688, 439.6791992001], [53.7952270336, 404.8804931367, 82.109374976, 425.2313232705], [35.214050304, 403.1108398695, 55.5648193536, 423.4616699334], [17.0715332096, 401.3659667814, 37.5487670784, 421.2744140748], [65.8421020672, 260.7954101514, 469.8676758016, 479.1258545268]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046725_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two potted plants, three flowers, and an umbrella.", "boxes_value": [[17.0715332096, 10.605590802300014, 448.2313842688, 52.679199200100015], [375.4638671872, 0.7277832335999506, 420.4179687424, 47.00415038099999], [396.3223266816, 10.605590802300014, 448.2313842688, 52.679199200100015], [53.7952270336, 17.88049313670001, 82.109374976, 38.23132327050001], [35.214050304, 16.110839869500012, 55.5648193536, 36.4616699334], [17.0715332096, 14.365966781400004, 37.5487670784, 34.274414074800006], [65.8421020672, 0, 469.8676758016, 63]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046727.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[411.52221680639997, 231.6474609152, 538.0461425664, 349.7363891712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046727_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[32.52221680639997, 29.647460915200014, 159.04614256640002, 147.73638917120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046727.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two sneakers, a bottle, and a bicycle.", "boxes_value": [[411.52221680639997, 231.6474609152, 538.0461425664, 349.7363891712], [419.08972170239997, 168.4483642368, 519.5413818624, 327.145080576], [458.137573248, 278.3190918144, 482.45666503679996, 296.2778320384], [497.0480957184, 304.1347656192, 518.3740234752, 326.5831909376], [464.18615723519997, 267.0993652224, 481.95544435200003, 290.7918701056], [411.52221680639997, 231.6474609152, 538.0461425664, 349.7363891712]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046727_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two sneakers, a bottle, and a bicycle.", "boxes_value": [[32.52221680639997, 29.647460915200014, 159.04614256640002, 147.73638917120002], [40.08972170239997, 0, 140.54138186240004, 125.145080576], [79.13757324800002, 76.3190918144, 103.45666503679996, 94.27783203839999], [118.04809571840002, 102.13476561919998, 139.37402347520003, 124.58319093760002], [85.18615723519997, 65.09936522240002, 102.95544435200003, 88.79187010560003], [32.52221680639997, 29.647460915200014, 159.04614256640002, 147.73638917120002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046730.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[169.893981952, 479.31555172230003, 233.6104736256, 599.1855468846]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046730_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[16.89398195199999, 30.31555172230003, 80.61047362560001, 150.18554688459994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046730.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two cups, a speaker, and a mouse.", "boxes_value": [[169.893981952, 479.31555172230003, 233.6104736256, 599.1855468846], [1.3208617984, 523.6135253850999, 298.2550048768, 703.7678222575], [170.7820434432, 534.2923583643, 195.3715820544, 567.6638183302], [171.1894531072, 578.9931640484, 195.9414062592, 599.1855468846], [169.893981952, 479.31555172230003, 193.5917358592, 536.7745361134], [200.3721923584, 566.3599853162, 233.6104736256, 579.1658935265999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046730_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two cups, a speaker, and a mouse.", "boxes_value": [[16.89398195199999, 30.31555172230003, 80.61047362560001, 150.18554688459994], [0, 74.61352538509993, 96, 180], [17.782043443199996, 85.29235836429996, 42.371582054399994, 118.6638183302], [18.189453107199995, 129.9931640484, 42.941406259199994, 150.18554688459994], [16.89398195199999, 30.31555172230003, 40.591735859200014, 87.77453611340002], [47.372192358400014, 117.35998531619998, 80.61047362560001, 130.16589352659992]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046731.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[144.64123534179998, 0.1511230464, 683.0131836075, 215.826477056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046731_crop.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[134.64123534179998, 0.1511230464, 673, 215.826477056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046731.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, three hats, and two moniters.", "boxes_value": [[144.64123534179998, 0.1511230464, 683.0131836075, 215.826477056], [643.7071532934, 21.6727294976, 661.5091552952, 84.4248047104], [272.0013427862, 189.2686767616, 304.5825805762, 215.826477056], [144.64123534179998, 117.8422851584, 210.2122192322, 186.7545165824], [201.0239257557, 162.9484863488, 251.55957030410002, 207.6370239488], [606.8154296956001, 0.1511230464, 683.0131836075, 117.6037597696], [524.6787109585999, 80.2650756608, 562.696655249, 169.2753295872]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046731_crop.jpg", "text": "Please give me some details about the rectangle in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, three hats, and two moniters.", "boxes_value": [[134.64123534179998, 0.1511230464, 673, 215.826477056], [633.7071532934, 21.6727294976, 651.5091552952, 84.4248047104], [262.0013427862, 189.2686767616, 294.5825805762, 215.826477056], [134.64123534179998, 117.8422851584, 200.2122192322, 186.7545165824], [191.0239257557, 162.9484863488, 241.55957030410002, 207.6370239488], [596.8154296956001, 0.1511230464, 673, 117.6037597696], [514.6787109585999, 80.2650756608, 552.696655249, 169.2753295872]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046734.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[177.2859496704, 289.7726440448, 291.06835937280005, 386.2432861184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046734_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[29.285949670399987, 24.772644044800018, 143.06835937280005, 121.24328611840002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046734.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three chairs, a desk, a flower, and a tea pot.", "boxes_value": [[177.2859496704, 289.7726440448, 291.06835937280005, 386.2432861184], [254.4622802688, 292.6383056896, 324.8278808832, 404.4694213632], [231.5241699072, 289.7726440448, 291.06835937280005, 386.2432861184], [141.64752199679998, 295.5847778304, 212.26409909760002, 367.095214848], [135.54333496319998, 317.71246336, 337.7446288896, 384.09552], [211.90130618880002, 294.60266112, 257.0687255808, 322.4719238144], [177.2859496704, 317.7314452992, 202.1124267264, 340.8927612416]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046734_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three chairs, a desk, a flower, and a tea pot.", "boxes_value": [[29.285949670399987, 24.772644044800018, 143.06835937280005, 121.24328611840002], [106.46228026879999, 27.638305689599974, 171, 139.4694213632], [83.52416990719999, 24.772644044800018, 143.06835937280005, 121.24328611840002], [0, 30.584777830400014, 64.26409909760002, 102.09521484800001], [0, 52.712463360000015, 171, 119.09552000000002], [63.90130618880002, 29.602661119999993, 109.06872558079999, 57.471923814399986], [29.285949670399987, 52.731445299200004, 54.1124267264, 75.8927612416]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046737.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[55.63482665400001, 337.0093993984, 341.50085446049997, 409.732421888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046737_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[55.63482665400001, 19.009399398399978, 341.50085446049997, 91.73242188799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046737.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bench, three people, and a trash bin can.", "boxes_value": [[55.63482665400001, 337.0093993984, 341.50085446049997, 409.732421888], [287.50866701850003, 377.7276611584, 311.9398193205, 409.732421888], [171.089843769, 347.0976562688, 199.2777099525, 397.5390624768], [150.31988523, 337.0093993984, 167.5292968485, 398.1325073408], [55.63482665400001, 343.6703491072, 84.6279907425, 409.068542464], [326.1157226595, 368.9086914048, 341.50085446049997, 397.217407232]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046737_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bench, three people, and a trash bin can.", "boxes_value": [[55.63482665400001, 19.009399398399978, 341.50085446049997, 91.73242188799998], [287.50866701850003, 59.7276611584, 311.9398193205, 91.73242188799998], [171.089843769, 29.097656268799994, 199.2777099525, 79.53906247679998], [150.31988523, 19.009399398399978, 167.5292968485, 80.13250734079998], [55.63482665400001, 25.670349107200025, 84.6279907425, 91.06854246400002], [326.1157226595, 50.90869140479998, 341.50085446049997, 79.21740723200003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046738.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[296.35119632149997, 1.0364379648, 467.5101318335, 307.1158447104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046738_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[43.35119632149997, 1.0364379648, 214.5101318335, 307.1158447104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046738.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a clock, two cabinets, a sink, and a bowl.", "boxes_value": [[296.35119632149997, 1.0364379648, 467.5101318335, 307.1158447104], [355.4172363455, 1.0364379648, 415.792358409, 159.6273193472], [425.47338867650006, 88.583984384, 467.5101318335, 133.0304565248], [288.204589808, 93.8809814528, 374.07031249000005, 227.1055908352], [296.35119632149997, 279.366577152, 375.8341064715, 297.5133666816], [375.9572753845, 275.4934082048, 471.33435056100006, 303.7034912256], [364.5196533485, 287.0099487232, 399.74755862899997, 307.1158447104]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046738_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a clock, two cabinets, a sink, and a bowl.", "boxes_value": [[43.35119632149997, 1.0364379648, 214.5101318335, 307.1158447104], [102.41723634549999, 1.0364379648, 162.79235840899997, 159.6273193472], [172.47338867650006, 88.583984384, 214.5101318335, 133.0304565248], [35.20458980799998, 93.8809814528, 121.07031249000005, 227.1055908352], [43.35119632149997, 279.366577152, 122.83410647149998, 297.5133666816], [122.95727538450001, 275.4934082048, 218.33435056100006, 303.7034912256], [111.51965334850001, 287.0099487232, 146.74755862899997, 307.1158447104]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046741.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[312.5224609528, 32.068542464, 692.072998042, 379.9960937472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046741_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[95.52246095279997, 32.068542464, 475, 379.9960937472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046741.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[312.5224609528, 32.068542464, 692.072998042, 379.9960937472], [312.5224609528, 112.0139160064, 439.2539062292, 363.1436767744], [187.03234866600002, 156.8477172736, 422.216674822, 368.5136108544], [596.3280029480001, 32.068542464, 691.9714355548, 314.7614746112], [612.1049804940001, 68.6139526144, 692.072998042, 379.9960937472], [398.7312011976, 337.0336303616, 433.947631822, 362.7472534016]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046741_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[95.52246095279997, 32.068542464, 475, 379.9960937472], [95.52246095279997, 112.0139160064, 222.25390622920003, 363.1436767744], [0, 156.8477172736, 205.21667482200002, 368.5136108544], [379.32800294800006, 32.068542464, 474.9714355548, 314.7614746112], [395.1049804940001, 68.6139526144, 475, 379.9960937472], [181.7312011976, 337.0336303616, 216.947631822, 362.7472534016]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046742.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[474.475463876, 86.5292358144, 728.9387206826999, 442.7616577024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046742_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[64.47546387599999, 86.5292358144, 318.9387206826999, 442.7616577024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046742.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a car, three street lights, a fire hydrant, and a traffic light.", "boxes_value": [[474.475463876, 86.5292358144, 728.9387206826999, 442.7616577024], [462.2753676955, 355.2806607872, 486.633787592, 392.8504270336], [470.837768589, 354.4240722432, 495.95312502869996, 379.3784790016], [451.28430177399997, 186.6578979328, 545.7458495748, 389.7217406976], [474.475463876, 133.4879760896, 600.6126708966, 404.9939575296], [532.0684814781, 3.9492797952, 744.4959716453, 445.6786499072], [714.7969970883, 399.8541259776, 728.9387206826999, 442.7616577024], [605.2468261559001, 86.5292358144, 654.017578155, 161.7173462016]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046742_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a car, three street lights, a fire hydrant, and a traffic light.", "boxes_value": [[64.47546387599999, 86.5292358144, 318.9387206826999, 442.7616577024], [52.275367695499995, 355.2806607872, 76.63378759199998, 392.8504270336], [60.83776858900001, 354.4240722432, 85.95312502869996, 379.3784790016], [41.28430177399997, 186.6578979328, 135.7458495748, 389.7217406976], [64.47546387599999, 133.4879760896, 190.61267089659998, 404.9939575296], [122.06848147810001, 3.9492797952, 334.49597164529996, 445.6786499072], [304.79699708830003, 399.8541259776, 318.9387206826999, 442.7616577024], [195.24682615590007, 86.5292358144, 244.01757815500002, 161.7173462016]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046744.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[39.8248291064, 107.5218505728, 275.7749023492, 361.951721216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046744_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[39.8248291064, 64.5218505728, 275.7749023492, 318.951721216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046744.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a leather shoes, a suv, two street lights, and a car.", "boxes_value": [[39.8248291064, 107.5218505728, 275.7749023492, 361.951721216], [0.08074950160000001, 140.3067627008, 86.2479248428, 379.351257344], [62.832336409599996, 347.0335083008, 83.45458983159999, 361.951721216], [39.8248291064, 141.8217773568, 152.7858886584, 213.5294189568], [153.6861572304, 107.5218505728, 168.0759888008, 143.7212524544], [215.51745606240002, 172.0512695296, 275.7749023492, 252.3195190272], [250.6295165988, 128.7914428928, 261.6770019612, 161.5394287104]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5]]}, {"image_path": "objects365_v1_00046744_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a leather shoes, a suv, two street lights, and a car.", "boxes_value": [[39.8248291064, 64.5218505728, 275.7749023492, 318.951721216], [0.08074950160000001, 97.3067627008, 86.2479248428, 336.351257344], [62.832336409599996, 304.0335083008, 83.45458983159999, 318.951721216], [39.8248291064, 98.8217773568, 152.7858886584, 170.5294189568], [153.6861572304, 64.5218505728, 168.0759888008, 100.7212524544], [215.51745606240002, 129.0512695296, 275.7749023492, 209.3195190272], [250.6295165988, 85.7914428928, 261.6770019612, 118.53942871039999]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5]]}, {"image_path": "objects365_v1_00046745.jpg", "text": "I would like a description of the content within the bbox in . Please mention the objects and their locations.", "boxes_value": [[214.23651124679998, 65.7601318152, 587.0424804397001, 158.977966302]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046745_crop.jpg", "text": "I would like a description of the content within the bbox in . Please mention the objects and their locations.", "boxes_value": [[93.23651124679998, 23.760131815199998, 466.04248043970006, 116.977966302]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046745.jpg", "text": "I would like a description of the content within the bbox in . Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, two umbrellas, and a hat.", "boxes_value": [[214.23651124679998, 65.7601318152, 587.0424804397001, 158.977966302], [558.4376220705, 65.7601318152, 587.0424804397001, 123.46307375039999], [446.9772949418, 88.9399414242, 468.6774902525, 127.90173337739999], [269.1939697626, 69.17626955520001, 481.1188965081, 147.4707031488], [214.23651124679998, 84.1094970768, 309.3864746334, 152.8769531034], [392.5771484137, 144.7842407058, 415.0124511403, 158.977966302]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046745_crop.jpg", "text": "I would like a description of the content within the bbox in . Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, two umbrellas, and a hat.", "boxes_value": [[93.23651124679998, 23.760131815199998, 466.04248043970006, 116.977966302], [437.43762207049997, 23.760131815199998, 466.04248043970006, 81.46307375039999], [325.9772949418, 46.9399414242, 347.6774902525, 85.90173337739999], [148.19396976259998, 27.176269555200008, 360.1188965081, 105.4707031488], [93.23651124679998, 42.1094970768, 188.38647463339998, 110.87695310340001], [271.5771484137, 102.78424070579999, 294.0124511403, 116.977966302]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046746.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[144.3236084224, 491.82116697600003, 434.2246704128, 627.8986816512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046746_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[73.32360842240001, 34.82116697600003, 363.2246704128, 170.89868165120004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046746.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[144.3236084224, 491.82116697600003, 434.2246704128, 627.8986816512], [144.3236084224, 495.2052002304, 199.0325317632, 619.8514404096], [170.268005376, 514.3815918335999, 274.0458374144, 646.923950208], [344.5471191552, 496.3332519168, 434.2246704128, 627.7475586048], [264.457702656, 491.82116697600003, 321.4226684416, 609.6992187648], [189.7263794176, 507.039428736, 308.1684570112, 627.8986816512]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046746_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[73.32360842240001, 34.82116697600003, 363.2246704128, 170.89868165120004], [73.32360842240001, 38.20520023040001, 128.0325317632, 162.85144040959995], [99.26800537599999, 57.38159183359994, 203.0458374144, 189.92395020799995], [273.5471191552, 39.33325191680001, 363.2246704128, 170.74755860480002], [193.45770265599998, 34.82116697600003, 250.42266844160002, 152.69921876479998], [118.72637941759999, 50.03942873599999, 237.16845701120002, 170.89868165120004]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046748.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[52.1027832, 0, 539.1927490300001, 156.0913696256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046748_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[52.1027832, 0, 539.1927490300001, 156.0913696256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046748.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, and four lamps.", "boxes_value": [[52.1027832, 0, 539.1927490300001, 156.0913696256], [327.57873538, 105.765808128, 424.99462889, 156.0913696256], [52.1027832, 0.5522461184, 69.77661132, 87.1538696192], [213.994812005, 0, 234.14294436, 89.3764038144], [394.58264157, 0, 409.46704103499997, 81.5999145472], [521.51892091, 0.8663330304, 539.1927490300001, 83.72076416]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046748_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, and four lamps.", "boxes_value": [[52.1027832, 0, 539.1927490300001, 156.0913696256], [327.57873538, 105.765808128, 424.99462889, 156.0913696256], [52.1027832, 0.5522461184, 69.77661132, 87.1538696192], [213.994812005, 0, 234.14294436, 89.3764038144], [394.58264157, 0, 409.46704103499997, 81.5999145472], [521.51892091, 0.8663330304, 539.1927490300001, 83.72076416]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046750.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each object you identify.", "boxes_value": [[602.4676513785, 300.4695434752, 752.8159180116, 365.9800415232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046750_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each object you identify.", "boxes_value": [[38.46765137850002, 16.4695434752, 188.81591801160005, 81.98004152319999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046750.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each object you identify. For your reference, objects involved in this region include three microphones, four chairs, and a desk.", "boxes_value": [[602.4676513785, 300.4695434752, 752.8159180116, 365.9800415232], [699.4232177859001, 301.8988647424, 728.3955078122999, 365.9800415232], [639.3918456978, 302.8517455872, 664.6430664420001, 353.5410766848], [602.4676513785, 300.4695434752, 624.3839110938001, 336.440795904], [708.4779053247, 329.1342162944, 752.8159180116, 357.3889160192], [669.7906494624, 326.091430656, 706.7391357248999, 349.5645751808], [635.8850097393, 322.6139526144, 667.1824951668, 343.4789428736], [608.0650635114, 317.8323974656, 636.3197021912999, 336.9586181632], [518.5195312284, 329.1342162944, 877.5712890666, 467.79949952]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7], [8]]}, {"image_path": "objects365_v1_00046750_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Include the coordinates for each object you identify. For your reference, objects involved in this region include three microphones, four chairs, and a desk.", "boxes_value": [[38.46765137850002, 16.4695434752, 188.81591801160005, 81.98004152319999], [135.42321778590008, 17.898864742400008, 164.3955078122999, 81.98004152319999], [75.39184569780002, 18.851745587200014, 100.64306644200008, 69.54107668479998], [38.46765137850002, 16.4695434752, 60.383911093800066, 52.440795904000026], [144.47790532470003, 45.134216294400005, 188.81591801160005, 73.3889160192], [105.79064946239998, 42.091430656, 142.73913572489994, 65.56457518079998], [71.88500973930002, 38.61395261439998, 103.18249516679998, 59.478942873599976], [44.06506351140001, 33.832397465600025, 72.31970219129994, 52.958618163200015], [0, 45.134216294400005, 226, 98]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7], [8]]}, {"image_path": "objects365_v1_00046751.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each mentioned object.", "boxes_value": [[35.991760277, 40.6310424576, 248.17474368100002, 193.188110336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046751_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each mentioned object.", "boxes_value": [[35.991760277, 38.6310424576, 248.17474368100002, 191.188110336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046751.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a hat, two glasses, and a handbag.", "boxes_value": [[35.991760277, 40.6310424576, 248.17474368100002, 193.188110336], [0, 48.3888549888, 191.40893552199998, 512.0942382592], [35.991760277, 40.6310424576, 119.849426273, 75.8588867072], [177.16864015700003, 127.0394287104, 212.052673313, 138.2922973696], [203.083496101, 127.349365248, 248.17474368100002, 193.188110336], [56.155700677000006, 69.6103515648, 101.909240751, 84.1175537152]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046751_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a hat, two glasses, and a handbag.", "boxes_value": [[35.991760277, 38.6310424576, 248.17474368100002, 191.188110336], [0, 46.3888549888, 191.40893552199998, 229], [35.991760277, 38.6310424576, 119.849426273, 73.8588867072], [177.16864015700003, 125.0394287104, 212.052673313, 136.2922973696], [203.083496101, 125.349365248, 248.17474368100002, 191.188110336], [56.155700677000006, 67.6103515648, 101.909240751, 82.1175537152]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046754.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object.", "boxes_value": [[550.5294189834, 195.4404907008, 640.9006347492, 485.2706909184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046754_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object.", "boxes_value": [[23.529418983399978, 73.44049070080001, 113.90063474919998, 363.2706909184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046754.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, a handbag, a street lights, and two speakers.", "boxes_value": [[550.5294189834, 195.4404907008, 640.9006347492, 485.2706909184], [555.432617187, 461.3792114176, 590.0147705448, 489.1741332992], [579.8167724754, 439.6620483584, 626.8570556832, 485.2706909184], [508.20849613080003, 0.104125952, 665.4461670252, 489.6834106368], [606.9366454697999, 195.4404907008, 640.9006347492, 241.672363264], [550.5294189834, 198.2374267392, 583.434570303, 243.3175659008]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046754_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, a handbag, a street lights, and two speakers.", "boxes_value": [[23.529418983399978, 73.44049070080001, 113.90063474919998, 363.2706909184], [28.432617187000005, 339.3792114176, 63.014770544799944, 367.1741332992], [52.816772475399944, 317.6620483584, 99.85705568319997, 363.2706909184], [0, 0, 136, 367.6834106368], [79.93664546979994, 73.44049070080001, 113.90063474919998, 119.67236326400001], [23.529418983399978, 76.2374267392, 56.43457030299999, 121.31756590079999]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046755.jpg", "text": "In the submitted image , please give a synopsis of the area . Please point out the objects and their coordinates.", "boxes_value": [[103.7670898688, 311.10742188899997, 272.4235229696, 481.5246582036]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046755_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please point out the objects and their coordinates.", "boxes_value": [[42.7670898688, 43.107421888999966, 211.42352296960001, 213.52465820359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046755.jpg", "text": "In the submitted image , please give a synopsis of the area . Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a microwave, a cutting, a wine glass, and a bakset.", "boxes_value": [[103.7670898688, 311.10742188899997, 272.4235229696, 481.5246582036], [230.1616821248, 391.9162597632, 272.4235229696, 467.87707518479993], [186.9421386752, 311.10742188899997, 266.3841552896, 367.158203109], [191.1022949376, 462.39367675140005, 272.165649408, 481.5246582036], [159.9445190656, 445.0753173852, 194.010925312, 467.4686279544], [103.7670898688, 451.9298096046, 170.7713012736, 466.406127891]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046755_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a microwave, a cutting, a wine glass, and a bakset.", "boxes_value": [[42.7670898688, 43.107421888999966, 211.42352296960001, 213.52465820359998], [169.1616821248, 123.9162597632, 211.42352296960001, 199.87707518479993], [125.9421386752, 43.107421888999966, 205.38415528960002, 99.158203109], [130.1022949376, 194.39367675140005, 211.16564940799998, 213.52465820359998], [98.94451906559999, 177.07531738519998, 133.010925312, 199.4686279544], [42.7670898688, 183.9298096046, 109.77130127359999, 198.40612789099998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046756.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[54.1903686656, 342.9448241927, 285.7225952256, 411.621337888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046756_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[54.1903686656, 17.944824192700025, 285.7225952256, 86.62133788800003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046756.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, two lamps, and two pillows.", "boxes_value": [[54.1903686656, 342.9448241927, 285.7225952256, 411.621337888], [56.4294433792, 356.4937744462, 438.9755249152, 581.0783691498], [54.1903686656, 345.2371825893, 87.0031738368, 368.24877932400005], [264.5373535232, 342.9448241927, 285.7225952256, 370.3395996145], [76.2516479488, 388.54187014160004, 175.999572736, 414.633300813], [185.5168457216, 389.0593261841, 276.1607666176, 411.621337888]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046756_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, two lamps, and two pillows.", "boxes_value": [[54.1903686656, 17.944824192700025, 285.7225952256, 86.62133788800003], [56.4294433792, 31.493774446200007, 343, 103], [54.1903686656, 20.237182589300005, 87.0031738368, 43.248779324000054], [264.5373535232, 17.944824192700025, 285.7225952256, 45.3395996145], [76.2516479488, 63.54187014160004, 175.999572736, 89.633300813], [185.5168457216, 64.05932618409997, 276.1607666176, 86.62133788800003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046757.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify.", "boxes_value": [[444.2813720662, 312.4173583872, 644.3461913837, 467.0501098496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046757_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify.", "boxes_value": [[50.28137206619999, 39.41735838720001, 250.3461913837, 194.0501098496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046757.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, and two napkins.", "boxes_value": [[444.2813720662, 312.4173583872, 644.3461913837, 467.0501098496], [493.66149904680003, 312.4173583872, 644.3461913837, 467.0501098496], [433.78247070929996, 327.5516357632, 589.7312012045, 511.1368408064], [335.2007446155, 300.6151122944, 568.0961913868, 473.9166870016], [444.2813720662, 328.9116210688, 479.9405517467, 341.2772827136], [504.9595947542, 316.2583007744, 535.7301025582, 326.8985595904]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046757_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, and two napkins.", "boxes_value": [[50.28137206619999, 39.41735838720001, 250.3461913837, 194.0501098496], [99.66149904680003, 39.41735838720001, 250.3461913837, 194.0501098496], [39.78247070929996, 54.55163576320001, 195.73120120450005, 232], [0, 27.615112294399978, 174.0961913868, 200.9166870016], [50.28137206619999, 55.9116210688, 85.94055174670001, 68.27728271360002], [110.9595947542, 43.25830077440003, 141.73010255819997, 53.8985595904]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046760.jpg", "text": "Please provide details for the area within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[35.274841305799995, 210.4765624832, 200.01940917459999, 335.275878912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046760_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[35.274841305799995, 31.476562483200013, 200.01940917459999, 156.275878912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046760.jpg", "text": "Please provide details for the area within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a piano, a drum, a person, and three speakers.", "boxes_value": [[35.274841305799995, 210.4765624832, 200.01940917459999, 335.275878912], [34.704162568200005, 240.9249267712, 124.8692627236, 255.7622070272], [35.274841305799995, 264.3221435392, 77.50402829389999, 299.1327514624], [154.7824707034, 210.4765624832, 200.01940917459999, 320.8273315328], [33.7117309891, 297.266540544, 88.52075197459999, 330.0259399168], [103.8505248987, 300.4165039104, 151.9396362637, 335.275878912], [139.1578369124, 267.5717163008, 184.7872314182, 320.3259887616]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046760_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a piano, a drum, a person, and three speakers.", "boxes_value": [[35.274841305799995, 31.476562483200013, 200.01940917459999, 156.275878912], [34.704162568200005, 61.924926771200006, 124.8692627236, 76.76220702719999], [35.274841305799995, 85.3221435392, 77.50402829389999, 120.1327514624], [154.7824707034, 31.476562483200013, 200.01940917459999, 141.82733153279997], [33.7117309891, 118.26654054400001, 88.52075197459999, 151.02593991679998], [103.8505248987, 121.41650391040002, 151.9396362637, 156.275878912], [139.1578369124, 88.5717163008, 184.7872314182, 141.3259887616]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046761.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention.", "boxes_value": [[211.30378809479998, 305.0296733184, 663.7263654186, 459.0572091392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046761_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention.", "boxes_value": [[113.30378809479998, 39.0296733184, 565.7263654186, 193.05720913919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046761.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a sneakers, and four pigeons.", "boxes_value": [[211.30378809479998, 305.0296733184, 663.7263654186, 459.0572091392], [388.70715328560004, 386.7005615104, 409.35144042, 408.9328613376], [627.606193023, 403.8145926144, 663.7263654186, 459.0572091392], [274.50126905760004, 361.958998528, 329.514903513, 433.3403813888], [211.30378809479998, 382.4186146304, 272.6826364746, 451.5267067904], [418.3490649546, 305.0296733184, 463.3798715604, 342.1496625152]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046761_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a sneakers, and four pigeons.", "boxes_value": [[113.30378809479998, 39.0296733184, 565.7263654186, 193.05720913919998], [290.70715328560004, 120.70056151040001, 311.35144042, 142.9328613376], [529.606193023, 137.8145926144, 565.7263654186, 193.05720913919998], [176.50126905760004, 95.958998528, 231.51490351299998, 167.34038138879998], [113.30378809479998, 116.41861463039999, 174.68263647459997, 185.5267067904], [320.3490649546, 39.0296733184, 365.3798715604, 76.14966251520002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046763.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations.", "boxes_value": [[0.1658325036, 10.6010131968, 205.0510864488, 355.1814575104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046763_crop.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations.", "boxes_value": [[0.1658325036, 10.6010131968, 205.0510864488, 355.1814575104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046763.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include an american football, three helmets, and a gloves.", "boxes_value": [[0.1658325036, 10.6010131968, 205.0510864488, 355.1814575104], [149.2053222648, 274.9926757888, 205.0510864488, 355.1814575104], [0.1658325036, 10.6010131968, 36.6425170608, 83.554382336], [32.7670898124, 264.9519043072, 109.7216186208, 348.427978496], [146.89453126200002, 129.303283712, 244.71807861960002, 237.5613403136], [177.64898678400002, 275.1503296, 214.3091430984, 323.0330200064]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046763_crop.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include an american football, three helmets, and a gloves.", "boxes_value": [[0.1658325036, 10.6010131968, 205.0510864488, 355.1814575104], [149.2053222648, 274.9926757888, 205.0510864488, 355.1814575104], [0.1658325036, 10.6010131968, 36.6425170608, 83.554382336], [32.7670898124, 264.9519043072, 109.7216186208, 348.427978496], [146.89453126200002, 129.303283712, 244.71807861960002, 237.5613403136], [177.64898678400002, 275.1503296, 214.3091430984, 323.0330200064]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046764.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[286.6009521724, 28.4747924992, 422.042114258, 196.2266845696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046764_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[34.600952172400014, 28.4747924992, 170.04211425800003, 196.2266845696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046764.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, a desk, a head phone, and a tripod.", "boxes_value": [[286.6009521724, 28.4747924992, 422.042114258, 196.2266845696], [385.1138916064, 98.6178588672, 428.5540771552, 169.9042968576], [212.96710202959997, 91.5596923904, 363.5415038936, 169.3018798592], [356.9947509708, 94.0147094528, 365.1782226376, 138.205017088], [286.6009521724, 28.4747924992, 320.226806636, 85.290893568], [307.60510256879996, 171.7209472512, 329.0476074576, 196.2266845696], [368.0832519212, 103.181823744, 422.042114258, 177.1826782208]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5], [6]]}, {"image_path": "objects365_v1_00046764_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, a desk, a head phone, and a tripod.", "boxes_value": [[34.600952172400014, 28.4747924992, 170.04211425800003, 196.2266845696], [133.1138916064, 98.6178588672, 176.5540771552, 169.9042968576], [0, 91.5596923904, 111.54150389360001, 169.3018798592], [104.9947509708, 94.0147094528, 113.17822263760002, 138.205017088], [34.600952172400014, 28.4747924992, 68.22680663599999, 85.290893568], [55.605102568799964, 171.7209472512, 77.04760745760001, 196.2266845696], [116.08325192119997, 103.181823744, 170.04211425800003, 177.1826782208]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5], [6]]}, {"image_path": "objects365_v1_00046765.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[317.8886719005, 230.0048827904, 396.1340331754, 416.259704576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046765_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[19.888671900500015, 47.00488279039999, 98.13403317540002, 233.259704576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046765.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a glasses, a helmet, a gloves, and a sneakers.", "boxes_value": [[317.8886719005, 230.0048827904, 396.1340331754, 416.259704576], [271.51477051269995, 230.2030639616, 461.6051025667, 424.589233408], [317.8886719005, 246.8813476352, 356.8579101336, 267.133117696], [319.1160278181, 230.0048827904, 359.00585934879996, 269.2810058752], [366.6770019201, 317.7625122304, 396.1340331754, 346.9127807488], [340.595214874, 396.9284667904, 380.1781006041, 416.259704576]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046765_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a glasses, a helmet, a gloves, and a sneakers.", "boxes_value": [[19.888671900500015, 47.00488279039999, 98.13403317540002, 233.259704576], [0, 47.20306396160001, 117, 241.58923340799998], [19.888671900500015, 63.8813476352, 58.8579101336, 84.133117696], [21.116027818100008, 47.00488279039999, 61.00585934879996, 86.28100587519998], [68.6770019201, 134.76251223039998, 98.13403317540002, 163.9127807488], [42.59521487400002, 213.92846679040002, 82.1781006041, 233.259704576]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046766.jpg", "text": "What does the area look like in the context of the image ? Please mention the objects and their locations.", "boxes_value": [[201.049804674, 346.4687499776, 386.3013916168, 489.7500939264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046766_crop.jpg", "text": "What does the area look like in the context of the image ? Please mention the objects and their locations.", "boxes_value": [[47.049804674, 36.4687499776, 232.3013916168, 179.75009392639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046766.jpg", "text": "What does the area look like in the context of the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a carpet, two backpacks, a sneakers, two chairs, and a desk.", "boxes_value": [[201.049804674, 346.4687499776, 386.3013916168, 489.7500939264], [0.3050537068, 327.7093506048, 690.8106689632, 510.1609497088], [237.93681314239998, 424.7706967552, 292.39573653080004, 489.7500939264], [366.89221612200004, 418.667702784, 387.3446410772, 441.473061632], [224.636875082, 347.6654744064, 272.1569466724, 403.7284801536], [201.049804674, 346.4687499776, 283.6242675444, 463.0815429632], [267.23547361280004, 372.942993152, 368.08984373280003, 506.574951168], [264.1715088208, 355.2305908224, 386.3013916168, 458.6818847744]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00046766_crop.jpg", "text": "What does the area look like in the context of the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a carpet, two backpacks, a sneakers, two chairs, and a desk.", "boxes_value": [[47.049804674, 36.4687499776, 232.3013916168, 179.75009392639998], [0, 17.709350604800022, 278, 200.1609497088], [83.93681314239998, 114.77069675519999, 138.39573653080004, 179.75009392639998], [212.89221612200004, 108.66770278400003, 233.34464107719998, 131.473061632], [70.63687508199999, 37.665474406399994, 118.15694667240001, 93.7284801536], [47.049804674, 36.4687499776, 129.62426754440003, 153.08154296319998], [113.23547361280004, 62.942993151999985, 214.08984373280003, 196.57495116799998], [110.1715088208, 45.23059082240002, 232.3013916168, 148.68188477439998]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00046767.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[361.30383302, 256.779541028, 521.099731462, 315.63525388510004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046767_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[40.30383302000001, 14.779541027999983, 200.09973146200002, 73.63525388510004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046767.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a cup, a plate, a bowl, and a bread.", "boxes_value": [[361.30383302, 256.779541028, 521.099731462, 315.63525388510004], [336.34851073, 243.5445556627, 653.6135253919999, 344.84674072350003], [361.30383302, 260.39257814810003, 397.53051755399997, 306.6881713679], [438.537353522, 289.7758788882, 521.099731462, 315.63525388510004], [473.748046842, 256.779541028, 504.70886229399997, 273.7730712802], [452.860839846, 293.5509033354, 496.168579126, 311.4097900293]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046767_crop.jpg", "text": "Explain the content within the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a cup, a plate, a bowl, and a bread.", "boxes_value": [[40.30383302000001, 14.779541027999983, 200.09973146200002, 73.63525388510004], [15.348510729999987, 1.5445556626999917, 240, 88], [40.30383302000001, 18.39257814810003, 76.53051755399997, 64.6881713679], [117.53735352199999, 47.7758788882, 200.09973146200002, 73.63525388510004], [152.748046842, 14.779541027999983, 183.70886229399997, 31.773071280199986], [131.86083984599998, 51.550903335399994, 175.168579126, 69.40979002929998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046769.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[284.6276855808, 208.2424251392, 375.1633300992, 358.6677245952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046769_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[23.627685580800005, 38.24242513920001, 114.16333009919998, 188.6677245952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046769.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a watch, a belt, and two bottles.", "boxes_value": [[284.6276855808, 208.2424251392, 375.1633300992, 358.6677245952], [140.4404296704, 144.4837646336, 557.7111816192, 325.3934936576], [284.6276855808, 218.4647826944, 305.2054443264, 233.4303588864], [326.501733888, 208.2424251392, 340.4247209472, 257.7057134592], [366.15747072, 321.4894409216, 375.1633300992, 356.5894164992], [357.1514892288, 319.1802368, 369.8521728768, 358.6677245952]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046769_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a watch, a belt, and two bottles.", "boxes_value": [[23.627685580800005, 38.24242513920001, 114.16333009919998, 188.6677245952], [0, 0, 136, 155.3934936576], [23.627685580800005, 48.464782694399986, 44.20544432640003, 63.4303588864], [65.50173388799999, 38.24242513920001, 79.4247209472, 87.70571345920001], [105.15747071999999, 151.48944092160002, 114.16333009919998, 186.5894164992], [96.15148922880002, 149.1802368, 108.85217287680001, 188.6677245952]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046770.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[92.46948239289999, 372.4826660352, 206.1428833079, 487.5988158976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046770_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[28.46948239289999, 29.482666035199998, 142.1428833079, 144.5988158976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046770.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five laptops.", "boxes_value": [[92.46948239289999, 372.4826660352, 206.1428833079, 487.5988158976], [92.46948239289999, 459.983947776, 119.4539184825, 487.5988158976], [177.5968627919, 425.7781982208, 206.1428833079, 453.5526733312], [132.5036620805, 397.555114752, 152.7133788924, 425.4356079104], [111.2613525382, 386.2701415936, 129.03704831689998, 409.1351318528], [93.59637454279999, 372.4826660352, 110.0338135083, 392.7134399488]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046770_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five laptops.", "boxes_value": [[28.46948239289999, 29.482666035199998, 142.1428833079, 144.5988158976], [28.46948239289999, 116.98394777599998, 55.4539184825, 144.5988158976], [113.59686279190001, 82.7781982208, 142.1428833079, 110.55267333120003], [68.50366208049999, 54.55511475200001, 88.71337889239999, 82.4356079104], [47.2613525382, 43.27014159359999, 65.03704831689998, 66.13513185279999], [29.596374542799992, 29.482666035199998, 46.0338135083, 49.71343994879999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046772.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[247.7993164232, 90.7640380928, 474.6953125005, 330.7786254848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046772_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[56.7993164232, 60.76403809280001, 283.6953125005, 300.7786254848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046772.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a storage box, a person, a hat, and a shovel.", "boxes_value": [[247.7993164232, 90.7640380928, 474.6953125005, 330.7786254848], [379.74194338309997, 90.7640380928, 474.6953125005, 232.2509155328], [282.8341064845, 211.4395751936, 328.1052246053, 271.1152343552], [266.4920653883, 273.1569213952, 332.5192870749, 354.5904541184], [299.3370361022, 190.690429696, 500.77661135970004, 512.1715087872], [315.4882812757, 204.8960571392, 445.69921876300003, 330.7786254848], [247.7993164232, 96.8095092736, 288.6712646469, 241.4331664896]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6]]}, {"image_path": "objects365_v1_00046772_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a storage box, a person, a hat, and a shovel.", "boxes_value": [[56.7993164232, 60.76403809280001, 283.6953125005, 300.7786254848], [188.74194338309997, 60.76403809280001, 283.6953125005, 202.2509155328], [91.83410648450001, 181.4395751936, 137.1052246053, 241.11523435520002], [75.49206538829998, 243.15692139520002, 141.51928707489998, 324.5904541184], [108.33703610219999, 160.690429696, 309.77661135970004, 360], [124.48828127569999, 174.8960571392, 254.69921876300003, 300.7786254848], [56.7993164232, 66.8095092736, 97.6712646469, 211.4331664896]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6]]}, {"image_path": "objects365_v1_00046773.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[41.115478514, 15.5634765824, 413.80163573490006, 269.0761718784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046773_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[41.115478514, 15.5634765824, 413.80163573490006, 269.0761718784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046773.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include four pictures, and a lamp.", "boxes_value": [[41.115478514, 15.5634765824, 413.80163573490006, 269.0761718784], [376.106323228, 194.3966674944, 413.80163573490006, 238.4931640832], [247.3731079392, 153.856384256, 280.8010253815, 243.4717407232], [285.0684203778, 166.6586303488, 303.560485829, 237.0706787328], [41.115478514, 82.0218505728, 143.5330810237, 269.0761718784], [87.2730712747, 15.5634765824, 161.342895521, 102.5661621248]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046773_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include four pictures, and a lamp.", "boxes_value": [[41.115478514, 15.5634765824, 413.80163573490006, 269.0761718784], [376.106323228, 194.3966674944, 413.80163573490006, 238.4931640832], [247.3731079392, 153.856384256, 280.8010253815, 243.4717407232], [285.0684203778, 166.6586303488, 303.560485829, 237.0706787328], [41.115478514, 82.0218505728, 143.5330810237, 269.0761718784], [87.2730712747, 15.5634765824, 161.342895521, 102.5661621248]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046775.jpg", "text": "I request a description of the area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[80.358703632, 143.402038592, 391.619812032, 279.62017824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046775_crop.jpg", "text": "I request a description of the area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[78.358703632, 34.402038592, 389.619812032, 170.62017823999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046775.jpg", "text": "I request a description of the area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, two hats, a van, a strawberry, and a wallet.", "boxes_value": [[80.358703632, 143.402038592, 391.619812032, 279.62017824], [208.64019777599998, 141.885986304, 300.310058592, 325.609313984], [162.229919424, 138.434020992, 225.90020750399998, 276.51416012799996], [164.168884272, 186.721252416, 189.36096192000002, 267.681884736], [176.725280784, 139.452880832, 202.865112288, 165.592651392], [240.476318352, 143.402038592, 279.780090336, 167.097106944], [80.358703632, 161.99243161599998, 391.619812032, 279.62017824], [344.512756368, 187.476440448, 356.88128664, 212.864501952], [74.809631328, 213.80664064, 103.19183352, 231.79138182399998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046775_crop.jpg", "text": "I request a description of the area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, two hats, a van, a strawberry, and a wallet.", "boxes_value": [[78.358703632, 34.402038592, 389.619812032, 170.62017823999997], [206.64019777599998, 32.885986304, 298.310058592, 204], [160.229919424, 29.434020992, 223.90020750399998, 167.51416012799996], [162.168884272, 77.721252416, 187.36096192000002, 158.68188473599997], [174.725280784, 30.452880832000005, 200.865112288, 56.59265139199999], [238.476318352, 34.402038592, 277.780090336, 58.09710694399999], [78.358703632, 52.992431615999976, 389.619812032, 170.62017823999997], [342.512756368, 78.476440448, 354.88128664, 103.86450195200001], [72.809631328, 104.80664064000001, 101.19183352, 122.79138182399998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046776.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[31.967834496, 16.88085936, 533.105590848, 346.966857888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046776_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[31.967834496, 16.88085936, 533.105590848, 346.966857888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046776.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include an air conditioner, a fan, and three chairs.", "boxes_value": [[31.967834496, 16.88085936, 533.105590848, 346.966857888], [31.967834496, 16.88085936, 200.59649657600002, 72.667785648], [237.7350464, 40.901916480000004, 292.827087424, 103.17980956800001], [108.849609344, 216.13244630399998, 198.615600576, 280.70898436799996], [301.397888192, 211.088867184, 422.19262694400004, 317.96160887999997], [395.082153344, 216.81005860800002, 533.105590848, 346.966857888]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046776_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include an air conditioner, a fan, and three chairs.", "boxes_value": [[31.967834496, 16.88085936, 533.105590848, 346.966857888], [31.967834496, 16.88085936, 200.59649657600002, 72.667785648], [237.7350464, 40.901916480000004, 292.827087424, 103.17980956800001], [108.849609344, 216.13244630399998, 198.615600576, 280.70898436799996], [301.397888192, 211.088867184, 422.19262694400004, 317.96160887999997], [395.082153344, 216.81005860800002, 533.105590848, 346.966857888]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046778.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[329.05944822659995, 154.0051269632, 601.7639160502, 333.9709472768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046778_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[69.05944822659995, 45.00512696320001, 341.7639160502, 224.97094727680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046778.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, two chairs, a nightstand, a lamp, and a picture.", "boxes_value": [[329.05944822659995, 154.0051269632, 601.7639160502, 333.9709472768], [161.1961669898, 205.8065796096, 530.3164062412, 491.20135495680006], [562.1351318669999, 261.438598656, 641.3891601644, 337.6610717696], [524.1802978248, 258.9858398208, 601.7639160502, 333.9709472768], [352.4887695024, 273.649597184, 405.180664063, 290.8757934592], [338.617187489, 211.6654052864, 395.9412842054, 280.7149658112], [329.05944822659995, 154.0051269632, 375.9610595482, 212.8713378816]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046778_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, two chairs, a nightstand, a lamp, and a picture.", "boxes_value": [[69.05944822659995, 45.00512696320001, 341.7639160502, 224.97094727680002], [0, 96.80657960959999, 270.31640624119996, 269], [302.1351318669999, 152.438598656, 381.3891601644, 228.66107176960003], [264.18029782480005, 149.98583982079998, 341.7639160502, 224.97094727680002], [92.48876950239998, 164.64959718400002, 145.180664063, 181.8757934592], [78.617187489, 102.66540528639999, 135.9412842054, 171.7149658112], [69.05944822659995, 45.00512696320001, 115.96105954820001, 103.8713378816]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046779.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations.", "boxes_value": [[398.69494625280004, 351.084289536, 625.4384765952, 420.8385619968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046779_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations.", "boxes_value": [[56.69494625280004, 18.08428953600003, 283.4384765952, 87.83856199680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046779.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two slippers, a bowl, a plate, and a pot.", "boxes_value": [[398.69494625280004, 351.084289536, 625.4384765952, 420.8385619968], [464.830322304, 90.4187621888, 679.010864256, 395.400695808], [586.9555663872, 366.3251342848, 625.4384765952, 394.6376342528], [485.05688478720003, 360.5779419136, 501.61694338560005, 381.1770019328], [398.69494625280004, 351.084289536, 476.2011718656, 400.6882323968], [417.8742675456, 392.2666015744, 435.29443361280005, 420.8385619968], [487.65515136, 344.0780639744, 592.7268066048, 438.4166870016]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046779_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two slippers, a bowl, a plate, and a pot.", "boxes_value": [[56.69494625280004, 18.08428953600003, 283.4384765952, 87.83856199680002], [122.83032230399999, 0, 337.010864256, 62.40069580800002], [244.95556638719995, 33.325134284800015, 283.4384765952, 61.63763425280001], [143.05688478720003, 27.5779419136, 159.61694338560005, 48.17700193280001], [56.69494625280004, 18.08428953600003, 134.20117186559997, 67.6882323968], [75.87426754559999, 59.2666015744, 93.29443361280005, 87.83856199680002], [145.65515136, 11.078063974399981, 250.7268066048, 105]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046781.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[25.8491211154, 190.8071288832, 107.7800292768, 367.0523681792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046781_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[20.8491211154, 44.807128883199994, 102.7800292768, 221.05236817920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046781.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two paddles, and three helmets.", "boxes_value": [[25.8491211154, 190.8071288832, 107.7800292768, 367.0523681792], [51.886596692400005, 195.9274902528, 90.3724975449, 367.0523681792], [12.026184089000001, 198.6765136896, 39.516113259600004, 372.5503540224], [25.8491211154, 190.8071288832, 50.647216764700005, 205.9114379776], [81.5172119196, 255.1440429568, 107.7800292768, 278.160827648], [28.9916381971, 263.4064941568, 55.4019775439, 285.5380249088]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046781_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two paddles, and three helmets.", "boxes_value": [[20.8491211154, 44.807128883199994, 102.7800292768, 221.05236817920002], [46.886596692400005, 49.9274902528, 85.3724975449, 221.05236817920002], [7.026184089000001, 52.6765136896, 34.516113259600004, 226.55035402239997], [20.8491211154, 44.807128883199994, 45.647216764700005, 59.9114379776], [76.5172119196, 109.1440429568, 102.7800292768, 132.160827648], [23.9916381971, 117.40649415680002, 50.4019775439, 139.53802490880003]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046782.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[216.8182372864, 561.6142578392, 512.4230957056, 711.271850568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046782_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[74.8182372864, 37.614257839200036, 370, 187.271850568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046782.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cars, a pickup truck, a suv, and a machinery vehicle.", "boxes_value": [[216.8182372864, 561.6142578392, 512.4230957056, 711.271850568], [351.3825683456, 670.7354736164, 405.8760986112, 700.0782470672], [434.0308227584, 667.3161620963999, 480.7861938688, 707.0722656012], [474.3468627968, 670.1158447232, 512.4230957056, 711.271850568], [216.8182372864, 561.6142578392, 287.5535888896, 584.159423838], [343.1502075392, 609.6862792608, 367.6895141376, 623.3192138531999]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00046782_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cars, a pickup truck, a suv, and a machinery vehicle.", "boxes_value": [[74.8182372864, 37.614257839200036, 370, 187.271850568], [209.38256834560002, 146.7354736164, 263.8760986112, 176.07824706719998], [292.0308227584, 143.31616209639992, 338.7861938688, 183.0722656012], [332.3468627968, 146.11584472319998, 370, 187.271850568], [74.8182372864, 37.614257839200036, 145.5535888896, 60.15942383799995], [201.15020753919998, 85.6862792608, 225.68951413759999, 99.31921385319993]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00046784.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[14.95205688, 210.920898432, 77.870117184, 351.036804224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046784_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[14.95205688, 35.920898432, 77.870117184, 176.03680422399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046784.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a necklace, and two wine glasses.", "boxes_value": [[14.95205688, 210.920898432, 77.870117184, 351.036804224], [39.45080568, 229.29534912000003, 142.24871827200002, 576.652832], [0, 139.761657728, 117.378234864, 561.730590848], [47.008972176, 210.920898432, 77.36145019199999, 231.734008768], [53.365295424, 304.01403808000003, 77.870117184, 351.036804224], [14.95205688, 273.632934592, 40.01098632, 326.18377683200004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046784_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a necklace, and two wine glasses.", "boxes_value": [[14.95205688, 35.920898432, 77.870117184, 176.03680422399998], [39.45080568, 54.295349120000026, 93, 211], [0, 0, 93, 211], [47.008972176, 35.920898432, 77.36145019199999, 56.734008767999995], [53.365295424, 129.01403808000003, 77.870117184, 176.03680422399998], [14.95205688, 98.63293459200003, 40.01098632, 151.18377683200004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046787.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[197.710082989, 409.3149413888, 632.099243165, 506.9611816448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046787_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[108.710082989, 25.31494138879998, 543.099243165, 122.96118164479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046787.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[197.710082989, 409.3149413888, 632.099243165, 506.9611816448], [572.845214861, 432.8284301824, 632.099243165, 463.1381225472], [486.0120849335, 409.3149413888, 544.1739501945, 465.0191650304], [441.50329587150003, 414.2299804672, 487.923583968, 470.4804077056], [339.378784157, 440.1707153408, 367.503967302, 464.4730834944], [197.710082989, 472.655883776, 264.0637207285, 506.9611816448]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046787_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[108.710082989, 25.31494138879998, 543.099243165, 122.96118164479998], [483.84521486100004, 48.828430182399984, 543.099243165, 79.1381225472], [397.0120849335, 25.31494138879998, 455.1739501945, 81.01916503040002], [352.50329587150003, 30.229980467199994, 398.923583968, 86.48040770559999], [250.37878415699998, 56.17071534079997, 278.503967302, 80.47308349439999], [108.710082989, 88.655883776, 175.06372072850002, 122.96118164479998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046789.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[374.02185062399997, 217.253417984, 662.9826660096, 395.344116224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046789_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[73.02185062399997, 45.25341798400001, 361.98266600960005, 223.344116224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046789.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a piano, two people, and two leather shoes.", "boxes_value": [[374.02185062399997, 217.253417984, 662.9826660096, 395.344116224], [341.89685061119997, 244.8335571456, 445.7830810368, 363.9971923968], [321.5367431424, 331.3607788032, 511.37854003199993, 394.8573608448], [554.2950439680001, 226.4198608384, 662.9826660096, 395.344116224], [374.02185062399997, 217.253417984, 526.795654272, 342.9645385728], [573.4511718912, 379.191162112, 596.647949184, 393.4085083136], [605.2531738368, 353.749633792, 651.2723388672, 382.9325561344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046789_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a piano, two people, and two leather shoes.", "boxes_value": [[73.02185062399997, 45.25341798400001, 361.98266600960005, 223.344116224], [40.896850611199966, 72.8335571456, 144.7830810368, 191.99719239680002], [20.536743142399985, 159.3607788032, 210.37854003199993, 222.85736084479998], [253.29504396800007, 54.41986083840001, 361.98266600960005, 223.344116224], [73.02185062399997, 45.25341798400001, 225.79565427199998, 170.9645385728], [272.45117189120003, 207.19116211199997, 295.647949184, 221.4085083136], [304.2531738368, 181.749633792, 350.2723388672, 210.9325561344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046790.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe.", "boxes_value": [[326.0648193573, 326.546081536, 615.6516113318, 377.3838500864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046790_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe.", "boxes_value": [[73.0648193573, 13.546081535999974, 362.65161133180004, 64.3838500864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046790.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a trash bin can, three benches, and two desks.", "boxes_value": [[326.0648193573, 326.546081536, 615.6516113318, 377.3838500864], [458.53540040250004, 318.2857055744, 502.4071045294, 376.385986304], [326.0648193573, 337.4667968512, 420.5853271665, 362.6973876736], [323.4287109489, 321.2740478464, 439.4140624806, 364.2036743168], [527.1563720695, 344.2451782144, 615.6516113318, 377.3838500864], [451.84118654509996, 343.8685913088, 545.2319335968, 376.2540893696], [448.4519042726, 326.546081536, 592.6805420151, 374.7478027264]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3, 6]]}, {"image_path": "objects365_v1_00046790_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a trash bin can, three benches, and two desks.", "boxes_value": [[73.0648193573, 13.546081535999974, 362.65161133180004, 64.3838500864], [205.53540040250004, 5.285705574400026, 249.4071045294, 63.38598630400003], [73.0648193573, 24.4667968512, 167.5853271665, 49.69738767360002], [70.4287109489, 8.27404784639998, 186.41406248060002, 51.203674316800004], [274.1563720695, 31.245178214400028, 362.65161133180004, 64.3838500864], [198.84118654509996, 30.868591308799978, 292.2319335968, 63.254089369600024], [195.45190427260002, 13.546081535999974, 339.68054201509995, 61.74780272639998]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3, 6]]}, {"image_path": "objects365_v1_00046791.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[0.297912576, 394.2946777225, 512.225708032, 574.03002928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046791_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[0.297912576, 45.29467772250001, 512, 225.03002928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046791.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, a bowl, a gas stove, and two bottles.", "boxes_value": [[0.297912576, 394.2946777225, 512.225708032, 574.03002928], [0.297912576, 456.720703125, 83.0246582272, 574.03002928], [289.0541381632, 407.31842042, 367.164916992, 470.92285157], [53.8621215744, 394.2946777225, 146.821533184, 446.22021481499996], [50.6625366016, 412.88513181, 347.473327616, 532.763305655], [498.763488768, 427.7897338925, 512.225708032, 509.2043456825], [473.1287841792, 358.36724856, 501.068542464, 472.9201659925]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046791_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cabinets, a bowl, a gas stove, and two bottles.", "boxes_value": [[0.297912576, 45.29467772250001, 512, 225.03002928], [0.297912576, 107.720703125, 83.0246582272, 225.03002928], [289.0541381632, 58.318420419999995, 367.164916992, 121.92285156999998], [53.8621215744, 45.29467772250001, 146.821533184, 97.22021481499996], [50.6625366016, 63.88513181000002, 347.473327616, 183.76330565499995], [498.763488768, 78.78973389250001, 512, 160.2043456825], [473.1287841792, 9.367248560000007, 501.068542464, 123.92016599250002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046792.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[128.11376956619998, 134.7992553472, 328.6912841775, 245.35681152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046792_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[51.11376956619998, 27.79925534719999, 251.6912841775, 138.35681152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046792.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five wild birds.", "boxes_value": [[128.11376956619998, 134.7992553472, 328.6912841775, 245.35681152], [128.11376956619998, 227.5063476736, 178.4020996168, 245.35681152], [156.3289794631, 174.146972672, 234.2567138964, 187.9666747904], [154.2176513593, 134.7992553472, 232.1453857157, 162.2467040768], [227.7307739582, 187.1989135872, 259.9766845897, 226.7385864192], [280.8981933305, 157.832031232, 328.6912841775, 175.6824951296]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046792_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five wild birds.", "boxes_value": [[51.11376956619998, 27.79925534719999, 251.6912841775, 138.35681152], [51.11376956619998, 120.5063476736, 101.4020996168, 138.35681152], [79.32897946310001, 67.146972672, 157.2567138964, 80.96667479039999], [77.21765135929999, 27.79925534719999, 155.1453857157, 55.2467040768], [150.7307739582, 80.1989135872, 182.97668458970003, 119.7385864192], [203.89819333050002, 50.83203123199999, 251.6912841775, 68.6824951296]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046793.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for each element you describe.", "boxes_value": [[191.6109008672, 139.68139648, 589.4685058864001, 425.14990233599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046793_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for each element you describe.", "boxes_value": [[99.6109008672, 71.68139647999999, 497.46850588640007, 357.14990233599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046793.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a slippers, a hat, and a car.", "boxes_value": [[191.6109008672, 139.68139648, 589.4685058864001, 425.14990233599997], [546.1304931697999, 139.53454592, 620.794799833, 385.4525756928], [512.3537597846, 172.1260986368, 553.8339843518, 317.8991699456], [546.9844970714, 372.8309326336, 584.0233154522, 382.97857664], [554.8349609032, 139.68139648, 589.4685058864001, 158.7444457984], [191.6109008672, 155.5969848832, 558.293212907, 425.14990233599997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046793_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a slippers, a hat, and a car.", "boxes_value": [[99.6109008672, 71.68139647999999, 497.46850588640007, 357.14990233599997], [454.13049316979993, 71.53454592, 528.794799833, 317.4525756928], [420.3537597846, 104.12609863680001, 461.83398435180004, 249.89916994560002], [454.9844970714, 304.8309326336, 492.0233154522, 314.97857664], [462.8349609032, 71.68139647999999, 497.46850588640007, 90.7444457984], [99.6109008672, 87.59698488320001, 466.293212907, 357.14990233599997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046794.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[307.1204223904, 233.3865356288, 418.86975097199996, 291.6119384576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046794_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[28.120422390399995, 15.386535628800004, 139.86975097199996, 73.61193845759999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046794.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a necklace, a wine glass, and a tea pot.", "boxes_value": [[307.1204223904, 233.3865356288, 418.86975097199996, 291.6119384576], [1.2819823942, 263.867004416, 587.8189697006, 509.4450683392], [262.2283935235, 169.3804931584, 378.21569824240004, 300.885376], [307.1204223904, 233.3865356288, 329.1270141918, 260.5613403136], [381.52026369920003, 256.9252319232, 399.9475097867, 291.6119384576], [383.33703611789997, 241.1740112384, 418.86975097199996, 285.4155884032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046794_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a necklace, a wine glass, and a tea pot.", "boxes_value": [[28.120422390399995, 15.386535628800004, 139.86975097199996, 73.61193845759999], [0, 45.867004415999986, 167, 88], [0, 0, 99.21569824240004, 82.88537600000001], [28.120422390399995, 15.386535628800004, 50.127014191800015, 42.5613403136], [102.52026369920003, 38.92523192319999, 120.94750978669998, 73.61193845759999], [104.33703611789997, 23.1740112384, 139.86975097199996, 67.41558840319999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046796.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference.", "boxes_value": [[406.8041992512, 97.4379272704, 464.653564428, 279.1683959808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046796_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference.", "boxes_value": [[14.804199251199975, 45.437927270399996, 72.65356442799998, 227.16839598080003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046796.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, two pictures, a person, and a speaker.", "boxes_value": [[406.8041992512, 97.4379272704, 464.653564428, 279.1683959808], [428.1099853776, 97.4379272704, 462.60437010239997, 206.3239135744], [406.8041992512, 177.7655639552, 423.127807596, 213.0597534208], [433.85668943279995, 175.8251953152, 464.0163574032, 216.6609497088], [435.29882810640004, 225.1125488128, 464.653564428, 279.1683959808], [442.6983642252, 127.1635131904, 458.21594240879995, 149.5193481216]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046796_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, two pictures, a person, and a speaker.", "boxes_value": [[14.804199251199975, 45.437927270399996, 72.65356442799998, 227.16839598080003], [36.109985377600026, 45.437927270399996, 70.60437010239997, 154.3239135744], [14.804199251199975, 125.76556395520001, 31.127807596000025, 161.0597534208], [41.85668943279995, 123.8251953152, 72.0163574032, 164.6609497088], [43.29882810640004, 173.1125488128, 72.65356442799998, 227.16839598080003], [50.69836422520001, 75.1635131904, 66.21594240879995, 97.51934812159999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046797.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[399.78564454589997, 269.4226684416, 464.62207031959997, 495.9315795968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046797_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[16.78564454589997, 57.422668441600024, 81.62207031959997, 283.9315795968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046797.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a pen, a laptop, and a moniter.", "boxes_value": [[399.78564454589997, 269.4226684416, 464.62207031959997, 495.9315795968], [0, 333.1947631616, 492.0548095666, 512.7145996288], [380.7790526961, 45.1921386496, 753.6502685321, 510.994323712], [399.78564454589997, 392.9268798976, 437.29943849200004, 495.9315795968], [407.1701659862, 311.0380859392, 464.62207031959997, 355.8109130752], [439.10217281710004, 269.4226684416, 459.5688476644, 310.499267584]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046797_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a person, a pen, a laptop, and a moniter.", "boxes_value": [[16.78564454589997, 57.422668441600024, 81.62207031959997, 283.9315795968], [0, 121.19476316160001, 97, 300], [0, 0, 97, 298.994323712], [16.78564454589997, 180.92687989759997, 54.299438492000036, 283.9315795968], [24.170165986200004, 99.03808593920002, 81.62207031959997, 143.8109130752], [56.10217281710004, 57.422668441600024, 76.56884766439998, 98.499267584]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046798.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[484.659179648, 193.1330566656, 858.0083008, 275.8872680448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046798_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[93.65917964800002, 21.13305666560001, 467.00830080000003, 103.88726804480001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046798.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, two people, three moniters, and a watermelon.", "boxes_value": [[484.659179648, 193.1330566656, 858.0083008, 275.8872680448], [775.6256103679999, 162.1350708224, 812.7481689599999, 218.461059584], [494.66955569920003, 186.484680192, 571.2985840128, 280.6636352512], [673.8100586112, 218.89849856, 729.0279540992001, 274.3158569472], [551.9479980672, 255.461364736, 606.8060302975999, 275.8872680448], [741.0339355776, 254.3138427904, 786.2260741888, 272.5341796864], [832.2000732416, 231.6353759744, 858.0083008, 259.2056274432], [484.659179648, 193.1330566656, 517.048950208, 234.5022582784]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046798_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, two people, three moniters, and a watermelon.", "boxes_value": [[93.65917964800002, 21.13305666560001, 467.00830080000003, 103.88726804480001], [384.6256103679999, 0, 421.74816895999993, 46.461059584], [103.66955569920003, 14.484680192000013, 180.2985840128, 108.66363525119999], [282.8100586112, 46.89849856000001, 338.0279540992001, 102.31585694720002], [160.94799806720005, 83.46136473600001, 215.80603029759993, 103.88726804480001], [350.0339355776, 82.3138427904, 395.2260741888, 100.53417968640002], [441.20007324159997, 59.635375974400006, 467.00830080000003, 87.2056274432], [93.65917964800002, 21.13305666560001, 126.04895020799995, 62.50225827840001]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046799.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify.", "boxes_value": [[367.214843765, 0.2454834176, 770.3330078289999, 511.3284912128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046799_crop.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify.", "boxes_value": [[101.21484376500001, 0.2454834176, 504, 511.3284912128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046799.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bracelet, a plate, a cup, and three breads.", "boxes_value": [[367.214843765, 0.2454834176, 770.3330078289999, 511.3284912128], [450.459350572, 0.2454834176, 770.3330078289999, 511.3284912128], [498.077392582, 314.058776832, 580.517944314, 357.2155761664], [291.237915045, 396.8663940608, 529.536254863, 467.5155639808], [467.14965820599997, 131.760864256, 574.782836936, 248.8772582912], [470.26538089300004, 370.7509155328, 529.236450204, 410.0650024448], [367.214843765, 371.942260736, 475.03063962199997, 413.0433349632], [319.56140139, 386.2382812672, 422.016235333, 432.7003784192]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046799_crop.jpg", "text": "Help me grasp the context of the region within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bracelet, a plate, a cup, and three breads.", "boxes_value": [[101.21484376500001, 0.2454834176, 504, 511.3284912128], [184.459350572, 0.2454834176, 504, 511.3284912128], [232.07739258200002, 314.058776832, 314.51794431400003, 357.2155761664], [25.237915045000022, 396.8663940608, 263.53625486299995, 467.5155639808], [201.14965820599997, 131.760864256, 308.78283693599997, 248.8772582912], [204.26538089300004, 370.7509155328, 263.236450204, 410.0650024448], [101.21484376500001, 371.942260736, 209.03063962199997, 413.0433349632], [53.561401390000015, 386.2382812672, 156.016235333, 432.7003784192]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046800.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[196.03277588, 36.500854512000004, 552.1005859439999, 238.65344236800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046800_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[89.03277588, 36.500854512000004, 445.10058594399993, 238.65344236800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046800.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a bench, three people, and a boat.", "boxes_value": [[196.03277588, 36.500854512000004, 552.1005859439999, 238.65344236800001], [282.162475552, 80.842651344, 372.31469729599996, 158.000000016], [135.56701658400002, 57.326049792, 356.51855466399996, 167.034667968], [343.913940452, 141.29718019199998, 584.078125012, 327.46765137600005], [430.55517575600004, 138.05316163199998, 552.1005859439999, 238.65344236800001], [196.03277588, 36.500854512000004, 290.286010748, 173.91381835199996], [77.665710468, 48.20074464, 669.0589599680001, 428.457275376]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046800_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a bench, three people, and a boat.", "boxes_value": [[89.03277588, 36.500854512000004, 445.10058594399993, 238.65344236800001], [175.162475552, 80.842651344, 265.31469729599996, 158.000000016], [28.567016584000015, 57.326049792, 249.51855466399996, 167.034667968], [236.91394045200002, 141.29718019199998, 477.078125012, 289], [323.55517575600004, 138.05316163199998, 445.10058594399993, 238.65344236800001], [89.03277588, 36.500854512000004, 183.28601074800002, 173.91381835199996], [0, 48.20074464, 534, 289]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046804.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[870.9525146, 239.3251953, 999.7017822, 410.04339600000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046804_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[32.95251459999997, 43.32519529999999, 161.70178220000003, 214.04339600000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046804.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two chairs, a desk, two people, a handbag, and three cars.", "boxes_value": [[870.9525146, 239.3251953, 999.7017822, 410.04339600000003], [856.0783691, 318.00646975, 905.1126709, 393.16857910000004], [944.4832763999999, 329.817688, 993.8754883, 412.85388185], [895.4489745999999, 336.618042, 928.7349854, 398.1793823], [909.3686523, 297.54382325, 976.1793213, 404.0234375], [870.9525146, 293.36816405, 922.730835, 398.59509275], [920.5625, 384.7350464, 943.5524902, 410.04339600000003], [927.9709472999999, 239.9107666, 999.5313721, 286.1913452], [961.9462891000001, 260.75012204999996, 999.7017822, 290.06030275], [891.463623, 239.3251953, 966.9324951, 277.36639405]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6], [7, 8, 9]]}, {"image_path": "objects365_v1_00046804_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two chairs, a desk, two people, a handbag, and three cars.", "boxes_value": [[32.95251459999997, 43.32519529999999, 161.70178220000003, 214.04339600000003], [18.07836910000003, 122.00646975000001, 67.11267090000001, 197.16857910000004], [106.48327639999991, 133.81768799999998, 155.87548830000003, 216.85388185], [57.44897459999993, 140.618042, 90.73498540000003, 202.1793823], [71.36865230000001, 101.54382325, 138.17932129999997, 208.0234375], [32.95251459999997, 97.36816405000002, 84.73083499999996, 202.59509275], [82.5625, 188.7350464, 105.55249019999997, 214.04339600000003], [89.97094729999992, 43.91076659999999, 161.5313721, 90.1913452], [123.94628910000006, 64.75012204999996, 161.70178220000003, 94.06030275], [53.463622999999984, 43.32519529999999, 128.93249509999998, 81.36639405]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6], [7, 8, 9]]}, {"image_path": "objects365_v1_00046806.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[506.5190429615, 203.0305175552, 682.7298584056999, 354.8157348864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046806_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.51904296150002, 38.03051755519999, 220.72985840569993, 189.81573488639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046806.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two boats, and a canned.", "boxes_value": [[506.5190429615, 203.0305175552, 682.7298584056999, 354.8157348864], [506.5190429615, 322.8235473408, 528.5303954778, 354.8087158272], [527.8881836231, 320.7037963776, 551.1347656127, 354.8157348864], [567.9938964961, 219.9591064576, 641.3911132704001, 243.7921142784], [650.8822021614, 228.1846313472, 682.7298584056999, 245.479431168], [515.9921875301001, 203.0305175552, 547.9396972816, 245.6272583168]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046806_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two boats, and a canned.", "boxes_value": [[44.51904296150002, 38.03051755519999, 220.72985840569993, 189.81573488639998], [44.51904296150002, 157.82354734080002, 66.53039547779997, 189.8087158272], [65.88818362309996, 155.70379637759999, 89.1347656127, 189.81573488639998], [105.99389649609998, 54.9591064576, 179.39111327040007, 78.79211427839999], [188.88220216139996, 63.184631347199996, 220.72985840569993, 80.47943116799999], [53.99218753010007, 38.03051755519999, 85.93969728160005, 80.62725831680001]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046807.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[200.3384399264, 7.529602048, 739.1945800443999, 209.3840942592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046807_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[135.3384399264, 7.529602048, 674.1945800443999, 209.3840942592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046807.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, and two lamps.", "boxes_value": [[200.3384399264, 7.529602048, 739.1945800443999, 209.3840942592], [584.9881591988, 7.529602048, 739.1945800443999, 144.4094848512], [447.188354458, 86.5567626752, 516.26586917, 147.8975830016], [332.8865967056, 63.8409423872, 400.4602050832, 182.5278930432], [262.336914046, 135.1873168896, 312.072753882, 183.2652588032], [200.3384399264, 130.5482177536, 256.6497802668, 209.3840942592]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00046807_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, and two lamps.", "boxes_value": [[135.3384399264, 7.529602048, 674.1945800443999, 209.3840942592], [519.9881591988, 7.529602048, 674.1945800443999, 144.4094848512], [382.188354458, 86.5567626752, 451.26586917, 147.8975830016], [267.8865967056, 63.8409423872, 335.4602050832, 182.5278930432], [197.336914046, 135.1873168896, 247.07275388199997, 183.2652588032], [135.3384399264, 130.5482177536, 191.6497802668, 209.3840942592]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00046811.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[208.71374509860001, 133.4848022528, 427.70727538719996, 178.6838379008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046811_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[55.713745098600015, 11.484802252799994, 274.70727538719996, 56.6838379008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046811.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[208.71374509860001, 133.4848022528, 427.70727538719996, 178.6838379008], [400.9892577906, 133.4848022528, 427.70727538719996, 172.709106432], [384.5036621343, 145.4226684416, 405.5369873366, 173.2775878656], [288.4326171986, 133.4848022528, 324.8145752089, 174.9829711872], [249.6434325866, 139.4247436288, 275.53771974069997, 178.6838379008], [208.71374509860001, 149.0307006976, 232.1021118095, 177.4308471808]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046811_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[55.713745098600015, 11.484802252799994, 274.70727538719996, 56.6838379008], [247.9892577906, 11.484802252799994, 274.70727538719996, 50.709106432], [231.5036621343, 23.422668441599996, 252.53698733660002, 51.2775878656], [135.43261719859998, 11.484802252799994, 171.8145752089, 52.98297118720001], [96.6434325866, 17.4247436288, 122.53771974069997, 56.6838379008], [55.713745098600015, 27.030700697599997, 79.1021118095, 55.43084718079999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046812.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[33.1590576341, 264.2891235328, 356.9007568409, 511.043457024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046812_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[33.1590576341, 62.28912353279998, 356.9007568409, 309.043457024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046812.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[33.1590576341, 264.2891235328, 356.9007568409, 511.043457024], [31.1850586154, 255.7349853696, 162.7873535068, 464.9826660352], [263.4631347884, 291.9255981568, 375.3250732682, 496.5671996928], [276.6233520655, 264.2891235328, 354.9267578222, 446.5583496192], [1.5745239297, 308.37591552, 148.9691162034, 511.043457024], [33.1590576341, 277.4493408256, 356.9007568409, 511.043457024]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046812_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[33.1590576341, 62.28912353279998, 356.9007568409, 309.043457024], [31.1850586154, 53.7349853696, 162.7873535068, 262.9826660352], [263.4631347884, 89.92559815679999, 375.3250732682, 294.5671996928], [276.6233520655, 62.28912353279998, 354.9267578222, 244.5583496192], [1.5745239297, 106.37591551999998, 148.9691162034, 309.043457024], [33.1590576341, 75.44934082560002, 356.9007568409, 309.043457024]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046815.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[347.57409664830004, 330.7198486528, 618.1055907871, 472.335510272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046815_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[68.57409664830004, 35.71984865280001, 339.1055907871, 177.33551027200002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046815.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a car, and three traffic lights.", "boxes_value": [[347.57409664830004, 330.7198486528, 618.1055907871, 472.335510272], [599.6949463123, 415.2623901184, 618.1055907871, 472.335510272], [347.57409664830004, 419.7695312384, 365.5270996368, 448.9870605312], [288.1679687684, 424.2005615104, 497.35803219319996, 494.27923584], [592.6181640693, 331.7879638528, 613.8508300614, 369.3892211712], [456.22668458399994, 330.7198486528, 472.6684570479, 367.5473022464], [451.6149902439, 383.9144287232, 467.9852295224, 404.1726074368]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046815_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a car, and three traffic lights.", "boxes_value": [[68.57409664830004, 35.71984865280001, 339.1055907871, 177.33551027200002], [320.6949463123, 120.26239011839999, 339.1055907871, 177.33551027200002], [68.57409664830004, 124.76953123840002, 86.52709963680002, 153.98706053119997], [9.167968768399987, 129.2005615104, 218.35803219319996, 199.27923584], [313.61816406929995, 36.787963852799976, 334.85083006139996, 74.3892211712], [177.22668458399994, 35.71984865280001, 193.6684570479, 72.54730224640002], [172.61499024390002, 88.91442872319999, 188.9852295224, 109.17260743679998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046816.jpg", "text": "Can you provide a description of the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[152.9722900609, 292.6608886784, 290.8281250066, 512.0048827904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046816_crop.jpg", "text": "Can you provide a description of the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[34.97229006090001, 55.6608886784, 172.82812500659998, 275]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046816.jpg", "text": "Can you provide a description of the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three faucets, and two chairs.", "boxes_value": [[152.9722900609, 292.6608886784, 290.8281250066, 512.0048827904], [152.9722900609, 292.6608886784, 181.4163208166, 358.635314944], [213.81097411980002, 284.364685056, 251.7363281548, 344.8083495936], [253.7042236212, 280.5339355648, 265.39697264579996, 317.3536376832], [155.83892818799998, 339.7530517504, 290.8281250066, 512.0048827904], [242.3163452051, 324.2855224832, 341.4489746274, 481.0698242048]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046816_crop.jpg", "text": "Can you provide a description of the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three faucets, and two chairs.", "boxes_value": [[34.97229006090001, 55.6608886784, 172.82812500659998, 275], [34.97229006090001, 55.6608886784, 63.416320816600006, 121.63531494400002], [95.81097411980002, 47.364685055999985, 133.7363281548, 107.80834959359998], [135.7042236212, 43.533935564800004, 147.39697264579996, 80.35363768320002], [37.83892818799998, 102.7530517504, 172.82812500659998, 275], [124.31634520509999, 87.2855224832, 207, 244.0698242048]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046817.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates.", "boxes_value": [[0.126953126, 348.77691648, 142.129089378, 512.1215820288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046817_crop.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates.", "boxes_value": [[0.126953126, 41.77691648000001, 142.129089378, 205]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046817.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, three flags, a bus, and a truck.", "boxes_value": [[0.126953126, 348.77691648, 142.129089378, 512.1215820288], [45.904479958, 475.500305152, 93.969909648, 512.1215820288], [104.755615226, 325.0798340096, 134.471069354, 394.102355968], [57.73754884, 334.1072997888, 90.838317852, 406.5151367168], [40.622985864, 348.77691648, 64.132019028, 404.6343994368], [26.17974855, 449.1944580096, 142.129089378, 482.6909179904], [0.126953126, 442.8959350784, 26.17974855, 504.4493408256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046817_crop.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, three flags, a bus, and a truck.", "boxes_value": [[0.126953126, 41.77691648000001, 142.129089378, 205], [45.904479958, 168.500305152, 93.969909648, 205], [104.755615226, 18.079834009600006, 134.471069354, 87.10235596799998], [57.73754884, 27.107299788799992, 90.838317852, 99.51513671679999], [40.622985864, 41.77691648000001, 64.132019028, 97.63439943679998], [26.17974855, 142.1944580096, 142.129089378, 175.6909179904], [0.126953126, 135.8959350784, 26.17974855, 197.44934082560002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046818.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each mentioned object.", "boxes_value": [[370.1984863248, 371.361084, 472.06665040380005, 481.6040039]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046818_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each mentioned object.", "boxes_value": [[26.1984863248, 28.361084000000005, 128.06665040380005, 138.6040039]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046818.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a street lights, a horse, and three people.", "boxes_value": [[370.1984863248, 371.361084, 472.06665040380005, 481.6040039], [370.1984863248, 420.2550049, 384.569091782, 448.18652345], [407.8964843882, 398.26049805, 422.0827636714, 430.7567749], [425.2907715078, 371.361084, 451.8726806276, 481.6040039], [440.745361296, 378.98535155, 453.52111812460004, 437.91894529999996], [450.63623044419995, 382.28234865, 472.06665040380005, 450.6947632]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046818_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a street lights, a horse, and three people.", "boxes_value": [[26.1984863248, 28.361084000000005, 128.06665040380005, 138.6040039], [26.1984863248, 77.25500490000002, 40.56909178199999, 105.18652344999998], [63.89648438820001, 55.260498050000024, 78.08276367140002, 87.75677489999998], [81.2907715078, 28.361084000000005, 107.8726806276, 138.6040039], [96.745361296, 35.98535155000002, 109.52111812460004, 94.91894529999996], [106.63623044419995, 39.28234865000002, 128.06665040380005, 107.69476320000001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046820.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please mention the objects and their locations.", "boxes_value": [[327.0828857576, 364.4615478272, 682.9533691312, 511.7440185344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046820_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please mention the objects and their locations.", "boxes_value": [[89.08288575760002, 37.46154782719998, 444.9533691312, 184.74401853440003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046820.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, three people, a glasses, a hat, and a cup.", "boxes_value": [[327.0828857576, 364.4615478272, 682.9533691312, 511.7440185344], [547.0919189555, 364.4615478272, 655.4028320432001, 503.3602905088], [590.4631347395, 415.7592163328, 682.9533691312, 511.7440185344], [312.3694458182, 376.5827636736, 364.9422607383, 458.3227538944], [287.2346191481, 423.7543334912, 385.5701904027, 512.0429687296], [327.0828857576, 439.1309814272, 355.0981445148, 457.8442382848], [312.56762694229997, 375.343261696, 344.63537597519996, 401.7002563584], [345.0654296572, 495.7586857472, 366.4780248961, 511.5801391616]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046820_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, three people, a glasses, a hat, and a cup.", "boxes_value": [[89.08288575760002, 37.46154782719998, 444.9533691312, 184.74401853440003], [309.0919189555, 37.46154782719998, 417.4028320432001, 176.3602905088], [352.4631347395, 88.75921633280001, 444.9533691312, 184.74401853440003], [74.3694458182, 49.58276367360003, 126.94226073829998, 131.32275389440002], [49.23461914810002, 96.75433349119999, 147.5701904027, 185], [89.08288575760002, 112.13098142720003, 117.09814451480003, 130.84423828479999], [74.56762694229997, 48.34326169600001, 106.63537597519996, 74.7002563584], [107.06542965720001, 168.7586857472, 128.4780248961, 184.58013916160002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046825.jpg", "text": "Within the input image , what can be found in the region defined by ? Specify the location of each mentioned object.", "boxes_value": [[135.17376711789998, 67.7330932736, 460.6289062391, 197.3574829056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046825_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Specify the location of each mentioned object.", "boxes_value": [[82.17376711789998, 32.733093273600005, 407.6289062391, 162.3574829056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046825.jpg", "text": "Within the input image , what can be found in the region defined by ? Specify the location of each mentioned object. For your reference, objects involved in this region include three cabinets, and two cups.", "boxes_value": [[135.17376711789998, 67.7330932736, 460.6289062391, 197.3574829056], [214.57843019039998, 69.8726806528, 325.30114748240004, 189.6885376], [316.7428588748, 67.7330932736, 460.6289062391, 170.4324340736], [119.9024658238, 99.2917480448, 234.9042968895, 205.7352905216], [135.17376711789998, 166.4019165184, 154.04907227, 197.3574829056], [203.87988281100002, 165.1715698176, 224.0136108187, 197.1337280512]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046825_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Specify the location of each mentioned object. For your reference, objects involved in this region include three cabinets, and two cups.", "boxes_value": [[82.17376711789998, 32.733093273600005, 407.6289062391, 162.3574829056], [161.57843019039998, 34.8726806528, 272.30114748240004, 154.6885376], [263.7428588748, 32.733093273600005, 407.6289062391, 135.4324340736], [66.9024658238, 64.2917480448, 181.9042968895, 170.7352905216], [82.17376711789998, 131.4019165184, 101.04907227000001, 162.3574829056], [150.87988281100002, 130.1715698176, 171.0136108187, 162.1337280512]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046826.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[678.71166996, 225.6141967872, 911.6635742352, 373.0428466688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046826_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[58.711669959999995, 37.6141967872, 291.66357423520003, 185.0428466688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046826.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a glasses, three hats, three cups, and a bottle.", "boxes_value": [[678.71166996, 225.6141967872, 911.6635742352, 373.0428466688], [726.3975830448001, 262.9896240128, 765.3839111664, 277.1665038848], [720.9200439216, 247.8461303808, 778.2720947088001, 297.1430664192], [678.71166996, 282.6797485568, 712.5999756288, 295.924316416], [862.044555672, 225.6141967872, 911.6635742352, 258.4788208128], [667.8281249808, 337.9534301696, 698.636108424, 384.8612060672], [702.8101806671999, 349.8791503872, 720.3011474976, 378.898376448], [729.5974121088, 347.503784192, 747.4606933632, 386.6097412096], [748.6503906144001, 351.2539673088, 768.8020019856, 373.0428466688]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6, 8], [7]]}, {"image_path": "objects365_v1_00046826_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a glasses, three hats, three cups, and a bottle.", "boxes_value": [[58.711669959999995, 37.6141967872, 291.66357423520003, 185.0428466688], [106.39758304480006, 74.9896240128, 145.38391116640003, 89.16650388480002], [100.92004392160004, 59.84613038079999, 158.27209470880007, 109.14306641920001], [58.711669959999995, 94.6797485568, 92.59997562880005, 107.92431641600001], [242.044555672, 37.6141967872, 291.66357423520003, 70.4788208128], [47.8281249808, 149.95343016959998, 78.63610842399999, 196.8612060672], [82.81018066719992, 161.8791503872, 100.30114749760003, 190.89837644800002], [109.59741210879997, 159.503784192, 127.4606933632, 198.60974120959997], [128.65039061440007, 163.2539673088, 148.8020019856, 185.0428466688]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6, 8], [7]]}, {"image_path": "objects365_v1_00046827.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[500.92700198399996, 100.3589477376, 663.9216308736, 193.856079104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046827_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[40.92700198399996, 24.358947737600005, 203.92163087359995, 117.856079104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046827.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bottle, five cups, and a bowl.", "boxes_value": [[500.92700198399996, 100.3589477376, 663.9216308736, 193.856079104], [525.7436523264, 100.3589477376, 551.2111816704, 134.3478393344], [646.6938476544, 104.2912597504, 663.9216308736, 124.064147968], [614.9788818432, 104.4869995008, 637.2967529472, 125.8261108224], [613.0391845632, 171.8007202304, 660.7425537024001, 201.9465942528], [569.0264892672, 169.1525878784, 590.03869632, 200.1029052928], [552.2735596032, 169.1525878784, 568.4586181632, 193.856079104], [500.92700198399996, 117.4452514816, 520.0057373184, 135.2521362432]], "boxes_seq": [[0], [0], [1], [2, 3, 5, 6, 7], [4]]}, {"image_path": "objects365_v1_00046827_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bottle, five cups, and a bowl.", "boxes_value": [[40.92700198399996, 24.358947737600005, 203.92163087359995, 117.856079104], [65.74365232640002, 24.358947737600005, 91.21118167040004, 58.34783933439999], [186.69384765439997, 28.291259750400002, 203.92163087359995, 48.064147968], [154.97888184320004, 28.486999500799996, 177.29675294720005, 49.8261108224], [153.03918456320002, 95.8007202304, 200.74255370240007, 125.9465942528], [109.02648926719996, 93.1525878784, 130.03869631999999, 124.10290529279999], [92.27355960320006, 93.1525878784, 108.45861816319996, 117.856079104], [40.92700198399996, 41.445251481599996, 60.00573731839995, 59.2521362432]], "boxes_seq": [[0], [0], [1], [2, 3, 5, 6, 7], [4]]}, {"image_path": "objects365_v1_00046828.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each object you identify.", "boxes_value": [[70.172058112, 159.8967284922, 199.7231445504, 664.0698241836001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046828_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each object you identify.", "boxes_value": [[33.172058112, 126.8967284922, 162.7231445504, 631.0698241836001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046828.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two clocks, a person, a handbag, a street lights, and a truck.", "boxes_value": [[70.172058112, 159.8967284922, 199.7231445504, 664.0698241836001], [119.3070068224, 159.8967284922, 150.5907592704, 193.039611817], [86.369628928, 165.0432129167, 101.9581908992, 198.28771969139999], [73.7158813696, 589.4719238339, 101.2299194368, 664.0698241836001], [70.172058112, 599.6799316345, 85.9993896448, 628.0740967006], [175.4770507776, 505.93127445159996, 199.7231445504, 543.3780517937], [99.8633422848, 539.8403320318, 304.6155395584, 626.4605712856]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046828_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two clocks, a person, a handbag, a street lights, and a truck.", "boxes_value": [[33.172058112, 126.8967284922, 162.7231445504, 631.0698241836001], [82.3070068224, 126.8967284922, 113.59075927040001, 160.039611817], [49.369628928, 132.0432129167, 64.9581908992, 165.28771969139999], [36.7158813696, 556.4719238339, 64.2299194368, 631.0698241836001], [33.172058112, 566.6799316345, 48.999389644800004, 595.0740967006], [138.4770507776, 472.93127445159996, 162.7231445504, 510.3780517937], [62.8633422848, 506.8403320318, 195, 593.4605712856]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046829.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Give coordinates for the items you reference.", "boxes_value": [[44.4424513567, 203.8948974592, 300.9057300555, 251.5821830144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046829_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Give coordinates for the items you reference.", "boxes_value": [[44.4424513567, 12.894897459199996, 300.9057300555, 60.58218301439999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046829.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a backpack, two sneakers, and a trash bin can.", "boxes_value": [[44.4424513567, 203.8948974592, 300.9057300555, 251.5821830144], [89.61761473179999, 212.3664550912, 329.9446411414, 335.9631958016], [151.7974243259, 165.0639648256, 268.9091796746, 335.5817260544], [273.365229994, 211.3614732288, 300.9057300555, 235.625976576], [147.7317835682, 211.5505527296, 181.29899121090003, 223.4155866624], [44.4424513567, 237.1179290112, 70.7206546549, 251.5821830144], [89.9140624809, 203.8948974592, 112.8578491243, 237.4793091072]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046829_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a backpack, two sneakers, and a trash bin can.", "boxes_value": [[44.4424513567, 12.894897459199996, 300.9057300555, 60.58218301439999], [89.61761473179999, 21.36645509120001, 329.9446411414, 72], [151.7974243259, 0, 268.9091796746, 72], [273.365229994, 20.36147322880001, 300.9057300555, 44.625976576], [147.7317835682, 20.550552729600014, 181.29899121090003, 32.41558666239999], [44.4424513567, 46.117929011200005, 70.7206546549, 60.58218301439999], [89.9140624809, 12.894897459199996, 112.8578491243, 46.47930910720001]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046831.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[545.0244140514, 153.3147582976, 598.6751708995, 218.7565307392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046831_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[14.024414051399958, 17.314758297600008, 67.67517089950002, 82.7565307392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046831.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bed, a lamp, a cabinet, and two pictures.", "boxes_value": [[545.0244140514, 153.3147582976, 598.6751708995, 218.7565307392], [336.5964355586, 150.6079711744, 827.314453159, 511.9967651328], [545.0244140514, 181.7509765632, 566.2750244240001, 218.7565307392], [532.5085449356, 169.6339111424, 598.6751708995, 232.8334350336], [545.267211903, 153.3147582976, 571.9711914427, 172.8977050624], [571.6744384576999, 156.8753051648, 598.6751708995, 179.425353984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046831_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bed, a lamp, a cabinet, and two pictures.", "boxes_value": [[14.024414051399958, 17.314758297600008, 67.67517089950002, 82.7565307392], [0, 14.607971174400006, 81, 99], [14.024414051399958, 45.7509765632, 35.275024424000094, 82.7565307392], [1.5085449355999572, 33.633911142399995, 67.67517089950002, 96.83343503360001], [14.267211902999975, 17.314758297600008, 40.97119144270005, 36.89770506240001], [40.67443845769992, 20.87530516480001, 67.67517089950002, 43.425353984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046833.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations.", "boxes_value": [[0, 0.60498048, 234.9071655578, 512.8195800576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046833_crop.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations.", "boxes_value": [[0, 0.60498048, 234.9071655578, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046833.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a picture, a person, a bracelet, a sneakers, and a cup.", "boxes_value": [[0, 0.60498048, 234.9071655578, 512.8195800576], [0, 346.2960815616, 145.962524386, 511.7513427968], [0, 0.60498048, 115.1626587149, 185.8900756992], [0.0975341758, 170.2879028224, 234.9071655578, 512.8195800576], [138.9856567104, 334.224121088, 158.9724121337, 353.8777465856], [202.65924071510003, 475.5106811392, 252.63110352820001, 511.8060302848], [218.5776977594, 295.1729125888, 246.2141723662, 343.1303100416]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046833_crop.jpg", "text": "I request a description of the area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a picture, a person, a bracelet, a sneakers, and a cup.", "boxes_value": [[0, 0.60498048, 234.9071655578, 512], [0, 346.2960815616, 145.962524386, 511.7513427968], [0, 0.60498048, 115.1626587149, 185.8900756992], [0.0975341758, 170.2879028224, 234.9071655578, 512], [138.9856567104, 334.224121088, 158.9724121337, 353.8777465856], [202.65924071510003, 475.5106811392, 252.63110352820001, 511.8060302848], [218.5776977594, 295.1729125888, 246.2141723662, 343.1303100416]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046835.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[641.6137695474, 37.197570816, 702.5308837794, 359.9140014592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046835_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[15.613769547400011, 37.197570816, 76, 359.9140014592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046835.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, two helmets, two skating and skiing shoes, and a laptop.", "boxes_value": [[641.6137695474, 37.197570816, 702.5308837794, 359.9140014592], [449.7648926004, 194.4862060544, 701.7620849694, 511.3000488448], [641.6137695474, 198.4836425728, 666.9958496058, 224.3734130688], [679.1793213113999, 198.9912719872, 702.5308837794, 220.8198852608], [667.5035400072001, 333.5166015488, 693.900878931, 359.9140014592], [637.5526122888, 342.6541748224, 662.4271240236, 363.4675293184], [648.7845458795999, 37.197570816, 682.6329345654, 64.3143310336]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046835_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, two helmets, two skating and skiing shoes, and a laptop.", "boxes_value": [[15.613769547400011, 37.197570816, 76, 359.9140014592], [0, 194.4862060544, 75.76208496940001, 440], [15.613769547400011, 198.4836425728, 40.995849605800004, 224.3734130688], [53.17932131139992, 198.9912719872, 76, 220.8198852608], [41.50354000720006, 333.5166015488, 67.90087893099997, 359.9140014592], [11.552612288799992, 342.6541748224, 36.42712402359996, 363.4675293184], [22.784545879599932, 37.197570816, 56.63293456539998, 64.3143310336]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046838.jpg", "text": "Can you generate a description for the selected region in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[1.2811279403999998, 192.30053712, 321.9591064508, 400.08331300000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046838_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[1.2811279403999998, 52.30053712, 321.9591064508, 260]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046838.jpg", "text": "Can you generate a description for the selected region in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, two people, and three baksets.", "boxes_value": [[1.2811279403999998, 192.30053712, 321.9591064508, 400.08331300000003], [1.2811279403999998, 173.94885252, 98.849884035, 316.89001464], [1.2811279403999998, 307.81439208, 321.9591064508, 400.08331300000003], [67.03109742, 192.30053712, 179.9844665654, 323.38861084], [133.3523254252, 46.704650879999996, 315.2175903198, 342.55957032], [0, 356.40118408, 61.66470338, 399.48040772], [3.9296874848, 331.08660888, 125.6173095604, 374.60992432], [190.89331056039998, 292.56970216, 223.7352905256, 336.67864992]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046838_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, two people, and three baksets.", "boxes_value": [[1.2811279403999998, 52.30053712, 321.9591064508, 260], [1.2811279403999998, 33.94885252, 98.849884035, 176.89001464], [1.2811279403999998, 167.81439208, 321.9591064508, 260], [67.03109742, 52.30053712, 179.9844665654, 183.38861084], [133.3523254252, 0, 315.2175903198, 202.55957031999998], [0, 216.40118408, 61.66470338, 259.48040772], [3.9296874848, 191.08660888000003, 125.6173095604, 234.60992432], [190.89331056039998, 152.56970216000002, 223.7352905256, 196.67864992]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00046841.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object.", "boxes_value": [[151.5005493248, 320.2730712762, 273.1956176896, 588.3829345482]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046841_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object.", "boxes_value": [[30.500549324800005, 67.27307127620003, 152.1956176896, 335.38293454819996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046841.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, a potted plant, a person, and a desk.", "boxes_value": [[151.5005493248, 320.2730712762, 273.1956176896, 588.3829345482], [217.0767212032, 429.66198730549996, 232.1561889792, 450.16320803929995], [253.1542968832, 426.19592287880005, 273.1956176896, 452.62719726840004], [198.1441650176, 478.3488769357, 240.0208740352, 520.2254638761], [194.2630004736, 320.2730712762, 225.8020019712, 427.92578122530006], [151.5005493248, 516.9904784979, 242.2041626112, 588.3829345482]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046841_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, a potted plant, a person, and a desk.", "boxes_value": [[30.500549324800005, 67.27307127620003, 152.1956176896, 335.38293454819996], [96.07672120320001, 176.66198730549996, 111.15618897920001, 197.16320803929995], [132.1542968832, 173.19592287880005, 152.1956176896, 199.62719726840004], [77.14416501759999, 225.3488769357, 119.0208740352, 267.2254638761], [73.26300047359999, 67.27307127620003, 104.80200197120001, 174.92578122530006], [30.500549324800005, 263.99047849789997, 121.20416261119999, 335.38293454819996]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046843.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[254.04235842519998, 178.7879028224, 522.9436034839, 262.3917846528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046843_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[68.04235842519998, 21.787902822400014, 336.9436034839, 105.39178465280003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046843.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[254.04235842519998, 178.7879028224, 522.9436034839, 262.3917846528], [487.98645020270004, 190.0328979456, 522.9436034839, 258.9694213632], [408.5383300941, 215.7007446528, 473.5635986237, 298.5712280064], [345.9577636932, 186.610534656, 378.9592285027, 261.1694946304], [254.04235842519998, 196.388732928, 301.7112427024, 262.3917846528], [275.5544433372, 178.7879028224, 300.4889526641, 250.4134521344]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046843_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[68.04235842519998, 21.787902822400014, 336.9436034839, 105.39178465280003], [301.98645020270004, 33.03289794560001, 336.9436034839, 101.96942136320001], [222.5383300941, 58.70074465280001, 287.5635986237, 126], [159.95776369319998, 29.610534656, 192.9592285027, 104.16949463039998], [68.04235842519998, 39.388732927999996, 115.7112427024, 105.39178465280003], [89.55444333719998, 21.787902822400014, 114.4889526641, 93.41345213439999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046844.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[129.1001587092, 269.7315063296, 219.6673584084, 454.4572143616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046844_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[23.100158709200002, 46.731506329599995, 113.66735840839999, 231.4572143616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046844.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, two people, and a handbag.", "boxes_value": [[129.1001587092, 269.7315063296, 219.6673584084, 454.4572143616], [129.1001587092, 336.417480448, 193.2373657116, 454.4572143616], [196.6489257852, 329.5944214016, 253.2807007164, 454.4572143616], [138.2743530504, 275.2559204352, 202.3575439284, 422.5736083968], [180.9964599408, 269.7315063296, 219.6673584084, 322.7658691584], [169.2110595924, 332.874450688, 190.29919431599998, 359.1420898304]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046844_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, two people, and a handbag.", "boxes_value": [[23.100158709200002, 46.731506329599995, 113.66735840839999, 231.4572143616], [23.100158709200002, 113.41748044799999, 87.2373657116, 231.4572143616], [90.64892578519999, 106.59442140160002, 136, 231.4572143616], [32.27435305040001, 52.25592043519998, 96.35754392839999, 199.5736083968], [74.99645994080001, 46.731506329599995, 113.66735840839999, 99.7658691584], [63.211059592400005, 109.87445068800002, 84.29919431599998, 136.14208983039998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046846.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates.", "boxes_value": [[208.3277587968, 277.7788696064, 450.01513674239993, 356.8266601472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046846_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates.", "boxes_value": [[61.3277587968, 19.778869606400008, 303.01513674239993, 98.82666014720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046846.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, a glasses, a bottle, and a moniter.", "boxes_value": [[208.3277587968, 277.7788696064, 450.01513674239993, 356.8266601472], [238.1170043904, 284.94750976, 290.0527343616, 344.179931648], [407.3179931904, 262.4866333184, 464.32092288, 350.5164184576], [265.8930663936, 262.4866333184, 376.29113771519997, 460.1929321472], [208.3277587968, 277.7788696064, 240.3707275008, 290.2661743104], [387.5717773056, 326.4274902528, 398.59155271680004, 356.8266601472], [416.4942627072, 282.6958618112, 450.01513674239993, 314.6929931776]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046846_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, a glasses, a bottle, and a moniter.", "boxes_value": [[61.3277587968, 19.778869606400008, 303.01513674239993, 98.82666014720002], [91.11700439040001, 26.947509760000003, 143.05273436160002, 86.17993164799998], [260.3179931904, 4.486633318399981, 317.32092288, 92.51641845760003], [118.89306639360001, 4.486633318399981, 229.29113771519997, 118], [61.3277587968, 19.778869606400008, 93.3707275008, 32.266174310400004], [240.5717773056, 68.4274902528, 251.59155271680004, 98.82666014720002], [269.4942627072, 24.695861811200018, 303.01513674239993, 56.69299317759999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046847.jpg", "text": "What details can you provide about the region in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[89.1282348902, 207.2985839616, 256.880737268, 338.1666870272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046847_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[42.128234890200005, 33.2985839616, 209.88073726800002, 164.1666870272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046847.jpg", "text": "What details can you provide about the region in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cymbal, two drums, a person, and a bottle.", "boxes_value": [[89.1282348902, 207.2985839616, 256.880737268, 338.1666870272], [210.6409912008, 223.9681396736, 256.880737268, 247.8104858624], [176.6837158251, 235.188964864, 220.4536742817, 282.4902953984], [133.40032958380002, 270.753417984, 211.9606933252, 338.1666870272], [156.8479003555, 207.2985839616, 219.4641723752, 287.5758666752], [89.1282348902, 307.2311401472, 101.4189453398, 337.2354125824]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046847_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cymbal, two drums, a person, and a bottle.", "boxes_value": [[42.128234890200005, 33.2985839616, 209.88073726800002, 164.1666870272], [163.6409912008, 49.96813967360001, 209.88073726800002, 73.81048586239999], [129.6837158251, 61.18896486400001, 173.4536742817, 108.49029539840001], [86.40032958380002, 96.75341798400001, 164.9606933252, 164.1666870272], [109.8479003555, 33.2985839616, 172.4641723752, 113.57586667520002], [42.128234890200005, 133.2311401472, 54.418945339800004, 163.2354125824]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046851.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[65.84655763399999, 242.4304809472, 467.760498062, 497.2633056768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046851_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[65.84655763399999, 64.43048094720001, 467.760498062, 319.2633056768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046851.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four hats, and a street lights.", "boxes_value": [[65.84655763399999, 242.4304809472, 467.760498062, 497.2633056768], [444.80895995249995, 438.1312866304, 467.760498062, 458.3648681472], [65.84655763399999, 425.7514038272, 97.71215818649999, 445.224853504], [118.81396484099999, 468.7938842624, 145.471130376, 490.1740722688], [161.571411144, 476.665344256, 188.0544433625, 497.2633056768], [180.246521026, 242.4304809472, 207.1833495875, 315.1948852736]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046851_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four hats, and a street lights.", "boxes_value": [[65.84655763399999, 64.43048094720001, 467.760498062, 319.2633056768], [444.80895995249995, 260.1312866304, 467.760498062, 280.3648681472], [65.84655763399999, 247.75140382720002, 97.71215818649999, 267.224853504], [118.81396484099999, 290.7938842624, 145.471130376, 312.1740722688], [161.571411144, 298.665344256, 188.0544433625, 319.2633056768], [180.246521026, 64.43048094720001, 207.1833495875, 137.1948852736]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046854.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[144.6802368508, 403.2657470464, 765.7705077787999, 511.7999267328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046854_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[144.6802368508, 27.26574704640001, 765.7705077787999, 135.79992673279997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046854.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two cars, a suv, and a bicycle.", "boxes_value": [[144.6802368508, 403.2657470464, 765.7705077787999, 511.7999267328], [535.147583025, 403.2657470464, 554.6588134581999, 423.4166870016], [692.7034912022, 420.5260620288, 765.7705077787999, 484.327941888], [383.747192402, 422.2242431488, 648.1278076277999, 511.7999267328], [144.6802368508, 409.1291504128, 383.41894531599996, 511.5170288128], [313.34375001480004, 428.3236694528, 338.7261962596, 465.793090816]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046854_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two cars, a suv, and a bicycle.", "boxes_value": [[144.6802368508, 27.26574704640001, 765.7705077787999, 135.79992673279997], [535.147583025, 27.26574704640001, 554.6588134581999, 47.416687001599996], [692.7034912022, 44.52606202880003, 765.7705077787999, 108.327941888], [383.747192402, 46.224243148799985, 648.1278076277999, 135.79992673279997], [144.6802368508, 33.129150412800016, 383.41894531599996, 135.5170288128], [313.34375001480004, 52.323669452800004, 338.7261962596, 89.79309081600002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046855.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[95.7233276416, 168.103027353, 177.5655517696, 454.2716064411]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046855_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[20.723327641599994, 72.10302735299999, 102.5655517696, 358.2716064411]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046855.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a ring, a watch, a bracelet, and a bottle.", "boxes_value": [[95.7233276416, 168.103027353, 177.5655517696, 454.2716064411], [1.4138793984, 55.506469713899996, 335.8361206272, 504.9416503635], [159.5870971904, 431.73144534109997, 177.5655517696, 454.2716064411], [108.335083008, 370.282714849, 149.6586914304, 423.1447754198], [95.7233276416, 359.0125732388, 137.315246592, 427.4381103463], [96.9541015552, 168.103027353, 124.6126708736, 203.4445800691]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046855_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a ring, a watch, a bracelet, and a bottle.", "boxes_value": [[20.723327641599994, 72.10302735299999, 102.5655517696, 358.2716064411], [0, 0, 123, 408.9416503635], [84.58709719039999, 335.73144534109997, 102.5655517696, 358.2716064411], [33.335083008, 274.282714849, 74.65869143040001, 327.1447754198], [20.723327641599994, 263.0125732388, 62.315246591999994, 331.4381103463], [21.954101555199998, 72.10302735299999, 49.612670873599996, 107.4445800691]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046856.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Specify the location of each mentioned object.", "boxes_value": [[212.87499998250001, 142.956542976, 518.3415527623, 365.7633056768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046856_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Specify the location of each mentioned object.", "boxes_value": [[76.87499998250001, 55.95654297600001, 382.3415527623, 278.7633056768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046856.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Specify the location of each mentioned object. For your reference, objects involved in this region include three pictures, and three people.", "boxes_value": [[212.87499998250001, 142.956542976, 518.3415527623, 365.7633056768], [358.55920412240005, 88.9644775424, 524.0279540909, 236.240295424], [161.0363769369, 83.7664795136, 345.5643310369, 295.1505737216], [212.87499998250001, 301.2149047808, 323.0397338677, 353.1945800704], [285.9158935395, 142.956542976, 413.73864749419994, 365.7633056768], [320.8605957307, 114.4479980544, 556.475585948, 512.7296142336], [491.2360840185, 185.9248047104, 518.3415527623, 239.519531264]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046856_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Specify the location of each mentioned object. For your reference, objects involved in this region include three pictures, and three people.", "boxes_value": [[76.87499998250001, 55.95654297600001, 382.3415527623, 278.7633056768], [222.55920412240005, 1.9644775424000045, 388.02795409090004, 149.240295424], [25.036376936900012, 0, 209.5643310369, 208.15057372159998], [76.87499998250001, 214.2149047808, 187.03973386770002, 266.1945800704], [149.91589353950002, 55.95654297600001, 277.73864749419994, 278.7633056768], [184.86059573070003, 27.447998054400003, 420.475585948, 334], [355.2360840185, 98.9248047104, 382.3415527623, 152.519531264]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046858.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object.", "boxes_value": [[534.3990478848, 73.598266624, 639.2480468736001, 363.3255004672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046858_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object.", "boxes_value": [[26.399047884799984, 72.598266624, 131.24804687360006, 362.3255004672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046858.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object. For your reference, objects involved in this region include a bench, and four lamps.", "boxes_value": [[534.3990478848, 73.598266624, 639.2480468736001, 363.3255004672], [549.8217773568, 340.6085204992, 571.4028320256, 363.3255004672], [534.3990478848, 73.598266624, 615.1759033344, 103.7550048768], [576.2601318144, 140.4507446272, 629.2762450944, 162.6066894336], [596.833618176, 177.6411132928, 634.8151855103999, 191.88421632], [603.2042236416, 190.3420410368, 639.2480468736001, 205.4571533312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046858_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object. For your reference, objects involved in this region include a bench, and four lamps.", "boxes_value": [[26.399047884799984, 72.598266624, 131.24804687360006, 362.3255004672], [41.8217773568, 339.6085204992, 63.40283202559999, 362.3255004672], [26.399047884799984, 72.598266624, 107.1759033344, 102.7550048768], [68.26013181439998, 139.4507446272, 121.2762450944, 161.6066894336], [88.83361817599996, 176.6411132928, 126.81518551039994, 190.88421632], [95.20422364160004, 189.3420410368, 131.24804687360006, 204.4571533312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046862.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations.", "boxes_value": [[299.7884521551, 170.6788329984, 539.6469726657, 431.8491821056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046862_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations.", "boxes_value": [[60.7884521551, 65.6788329984, 300.6469726657, 326.8491821056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046862.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, two potted plants, three people, and a hat.", "boxes_value": [[299.7884521551, 170.6788329984, 539.6469726657, 431.8491821056], [532.5145264007999, 224.242065408, 544.9478759538999, 252.053527808], [423.7100829771, 238.2299804672, 444.2236328171, 275.0148315648], [519.5191650386, 190.5170898432, 539.6469726657, 218.8861694464], [299.7884521551, 170.6788329984, 379.2115478267, 431.8491821056], [449.4114989906, 205.204589824, 469.4587402109, 242.793090816], [521.2473144508, 213.1399536128, 542.5473633105, 254.0695800832], [311.7524413762, 171.7156982272, 361.98730467710004, 200.3837280256]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046862_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, two potted plants, three people, and a hat.", "boxes_value": [[60.7884521551, 65.6788329984, 300.6469726657, 326.8491821056], [293.5145264007999, 119.242065408, 305.94787595389994, 147.053527808], [184.7100829771, 133.2299804672, 205.2236328171, 170.01483156479998], [280.5191650386, 85.51708984320001, 300.6469726657, 113.8861694464], [60.7884521551, 65.6788329984, 140.21154782669998, 326.8491821056], [210.41149899060002, 100.20458982400001, 230.45874021089998, 137.793090816], [282.2473144508, 108.13995361280001, 303.54736331050003, 149.0695800832], [72.75244137620001, 66.7156982272, 122.98730467710004, 95.38372802559999]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046863.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[317.69812011, 354.46697995719995, 445.38684085, 483.5875854392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046863_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.69812010999999, 32.46697995719995, 160.38684085, 161.58758543919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046863.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a person, two sneakers, and an extention cord.", "boxes_value": [[317.69812011, 354.46697995719995, 445.38684085, 483.5875854392], [131.62957766, 218.4036865208, 447.01696774000004, 413.99279787359995], [306.61914066, 66.8953247072, 550.4819336099999, 483.2310180896], [410.20324709, 395.7593383656, 445.38684085, 431.5014038324], [318.57666015, 440.9953613472, 419.69726561, 483.5875854392], [317.69812011, 354.46697995719995, 356.49755862, 377.71087648360003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046863_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a person, two sneakers, and an extention cord.", "boxes_value": [[32.69812010999999, 32.46697995719995, 160.38684085, 161.58758543919998], [0, 0, 162.01696774000004, 91.99279787359995], [21.619140660000028, 0, 192, 161.2310180896], [125.20324708999999, 73.7593383656, 160.38684085, 109.5014038324], [33.57666015000001, 118.9953613472, 134.69726561, 161.58758543919998], [32.69812010999999, 32.46697995719995, 71.49755862, 55.71087648360003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046864.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention.", "boxes_value": [[11.86279296, 296.9695434752, 179.4048461568, 431.8349609472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046864_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention.", "boxes_value": [[11.86279296, 33.9695434752, 179.4048461568, 168.8349609472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046864.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two bowls, two cups, a plate, and a chopsticks.", "boxes_value": [[11.86279296, 296.9695434752, 179.4048461568, 431.8349609472], [51.0747681024, 380.045043968, 136.3704223488, 431.8349609472], [11.86279296, 343.9050903552, 49.2924194304, 415.1995849728], [83.751403776, 289.2459716608, 139.5987548928, 380.740600576], [71.8690185216, 307.0696411136, 127.71636963840001, 345.6874389504], [125.9339599872, 296.9695434752, 179.4048461568, 329.6461791744], [136.03405762559998, 304.6931152384, 219.2109374976, 311.8225708032]], "boxes_seq": [[0], [0], [1, 4], [2, 3], [5], [6]]}, {"image_path": "objects365_v1_00046864_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two bowls, two cups, a plate, and a chopsticks.", "boxes_value": [[11.86279296, 33.9695434752, 179.4048461568, 168.8349609472], [51.0747681024, 117.04504396800002, 136.3704223488, 168.8349609472], [11.86279296, 80.9050903552, 49.2924194304, 152.19958497279998], [83.751403776, 26.245971660800024, 139.5987548928, 117.74060057600002], [71.8690185216, 44.06964111360003, 127.71636963840001, 82.68743895040001], [125.9339599872, 33.9695434752, 179.4048461568, 66.6461791744], [136.03405762559998, 41.693115238400026, 219.2109374976, 48.822570803199994]], "boxes_seq": [[0], [0], [1, 4], [2, 3], [5], [6]]}, {"image_path": "objects365_v1_00046867.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[146.4719848704, 231.7348632576, 296.9765624832, 307.1503906304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046867_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[38.47198487040001, 19.73486325760001, 188.97656248319998, 95.15039063040001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046867.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, two people, and a bowl.", "boxes_value": [[146.4719848704, 231.7348632576, 296.9765624832, 307.1503906304], [231.82379151359999, 231.7348632576, 262.4952392448, 306.0277709824], [271.569824256, 244.057006848, 296.9765624832, 307.1503906304], [143.2299804672, 210.406249984, 184.0698852864, 258.9639282176], [144.1848754944, 231.3052978688, 232.161560064, 368.7189941248], [146.4719848704, 252.1072997888, 165.5575561728, 268.1216430592]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046867_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, two people, and a bowl.", "boxes_value": [[38.47198487040001, 19.73486325760001, 188.97656248319998, 95.15039063040001], [123.82379151359999, 19.73486325760001, 154.49523924480002, 94.0277709824], [163.569824256, 32.057006847999986, 188.97656248319998, 95.15039063040001], [35.229980467199994, 0, 76.0698852864, 46.96392821760003], [36.18487549439999, 19.30529786880001, 124.16156006400001, 114], [38.47198487040001, 40.10729978879999, 57.55755617279999, 56.12164305919998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046868.jpg", "text": "What is taking place within the specified area in this capture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[203.23675539479999, 289.8386840576, 675.8291015864, 376.859863296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046868_crop.jpg", "text": "What is taking place within the specified area in this capture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[118.23675539479999, 21.838684057600005, 590.8291015864, 108.85986329600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046868.jpg", "text": "What is taking place within the specified area in this capture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, a bench, and three chairs.", "boxes_value": [[203.23675539479999, 289.8386840576, 675.8291015864, 376.859863296], [387.1678466545, 223.7730102784, 449.8212890871, 345.6374511616], [368.2341308643, 230.6580200448, 387.8563232217, 337.7196655104], [636.965698233, 289.8386840576, 675.8291015864, 325.8829956096], [288.1933593617, 301.1154174976, 352.16674806049997, 364.5769653248], [203.23675539479999, 303.162597632, 245.7150878742, 376.859863296], [353.7020264004, 320.0515136512, 463.736206065, 451.5807495168]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046868_crop.jpg", "text": "What is taking place within the specified area in this capture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, a bench, and three chairs.", "boxes_value": [[118.23675539479999, 21.838684057600005, 590.8291015864, 108.85986329600001], [302.1678466545, 0, 364.8212890871, 77.63745116159998], [283.2341308643, 0, 302.8563232217, 69.71966551039998], [551.965698233, 21.838684057600005, 590.8291015864, 57.882995609600016], [203.1933593617, 33.11541749759999, 267.16674806049997, 96.57696532480003], [118.23675539479999, 35.16259763199997, 160.7150878742, 108.85986329600001], [268.7020264004, 52.051513651200025, 378.736206065, 130]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046869.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each object you identify.", "boxes_value": [[276.08654784, 120.4519043072, 556.4349364992, 227.0328369152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046869_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each object you identify.", "boxes_value": [[71.08654783999998, 27.451904307199996, 351.43493649920003, 134.0328369152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046869.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and four helmets.", "boxes_value": [[276.08654784, 120.4519043072, 556.4349364992, 227.0328369152], [205.84320069119997, 118.3424682496, 375.7617187584, 272.2197265408], [327.6379394304, 132.6013183488, 436.36206051839997, 294.2021484544], [276.08654784, 120.4519043072, 337.4455566336, 181.43670656], [334.8265380864, 134.6691894784, 361.0163574528, 164.9745483264], [494.6271972864, 159.6452026368, 556.4349364992, 205.1425781248], [468.87402347520003, 190.9783325184, 502.3531494144, 227.0328369152]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046869_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, and four helmets.", "boxes_value": [[71.08654783999998, 27.451904307199996, 351.43493649920003, 134.0328369152], [0.8432006911999679, 25.342468249600003, 170.76171875839998, 160], [122.63793943040002, 39.60131834879999, 231.36206051839997, 160], [71.08654783999998, 27.451904307199996, 132.4455566336, 88.43670656], [129.82653808639998, 41.6691894784, 156.01635745279998, 71.9745483264], [289.6271972864, 66.64520263680001, 351.43493649920003, 112.1425781248], [263.87402347520003, 97.97833251840001, 297.3531494144, 134.0328369152]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046871.jpg", "text": "Could you please provide a description of the rectangular area in ? Please point out the objects and their coordinates.", "boxes_value": [[453.6497802923, 367.7892455936, 560.6286621118, 438.2996826112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046871_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Please point out the objects and their coordinates.", "boxes_value": [[27.649780292300022, 17.789245593600015, 134.62866211180005, 88.29968261120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046871.jpg", "text": "Could you please provide a description of the rectangular area in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, a desk, and a potted plant.", "boxes_value": [[453.6497802923, 367.7892455936, 560.6286621118, 438.2996826112], [521.8261719085, 375.4046630912, 560.6286621118, 415.6577148416], [491.00170901689995, 367.7892455936, 534.1558837929, 416.0203247104], [455.82568360470003, 367.7892455936, 487.0126953269, 414.5697631744], [453.6497802923, 376.492553728, 503.694091784, 414.2071533056], [457.69604495469997, 395.7648315392, 506.2595214708, 438.2996826112]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046871_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, a desk, and a potted plant.", "boxes_value": [[27.649780292300022, 17.789245593600015, 134.62866211180005, 88.29968261120001], [95.82617190849999, 25.404663091200007, 134.62866211180005, 65.65771484160001], [65.00170901689995, 17.789245593600015, 108.1558837929, 66.02032471040002], [29.825683604700032, 17.789245593600015, 61.01269532689997, 64.56976317440001], [27.649780292300022, 26.49255372800002, 77.69409178400002, 64.2071533056], [31.69604495469997, 45.764831539199974, 80.2595214708, 88.29968261120001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046872.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.560180672, 269.55682372800004, 526.3460693119999, 480.102050784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046872_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.560180672, 53.55682372800004, 526.3460693119999, 264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046872.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a fan, a bed, a desk, a pillow, a towel, a carpet, and a cup.", "boxes_value": [[0.560180672, 269.55682372800004, 526.3460693119999, 480.102050784], [394.60070803199994, 252.157165536, 505.842041024, 453.636108384], [93.661438016, 272.38690185599995, 447.47717286399995, 478.50390624], [272.681274432, 419.230285632, 420.603637696, 477.53601072], [138.364929216, 269.55682372800004, 272.676269504, 324.739013664], [94.655883776, 287.597351088, 137.492675776, 422.992370592], [0.560180672, 436.828125024, 118.580078144, 480.102050784], [483.80639648, 442.533203136, 526.3460693119999, 480.091125504]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046872_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a fan, a bed, a desk, a pillow, a towel, a carpet, and a cup.", "boxes_value": [[0.560180672, 53.55682372800004, 526.3460693119999, 264], [394.60070803199994, 36.15716553600001, 505.842041024, 237.636108384], [93.661438016, 56.38690185599995, 447.47717286399995, 262.50390624], [272.681274432, 203.230285632, 420.603637696, 261.53601072], [138.364929216, 53.55682372800004, 272.676269504, 108.73901366400003], [94.655883776, 71.59735108799998, 137.492675776, 206.992370592], [0.560180672, 220.82812502399997, 118.580078144, 264], [483.80639648, 226.533203136, 526.3460693119999, 264]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00046873.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[237.1196899436, 200.44049074379998, 677.5214843648, 375.0009155406]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046873_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[110.11968994360001, 44.440490743799984, 550.5214843648, 219.00091554059998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046873.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cups, a spoon, a plate, two bottles, a pie, a chair, and a desk.", "boxes_value": [[237.1196899436, 200.44049074379998, 677.5214843648, 375.0009155406], [133.3773803964, 249.04046629799998, 364.2274169872, 421.0097045778], [237.1196899436, 200.44049074379998, 350.2081298984, 368.6712646626], [184.781250004, 195.76739502, 382.91967773, 213.5250854622], [374.7061767596, 305.3053588734, 677.5214843648, 375.0009155406], [437.44335934000003, 202.8669433812, 454.8537597656, 238.5729370134], [464.29663083599996, 204.0473022246, 483.77258298960004, 244.179687483], [403.9019775504, 282.6289672614, 599.6533202976, 362.5166625768], [504.9498290772, 172.5441894504, 722.5871582364, 484.54766846100006], [0.8134155204, 261.8094482424, 722.5871582364, 485.3978271432]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7], [8], [9]]}, {"image_path": "objects365_v1_00046873_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cups, a spoon, a plate, two bottles, a pie, a chair, and a desk.", "boxes_value": [[110.11968994360001, 44.440490743799984, 550.5214843648, 219.00091554059998], [6.377380396400014, 93.04046629799998, 237.2274169872, 262], [110.11968994360001, 44.440490743799984, 223.2081298984, 212.67126466259998], [57.781250003999986, 39.76739502000001, 255.91967773, 57.525085462199996], [247.7061767596, 149.3053588734, 550.5214843648, 219.00091554059998], [310.44335934000003, 46.8669433812, 327.8537597656, 82.57293701340001], [337.29663083599996, 48.0473022246, 356.77258298960004, 88.17968748300001], [276.9019775504, 126.62896726140002, 472.6533202976, 206.5166625768], [377.9498290772, 16.54418945040001, 595.5871582364, 262], [0, 105.80944824239998, 595.5871582364, 262]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7], [8], [9]]}, {"image_path": "objects365_v1_00046875.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please point out the objects and their coordinates.", "boxes_value": [[12.997070336, 551.7166747758, 215.2069702144, 730.9182128973]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046875_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please point out the objects and their coordinates.", "boxes_value": [[12.997070336, 45.716674775800016, 215.2069702144, 224.9182128973]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046875.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, and four people.", "boxes_value": [[12.997070336, 551.7166747758, 215.2069702144, 730.9182128973], [12.997070336, 576.1748046957, 108.1627807744, 730.9182128973], [0.2509155328, 589.7176513812, 28.7634277376, 738.419433576], [111.6845703168, 535.3031005683, 187.5861206016, 770.9974365021], [81.7234497024, 570.2578125351, 123.6690673664, 729.0517578246], [132.035278336, 561.8820800949, 150.9799194112, 589.6060791231], [177.7797241344, 551.7166747758, 215.2069702144, 651.9848632539]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046875_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, and four people.", "boxes_value": [[12.997070336, 45.716674775800016, 215.2069702144, 224.9182128973], [12.997070336, 70.1748046957, 108.1627807744, 224.9182128973], [0.2509155328, 83.71765138119997, 28.7634277376, 232.41943357599996], [111.6845703168, 29.30310056830001, 187.5861206016, 264.99743650209996], [81.7234497024, 64.25781253510002, 123.6690673664, 223.05175782460003], [132.035278336, 55.88208009489995, 150.9799194112, 83.60607912310002], [177.7797241344, 45.716674775800016, 215.2069702144, 145.98486325390002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046876.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations.", "boxes_value": [[151.323486336, 259.583984352, 372.283691392, 478.500671376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046876_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations.", "boxes_value": [[55.323486336, 55.583984352000016, 276.283691392, 274.500671376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046876.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations. For your reference, objects involved in this region include three helmets, and two sneakers.", "boxes_value": [[151.323486336, 259.583984352, 372.283691392, 478.500671376], [297.61511232, 259.583984352, 355.17248537599994, 329.132507328], [266.438171392, 376.297546368, 324.794982912, 432.25610352], [220.871948224, 275.572143552, 268.036987328, 325.93487548800005], [151.323486336, 357.91119384, 204.883789056, 421.863830544], [337.332153344, 451.03875734400003, 372.283691392, 478.500671376]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00046876_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations. For your reference, objects involved in this region include three helmets, and two sneakers.", "boxes_value": [[55.323486336, 55.583984352000016, 276.283691392, 274.500671376], [201.61511231999998, 55.583984352000016, 259.17248537599994, 125.13250732799997], [170.43817139200002, 172.29754636799998, 228.79498291200002, 228.25610352], [124.871948224, 71.572143552, 172.036987328, 121.93487548800005], [55.323486336, 153.91119384, 108.88378905600001, 217.863830544], [241.332153344, 247.03875734400003, 276.283691392, 274.500671376]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00046878.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[510.95324705400003, 226.5821533184, 673.6492919858, 339.881164544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046878_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[40.95324705400003, 28.582153318400003, 203.6492919858, 141.881164544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046878.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[510.95324705400003, 226.5821533184, 673.6492919858, 339.881164544], [661.7866211052, 265.08953856, 673.6492919858, 311.4105224704], [638.1591796638, 242.782165504, 651.0247802706, 285.5293579264], [566.346313483, 294.5174560768, 584.4918212846, 339.881164544], [581.4675292972, 304.4542846464, 598.7489013618, 339.881164544], [510.95324705400003, 226.5821533184, 522.3801269238, 267.5285033984]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046878_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[40.95324705400003, 28.582153318400003, 203.6492919858, 141.881164544], [191.78662110519997, 67.08953856, 203.6492919858, 113.4105224704], [168.15917966380005, 44.782165504000005, 181.0247802706, 87.5293579264], [96.34631348300002, 96.51745607679999, 114.49182128459995, 141.881164544], [111.46752929720003, 106.45428464640003, 128.7489013618, 141.881164544], [40.95324705400003, 28.582153318400003, 52.38012692380005, 69.5285033984]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046879.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.009033216, 96.9161987505, 317.8941040128, 418.3873291261]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046879_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.009033216, 80.9161987505, 317.8941040128, 402.3873291261]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046879.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, two sneakers, a street lights, and a trolley.", "boxes_value": [[0.009033216, 96.9161987505, 317.8941040128, 418.3873291261], [0.009033216, 213.1129150029, 33.9699096576, 418.3873291261], [34.3472289792, 238.7722167683, 64.1572875776, 304.0524901948], [72.8631591936, 276.5064697635, 176.0394287104, 398.7655028899], [73.524780288, 381.3271484525, 100.5991211008, 394.8371581803], [108.127136256, 384.0828857721, 128.8970336768, 398.5922851929], [35.08953856, 96.9161987505, 61.0528564224, 286.96777346510004], [208.1924438528, 232.23181156040002, 317.8941040128, 335.9106445316]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046879_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, two sneakers, a street lights, and a trolley.", "boxes_value": [[0.009033216, 80.9161987505, 317.8941040128, 402.3873291261], [0.009033216, 197.1129150029, 33.9699096576, 402.3873291261], [34.3472289792, 222.7722167683, 64.1572875776, 288.0524901948], [72.8631591936, 260.5064697635, 176.0394287104, 382.7655028899], [73.524780288, 365.3271484525, 100.5991211008, 378.8371581803], [108.127136256, 368.0828857721, 128.8970336768, 382.5922851929], [35.08953856, 80.9161987505, 61.0528564224, 270.96777346510004], [208.1924438528, 216.23181156040002, 317.8941040128, 319.9106445316]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046883.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[163.6386050754, 224.0921020416, 952.7155809444, 355.4781494272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046883_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[163.6386050754, 33.092102041599986, 952.7155809444, 164.47814942719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046883.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, a glasses, a tie, a belt, and a tripod.", "boxes_value": [[163.6386050754, 224.0921020416, 952.7155809444, 355.4781494272], [724.3684248108, 270.3960128512, 786.084188085, 308.3528610304], [920.8375244334001, 339.4080062976, 952.7155809444, 355.4781494272], [163.6386050754, 233.1502521856, 174.4200246222, 273.708925952], [638.2951659924, 300.6212157952, 680.6827392288, 312.0332641792], [438.66125485140003, 224.0921020416, 472.3194579978, 287.8387451392]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046883_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, a glasses, a tie, a belt, and a tripod.", "boxes_value": [[163.6386050754, 33.092102041599986, 952.7155809444, 164.47814942719998], [724.3684248108, 79.3960128512, 786.084188085, 117.35286103039999], [920.8375244334001, 148.40800629760002, 952.7155809444, 164.47814942719998], [163.6386050754, 42.15025218560001, 174.4200246222, 82.70892595200002], [638.2951659924, 109.62121579519999, 680.6827392288, 121.03326417919999], [438.66125485140003, 33.092102041599986, 472.3194579978, 96.8387451392]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046884.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[467.55407712820005, 74.1271972864, 765.8205566102, 277.3836059648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046884_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[75.55407712820005, 51.127197286400005, 373.8205566102, 254.38360596479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046884.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, four cars, a sports car, and a dog.", "boxes_value": [[467.55407712820005, 74.1271972864, 765.8205566102, 277.3836059648], [533.3825683527999, 137.3874511872, 570.4458007622001, 176.6633300992], [467.55407712820005, 121.898376448, 518.999877895, 169.4719848448], [56.016479487800005, 35.4518432768, 706.501586929, 498.233459456], [667.7551269695999, 101.7978515456, 762.0859374748, 197.056579584], [443.0494384648, 84.0930786304, 495.3116454786, 96.5905761792], [452.3251952794, 87.0211181568, 523.7619628576, 96.597106944], [678.8205566091999, 74.1271972864, 765.8205566102, 158.7272338944], [676.4857177542, 242.0149536256, 706.0830078376, 277.3836059648]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6, 7], [4], [8]]}, {"image_path": "objects365_v1_00046884_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, four cars, a sports car, and a dog.", "boxes_value": [[75.55407712820005, 51.127197286400005, 373.8205566102, 254.38360596479998], [141.38256835279992, 114.38745118720001, 178.44580076220007, 153.6633300992], [75.55407712820005, 98.898376448, 126.99987789500005, 146.4719848448], [0, 12.451843276799998, 314.50158692900004, 305], [275.7551269695999, 78.7978515456, 370.08593747479995, 174.056579584], [51.049438464800005, 61.0930786304, 103.31164547859998, 73.5905761792], [60.32519527940002, 64.0211181568, 131.7619628576, 73.597106944], [286.8205566091999, 51.127197286400005, 373.8205566102, 135.7272338944], [284.4857177542, 219.0149536256, 314.0830078376, 254.38360596479998]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6, 7], [4], [8]]}, {"image_path": "objects365_v1_00046885.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[125.3588867094, 107.420532224, 424.39270022389996, 439.4029540864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046885_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[75.3588867094, 83.420532224, 374.39270022389996, 415.4029540864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046885.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, and three umbrellas.", "boxes_value": [[125.3588867094, 107.420532224, 424.39270022389996, 439.4029540864], [371.20300291390004, 280.9507446272, 445.1226806748, 454.2379150336], [275.47088621, 283.9802246144, 351.8142090047, 456.6614990336], [148.2320556606, 281.5566406144, 242.14642332410003, 442.7258300928], [125.3588867094, 107.420532224, 424.39270022389996, 439.4029540864], [77.573852545, 112.302124032, 170.3588256832, 189.8850707968], [200.4014282179, 277.7103881728, 285.7123413034, 326.6414184448]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046885_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, and three umbrellas.", "boxes_value": [[75.3588867094, 83.420532224, 374.39270022389996, 415.4029540864], [321.20300291390004, 256.9507446272, 395.1226806748, 430.2379150336], [225.47088621, 259.9802246144, 301.8142090047, 432.6614990336], [98.2320556606, 257.5566406144, 192.14642332410003, 418.7258300928], [75.3588867094, 83.420532224, 374.39270022389996, 415.4029540864], [27.573852544999994, 88.302124032, 120.3588256832, 165.8850707968], [150.4014282179, 253.71038817279998, 235.71234130340002, 302.6414184448]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046889.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[335.5140381036, 222.2249755648, 420.06494142959997, 386.0343017472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046889_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[21.5140381036, 41.22497556479999, 106.06494142959997, 205.0343017472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046889.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two helmets, two gloves, and a hockey stick.", "boxes_value": [[335.5140381036, 222.2249755648, 420.06494142959997, 386.0343017472], [335.5140381036, 229.7185058816, 361.4871826348, 260.94152832], [364.5266113284, 250.9943847424, 388.56555177919995, 284.4279174656], [369.5001220492, 283.3226318336, 394.09179689840005, 301.8354492416], [400.1705322324, 268.4019165184, 420.06494142959997, 289.1251831296], [394.7996826072, 222.2249755648, 413.61193848839997, 386.0343017472]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046889_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two helmets, two gloves, and a hockey stick.", "boxes_value": [[21.5140381036, 41.22497556479999, 106.06494142959997, 205.0343017472], [21.5140381036, 48.71850588160001, 47.48718263479998, 79.94152831999997], [50.52661132840001, 69.9943847424, 74.56555177919995, 103.42791746559999], [55.5001220492, 102.32263183359998, 80.09179689840005, 120.83544924159997], [86.17053223239998, 87.40191651840001, 106.06494142959997, 108.12518312959998], [80.7996826072, 41.22497556479999, 99.61193848839997, 205.0343017472]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046891.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[202.0860596007, 344.6038208, 684.9838867302, 440.3572998144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046891_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[121.0860596007, 24.603820799999994, 603.9838867302, 120.3572998144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046891.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[202.0860596007, 344.6038208, 684.9838867302, 440.3572998144], [202.0860596007, 344.6038208, 242.23333742999998, 440.3572998144], [229.0816039986, 351.0642700288, 245.2328491185, 436.6655883776], [338.7760009488, 347.7951660032, 368.8297119378, 439.0073242112], [615.6336670101, 350.5117187584, 645.0135498126, 430.1107177984], [652.358520495, 351.5366211072, 684.9838867302, 432.8437499904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046891_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[121.0860596007, 24.603820799999994, 603.9838867302, 120.3572998144], [121.0860596007, 24.603820799999994, 161.23333742999998, 120.3572998144], [148.0816039986, 31.064270028800024, 164.2328491185, 116.66558837759999], [257.7760009488, 27.795166003199995, 287.8297119378, 119.0073242112], [534.6336670101, 30.51171875839998, 564.0135498126, 110.1107177984], [571.358520495, 31.536621107200006, 603.9838867302, 112.8437499904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046892.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.1759643768, 25.7225341952, 199.1996460216, 310.7445068288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046892_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[28.175964376799996, 25.7225341952, 138.1996460216, 310.7445068288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046892.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two lamps, a mirror, and a bed.", "boxes_value": [[89.1759643768, 25.7225341952, 199.1996460216, 310.7445068288], [89.1759643768, 203.3198242304, 199.1996460216, 310.7445068288], [121.23010252499999, 96.7614135808, 188.8037109252, 224.111694336], [105.6362304508, 25.7225341952, 198.3333130142, 171.2656249856], [14.7955322368, 25.3010864128, 688.675170897, 478.4409179648], [152.1467284806, 91.7660522496, 192.994262699, 165.1812133888]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00046892_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two lamps, a mirror, and a bed.", "boxes_value": [[28.175964376799996, 25.7225341952, 138.1996460216, 310.7445068288], [28.175964376799996, 203.3198242304, 138.1996460216, 310.7445068288], [60.23010252499999, 96.7614135808, 127.8037109252, 224.111694336], [44.6362304508, 25.7225341952, 137.3333130142, 171.2656249856], [0, 25.3010864128, 165, 381], [91.14672848059999, 91.7660522496, 131.994262699, 165.1812133888]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00046894.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[161.5093537856, 388.1403198464, 418.3872070356, 468.0110626304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046894_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[64.50935378560001, 20.14031984640002, 321.3872070356, 100.01106263039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046894.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a handbag, a backpack, and a bicycle.", "boxes_value": [[161.5093537856, 388.1403198464, 418.3872070356, 468.0110626304], [397.3558349844, 388.1403198464, 418.3872070356, 449.6765136896], [319.4618530144, 391.6455077888, 342.44055177039996, 450.8449096704], [231.75509370039998, 443.8702392832, 252.16849578319997, 468.0110626304], [161.5093537856, 415.1746435584, 183.2904206692, 448.5269021696], [209.8349609544, 431.2074585088, 303.5891113528, 509.9189453312]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046894_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a handbag, a backpack, and a bicycle.", "boxes_value": [[64.50935378560001, 20.14031984640002, 321.3872070356, 100.01106263039998], [300.3558349844, 20.14031984640002, 321.3872070356, 81.67651368959997], [222.4618530144, 23.64550778879999, 245.44055177039996, 82.8449096704], [134.75509370039998, 75.87023928320002, 155.16849578319997, 100.01106263039998], [64.50935378560001, 47.17464355840002, 86.29042066919999, 80.52690216960002], [112.8349609544, 63.207458508800016, 206.5891113528, 119]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046897.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[412.0344238527, 103.5099487232, 742.8057861573001, 508.6399536128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046897_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[83.0344238527, 101.5099487232, 413.8057861573001, 506.6399536128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046897.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people, and a bracelet.", "boxes_value": [[412.0344238527, 103.5099487232, 742.8057861573001, 508.6399536128], [601.0942383144, 173.241332992, 742.8057861573001, 508.6399536128], [399.6738281493, 69.5946655232, 659.4011230437, 507.5051879936], [359.5067138862, 152.8925170688, 457.21166995439995, 331.7787475456], [412.0344238527, 453.9440307712, 428.5633544802, 473.7788696064], [621.9771728823, 49.4908447232, 742.8370361553, 385.1641235456], [649.8237304926, 103.5099487232, 711.0499267791, 195.1082153472]], "boxes_seq": [[0], [0], [1, 2, 3, 5, 6], [4]]}, {"image_path": "objects365_v1_00046897_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people, and a bracelet.", "boxes_value": [[83.0344238527, 101.5099487232, 413.8057861573001, 506.6399536128], [272.09423831439995, 171.241332992, 413.8057861573001, 506.6399536128], [70.6738281493, 67.5946655232, 330.40112304369995, 505.5051879936], [30.506713886199975, 150.8925170688, 128.21166995439995, 329.7787475456], [83.0344238527, 451.9440307712, 99.56335448020002, 471.7788696064], [292.9771728823, 47.4908447232, 413.8370361553, 383.1641235456], [320.8237304926, 101.5099487232, 382.0499267791, 193.1082153472]], "boxes_seq": [[0], [0], [1, 2, 3, 5, 6], [4]]}, {"image_path": "objects365_v1_00046898.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[33.5717163306, 37.4092407296, 166.65270997849998, 232.3937988096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046898_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[33.5717163306, 37.4092407296, 166.65270997849998, 232.3937988096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046898.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include three towels, a cabinet, and a toiletry.", "boxes_value": [[33.5717163306, 37.4092407296, 166.65270997849998, 232.3937988096], [33.5717163306, 37.4092407296, 166.65270997849998, 85.7619629056], [34.4589233588, 80.882324224, 162.6602783516, 115.039794944], [37.1205444434, 107.0549316608, 164.8782959221, 133.6711425536], [3.9724731375, 0, 210.76318358460003, 485.7059936768], [79.8158569389, 210.0686034944, 93.55444332869999, 232.3937988096]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046898_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include three towels, a cabinet, and a toiletry.", "boxes_value": [[33.5717163306, 37.4092407296, 166.65270997849998, 232.3937988096], [33.5717163306, 37.4092407296, 166.65270997849998, 85.7619629056], [34.4589233588, 80.882324224, 162.6602783516, 115.039794944], [37.1205444434, 107.0549316608, 164.8782959221, 133.6711425536], [3.9724731375, 0, 199, 281], [79.8158569389, 210.0686034944, 93.55444332869999, 232.3937988096]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046899.jpg", "text": "What can I find in the bbox of the provided image ? Give coordinates for the items you reference.", "boxes_value": [[0, 307.7352294912, 511.697143552, 765.8024902656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046899_crop.jpg", "text": "What can I find in the bbox of the provided image ? Give coordinates for the items you reference.", "boxes_value": [[0, 114.73522949120002, 511.697143552, 572.8024902656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046899.jpg", "text": "What can I find in the bbox of the provided image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a storage box, two desks, a person, and a handbag.", "boxes_value": [[0, 307.7352294912, 511.697143552, 765.8024902656], [1.2487182848, 403.4416503552, 262.4467163136, 594.4670409984], [323.5228271616, 460.61926272000005, 510.6497192448, 616.5583496448], [0, 516.4974365184, 510.4641113088, 765.8024902656], [68.262329088, 22.061096217600003, 368.8600463872, 615.7102050816001], [350.7075195392, 307.7352294912, 511.697143552, 494.43847656959997]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046899_crop.jpg", "text": "What can I find in the bbox of the provided image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a storage box, two desks, a person, and a handbag.", "boxes_value": [[0, 114.73522949120002, 511.697143552, 572.8024902656], [1.2487182848, 210.4416503552, 262.4467163136, 401.46704099839997], [323.5228271616, 267.61926272000005, 510.6497192448, 423.55834964480005], [0, 323.49743651840004, 510.4641113088, 572.8024902656], [68.262329088, 0, 368.8600463872, 422.71020508160007], [350.7075195392, 114.73522949120002, 511.697143552, 301.43847656959997]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046901.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[295.6230468796, 120.55017088, 503.5546875276, 288.3692626944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046901_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[52.6230468796, 42.550170879999996, 260.5546875276, 210.3692626944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046901.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a fan, a potted plant, a speaker, and two lamps.", "boxes_value": [[295.6230468796, 120.55017088, 503.5546875276, 288.3692626944], [437.3438720682, 184.175109888, 462.92333987079996, 211.13720704], [360.8525390496, 218.3609618944, 397.8883056864, 288.3692626944], [460.573852568, 164.5454101504, 475.3306884488, 207.764648448], [373.9263915812, 120.55017088, 503.5546875276, 148.186645504], [295.6230468796, 140.9484863488, 402.2208252026, 166.61096192]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046901_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a fan, a potted plant, a speaker, and two lamps.", "boxes_value": [[52.6230468796, 42.550170879999996, 260.5546875276, 210.3692626944], [194.34387206820003, 106.17510988800001, 219.92333987079996, 133.13720704], [117.85253904960001, 140.3609618944, 154.88830568639997, 210.3692626944], [217.573852568, 86.54541015039999, 232.3306884488, 129.764648448], [130.9263915812, 42.550170879999996, 260.5546875276, 70.18664550400001], [52.6230468796, 62.9484863488, 159.2208252026, 88.61096192]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046902.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[473.0020752263, 101.3924560384, 730.2802734464, 417.9849098752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046902_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[65.00207522630001, 79.3924560384, 322.2802734464, 395.9849098752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046902.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a helmet, two motorcycles, and two bicycles.", "boxes_value": [[473.0020752263, 101.3924560384, 730.2802734464, 417.9849098752], [526.3721923863001, 170.8824462848, 548.1684570635999, 227.3750610432], [473.0020752263, 351.2893709824, 541.9994081284, 417.9849098752], [366.6911621003, 210.5601196544, 759.7586669974, 500.7215576064], [472.97119141110005, 235.02142336, 759.7586669974, 434.929138176], [633.3022460564, 101.3924560384, 680.1977539084, 136.4503173632], [685.6612548589, 101.3924560384, 730.2802734464, 136.9055785984]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046902_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a helmet, two motorcycles, and two bicycles.", "boxes_value": [[65.00207522630001, 79.3924560384, 322.2802734464, 395.9849098752], [118.37219238630007, 148.8824462848, 140.16845706359993, 205.3750610432], [65.00207522630001, 329.2893709824, 133.99940812839998, 395.9849098752], [0, 188.5601196544, 351.75866699740004, 475], [64.97119141110005, 213.02142336, 351.75866699740004, 412.929138176], [225.3022460564, 79.3924560384, 272.19775390840005, 114.45031736320001], [277.66125485889995, 79.3924560384, 322.2802734464, 114.90557859840001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046903.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference.", "boxes_value": [[498.763793946, 36.3979492107, 595.002075228, 429.1254272445]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046903_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference.", "boxes_value": [[24.76379394600002, 36.3979492107, 121.00207522799997, 429.1254272445]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046903.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a boots, two sneakers, two gloves, and a helmet.", "boxes_value": [[498.763793946, 36.3979492107, 595.002075228, 429.1254272445], [259.580932644, 46.800537126, 566.653442388, 432.7287597759], [501.829711944, 34.6359863178, 612.344970732, 291.12597656730003], [498.763793946, 369.009765642, 567.01525881, 429.1254272445], [519.103637712, 271.3784179638, 541.7034912299999, 291.2662963749], [541.7034912299999, 268.2797851734, 568.371337908, 291.2662963749], [509.44311521400004, 129.38598633869998, 537.9627685739999, 166.0328979477], [563.371215792, 125.0682373017, 595.002075228, 160.3289794827], [533.8144531500001, 36.3979492107, 561.81555177, 56.1024780204]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046903_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a boots, two sneakers, two gloves, and a helmet.", "boxes_value": [[24.76379394600002, 36.3979492107, 121.00207522799997, 429.1254272445], [0, 46.800537126, 92.65344238800003, 432.7287597759], [27.829711943999996, 34.6359863178, 138.34497073199998, 291.12597656730003], [24.76379394600002, 369.009765642, 93.01525880999998, 429.1254272445], [45.10363771200002, 271.3784179638, 67.70349122999994, 291.2662963749], [67.70349122999994, 268.2797851734, 94.37133790799999, 291.2662963749], [35.443115214000045, 129.38598633869998, 63.96276857399994, 166.0328979477], [89.37121579200004, 125.0682373017, 121.00207522799997, 160.3289794827], [59.81445315000008, 36.3979492107, 87.81555176999996, 56.1024780204]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7], [8]]}, {"image_path": "objects365_v1_00046904.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[228.070373504, 39.50714112, 486.44848631080004, 111.9069824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046904_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[65.070373504, 18.50714112, 323.44848631080004, 90.9069824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046904.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, a person, two hats, and a boat.", "boxes_value": [[228.070373504, 39.50714112, 486.44848631080004, 111.9069824], [228.070373504, 60.9115600384, 326.2148437428, 86.5144653312], [287.8084717124, 39.7521362432, 352.15026851560003, 104.0486450176], [469.2247314736, 92.436584448, 486.44848631080004, 111.9069824], [327.6447754136, 39.50714112, 345.82507320679997, 56.95581056], [179.0184326548, 78.5955810304, 484.75817871559997, 137.0056152576]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046904_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, a person, two hats, and a boat.", "boxes_value": [[65.070373504, 18.50714112, 323.44848631080004, 90.9069824], [65.070373504, 39.9115600384, 163.21484374279999, 65.5144653312], [124.80847171239998, 18.7521362432, 189.15026851560003, 83.0486450176], [306.2247314736, 71.436584448, 323.44848631080004, 90.9069824], [164.64477541359997, 18.50714112, 182.82507320679997, 35.95581056], [16.018432654799994, 57.5955810304, 321.75817871559997, 109]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046905.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[2.7244872732, 82.9788208128, 491.916015641, 145.4440307712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046905_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[2.7244872732, 15.978820812799995, 491.916015641, 78.4440307712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046905.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and two glasses.", "boxes_value": [[2.7244872732, 82.9788208128, 491.916015641, 145.4440307712], [0.09594729360000001, 70.4880981504, 252.37396242909998, 511.7927246336], [470.4521484117, 114.3489990144, 491.916015641, 145.4440307712], [462.1968993911, 82.9788208128, 481.45935059429996, 113.7986450432], [392.25329588520003, 73.8685913088, 417.1821289357, 102.5961303552], [405.73352051829994, 119.7120971776, 451.60852052639996, 138.6470947328], [2.7244872732, 104.622192384, 53.9522704982, 121.2619018752]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046905_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and two glasses.", "boxes_value": [[2.7244872732, 15.978820812799995, 491.916015641, 78.4440307712], [0.09594729360000001, 3.488098150400006, 252.37396242909998, 94], [470.4521484117, 47.34899901439999, 491.916015641, 78.4440307712], [462.1968993911, 15.978820812799995, 481.45935059429996, 46.7986450432], [392.25329588520003, 6.868591308800006, 417.1821289357, 35.5961303552], [405.73352051829994, 52.7120971776, 451.60852052639996, 71.64709473280001], [2.7244872732, 37.622192384, 53.9522704982, 54.261901875199996]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046908.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[446.3110351751, 34.4675903488, 551.6270752227999, 323.2045287936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046908_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[27.311035175100017, 34.4675903488, 132.62707522279993, 323.2045287936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046908.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three bowls, a tea pot, and a cake.", "boxes_value": [[446.3110351751, 34.4675903488, 551.6270752227999, 323.2045287936], [493.6596679921, 36.0872192512, 551.6270752227999, 71.0381469696], [456.3153075905, 291.9217529344, 517.4906006035, 323.2045287936], [515.4050293002, 261.3341675008, 547.3829346007, 281.146545408], [446.3110351751, 34.4675903488, 493.4530029152, 75.7168579072], [462.4696044893, 252.1746215936, 519.4279785282, 303.8061523456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046908_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three bowls, a tea pot, and a cake.", "boxes_value": [[27.311035175100017, 34.4675903488, 132.62707522279993, 323.2045287936], [74.65966799210003, 36.0872192512, 132.62707522279993, 71.0381469696], [37.315307590500026, 291.9217529344, 98.49060060349996, 323.2045287936], [96.4050293002, 261.3341675008, 128.38293460069997, 281.146545408], [27.311035175100017, 34.4675903488, 74.45300291519999, 75.7168579072], [43.469604489300025, 252.1746215936, 100.42797852820001, 303.8061523456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046909.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[224.7168579439, 126.7800292864, 524.8206787239, 205.9172363264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046909_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[75.71685794390001, 20.780029286399994, 375.82067872389996, 99.9172363264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046909.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a vase, two necklaces, and three handbags.", "boxes_value": [[224.7168579439, 126.7800292864, 524.8206787239, 205.9172363264], [405.97802734680005, 132.5194092032, 436.5610351222, 195.8191528448], [262.6795654289, 169.90448, 297.40167238780003, 223.6846313472], [342.18835451509995, 128.5597534208, 379.2067871118, 170.1815185408], [224.7168579439, 136.9826660352, 317.27850340059996, 202.7625122304], [270.2929076909, 96.5750121984, 351.5780029484, 163.7644653568], [481.32543942869995, 126.7800292864, 524.8206787239, 205.9172363264]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046909_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a vase, two necklaces, and three handbags.", "boxes_value": [[75.71685794390001, 20.780029286399994, 375.82067872389996, 99.9172363264], [256.97802734680005, 26.519409203200013, 287.5610351222, 89.81915284479999], [113.67956542889999, 63.90448000000001, 148.40167238780003, 117.6846313472], [193.18835451509995, 22.559753420800007, 230.2067871118, 64.1815185408], [75.71685794390001, 30.982666035199998, 168.27850340059996, 96.7625122304], [121.2929076909, 0, 202.5780029484, 57.7644653568], [332.32543942869995, 20.780029286399994, 375.82067872389996, 99.9172363264]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00046912.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[337.2077026452, 1.3185424896, 606.0036621186, 512.167236352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046912_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[67.20770264520002, 1.3185424896, 336, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046912.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, three books, and a storage box.", "boxes_value": [[337.2077026452, 1.3185424896, 606.0036621186, 512.167236352], [0, 1.5812378112, 605.2807616886, 511.890930176], [335.069396982, 200.9082031104, 600.2297363568, 268.5218505728], [337.2077026452, 334.8842773504, 549.8511962904, 461.9180297728], [472.75634763719995, 423.1055298048, 606.0036621186, 512.167236352], [152.86022947860002, 5.8469848576, 526.1511230195999, 510.6987915264], [503.61071777039996, 1.3185424896, 605.1151123157999, 35.3523559424]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4], [6]]}, {"image_path": "objects365_v1_00046912_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, three books, and a storage box.", "boxes_value": [[67.20770264520002, 1.3185424896, 336, 512], [0, 1.5812378112, 335.2807616886, 511.890930176], [65.069396982, 200.9082031104, 330.2297363568, 268.5218505728], [67.20770264520002, 334.8842773504, 279.8511962904, 461.9180297728], [202.75634763719995, 423.1055298048, 336, 512], [0, 5.8469848576, 256.15112301959994, 510.6987915264], [233.61071777039996, 1.3185424896, 335.11511231579993, 35.3523559424]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4], [6]]}, {"image_path": "objects365_v1_00046913.jpg", "text": "Please interpret and describe the area inside the given picture . Specify the location of each mentioned object.", "boxes_value": [[1.184631336, 225.3886108155, 719.741455056, 493.34020995599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046913_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Specify the location of each mentioned object.", "boxes_value": [[1.184631336, 67.38861081549999, 719.741455056, 335.34020995599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046913.jpg", "text": "Please interpret and describe the area inside the given picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, four people, a watch, a glasses, and a pen.", "boxes_value": [[1.184631336, 225.3886108155, 719.741455056, 493.34020995599997], [1.184631336, 331.04748535650003, 719.741455056, 493.34020995599997], [0.064880352, 139.29595945650001, 359.014648464, 494.96099855399996], [175.10266116, 126.89874266849999, 401.15759277599994, 402.54730223399997], [393.084228528, 164.3823242325, 596.648925792, 376.02044676900005], [262.427124024, 0.110931381, 434.439697248, 381.5400390615], [130.181518584, 410.956726059, 159.48193356, 427.8266601345], [122.217346224, 225.3886108155, 191.097412128, 247.49432373599998], [239.575134312, 458.9470825335, 326.89074708000004, 477.730285659]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046913_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, four people, a watch, a glasses, and a pen.", "boxes_value": [[1.184631336, 67.38861081549999, 719.741455056, 335.34020995599997], [1.184631336, 173.04748535650003, 719.741455056, 335.34020995599997], [0.064880352, 0, 359.014648464, 336.96099855399996], [175.10266116, 0, 401.15759277599994, 244.54730223399997], [393.084228528, 6.382324232499997, 596.648925792, 218.02044676900005], [262.427124024, 0, 434.439697248, 223.5400390615], [130.181518584, 252.956726059, 159.48193356, 269.8266601345], [122.217346224, 67.38861081549999, 191.097412128, 89.49432373599998], [239.575134312, 300.9470825335, 326.89074708000004, 319.730285659]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046915.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[186.7320556544, 87.4974975488, 370.4782104576, 293.4799804928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046915_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[46.7320556544, 51.4974975488, 230.47821045760003, 257.4799804928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046915.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[186.7320556544, 87.4974975488, 370.4782104576, 293.4799804928], [212.0678710784, 32.0443115008, 359.8044433408, 441.2438354432], [202.7495727616, 87.4974975488, 245.4174194176, 293.4799804928], [341.5425415168, 138.0122680832, 371.9495239168, 242.9652099584], [351.8416748032, 114.4713744896, 370.4782104576, 145.3687744], [186.7320556544, 168.0302124032, 207.37823488, 209.3225708032]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046915_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[46.7320556544, 51.4974975488, 230.47821045760003, 257.4799804928], [72.0678710784, 0, 219.8044433408, 308], [62.74957276160001, 51.4974975488, 105.4174194176, 257.4799804928], [201.5425415168, 102.01226808320001, 231.9495239168, 206.9652099584], [211.84167480320002, 78.4713744896, 230.47821045760003, 109.3687744], [46.7320556544, 132.0302124032, 67.37823488000001, 173.3225708032]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046916.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[0, 0, 277.4974364976, 445.9240722432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046916_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[0, 0, 277.4974364976, 445.9240722432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046916.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a storage box, a stool, a bowl, and a bakset.", "boxes_value": [[0, 0, 277.4974364976, 445.9240722432], [0, 0, 277.4974364976, 418.7629394432], [0, 352.5747070464, 50.823242205999996, 445.9240722432], [163.1918334706, 358.6890869248, 231.4311523392, 413.971557632], [108.148986814, 387.2542724608, 221.7355346668, 469.2173462016], [0, 279.1849975808, 62.3583984276, 364.6644286976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046916_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a storage box, a stool, a bowl, and a bakset.", "boxes_value": [[0, 0, 277.4974364976, 445.9240722432], [0, 0, 277.4974364976, 418.7629394432], [0, 352.5747070464, 50.823242205999996, 445.9240722432], [163.1918334706, 358.6890869248, 231.4311523392, 413.971557632], [108.148986814, 387.2542724608, 221.7355346668, 469.2173462016], [0, 279.1849975808, 62.3583984276, 364.6644286976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046917.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[114.8524169728, 53.295166005400006, 187.7832641536, 566.2031249974]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046917_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[18.8524169728, 53.295166005400006, 91.78326415359999, 566.2031249974]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046917.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, a lamp, two bowls, and a wine glass.", "boxes_value": [[114.8524169728, 53.295166005400006, 187.7832641536, 566.2031249974], [160.5668335104, 422.6226806318, 215.6923828224, 485.2229003688], [168.0415038976, 482.4199219026, 183.9251098624, 513.2528075862], [114.8524169728, 53.295166005400006, 166.507568384, 118.1644896982], [151.9876708864, 508.3583984402, 187.7832641536, 536.5610351838], [152.6286621184, 549.9114990402, 179.9862670848, 566.2031249974], [102.216979968, 538.5382080166, 132.341064448, 569.8917236342]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046917_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, a lamp, two bowls, and a wine glass.", "boxes_value": [[18.8524169728, 53.295166005400006, 91.78326415359999, 566.2031249974], [64.56683351039999, 422.6226806318, 110, 485.2229003688], [72.0415038976, 482.4199219026, 87.9251098624, 513.2528075862], [18.8524169728, 53.295166005400006, 70.507568384, 118.1644896982], [55.9876708864, 508.3583984402, 91.78326415359999, 536.5610351838], [56.6286621184, 549.9114990402, 83.9862670848, 566.2031249974], [6.216979968000004, 538.5382080166, 36.341064448, 569.8917236342]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046921.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[348.769531264, 338.3585815552, 705.4311523318, 415.3345336832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046921_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.76953126400002, 19.358581555199976, 446.43115233180004, 96.3345336832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046921.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a backpack.", "boxes_value": [[348.769531264, 338.3585815552, 705.4311523318, 415.3345336832], [677.565673808, 345.7755737088, 697.0703125048, 415.3345336832], [655.005981455, 369.7452392448, 678.2706298892, 406.6397094912], [520.445190441, 338.3585815552, 540.0428466626, 394.2973633024], [348.769531264, 345.6337890816, 363.8441162008, 390.0557861376], [683.4981689302, 354.4346313728, 705.4311523318, 380.2059325952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046921_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a backpack.", "boxes_value": [[89.76953126400002, 19.358581555199976, 446.43115233180004, 96.3345336832], [418.56567380800004, 26.775573708799982, 438.0703125048, 96.3345336832], [396.005981455, 50.74523924480002, 419.2706298892, 87.6397094912], [261.445190441, 19.358581555199976, 281.0428466626, 75.29736330240002], [89.76953126400002, 26.633789081600014, 104.84411620079999, 71.05578613760002], [424.4981689302, 35.43463137280003, 446.43115233180004, 61.20593259520001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046924.jpg", "text": "Can you divulge the contents of the area within the given image ? Please mention the objects and their locations.", "boxes_value": [[6.5903930399999995, 162.2273559552, 204.552307113, 365.0635228672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046924_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please mention the objects and their locations.", "boxes_value": [[6.5903930399999995, 51.22735595520001, 204.552307113, 254.0635228672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046924.jpg", "text": "Can you divulge the contents of the area within the given image ? Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, a desk, three people, and a leather shoes.", "boxes_value": [[6.5903930399999995, 162.2273559552, 204.552307113, 365.0635228672], [131.671020481, 162.2273559552, 193.268493649, 264.051757824], [0.933471693, 218.796447744, 56.874084498, 411.1314697216], [6.5903930399999995, 181.0837402112, 46.188781716, 302.3930663936], [0.617675751, 253.3435668992, 99.085266126, 455.5302734336], [0, 168.0050049024, 35.409545864, 370.8481445376], [171.631408718, 196.675720192, 204.552307113, 252.8348999168], [27.937586677000002, 313.0823855616, 98.666019452, 365.0635228672]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046924_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, a desk, three people, and a leather shoes.", "boxes_value": [[6.5903930399999995, 51.22735595520001, 204.552307113, 254.0635228672], [131.671020481, 51.22735595520001, 193.268493649, 153.051757824], [0.933471693, 107.796447744, 56.874084498, 300.1314697216], [6.5903930399999995, 70.0837402112, 46.188781716, 191.3930663936], [0.617675751, 142.3435668992, 99.085266126, 304], [0, 57.0050049024, 35.409545864, 259.8481445376], [171.631408718, 85.675720192, 204.552307113, 141.8348999168], [27.937586677000002, 202.0823855616, 98.666019452, 254.0635228672]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00046925.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations.", "boxes_value": [[328.890686016, 155.98156737600002, 639.972534208, 282.37609862399995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046925_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations.", "boxes_value": [[77.89068601600002, 31.981567376000015, 388.97253420799996, 158.37609862399995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046925.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations. For your reference, objects involved in this region include a bench, four chairs, and a desk.", "boxes_value": [[328.890686016, 155.98156737600002, 639.972534208, 282.37609862399995], [545.45190432, 173.647644048, 639.972534208, 247.536865248], [328.890686016, 157.0516968, 395.9256592, 245.06073000000004], [394.335693376, 203.459960928, 462.26354982399994, 282.37609862399995], [458.267700224, 183.98065185599998, 530.19128416, 263.396301264], [362.73779296, 162.824646, 464.471191424, 252.24053956800003], [415.65747072, 155.98156737600002, 479.52587891199994, 233.08001707199998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00046925_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations. For your reference, objects involved in this region include a bench, four chairs, and a desk.", "boxes_value": [[77.89068601600002, 31.981567376000015, 388.97253420799996, 158.37609862399995], [294.45190432000004, 49.64764404799999, 388.97253420799996, 123.536865248], [77.89068601600002, 33.0516968, 144.92565919999998, 121.06073000000004], [143.335693376, 79.45996092799999, 211.26354982399994, 158.37609862399995], [207.267700224, 59.98065185599998, 279.19128416, 139.396301264], [111.73779295999998, 38.824646, 213.47119142399998, 128.24053956800003], [164.65747072, 31.981567376000015, 228.52587891199994, 109.08001707199998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00046927.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[291.0238647635, 255.169860864, 440.78027342769997, 354.7756958208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046927_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[38.0238647635, 25.169860863999986, 187.78027342769997, 124.77569582080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046927.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a moniter, and a remote.", "boxes_value": [[291.0238647635, 255.169860864, 440.78027342769997, 354.7756958208], [291.0238647635, 270.0721435648, 303.97174075559997, 319.9091796992], [300.307250964, 255.169860864, 336.95214847020003, 330.6583862272], [346.9683837672, 273.7366332928, 373.3527831838, 326.0166625792], [267.23663329240003, 244.5266113536, 397.90136718549996, 364.0938110464], [413.2947998076, 343.3306274304, 440.78027342769997, 354.7756958208]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046927_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a moniter, and a remote.", "boxes_value": [[38.0238647635, 25.169860863999986, 187.78027342769997, 124.77569582080002], [38.0238647635, 40.0721435648, 50.97174075559997, 89.90917969920002], [47.30725096399999, 25.169860863999986, 83.95214847020003, 100.65838622720003], [93.96838376720001, 43.73663329279998, 120.3527831838, 96.01666257919999], [14.236633292400029, 14.526611353600003, 144.90136718549996, 134.0938110464], [160.2947998076, 113.3306274304, 187.78027342769997, 124.77569582080002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046928.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[57.9440917963, 0.0299072512, 464.23327639, 511.9238281216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046928_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[57.9440917963, 0.0299072512, 464.23327639, 511.9238281216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046928.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, a person, two bracelets, a glasses, a pen, and a cell phone.", "boxes_value": [[57.9440917963, 0.0299072512, 464.23327639, 511.9238281216], [25.2794189312, 265.085144064, 127.359985324, 510.4195556864], [133.5725097486, 187.4892578304, 640.9300537106, 511.5159301632], [57.9440917963, 0.0299072512, 464.23327639, 511.9238281216], [342.8146972746, 326.6912841728, 360.40759274770005, 392.4330444288], [351.3619384938, 330.6474609152, 366.13378907130004, 389.7348632576], [182.3742675482, 59.5072021504, 293.5065307642, 140.6936645632], [373.9891357181, 269.6352539136, 430.8157958707, 328.6966552576], [321.8485717593, 240.6292114432, 389.82824708149997, 267.9612426752]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046928_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, a person, two bracelets, a glasses, a pen, and a cell phone.", "boxes_value": [[57.9440917963, 0.0299072512, 464.23327639, 511.9238281216], [25.2794189312, 265.085144064, 127.359985324, 510.4195556864], [133.5725097486, 187.4892578304, 565, 511.5159301632], [57.9440917963, 0.0299072512, 464.23327639, 511.9238281216], [342.8146972746, 326.6912841728, 360.40759274770005, 392.4330444288], [351.3619384938, 330.6474609152, 366.13378907130004, 389.7348632576], [182.3742675482, 59.5072021504, 293.5065307642, 140.6936645632], [373.9891357181, 269.6352539136, 430.8157958707, 328.6966552576], [321.8485717593, 240.6292114432, 389.82824708149997, 267.9612426752]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00046929.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[524.5819091637, 100.0230102528, 669.2014160216, 188.2017822208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046929_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[36.58190916369995, 23.023010252800006, 181.20141602160004, 111.2017822208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046929.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two street lights, and three traffic signs.", "boxes_value": [[524.5819091637, 100.0230102528, 669.2014160216, 188.2017822208], [509.0323486154, 119.95465088, 539.8137206955, 186.8590698496], [524.5819091637, 136.6650390528, 563.3782958741, 154.8793334784], [624.9774170241001, 133.0759277568, 670.4709472575, 139.9391479296], [618.2652587706, 100.0230102528, 642.0273437526, 188.2017822208], [623.6876220661, 146.685485824, 669.2014160216, 154.4143676928]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00046929_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two street lights, and three traffic signs.", "boxes_value": [[36.58190916369995, 23.023010252800006, 181.20141602160004, 111.2017822208], [21.032348615399997, 42.95465088, 51.813720695500024, 109.85906984959999], [36.58190916369995, 59.66503905280001, 75.37829587409999, 77.87933347840001], [136.97741702410008, 56.07592775680001, 182.47094725750003, 62.93914792960001], [130.26525877059998, 23.023010252800006, 154.02734375260002, 111.2017822208], [135.68762206609995, 69.68548582400001, 181.20141602160004, 77.4143676928]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00046930.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object.", "boxes_value": [[279.3480224256, 467.6572875776, 533.4072265728, 512.0230712832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046930_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object.", "boxes_value": [[64.34802242559999, 11.657287577600016, 318.4072265728, 56]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046930.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[279.3480224256, 467.6572875776, 533.4072265728, 512.0230712832], [509.5191650304, 467.6572875776, 533.4072265728, 511.9891968], [497.7546386688, 471.6787109376, 520.8065185536, 511.8236084224], [475.17309573119996, 477.3240966656, 498.38183592959996, 511.8236084224], [354.672363264, 471.1777954304, 386.25500490239995, 511.9998169088], [279.3480224256, 476.5762329088, 302.20947264, 512.0230712832]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046930_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[64.34802242559999, 11.657287577600016, 318.4072265728, 56], [294.5191650304, 11.657287577600016, 318.4072265728, 55.9891968], [282.7546386688, 15.678710937599988, 305.8065185536, 55.823608422400014], [260.17309573119996, 21.324096665599996, 283.38183592959996, 55.823608422400014], [139.672363264, 15.17779543040001, 171.25500490239995, 55.99981690880003], [64.34802242559999, 20.576232908800023, 87.20947264, 56]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046932.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[348.24145506039997, 250.9451275264, 491.71398922689997, 403.4931030528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046932_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[36.24145506039997, 38.9451275264, 179.71398922689997, 191.4931030528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046932.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two hats, and a boat.", "boxes_value": [[348.24145506039997, 250.9451275264, 491.71398922689997, 403.4931030528], [348.24145506039997, 269.0725097472, 491.71398922689997, 403.4931030528], [305.7299194239, 250.8394165248, 412.0250243992, 357.522460928], [357.3371585981, 250.9451275264, 402.09923100599997, 271.2495727104], [433.4066162191, 266.5177001984, 465.4374999855, 296.1080932864], [67.45068362619999, 138.1119384576, 636.4118652123, 478.3790893568]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046932_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two hats, and a boat.", "boxes_value": [[36.24145506039997, 38.9451275264, 179.71398922689997, 191.4931030528], [36.24145506039997, 57.0725097472, 179.71398922689997, 191.4931030528], [0, 38.83941652479999, 100.02502439919999, 145.522460928], [45.33715859810002, 38.9451275264, 90.09923100599997, 59.2495727104], [121.40661621909999, 54.51770019840001, 153.43749998549998, 84.10809328639999], [0, 0, 215, 229]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046933.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[155.0219726592, 101.8889770496, 481.01904299520004, 270.8933715968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046933_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[82.0219726592, 42.8889770496, 408.01904299520004, 211.89337159680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046933.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four storage boxes, and two hats.", "boxes_value": [[155.0219726592, 101.8889770496, 481.01904299520004, 270.8933715968], [155.0219726592, 238.8392333824, 208.7343750144, 270.8933715968], [259.158203136, 177.9399414272, 310.0770263808, 204.0358276608], [237.51763914240001, 177.9399414272, 259.79467776, 201.489929216], [433.5554199552, 193.0337524224, 481.01904299520004, 246.2257690624], [356.0797118976, 75.5541381632, 423.6350097408, 123.8618774528], [421.79809566719996, 101.8889770496, 439.2664794624, 119.0801391616]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046933_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four storage boxes, and two hats.", "boxes_value": [[82.0219726592, 42.8889770496, 408.01904299520004, 211.89337159680002], [82.0219726592, 179.8392333824, 135.7343750144, 211.89337159680002], [186.158203136, 118.93994142720001, 237.07702638080002, 145.0358276608], [164.51763914240001, 118.93994142720001, 186.79467776, 142.489929216], [360.5554199552, 134.0337524224, 408.01904299520004, 187.2257690624], [283.0797118976, 16.554138163199994, 350.6350097408, 64.8618774528], [348.79809566719996, 42.8889770496, 366.2664794624, 60.0801391616]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046935.jpg", "text": "Can you elaborate on the content of the bounding box in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 164.0204467712, 113.01074219600001, 440.4357910016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046935_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 70.0204467712, 113.01074219600001, 346.4357910016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046935.jpg", "text": "Can you elaborate on the content of the bounding box in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a bench, and three moniters.", "boxes_value": [[0, 164.0204467712, 113.01074219600001, 440.4357910016], [0.5164794832, 312.413818368, 89.0650024106, 440.4357910016], [69.2454834044, 268.6923828224, 113.01074219600001, 329.1575927808], [0, 311.4554443264, 66.9380493484, 403.316406272], [0, 225.553527808, 67.6776123138, 262.6912841728], [35.4044800148, 164.0204467712, 103.4170532514, 204.2342529536]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046935_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a bench, and three moniters.", "boxes_value": [[0, 70.0204467712, 113.01074219600001, 346.4357910016], [0.5164794832, 218.41381836800002, 89.0650024106, 346.4357910016], [69.2454834044, 174.69238282240002, 113.01074219600001, 235.15759278079997], [0, 217.45544432640003, 66.9380493484, 309.316406272], [0, 131.553527808, 67.6776123138, 168.6912841728], [35.4044800148, 70.0204467712, 103.4170532514, 110.23425295359999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046939.jpg", "text": "Can you give me a visual rundown of the area in ? Specify the location of each mentioned object.", "boxes_value": [[336.3452758551, 221.2749633536, 540.3143310778, 295.0376586752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046939_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Specify the location of each mentioned object.", "boxes_value": [[51.3452758551, 19.274963353599986, 255.31433107780003, 93.03765867520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046939.jpg", "text": "Can you give me a visual rundown of the area in ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and two boats.", "boxes_value": [[336.3452758551, 221.2749633536, 540.3143310778, 295.0376586752], [523.1168212995, 251.1740722688, 540.3143310778, 295.0376586752], [464.76281735460003, 273.0864258048, 488.6130371347, 294.920227072], [418.47534183060003, 256.0327148544, 440.1365966746, 290.614624], [336.3452758551, 236.452026368, 348.5273437661, 276.7147216896], [497.21594237579995, 221.2749633536, 536.2091064404, 230.74468992], [331.03674318369997, 275.5268554752, 576.5603027636, 310.9267578368]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046939_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and two boats.", "boxes_value": [[51.3452758551, 19.274963353599986, 255.31433107780003, 93.03765867520002], [238.11682129949997, 49.17407226879999, 255.31433107780003, 93.03765867520002], [179.76281735460003, 71.0864258048, 203.6130371347, 92.92022707199999], [133.47534183060003, 54.03271485440001, 155.1365966746, 88.61462399999999], [51.3452758551, 34.45202636799999, 63.52734376609999, 74.71472168960003], [212.21594237579995, 19.274963353599986, 251.20910644039998, 28.744689920000013], [46.03674318369997, 73.52685547520002, 291.5603027636, 108.9267578368]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00046941.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[46.318908672, 0, 300.198358032, 361.773643392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046941_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[46.318908672, 0, 300.198358032, 361.773643392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046941.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two glasses, and a hat.", "boxes_value": [[46.318908672, 0, 300.198358032, 361.773643392], [12.939758304, 34.626525912000005, 333.90362548800005, 660.168701136], [46.318908672, 0, 123.31506345599999, 34.756958016], [190.294756896, 272.808503784, 225.760589568, 361.773643392], [153.701353344, 113.930542008, 246.46331788799998, 145.932312024], [155.161456368, 35.318969712, 300.198358032, 155.624373504]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046941_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two glasses, and a hat.", "boxes_value": [[46.318908672, 0, 300.198358032, 361.773643392], [12.939758304, 34.626525912000005, 333.90362548800005, 452], [46.318908672, 0, 123.31506345599999, 34.756958016], [190.294756896, 272.808503784, 225.760589568, 361.773643392], [153.701353344, 113.930542008, 246.46331788799998, 145.932312024], [155.161456368, 35.318969712, 300.198358032, 155.624373504]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00046942.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[43.2873534912, 359.9326171648, 305.8968505824, 511.7696532992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046942_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[43.2873534912, 38.93261716479998, 305.8968505824, 190.7696532992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046942.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, and four people.", "boxes_value": [[43.2873534912, 359.9326171648, 305.8968505824, 511.7696532992], [231.3949585248, 376.4519653376, 305.8968505824, 414.6265259008], [130.306091328, 359.9326171648, 153.4589233632, 451.0968627712], [102.4503173568, 361.3796386816, 137.1795654432, 453.6292114432], [45.4468383936, 374.0930176, 103.587280272, 511.7696532992], [43.2873534912, 359.9400024576, 55.6463623296, 395.023620608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046942_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, and four people.", "boxes_value": [[43.2873534912, 38.93261716479998, 305.8968505824, 190.7696532992], [231.3949585248, 55.45196533759997, 305.8968505824, 93.6265259008], [130.306091328, 38.93261716479998, 153.4589233632, 130.0968627712], [102.4503173568, 40.3796386816, 137.1795654432, 132.6292114432], [45.4468383936, 53.093017599999996, 103.587280272, 190.7696532992], [43.2873534912, 38.940002457599974, 55.6463623296, 74.02362060799999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00046945.jpg", "text": "Please describe the section of the picture defined by the bbox . Provide the coordinates for all objects that you mention.", "boxes_value": [[114.0847167758, 120.6107177984, 393.1822510046, 290.2644042752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046945_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Provide the coordinates for all objects that you mention.", "boxes_value": [[70.0847167758, 42.6107177984, 349.1822510046, 212.26440427519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046945.jpg", "text": "Please describe the section of the picture defined by the bbox . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a clock, a backpack, two street lights, and a bus.", "boxes_value": [[114.0847167758, 120.6107177984, 393.1822510046, 290.2644042752], [114.0847167758, 120.6107177984, 134.270446778, 140.2508545024], [121.2036133121, 248.4535522304, 141.5820312598, 290.2644042752], [120.1647339156, 169.2737426944, 133.2766723844, 253.9395141632], [250.19927976830002, 166.3604126208, 351.6271972618, 278.9626464768], [386.4160156382, 191.9646606336, 393.1822510046, 236.6216430592]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046945_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a clock, a backpack, two street lights, and a bus.", "boxes_value": [[70.0847167758, 42.6107177984, 349.1822510046, 212.26440427519998], [70.0847167758, 42.6107177984, 90.27044677800001, 62.25085450239999], [77.2036133121, 170.4535522304, 97.5820312598, 212.26440427519998], [76.1647339156, 91.2737426944, 89.2766723844, 175.9395141632], [206.19927976830002, 88.36041262079999, 307.6271972618, 200.9626464768], [342.4160156382, 113.9646606336, 349.1822510046, 158.6216430592]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00046947.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention.", "boxes_value": [[87.0926513607, 69.9087524352, 308.2612915206, 244.6451416064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046947_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention.", "boxes_value": [[56.0926513607, 43.9087524352, 277.2612915206, 218.6451416064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046947.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a handbag, and a canned.", "boxes_value": [[87.0926513607, 69.9087524352, 308.2612915206, 244.6451416064], [117.59277346620001, 51.5875244032, 231.06597902180002, 296.8552246272], [180.2394409486, 69.9087524352, 238.15808102309998, 235.9815673856], [268.8103027208, 96.4152832, 308.2612915206, 196.903625472], [87.0926513607, 88.9457397248, 119.74340816910001, 168.9249878016], [80.0735155471, 100.84836608, 100.80097005910001, 136.9195206144], [202.6652832161, 203.1032104448, 225.8146362626, 244.6451416064]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046947_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a handbag, and a canned.", "boxes_value": [[56.0926513607, 43.9087524352, 277.2612915206, 218.6451416064], [86.59277346620001, 25.5875244032, 200.06597902180002, 262], [149.2394409486, 43.9087524352, 207.15808102309998, 209.9815673856], [237.8103027208, 70.4152832, 277.2612915206, 170.903625472], [56.0926513607, 62.945739724800006, 88.74340816910001, 142.9249878016], [49.0735155471, 74.84836608, 69.80097005910001, 110.9195206144], [171.6652832161, 177.1032104448, 194.8146362626, 218.6451416064]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046949.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[106.4075317048, 245.7285156352, 239.99591066160002, 302.1080322048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046949_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[33.40753170479999, 14.728515635200012, 166.99591066160002, 71.1080322048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046949.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a piano, two drums, two people, and a tripod.", "boxes_value": [[106.4075317048, 245.7285156352, 239.99591066160002, 302.1080322048], [106.4075317048, 266.6706543104, 186.317871122, 275.5476073984], [195.0447997932, 264.4859619328, 239.99591066160002, 302.1080322048], [215.26275633160003, 245.7285156352, 237.09985350120002, 263.9857787904], [186.8529052464, 210.941101056, 224.0625000108, 303.669738752], [119.521301302, 207.39733888, 170.9059448204, 321.388610816], [174.9376831092, 268.0559692288, 207.4306030364, 301.8841552896]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046949_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a piano, two drums, two people, and a tripod.", "boxes_value": [[33.40753170479999, 14.728515635200012, 166.99591066160002, 71.1080322048], [33.40753170479999, 35.67065431039998, 113.31787112200001, 44.547607398399975], [122.04479979320001, 33.48596193280002, 166.99591066160002, 71.1080322048], [142.26275633160003, 14.728515635200012, 164.09985350120002, 32.98577879039999], [113.85290524640001, 0, 151.0625000108, 72.669738752], [46.521301302, 0, 97.90594482040001, 85], [101.93768310920001, 37.055969228799995, 134.4306030364, 70.88415528960002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00046951.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[356.1145019335, 104.7385864192, 532.10168455, 185.590270976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046951_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.11450193349998, 20.738586419200004, 220.10168454999996, 101.590270976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046951.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a soccer, three people, and a briefcase.", "boxes_value": [[356.1145019335, 104.7385864192, 532.10168455, 185.590270976], [356.1145019335, 165.5339965952, 374.636352564, 184.0557861376], [463.7593994178, 104.7385864192, 497.83508301160003, 182.2608032256], [482.2374267476, 115.8131714048, 515.8262939215, 182.5042114048], [497.4100341934, 126.0557251072, 532.10168455, 181.6696777216], [373.8684081837, 148.2021484544, 417.2385254052, 185.590270976]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046951_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a soccer, three people, and a briefcase.", "boxes_value": [[44.11450193349998, 20.738586419200004, 220.10168454999996, 101.590270976], [44.11450193349998, 81.5339965952, 62.63635256399999, 100.0557861376], [151.7593994178, 20.738586419200004, 185.83508301160003, 98.2608032256], [170.2374267476, 31.813171404800002, 203.8262939215, 98.5042114048], [185.4100341934, 42.055725107200004, 220.10168454999996, 97.66967772160001], [61.86840818370001, 64.2021484544, 105.2385254052, 101.590270976]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046952.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[0.366882304, 190.3311157248, 223.7123412992, 645.118164096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046952_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[0.366882304, 114.33111572479999, 223.7123412992, 569.118164096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046952.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include a stool, a picture, two people, a handbag, and two plates.", "boxes_value": [[0.366882304, 190.3311157248, 223.7123412992, 645.118164096], [0.366882304, 453.61328125439996, 57.6719360512, 616.5576172032], [183.4214477312, 145.766540544, 221.222656256, 257.64770511359995], [9.3662719488, 154.75048826879998, 274.8884887552, 635.6002197503999], [0.4058227712, 190.3311157248, 79.4890136576, 645.118164096], [1.0305277952, 334.58939950079997, 47.4425854976, 434.16277570560004], [179.3755493376, 411.34838868479994, 223.7123412992, 429.0139160064], [211.5889892352, 398.5323485952, 232.025512704, 413.4267578112]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046952_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include a stool, a picture, two people, a handbag, and two plates.", "boxes_value": [[0.366882304, 114.33111572479999, 223.7123412992, 569.118164096], [0.366882304, 377.61328125439996, 57.6719360512, 540.5576172032], [183.4214477312, 69.76654054400001, 221.222656256, 181.64770511359995], [9.3662719488, 78.75048826879998, 274.8884887552, 559.6002197503999], [0.4058227712, 114.33111572479999, 79.4890136576, 569.118164096], [1.0305277952, 258.58939950079997, 47.4425854976, 358.16277570560004], [179.3755493376, 335.34838868479994, 223.7123412992, 353.0139160064], [211.5889892352, 322.5323485952, 232.025512704, 337.4267578112]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00046953.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[904.2360839904001, 211.7927246336, 1050.152099646, 455.5620727296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046953_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[37.23608399040006, 61.79272463359999, 183.1520996459999, 305.5620727296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046953.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, a handbag, a leather shoes, and a bicycle.", "boxes_value": [[904.2360839904001, 211.7927246336, 1050.152099646, 455.5620727296], [894.7395019116, 143.3769531392, 984.8503417398, 454.3225097728], [977.869873014, 259.5056152576, 1040.059082022, 447.3421020672], [1019.3972168352, 211.7927246336, 1042.5751952568, 237.63488768], [979.5651855912, 286.1136474624, 1002.5498046516, 339.1292114432], [904.2360839904001, 445.6636352512, 938.9645996262, 455.5620727296], [1017.2055663828, 265.5332641792, 1050.152099646, 325.8316040192]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046953_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, a handbag, a leather shoes, and a bicycle.", "boxes_value": [[37.23608399040006, 61.79272463359999, 183.1520996459999, 305.5620727296], [27.739501911599973, 0, 117.8503417398, 304.3225097728], [110.86987301399995, 109.50561525760003, 173.05908202199998, 297.3421020672], [152.39721683519997, 61.79272463359999, 175.57519525680004, 87.63488767999999], [112.56518559120002, 136.11364746240002, 135.54980465159997, 189.1292114432], [37.23608399040006, 295.6636352512, 71.96459962619997, 305.5620727296], [150.20556638280004, 115.53326417919999, 183.1520996459999, 175.83160401919997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00046954.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify.", "boxes_value": [[64.1949462528, 19.0252685312, 276.5574951168, 76.284179712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046954_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify.", "boxes_value": [[53.194946252799994, 15.025268531199998, 265.5574951168, 72.284179712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046954.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, a helmet, and a hockey stick.", "boxes_value": [[64.1949462528, 19.0252685312, 276.5574951168, 76.284179712], [42.784790016, 19.4912109568, 93.2937011712, 99.5571899392], [83.5928345088, 0.1428222464, 117.67773434879999, 47.1470337024], [159.1843872, 11.137939456, 207.28808593920002, 76.284179712], [234.50109864959998, 23.5075073024, 276.5574951168, 76.284179712], [64.1949462528, 19.0252685312, 80.83032230399999, 39.8195190272], [133.89672852479998, 58.333923328, 218.989990272, 74.1477661184]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046954_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, a helmet, and a hockey stick.", "boxes_value": [[53.194946252799994, 15.025268531199998, 265.5574951168, 72.284179712], [31.784790016000002, 15.4912109568, 82.2937011712, 86], [72.5928345088, 0, 106.67773434879999, 43.1470337024], [148.1843872, 7.137939456, 196.28808593920002, 72.284179712], [223.50109864959998, 19.5075073024, 265.5574951168, 72.284179712], [53.194946252799994, 15.025268531199998, 69.83032230399999, 35.8195190272], [122.89672852479998, 54.333923328, 207.989990272, 70.1477661184]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046956.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[178.56622315279998, 362.2021484544, 511.5460204932, 474.0925293056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046956_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[83.56622315279998, 28.202148454400003, 416.5460204932, 140.0925293056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046956.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include two cars, a van, and two street lights.", "boxes_value": [[178.56622315279998, 362.2021484544, 511.5460204932, 474.0925293056], [178.56622315279998, 456.5636596736, 224.77880859, 474.0925293056], [236.99597169359998, 452.3142089728, 291.1762085064, 469.8431396352], [387.0698242432, 440.7626953216, 443.4761962764, 463.4133300736], [489.4278564616, 362.2021484544, 511.5460204932, 456.2044067328], [193.8566894488, 368.5592651264, 224.93499756559999, 457.5285033984]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046956_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include two cars, a van, and two street lights.", "boxes_value": [[83.56622315279998, 28.202148454400003, 416.5460204932, 140.0925293056], [83.56622315279998, 122.5636596736, 129.77880859, 140.0925293056], [141.99597169359998, 118.31420897279997, 196.17620850639997, 135.84313963519998], [292.0698242432, 106.7626953216, 348.4761962764, 129.41333007359998], [394.4278564616, 28.202148454400003, 416.5460204932, 122.20440673280001], [98.85668944880001, 34.55926512640002, 129.93499756559999, 123.5285033984]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00046957.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[458.520874042, 137.7791748096, 672.195800815, 374.2490844672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046957_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.520874042, 59.77917480959999, 267.195800815, 296.2490844672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046957.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three sneakers, a hat, and a handbag.", "boxes_value": [[458.520874042, 137.7791748096, 672.195800815, 374.2490844672], [458.520874042, 283.738830592, 473.33154299200004, 310.7273559552], [580.956542936, 284.7261962752, 594.450805641, 317.6390381056], [587.5390624949999, 356.1470337024, 613.869262699, 374.2490844672], [529.60339355, 137.7791748096, 556.694946269, 153.3072509952], [637.115112326, 276.9508056576, 672.195800815, 295.5229492224]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046957_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three sneakers, a hat, and a handbag.", "boxes_value": [[53.520874042, 59.77917480959999, 267.195800815, 296.2490844672], [53.520874042, 205.738830592, 68.33154299200004, 232.72735595519998], [175.956542936, 206.72619627519998, 189.450805641, 239.63903810559998], [182.53906249499994, 278.1470337024, 208.86926269900005, 296.2490844672], [124.60339354999996, 59.77917480959999, 151.69494626899996, 75.30725099520001], [232.11511232600003, 198.95080565759997, 267.195800815, 217.52294922239997]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046958.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[493.63171389490003, 340.3440551936, 602.5236816513, 500.2207031296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046958_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[27.63171389490003, 40.34405519360001, 136.52368165129997, 200.2207031296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046958.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two slippers, a pen, and a mouse.", "boxes_value": [[493.63171389490003, 340.3440551936, 602.5236816513, 500.2207031296], [510.01330565790005, 165.9416503808, 682.9898681730999, 502.1497192448], [572.0074462781, 428.3507080192, 602.5236816513, 464.8561401344], [527.2313232625, 470.845275904, 586.8377685539, 500.2207031296], [493.63171389490003, 340.3440551936, 543.2279052618, 349.8123779072], [487.8135986117, 340.5363769344, 512.4162597531, 356.3630981632]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046958_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two slippers, a pen, and a mouse.", "boxes_value": [[27.63171389490003, 40.34405519360001, 136.52368165129997, 200.2207031296], [44.01330565790005, 0, 163, 202.1497192448], [106.00744627810002, 128.3507080192, 136.52368165129997, 164.8561401344], [61.23132326250004, 170.845275904, 120.83776855389999, 200.2207031296], [27.63171389490003, 40.34405519360001, 77.22790526179995, 49.812377907200016], [21.8135986117, 40.53637693439998, 46.41625975310001, 56.36309816319999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046960.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations.", "boxes_value": [[301.932251005, 317.6232300032, 618.6032715169999, 375.9312743936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046960_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations.", "boxes_value": [[79.93225100500001, 14.623230003199978, 396.60327151699994, 72.9312743936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046960.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a telephone, a moniter, a keyboard, two speakers, and a laptop.", "boxes_value": [[301.932251005, 317.6232300032, 618.6032715169999, 375.9312743936], [260.408813511, 334.603088384, 487.26416015200004, 495.4852295168], [486.54406739399997, 332.8513183744, 514.064453133, 353.082519552], [344.34765626800004, 273.432495104, 437.498535166, 348.1185302528], [359.103149405, 345.9700317184, 430.57775876499994, 360.143493632], [460.34191896, 317.6232300032, 480.38720706899994, 348.3997802496], [301.932251005, 319.664489728, 324.692382784, 351.796447744], [552.071411122, 327.224121088, 618.6032715169999, 375.9312743936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046960_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a telephone, a moniter, a keyboard, two speakers, and a laptop.", "boxes_value": [[79.93225100500001, 14.623230003199978, 396.60327151699994, 72.9312743936], [38.408813511000005, 31.60308838399999, 265.26416015200004, 87], [264.54406739399997, 29.851318374400023, 292.06445313300003, 50.08251955200001], [122.34765626800004, 0, 215.498535166, 45.118530252799985], [137.103149405, 42.97003171839998, 208.57775876499994, 57.143493632], [238.34191896, 14.623230003199978, 258.38720706899994, 45.399780249599985], [79.93225100500001, 16.664489727999978, 102.69238278400002, 48.79644774399998], [330.071411122, 24.224121088000004, 396.60327151699994, 72.9312743936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00046962.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[60.975463872000006, 93.56311036800001, 305.856750464, 480.718749984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046962_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[60.975463872000006, 93.56311036800001, 305.856750464, 480]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046962.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bracelet, a glasses, a tie, and a moniter.", "boxes_value": [[60.975463872000006, 93.56311036800001, 305.856750464, 480.718749984], [26.210815423999996, 37.965698256, 382.79370118400004, 480.171081552], [207.092407232, 456.07263182400004, 230.09545900799998, 480.718749984], [203.098266624, 164.695251456, 305.856750464, 192.29565431999998], [111.64172364800001, 247.02557371199998, 257.781921408, 478.657836912], [60.975463872000006, 93.56311036800001, 88.807251008, 118.80590822399999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046962_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bracelet, a glasses, a tie, and a moniter.", "boxes_value": [[60.975463872000006, 93.56311036800001, 305.856750464, 480], [26.210815423999996, 37.965698256, 367, 480], [207.092407232, 456.07263182400004, 230.09545900799998, 480], [203.098266624, 164.695251456, 305.856750464, 192.29565431999998], [111.64172364800001, 247.02557371199998, 257.781921408, 478.657836912], [60.975463872000006, 93.56311036800001, 88.807251008, 118.80590822399999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046964.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[71.5036010501, 12.1243286016, 430.675537108, 153.3411865088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046964_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[71.5036010501, 12.1243286016, 430.675537108, 153.3411865088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046964.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, a person, three cars, and a sports car.", "boxes_value": [[71.5036010501, 12.1243286016, 430.675537108, 153.3411865088], [328.323608395, 123.1428832768, 496.48095706019996, 155.3854370304], [230.2323608566, 31.1606445568, 271.5207519535, 148.5491943424], [71.5036010501, 41.3184204288, 298.5231933574, 153.3411865088], [116.95422365259999, 12.1243286016, 198.52832032, 77.9783935488], [310.8115234329, 40.559204096, 430.675537108, 118.7517089792], [0, 75.6243286016, 543.9620361444, 467.0830077952]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046964_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a bench, a person, three cars, and a sports car.", "boxes_value": [[71.5036010501, 12.1243286016, 430.675537108, 153.3411865088], [328.323608395, 123.1428832768, 496.48095706019996, 155.3854370304], [230.2323608566, 31.1606445568, 271.5207519535, 148.5491943424], [71.5036010501, 41.3184204288, 298.5231933574, 153.3411865088], [116.95422365259999, 12.1243286016, 198.52832032, 77.9783935488], [310.8115234329, 40.559204096, 430.675537108, 118.7517089792], [0, 75.6243286016, 520, 188]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046966.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please mention the objects and their locations.", "boxes_value": [[0, 41.8526001272, 168.8759765504, 770.5622558412]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046966_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please mention the objects and their locations.", "boxes_value": [[0, 41.8526001272, 168.8759765504, 770.5622558412]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046966.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a towel, a person, a sneakers, and a paint brush.", "boxes_value": [[0, 41.8526001272, 168.8759765504, 770.5622558412], [0.312194816, 417.57824705720003, 512.5828857344, 772.830810574], [92.9995117056, 565.640502944, 227.4769287168, 643.9110107171999], [0, 41.8526001272, 168.8759765504, 520.8164062632], [1.5, 742.0092773764, 97.8516235264, 770.5622558412], [132.7786254848, 304.0084228136, 158.2137451008, 467.1654052516]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046966_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a towel, a person, a sneakers, and a paint brush.", "boxes_value": [[0, 41.8526001272, 168.8759765504, 770.5622558412], [0.312194816, 417.57824705720003, 211, 772], [92.9995117056, 565.640502944, 211, 643.9110107171999], [0, 41.8526001272, 168.8759765504, 520.8164062632], [1.5, 742.0092773764, 97.8516235264, 770.5622558412], [132.7786254848, 304.0084228136, 158.2137451008, 467.1654052516]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046967.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[0.21789550929999998, 275.1225586176, 203.3405761758, 465.5241699328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046967_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[0.21789550929999998, 48.12255861760002, 203.3405761758, 238.52416993280002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046967.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include four people, and a moniter.", "boxes_value": [[0.21789550929999998, 275.1225586176, 203.3405761758, 465.5241699328], [143.5816040074, 293.0347900416, 235.1861571929, 408.5149536256], [9.0983886839, 306.678039552, 83.6488647548, 465.5241699328], [173.04199220770002, 275.1225586176, 203.3405761758, 304.9185790976], [0.21789550929999998, 343.353881856, 8.3455810297, 429.1149292032], [150.2601928792, 266.8792114176, 209.64410399850001, 310.4732055552]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046967_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include four people, and a moniter.", "boxes_value": [[0.21789550929999998, 48.12255861760002, 203.3405761758, 238.52416993280002], [143.5816040074, 66.03479004159999, 235.1861571929, 181.51495362560001], [9.0983886839, 79.67803955199997, 83.6488647548, 238.52416993280002], [173.04199220770002, 48.12255861760002, 203.3405761758, 77.9185790976], [0.21789550929999998, 116.35388185599999, 8.3455810297, 202.1149292032], [150.2601928792, 39.879211417600004, 209.64410399850001, 83.47320555520002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046969.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object.", "boxes_value": [[163.6481323004, 33.2368774656, 334.36279300219996, 229.7523803648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046969_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object.", "boxes_value": [[43.64813230039999, 33.2368774656, 214.36279300219996, 229.7523803648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046969.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two glasses, and two helmets.", "boxes_value": [[163.6481323004, 33.2368774656, 334.36279300219996, 229.7523803648], [137.0078735342, 143.7813110272, 430.3317871168, 510.8806152192], [0.12341309419999999, 30.8960571392, 273.0035400378, 511.769470208], [221.9176635976, 94.1285400576, 260.9582519306, 117.4363403264], [163.6481323004, 33.2368774656, 271.7381592066, 102.5776367104], [308.5635986358, 214.2367553536, 334.36279300219996, 229.7523803648], [230.47515868119999, 146.2824707072, 341.9556884914, 229.3709106688]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4, 6]]}, {"image_path": "objects365_v1_00046969_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two glasses, and two helmets.", "boxes_value": [[43.64813230039999, 33.2368774656, 214.36279300219996, 229.7523803648], [17.0078735342, 143.7813110272, 257, 278], [0, 30.8960571392, 153.0035400378, 278], [101.91766359760001, 94.1285400576, 140.9582519306, 117.4363403264], [43.64813230039999, 33.2368774656, 151.73815920660002, 102.5776367104], [188.56359863580002, 214.2367553536, 214.36279300219996, 229.7523803648], [110.47515868119999, 146.2824707072, 221.9556884914, 229.3709106688]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4, 6]]}, {"image_path": "objects365_v1_00046970.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[87.518676784, 147.8765258752, 369.966796892, 249.5060424704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046970_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[71.518676784, 25.876525875200002, 353.966796892, 127.50604247039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046970.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include five pictures, and a person.", "boxes_value": [[87.518676784, 147.8765258752, 369.966796892, 249.5060424704], [87.518676784, 147.8765258752, 164.709106408, 215.6965331968], [240.979309109, 182.9513549824, 305.400512725, 273.026977536], [298.055542015, 112.772827136, 358.59265134000003, 192.6115722752], [335.374267613, 202.505249024, 369.966796892, 249.5060424704], [296.645507774, 200.249206528, 334.998168967, 249.13006592], [215.804138165, 183.7048950272, 267.317016582, 243.1139526144]], "boxes_seq": [[0], [0], [1, 3, 4, 5, 6], [2]]}, {"image_path": "objects365_v1_00046970_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include five pictures, and a person.", "boxes_value": [[71.518676784, 25.876525875200002, 353.966796892, 127.50604247039999], [71.518676784, 25.876525875200002, 148.709106408, 93.6965331968], [224.979309109, 60.951354982400005, 289.400512725, 151.026977536], [282.055542015, 0, 342.59265134000003, 70.61157227519999], [319.374267613, 80.505249024, 353.966796892, 127.50604247039999], [280.645507774, 78.249206528, 318.998168967, 127.13006591999999], [199.804138165, 61.704895027199996, 251.317016582, 121.1139526144]], "boxes_seq": [[0], [0], [1, 3, 4, 5, 6], [2]]}, {"image_path": "objects365_v1_00046971.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[310.6241454874, 73.401733376, 750.8443603316, 369.549438464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046971_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[110.62414548740003, 73.401733376, 550.8443603316, 369.549438464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046971.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a storage box, a desk, two people, a ballon, and a trash bin can.", "boxes_value": [[310.6241454874, 73.401733376, 750.8443603316, 369.549438464], [401.4134521683, 305.9645385728, 494.70422360910004, 342.7897949184], [670.6470947048999, 207.7638550016, 750.8443603316, 301.0545043968], [239.4183349562, 98.7779540992, 360.9859619184, 336.4187621888], [333.5130614986, 102.8989257728, 470.1909179397, 332.984680192], [310.6241454874, 73.401733376, 372.59765621860004, 132.4588622848], [629.63134765, 250.4694213632, 694.2507324036001, 369.549438464]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046971_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a storage box, a desk, two people, a ballon, and a trash bin can.", "boxes_value": [[110.62414548740003, 73.401733376, 550.8443603316, 369.549438464], [201.4134521683, 305.9645385728, 294.70422360910004, 342.7897949184], [470.6470947048999, 207.7638550016, 550.8443603316, 301.0545043968], [39.4183349562, 98.7779540992, 160.9859619184, 336.4187621888], [133.5130614986, 102.8989257728, 270.1909179397, 332.984680192], [110.62414548740003, 73.401733376, 172.59765621860004, 132.4588622848], [429.63134764999995, 250.4694213632, 494.25073240360007, 369.549438464]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046972.jpg", "text": "What does the area within the given visual contain? Specify the location of each mentioned object.", "boxes_value": [[94.60449216, 204.02337648, 369.307434112, 390.610778832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046972_crop.jpg", "text": "What does the area within the given visual contain? Specify the location of each mentioned object.", "boxes_value": [[69.60449216, 47.023376479999996, 344.307434112, 233.610778832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046972.jpg", "text": "What does the area within the given visual contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, four people, and a handbag.", "boxes_value": [[94.60449216, 204.02337648, 369.307434112, 390.610778832], [241.744995136, 164.27630616, 343.750793472, 255.052124016], [105.044250496, 258.137451168, 154.81927488, 390.610778832], [115.97558592, 246.14599608000003, 138.125976576, 278.077026384], [285.47308352000005, 232.132507344, 308.897399872, 264.67999267199997], [352.787170432, 204.02337648, 369.307434112, 225.721679664], [94.60449216, 296.661315936, 112.615600576, 324.06958008]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046972_crop.jpg", "text": "What does the area within the given visual contain? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, four people, and a handbag.", "boxes_value": [[69.60449216, 47.023376479999996, 344.307434112, 233.610778832], [216.744995136, 7.27630615999999, 318.750793472, 98.052124016], [80.044250496, 101.13745116799998, 129.81927488, 233.610778832], [90.97558592, 89.14599608000003, 113.125976576, 121.07702638400002], [260.47308352000005, 75.132507344, 283.897399872, 107.67999267199997], [327.787170432, 47.023376479999996, 344.307434112, 68.72167966399999], [69.60449216, 139.661315936, 87.615600576, 167.06958007999998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00046978.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference.", "boxes_value": [[0, 265.7312622, 283.34307862, 398.5024414]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046978_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference.", "boxes_value": [[0, 33.7312622, 283.34307862, 166.5024414]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046978.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, two desks, and two benches.", "boxes_value": [[0, 265.7312622, 283.34307862, 398.5024414], [82.37750241, 287.6860962, 142.84991455, 397.26690675], [20.281738259999997, 297.02075195, 90.08874511, 416.34210205], [0, 298.23828125, 103.48199463, 409.4425659], [0, 278.7572632, 99.42346189999999, 398.5024414], [161.26531981, 265.7312622, 221.80950925, 344.6428833], [178.2720947, 275.95074465000005, 263.05590822, 351.44555665], [255.42773438, 271.80889895, 283.34307862, 345.7024536]], "boxes_seq": [[0], [0], [1, 2, 7], [3, 6], [4, 5]]}, {"image_path": "objects365_v1_00046978_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, two desks, and two benches.", "boxes_value": [[0, 33.7312622, 283.34307862, 166.5024414], [82.37750241, 55.68609620000001, 142.84991455, 165.26690674999998], [20.281738259999997, 65.02075194999998, 90.08874511, 184.34210205], [0, 66.23828125, 103.48199463, 177.44256589999998], [0, 46.75726320000001, 99.42346189999999, 166.5024414], [161.26531981, 33.7312622, 221.80950925, 112.6428833], [178.2720947, 43.95074465000005, 263.05590822, 119.44555665000001], [255.42773438, 39.808898950000014, 283.34307862, 113.70245360000001]], "boxes_seq": [[0], [0], [1, 2, 7], [3, 6], [4, 5]]}, {"image_path": "objects365_v1_00046979.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[498.78063964300003, 0, 675.3349609216, 337.119873024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046979_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[44.78063964300003, 0, 221.3349609216, 337.119873024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046979.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two lamps, and three pictures.", "boxes_value": [[498.78063964300003, 0, 675.3349609216, 337.119873024], [504.3498535391, 0, 589.002563486, 21.7621459968], [498.78063964300003, 83.02392576, 544.4484862975, 172.131958016], [552.0344237992, 298.5410156032, 585.7032470891, 337.119873024], [603.9404297124, 295.7352905216, 645.3250732673, 333.6127319552], [655.1451416072, 297.1381835776, 675.3349609216, 335.7170409984]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046979_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two lamps, and three pictures.", "boxes_value": [[44.78063964300003, 0, 221.3349609216, 337.119873024], [50.349853539100025, 0, 135.00256348599999, 21.7621459968], [44.78063964300003, 83.02392576, 90.44848629750004, 172.131958016], [98.0344237992, 298.5410156032, 131.70324708910005, 337.119873024], [149.94042971240003, 295.7352905216, 191.3250732673, 333.6127319552], [201.1451416072, 297.1381835776, 221.3349609216, 335.7170409984]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046980.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[162.3770141696, 313.090209981, 431.2620849664, 491.0507812692]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046980_crop.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[67.37701416959999, 45.09020998099999, 336.2620849664, 223.0507812692]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046980.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a belt, three sneakers, and a cup.", "boxes_value": [[162.3770141696, 313.090209981, 431.2620849664, 491.0507812692], [205.6542358528, 313.090209981, 318.7651367424, 327.8438110034], [162.3770141696, 447.2777099363, 196.0995483648, 468.3542480705], [382.2584228352, 471.7072753825, 431.2620849664, 491.0507812692], [369.3627319296, 448.49499508720004, 390.3182372864, 472.6744384887], [178.9569091584, 380.0592041007, 205.5874023424, 418.46850588850003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046980_crop.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a belt, three sneakers, and a cup.", "boxes_value": [[67.37701416959999, 45.09020998099999, 336.2620849664, 223.0507812692], [110.65423585280001, 45.09020998099999, 223.7651367424, 59.84381100339999], [67.37701416959999, 179.27770993630003, 101.0995483648, 200.35424807049998], [287.2584228352, 203.70727538249997, 336.2620849664, 223.0507812692], [274.3627319296, 180.49499508720004, 295.3182372864, 204.6744384887], [83.95690915840001, 112.05920410070001, 110.5874023424, 150.46850588850003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046981.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[0, 272.71478272, 237.700561536, 511.4088134656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046981_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[0, 59.71478272000002, 237.700561536, 298.4088134656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046981.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a hat, and a street lights.", "boxes_value": [[0, 272.71478272, 237.700561536, 511.4088134656], [0, 272.71478272, 207.6751098624, 511.4088134656], [145.2924194304, 341.3357543936, 268.67150876159997, 511.4088134656], [0, 354.0527343616, 29.712707481600003, 391.4667358208], [40.5645141504, 272.7006835712, 179.5887451392, 347.5598754816], [217.630371072, 318.8475341824, 237.700561536, 381.0651245056]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046981_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a hat, and a street lights.", "boxes_value": [[0, 59.71478272000002, 237.700561536, 298.4088134656], [0, 59.71478272000002, 207.6751098624, 298.4088134656], [145.2924194304, 128.33575439359998, 268.67150876159997, 298.4088134656], [0, 141.05273436160002, 29.712707481600003, 178.4667358208], [40.5645141504, 59.70068357119999, 179.5887451392, 134.5598754816], [217.630371072, 105.84753418240001, 237.700561536, 168.0651245056]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00046984.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[186.1591186432, 483.78625489340004, 335.8752441344, 683.3574218911001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046984_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[38.1591186432, 50.78625489340004, 187.87524413440002, 250]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046984.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a picture, a person, two handbags, a chicken, and a bakset.", "boxes_value": [[186.1591186432, 483.78625489340004, 335.8752441344, 683.3574218911001], [278.4863891456, 438.3419189644, 327.3911132672, 524.8657226422], [225.2823486464, 459.9729003798, 280.0986328064, 531.9864501980001], [234.5629272576, 483.78625489340004, 259.4684448256, 524.5533447377], [186.1591186432, 598.6269531455, 241.827514624, 683.3574218911001], [270.8896484352, 595.7615967125, 305.2730713088, 634.2382812313], [173.4451293696, 466.4641113434, 224.871398912, 516.0202636794], [293.6012573184, 596.5766601582, 335.8752441344, 656.1210937623]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046984_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a picture, a person, two handbags, a chicken, and a bakset.", "boxes_value": [[38.1591186432, 50.78625489340004, 187.87524413440002, 250], [130.48638914560001, 5.3419189644000085, 179.3911132672, 91.86572264220001], [77.28234864640001, 26.972900379800024, 132.09863280640002, 98.98645019800006], [86.5629272576, 50.78625489340004, 111.46844482559999, 91.55334473769994], [38.1591186432, 165.6269531455, 93.827514624, 250], [122.8896484352, 162.76159671250002, 157.2730713088, 201.2382812313], [25.44512936960001, 33.46411134340002, 76.87139891199999, 83.02026367940005], [145.60125731839997, 163.57666015819996, 187.87524413440002, 223.1210937623]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00046985.jpg", "text": "In the photo , can you delve into the details of the region ? Give coordinates for the items you reference.", "boxes_value": [[322.799072243, 192.0032958976, 449.774658178, 285.103088384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046985_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Give coordinates for the items you reference.", "boxes_value": [[31.799072243000012, 24.003295897599997, 158.77465817799998, 117.10308838399999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046985.jpg", "text": "In the photo , can you delve into the details of the region ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a gloves, a street lights, and a bus.", "boxes_value": [[322.799072243, 192.0032958976, 449.774658178, 285.103088384], [318.676757834, 220.9042968576, 450.1376952988, 411.5386962944], [417.61730953660003, 259.3206176768, 440.17700196619995, 285.103088384], [433.7239990434, 244.3150635008, 449.774658178, 279.4259643392], [322.799072243, 192.0032958976, 362.4431152228, 218.995056128], [267.2252197272, 215.6210937344, 451.0098877172, 341.3014526464]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046985_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a gloves, a street lights, and a bus.", "boxes_value": [[31.799072243000012, 24.003295897599997, 158.77465817799998, 117.10308838399999], [27.676757834, 52.9042968576, 159.1376952988, 140], [126.61730953660003, 91.3206176768, 149.17700196619995, 117.10308838399999], [142.7239990434, 76.3150635008, 158.77465817799998, 111.4259643392], [31.799072243000012, 24.003295897599997, 71.44311522279997, 50.99505612799999], [0, 47.621093734400006, 160.0098877172, 140]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00046988.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[428.2753906165, 236.6740722688, 509.7075195389, 372.7951049728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046988_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[21.275390616499976, 34.67407226879999, 102.70751953889999, 170.7951049728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046988.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, and two backpacks.", "boxes_value": [[428.2753906165, 236.6740722688, 509.7075195389, 372.7951049728], [471.0925292756, 207.0595703296, 628.6782226756001, 511.8370361344], [392.9176025378, 206.5422363136, 502.264160176, 512.0148925952], [457.7646484744, 195.104125952, 522.5635986093, 499.4276123136], [428.2753906165, 258.1170654208, 498.356445294, 372.7951049728], [475.23010257550004, 236.6740722688, 509.7075195389, 285.5946044928]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046988_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, and two backpacks.", "boxes_value": [[21.275390616499976, 34.67407226879999, 102.70751953889999, 170.7951049728], [64.09252927559999, 5.059570329600007, 123, 204], [0, 4.5422363136, 95.26416017600002, 204], [50.76464847440002, 0, 115.56359860930002, 204], [21.275390616499976, 56.117065420799975, 91.35644529400003, 170.7951049728], [68.23010257550004, 34.67407226879999, 102.70751953889999, 83.59460449279999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00046989.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[354.33129882009996, 233.013549824, 462.9283447177, 363.0776977408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046989_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[27.33129882009996, 33.013549823999995, 135.92834471769999, 163.07769774079998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046989.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a wine glass, and five people.", "boxes_value": [[354.33129882009996, 233.013549824, 462.9283447177, 363.0776977408], [384.7275390844, 336.5028686336, 398.76147461780005, 363.0776977408], [347.3201904421, 284.797607424, 416.45678710199996, 368.6364135936], [383.18066408749996, 286.1140747264, 436.8385010036, 346.1072387584], [441.4768066629, 286.50482176, 462.9283447177, 305.377380352], [354.33129882009996, 233.013549824, 423.5910644216, 309.7061157376], [417.19543459529996, 282.5997924864, 436.7083739995, 309.2249755648]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046989_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a wine glass, and five people.", "boxes_value": [[27.33129882009996, 33.013549823999995, 135.92834471769999, 163.07769774079998], [57.72753908440001, 136.50286863359997, 71.76147461780005, 163.07769774079998], [20.3201904421, 84.79760742399998, 89.45678710199996, 168.63641359360003], [56.18066408749996, 86.11407472640002, 109.83850100360002, 146.1072387584], [114.4768066629, 86.50482176000003, 135.92834471769999, 105.37738035199999], [27.33129882009996, 33.013549823999995, 96.59106442159998, 109.70611573759999], [90.19543459529996, 82.59979248640002, 109.7083739995, 109.22497556479999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00046990.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations.", "boxes_value": [[570.1029052859, 4.0439453184, 676.3208007994, 241.7496948224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046990_crop.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations.", "boxes_value": [[27.102905285899965, 4.0439453184, 133.3208007994, 241.7496948224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046990.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two cups, a bottle, a refrigerator, and a canned.", "boxes_value": [[570.1029052859, 4.0439453184, 676.3208007994, 241.7496948224], [570.1029052859, 17.3380737536, 590.6206054437, 56.2612304896], [570.3123779026, 4.0439453184, 676.3208007994, 179.7824096768], [639.9936523097, 223.9985961984, 659.1435546749, 255.6182251008], [652.0179443521, 201.7312011776, 677.8481445234, 263.6345214976], [582.9742431369, 221.2767333888, 617.1893310562, 241.7496948224], [630.0274658184001, 1.0900268544, 673.1942138731, 68.2800292864], [637.9902343637, 223.9574584832, 658.1868896202001, 257.7868652544]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00046990_crop.jpg", "text": "Please describe the region in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two cups, a bottle, a refrigerator, and a canned.", "boxes_value": [[27.102905285899965, 4.0439453184, 133.3208007994, 241.7496948224], [27.102905285899965, 17.3380737536, 47.620605443700015, 56.2612304896], [27.31237790260002, 4.0439453184, 133.3208007994, 179.7824096768], [96.9936523097, 223.9985961984, 116.14355467489997, 255.6182251008], [109.01794435210002, 201.7312011776, 134.84814452340004, 263.6345214976], [39.974243136899986, 221.2767333888, 74.18933105619999, 241.7496948224], [87.02746581840006, 1.0900268544, 130.1942138731, 68.2800292864], [94.99023436369998, 223.9574584832, 115.18688962020008, 257.7868652544]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00046993.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[361.1973266432, 164.58978270880002, 425.599731456, 548.9984130976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046993_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[16.197326643199972, 96.58978270880002, 80.59973145599997, 480.99841309759995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046993.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include four people, a gloves, and a sneakers.", "boxes_value": [[361.1973266432, 164.58978270880002, 425.599731456, 548.9984130976], [146.476379392, 175.228454622, 415.0762329088, 647.997680648], [406.0960693248, 190.6589355532, 420.9675903488, 216.01367187760002], [403.1705322496, 164.58978270880002, 425.599731456, 196.5100097856], [413.7688598528, 171.3009033096, 435.6079711744, 204.8057861452], [370.585449216, 376.6875000264, 415.0159301632, 410.2812500208], [361.1973266432, 498.8399657936, 409.3653564416, 548.9984130976]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046993_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include four people, a gloves, and a sneakers.", "boxes_value": [[16.197326643199972, 96.58978270880002, 80.59973145599997, 480.99841309759995], [0, 107.22845462199999, 70.07623290880002, 577], [61.0960693248, 122.6589355532, 75.9675903488, 148.01367187760002], [58.1705322496, 96.58978270880002, 80.59973145599997, 128.5100097856], [68.7688598528, 103.30090330959999, 90.6079711744, 136.8057861452], [25.585449215999972, 308.6875000264, 70.01593016319998, 342.2812500208], [16.197326643199972, 430.8399657936, 64.3653564416, 480.99841309759995]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00046994.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[1.8656006134, 215.2230224384, 251.70269776139997, 311.881591808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046994_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[1.8656006134, 24.223022438399994, 251.70269776139997, 120.881591808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046994.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and a trash bin can.", "boxes_value": [[1.8656006134, 215.2230224384, 251.70269776139997, 311.881591808], [111.3658447591, 242.3197632, 148.371093776, 311.881591808], [152.81982421790002, 237.4666747904, 187.3984985026, 306.2195434496], [147.1578369392, 215.2230224384, 163.5372314155, 263.5523071488], [230.4702148793, 216.6385498112, 251.70269776139997, 292.0645751808], [1.8656006134, 240.105285632, 18.2445678333, 262.8934936576]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046994_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, and a trash bin can.", "boxes_value": [[1.8656006134, 24.223022438399994, 251.70269776139997, 120.881591808], [111.3658447591, 51.31976320000001, 148.371093776, 120.881591808], [152.81982421790002, 46.46667479039999, 187.3984985026, 115.2195434496], [147.1578369392, 24.223022438399994, 163.5372314155, 72.55230714880003], [230.4702148793, 25.638549811199994, 251.70269776139997, 101.06457518079998], [1.8656006134, 49.105285632000005, 18.2445678333, 71.8934936576]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046996.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[61.646484403200006, 348.894714368, 195.891662592, 511.3580932608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046996_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[33.646484403200006, 40.894714367999995, 167.891662592, 203.3580932608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046996.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a candle, a flower, and three bottles.", "boxes_value": [[61.646484403200006, 348.894714368, 195.891662592, 511.3580932608], [81.568908672, 354.2991332864, 105.083129856, 454.234619136], [0.4447631616, 400.1519164928, 207.3700561152, 511.7756347904], [158.8721313792, 481.742492672, 173.6799316224, 507.4093627904], [179.109497088, 474.3386230272, 195.891662592, 511.3580932608], [61.646484403200006, 348.894714368, 125.97973632, 442.9187011584]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046996_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a candle, a flower, and three bottles.", "boxes_value": [[33.646484403200006, 40.894714367999995, 167.891662592, 203.3580932608], [53.568908672000006, 46.29913328639998, 77.083129856, 146.234619136], [0, 92.15191649280001, 179.3700561152, 203.77563479039998], [130.8721313792, 173.74249267200003, 145.6799316224, 199.4093627904], [151.109497088, 166.33862302720001, 167.891662592, 203.3580932608], [33.646484403200006, 40.894714367999995, 97.97973632, 134.91870115839998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00046997.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[349.5977172992, 467.4355468878, 508.8018188288, 583.4909667869]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046997_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[40.597717299199985, 29.4355468878, 199.80181882879998, 145.4909667869]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046997.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a van, two suvs, a car, and a traffic light.", "boxes_value": [[349.5977172992, 467.4355468878, 508.8018188288, 583.4909667869], [481.70281984, 506.02001952970005, 504.8350219776, 530.657836935], [446.1148071424, 509.8525390689, 483.07159424, 544.0717773743], [419.3822631936, 520.8979492277, 445.6475830272, 555.8074950867], [349.5977172992, 518.6384277279, 430.3884887552, 583.4909667869], [495.0986328064, 467.4355468878, 508.8018188288, 506.23559570760005]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00046997_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a van, two suvs, a car, and a traffic light.", "boxes_value": [[40.597717299199985, 29.4355468878, 199.80181882879998, 145.4909667869], [172.70281984000002, 68.02001952970005, 195.83502197759998, 92.65783693499998], [137.11480714240002, 71.8525390689, 174.07159424000002, 106.07177737430004], [110.38226319360001, 82.8979492277, 136.64758302720003, 117.80749508669999], [40.597717299199985, 80.63842772789997, 121.3884887552, 145.4909667869], [186.09863280640002, 29.4355468878, 199.80181882879998, 68.23559570760005]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00046998.jpg", "text": "What's going on in the section of contained within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[371.2233886908, 201.9240722432, 534.2486572066, 380.359863296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046998_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[41.22338869079999, 44.92407224319999, 204.2486572066, 223.35986329600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046998.jpg", "text": "What's going on in the section of contained within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lifesaver, three people, and a boat.", "boxes_value": [[371.2233886908, 201.9240722432, 534.2486572066, 380.359863296], [371.2233886908, 201.9240722432, 399.187988304, 231.8078002688], [393.8325195264, 334.3006592, 425.3458251726, 382.3815307776], [423.76806640079997, 314.738403328, 451.28125003459996, 380.359863296], [504.05090332340006, 213.3851318272, 534.2486572066, 252.2107544064], [99.48529053440001, 0, 650.3247070412001, 509.9917602304]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046998_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lifesaver, three people, and a boat.", "boxes_value": [[41.22338869079999, 44.92407224319999, 204.2486572066, 223.35986329600001], [41.22338869079999, 44.92407224319999, 69.18798830399999, 74.80780026880001], [63.832519526400006, 177.30065919999998, 95.34582517259997, 225.3815307776], [93.76806640079997, 157.738403328, 121.28125003459996, 223.35986329600001], [174.05090332340006, 56.38513182720001, 204.2486572066, 95.21075440640001], [0, 0, 245, 267]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00046999.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[283.9842529536, 28.5758056448, 572.4685058304, 512.1171874816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046999_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[72.98425295359999, 28.5758056448, 361.4685058304, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00046999.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a handbag, a blackboard, and two pears.", "boxes_value": [[283.9842529536, 28.5758056448, 572.4685058304, 512.1171874816], [412.0739745792, 189.9536132608, 572.4685058304, 510.7425537024], [368.63378903039995, 372.6250610176, 477.79113768959996, 510.7425537024], [283.9842529536, 490.2195434496, 357.3156738048, 512.1171874816], [194.5245971712, 208.167846656, 394.87670899200003, 355.26727296], [450.5076903936, 58.6212157952, 518.9445800448, 113.7045287936], [484.2253417728, 28.5758056448, 527.9581298687999, 93.0065917952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00046999_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a handbag, a blackboard, and two pears.", "boxes_value": [[72.98425295359999, 28.5758056448, 361.4685058304, 512], [201.0739745792, 189.9536132608, 361.4685058304, 510.7425537024], [157.63378903039995, 372.6250610176, 266.79113768959996, 510.7425537024], [72.98425295359999, 490.2195434496, 146.31567380479999, 512], [0, 208.167846656, 183.87670899200003, 355.26727296], [239.5076903936, 58.6212157952, 307.9445800448, 113.7045287936], [273.2253417728, 28.5758056448, 316.9581298687999, 93.0065917952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047000.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[0, 138.2622680576, 191.2963256832, 313.1582641664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047000_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[0, 44.26226805760001, 191.2963256832, 219.1582641664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047000.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two umbrellas.", "boxes_value": [[0, 138.2622680576, 191.2963256832, 313.1582641664], [77.2149047808, 276.4609375232, 184.93933102079998, 365.7172241408], [58.562866176, 181.3877563392, 191.2963256832, 313.1582641664], [0.1209106176, 244.835083008, 23.074340812800003, 273.8643188224], [0, 179.135620096, 92.198303232, 281.0671996928], [0.17034915839999998, 138.2622680576, 79.86077882880001, 228.9053955072]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047000_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two umbrellas.", "boxes_value": [[0, 44.26226805760001, 191.2963256832, 219.1582641664], [77.2149047808, 182.46093752320002, 184.93933102079998, 262], [58.562866176, 87.3877563392, 191.2963256832, 219.1582641664], [0.1209106176, 150.835083008, 23.074340812800003, 179.86431882239998], [0, 85.135620096, 92.198303232, 187.0671996928], [0.17034915839999998, 44.26226805760001, 79.86077882880001, 134.9053955072]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047005.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[0.985412608, 554.3762207232, 189.5543213056, 727.9097900544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047005_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[0.985412608, 44.37622072320005, 189.5543213056, 217.9097900544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047005.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two slippers, and two sneakers.", "boxes_value": [[0.985412608, 554.3762207232, 189.5543213056, 727.9097900544], [1.1099853312, 161.0281372416, 150.388732928, 728.6802978816], [0.985412608, 692.8271484672, 28.550292992, 727.9097900544], [25.417907712, 650.8533935616, 79.2947387904, 718.5126953472001], [98.0889892352, 554.3762207232, 131.2921753088, 581.9411621376], [155.7246704128, 555.4439697408, 189.5543213056, 584.4470214912]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047005_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two slippers, and two sneakers.", "boxes_value": [[0.985412608, 44.37622072320005, 189.5543213056, 217.9097900544], [1.1099853312, 0, 150.388732928, 218.68029788160004], [0.985412608, 182.8271484672, 28.550292992, 217.9097900544], [25.417907712, 140.8533935616, 79.2947387904, 208.51269534720007], [98.0889892352, 44.37622072320005, 131.2921753088, 71.94116213760003], [155.7246704128, 45.44396974079996, 189.5543213056, 74.44702149119996]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047006.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[237.71191403519998, 114.4061889536, 395.78259279360003, 475.8255615488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047006_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[39.71191403519998, 90.4061889536, 197.78259279360003, 451.8255615488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047006.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a trophy, two sneakers, and a street lights.", "boxes_value": [[237.71191403519998, 114.4061889536, 395.78259279360003, 475.8255615488], [291.95861813759996, 114.4061889536, 395.78259279360003, 475.8255615488], [358.86828610559996, 167.3097533952, 426.5876464896, 278.4293823488], [344.28483264, 436.2270642688, 377.5743807744, 453.4999429632], [298.0296630528, 450.9875242496, 327.95411097600004, 474.8555021312], [237.71191403519998, 236.0514526208, 258.4533691392, 308.1644286976]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047006_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a trophy, two sneakers, and a street lights.", "boxes_value": [[39.71191403519998, 90.4061889536, 197.78259279360003, 451.8255615488], [93.95861813759996, 90.4061889536, 197.78259279360003, 451.8255615488], [160.86828610559996, 143.3097533952, 228.5876464896, 254.4293823488], [146.28483264, 412.2270642688, 179.57438077440003, 429.4999429632], [100.0296630528, 426.9875242496, 129.95411097600004, 450.8555021312], [39.71191403519998, 212.0514526208, 60.45336913919999, 284.1644286976]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047007.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[156.6876220927, 132.483520512, 382.4403075916, 360.6540527104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047007_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[56.687622092699996, 57.48352051200001, 282.4403075916, 285.6540527104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047007.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a handbag, a sneakers, and a car.", "boxes_value": [[156.6876220927, 132.483520512, 382.4403075916, 360.6540527104], [326.57720949599997, 152.6917114368, 382.4403075916, 301.6600342016], [280.4165038945, 154.710571264, 309.2031250254, 194.3277587968], [156.6876220927, 132.483520512, 241.89819332599998, 360.6540527104], [158.4932861311, 198.8199462912, 201.7200317061, 242.3599243264], [178.22473144170002, 346.8522338816, 209.80773925580002, 360.547546368], [365.3955078026, 138.61810304, 383.7584228714, 172.830993664]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047007_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a handbag, a sneakers, and a car.", "boxes_value": [[56.687622092699996, 57.48352051200001, 282.4403075916, 285.6540527104], [226.57720949599997, 77.6917114368, 282.4403075916, 226.6600342016], [180.4165038945, 79.71057126400001, 209.20312502540003, 119.3277587968], [56.687622092699996, 57.48352051200001, 141.89819332599998, 285.6540527104], [58.49328613110001, 123.81994629120001, 101.7200317061, 167.3599243264], [78.22473144170002, 271.8522338816, 109.80773925580002, 285.547546368], [265.3955078026, 63.618103039999994, 283.7584228714, 97.830993664]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047010.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[161.04663084, 284.435852032, 355.65319824, 338.8375244288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047010_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[49.046630840000006, 14.435852032000014, 243.65319824, 68.83752442880001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047010.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, three pictures, and a chair.", "boxes_value": [[161.04663084, 284.435852032, 355.65319824, 338.8375244288], [161.04663084, 295.7824096768, 234.66320802, 334.911926272], [172.32122802, 322.9741211136, 210.12432864000002, 335.5751342592], [232.10235594, 284.435852032, 248.31713868, 312.4432373248], [255.68749998, 285.6642456064, 270.1824951, 312.9345702912], [276.07879638, 289.3494262784, 289.836792, 313.6715698176], [328.03222656, 327.3287353344, 355.65319824, 338.8375244288]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047010_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, three pictures, and a chair.", "boxes_value": [[49.046630840000006, 14.435852032000014, 243.65319824, 68.83752442880001], [49.046630840000006, 25.7824096768, 122.66320802000001, 64.91192627200002], [60.32122802000001, 52.974121113600006, 98.12432864000002, 65.57513425920001], [120.10235594, 14.435852032000014, 136.31713868, 42.44323732480001], [143.68749998, 15.664245606400016, 158.18249509999998, 42.934570291199975], [164.07879638000003, 19.349426278400017, 177.836792, 43.671569817600016], [216.03222656000003, 57.328735334399994, 243.65319824, 68.83752442880001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047011.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.0216674816, 200.40216064, 137.1022338816, 365.8195800576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047011_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.0216674816, 41.402160640000005, 137.1022338816, 206.81958005759998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047011.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[1.0216674816, 200.40216064, 137.1022338816, 365.8195800576], [70.872192384, 200.40216064, 137.1022338816, 346.1082153472], [1.0216674816, 289.1156005888, 96.6113281536, 365.8195800576], [70.6516113408, 287.9402465792, 96.8414306304, 307.7696532992], [76.6342773504, 255.7500000256, 96.7901001216, 274.9755249152], [97.720336896, 261.0215454208, 123.14764400639999, 276.2158813696]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047011_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[1.0216674816, 41.402160640000005, 137.1022338816, 206.81958005759998], [70.872192384, 41.402160640000005, 137.1022338816, 187.10821534719997], [1.0216674816, 130.11560058880002, 96.6113281536, 206.81958005759998], [70.6516113408, 128.9402465792, 96.8414306304, 148.7696532992], [76.6342773504, 96.7500000256, 96.7901001216, 115.97552491520003], [97.720336896, 102.02154542080001, 123.14764400639999, 117.21588136960003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047012.jpg", "text": "In the provided image , please explain the content within the region . Please point out the objects and their coordinates.", "boxes_value": [[73.2001952745, 0, 463.4019775342, 188.2736205824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047012_crop.jpg", "text": "In the provided image , please explain the content within the region . Please point out the objects and their coordinates.", "boxes_value": [[73.2001952745, 0, 463.4019775342, 188.2736205824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047012.jpg", "text": "In the provided image , please explain the content within the region . Please point out the objects and their coordinates. For your reference, objects involved in this region include four lamps, and three people.", "boxes_value": [[73.2001952745, 0, 463.4019775342, 188.2736205824], [73.2001952745, 0, 98.9942627321, 50.523620608], [198.698181119, 0.4236450304, 223.00408938910002, 67.3889770496], [293.4416503949, 0.4236450304, 361.8951415695, 89.2147216896], [289.6723632896, 72.1066894336, 308.781616178, 102.1355590656], [219.2746581696, 13.8674926592, 312.0366210758, 174.4641723392], [364.2849121155, 12.5853881856, 463.4019775342, 188.2736205824], [116.62689212509999, 91.7928466944, 250.3514404141, 224.1575317504]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047012_crop.jpg", "text": "In the provided image , please explain the content within the region . Please point out the objects and their coordinates. For your reference, objects involved in this region include four lamps, and three people.", "boxes_value": [[73.2001952745, 0, 463.4019775342, 188.2736205824], [73.2001952745, 0, 98.9942627321, 50.523620608], [198.698181119, 0.4236450304, 223.00408938910002, 67.3889770496], [293.4416503949, 0.4236450304, 361.8951415695, 89.2147216896], [289.6723632896, 72.1066894336, 308.781616178, 102.1355590656], [219.2746581696, 13.8674926592, 312.0366210758, 174.4641723392], [364.2849121155, 12.5853881856, 463.4019775342, 188.2736205824], [116.62689212509999, 91.7928466944, 250.3514404141, 224.1575317504]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047013.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[164.562927264, 95.6563720704, 433.70751951, 324.4478149632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047013_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[67.562927264, 57.656372070399996, 336.70751951, 286.4478149632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047013.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include a keyboard, two mice, and two moniters.", "boxes_value": [[164.562927264, 95.6563720704, 433.70751951, 324.4478149632], [380.7263183535, 270.1877441536, 433.70751951, 310.231689472], [315.7229003805, 296.5148925952, 388.658691405, 324.4478149632], [233.91595462049997, 190.4431762944, 260.822631852, 203.0556640768], [164.562927264, 95.6563720704, 240.81158444849999, 224.3259887616], [264.63928225949996, 96.6775512576, 328.633666992, 202.5406494208]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047013_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include a keyboard, two mice, and two moniters.", "boxes_value": [[67.562927264, 57.656372070399996, 336.70751951, 286.4478149632], [283.7263183535, 232.18774415360002, 336.70751951, 272.231689472], [218.7229003805, 258.5148925952, 291.658691405, 286.4478149632], [136.91595462049997, 152.4431762944, 163.82263185199997, 165.0556640768], [67.562927264, 57.656372070399996, 143.81158444849999, 186.3259887616], [167.63928225949996, 58.6775512576, 231.63366699199997, 164.5406494208]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047016.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[190.889953621, 325.5037231616, 459.2098388466, 377.6591796736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047016_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[67.88995362099999, 13.503723161600021, 336.2098388466, 65.65917967360002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047016.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, three cars, and a van.", "boxes_value": [[190.889953621, 325.5037231616, 459.2098388466, 377.6591796736], [435.3342285415, 348.8393554432, 459.2098388466, 377.3574829056], [190.889953621, 329.0474243072, 264.436157215, 377.6591796736], [279.80120852339996, 325.520996096, 327.574890135, 360.0042114048], [334.407592766, 325.654113792, 374.12487793839995, 362.446472192], [368.7689208888, 325.5037231616, 387.5395507586, 346.7974243328]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00047016_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, three cars, and a van.", "boxes_value": [[67.88995362099999, 13.503723161600021, 336.2098388466, 65.65917967360002], [312.3342285415, 36.83935544320002, 336.2098388466, 65.35748290560002], [67.88995362099999, 17.047424307200004, 141.43615721499998, 65.65917967360002], [156.80120852339996, 13.520996095999976, 204.57489013499998, 48.0042114048], [211.407592766, 13.654113791999976, 251.12487793839995, 50.44647219199999], [245.76892088879998, 13.503723161600021, 264.5395507586, 34.797424332800006]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00047017.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates.", "boxes_value": [[177.010986321, 40.4500732416, 585.051635727, 387.8477172736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047017_crop.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates.", "boxes_value": [[102.01098632099999, 40.4500732416, 510, 387.8477172736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047017.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include three cabinets, 11 lamps, a mirror, a calculator, a telephone, and a tea pot.", "boxes_value": [[177.010986321, 40.4500732416, 585.051635727, 387.8477172736], [174.41198728199998, 172.9982909952, 274.0397338575, 228.4432983552], [261.9111938625, 132.2808838144, 456.83508300299997, 228.964416512], [452.50341797699997, 172.9982909952, 584.1853027335, 238.8392333824], [530.4729003795, 100.2267456, 546.933105459, 129.6818847744], [564.2597656215, 120.1522827264, 585.9179687789999, 138.345153792], [542.6015625225, 103.6920776192, 556.462768563, 131.414550784], [402.2563476555, 47.38073728, 419.6983642605, 75.3583373824], [413.7313842675, 55.1776733184, 429.97882081050005, 76.3004760576], [294.831665037, 61.2419433472, 316.06158448800005, 86.9782104576], [309.5592041295, 63.8409423872, 324.226867653, 88.8625488384], [206.466186537, 121.884948736, 225.52539059699998, 145.2758178816], [222.060058623, 124.3502807552, 237.2348022525, 147.8748168704], [181.34265134700001, 134.8798828032, 203.0008545045, 156.5380859392], [196.07019043949998, 134.013549824, 215.1294555735, 158.2707519488], [177.010986321, 40.4500732416, 585.051635727, 387.8477172736], [495.5484619215, 328.1500854272, 541.466430687, 341.6882323968], [333.7885741905, 308.1693725696, 369.806091291, 322.661560064], [237.231933588, 283.1287841792, 262.17138671099997, 314.9958496256]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15], [16], [17], [18]]}, {"image_path": "objects365_v1_00047017_crop.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include three cabinets, 11 lamps, a mirror, a calculator, a telephone, and a tea pot.", "boxes_value": [[102.01098632099999, 40.4500732416, 510, 387.8477172736], [99.41198728199998, 172.9982909952, 199.03973385749998, 228.4432983552], [186.9111938625, 132.2808838144, 381.83508300299997, 228.964416512], [377.50341797699997, 172.9982909952, 509.18530273349995, 238.8392333824], [455.47290037949995, 100.2267456, 471.933105459, 129.6818847744], [489.25976562150004, 120.1522827264, 510, 138.345153792], [467.6015625225, 103.6920776192, 481.46276856300005, 131.414550784], [327.2563476555, 47.38073728, 344.6983642605, 75.3583373824], [338.7313842675, 55.1776733184, 354.97882081050005, 76.3004760576], [219.831665037, 61.2419433472, 241.06158448800005, 86.9782104576], [234.55920412950002, 63.8409423872, 249.226867653, 88.8625488384], [131.466186537, 121.884948736, 150.52539059699998, 145.2758178816], [147.060058623, 124.3502807552, 162.2348022525, 147.8748168704], [106.34265134700001, 134.8798828032, 128.0008545045, 156.5380859392], [121.07019043949998, 134.013549824, 140.1294555735, 158.2707519488], [102.01098632099999, 40.4500732416, 510, 387.8477172736], [420.5484619215, 328.1500854272, 466.46643068699996, 341.6882323968], [258.7885741905, 308.1693725696, 294.806091291, 322.661560064], [162.231933588, 283.1287841792, 187.17138671099997, 314.9958496256]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15], [16], [17], [18]]}, {"image_path": "objects365_v1_00047018.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[19.40100096, 102.46560299519999, 449.5079955968, 256.9907226624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047018_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[19.40100096, 39.46560299519999, 449.5079955968, 193.9907226624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047018.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, three storage boxes, and a hat.", "boxes_value": [[19.40100096, 102.46560299519999, 449.5079955968, 256.9907226624], [317.175720192, 143.5630493184, 449.5079955968, 256.9907226624], [323.2397460992, 202.7100219648, 401.1909179904, 248.7979126272], [298.7733764608, 109.39624020480001, 380.1384277504, 154.9151611392], [19.40100096, 142.9664306688, 69.4718017536, 185.0714111232], [208.0443630592, 102.46560299519999, 301.3346482176, 167.0158654464]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047018_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, three storage boxes, and a hat.", "boxes_value": [[19.40100096, 39.46560299519999, 449.5079955968, 193.9907226624], [317.175720192, 80.5630493184, 449.5079955968, 193.9907226624], [323.2397460992, 139.7100219648, 401.1909179904, 185.7979126272], [298.7733764608, 46.39624020480001, 380.1384277504, 91.9151611392], [19.40100096, 79.9664306688, 69.4718017536, 122.0714111232], [208.0443630592, 39.46560299519999, 301.3346482176, 104.0158654464]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047019.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[567.520019528, 260.4802002944, 678.687255862, 367.287841792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047019_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[28.520019528000034, 27.480200294399992, 139.68725586200003, 134.287841792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047019.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[567.520019528, 260.4802002944, 678.687255862, 367.287841792], [606.377319317, 259.5587158016, 637.505126951, 356.6239624192], [643.564575192, 298.7351684608, 678.687255862, 367.287841792], [567.520019528, 286.818176256, 583.213256857, 334.6115112448], [642.862594913, 300.3648475136, 671.1273055409999, 322.9766160384], [606.746575743, 260.4802002944, 633.126972324, 282.4638641152]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047019_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[28.520019528000034, 27.480200294399992, 139.68725586200003, 134.287841792], [67.377319317, 26.558715801599988, 98.50512695099997, 123.62396241919998], [104.564575192, 65.73516846080003, 139.68725586200003, 134.287841792], [28.520019528000034, 53.818176256000015, 44.21325685700003, 101.6115112448], [103.86259491299995, 67.36484751360001, 132.12730554099994, 89.97661603839998], [67.746575743, 27.480200294399992, 94.12697232400001, 49.46386411520001]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047020.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[453.821715578, 132.0295323648, 769.917187193, 331.5996663296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047020_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[79.82171557800001, 50.02953236479999, 395.917187193, 249.59966632959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047020.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include three hats, a handbag, and a glasses.", "boxes_value": [[453.821715578, 132.0295323648, 769.917187193, 331.5996663296], [730.712986696, 278.5655054336, 769.917187193, 329.5557861376], [621.1111490430001, 270.2275723776, 667.445047808, 331.5996663296], [592.7029874149999, 132.0295323648, 632.783431665, 162.1755075072], [459.018944461, 207.6253832192, 492.92467547399997, 228.6617857024], [453.821715578, 143.7737145856, 541.92711881, 230.2323608576]], "boxes_seq": [[0], [0], [1, 3, 5], [2], [4]]}, {"image_path": "objects365_v1_00047020_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include three hats, a handbag, and a glasses.", "boxes_value": [[79.82171557800001, 50.02953236479999, 395.917187193, 249.59966632959998], [356.71298669600003, 196.56550543359998, 395.917187193, 247.55578613760002], [247.11114904300007, 188.22757237759998, 293.445047808, 249.59966632959998], [218.70298741499994, 50.02953236479999, 258.78343166499997, 80.1755075072], [85.01894446099999, 125.62538321919999, 118.92467547399997, 146.6617857024], [79.82171557800001, 61.773714585600004, 167.92711881000002, 148.2323608576]], "boxes_seq": [[0], [0], [1, 3, 5], [2], [4]]}, {"image_path": "objects365_v1_00047021.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[144.2159424, 136.0902710213, 469.2653198336, 200.47741702209998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047021_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[82.21594239999999, 17.090271021299998, 407.2653198336, 81.47741702209998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047021.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include three cabinets, six people, two handbags, and a coffee machine.", "boxes_value": [[144.2159424, 136.0902710213, 469.2653198336, 200.47741702209998], [128.6062622208, 147.8709716601, 198.4015502848, 234.7858276026], [165.9182129152, 153.13854980739998, 275.2202148352, 248.8326415791], [225.178344704, 155.3333739956, 410.8599853568, 312.4824218727], [371.0133666816, 141.524475078, 436.228637696, 200.47741702209998], [312.350097664, 136.0902710213, 343.2191161856, 192.90447994730002], [203.4057006592, 133.46600341159998, 254.6326294016, 281.7188720629], [193.3895263744, 136.6680907971, 226.3510131712, 270.3975830415], [144.6536254976, 140.218872085, 182.0691528192, 240.34924317019997], [135.4979248128, 137.97564696519999, 160.6342773248, 231.99700931069998], [144.2159424, 165.65130616989998, 168.3186645504, 190.3883056934], [195.0100097536, 187.1800536865, 210.137512192, 212.50976559589998], [451.246582016, 155.55102540069998, 469.2653198336, 193.3903198399]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8, 9], [10, 11], [12]]}, {"image_path": "objects365_v1_00047021_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include three cabinets, six people, two handbags, and a coffee machine.", "boxes_value": [[82.21594239999999, 17.090271021299998, 407.2653198336, 81.47741702209998], [66.6062622208, 28.870971660099997, 136.4015502848, 97], [103.9182129152, 34.13854980739998, 213.2202148352, 97], [163.178344704, 36.3333739956, 348.8599853568, 97], [309.0133666816, 22.524475077999995, 374.228637696, 81.47741702209998], [250.35009766399997, 17.090271021299998, 281.2191161856, 73.90447994730002], [141.4057006592, 14.466003411599985, 192.6326294016, 97], [131.3895263744, 17.668090797100007, 164.3510131712, 97], [82.65362549759999, 21.218872084999987, 120.06915281920001, 97], [73.4979248128, 18.975646965199985, 98.6342773248, 97], [82.21594239999999, 46.65130616989998, 106.3186645504, 71.38830569340001], [133.0100097536, 68.1800536865, 148.137512192, 93.50976559589998], [389.246582016, 36.551025400699984, 407.2653198336, 74.3903198399]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8, 9], [10, 11], [12]]}, {"image_path": "objects365_v1_00047022.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[161.3713378614, 199.3971557376, 263.5373534784, 311.274719232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047022_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[26.371337861400008, 28.397155737600002, 128.5373534784, 140.274719232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047022.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a mirror, three cabinets, and a coffee machine.", "boxes_value": [[161.3713378614, 199.3971557376, 263.5373534784, 311.274719232], [136.2699584646, 132.9458618368, 279.4857177558, 345.2505492992], [161.3713378614, 199.3971557376, 209.15240481339998, 277.4783325184], [205.2677612076, 206.3895263744, 242.1718139912, 276.3129882624], [235.9563598478, 202.5048827904, 263.5373534784, 311.274719232], [230.4885864314, 286.164489728, 246.1125488662, 306.9410400256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047022_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a mirror, three cabinets, and a coffee machine.", "boxes_value": [[26.371337861400008, 28.397155737600002, 128.5373534784, 140.274719232], [1.2699584645999948, 0, 144.4857177558, 168], [26.371337861400008, 28.397155737600002, 74.15240481339998, 106.47833251840001], [70.26776120759999, 35.38952637439999, 107.1718139912, 105.31298826239998], [100.9563598478, 31.50488279039999, 128.5373534784, 140.274719232], [95.4885864314, 115.16448972799998, 111.1125488662, 135.9410400256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047026.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[334.677612288, 51.303955087, 511.2774047744, 560.7965088110001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047026_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[44.67761228799998, 51.303955087, 221.2774047744, 560.7965088110001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047026.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, two towels, a faucet, an oven, an extractor, and an induction cooker.", "boxes_value": [[334.677612288, 51.303955087, 511.2774047744, 560.7965088110001], [392.1192016384, 310.12170412300003, 452.339599616, 420.70812991900004], [330.638244608, 481.962646473, 378.5017090048, 600.213500964], [385.5404663296, 500.26330569600003, 413.6954345472, 560.7965088110001], [334.677612288, 356.425415038, 348.8960571392, 412.11437990800005], [336.9016723456, 437.729125988, 470.2485961728, 676.448120117], [369.9618530304, 51.303955087, 511.2761230336, 285.167480444], [365.2636718592, 421.186035193, 511.2774047744, 448.632324217]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047026_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, two towels, a faucet, an oven, an extractor, and an induction cooker.", "boxes_value": [[44.67761228799998, 51.303955087, 221.2774047744, 560.7965088110001], [102.1192016384, 310.12170412300003, 162.339599616, 420.70812991900004], [40.63824460799998, 481.962646473, 88.50170900479998, 600.213500964], [95.54046632960001, 500.26330569600003, 123.69543454720002, 560.7965088110001], [44.67761228799998, 356.425415038, 58.896057139200025, 412.11437990800005], [46.90167234559999, 437.729125988, 180.24859617279998, 676.448120117], [79.9618530304, 51.303955087, 221.27612303360002, 285.167480444], [75.26367185919997, 421.186035193, 221.2774047744, 448.632324217]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047027.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object.", "boxes_value": [[259.1497192521, 278.8456420864, 485.405517577, 383.1696166912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047027_crop.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object.", "boxes_value": [[57.1497192521, 26.845642086400005, 283.405517577, 131.16961669120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047027.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a fan, a desk, a person, a cup, a moniter, and a mouse.", "boxes_value": [[259.1497192521, 278.8456420864, 485.405517577, 383.1696166912], [409.7597656311, 278.8456420864, 485.405517577, 355.7808227328], [0, 316.5293579264, 520.4073486202001, 510.1898803712], [158.3009033355, 61.0051879936, 681.8597411986, 511.23168947199997], [259.1497192521, 295.0632324096, 313.8563232265, 383.1696166912], [229.7886962823, 128.7084350464, 422.8112792919, 359.8162231296], [415.3983154333, 363.8750000128, 458.60302736, 379.9080200192]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047027_crop.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a fan, a desk, a person, a cup, a moniter, and a mouse.", "boxes_value": [[57.1497192521, 26.845642086400005, 283.405517577, 131.16961669120002], [207.75976563109998, 26.845642086400005, 283.405517577, 103.7808227328], [0, 64.5293579264, 318.4073486202001, 157], [0, 0, 339, 157], [57.1497192521, 43.063232409600005, 111.85632322650002, 131.16961669120002], [27.78869628230001, 0, 220.8112792919, 107.81622312960002], [213.3983154333, 111.8750000128, 256.60302736, 127.90802001920002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047028.jpg", "text": "In the displayed image , help me understand the region defined by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[111.93115232, 318.010864272, 568.7263183360001, 411.14807131199996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047028_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[111.93115232, 24.010864271999992, 568.7263183360001, 117.14807131199996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047028.jpg", "text": "In the displayed image , help me understand the region defined by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bowl, a plate, a desk, and two chairs.", "boxes_value": [[111.93115232, 318.010864272, 568.7263183360001, 411.14807131199996], [176.17156985600002, 331.10870361599996, 209.26477049599998, 358.341674784], [111.93115232, 376.89898680000005, 154.36187744, 387.506713872], [316.508117696, 318.010864272, 510.94042969599997, 404.349731424], [461.31262208000004, 318.010864272, 568.7263183360001, 411.14807131199996], [184.42620851200002, 358.14532468799996, 361.86273190400004, 479.641296384]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047028_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bowl, a plate, a desk, and two chairs.", "boxes_value": [[111.93115232, 24.010864271999992, 568.7263183360001, 117.14807131199996], [176.17156985600002, 37.10870361599996, 209.26477049599998, 64.34167478400002], [111.93115232, 82.89898680000005, 154.36187744, 93.50671387199998], [316.508117696, 24.010864271999992, 510.94042969599997, 110.34973142400003], [461.31262208000004, 24.010864271999992, 568.7263183360001, 117.14807131199996], [184.42620851200002, 64.14532468799996, 361.86273190400004, 140]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047034.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations.", "boxes_value": [[47.3952026112, 0.3104858624, 767.7565917696, 227.2081298944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047034_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations.", "boxes_value": [[47.3952026112, 0.3104858624, 767.7565917696, 227.2081298944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047034.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations. For your reference, objects involved in this region include two flags, a person, a suv, and a car.", "boxes_value": [[47.3952026112, 0.3104858624, 767.7565917696, 227.2081298944], [354.62097169919997, 0.3104858624, 383.7329101824, 36.3538208256], [47.3952026112, 113.9572143616, 136.7716064256, 227.2081298944], [138.3377074944, 0.4484253184, 152.2158813696, 25.2308959744], [646.7795410176, 105.0292358144, 670.6107177984001, 121.0039672832], [751.4935302911999, 115.2485961728, 767.7565917696, 134.2221679616]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047034_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations. For your reference, objects involved in this region include two flags, a person, a suv, and a car.", "boxes_value": [[47.3952026112, 0.3104858624, 767.7565917696, 227.2081298944], [354.62097169919997, 0.3104858624, 383.7329101824, 36.3538208256], [47.3952026112, 113.9572143616, 136.7716064256, 227.2081298944], [138.3377074944, 0.4484253184, 152.2158813696, 25.2308959744], [646.7795410176, 105.0292358144, 670.6107177984001, 121.0039672832], [751.4935302911999, 115.2485961728, 767.7565917696, 134.2221679616]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047035.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify.", "boxes_value": [[373.0817870882, 100.4960327168, 482.37634277440003, 177.380859392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047035_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify.", "boxes_value": [[28.081787088199974, 19.496032716800002, 137.37634277440003, 96.38085939199999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047035.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a glasses, a hat, and four pictures.", "boxes_value": [[373.0817870882, 100.4960327168, 482.37634277440003, 177.380859392], [440.74768066769997, 163.341796864, 471.8856201376, 177.380859392], [434.705200226, 144.1987304448, 482.37634277440003, 167.9571533312], [362.5535888561, 91.0958862336, 400.9062500003, 123.0564575232], [373.0817870882, 125.6884765696, 403.9143066478, 150.12890624], [402.78637695260005, 100.4960327168, 445.2750244395, 137.7207031296], [453.1711425664, 114.4082641408, 482.1236572258, 152.0089111552]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047035_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a glasses, a hat, and four pictures.", "boxes_value": [[28.081787088199974, 19.496032716800002, 137.37634277440003, 96.38085939199999], [95.74768066769997, 82.341796864, 126.88562013759997, 96.38085939199999], [89.70520022599999, 63.19873044479999, 137.37634277440003, 86.9571533312], [17.55358885610002, 10.095886233599998, 55.90625000030002, 42.056457523199995], [28.081787088199974, 44.6884765696, 58.914306647800004, 69.12890623999999], [57.78637695260005, 19.496032716800002, 100.27502443949999, 56.7207031296], [108.17114256640002, 33.4082641408, 137.1236572258, 71.0089111552]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047036.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations.", "boxes_value": [[304.40173338330004, 266.5048827904, 422.8214111267, 311.521728512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047036_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations.", "boxes_value": [[30.40173338330004, 11.504882790400018, 148.8214111267, 56.52172851199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047036.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations. For your reference, objects involved in this region include four stools, and a desk.", "boxes_value": [[304.40173338330004, 266.5048827904, 422.8214111267, 311.521728512], [304.40173338330004, 266.5048827904, 329.648559601, 310.185913088], [340.8693847279, 269.2432861184, 365.9158935204, 311.521728512], [372.4544677441, 268.1142578176, 400.1794433708, 311.0541991936], [401.58203123740003, 266.9724121088, 422.8214111267, 309.6516113408], [263.3121338085, 242.2931518464, 465.73632809580005, 307.962524416]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047036_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations. For your reference, objects involved in this region include four stools, and a desk.", "boxes_value": [[30.40173338330004, 11.504882790400018, 148.8214111267, 56.52172851199998], [30.40173338330004, 11.504882790400018, 55.64855960099999, 55.18591308800001], [66.86938472790001, 14.243286118400022, 91.9158935204, 56.52172851199998], [98.45446774409999, 13.114257817599992, 126.17944337080002, 56.05419919360003], [127.58203123740003, 11.972412108799972, 148.8214111267, 54.6516113408], [0, 0, 178, 52.96252441600001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047037.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047037_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047037.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two gloves, a helmet, a sneakers, a boots, and a hockey stick.", "boxes_value": [[9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997], [9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997], [0, 0, 110.305908224, 360.0563964498], [202.5940668928, 407.60919792180005, 267.8362057216, 514.2102637266], [88.6538832384, 6.7914272412, 167.9605758464, 97.2288837072], [10.1149292032, 132.83501158800001, 74.4818390528, 199.8629788446], [65.8015646208, 337.7826851346, 98.7005600768, 354.54255071399996], [112.3567468544, 409.788033696, 168.8437012992, 460.06763043420005], [0.552062976, 315.6020508024, 202.5902099456, 496.8049316058]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6], [7], [8]]}, {"image_path": "objects365_v1_00047037_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two gloves, a helmet, a sneakers, a boots, and a hockey stick.", "boxes_value": [[9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997], [9.8794555904, 1.8634643658, 255.5130615296, 472.36682131919997], [0, 0, 110.305908224, 360.0563964498], [202.5940668928, 407.60919792180005, 267.8362057216, 514.2102637266], [88.6538832384, 6.7914272412, 167.9605758464, 97.2288837072], [10.1149292032, 132.83501158800001, 74.4818390528, 199.8629788446], [65.8015646208, 337.7826851346, 98.7005600768, 354.54255071399996], [112.3567468544, 409.788033696, 168.8437012992, 460.06763043420005], [0.552062976, 315.6020508024, 202.5902099456, 496.8049316058]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6], [7], [8]]}, {"image_path": "objects365_v1_00047039.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[356.4708251742, 93.9501953024, 478.85192871119995, 258.756774912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047039_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[31.4708251742, 41.950195302400004, 153.85192871119995, 206.75677491200003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047039.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a person, a tie, and two wine glasses.", "boxes_value": [[356.4708251742, 93.9501953024, 478.85192871119995, 258.756774912], [423.3541259904, 80.619995136, 540.3463134333, 317.9208984576], [356.4708251742, 93.9501953024, 478.85192871119995, 258.756774912], [420.49719235919997, 165.1633911296, 443.4333496197, 229.3847656448], [405.3577880748, 217.4474487296, 428.7462157836, 272.9268798976], [428.2022704773, 212.0083008, 453.7663573938, 261.5046386688]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047039_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a person, a tie, and two wine glasses.", "boxes_value": [[31.4708251742, 41.950195302400004, 153.85192871119995, 206.75677491200003], [98.35412599040001, 28.619995136, 184, 247], [31.4708251742, 41.950195302400004, 153.85192871119995, 206.75677491200003], [95.49719235919997, 113.1633911296, 118.43334961969998, 177.3847656448], [80.3577880748, 165.4474487296, 103.74621578360001, 220.92687989759997], [103.20227047729998, 160.0083008, 128.76635739379998, 209.5046386688]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047040.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention.", "boxes_value": [[184.3920288256, 429.2534790167, 446.2467651584, 559.5825195337]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047040_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention.", "boxes_value": [[66.3920288256, 33.2534790167, 328.2467651584, 163]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047040.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[184.3920288256, 429.2534790167, 446.2467651584, 559.5825195337], [332.2001342976, 26.4364013459, 510.1519164928, 558.9279784959], [184.3920288256, 429.2534790167, 220.8711547904, 518.6273193543], [198.9837036032, 496.7398681596, 258.2622070272, 558.298339846], [392.6393432576, 396.7666015885, 416.006408704, 474.4056396248], [417.5139770368, 527.9328613247, 446.2467651584, 559.5825195337]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047040_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[66.3920288256, 33.2534790167, 328.2467651584, 163], [214.20013429760002, 0, 392.1519164928, 162.9279784959], [66.3920288256, 33.2534790167, 102.8711547904, 122.62731935429997], [80.98370360320001, 100.73986815960001, 140.26220702720002, 162.29833984599998], [274.6393432576, 0.7666015885000093, 298.006408704, 78.40563962480002], [299.5139770368, 131.93286132469996, 328.2467651584, 163]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047041.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object.", "boxes_value": [[111.7357178112, 143.783752448, 167.57531735039998, 208.3409423872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047041_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object.", "boxes_value": [[14.735717811200004, 16.783752448, 70.57531735039998, 81.34094238719999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047041.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a stool, a desk, a person, and two cars.", "boxes_value": [[111.7357178112, 143.783752448, 167.57531735039998, 208.3409423872], [111.7357178112, 143.783752448, 147.3128662272, 208.3409423872], [154.3811645184, 157.9203491328, 167.57531735039998, 201.2726440448], [88.61120609279999, 142.6683959808, 135.0264281856, 203.2202148352], [126.9464111616, 106.6660156416, 162.4974975744, 204.4313965056], [17.5054321152, 121.4656982528, 721.7364502272, 511.0108032], [37.294128384000004, 119.6057739264, 239.09851077119998, 186.9579467776]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047041_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a stool, a desk, a person, and two cars.", "boxes_value": [[14.735717811200004, 16.783752448, 70.57531735039998, 81.34094238719999], [14.735717811200004, 16.783752448, 50.312866227200004, 81.34094238719999], [57.3811645184, 30.9203491328, 70.57531735039998, 74.27264404479999], [0, 15.6683959808, 38.0264281856, 76.22021483520001], [29.946411161599997, 0, 65.4974975744, 77.43139650559999], [0, 0, 84, 97], [0, 0, 84, 59.9579467776]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047043.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each object you identify.", "boxes_value": [[426.9833984519, 173.7930297856, 531.9224853356, 332.7503051776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047043_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each object you identify.", "boxes_value": [[26.983398451899973, 39.7930297856, 131.92248533559996, 198.7503051776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047043.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, and two motorcycles.", "boxes_value": [[426.9833984519, 173.7930297856, 531.9224853356, 332.7503051776], [447.50439456280003, 162.1759643648, 461.11572268509997, 204.8874511872], [468.62548827029997, 161.7066039808, 485.05297852120003, 207.7036132864], [504.3920898493, 156.9863281152, 548.6170654002, 310.277343744], [426.9833984519, 199.8020630016, 531.9224853356, 332.7503051776], [446.46923826020003, 173.7930297856, 490.82995603140006, 208.2958984192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047043_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, and two motorcycles.", "boxes_value": [[26.983398451899973, 39.7930297856, 131.92248533559996, 198.7503051776], [47.50439456280003, 28.175964364799995, 61.11572268509997, 70.88745118720001], [68.62548827029997, 27.706603980799997, 85.05297852120003, 73.70361328640001], [104.39208984930002, 22.986328115199996, 148.61706540019998, 176.277343744], [26.983398451899973, 65.8020630016, 131.92248533559996, 198.7503051776], [46.46923826020003, 39.7930297856, 90.82995603140006, 74.2958984192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047044.jpg", "text": "In the provided image , would you mind describing the selected area ? Give coordinates for the items you reference.", "boxes_value": [[0.1089477204, 412.3258667008, 189.2414550514, 511.958007808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047044_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Give coordinates for the items you reference.", "boxes_value": [[0.1089477204, 25.32586670080002, 189.2414550514, 124.95800780799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047044.jpg", "text": "In the provided image , would you mind describing the selected area ? Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a backpack, and a train.", "boxes_value": [[0.1089477204, 412.3258667008, 189.2414550514, 511.958007808], [165.31024169, 418.5121459712, 189.2414550514, 497.81378176], [138.697631847, 419.0484619264, 170.8740844518, 500.6962280448], [66.7027588136, 421.4616699392, 98.47698972079999, 511.958007808], [0.1089477204, 412.3258667008, 18.7252807292, 511.2684936704], [175.5736084326, 427.6796874752, 192.44403073260003, 454.9831542784], [0, 342.2142333952, 448.860107415, 461.185180672]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047044_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Give coordinates for the items you reference. For your reference, objects involved in this region include four people, a backpack, and a train.", "boxes_value": [[0.1089477204, 25.32586670080002, 189.2414550514, 124.95800780799999], [165.31024169, 31.512145971200027, 189.2414550514, 110.81378175999998], [138.697631847, 32.04846192640002, 170.8740844518, 113.69622804480002], [66.7027588136, 34.46166993920002, 98.47698972079999, 124.95800780799999], [0.1089477204, 25.32586670080002, 18.7252807292, 124.2684936704], [175.5736084326, 40.67968747520001, 192.44403073260003, 67.98315427839998], [0, 0, 236, 74.185180672]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047047.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify.", "boxes_value": [[109.544616704, 419.22131346000003, 492.3799438336, 556.1284179324]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047047_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify.", "boxes_value": [[96.544616704, 35.22131346000003, 479.3799438336, 172.1284179324]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047047.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[109.544616704, 419.22131346000003, 492.3799438336, 556.1284179324], [297.6782836736, 190.2901611394, 509.7679443456, 534.9357909956], [110.8936767488, 512.2825927928001, 156.088500992, 537.5782471012], [109.544616704, 531.8446045066, 157.7749023232, 556.1284179324], [319.7880248832, 492.71850583180003, 359.2137451008, 523.1838379158], [415.1266479616, 501.32043458379997, 442.7246704128, 534.6531982756], [463.4140624896, 419.22131346000003, 492.3799438336, 437.3907470552]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047047_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[96.544616704, 35.22131346000003, 479.3799438336, 172.1284179324], [284.6782836736, 0, 496.7679443456, 150.93579099559997], [97.8936767488, 128.28259279280007, 143.088500992, 153.57824710119996], [96.544616704, 147.84460450660004, 144.7749023232, 172.1284179324], [306.7880248832, 108.71850583180003, 346.2137451008, 139.18383791580004], [402.1266479616, 117.32043458379997, 429.7246704128, 150.65319827559995], [450.4140624896, 35.22131346000003, 479.3799438336, 53.390747055199995]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047051.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[5.729919424899999, 245.8077392384, 435.7897949372, 304.4373169152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047051_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[5.729919424899999, 14.80773923839999, 435.7897949372, 73.43731691519997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047051.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two leather shoes, and two sneakers.", "boxes_value": [[5.729919424899999, 245.8077392384, 435.7897949372, 304.4373169152], [4.740112273299999, 183.5234985472, 273.219360318, 347.2422485504], [67.753845197, 266.1737670656, 98.6115112457, 304.4373169152], [5.729919424899999, 245.8077392384, 41.5247802655, 274.1967773184], [413.04699709429997, 271.2911376896, 435.7897949372, 292.7704467968], [368.8249512036, 267.7112427008, 401.4650879084, 281.8201904128]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047051_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two leather shoes, and two sneakers.", "boxes_value": [[5.729919424899999, 14.80773923839999, 435.7897949372, 73.43731691519997], [4.740112273299999, 0, 273.219360318, 88], [67.753845197, 35.173767065599975, 98.6115112457, 73.43731691519997], [5.729919424899999, 14.80773923839999, 41.5247802655, 43.196777318399995], [413.04699709429997, 40.29113768960002, 435.7897949372, 61.7704467968], [368.8249512036, 36.71124270080003, 401.4650879084, 50.8201904128]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047053.jpg", "text": "Regarding the coordinates in image , can you provide a description? Remember to mention the objects and their corresponding locations.", "boxes_value": [[117.2894287104, 325.02142336, 404.60607912960006, 406.617248512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047053_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Remember to mention the objects and their corresponding locations.", "boxes_value": [[72.2894287104, 21.02142335999997, 359.60607912960006, 102.617248512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047053.jpg", "text": "Regarding the coordinates in image , can you provide a description? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cups, and two plates.", "boxes_value": [[117.2894287104, 325.02142336, 404.60607912960006, 406.617248512], [303.8090820096, 380.4558715904, 335.8275146496, 406.617248512], [321.5646972672, 331.3443603456, 346.43493649920003, 350.3132324352], [391.1171874816, 325.02142336, 404.60607912960006, 359.586914048], [117.2894287104, 369.386413568, 173.5582885632, 386.1738281472], [124.07354734079999, 387.4519653376, 152.8519286784, 400.4707641856]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047053_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cups, and two plates.", "boxes_value": [[72.2894287104, 21.02142335999997, 359.60607912960006, 102.617248512], [258.8090820096, 76.45587159040002, 290.8275146496, 102.617248512], [276.5646972672, 27.344360345600023, 301.43493649920003, 46.31323243520001], [346.1171874816, 21.02142335999997, 359.60607912960006, 55.58691404799998], [72.2894287104, 65.38641356800002, 128.5582885632, 82.17382814720003], [79.07354734079999, 83.45196533759997, 107.85192867839999, 96.47076418559999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047054.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations.", "boxes_value": [[0.30657961, 287.89056398639997, 385.92797852840005, 382.47424315999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047054_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations.", "boxes_value": [[0.30657961, 23.890563986399968, 385.92797852840005, 118.47424315999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047054.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, a vase, and three chairs.", "boxes_value": [[0.30657961, 287.89056398639997, 385.92797852840005, 382.47424315999996], [0.30657961, 298.9768066616, 65.5472412464, 379.8752441224], [363.79260256960004, 287.89056398639997, 385.92797852840005, 305.1963501184], [72.7225341808, 293.4165039176, 258.7821655372, 355.2969970472], [41.782287608, 302.6149902312, 262.1270752212, 382.47424315999996], [29.238952638799997, 318.92132566960004, 280.9420776388, 455.225646964]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047054_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, a vase, and three chairs.", "boxes_value": [[0.30657961, 23.890563986399968, 385.92797852840005, 118.47424315999996], [0.30657961, 34.976806661599994, 65.5472412464, 115.87524412239998], [363.79260256960004, 23.890563986399968, 385.92797852840005, 41.196350118400005], [72.7225341808, 29.416503917599982, 258.7821655372, 91.29699704720002], [41.782287608, 38.61499023120001, 262.1270752212, 118.47424315999996], [29.238952638799997, 54.92132566960004, 280.9420776388, 142]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047057.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object.", "boxes_value": [[0.8894043075000001, 154.4825439232, 442.7442626716, 269.0308227584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047057_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object.", "boxes_value": [[0.8894043075000001, 29.482543923200012, 442.7442626716, 144.0308227584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047057.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a storage box, a cup, a tricycle, two other fish, and a bakset.", "boxes_value": [[0.8894043075000001, 154.4825439232, 442.7442626716, 269.0308227584], [0.8894043075000001, 171.0181274624, 40.9880371342, 260.1261596672], [19.4741821594, 154.4825439232, 48.243652340699995, 168.990234368], [14.594482444099999, 133.98571776, 552.1488036919001, 448.8520507904], [432.2196044968, 222.783325184, 442.7442626716, 258.627014144], [408.4245605302, 202.5091552768, 447.2042236441, 213.7905273344], [2.9204101261, 193.9647826944, 135.59527585520001, 269.0308227584]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047057_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a storage box, a cup, a tricycle, two other fish, and a bakset.", "boxes_value": [[0.8894043075000001, 29.482543923200012, 442.7442626716, 144.0308227584], [0.8894043075000001, 46.0181274624, 40.9880371342, 135.1261596672], [19.4741821594, 29.482543923200012, 48.243652340699995, 43.99023436799999], [14.594482444099999, 8.98571776, 552.1488036919001, 172], [432.2196044968, 97.783325184, 442.7442626716, 133.627014144], [408.4245605302, 77.50915527679999, 447.2042236441, 88.7905273344], [2.9204101261, 68.96478269439999, 135.59527585520001, 144.0308227584]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047058.jpg", "text": "Please describe the section of the picture defined by the bbox . Please mention the objects and their locations.", "boxes_value": [[146.13146969509998, 288.7965698048, 484.81030274740004, 401.7994384896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047058_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please mention the objects and their locations.", "boxes_value": [[85.13146969509998, 28.796569804800015, 423.81030274740004, 141.7994384896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047058.jpg", "text": "Please describe the section of the picture defined by the bbox . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, two faucets, and two sinks.", "boxes_value": [[146.13146969509998, 288.7965698048, 484.81030274740004, 401.7994384896], [363.19409177650004, 317.9211425792, 484.81030274740004, 401.7994384896], [321.7823486257, 318.6740722688, 369.9705810487, 387.9446411264], [194.2469482751, 274.1527099392, 243.1992797766, 335.2385253888], [222.69787599880001, 288.7965698048, 235.2497558307, 315.1555175936], [146.13146969509998, 336.4937133568, 269.9765625126, 366.6181640704], [210.84527588170002, 324.710998528, 305.54022218430003, 341.9328613376]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047058_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, two faucets, and two sinks.", "boxes_value": [[85.13146969509998, 28.796569804800015, 423.81030274740004, 141.7994384896], [302.19409177650004, 57.92114257920002, 423.81030274740004, 141.7994384896], [260.7823486257, 58.67407226879999, 308.9705810487, 127.94464112639997], [133.2469482751, 14.152709939200008, 182.1992797766, 75.23852538879999], [161.69787599880001, 28.796569804800015, 174.2497558307, 55.155517593599996], [85.13146969509998, 76.49371335680001, 208.97656251260003, 106.61816407039998], [149.84527588170002, 64.710998528, 244.54022218430003, 81.9328613376]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047061.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[22.7046508544, 0, 488.262541056, 183.0362548992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047061_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[22.7046508544, 0, 488.262541056, 183.0362548992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047061.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two hats, and a belt.", "boxes_value": [[22.7046508544, 0, 488.262541056, 183.0362548992], [22.7046508544, 0, 113.233154304, 183.0362548992], [418.6109008896, 66.6425170944, 512.44982912, 190.7269287168], [456.9358201344, 66.2666917632, 488.262541056, 89.3495387904], [121.712790784, 23.2221679872, 236.165147648, 87.8615184384], [67.6039095808, 25.5862295808, 115.2543334912, 43.8858031872]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047061_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two hats, and a belt.", "boxes_value": [[22.7046508544, 0, 488.262541056, 183.0362548992], [22.7046508544, 0, 113.233154304, 183.0362548992], [418.6109008896, 66.6425170944, 512, 190.7269287168], [456.9358201344, 66.2666917632, 488.262541056, 89.3495387904], [121.712790784, 23.2221679872, 236.165147648, 87.8615184384], [67.6039095808, 25.5862295808, 115.2543334912, 43.8858031872]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047064.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[448.5261230592, 122.238342297, 512.29833984, 420.1267090032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047064_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[16.526123059200017, 75.238342297, 80, 373.1267090032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047064.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a stool, a lamp, a desk, and three pictures.", "boxes_value": [[448.5261230592, 122.238342297, 512.29833984, 420.1267090032], [381.0388183552, 28.7056274544, 512.0034179584, 259.6431274533], [448.5261230592, 296.1331787079, 511.8609618944, 420.1267090032], [477.8911742976, 188.1142578261, 512.29833984, 284.4545288157], [456.0999145472, 280.631530773, 512.29833984, 300.8935546668], [444.6047973888, 129.4262695503, 479.6817627136, 163.92816160290002], [489.457275392, 122.238342297, 512.1710204928, 162.49060058130001], [448.34252928, 97.512023934, 511.8835449344, 162.49060058130001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047064_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a stool, a lamp, a desk, and three pictures.", "boxes_value": [[16.526123059200017, 75.238342297, 80, 373.1267090032], [0, 0, 80, 212.6431274533], [16.526123059200017, 249.13317870790002, 79.86096189440002, 373.1267090032], [45.8911742976, 141.1142578261, 80, 237.45452881569997], [24.0999145472, 233.631530773, 80, 253.89355466680001], [12.604797388800023, 82.42626955029999, 47.681762713599994, 116.92816160290002], [57.457275391999985, 75.238342297, 80, 115.49060058130001], [16.342529280000008, 50.512023934, 79.88354493439999, 115.49060058130001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047065.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe.", "boxes_value": [[335.3612060475, 283.1249999872, 644.6907958725, 433.6385498112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047065_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe.", "boxes_value": [[77.36120604749999, 38.1249999872, 386.69079587249996, 188.6385498112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047065.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a soccer, a person, and three sneakers.", "boxes_value": [[335.3612060475, 283.1249999872, 644.6907958725, 433.6385498112], [335.3612060475, 283.1249999872, 404.98217771400004, 353.1264648192], [403.5375976905, 33.7857055744, 654.271850565, 432.07141114880005], [433.7468262045, 405.2811889664, 471.94250485500004, 433.6385498112], [549.780517548, 374.3195800576, 597.5250244245, 424.6683349504], [612.861084, 311.8176269312, 644.6907958725, 385.3153076224]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047065_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a soccer, a person, and three sneakers.", "boxes_value": [[77.36120604749999, 38.1249999872, 386.69079587249996, 188.6385498112], [77.36120604749999, 38.1249999872, 146.98217771400004, 108.12646481920001], [145.53759769049998, 0, 396.271850565, 187.07141114880005], [175.7468262045, 160.2811889664, 213.94250485500004, 188.6385498112], [291.78051754800003, 129.31958005759998, 339.5250244245, 179.66833495039998], [354.861084, 66.81762693119998, 386.69079587249996, 140.31530762239998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047069.jpg", "text": "I'd like some information about the bounding box in the photo . Please mention the objects and their locations.", "boxes_value": [[51.6719970816, 168.601623558, 269.8723755008, 205.68579100699998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047069_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Please mention the objects and their locations.", "boxes_value": [[51.6719970816, 9.601623558, 269.8723755008, 46.68579100699998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047069.jpg", "text": "I'd like some information about the bounding box in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include five hats.", "boxes_value": [[51.6719970816, 168.601623558, 269.8723755008, 205.68579100699998], [237.504089344, 178.4621581985, 269.8723755008, 205.68579100699998], [206.8507690496, 168.601623558, 237.504089344, 194.11035158599998], [136.8274535936, 171.972351069, 183.1290283008, 195.5955810415], [51.6719970816, 179.4032592625, 77.4069213696, 193.3826904025], [88.8085937664, 183.80633545049997, 117.0336303616, 199.486938481]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047069_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include five hats.", "boxes_value": [[51.6719970816, 9.601623558, 269.8723755008, 46.68579100699998], [237.504089344, 19.462158198499992, 269.8723755008, 46.68579100699998], [206.8507690496, 9.601623558, 237.504089344, 35.11035158599998], [136.8274535936, 12.972351069000013, 183.1290283008, 36.5955810415], [51.6719970816, 20.4032592625, 77.4069213696, 34.3826904025], [88.8085937664, 24.806335450499972, 117.0336303616, 40.48693848100001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047070.jpg", "text": "What can I find in the bbox of the provided image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.9397582832, 21.9108886528, 771.1605224312999, 434.9636230656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047070_crop.jpg", "text": "What can I find in the bbox of the provided image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[1.9397582832, 21.9108886528, 771, 434.9636230656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047070.jpg", "text": "What can I find in the bbox of the provided image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three ties, two belts, and a fire truck.", "boxes_value": [[1.9397582832, 21.9108886528, 771.1605224312999, 434.9636230656], [87.2751465186, 207.1851196416, 105.0596313543, 232.3895874048], [416.71765137420005, 219.9729004032, 434.5538329935, 297.7877197312], [235.9025878911, 296.9552002048, 295.3541259594, 313.8182983168], [46.8363647508, 285.1871337984, 82.2797851677, 298.8843994112], [144.2333984187, 218.48571776, 162.79492188060001, 303.6881713664], [1.9397582832, 21.9108886528, 771.1605224312999, 434.9636230656]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4], [6]]}, {"image_path": "objects365_v1_00047070_crop.jpg", "text": "What can I find in the bbox of the provided image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three ties, two belts, and a fire truck.", "boxes_value": [[1.9397582832, 21.9108886528, 771, 434.9636230656], [87.2751465186, 207.1851196416, 105.0596313543, 232.3895874048], [416.71765137420005, 219.9729004032, 434.5538329935, 297.7877197312], [235.9025878911, 296.9552002048, 295.3541259594, 313.8182983168], [46.8363647508, 285.1871337984, 82.2797851677, 298.8843994112], [144.2333984187, 218.48571776, 162.79492188060001, 303.6881713664], [1.9397582832, 21.9108886528, 771, 434.9636230656]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4], [6]]}, {"image_path": "objects365_v1_00047072.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[397.9676514048, 256.939636224, 496.12219238399996, 452.3072509952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047072_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[24.967651404799994, 48.939636224000026, 123.12219238399996, 244.30725099519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047072.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include two bottles, two wine glasses, and a cup.", "boxes_value": [[397.9676514048, 256.939636224, 496.12219238399996, 452.3072509952], [452.6364745728, 256.939636224, 468.80212400639994, 296.9109496832], [455.92956541440003, 363.507446272, 516.4300537344, 448.9696045056], [439.42944337919994, 317.579772928, 496.12219238399996, 390.3495483392], [399.6599121408, 290.9257202176, 441.96789550080007, 371.3109130752], [397.9676514048, 349.0757446144, 445.35253908479996, 452.3072509952]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3]]}, {"image_path": "objects365_v1_00047072_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include two bottles, two wine glasses, and a cup.", "boxes_value": [[24.967651404799994, 48.939636224000026, 123.12219238399996, 244.30725099519998], [79.63647457280001, 48.939636224000026, 95.80212400639994, 88.91094968319999], [82.92956541440003, 155.50744627199998, 143.43005373439996, 240.9696045056], [66.42944337919994, 109.57977292800001, 123.12219238399996, 182.3495483392], [26.659912140799975, 82.92572021759997, 68.96789550080007, 163.3109130752], [24.967651404799994, 141.07574461439998, 72.35253908479996, 244.30725099519998]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3]]}, {"image_path": "objects365_v1_00047074.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object.", "boxes_value": [[209.8319701834, 324.7509155328, 390.1082763985, 424.8569335808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047074_crop.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object.", "boxes_value": [[45.83197018339999, 25.75091553279998, 226.10827639849998, 125.85693358079999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047074.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a paddle, a hat, two sneakers, a cup, and two ice creams.", "boxes_value": [[209.8319701834, 324.7509155328, 390.1082763985, 424.8569335808], [254.9404296612, 261.9585571328, 359.0648193238, 376.6942749184], [350.1430663924, 340.2199707136, 406.1044921732, 360.97857664], [243.6561279584, 387.9173583872, 268.2209472799, 408.4188232192], [292.2316894765, 403.9860839936, 321.4139404064, 424.8569335808], [366.0799560358, 408.6939086848, 389.134887679, 431.7488403456], [209.8319701834, 324.7509155328, 240.54943846129999, 347.4113769472], [372.483520478, 405.3213500928, 390.1082763985, 424.456848128]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047074_crop.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a paddle, a hat, two sneakers, a cup, and two ice creams.", "boxes_value": [[45.83197018339999, 25.75091553279998, 226.10827639849998, 125.85693358079999], [90.9404296612, 0, 195.0648193238, 77.69427491840003], [186.1430663924, 41.21997071359999, 242.10449217320001, 61.97857664000003], [79.65612795839999, 88.91735838720001, 104.22094727989997, 109.41882321920002], [128.23168947649998, 104.9860839936, 157.4139404064, 125.85693358079999], [202.07995603580002, 109.69390868480002, 225.13488767899997, 132.7488403456], [45.83197018339999, 25.75091553279998, 76.54943846129999, 48.41137694719998], [208.483520478, 106.3213500928, 226.10827639849998, 125.45684812799999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047077.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object.", "boxes_value": [[56.340332051599994, 428.8386230272, 565.8835449283, 511.817016576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047077_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object.", "boxes_value": [[56.340332051599994, 20.838623027200015, 565.8835449283, 103.81701657600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047077.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, two plates, a fork, and a bread.", "boxes_value": [[56.340332051599994, 428.8386230272, 565.8835449283, 511.817016576], [0.1478882142, 41.8794555904, 507.9659423991, 489.34564208639995], [468.1535644344, 426.9592285184, 615.5194091697999, 478.3933715968], [430.73754879530003, 428.8386230272, 565.8835449283, 464.127136256], [56.340332051599994, 483.5816039936, 220.68402099320002, 511.817016576], [74.5897216607, 473.6733398528, 188.36749269359998, 511.7514648576]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047077_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, two plates, a fork, and a bread.", "boxes_value": [[56.340332051599994, 20.838623027200015, 565.8835449283, 103.81701657600001], [0.1478882142, 0, 507.9659423991, 81.34564208639995], [468.1535644344, 18.959228518399982, 615.5194091697999, 70.39337159680002], [430.73754879530003, 20.838623027200015, 565.8835449283, 56.12713625599997], [56.340332051599994, 75.58160399360003, 220.68402099320002, 103.81701657600001], [74.5897216607, 65.67333985279998, 188.36749269359998, 103.75146485760001]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047078.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[522.3752441088, 219.2877197312, 673.6186523136, 313.6774291968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047078_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[38.37524410879996, 24.287719731200013, 189.61865231360002, 118.6774291968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047078.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[522.3752441088, 219.2877197312, 673.6186523136, 313.6774291968], [522.3752441088, 256.7590331904, 560.0190429695999, 313.6774291968], [560.4934082304, 246.7982788096, 596.7788086272001, 310.8314819584], [623.5778808576, 238.0233764864, 640.4162597376001, 300.3964233216], [638.993286144, 243.0037231616, 652.2742920192001, 297.5505371136], [648.7169189376, 219.2877197312, 673.6186523136, 292.8073120256]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047078_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[38.37524410879996, 24.287719731200013, 189.61865231360002, 118.6774291968], [38.37524410879996, 61.759033190399975, 76.01904296959992, 118.6774291968], [76.49340823039995, 51.79827880959999, 112.77880862720008, 115.83148195839999], [139.5778808576, 43.0233764864, 156.41625973760006, 105.39642332160003], [154.99328614399997, 48.00372316159999, 168.27429201920006, 102.5505371136], [164.71691893759998, 24.287719731200013, 189.61865231360002, 97.80731202560003]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047079.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[27.3579032064, 77.4862670838, 295.0749751296, 311.0724487618]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047079_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[27.3579032064, 58.4862670838, 295.0749751296, 292.0724487618]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047079.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include a flag, and four hats.", "boxes_value": [[27.3579032064, 77.4862670838, 295.0749751296, 311.0724487618], [93.7189330944, 77.4862670838, 207.249633792, 311.0724487618], [212.3577897984, 213.8640859544, 295.0749751296, 253.24063745279997], [27.3579032064, 84.32820913389999, 49.9308004864, 102.7969433051], [53.0887579136, 120.0442418027, 71.9585801728, 136.4218233312], [73.7387521024, 119.33217298119999, 97.9490900992, 135.3537202014]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047079_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include a flag, and four hats.", "boxes_value": [[27.3579032064, 58.4862670838, 295.0749751296, 292.0724487618], [93.7189330944, 58.4862670838, 207.249633792, 292.0724487618], [212.3577897984, 194.8640859544, 295.0749751296, 234.24063745279997], [27.3579032064, 65.32820913389999, 49.9308004864, 83.7969433051], [53.0887579136, 101.0442418027, 71.9585801728, 117.42182333119999], [73.7387521024, 100.33217298119999, 97.9490900992, 116.3537202014]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047080.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[404.597534172, 109.9633788928, 753.769165013, 511.8232421888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047080_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[87.597534172, 100.9633788928, 436.769165013, 502.8232421888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047080.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a bracelet, a barrel, and a glasses.", "boxes_value": [[404.597534172, 109.9633788928, 753.769165013, 511.8232421888], [317.0560302614, 47.4450073088, 481.8703612966, 510.75646970879995], [404.597534172, 109.9633788928, 753.769165013, 511.8232421888], [500.8334960722, 481.0263671808, 536.425170876, 510.2433471488], [686.652709925, 183.9046020608, 708.3990478766, 221.7977905152], [474.66681479560003, 194.8364460032, 589.237408343, 218.6270769152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047080_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a bracelet, a barrel, and a glasses.", "boxes_value": [[87.597534172, 100.9633788928, 436.769165013, 502.8232421888], [0.05603026139999656, 38.4450073088, 164.87036129659998, 501.75646970879995], [87.597534172, 100.9633788928, 436.769165013, 502.8232421888], [183.83349607219998, 472.0263671808, 219.42517087600004, 501.2433471488], [369.65270992499995, 174.9046020608, 391.3990478766, 212.7977905152], [157.66681479560003, 185.8364460032, 272.23740834299997, 209.6270769152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047082.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[0, 33.0272827392, 135.0578613366, 255.5865478656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047082_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[0, 33.0272827392, 135.0578613366, 255.5865478656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047082.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four street lights, and a boat.", "boxes_value": [[0, 33.0272827392, 135.0578613366, 255.5865478656], [85.02917481, 41.145080576, 99.7090454214, 145.396911616], [9.363342321000001, 82.3353271296, 21.68487549, 109.6425170944], [115.8565063338, 67.8016357376, 126.9887084712, 143.1580200448], [67.04614255679999, 91.4932251136, 76.4656982532, 142.5871582208], [0, 33.0272827392, 135.0578613366, 255.5865478656]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047082_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four street lights, and a boat.", "boxes_value": [[0, 33.0272827392, 135.0578613366, 255.5865478656], [85.02917481, 41.145080576, 99.7090454214, 145.396911616], [9.363342321000001, 82.3353271296, 21.68487549, 109.6425170944], [115.8565063338, 67.8016357376, 126.9887084712, 143.1580200448], [67.04614255679999, 91.4932251136, 76.4656982532, 142.5871582208], [0, 33.0272827392, 135.0578613366, 255.5865478656]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047084.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[191.5145873664, 163.9974365184, 393.2791748352, 290.9133300736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047084_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[50.51458736640001, 31.997436518400008, 252.27917483520002, 158.91333007359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047084.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, a stool, a person, and a speaker.", "boxes_value": [[191.5145873664, 163.9974365184, 393.2791748352, 290.9133300736], [191.5145873664, 163.9974365184, 210.2693481216, 259.5292358144], [253.81304931840003, 206.5155029504, 270.39086914560005, 226.5380859392], [273.95861813759996, 221.1556396544, 291.0593261568, 241.2397460992], [372.6916503552, 262.2916870144, 393.2791748352, 290.9133300736], [341.9536132608, 223.7633667072, 375.3513183744, 315.297912576], [316.3602295296, 222.4274292224, 330.4927978752, 246.0590820352]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047084_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, a stool, a person, and a speaker.", "boxes_value": [[50.51458736640001, 31.997436518400008, 252.27917483520002, 158.91333007359998], [50.51458736640001, 31.997436518400008, 69.26934812159999, 127.52923581440001], [112.81304931840003, 74.51550295039999, 129.39086914560005, 94.53808593919999], [132.95861813759996, 89.1556396544, 150.0593261568, 109.2397460992], [231.6916503552, 130.2916870144, 252.27917483520002, 158.91333007359998], [200.9536132608, 91.76336670719999, 234.35131837440002, 183.297912576], [175.36022952960002, 90.42742922240001, 189.49279787519998, 114.05908203519999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047087.jpg", "text": "What insights can you provide about the area in the selected picture ? Please mention the objects and their locations.", "boxes_value": [[465.2921142473, 232.96063232, 747.9522705121001, 512.3929443328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047087_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Please mention the objects and their locations.", "boxes_value": [[71.29211424729999, 69.96063232, 353.9522705121001, 349]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047087.jpg", "text": "What insights can you provide about the area in the selected picture ? Please mention the objects and their locations. For your reference, objects involved in this region include two towels, and three people.", "boxes_value": [[465.2921142473, 232.96063232, 747.9522705121001, 512.3929443328], [335.4995116966, 407.1454467584, 590.0144043346, 446.6299438592], [572.4064941252, 425.9539794944, 747.9522705121001, 512.3929443328], [527.6239013306999, 272.8819579904, 743.9587402463, 468.8356933632], [552.2856445544, 239.7446288896, 682.9025879099, 385.3400879104], [465.2921142473, 232.96063232, 570.1834716604, 371.7720336896]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047087_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Please mention the objects and their locations. For your reference, objects involved in this region include two towels, and three people.", "boxes_value": [[71.29211424729999, 69.96063232, 353.9522705121001, 349], [0, 244.1454467584, 196.01440433460004, 283.6299438592], [178.40649412519997, 262.9539794944, 353.9522705121001, 349], [133.62390133069994, 109.8819579904, 349.95874024629995, 305.8356933632], [158.28564455440005, 76.7446288896, 288.90258790990003, 222.34008791039997], [71.29211424729999, 69.96063232, 176.18347166039996, 208.7720336896]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047088.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.6396484608, 435.04357911799997, 210.711425792, 566.0164794944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047088_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.6396484608, 33.04357911799997, 210.711425792, 164.01647949439996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047088.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, and four cups.", "boxes_value": [[0.6396484608, 435.04357911799997, 210.711425792, 566.0164794944], [0.6396484608, 448.11914063300003, 210.711425792, 566.0164794944], [113.4770507776, 434.14013669599996, 134.2550659072, 457.62841798], [137.4169922048, 433.6884765694, 158.6467284992, 458.98352052859997], [89.988769536, 435.04357911799997, 110.3151245312, 458.0800781066], [64.6937255936, 435.04357911799997, 86.375183104, 458.0800781066]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047088_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, and four cups.", "boxes_value": [[0.6396484608, 33.04357911799997, 210.711425792, 164.01647949439996], [0.6396484608, 46.11914063300003, 210.711425792, 164.01647949439996], [113.4770507776, 32.140136695999956, 134.2550659072, 55.628417979999995], [137.4169922048, 31.688476569399995, 158.6467284992, 56.98352052859997], [89.988769536, 33.04357911799997, 110.3151245312, 56.08007810660001], [64.6937255936, 33.04357911799997, 86.375183104, 56.08007810660001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047091.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0, 271.3259277312, 109.1755981824, 358.244812032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047091_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0, 22.32592773120001, 109.1755981824, 109.24481203200003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047091.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[0, 271.3259277312, 109.1755981824, 358.244812032], [0.2232055296, 222.6667480576, 215.9924926464, 441.585021952], [7.406555212800001, 271.3259277312, 103.30468746240001, 323.4053955072], [0.2232055296, 257.3182983168, 107.9738769408, 307.9611206144], [0.2232055296, 297.1860961792, 37.5767822592, 333.8212890624], [0, 304.728637696, 88.21960450559999, 358.244812032], [34.0579834368, 329.9371948032, 109.1755981824, 352.1030273536]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047091_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[0, 22.32592773120001, 109.1755981824, 109.24481203200003], [0.2232055296, 0, 136, 130], [7.406555212800001, 22.32592773120001, 103.30468746240001, 74.40539550720001], [0.2232055296, 8.318298316799996, 107.9738769408, 58.96112061439999], [0.2232055296, 48.18609617919998, 37.5767822592, 84.82128906240001], [0, 55.72863769600002, 88.21960450559999, 109.24481203200003], [34.0579834368, 80.93719480319999, 109.1755981824, 103.1030273536]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047092.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[79.1109619396, 239.5878906368, 537.4766845839999, 419.0626830848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047092_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[79.1109619396, 45.58789063680001, 537.4766845839999, 225.06268308480003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047092.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three desks, and five chairs.", "boxes_value": [[79.1109619396, 239.5878906368, 537.4766845839999, 419.0626830848], [389.82177735360005, 250.2110595584, 429.5913085632, 258.6726684672], [486.70703121559995, 252.3264160256, 537.4766845839999, 262.9034423808], [79.1109619396, 239.5878906368, 213.3913574088, 271.5593871872], [176.8882446632, 320.8211670016, 260.4739989932, 421.5189819392], [253.8924560448, 344.5147704832, 356.564697254, 462.9827270656], [346.0341796844, 329.3771972608, 404.60998531919995, 437.3146972672], [397.51135252480003, 320.9035034112, 446.026733392, 419.0626830848], [448.2833252128, 353.0590820352, 557.1610107335999, 470.3987426816]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00047092_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three desks, and five chairs.", "boxes_value": [[79.1109619396, 45.58789063680001, 537.4766845839999, 225.06268308480003], [389.82177735360005, 56.211059558399995, 429.5913085632, 64.67266846720003], [486.70703121559995, 58.3264160256, 537.4766845839999, 68.90344238080002], [79.1109619396, 45.58789063680001, 213.3913574088, 77.5593871872], [176.8882446632, 126.82116700159997, 260.4739989932, 227.5189819392], [253.8924560448, 150.51477048319998, 356.564697254, 268.9827270656], [346.0341796844, 135.37719726080002, 404.60998531919995, 243.31469726720002], [397.51135252480003, 126.9035034112, 446.026733392, 225.06268308480003], [448.2833252128, 159.0590820352, 557.1610107335999, 269]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00047095.jpg", "text": "Kindly give an overview of the section in photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[147.948303256, 254.943603537, 271.182373032, 358.031127945]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047095_crop.jpg", "text": "Kindly give an overview of the section in photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[30.948303256000003, 25.943603537, 154.182373032, 129.03112794499998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047095.jpg", "text": "Kindly give an overview of the section in photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a desk.", "boxes_value": [[147.948303256, 254.943603537, 271.182373032, 358.031127945], [242.92486572800001, 254.943603537, 271.182373032, 343.37908937099996], [234.02899169199998, 271.165527324, 246.326232936, 297.591491688], [175.94415281599998, 264.624389673, 204.201660188, 344.425659198], [147.948303256, 264.362792976, 178.037292504, 358.031127945], [187.081604012, 282.14794923, 336.9591675, 344.596923828]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047095_crop.jpg", "text": "Kindly give an overview of the section in photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a desk.", "boxes_value": [[30.948303256000003, 25.943603537, 154.182373032, 129.03112794499998], [125.92486572800001, 25.943603537, 154.182373032, 114.37908937099996], [117.02899169199998, 42.16552732399998, 129.326232936, 68.59149168800002], [58.944152815999985, 35.624389672999996, 87.201660188, 115.425659198], [30.948303256000003, 35.36279297599998, 61.03729250399999, 129.03112794499998], [70.08160401200001, 53.147949229999995, 184, 115.596923828]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047096.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention.", "boxes_value": [[213.4707031571, 266.1296996864, 682.0515136654, 511.9894409216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047096_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention.", "boxes_value": [[117.47070315709999, 62.12969968639999, 586.0515136654, 307.9894409216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047096.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four chairs, two desks, and a lamp.", "boxes_value": [[213.4707031571, 266.1296996864, 682.0515136654, 511.9894409216], [248.9221191552, 285.2189330944, 347.7769775637, 479.519836416], [325.9606933307, 274.992553728, 400.954101595, 450.204284672], [338.2323608699, 256.5850829824, 390.0458984183, 362.9392700416], [213.4707031571, 266.1296996864, 260.5119628887, 325.4426269696], [154.8395385635, 304.308105472, 409.8168945514, 502.6995849728], [612.8552245813, 320.3687744, 682.0515136654, 511.9894409216], [571.0214843482, 474.8420410368, 656.5804443195, 511.8256225792]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 7], [6]]}, {"image_path": "objects365_v1_00047096_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four chairs, two desks, and a lamp.", "boxes_value": [[117.47070315709999, 62.12969968639999, 586.0515136654, 307.9894409216], [152.9221191552, 81.21893309439997, 251.77697756369997, 275.519836416], [229.9606933307, 70.99255372800002, 304.954101595, 246.20428467199997], [242.2323608699, 52.585082982400024, 294.0458984183, 158.93927004160003], [117.47070315709999, 62.12969968639999, 164.5119628887, 121.44262696959998], [58.839538563500014, 100.30810547200002, 313.8168945514, 298.6995849728], [516.8552245813, 116.3687744, 586.0515136654, 307.9894409216], [475.02148434820003, 270.8420410368, 560.5804443195, 307.8256225792]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 7], [6]]}, {"image_path": "objects365_v1_00047099.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object.", "boxes_value": [[163.68041992370001, 299.3875122176, 503.913818348, 373.0857544192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047099_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object.", "boxes_value": [[85.68041992370001, 19.387512217599976, 425.913818348, 93.08575441919999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047099.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object. For your reference, objects involved in this region include two sinks, a wine glass, a cup, a bowl, and a bottle.", "boxes_value": [[163.68041992370001, 299.3875122176, 503.913818348, 373.0857544192], [214.1231078989, 326.41644288, 351.1600342001, 367.0698852352], [302.4298705778, 316.4550171136, 466.65881347040005, 358.4545898496], [163.68041992370001, 299.759704576, 197.8480834648, 373.0857544192], [462.7374267522, 299.3875122176, 503.913818348, 335.4544677888], [407.1342773638, 308.4042358272, 431.4794921734, 323.4321289216], [185.32910155730002, 234.5341796864, 216.30413815080001, 364.5354003968]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047099_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Specify the location of each mentioned object. For your reference, objects involved in this region include two sinks, a wine glass, a cup, a bowl, and a bottle.", "boxes_value": [[85.68041992370001, 19.387512217599976, 425.913818348, 93.08575441919999], [136.1231078989, 46.41644287999998, 273.1600342001, 87.06988523519999], [224.4298705778, 36.45501711359998, 388.65881347040005, 78.45458984959998], [85.68041992370001, 19.75970457599999, 119.8480834648, 93.08575441919999], [384.7374267522, 19.387512217599976, 425.913818348, 55.4544677888], [329.1342773638, 28.40423582720001, 353.4794921734, 43.4321289216], [107.32910155730002, 0, 138.30413815080001, 84.53540039680001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047108.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object.", "boxes_value": [[166.3701171712, 76.234497058, 306.0733032448, 310.8471679735]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047108_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object.", "boxes_value": [[35.37011717120001, 59.234497058, 175.0733032448, 293.8471679735]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047108.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a person, a leather shoes, and a stroller.", "boxes_value": [[166.3701171712, 76.234497058, 306.0733032448, 310.8471679735], [224.5283203072, 84.6631470065, 249.8428344832, 127.9954223651], [254.8394775552, 91.56945798180001, 277.2850952192, 137.61175535659999], [264.5363769344, 76.234497058, 306.0733032448, 220.0640258991], [166.3701171712, 286.9697876056, 203.1412353536, 310.8471679735], [184.0980224512, 144.6002197575, 265.7645874176, 232.25567630010002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047108_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a person, a leather shoes, and a stroller.", "boxes_value": [[35.37011717120001, 59.234497058, 175.0733032448, 293.8471679735], [93.5283203072, 67.6631470065, 118.8428344832, 110.9954223651], [123.8394775552, 74.56945798180001, 146.2850952192, 120.61175535659999], [133.53637693439998, 59.234497058, 175.0733032448, 203.0640258991], [35.37011717120001, 269.9697876056, 72.1412353536, 293.8471679735], [53.098022451199995, 127.60021975750001, 134.7645874176, 215.25567630010002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047109.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference.", "boxes_value": [[268.405639656, 111.3123168768, 533.260375994, 489.74023439359996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047109_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference.", "boxes_value": [[66.405639656, 95.3123168768, 331.260375994, 473.74023439359996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047109.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, a pillow, a lamp, a nightstand, and a carpet.", "boxes_value": [[268.405639656, 111.3123168768, 533.260375994, 489.74023439359996], [268.405639656, 111.3123168768, 533.260375994, 489.74023439359996], [285.42163087200004, 278.4982909952, 408.665039098, 312.9674072064], [424.338134764, 263.9946289152, 454.031005852, 307.612426752], [416.556518526, 302.9024658432, 456.488281238, 318.6704101376], [186.73004149599998, 398.6557006848, 642.147949182, 510.89135744]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047109_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, a pillow, a lamp, a nightstand, and a carpet.", "boxes_value": [[66.405639656, 95.3123168768, 331.260375994, 473.74023439359996], [66.405639656, 95.3123168768, 331.260375994, 473.74023439359996], [83.42163087200004, 262.4982909952, 206.66503909800002, 296.9674072064], [222.33813476400002, 247.9946289152, 252.03100585200002, 291.612426752], [214.556518526, 286.9024658432, 254.488281238, 302.6704101376], [0, 382.6557006848, 397, 494.89135744]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047112.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[264.100924065, 146.045776384, 350.153284434, 459.1499352064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047112_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[22.100924065000015, 79.04577638399999, 108.153284434, 392.1499352064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047112.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two ties, and two leather shoes.", "boxes_value": [[264.100924065, 146.045776384, 350.153284434, 459.1499352064], [276.59704589849997, 84.2725830144, 403.154052735, 459.50616453119994], [189.35528566349998, 77.5595703296, 306.5261840655, 476.24487306239996], [330.0826174845, 146.045776384, 344.4826049745, 187.8595713536], [264.100924065, 158.0999145472, 301.755371067, 272.5447368704], [323.87943802200004, 424.859917312, 349.89862991999996, 438.1308580352], [287.7528992055, 434.0289534976, 350.153284434, 459.1499352064]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047112_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, two ties, and two leather shoes.", "boxes_value": [[22.100924065000015, 79.04577638399999, 108.153284434, 392.1499352064], [34.59704589849997, 17.2725830144, 129, 392.50616453119994], [0, 10.559570329600007, 64.52618406549999, 409.24487306239996], [88.08261748450002, 79.04577638399999, 102.48260497450002, 120.85957135359999], [22.100924065000015, 91.0999145472, 59.755371067, 205.54473687040002], [81.87943802200004, 357.859917312, 107.89862991999996, 371.1308580352], [45.7528992055, 367.0289534976, 108.153284434, 392.1499352064]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047114.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[248.8858032128, 13.5568847616, 512.2535400448, 418.67614748159997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047114_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[65.8858032128, 13.5568847616, 329, 418.67614748159997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047114.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a bracelet, two handbags, and a tripod.", "boxes_value": [[248.8858032128, 13.5568847616, 512.2535400448, 418.67614748159997], [134.0280151552, 33.8515624704, 407.0826416128, 714.2740478208], [286.7145996288, 390.67810060799997, 318.127014144, 418.67614748159997], [390.7592773632, 56.1773681664, 415.3039550976, 113.1905517312], [487.5755615232, 116.65502929920001, 512.2535400448, 142.8094482432], [248.8858032128, 13.5568847616, 301.8969726464, 87.772521984]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047114_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a bracelet, two handbags, and a tripod.", "boxes_value": [[65.8858032128, 13.5568847616, 329, 418.67614748159997], [0, 33.8515624704, 224.0826416128, 519], [103.71459962879999, 390.67810060799997, 135.127014144, 418.67614748159997], [207.7592773632, 56.1773681664, 232.3039550976, 113.1905517312], [304.5755615232, 116.65502929920001, 329, 142.8094482432], [65.8858032128, 13.5568847616, 118.8969726464, 87.772521984]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047117.jpg", "text": "Detail the chosen region in the depicted scene . Please mention the objects and their locations.", "boxes_value": [[127.5243084989, 145.7175010816, 294.8287115332, 331.6582641664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047117_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please mention the objects and their locations.", "boxes_value": [[42.5243084989, 46.717501081600005, 209.82871153320002, 232.6582641664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047117.jpg", "text": "Detail the chosen region in the depicted scene . Please mention the objects and their locations. For your reference, objects involved in this region include two people, and three hats.", "boxes_value": [[127.5243084989, 145.7175010816, 294.8287115332, 331.6582641664], [128.1694946137, 165.6407470592, 187.8185425082, 331.6582641664], [134.6576537967, 159.2882080256, 293.334777847, 512.8884277248], [127.5243084989, 162.249556736, 187.7009910367, 192.0072568832], [188.36227320760003, 160.26571008, 294.8287115332, 221.1036748288], [196.2976599414, 145.7175010816, 247.8776735745, 171.5075079168]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047117_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please mention the objects and their locations. For your reference, objects involved in this region include two people, and three hats.", "boxes_value": [[42.5243084989, 46.717501081600005, 209.82871153320002, 232.6582641664], [43.16949461370001, 66.64074705920001, 102.81854250820001, 232.6582641664], [49.657653796700004, 60.2882080256, 208.334777847, 279], [42.5243084989, 63.24955673599999, 102.7009910367, 93.0072568832], [103.36227320760003, 61.26571007999999, 209.82871153320002, 122.1036748288], [111.29765994140001, 46.717501081600005, 162.8776735745, 72.5075079168]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047119.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[337.7158203176, 142.8952636928, 466.95776365399996, 361.0382690304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047119_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[32.71582031759999, 54.8952636928, 161.95776365399996, 273.0382690304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047119.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two sneakers, and two hockey sticks.", "boxes_value": [[337.7158203176, 142.8952636928, 466.95776365399996, 361.0382690304], [387.0739745936, 160.4113769472, 487.48974611240004, 358.9862060544], [426.30639645319997, 142.8952636928, 466.95776365399996, 329.2523193344], [390.7365722636, 324.6689453056, 414.5166015932, 361.0382690304], [447.621948258, 313.0121459712, 466.7391357244, 329.7979736576], [315.61889651, 200.1511230464, 362.2016601428, 247.3311157248], [337.7158203176, 267.0391845888, 407.589965844, 305.2609863168]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047119_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two sneakers, and two hockey sticks.", "boxes_value": [[32.71582031759999, 54.8952636928, 161.95776365399996, 273.0382690304], [82.07397459359998, 72.41137694720001, 182.48974611240004, 270.9862060544], [121.30639645319997, 54.8952636928, 161.95776365399996, 241.2523193344], [85.73657226360001, 236.6689453056, 109.5166015932, 273.0382690304], [142.62194825799997, 225.01214597120003, 161.7391357244, 241.79797365759998], [10.618896510000013, 112.15112304639999, 57.201660142799994, 159.3311157248], [32.71582031759999, 179.03918458880003, 102.589965844, 217.26098631679997]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047120.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention.", "boxes_value": [[180.1239013888, 436.89465329489997, 228.618408192, 544.7497558635]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047120_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention.", "boxes_value": [[12.123901388799993, 27.894653294899967, 60.618408192000004, 135.7497558635]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047120.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a flower, two bottles, and a cup.", "boxes_value": [[180.1239013888, 436.89465329489997, 228.618408192, 544.7497558635], [87.6473998848, 166.6914672927, 453.9652099584, 769.8541259991], [180.1239013888, 436.89465329489997, 217.0822753792, 471.78540040319996], [194.63836672, 488.03356935930003, 215.0765991424, 544.7497558635], [181.8109741056, 477.9133300683, 195.9063720448, 521.5294189499999], [213.1932372992, 493.07250978900004, 228.618408192, 521.7952881228]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047120_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a flower, two bottles, and a cup.", "boxes_value": [[12.123901388799993, 27.894653294899967, 60.618408192000004, 135.7497558635], [0, 0, 72, 162], [12.123901388799993, 27.894653294899967, 49.08227537920001, 62.78540040319996], [26.638366719999993, 79.03356935930003, 47.0765991424, 135.7497558635], [13.810974105599996, 68.91333006830001, 27.90637204480001, 112.52941894999992], [45.19323729920001, 84.07250978900004, 60.618408192000004, 112.7952881228]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047121.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations.", "boxes_value": [[10.593322752, 426.4313964859, 511.1632079872, 682.7874756026999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047121_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations.", "boxes_value": [[10.593322752, 64.4313964859, 511.1632079872, 320.78747560269994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047121.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, three people, a hat, and a cell phone.", "boxes_value": [[10.593322752, 426.4313964859, 511.1632079872, 682.7874756026999], [253.425781248, 589.7502441324, 334.8937988096, 682.3505859103001], [424.0639037952, 549.538574191, 511.1632079872, 682.7874756026999], [337.9373779456, 563.3946533143, 440.9637451264, 682.1374511842], [0, 352.37268068820003, 77.8964233216, 681.9659423712], [103.9529418752, 600.6982421675, 136.3853149184, 627.034301781], [10.593322752, 426.4313964859, 33.7502441472, 479.87036133929996]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047121_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, three people, a hat, and a cell phone.", "boxes_value": [[10.593322752, 64.4313964859, 511.1632079872, 320.78747560269994], [253.425781248, 227.75024413239998, 334.8937988096, 320.3505859103001], [424.0639037952, 187.53857419099995, 511.1632079872, 320.78747560269994], [337.9373779456, 201.39465331429994, 440.9637451264, 320.1374511842], [0, 0, 77.8964233216, 319.96594237119996], [103.9529418752, 238.69824216749998, 136.3853149184, 265.03430178099995], [10.593322752, 64.4313964859, 33.7502441472, 117.87036133929996]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047123.jpg", "text": "Could you please share some information on the region in this photograph ? Please point out the objects and their coordinates.", "boxes_value": [[179.502258276, 314.0069580288, 570.2467040732, 390.5193481216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047123_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Please point out the objects and their coordinates.", "boxes_value": [[98.50225827599999, 20.0069580288, 489.2467040732, 96.51934812159999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047123.jpg", "text": "Could you please share some information on the region in this photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a potted plant, a chair, three pillows, a person, and a moniter.", "boxes_value": [[179.502258276, 314.0069580288, 570.2467040732, 390.5193481216], [390.5632324242, 325.6015625216, 432.4124755742, 355.6807250944], [289.6017456332, 355.2543334912, 370.60522461600004, 436.2578735104], [306.8152465752, 356.6549682688, 355.682922353, 390.5193481216], [179.502258276, 363.513549824, 267.80688478499997, 379.3741454848], [195.7914428734, 352.7969970688, 243.80175779080002, 363.513549824], [492.823852521, 314.0069580288, 532.8013916258001, 355.8255004672], [537.7998046673999, 322.959899904, 570.2467040732, 357.703857408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047123_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a potted plant, a chair, three pillows, a person, and a moniter.", "boxes_value": [[98.50225827599999, 20.0069580288, 489.2467040732, 96.51934812159999], [309.5632324242, 31.601562521599988, 351.4124755742, 61.680725094399975], [208.60174563319998, 61.254333491199986, 289.60522461600004, 115], [225.81524657519998, 62.65496826880002, 274.682922353, 96.51934812159999], [98.50225827599999, 69.513549824, 186.80688478499997, 85.37414548480001], [114.7914428734, 58.79699706880001, 162.80175779080002, 69.513549824], [411.823852521, 20.0069580288, 451.8013916258001, 61.825500467200015], [456.79980466739994, 28.959899903999997, 489.2467040732, 63.703857407999976]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047124.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[206.868713359, 207.1083984384, 376.23291013, 417.6101074432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047124_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[42.868713359, 53.1083984384, 212.23291013, 263.6101074432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047124.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a hat, and a truck.", "boxes_value": [[206.868713359, 207.1083984384, 376.23291013, 417.6101074432], [206.868713359, 229.427673344, 290.89440919, 417.6101074432], [262.88574219599997, 253.4975585792, 325.905029296, 344.5253296128], [323.71691891, 207.1083984384, 376.23291013, 333.1468506112], [324.664161668, 207.7913196544, 359.330599012, 234.8041280512], [33.132019074, 183.6119384576, 449.6154785430001, 359.9296264704]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047124_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a hat, and a truck.", "boxes_value": [[42.868713359, 53.1083984384, 212.23291013, 263.6101074432], [42.868713359, 75.427673344, 126.89440918999998, 263.6101074432], [98.88574219599997, 99.49755857919999, 161.905029296, 190.52532961280002], [159.71691891, 53.1083984384, 212.23291013, 179.14685061120002], [160.66416166800002, 53.79131965440001, 195.330599012, 80.8041280512], [0, 29.61193845759999, 254, 205.92962647040002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047126.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[525.7662353209, 289.229370112, 888.0456543217, 437.3707885568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047126_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[90.76623532090002, 37.229370112000026, 453.0456543217, 185.3707885568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047126.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, and a street lights.", "boxes_value": [[525.7662353209, 289.229370112, 888.0456543217, 437.3707885568], [525.7662353209, 301.7991943168, 573.5057373017, 352.8889160192], [566.8054199556, 303.753417984, 633.5291747603, 364.0560912896], [668.8332519295001, 289.229370112, 851.5871582015, 416.1417846784], [792.9766845413001, 293.8443603456, 888.0456543217, 437.3707885568], [536.937866178, 274.0043335168, 555.4125976308, 303.6901855232]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047126_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, and a street lights.", "boxes_value": [[90.76623532090002, 37.229370112000026, 453.0456543217, 185.3707885568], [90.76623532090002, 49.799194316800026, 138.50573730170004, 100.8889160192], [131.80541995559997, 51.75341798400001, 198.52917476029995, 112.05609128959998], [233.83325192950008, 37.229370112000026, 416.58715820149996, 164.14178467839997], [357.9766845413001, 41.84436034560002, 453.0456543217, 185.3707885568], [101.93786617800004, 22.00433351679999, 120.41259763079995, 51.6901855232]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047127.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for each element you describe.", "boxes_value": [[462.37976076800004, 287.30651856, 639.9256591999999, 328.932128928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047127_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for each element you describe.", "boxes_value": [[45.37976076800004, 11.306518559999972, 222.92565919999993, 52.932128928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047127.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a van, three cars, a truck, and a suv.", "boxes_value": [[462.37976076800004, 287.30651856, 639.9256591999999, 328.932128928], [538.184936512, 287.30651856, 593.06237792, 298.644836448], [525.48596192, 291.16156008, 558.593872064, 303.633728016], [594.422973632, 282.9979248, 639.32275392, 297.05749512], [589.444580096, 293.384521488, 639.9256591999999, 312.543945312], [462.37976076800004, 288.661621104, 543.349243136, 328.932128928], [520.000976576, 295.730346672, 639.9556884479999, 337.500366192]], "boxes_seq": [[0], [0], [1], [2, 4, 6], [3], [5]]}, {"image_path": "objects365_v1_00047127_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a van, three cars, a truck, and a suv.", "boxes_value": [[45.37976076800004, 11.306518559999972, 222.92565919999993, 52.932128928], [121.18493651200004, 11.306518559999972, 176.06237792000002, 22.644836447999978], [108.48596192000002, 15.161560080000015, 141.59387206400004, 27.63372801600002], [177.42297363199998, 6.997924800000021, 222.32275391999997, 21.05749512], [172.44458009599998, 17.38452148800002, 222.92565919999993, 36.543945312000005], [45.37976076800004, 12.661621104000005, 126.34924313600004, 52.932128928], [103.00097657599997, 19.730346671999996, 222.95568844799993, 61.500366192]], "boxes_seq": [[0], [0], [1], [2, 4, 6], [3], [5]]}, {"image_path": "objects365_v1_00047128.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[462.3636474892, 193.8507080192, 615.7065429738, 389.05847168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047128_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[38.36364748919999, 48.8507080192, 191.70654297379997, 244.05847168000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047128.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a person, a mop, and two sneakers.", "boxes_value": [[462.3636474892, 193.8507080192, 615.7065429738, 389.05847168], [500.0102539001, 261.7129516544, 558.620117158, 369.0930786304], [428.4290771636, 111.6737670656, 519.3608398331, 390.0646362112001], [551.269165055, 193.8507080192, 615.7065429738, 377.4972534272], [469.8674316336, 359.0433349632, 514.0074462874, 369.6369018368], [462.3636474892, 370.9611206144, 492.82019043820003, 389.05847168]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047128_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a person, a mop, and two sneakers.", "boxes_value": [[38.36364748919999, 48.8507080192, 191.70654297379997, 244.05847168000003], [76.0102539001, 116.7129516544, 134.62011715799997, 224.0930786304], [4.429077163600027, 0, 95.36083983310004, 245.06463621120008], [127.26916505500003, 48.8507080192, 191.70654297379997, 232.4972534272], [45.867431633600006, 214.04333496319998, 90.00744628739994, 224.6369018368], [38.36364748919999, 225.9611206144, 68.82019043820003, 244.05847168000003]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047129.jpg", "text": "What can you tell me about the area within the image ? Give coordinates for the items you reference.", "boxes_value": [[600.3885498315001, 168.9034423808, 705.5948486144999, 474.5820312576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047129_crop.jpg", "text": "What can you tell me about the area within the image ? Give coordinates for the items you reference.", "boxes_value": [[26.388549831500086, 76.90344238079999, 131, 382.5820312576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047129.jpg", "text": "What can you tell me about the area within the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a mirror, two desks, a carpet, and a bathtub.", "boxes_value": [[600.3885498315001, 168.9034423808, 705.5948486144999, 474.5820312576], [401.7589111095, 0.6447753728, 703.652831997, 437.289733888], [513.3858642885, 223.06927488, 704.3157958785, 510.51330565119997], [600.3885498315001, 214.7134399488, 705.5948486144999, 474.5820312576], [520.7363280975, 353.8979492352, 682.592163111, 412.8597412352], [663.122680635, 168.9034423808, 704.6153564415, 208.7427367936]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047129_crop.jpg", "text": "What can you tell me about the area within the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a mirror, two desks, a carpet, and a bathtub.", "boxes_value": [[26.388549831500086, 76.90344238079999, 131, 382.5820312576], [0, 0, 129.65283199700002, 345.289733888], [0, 131.06927488, 130.3157958785, 418.51330565119997], [26.388549831500086, 122.71343994879999, 131, 382.5820312576], [0, 261.8979492352, 108.59216311099999, 320.8597412352], [89.12268063500005, 76.90344238079999, 130.6153564415, 116.74273679359999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047130.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe.", "boxes_value": [[232.92156981379998, 86.9387207168, 314.21362306550003, 511.75531008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047130_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe.", "boxes_value": [[20.921569813799977, 86.9387207168, 102.21362306550003, 511.75531008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047130.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, three chairs, and a desk.", "boxes_value": [[232.92156981379998, 86.9387207168, 314.21362306550003, 511.75531008], [232.92156981379998, 86.9387207168, 314.21362306550003, 291.1766357504], [199.88439940350003, 423.7664795136, 274.8694457671, 511.8361816576], [274.4740600723, 417.7184448, 347.8055419897, 510.8925781504], [224.4360961777, 486.3049316352, 252.90594484550002, 511.3239746048], [237.8082885835, 421.6007079936, 311.1398315611, 511.75531008]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047130_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, three chairs, and a desk.", "boxes_value": [[20.921569813799977, 86.9387207168, 102.21362306550003, 511.75531008], [20.921569813799977, 86.9387207168, 102.21362306550003, 291.1766357504], [0, 423.7664795136, 62.86944576709999, 511.8361816576], [62.474060072300006, 417.7184448, 122, 510.8925781504], [12.436096177699994, 486.3049316352, 40.90594484550002, 511.3239746048], [25.8082885835, 421.6007079936, 99.13983156109998, 511.75531008]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047132.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please mention the objects and their locations.", "boxes_value": [[625.376342784, 0, 767.460327168, 108.6425170944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047132_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please mention the objects and their locations.", "boxes_value": [[36.37634278400003, 0, 178.46032716800005, 108.6425170944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047132.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please mention the objects and their locations. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[625.376342784, 0, 767.460327168, 108.6425170944], [619.8071288832, 0, 639.8564453376, 84.1377563648], [625.376342784, 0, 682.1827392768, 108.6425170944], [707.8012695552, 0, 767.460327168, 94.1624145408], [743.4216308736, 89.5850830336, 767.2880859648001, 105.6900634624], [409.14392087039994, 64.8275757056, 767.4357910271999, 511.0023803904]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047132_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please mention the objects and their locations. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[36.37634278400003, 0, 178.46032716800005, 108.6425170944], [30.807128883199994, 0, 50.85644533760001, 84.1377563648], [36.37634278400003, 0, 93.18273927680002, 108.6425170944], [118.80126955519995, 0, 178.46032716800005, 94.1624145408], [154.42163087359995, 89.5850830336, 178.28808596480008, 105.6900634624], [0, 64.8275757056, 178.4357910271999, 135]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047134.jpg", "text": "Describe what can be found within the bounds of in the image . Give coordinates for the items you reference.", "boxes_value": [[1.6591186591000002, 327.50268556, 193.14501950780001, 440.69708252]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047134_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give coordinates for the items you reference.", "boxes_value": [[1.6591186591000002, 28.502685559999975, 193.14501950780001, 141.69708251999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047134.jpg", "text": "Describe what can be found within the bounds of in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three drums, a person, and a cup.", "boxes_value": [[1.6591186591000002, 327.50268556, 193.14501950780001, 440.69708252], [8.9577636621, 335.3666381872, 118.8394164709, 399.698608376], [153.86914062099999, 325.08947751520003, 195.182006815, 354.909240704], [149.9778442631, 327.50268556, 193.14501950780001, 393.645874], [63.691833496899996, 105.8261108224, 189.9117431636, 412.17901612640003], [1.6591186591000002, 376.04437254239997, 27.9464721444, 440.69708252]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047134_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three drums, a person, and a cup.", "boxes_value": [[1.6591186591000002, 28.502685559999975, 193.14501950780001, 141.69708251999998], [8.9577636621, 36.36663818720001, 118.8394164709, 100.69860837599998], [153.86914062099999, 26.08947751520003, 195.182006815, 55.90924070400001], [149.9778442631, 28.502685559999975, 193.14501950780001, 94.64587399999999], [63.691833496899996, 0, 189.9117431636, 113.17901612640003], [1.6591186591000002, 77.04437254239997, 27.9464721444, 141.69708251999998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047137.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[193.0556030544, 134.580932608, 421.5677490464, 243.1792602624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047137_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[58.055603054399995, 27.580932608000012, 286.5677490464, 136.1792602624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047137.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a desk, a moniter, and two speakers.", "boxes_value": [[193.0556030544, 134.580932608, 421.5677490464, 243.1792602624], [297.414001494, 134.580932608, 421.5677490464, 243.1792602624], [62.183227531, 191.0964355584, 489.6568603658, 503.585144064], [214.6353759984, 143.5420532224, 248.9755859661, 210.8832397312], [196.73333737709999, 195.598266624, 220.2711791844, 222.2254028288], [193.0556030544, 190.15515136, 209.8262329151, 209.868041984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047137_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a desk, a moniter, and two speakers.", "boxes_value": [[58.055603054399995, 27.580932608000012, 286.5677490464, 136.1792602624], [162.414001494, 27.580932608000012, 286.5677490464, 136.1792602624], [0, 84.0964355584, 343, 163], [79.63537599840001, 36.5420532224, 113.9755859661, 103.8832397312], [61.733337377099986, 88.59826662399999, 85.27117918440001, 115.22540282879999], [58.055603054399995, 83.15515135999999, 74.82623291510001, 102.868041984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047138.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[491.50720214940003, 107.4721069568, 567.792968727, 308.428710912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047138_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[19.50720214940003, 50.472106956800005, 95.79296872700002, 251.42871091199999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047138.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, three people, and a pen.", "boxes_value": [[491.50720214940003, 107.4721069568, 567.792968727, 308.428710912], [84.7642822354, 196.6129150464, 622.3629150478, 513.007568384], [491.50720214940003, 107.4721069568, 567.792968727, 241.33203123200002], [518.1352539396, 124.0246582272, 631.8442383034, 316.1784057856], [523.1729736612, 135.5394897408, 765.7041015535999, 436.36450196480007], [522.2901611123999, 298.2781982208, 567.1057129142, 308.428710912]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047138_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, three people, and a pen.", "boxes_value": [[19.50720214940003, 50.472106956800005, 95.79296872700002, 251.42871091199999], [0, 139.6129150464, 114, 301], [19.50720214940003, 50.472106956800005, 95.79296872700002, 184.33203123200002], [46.135253939600034, 67.0246582272, 114, 259.1784057856], [51.17297366119999, 78.53948974080001, 114, 301], [50.290161112399915, 241.2781982208, 95.10571291420001, 251.42871091199999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047139.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[125.83465576699999, 151.7828979712, 316.35400388939996, 201.7347411968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047139_crop.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[47.83465576699999, 12.782897971199986, 238.35400388939996, 62.7347411968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047139.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two plates, a bowl, two cups, and a chopsticks.", "boxes_value": [[125.83465576699999, 151.7828979712, 316.35400388939996, 201.7347411968], [61.4733276443, 0.2881469952, 316.0963134534, 183.5534667776], [203.8640747077, 181.2820434432, 256.9624633869, 201.7347411968], [260.5023193385, 169.482360832, 318.7139282333, 208.4212036096], [227.8566894696, 151.7828979712, 316.35400388939996, 190.7217406976], [249.88269042349998, 139.1966552576, 306.1276245184, 168.6957397504], [125.83465576699999, 155.5523071488, 185.5017089758, 190.9301147648], [184.9736938332, 160.3045654528, 231.96807859179998, 184.5938110464]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00047139_crop.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, two plates, a bowl, two cups, and a chopsticks.", "boxes_value": [[47.83465576699999, 12.782897971199986, 238.35400388939996, 62.7347411968], [0, 0, 238.0963134534, 44.55346677759999], [125.86407470770001, 42.282043443199996, 178.96246338690003, 62.7347411968], [182.5023193385, 30.482360832000012, 240.7139282333, 69.42120360960001], [149.8566894696, 12.782897971199986, 238.35400388939996, 51.72174069760001], [171.88269042349998, 0.196655257600014, 228.1276245184, 29.69573975040001], [47.83465576699999, 16.552307148799997, 107.5017089758, 51.93011476480001], [106.9736938332, 21.304565452800006, 153.96807859179998, 45.59381104639999]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00047140.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[296.479248065, 207.438598656, 502.56201172100003, 510.576843264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047140_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[52.47924806499998, 76.43859865600001, 258.56201172100003, 379.576843264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047140.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two drums, a necklace, a person, and a microphone.", "boxes_value": [[296.479248065, 207.438598656, 502.56201172100003, 510.576843264], [437.914916978, 343.6196289024, 570.408447301, 427.8640747008], [299.294433592, 451.605712896, 463.95410153399996, 510.576843264], [366.668823213, 284.5776367104, 409.755371102, 342.2581176832], [296.479248065, 207.438598656, 458.822143549, 459.7039794688], [456.73461911, 281.7890625024, 502.56201172100003, 321.9471435776]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047140_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two drums, a necklace, a person, and a microphone.", "boxes_value": [[52.47924806499998, 76.43859865600001, 258.56201172100003, 379.576843264], [193.914916978, 212.6196289024, 310, 296.8640747008], [55.29443359200002, 320.605712896, 219.95410153399996, 379.576843264], [122.668823213, 153.5776367104, 165.75537110200003, 211.2581176832], [52.47924806499998, 76.43859865600001, 214.82214354899997, 328.7039794688], [212.73461910999998, 150.7890625024, 258.56201172100003, 190.9471435776]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047141.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[325.8929443068, 71.319335936, 539.1020508143, 365.495056128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047141_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[53.89294430680002, 71.319335936, 267.1020508143, 365.495056128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047141.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a pillow, a desk, a cabinet, a picture, a speaker, and a moniter.", "boxes_value": [[325.8929443068, 71.319335936, 539.1020508143, 365.495056128], [325.8929443068, 317.2388916224, 432.34045410699997, 365.495056128], [299.3507690216, 303.9627685376, 564.5571288763, 353.8410034176], [428.4549560336, 232.5150756864, 539.1020508143, 314.9258422784], [340.8636474657, 71.319335936, 380.08154298169995, 109.8889770496], [478.0954590062, 132.6817627136, 494.18579101980004, 157.2677612544], [438.6301269332, 158.1869507072, 534.5301514009, 235.1317138432]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047141_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a pillow, a desk, a cabinet, a picture, a speaker, and a moniter.", "boxes_value": [[53.89294430680002, 71.319335936, 267.1020508143, 365.495056128], [53.89294430680002, 317.2388916224, 160.34045410699997, 365.495056128], [27.3507690216, 303.9627685376, 292.5571288763, 353.8410034176], [156.4549560336, 232.5150756864, 267.1020508143, 314.9258422784], [68.86364746570001, 71.319335936, 108.08154298169995, 109.8889770496], [206.09545900619997, 132.6817627136, 222.18579101980004, 157.2677612544], [166.63012693320002, 158.1869507072, 262.53015140089997, 235.1317138432]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047142.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates.", "boxes_value": [[0, 69.6699218944, 182.42224119499997, 265.1227417088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047142_crop.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates.", "boxes_value": [[0, 49.669921894400005, 182.42224119499997, 245.1227417088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047142.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a person, a hat, and two cars.", "boxes_value": [[0, 69.6699218944, 182.42224119499997, 265.1227417088], [0.4704590058, 179.7771606528, 109.5588378918, 265.1227417088], [112.1621093985, 113.2120361472, 149.943969753, 211.3361206272], [123.47991942280001, 112.7634277376, 149.0892333644, 127.5482177536], [0, 69.6699218944, 182.42224119499997, 188.2327880704], [105.886535673, 0, 633.6730957261001, 510.3795166208]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047142_crop.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a person, a hat, and two cars.", "boxes_value": [[0, 49.669921894400005, 182.42224119499997, 245.1227417088], [0.4704590058, 159.7771606528, 109.5588378918, 245.1227417088], [112.1621093985, 93.2120361472, 149.943969753, 191.3361206272], [123.47991942280001, 92.7634277376, 149.0892333644, 107.5482177536], [0, 49.669921894400005, 182.42224119499997, 168.2327880704], [105.886535673, 0, 228, 293]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047143.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference.", "boxes_value": [[204.95806885739998, 304.6071166976, 389.0884399138, 412.6320800768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047143_crop.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference.", "boxes_value": [[46.95806885739998, 27.60711669760002, 231.08843991380002, 135.63208007679998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047143.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include six people.", "boxes_value": [[204.95806885739998, 304.6071166976, 389.0884399138, 412.6320800768], [334.732360846, 340.4780273664, 376.5817260834, 412.6320800768], [354.935485848, 306.8060913152, 389.0884399138, 392.9099731456], [183.2088012646, 358.7570190336, 230.93353272660002, 429.536743168], [204.95806885739998, 304.6071166976, 242.065917988, 366.0411377152], [244.5397338592, 340.0656738304, 278.7614135562, 420.4659423744], [235.46893308539998, 328.933349632, 266.3921508758, 370.5765380608]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047143_crop.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include six people.", "boxes_value": [[46.95806885739998, 27.60711669760002, 231.08843991380002, 135.63208007679998], [176.732360846, 63.4780273664, 218.58172608339999, 135.63208007679998], [196.93548584799998, 29.80609131519998, 231.08843991380002, 115.90997314560002], [25.208801264599998, 81.75701903359999, 72.93353272660002, 152.536743168], [46.95806885739998, 27.60711669760002, 84.065917988, 89.04113771520002], [86.5397338592, 63.06567383039999, 120.76141355620001, 143.46594237440002], [77.46893308539998, 51.93334963199999, 108.39215087579998, 93.57653806079998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047144.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[0, 143.75811769999999, 294.123535125, 499.87670899999995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047144_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[0, 89.75811769999999, 294.123535125, 445.87670899999995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047144.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three hurdles.", "boxes_value": [[0, 143.75811769999999, 294.123535125, 499.87670899999995], [1.0460205, 141.0505371, 38.952148425000004, 204.00183105], [119.502685575, 143.75811769999999, 180.423278775, 345.4729004], [0, 343.2284546, 294.123535125, 499.87670899999995], [0.5551758, 296.9816284, 82.788452175, 462.68792725000003], [86.507568375, 299.46099855, 317.50463865, 471.7789917]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047144_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three hurdles.", "boxes_value": [[0, 89.75811769999999, 294.123535125, 445.87670899999995], [1.0460205, 87.05053710000001, 38.952148425000004, 150.00183105], [119.502685575, 89.75811769999999, 180.423278775, 291.4729004], [0, 289.2284546, 294.123535125, 445.87670899999995], [0.5551758, 242.98162839999998, 82.788452175, 408.68792725000003], [86.507568375, 245.46099855, 317.50463865, 417.7789917]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047145.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[400.7572021248, 194.968505856, 570.6259765248, 461.829101568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047145_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[42.75720212480002, 66.96850585600001, 212.62597652479997, 333.829101568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047145.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a nightstand, a lamp, two books, and a handbag.", "boxes_value": [[400.7572021248, 194.968505856, 570.6259765248, 461.829101568], [381.94848629759997, 257.1489257984, 528.6097412352001, 509.842224128], [507.44177249280006, 194.968505856, 570.6259765248, 262.3649291776], [442.79223636480003, 263.6130371072, 516.3602294784, 302.9070434816], [427.38623047680005, 266.5557861376, 513.7637939711999, 307.0614624256], [400.7572021248, 379.8403930624, 510.24523929599997, 461.829101568]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047145_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a nightstand, a lamp, two books, and a handbag.", "boxes_value": [[42.75720212480002, 66.96850585600001, 212.62597652479997, 333.829101568], [23.94848629759997, 129.1489257984, 170.6097412352001, 381.842224128], [149.44177249280006, 66.96850585600001, 212.62597652479997, 134.3649291776], [84.79223636480003, 135.6130371072, 158.36022947840002, 174.9070434816], [69.38623047680005, 138.55578613760002, 155.7637939711999, 179.06146242559998], [42.75720212480002, 251.84039306239998, 152.24523929599997, 333.829101568]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047148.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[120.19317623629999, 335.6165771264, 284.1760254182, 381.4387207168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047148_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[41.19317623629999, 11.616577126399989, 205.17602541820003, 57.43872071679999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047148.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two cars, and a van.", "boxes_value": [[120.19317623629999, 335.6165771264, 284.1760254182, 381.4387207168], [259.4750365916, 335.6165771264, 284.1760254182, 381.4387207168], [120.19317623629999, 336.5184936448, 134.6737670644, 363.9889526272], [126.95556640779999, 346.7733154304, 253.74523928710002, 406.553955072], [166.4830322202, 340.6654663168, 231.12573243789998, 365.5189819392], [193.2359619082, 338.7660522496, 252.65222168329998, 366.1268310528]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047148_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two cars, and a van.", "boxes_value": [[41.19317623629999, 11.616577126399989, 205.17602541820003, 57.43872071679999], [180.47503659159997, 11.616577126399989, 205.17602541820003, 57.43872071679999], [41.19317623629999, 12.518493644800003, 55.67376706440001, 39.98895262719998], [47.95556640779999, 22.773315430399975, 174.74523928710002, 68], [87.48303222019999, 16.665466316800007, 152.12573243789998, 41.51898193919999], [114.23596190820001, 14.766052249600023, 173.65222168329998, 42.126831052800014]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047149.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[1.58636472, 318.5706787328, 478.8695068032, 512.2354736128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047149_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[1.58636472, 48.57067873279999, 478.8695068032, 242]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047149.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, two chairs, and a desk.", "boxes_value": [[1.58636472, 318.5706787328, 478.8695068032, 512.2354736128], [202.0142822208, 187.8698730496, 428.2979736432, 512.1798095872], [0.0538940016, 188.9247436288, 177.75781247519998, 512.2655029248], [1.58636472, 429.6897583104, 164.5611572448, 512.2354736128], [149.7452392656, 391.5917968896, 327.5358886656, 511.1771240448], [93.65649416160001, 318.5706787328, 478.8695068032, 438.1560058368]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047149_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, two chairs, and a desk.", "boxes_value": [[1.58636472, 48.57067873279999, 478.8695068032, 242], [202.0142822208, 0, 428.2979736432, 242], [0.0538940016, 0, 177.75781247519998, 242], [1.58636472, 159.6897583104, 164.5611572448, 242], [149.7452392656, 121.5917968896, 327.5358886656, 241.1771240448], [93.65649416160001, 48.57067873279999, 478.8695068032, 168.15600583679998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047151.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[77.7406616235, 293.5769653248, 351.415405242, 511.8872680448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047151_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[68.7406616235, 55.57696532480003, 342.415405242, 273.8872680448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047151.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a desk, a couch, and three pillows.", "boxes_value": [[77.7406616235, 293.5769653248, 351.415405242, 511.8872680448], [0.8734131225, 318.75299072, 227.07763675649997, 510.3046264832], [77.7406616235, 429.9761352704, 170.383178691, 511.3035278336], [159.693359391, 408.2326660096, 351.415405242, 511.8872680448], [28.3141479465, 274.053222656, 365.855590836, 512.393554688], [120.091796865, 293.5769653248, 198.671203647, 346.520568832], [154.64447023949998, 281.3163451904, 248.270996097, 387.203552256], [140.04034422750001, 276.6515503104, 195.7249145475, 306.1315917824]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047151_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a desk, a couch, and three pillows.", "boxes_value": [[68.7406616235, 55.57696532480003, 342.415405242, 273.8872680448], [0, 80.75299072000001, 218.07763675649997, 272.3046264832], [68.7406616235, 191.9761352704, 161.383178691, 273.3035278336], [150.693359391, 170.2326660096, 342.415405242, 273.8872680448], [19.3141479465, 36.053222656, 356.855590836, 274], [111.091796865, 55.57696532480003, 189.671203647, 108.52056883199998], [145.64447023949998, 43.3163451904, 239.270996097, 149.20355225600002], [131.04034422750001, 38.65155031040001, 186.7249145475, 68.1315917824]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047152.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[184.5870361088, 124.6240234496, 382.6409301504, 217.219299328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047152_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[49.58703610879999, 23.624023449600003, 247.64093015039998, 116.219299328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047152.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a tent, and three people.", "boxes_value": [[184.5870361088, 124.6240234496, 382.6409301504, 217.219299328], [184.5870361088, 124.6240234496, 271.2252197376, 203.2198486528], [53.3349609472, 55.6174926848, 496.1494751232, 210.1786499072], [282.0223998976, 145.8280639488, 321.7906493952, 217.219299328], [306.9375000064, 145.348937984, 338.5604248064, 215.3027954176], [349.1014404096, 162.1187133952, 382.6409301504, 215.3027954176]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047152_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a tent, and three people.", "boxes_value": [[49.58703610879999, 23.624023449600003, 247.64093015039998, 116.219299328], [49.58703610879999, 23.624023449600003, 136.2252197376, 102.21984865280001], [0, 0, 297, 109.1786499072], [147.0223998976, 44.82806394880001, 186.79064939519998, 116.219299328], [171.9375000064, 44.348937984, 203.56042480640002, 114.30279541760001], [214.1014404096, 61.11871339519999, 247.64093015039998, 114.30279541760001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047153.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[16.3023071423, 130.5836791808, 462.03808590890003, 349.5496216064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047153_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[16.3023071423, 55.583679180800004, 462.03808590890003, 274.5496216064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047153.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a cup, a bottle, and two moniters.", "boxes_value": [[16.3023071423, 130.5836791808, 462.03808590890003, 349.5496216064], [16.3023071423, 153.5261840896, 71.16418454160001, 349.5496216064], [426.0467529468, 238.8828735488, 462.03808590890003, 267.3487548928], [116.5635376125, 130.5836791808, 140.5867919593, 174.50115968], [140.7828979245, 192.2545165824, 230.25445556510002, 260.7014160384], [22.9846801756, 190.3714599424, 52.090209988699996, 240.1788940288]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047153_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a cup, a bottle, and two moniters.", "boxes_value": [[16.3023071423, 55.583679180800004, 462.03808590890003, 274.5496216064], [16.3023071423, 78.52618408960001, 71.16418454160001, 274.5496216064], [426.0467529468, 163.8828735488, 462.03808590890003, 192.3487548928], [116.5635376125, 55.583679180800004, 140.5867919593, 99.50115968], [140.7828979245, 117.25451658239999, 230.25445556510002, 185.7014160384], [22.9846801756, 115.37145994240001, 52.090209988699996, 165.1788940288]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047155.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe.", "boxes_value": [[573.82421877, 240.835083008, 676.45788573, 377.0119018496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047155_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe.", "boxes_value": [[25.824218770000016, 34.835083008, 128.45788573000004, 171.0119018496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047155.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four wine glasses, and a moniter.", "boxes_value": [[573.82421877, 240.835083008, 676.45788573, 377.0119018496], [662.686157265, 349.6572875776, 676.45788573, 377.0119018496], [639.4819336125, 342.4885254144, 653.6308593374999, 371.1636962816], [604.58117676, 346.2615356416, 619.862060565, 376.068664576], [588.7343749649999, 340.4133300736, 601.751464875, 369.4658203136], [573.82421877, 240.835083008, 586.4709472875, 266.5299072512]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047155_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four wine glasses, and a moniter.", "boxes_value": [[25.824218770000016, 34.835083008, 128.45788573000004, 171.0119018496], [114.68615726500002, 143.65728757760002, 128.45788573000004, 171.0119018496], [91.48193361250003, 136.4885254144, 105.63085933749994, 165.16369628159998], [56.58117675999995, 140.2615356416, 71.86206056499998, 170.068664576], [40.734374964999915, 134.41333007359998, 53.75146487500001, 163.46582031359998], [25.824218770000016, 34.835083008, 38.47094728750005, 60.52990725119997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047156.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[210.37445065, 241.2084960768, 744.7817382604001, 362.695251456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047156_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[134.37445065, 31.208496076800003, 668.7817382604001, 152.695251456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047156.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three lamps, a guitar, and a car.", "boxes_value": [[210.37445065, 241.2084960768, 744.7817382604001, 362.695251456], [712.144165069, 241.2084960768, 744.7817382604001, 285.0879516672], [561.7290038758, 257.76129152, 585.997192357, 292.3840331776], [444.4281005848, 269.676879872, 467.01245116719997, 297.9667358208], [210.37445065, 339.3329467904, 229.15856937119997, 362.0556640768], [643.2663574516, 322.0531005952, 705.1972656452, 362.695251456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047156_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three lamps, a guitar, and a car.", "boxes_value": [[134.37445065, 31.208496076800003, 668.7817382604001, 152.695251456], [636.144165069, 31.208496076800003, 668.7817382604001, 75.0879516672], [485.7290038758, 47.761291519999986, 509.997192357, 82.38403317759997], [368.4281005848, 59.67687987199997, 391.01245116719997, 87.96673582080001], [134.37445065, 129.3329467904, 153.15856937119997, 152.05566407679999], [567.2663574516, 112.05310059520002, 629.1972656452, 152.695251456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047157.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[537.7854004267999, 210.2527465984, 717.3729248097001, 312.6257324032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047157_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[45.785400426799924, 26.252746598399995, 225.37292480970007, 128.6257324032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047157.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, a nightstand, a carpet, a desk, and a telephone.", "boxes_value": [[537.7854004267999, 210.2527465984, 717.3729248097001, 312.6257324032], [208.83917239689998, 78.0812988416, 773.0531006166002, 511.2636718592], [537.7854004267999, 245.1857910272, 673.5644530877, 312.6257324032], [684.1555175597999, 251.0295410176, 717.3729248097001, 281.052917504], [681.5490722534, 210.2527465984, 714.1320801039, 271.8383789056], [542.8060302476999, 235.983947776, 572.5874023299, 253.847045888]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047157_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, a nightstand, a carpet, a desk, and a telephone.", "boxes_value": [[45.785400426799924, 26.252746598399995, 225.37292480970007, 128.6257324032], [0, 0, 270, 154], [45.785400426799924, 61.1857910272, 181.56445308770003, 128.6257324032], [192.15551755979993, 67.0295410176, 225.37292480970007, 97.05291750399999], [189.54907225340003, 26.252746598399995, 222.13208010389997, 87.8383789056], [50.80603024769994, 51.98394777600001, 80.58740232989999, 69.847045888]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047158.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[262.1470947358, 145.0435790848, 682.8928222740001, 217.2319335936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047158_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[106.14709473580001, 19.0435790848, 526.8928222740001, 91.23193359359999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047158.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bench, a person, a glasses, two trash bin cans, and a parrot.", "boxes_value": [[262.1470947358, 145.0435790848, 682.8928222740001, 217.2319335936], [370.2672119101, 174.9360351744, 435.49743651399996, 204.1981811712], [648.0786133115, 145.0435790848, 682.8928222740001, 217.2319335936], [284.1217651662, 202.9326171648, 365.6968994472, 233.4404296704], [456.47375491639997, 167.9191894528, 481.04638673339997, 206.6943359488], [262.1470947358, 175.8095092736, 281.3092040868, 200.1566772224], [359.4873046828, 141.7910766592, 430.70251465179996, 284.9176635904]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047158_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bench, a person, a glasses, two trash bin cans, and a parrot.", "boxes_value": [[106.14709473580001, 19.0435790848, 526.8928222740001, 91.23193359359999], [214.2672119101, 48.93603517439999, 279.49743651399996, 78.19818117119999], [492.0786133115, 19.0435790848, 526.8928222740001, 91.23193359359999], [128.12176516620002, 76.9326171648, 209.6968994472, 107.4404296704], [300.47375491639997, 41.9191894528, 325.04638673339997, 80.69433594879999], [106.14709473580001, 49.809509273600014, 125.30920408679998, 74.15667722239999], [203.48730468280002, 15.791076659200002, 274.70251465179996, 109]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047159.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations.", "boxes_value": [[36.9278564151, 257.5307006976, 682.8874511619999, 303.3001709056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047159_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations.", "boxes_value": [[36.9278564151, 11.530700697600025, 682.8874511619999, 57.3001709056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047159.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations. For your reference, objects involved in this region include four buses, and two cars.", "boxes_value": [[36.9278564151, 257.5307006976, 682.8874511619999, 303.3001709056], [0.38195798309999995, 255.4544067584, 107.7083740393, 298.5174560768], [36.9278564151, 257.5307006976, 110.91442869859998, 283.0469360128], [152.784301749, 265.3717041152, 184.5441284279, 294.5761718784], [174.7551880179, 264.7633056768, 284.5694580062, 296.2797851648], [653.1357422204, 288.3739013632, 682.8874511619999, 303.3001709056], [644.5632323917, 286.9619750912, 665.5406493995999, 301.3839721472]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047159_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations. For your reference, objects involved in this region include four buses, and two cars.", "boxes_value": [[36.9278564151, 11.530700697600025, 682.8874511619999, 57.3001709056], [0.38195798309999995, 9.454406758400012, 107.7083740393, 52.51745607679999], [36.9278564151, 11.530700697600025, 110.91442869859998, 37.04693601280002], [152.784301749, 19.371704115199975, 184.5441284279, 48.576171878399975], [174.7551880179, 18.763305676799973, 284.5694580062, 50.27978516479999], [653.1357422204, 42.37390136319999, 682.8874511619999, 57.3001709056], [644.5632323917, 40.961975091199974, 665.5406493995999, 55.383972147199984]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047161.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[0, 141.5232543744, 227.6381835632, 512.0915527168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047161_crop.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[0, 93.52325437440001, 227.6381835632, 464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047161.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, two people, a book, and a handbag.", "boxes_value": [[0, 141.5232543744, 227.6381835632, 512.0915527168], [0, 141.5232543744, 83.0317382607, 463.39709470720004], [62.5672607413, 274.5054321152, 168.2326660121, 495.0245361152], [135.5963134688, 208.3974609408, 342.0261230277, 511.7405395456], [40.9834594633, 291.5703124992, 65.0978393286, 326.9380493312], [87.6418457186, 433.9196777472, 227.6381835632, 512.0915527168]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047161_crop.jpg", "text": "Please describe the region in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, two people, a book, and a handbag.", "boxes_value": [[0, 93.52325437440001, 227.6381835632, 464], [0, 93.52325437440001, 83.0317382607, 415.39709470720004], [62.5672607413, 226.5054321152, 168.2326660121, 447.0245361152], [135.5963134688, 160.3974609408, 284, 463.7405395456], [40.9834594633, 243.57031249919999, 65.0978393286, 278.9380493312], [87.6418457186, 385.9196777472, 227.6381835632, 464]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047162.jpg", "text": "In , what elements can be found within the coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[47.005981440599996, 224.165344256, 191.9954833914, 399.1527099392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047162_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[37.005981440599996, 44.165344256, 181.9954833914, 219.1527099392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047162.jpg", "text": "In , what elements can be found within the coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, three boots, and two motorcycles.", "boxes_value": [[47.005981440599996, 224.165344256, 191.9954833914, 399.1527099392], [45.570861784499996, 166.0388183552, 162.8524169766, 350.338439936], [45.3967285473, 297.3195190272, 69.3185424672, 352.7574462976], [119.0607300129, 303.7746582016, 138.80572508400002, 351.2385864192], [163.9895630178, 273.5443115008, 181.2061156971, 295.2699585024], [130.3332519294, 224.165344256, 191.9954833914, 313.6033325056], [47.005981440599996, 234.7201538048, 155.3314819524, 399.1527099392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047162_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, three boots, and two motorcycles.", "boxes_value": [[37.005981440599996, 44.165344256, 181.9954833914, 219.1527099392], [35.570861784499996, 0, 152.8524169766, 170.338439936], [35.3967285473, 117.31951902719999, 59.318542467200004, 172.75744629759998], [109.0607300129, 123.7746582016, 128.80572508400002, 171.23858641919998], [153.9895630178, 93.5443115008, 171.2061156971, 115.26995850240002], [120.3332519294, 44.165344256, 181.9954833914, 133.6033325056], [37.005981440599996, 54.72015380479999, 145.3314819524, 219.1527099392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047163.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[140.563842752, 70.393249488, 249.80535891199997, 375.099304176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047163_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[27.563842752, 70.393249488, 136.80535891199997, 375.099304176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047163.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a gun, a person, two leather shoes, and a hat.", "boxes_value": [[140.563842752, 70.393249488, 249.80535891199997, 375.099304176], [140.563842752, 218.94561768, 168.963317888, 344.194702128], [133.64764403200002, 71.367858864, 284.11401369600003, 377.896362288], [227.790100096, 336.205688496, 249.80535891199997, 375.099304176], [169.962646464, 331.79144284800003, 212.28582764799998, 363.33166502399996], [145.17468262399998, 70.393249488, 179.061767552, 102.527587872]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047163_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a gun, a person, two leather shoes, and a hat.", "boxes_value": [[27.563842752, 70.393249488, 136.80535891199997, 375.099304176], [27.563842752, 218.94561768, 55.963317888000006, 344.194702128], [20.647644032000017, 71.367858864, 164, 377.896362288], [114.790100096, 336.205688496, 136.80535891199997, 375.099304176], [56.96264646399999, 331.79144284800003, 99.28582764799998, 363.33166502399996], [32.174682623999985, 70.393249488, 66.06176755199999, 102.527587872]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047169.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[260.3939209228, 61.8859252736, 391.8164062778, 228.9019775488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047169_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[33.39392092280002, 41.8859252736, 164.8164062778, 208.9019775488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047169.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three pillows, a lamp, and a bed.", "boxes_value": [[260.3939209228, 61.8859252736, 391.8164062778, 228.9019775488], [340.70764157220003, 121.2085571072, 391.8164062778, 228.9019775488], [306.02673341940005, 101.1301269504, 353.4848632448, 217.0374145536], [260.3939209228, 109.3440551936, 315.153320286, 178.7058715648], [287.7735595378, 61.8859252736, 316.065917958, 98.3921508864], [26.754028289799997, 53.3480224768, 716.3939208748, 485.6749267456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047169_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three pillows, a lamp, and a bed.", "boxes_value": [[33.39392092280002, 41.8859252736, 164.8164062778, 208.9019775488], [113.70764157220003, 101.2085571072, 164.8164062778, 208.9019775488], [79.02673341940005, 81.1301269504, 126.48486324480001, 197.0374145536], [33.39392092280002, 89.3440551936, 88.153320286, 158.7058715648], [60.7735595378, 41.8859252736, 89.065917958, 78.3921508864], [0, 33.3480224768, 197, 250]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047170.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[185.7022705152, 462.99999996720004, 477.3001098752, 772.2094726272001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047170_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[73.70227051520001, 77.99999996720004, 365.3001098752, 387]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047170.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two tripods, and three drums.", "boxes_value": [[185.7022705152, 462.99999996720004, 477.3001098752, 772.2094726272001], [133.8231811584, 229.3280029192, 379.899536128, 768.8519286728], [267.5, 462.99999996720004, 381.5, 772.0], [336.4324340736, 551.8300781404, 512.0383300608, 771.965576134], [185.7022705152, 518.791870154, 372.5464477696, 772.2094726272001], [347.3652954112, 510.7338867012, 477.3001098752, 772.2094726272001], [381.1080932864, 498.6468506112, 509.9884643328, 771.1148681864]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047170_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two tripods, and three drums.", "boxes_value": [[73.70227051520001, 77.99999996720004, 365.3001098752, 387], [21.82318115839999, 0, 267.899536128, 383.8519286728], [155.5, 77.99999996720004, 269.5, 387], [224.4324340736, 166.83007814040002, 400, 386.965576134], [73.70227051520001, 133.79187015399998, 260.5464477696, 387], [235.3652954112, 125.73388670119999, 365.3001098752, 387], [269.1080932864, 113.64685061120002, 397.9884643328, 386.11486818640003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047171.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[260.09921289650003, 379.64917961099997, 381.83502197350003, 476.5614624004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047171_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[31.099212896500035, 24.649179610999965, 152.83502197350003, 121.56146240039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047171.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and three skiboards.", "boxes_value": [[260.09921289650003, 379.64917961099997, 381.83502197350003, 476.5614624004], [355.36584472149997, 407.7414550846, 381.83502197350003, 470.0459594636], [278.401489262, 418.32916259679996, 314.23669435, 476.5614624004], [268.221008298, 395.1176757892, 299.9840698205, 468.41705320020003], [260.09921289650003, 379.64917961099997, 309.0104992015, 425.1783025126], [271.806701665, 409.568317522, 349.33629373, 439.7476218174], [343.092299746, 394.9989982172, 420.8820582755, 431.9426293468]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047171_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and three skiboards.", "boxes_value": [[31.099212896500035, 24.649179610999965, 152.83502197350003, 121.56146240039999], [126.36584472149997, 52.74145508459998, 152.83502197350003, 115.04595946360001], [49.401489261999984, 63.32916259679996, 85.23669435, 121.56146240039999], [39.221008298000015, 40.11767578920001, 70.9840698205, 113.41705320020003], [31.099212896500035, 24.649179610999965, 80.01049920150001, 70.1783025126], [42.80670166499999, 54.56831752199997, 120.33629373000002, 84.74762181739999], [114.09229974599998, 39.99899821719998, 183, 76.9426293468]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047172.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference.", "boxes_value": [[458.607299824, 424.7825317376, 551.712768578, 497.8992920064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047172_crop.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference.", "boxes_value": [[23.607299823999995, 18.78253173759998, 116.71276857800001, 91.8992920064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047172.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[458.607299824, 424.7825317376, 551.712768578, 497.8992920064], [395.010375951, 22.5388793856, 603.259033189, 498.7254028288], [279.611389172, 27.8445434368, 569.435180649, 498.7254028288], [458.607299824, 424.7825317376, 497.79577636899995, 490.0089721856], [490.69445802700005, 474.2283935744, 519.625610375, 497.110229504], [521.729614256, 475.8064574976, 551.712768578, 497.8992920064]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047172_crop.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[23.607299823999995, 18.78253173759998, 116.71276857800001, 91.8992920064], [0, 0, 139, 92.72540282879999], [0, 0, 134.43518064900002, 92.72540282879999], [23.607299823999995, 18.78253173759998, 62.79577636899995, 84.00897218559999], [55.69445802700005, 68.2283935744, 84.62561037499995, 91.11022950400002], [86.72961425599999, 69.80645749759998, 116.71276857800001, 91.8992920064]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047173.jpg", "text": "Can you divulge the contents of the area within the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[196.0004882751, 98.3791503872, 615.1809081726001, 325.6787109376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047173_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[105.0004882751, 57.3791503872, 524.1809081726001, 284.6787109376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047173.jpg", "text": "Can you divulge the contents of the area within the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five lamps, and an umbrella.", "boxes_value": [[196.0004882751, 98.3791503872, 615.1809081726001, 325.6787109376], [569.9904785394, 98.3791503872, 615.1809081726001, 140.8335571456], [528.0614013423, 113.654846208, 570.6269531487, 157.7909545984], [487.88366699039995, 123.202087424, 527.9822998263, 164.3909912064], [492.6885985887, 227.9566040064, 521.8962402414, 273.715087872], [360.7944335898, 255.0169677824, 385.1340332079, 283.7377319424], [196.0004882751, 262.3368530432, 275.2856445246, 325.6787109376]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047173_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five lamps, and an umbrella.", "boxes_value": [[105.0004882751, 57.3791503872, 524.1809081726001, 284.6787109376], [478.9904785394, 57.3791503872, 524.1809081726001, 99.8335571456], [437.0614013423, 72.654846208, 479.62695314869995, 116.79095459839999], [396.88366699039995, 82.202087424, 436.98229982630005, 123.3909912064], [401.6885985887, 186.9566040064, 430.89624024140005, 232.71508787200003], [269.7944335898, 214.0169677824, 294.1340332079, 242.7377319424], [105.0004882751, 221.3368530432, 184.2856445246, 284.6787109376]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047174.jpg", "text": "Please enlighten me about the region in the given photo . Please point out the objects and their coordinates.", "boxes_value": [[38.2886352384, 52.8866769925, 312.3827078144, 299.84954835179997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047174_crop.jpg", "text": "Please enlighten me about the region in the given photo . Please point out the objects and their coordinates.", "boxes_value": [[38.2886352384, 52.8866769925, 312.3827078144, 299.84954835179997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047174.jpg", "text": "Please enlighten me about the region in the given photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a flower, a vase, a person, a glasses, and a street lights.", "boxes_value": [[38.2886352384, 52.8866769925, 312.3827078144, 299.84954835179997], [38.2886352384, 208.09899902930002, 135.8955688448, 275.44781494970005], [46.0971679744, 262.7589111605, 130.0391845888, 299.84954835179997], [62.898071296, 190.2979736455, 78.2532348416, 225.8067626978], [264.6150313984, 154.4750463642, 312.3827078144, 168.62843200530003], [151.856941824, 52.8866769925, 161.9745102336, 143.5400902067]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047174_crop.jpg", "text": "Please enlighten me about the region in the given photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a flower, a vase, a person, a glasses, and a street lights.", "boxes_value": [[38.2886352384, 52.8866769925, 312.3827078144, 299.84954835179997], [38.2886352384, 208.09899902930002, 135.8955688448, 275.44781494970005], [46.0971679744, 262.7589111605, 130.0391845888, 299.84954835179997], [62.898071296, 190.2979736455, 78.2532348416, 225.8067626978], [264.6150313984, 154.4750463642, 312.3827078144, 168.62843200530003], [151.856941824, 52.8866769925, 161.9745102336, 143.5400902067]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047175.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[241.003906272, 139.6026000896, 367.20373536000005, 266.289733888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047175_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[32.003906271999995, 32.6026000896, 158.20373536000005, 159.289733888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047175.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include six people, and a gloves.", "boxes_value": [[241.003906272, 139.6026000896, 367.20373536000005, 266.289733888], [163.042602552, 161.5292358144, 313.60534668, 320.3753661952], [241.003906272, 154.220336896, 336.506469696, 266.289733888], [282.42083743200004, 139.6026000896, 367.20373536000005, 260.929870592], [141.700561488, 209.73620608, 697.508056656, 435.1622314496], [163.042602552, 161.5292358144, 313.60534668, 320.3753661952], [241.003906272, 154.220336896, 336.506469696, 266.289733888], [282.42083743200004, 139.6026000896, 367.20373536000005, 260.929870592]], "boxes_seq": [[0], [0], [1, 2, 3, 5, 6, 7], [4]]}, {"image_path": "objects365_v1_00047175_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include six people, and a gloves.", "boxes_value": [[32.003906271999995, 32.6026000896, 158.20373536000005, 159.289733888], [0, 54.52923581440001, 104.60534668000003, 190], [32.003906271999995, 47.22033689599999, 127.50646969600001, 159.289733888], [73.42083743200004, 32.6026000896, 158.20373536000005, 153.929870592], [0, 102.73620607999999, 189, 190], [0, 54.52923581440001, 104.60534668000003, 190], [32.003906271999995, 47.22033689599999, 127.50646969600001, 159.289733888], [73.42083743200004, 32.6026000896, 158.20373536000005, 153.929870592]], "boxes_seq": [[0], [0], [1, 2, 3, 5, 6, 7], [4]]}, {"image_path": "objects365_v1_00047176.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[326.8847656468, 186.0936279552, 701.0109862984, 288.3396606464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047176_crop.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[93.88476564680002, 26.09362795519999, 468.0109862984, 128.33966064639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047176.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a mirror, two wine glasses, a tea pot, and a plate.", "boxes_value": [[326.8847656468, 186.0936279552, 701.0109862984, 288.3396606464], [326.8847656468, 186.0936279552, 367.223999018, 231.0874633728], [618.2744140786, 264.8455200256, 637.0472412082, 300.8001709056], [670.3541259927, 215.6817627136, 701.0109862984, 239.7263794176], [547.3259277665001, 200.2531127808, 588.0013427755999, 219.8895263744], [492.426757815, 257.886108416, 508.4879150428, 288.3396606464]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047176_crop.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a mirror, two wine glasses, a tea pot, and a plate.", "boxes_value": [[93.88476564680002, 26.09362795519999, 468.0109862984, 128.33966064639998], [93.88476564680002, 26.09362795519999, 134.22399901799997, 71.08746337279999], [385.2744140786, 104.84552002560002, 404.04724120820003, 140.8001709056], [437.3541259927, 55.681762713599994, 468.0109862984, 79.72637941759999], [314.32592776650006, 40.253112780799995, 355.00134277559994, 59.88952637439999], [259.426757815, 97.88610841600001, 275.4879150428, 128.33966064639998]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047177.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[227.6108398631, 300.2685547008, 387.4552001903, 410.448242176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047177_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[40.61083986310001, 28.268554700799996, 200.4552001903, 138.448242176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047177.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two cups, a bottle, and a desk.", "boxes_value": [[227.6108398631, 300.2685547008, 387.4552001903, 410.448242176], [139.7241821516, 170.6788329984, 295.4031372088, 414.7951660032], [265.0009155037, 144.3669433344, 464.9689941548, 363.6330566656], [347.8226318108, 300.2685547008, 387.4552001903, 370.4226684416], [263.5465697952, 324.4125366272, 324.1342163016, 400.0332031488], [227.6108398631, 355.0514526208, 259.315917945, 410.448242176], [80.1724243287, 348.3329467904, 498.98706054739995, 512.0181884928]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047177_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two cups, a bottle, and a desk.", "boxes_value": [[40.61083986310001, 28.268554700799996, 200.4552001903, 138.448242176], [0, 0, 108.40313720879999, 142.7951660032], [78.00091550370001, 0, 240, 91.63305666560001], [160.82263181079998, 28.268554700799996, 200.4552001903, 98.42266844160002], [76.54656979520001, 52.412536627199984, 137.13421630160002, 128.0332031488], [40.61083986310001, 83.05145262079998, 72.31591794500002, 138.448242176], [0, 76.3329467904, 240, 165]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047178.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[10.6024169664, 285.0661010944, 595.9331054567999, 361.933959936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047178_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[10.6024169664, 20.066101094399983, 595.9331054567999, 96.93395993600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047178.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three trucks, a car, and a machinery vehicle.", "boxes_value": [[10.6024169664, 285.0661010944, 595.9331054567999, 361.933959936], [484.6677246432, 285.0661010944, 595.9331054567999, 341.9111328256], [321.2694091908, 331.7073974784, 365.0979003768, 361.933959936], [90.95086672560001, 287.4342041088, 220.1017456152, 321.3596801536], [33.328552262399995, 300.0016479744, 72.794555676, 335.2549438464], [10.6024169664, 302.8840331776, 49.6249389936, 322.3952636928]], "boxes_seq": [[0], [0], [1, 3, 5], [2], [4]]}, {"image_path": "objects365_v1_00047178_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three trucks, a car, and a machinery vehicle.", "boxes_value": [[10.6024169664, 20.066101094399983, 595.9331054567999, 96.93395993600001], [484.6677246432, 20.066101094399983, 595.9331054567999, 76.91113282560002], [321.2694091908, 66.70739747840003, 365.0979003768, 96.93395993600001], [90.95086672560001, 22.434204108799975, 220.1017456152, 56.35968015359998], [33.328552262399995, 35.00164797439999, 72.794555676, 70.25494384640001], [10.6024169664, 37.884033177599974, 49.6249389936, 57.39526369279997]], "boxes_seq": [[0], [0], [1, 3, 5], [2], [4]]}, {"image_path": "objects365_v1_00047179.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[279.8942870784, 99.3427124224, 434.36022950399996, 215.5313110528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047179_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[38.8942870784, 29.342712422399998, 193.36022950399996, 145.5313110528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047179.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a book, a barrel, and two stuffed toys.", "boxes_value": [[279.8942870784, 99.3427124224, 434.36022950399996, 215.5313110528], [348.99597166079997, 99.3427124224, 434.36022950399996, 177.6833496064], [325.670898432, 196.2969360384, 355.20935055359996, 215.5313110528], [311.6462402304, 97.3501586944, 349.7689208832, 137.5523681792], [279.8942870784, 112.9957275136, 317.9310302976, 183.0435180544], [314.5416259584, 135.9683837952, 356.34436032, 178.9009399296]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047179_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a book, a barrel, and two stuffed toys.", "boxes_value": [[38.8942870784, 29.342712422399998, 193.36022950399996, 145.5313110528], [107.99597166079997, 29.342712422399998, 193.36022950399996, 107.68334960639999], [84.670898432, 126.29693603839999, 114.20935055359996, 145.5313110528], [70.6462402304, 27.350158694399994, 108.7689208832, 67.55236817919999], [38.8942870784, 42.9957275136, 76.93103029759999, 113.04351805440001], [73.5416259584, 65.9683837952, 115.34436032000002, 108.90093992960001]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047180.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[253.2377929728, 357.6628418207, 441.3052368384, 400.26354980060006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047180_crop.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[47.23779297280001, 10.66284182070001, 235.3052368384, 53.26354980060006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047180.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[253.2377929728, 357.6628418207, 441.3052368384, 400.26354980060006], [428.8060913152, 357.6628418207, 441.3052368384, 390.93969728440004], [415.1707153408, 359.610839849, 426.2088623104, 391.4267578346], [295.4484863488, 367.11572266720003, 307.9875488256, 399.02209471400005], [282.5369872896, 365.1293945423, 295.4484863488, 395.7941894504], [253.2377929728, 363.7636719086, 266.8941650432, 400.26354980060006]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047180_crop.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[47.23779297280001, 10.66284182070001, 235.3052368384, 53.26354980060006], [222.80609131519998, 10.66284182070001, 235.3052368384, 43.93969728440004], [209.17071534079997, 12.610839849000001, 220.20886231039998, 44.42675783459998], [89.44848634879997, 20.115722667200032, 101.98754882560002, 52.02209471400005], [76.5369872896, 18.12939454230002, 89.44848634879997, 48.79418945039998], [47.23779297280001, 16.763671908599974, 60.89416504320002, 53.26354980060006]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047181.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[23.8632812576, 264.3866576896, 547.8124999822, 367.4497680896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047181_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[23.8632812576, 26.386657689599986, 547.8124999822, 129.4497680896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047181.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, an umbrella, a blackboard, two suvs, and a car.", "boxes_value": [[23.8632812576, 264.3866576896, 547.8124999822, 367.4497680896], [299.4409179624, 264.3866576896, 323.7145996374, 319.5541381632], [523.9433594016, 273.4442748928, 547.8124999822, 286.1021117952], [196.41308597120002, 280.6538085888, 224.3041381924, 311.9110107648], [497.0031737904, 244.9204711936, 524.9223633072, 340.3585205248], [23.8632812576, 294.9093627904, 146.1696777214, 367.4497680896], [179.9093627648, 303.3442993152, 269.3195800852, 350.57989504], [464.15307618380007, 299.1588134912, 561.0936279338, 342.968505856]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 7], [6]]}, {"image_path": "objects365_v1_00047181_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, an umbrella, a blackboard, two suvs, and a car.", "boxes_value": [[23.8632812576, 26.386657689599986, 547.8124999822, 129.4497680896], [299.4409179624, 26.386657689599986, 323.7145996374, 81.55413816319998], [523.9433594016, 35.444274892800024, 547.8124999822, 48.10211179520002], [196.41308597120002, 42.65380858880002, 224.3041381924, 73.91101076479998], [497.0031737904, 6.920471193600008, 524.9223633072, 102.35852052479999], [23.8632812576, 56.909362790399996, 146.1696777214, 129.4497680896], [179.9093627648, 65.34429931519998, 269.3195800852, 112.57989504], [464.15307618380007, 61.15881349120002, 561.0936279338, 104.96850585599998]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 7], [6]]}, {"image_path": "objects365_v1_00047182.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[262.9436034816, 111.2695922688, 662.2834472448, 362.3065185792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047182_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[99.94360348160001, 63.2695922688, 499.28344724479996, 314.3065185792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047182.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a soccer, three people, and a sneakers.", "boxes_value": [[262.9436034816, 111.2695922688, 662.2834472448, 362.3065185792], [337.07312010239997, 330.312194816, 363.699462912, 355.1925048832], [586.9150390272, 111.2695922688, 662.2834472448, 294.0889892352], [449.3668213248, 155.1539306496, 493.2005615616, 362.3065185792], [262.9436034816, 132.4305419776, 332.71240235519997, 356.7934570496], [298.7124082176, 343.7069228032, 331.3521520128, 357.0723351552]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047182_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a soccer, three people, and a sneakers.", "boxes_value": [[99.94360348160001, 63.2695922688, 499.28344724479996, 314.3065185792], [174.07312010239997, 282.312194816, 200.699462912, 307.1925048832], [423.9150390272, 63.2695922688, 499.28344724479996, 246.08898923520002], [286.3668213248, 107.1539306496, 330.2005615616, 314.3065185792], [99.94360348160001, 84.4305419776, 169.71240235519997, 308.7934570496], [135.7124082176, 295.7069228032, 168.3521520128, 309.0723351552]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047183.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[498.765869112, 116.3738403328, 649.414916997, 190.1958618112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047183_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[37.76586911200002, 19.3738403328, 188.41491699699998, 93.19586181119999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047183.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a hat, a head phone, a moniter, and a microphone.", "boxes_value": [[498.765869112, 116.3738403328, 649.414916997, 190.1958618112], [512.1425781255, 117.4973144576, 669.073974639, 290.024353024], [600.436636737, 122.1464321536, 668.964139326, 185.7791130624], [595.1807861684999, 116.3738403328, 649.414916997, 180.651367168], [523.3348388415, 125.3396606464, 577.7271728339999, 179.0994872832], [498.765869112, 170.1264648192, 587.1417236685, 190.1958618112]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047183_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a hat, a head phone, a moniter, and a microphone.", "boxes_value": [[37.76586911200002, 19.3738403328, 188.41491699699998, 93.19586181119999], [51.142578125499995, 20.497314457599998, 208.07397463899997, 111], [139.43663673699996, 25.146432153600003, 207.964139326, 88.7791130624], [134.18078616849994, 19.3738403328, 188.41491699699998, 83.65136716800001], [62.33483884149996, 28.339660646400006, 116.72717283399993, 82.0994872832], [37.76586911200002, 73.12646481920001, 126.14172366850005, 93.19586181119999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047184.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[28.7893676544, 307.6519164966, 103.3225097728, 470.3667602535]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047184_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[18.7893676544, 41.651916496599995, 93.3225097728, 204.3667602535]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047184.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two sneakers, and a desk.", "boxes_value": [[28.7893676544, 307.6519164966, 103.3225097728, 470.3667602535], [59.2755737088, 307.6519164966, 103.3225097728, 356.5543823199], [28.7893676544, 313.2299194479, 76.5736694272, 470.3667602535], [34.5192260608, 447.1732177641, 49.0459594752, 470.1738891825], [47.0283813376, 433.049987799, 61.555114752, 455.243591304], [55.346740736, 349.1557617087, 149.0682983424, 430.818786639]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047184_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two sneakers, and a desk.", "boxes_value": [[18.7893676544, 41.651916496599995, 93.3225097728, 204.3667602535], [49.2755737088, 41.651916496599995, 93.3225097728, 90.55438231990001], [18.7893676544, 47.22991944789999, 66.5736694272, 204.3667602535], [24.5192260608, 181.1732177641, 39.0459594752, 204.17388918249998], [37.0283813376, 167.049987799, 51.555114752, 189.243591304], [45.346740736, 83.15576170870003, 111, 164.818786639]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047185.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please mention the objects and their locations.", "boxes_value": [[173.46209717760001, 272.33288576, 305.15881344, 450.3610229248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047185_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please mention the objects and their locations.", "boxes_value": [[33.462097177600015, 45.33288576000001, 165.15881344000002, 223.3610229248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047185.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a person, a bracelet, a spoon, and a cup.", "boxes_value": [[173.46209717760001, 272.33288576, 305.15881344, 450.3610229248], [0, 330.6027831808, 489.76013184, 512.4036865024], [102.6147460608, 0.9046020608, 729.1149902592, 511.55133056], [216.5364989952, 272.33288576, 233.9340209664, 320.5969238528], [180.393493632, 238.2599487488, 228.91333009919998, 355.400695808], [173.46209717760001, 318.6642456064, 305.15881344, 450.3610229248]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047185_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a person, a bracelet, a spoon, and a cup.", "boxes_value": [[33.462097177600015, 45.33288576000001, 165.15881344000002, 223.3610229248], [0, 103.60278318079997, 198, 267], [0, 0, 198, 267], [76.53649899519999, 45.33288576000001, 93.9340209664, 93.59692385279999], [40.393493632, 11.259948748800014, 88.91333009919998, 128.40069580800002], [33.462097177600015, 91.66424560640002, 165.15881344000002, 223.3610229248]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047186.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[185.5982055424, 408.1441650176, 360.3168945152, 509.3154296832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047186_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[44.598205542399995, 26.144165017600017, 219.31689451519998, 127.31542968320002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047186.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a bench, a desk, two people, a boots, and a tripod.", "boxes_value": [[185.5982055424, 408.1441650176, 360.3168945152, 509.3154296832], [180.4847412224, 435.4156494336, 231.9660644352, 482.9177856512], [207.3803710976, 433.2583007744, 360.3168945152, 509.3154296832], [185.5982055424, 408.1441650176, 253.836242688, 461.4031372288], [174.9028320256, 368.7864379904, 227.9182739456, 480.3334350336], [330.9066162176, 381.022460928, 362.4270019584, 435.77209472], [192.626708992, 445.6642456064, 215.1383056896, 474.2367553536], [316.5195922944, 393.3161620992, 364.345581056, 457.243652352]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047186_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a bench, a desk, two people, a boots, and a tripod.", "boxes_value": [[44.598205542399995, 26.144165017600017, 219.31689451519998, 127.31542968320002], [39.484741222400004, 53.41564943359998, 90.9660644352, 100.9177856512], [66.3803710976, 51.25830077440003, 219.31689451519998, 127.31542968320002], [44.598205542399995, 26.144165017600017, 112.836242688, 79.4031372288], [33.90283202559999, 0, 86.91827394559999, 98.33343503359998], [189.9066162176, 0, 221.4270019584, 53.772094719999984], [51.626708992000005, 63.664245606400016, 74.1383056896, 92.23675535360002], [175.5195922944, 11.316162099200028, 223.34558105600001, 75.24365235200003]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047187.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe.", "boxes_value": [[215.1856689132, 366.4019775488, 379.005981466, 431.7817382912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047187_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe.", "boxes_value": [[41.185668913200004, 16.401977548800005, 205.00598146599998, 81.78173829119999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047187.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two forks, a knife, a desk, and two napkins.", "boxes_value": [[215.1856689132, 366.4019775488, 379.005981466, 431.7817382912], [307.9721679564, 389.0357055488, 366.4335937624, 439.6394653184], [322.74462889759997, 390.6072387584, 379.005981466, 431.7817382912], [262.0830078008, 393.750366208, 310.4865722652, 411.0373535232], [205.2833862612, 298.5889282048, 553.6588135108001, 510.4011841024], [342.13452150399996, 375.5286255104, 432.27807615399996, 424.86395264], [215.1856689132, 366.4019775488, 286.0283203232, 408.8374633984]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047187_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two forks, a knife, a desk, and two napkins.", "boxes_value": [[41.185668913200004, 16.401977548800005, 205.00598146599998, 81.78173829119999], [133.97216795640003, 39.035705548800024, 192.4335937624, 89.63946531840003], [148.74462889759997, 40.6072387584, 205.00598146599998, 81.78173829119999], [88.08300780079998, 43.750366208, 136.48657226519998, 61.03735352320001], [31.283386261200008, 0, 245, 98], [168.13452150399996, 25.52862551039999, 245, 74.86395263999998], [41.185668913200004, 16.401977548800005, 112.02832032319998, 58.83746339840002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047188.jpg", "text": "Offer a thorough description of the area within the illustration . Specify the location of each mentioned object.", "boxes_value": [[390.2172851544, 263.2922973696, 585.5278017974999, 512.053955072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047188_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Specify the location of each mentioned object.", "boxes_value": [[49.217285154399974, 62.29229736960002, 244.52780179749993, 311]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047188.jpg", "text": "Offer a thorough description of the area within the illustration . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, two sneakers, and two dogs.", "boxes_value": [[390.2172851544, 263.2922973696, 585.5278017974999, 512.053955072], [405.2468261751, 0, 698.5762939502999, 497.3989868032], [527.6617474224, 422.6395969536, 585.5278017974999, 460.8900396544], [545.3157978801, 445.197550336, 609.556926018, 496.6885308928], [390.2172851544, 263.2922973696, 549.7706298699, 512.053955072], [338.1501464526, 275.8146362368, 428.1749267667, 477.4702148608]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047188_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, two sneakers, and two dogs.", "boxes_value": [[49.217285154399974, 62.29229736960002, 244.52780179749993, 311], [64.24682617510001, 0, 293, 296.3989868032], [186.66174742240003, 221.6395969536, 244.52780179749993, 259.8900396544], [204.31579788010004, 244.197550336, 268.556926018, 295.6885308928], [49.217285154399974, 62.29229736960002, 208.77062986989995, 311], [0, 74.81463623680003, 87.17492676670003, 276.4702148608]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047190.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[133.565185578, 328.7731933696, 336.494873036, 512.1842040832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047190_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[51.56518557800001, 46.773193369599994, 254.494873036, 230]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047190.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, a desk, a cabinet, and a computer box.", "boxes_value": [[133.565185578, 328.7731933696, 336.494873036, 512.1842040832], [232.0294189476, 264.9509887488, 352.346313495, 435.8839721472], [226.2210083186, 292.3334350336, 395.3059081938, 409.331298816], [129.5341796712, 273.346740736, 237.36370851459998, 472.8818359296], [133.565185578, 328.7731933696, 332.09252932600003, 512.1842040832], [273.2753906036, 358.9934692352, 336.494873036, 402.0833129984]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00047190_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, a desk, a cabinet, and a computer box.", "boxes_value": [[51.56518557800001, 46.773193369599994, 254.494873036, 230], [150.0294189476, 0, 270.346313495, 153.88397214719998], [144.2210083186, 10.333435033599983, 305, 127.33129881600001], [47.53417967120001, 0, 155.36370851459998, 190.88183592960002], [51.56518557800001, 46.773193369599994, 250.09252932600003, 230], [191.2753906036, 76.9934692352, 254.494873036, 120.0833129984]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00047191.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[230.828552262, 330.5233764864, 383.125000027, 374.286682112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047191_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[38.82855226199999, 11.523376486400025, 191.125000027, 55.286682111999994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047191.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[230.828552262, 330.5233764864, 383.125000027, 374.286682112], [223.44793701999998, 198.9884033024, 309.61706545, 374.863525376], [295.233764672, 196.044616704, 399.371948206, 387.658935552], [230.828552262, 361.032653824, 258.587036169, 374.286682112], [293.347656217, 330.5233764864, 309.602661137, 359.0320434688], [352.86584471599997, 346.2781371904, 383.125000027, 357.7816772608]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047191_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[38.82855226199999, 11.523376486400025, 191.125000027, 55.286682111999994], [31.447937019999983, 0, 117.61706544999998, 55.863525375999984], [103.233764672, 0, 207.371948206, 66], [38.82855226199999, 42.03265382400002, 66.58703616899999, 55.286682111999994], [101.34765621700001, 11.523376486400025, 117.60266113699998, 40.0320434688], [160.86584471599997, 27.278137190400003, 191.125000027, 38.781677260799995]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047192.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[368.7049560576, 8.0082397696, 425.6539306752, 419.9588622848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047192_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[14.704956057599986, 8.0082397696, 71.6539306752, 419.9588622848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047192.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two handbags, a luggage, two moniters, and a lamp.", "boxes_value": [[368.7049560576, 8.0082397696, 425.6539306752, 419.9588622848], [368.7049560576, 306.7531127808, 413.16394045439995, 419.9588622848], [404.5318579968, 363.9640701952, 420.9053955072, 404.9422249472], [356.48864747519997, 346.479004928, 392.0684814336, 422.3053588992], [358.640625024, 363.9142455808, 390.20336916480005, 387.8732299776], [393.94421383680003, 285.1638793728, 425.6539306752, 309.0164184576], [355.3123779072, 286.0057373184, 384.9643554816, 308.6422729728], [394.8648681984, 8.0082397696, 425.3684081664, 149.9879150592]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00047192_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two handbags, a luggage, two moniters, and a lamp.", "boxes_value": [[14.704956057599986, 8.0082397696, 71.6539306752, 419.9588622848], [14.704956057599986, 306.7531127808, 59.16394045439995, 419.9588622848], [50.5318579968, 363.9640701952, 66.90539550720001, 404.9422249472], [2.4886474751999685, 346.479004928, 38.06848143360003, 422.3053588992], [4.640625023999974, 363.9142455808, 36.20336916480005, 387.8732299776], [39.94421383680003, 285.1638793728, 71.6539306752, 309.0164184576], [1.3123779072000161, 286.0057373184, 30.964355481600023, 308.6422729728], [40.86486819840002, 8.0082397696, 71.3684081664, 149.9879150592]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7]]}, {"image_path": "objects365_v1_00047193.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference.", "boxes_value": [[562.568359353, 28.1425781248, 662.196044895, 296.596374528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047193_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference.", "boxes_value": [[25.568359353000005, 28.1425781248, 125.196044895, 296.596374528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047193.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, three people, and a street lights.", "boxes_value": [[562.568359353, 28.1425781248, 662.196044895, 296.596374528], [562.568359353, 192.6995239424, 582.8244629129999, 225.3574829056], [593.2210693640001, 28.1425781248, 616.154296912, 68.4494628864], [569.103637718, 176.5388793856, 595.361694312, 231.0242309632], [598.643920875, 188.3549804544, 624.901977546, 246.122619648], [633.517333988, 155.7330932736, 662.196044895, 296.596374528]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047193_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, three people, and a street lights.", "boxes_value": [[25.568359353000005, 28.1425781248, 125.196044895, 296.596374528], [25.568359353000005, 192.6995239424, 45.824462912999934, 225.3574829056], [56.22106936400007, 28.1425781248, 79.15429691199995, 68.4494628864], [32.10363771799996, 176.5388793856, 58.361694312000054, 231.0242309632], [61.64392087500005, 188.3549804544, 87.90197754600001, 246.122619648], [96.51733398800002, 155.7330932736, 125.196044895, 296.596374528]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047194.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each object you identify.", "boxes_value": [[299.0928811008, 165.7872314368, 651.6998282495999, 243.3044126208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047194_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each object you identify.", "boxes_value": [[89.09288110080001, 19.7872314368, 441.69982824959993, 97.30441262080001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047194.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a paddle, three helmets, and a boat.", "boxes_value": [[299.0928811008, 165.7872314368, 651.6998282495999, 243.3044126208], [389.8084717056, 165.7872314368, 562.4169921792, 230.9707641856], [299.0928811008, 196.70473728, 350.2783596288, 238.9195649536], [442.4097794304, 179.0343072256, 497.8908685056, 231.2194899968], [581.9364786432, 191.1192298496, 651.6998282495999, 243.3044126208], [0.21234132479999998, 181.0244750848, 766.4522704896, 389.8923339776]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047194_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a paddle, three helmets, and a boat.", "boxes_value": [[89.09288110080001, 19.7872314368, 441.69982824959993, 97.30441262080001], [179.80847170560003, 19.7872314368, 352.41699217919995, 84.97076418559999], [89.09288110080001, 50.70473727999999, 140.2783596288, 92.9195649536], [232.40977943040002, 33.0343072256, 287.8908685056, 85.21948999680001], [371.93647864319996, 45.1192298496, 441.69982824959993, 97.30441262080001], [0, 35.0244750848, 529, 116]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047195.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[203.2098388429, 172.4248657408, 445.2823486583, 314.719299328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047195_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[61.20983884290001, 36.42486574079999, 303.2823486583, 178.71929932799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047195.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two hats, two ties, and a bottle.", "boxes_value": [[203.2098388429, 172.4248657408, 445.2823486583, 314.719299328], [203.2098388429, 261.9585571328, 252.9508056515, 305.0673828352], [386.9196777166, 241.3989868032, 410.79528809, 305.7305908224], [402.1735839569, 172.4248657408, 445.2823486583, 213.5440673792], [266.9663085858, 225.8876953088, 291.3604736639, 314.719299328], [281.7778320379, 239.313354496, 292.0044555814, 279.302124032]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00047195_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two hats, two ties, and a bottle.", "boxes_value": [[61.20983884290001, 36.42486574079999, 303.2823486583, 178.71929932799998], [61.20983884290001, 125.95855713280002, 110.95080565149999, 169.06738283520002], [244.9196777166, 105.39898680319999, 268.79528809, 169.73059082240002], [260.1735839569, 36.42486574079999, 303.2823486583, 77.54406737919999], [124.9663085858, 89.8876953088, 149.3604736639, 178.71929932799998], [139.7778320379, 103.31335449599999, 150.0044555814, 143.302124032]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00047198.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[237.9773559808, 213.6458740112, 512.8001709056, 681.0947265586]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047198_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[68.97735598080001, 117.6458740112, 343, 585.0947265586]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047198.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a storage box, and two cabinets.", "boxes_value": [[237.9773559808, 213.6458740112, 512.8001709056, 681.0947265586], [237.9773559808, 454.9768066731, 490.4240722432, 681.0947265586], [303.0250244096, 506.8601074124, 512.8001709056, 680.4949951183], [372.2612304896, 340.5568847774, 440.8967895552, 401.8221435749], [406.9098510848, 213.6458740112, 429.9366455296, 334.1978759768], [429.9366455296, 209.58233641540002, 510.7561645568, 435.3352050949]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047198_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a storage box, and two cabinets.", "boxes_value": [[68.97735598080001, 117.6458740112, 343, 585.0947265586], [68.97735598080001, 358.9768066731, 321.4240722432, 585.0947265586], [134.0250244096, 410.8601074124, 343, 584.4949951183], [203.2612304896, 244.5568847774, 271.8967895552, 305.8221435749], [237.90985108479998, 117.6458740112, 260.9366455296, 238.1978759768], [260.9366455296, 113.58233641540002, 341.7561645568, 339.3352050949]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047204.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[229.6315917824, 219.057311988, 475.0245361152, 368.1241454704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047204_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[61.631591782399994, 38.05731198800001, 307.0245361152, 187.12414547039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047204.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two bowls, three cups, two bottles, and a plate.", "boxes_value": [[229.6315917824, 219.057311988, 475.0245361152, 368.1241454704], [0, 184.44073489439998, 477.5952758784, 452.110961944], [187.4192504832, 272.6474609408, 262.5686034944, 316.99792479039996], [239.0375976448, 297.9025879, 300.1433105408, 367.5081787088], [287.2077636608, 313.3020019232, 346.3417358336, 374.4938964624], [388.8442382848, 219.057311988, 454.7539673088, 255.4000244128], [385.7643432448, 287.4309082024, 462.9907836928, 368.1241454704], [290.1604614144, 249.9309081776, 335.4160766464, 317.36169434560003], [229.6315917824, 219.46850585200002, 265.016174336, 289.3419189376], [393.4802856448, 261.44677735519997, 475.0245361152, 290.9858398624]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4, 6], [7, 8], [9]]}, {"image_path": "objects365_v1_00047204_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two bowls, three cups, two bottles, and a plate.", "boxes_value": [[61.631591782399994, 38.05731198800001, 307.0245361152, 187.12414547039998], [0, 3.4407348943999807, 309.5952758784, 224], [19.41925048319999, 91.64746094079999, 94.56860349440001, 135.99792479039996], [71.0375976448, 116.90258790000001, 132.14331054079997, 186.50817870880002], [119.20776366080003, 132.3020019232, 178.3417358336, 193.49389646240002], [220.84423828479999, 38.05731198800001, 286.7539673088, 74.40002441280001], [217.7643432448, 106.43090820240002, 294.9907836928, 187.12414547039998], [122.1604614144, 68.9309081776, 167.41607664639997, 136.36169434560003], [61.631591782399994, 38.46850585200002, 97.016174336, 108.34191893759998], [225.4802856448, 80.44677735519997, 307.0245361152, 109.98583986239998]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4, 6], [7, 8], [9]]}, {"image_path": "objects365_v1_00047205.jpg", "text": "Please provide insights on the specified area within the graphic . Please mention the objects and their locations.", "boxes_value": [[99.1393432624, 254.9547119104, 290.552917466, 434.6002197504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047205_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please mention the objects and their locations.", "boxes_value": [[48.139343262400004, 44.95471191039999, 239.552917466, 224.60021975040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047205.jpg", "text": "Please provide insights on the specified area within the graphic . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, two flags, a hat, and a handbag.", "boxes_value": [[99.1393432624, 254.9547119104, 290.552917466, 434.6002197504], [99.1393432624, 254.9547119104, 115.6879883096, 277.2317505024], [200.2800292892, 26.978271488, 631.5456543116, 501.2877197312], [163.02331540039998, 299.4641723392, 202.7597045992, 320.9432983552], [257.97167970559997, 388.283752448, 290.552917466, 434.6002197504], [200.2800292892, 26.978271488, 631.5456543116, 501.2877197312]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047205_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, two flags, a hat, and a handbag.", "boxes_value": [[48.139343262400004, 44.95471191039999, 239.552917466, 224.60021975040002], [48.139343262400004, 44.95471191039999, 64.6879883096, 67.23175050240002], [149.2800292892, 0, 287, 269], [112.02331540039998, 89.46417233919999, 151.7597045992, 110.9432983552], [206.97167970559997, 178.28375244799997, 239.552917466, 224.60021975040002], [149.2800292892, 0, 287, 269]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047209.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[134.9279174584, 282.23272704, 352.8612060314, 446.7580566528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047209_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[54.9279174584, 41.232727039999986, 272.8612060314, 205.7580566528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047209.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a stool, a cabinet, three people, and a bottle.", "boxes_value": [[134.9279174584, 282.23272704, 352.8612060314, 446.7580566528], [143.209838882, 367.055847168, 215.11505124340002, 446.7580566528], [292.4552612102, 418.66229248, 352.8612060314, 446.7580566528], [134.9279174584, 282.23272704, 194.1539306696, 397.5369262592], [270.4812621984, 304.0681152512, 357.2181396388, 443.0500488192], [330.733337385, 338.2598266368, 372.84399412700003, 439.9962768384], [226.3881836106, 301.7390136832, 252.474487331, 356.1475219968], [263.1076660212, 342.7753295872, 275.9045409978, 378.0946655232]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047209_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a stool, a cabinet, three people, and a bottle.", "boxes_value": [[54.9279174584, 41.232727039999986, 272.8612060314, 205.7580566528], [63.209838882000014, 126.05584716800001, 135.11505124340002, 205.7580566528], [212.45526121019998, 177.66229248000002, 272.8612060314, 205.7580566528], [54.9279174584, 41.232727039999986, 114.15393066959999, 156.53692625920002], [190.48126219839997, 63.06811525120003, 277.2181396388, 202.05004881920001], [250.73333738500003, 97.25982663680003, 292.84399412700003, 198.99627683839998], [146.3881836106, 60.73901368320003, 172.474487331, 115.14752199679998], [183.10766602119998, 101.77532958720002, 195.90454099779998, 137.09466552319998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047210.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations.", "boxes_value": [[124.4840698476, 181.5917358592, 258.8466186732, 295.1282958848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047210_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations.", "boxes_value": [[34.4840698476, 28.591735859200014, 168.8466186732, 142.12829588480002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047210.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a desk, three pillows, and two couches.", "boxes_value": [[124.4840698476, 181.5917358592, 258.8466186732, 295.1282958848], [181.42810061330002, 181.5917358592, 209.38012695729998, 228.632995584], [186.2003784301, 228.632995584, 233.9234618991, 263.4026489344], [226.2176513406, 276.2378540032, 258.8466186732, 295.1282958848], [94.67639158309998, 255.1741943296, 238.19476317730002, 325.8293456896], [124.4840698476, 209.9107055616, 163.67559814860002, 250.2062377984], [106.2683105438, 214.3266601472, 152.08374024809999, 251.8621826048], [0.28546142090000004, 206.0467529216, 197.3472289783, 312.0295410176]], "boxes_seq": [[0], [0], [1], [2], [3, 5, 6], [4, 7]]}, {"image_path": "objects365_v1_00047210_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a desk, three pillows, and two couches.", "boxes_value": [[34.4840698476, 28.591735859200014, 168.8466186732, 142.12829588480002], [91.42810061330002, 28.591735859200014, 119.38012695729998, 75.63299558400001], [96.20037843009999, 75.63299558400001, 143.9234618991, 110.40264893440002], [136.2176513406, 123.23785400320003, 168.8466186732, 142.12829588480002], [4.676391583099985, 102.1741943296, 148.19476317730002, 170], [34.4840698476, 56.9107055616, 73.67559814860002, 97.2062377984], [16.2683105438, 61.32666014719999, 62.083740248099986, 98.86218260480001], [0, 53.04675292159999, 107.3472289783, 159.02954101760002]], "boxes_seq": [[0], [0], [1], [2], [3, 5, 6], [4, 7]]}, {"image_path": "objects365_v1_00047213.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[0, 308.8178711268, 138.3514404352, 486.1512450984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047213_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[0, 44.817871126800014, 138.3514404352, 222.1512450984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047213.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a pillow, a bed, a person, a bracelet, and a hat.", "boxes_value": [[0, 308.8178711268, 138.3514404352, 486.1512450984], [0, 308.8178711268, 138.3514404352, 486.1512450984], [0.207336448, 205.329406744, 512.7237549056, 685.0874023322], [0.2213745152, 172.1384887376, 334.4730834944, 686.1003417687999], [116.42840576, 435.279296893, 136.18371584, 472.88574218760004], [83.394714368, 374.6842040704, 145.1358032384, 431.1195068376]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047213_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a pillow, a bed, a person, a bracelet, and a hat.", "boxes_value": [[0, 44.817871126800014, 138.3514404352, 222.1512450984], [0, 44.817871126800014, 138.3514404352, 222.1512450984], [0.207336448, 0, 172, 266], [0.2213745152, 0, 172, 266], [116.42840576, 171.27929689299998, 136.18371584, 208.88574218760004], [83.394714368, 110.68420407040003, 145.1358032384, 167.1195068376]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047214.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[170.7124634128, 305.0673828352, 644.90930173, 489.4404296704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047214_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[118.71246341279999, 47.06738283520002, 592.90930173, 231.44042967040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047214.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a guitar, two cymbals, and two people.", "boxes_value": [[170.7124634128, 305.0673828352, 644.90930173, 489.4404296704], [170.7124634128, 305.0673828352, 644.90930173, 489.4404296704], [419.1125487979, 297.2306518528, 579.9077148594, 326.4661254656], [487.81591794310003, 399.0675659264, 605.2451172122, 417.0961303552], [136.7430419877, 166.8292236288, 390.3087157938, 511.8273925632001], [379.6027832037, 276.8479614464, 496.9920654257, 511.9953613312]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047214_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a guitar, two cymbals, and two people.", "boxes_value": [[118.71246341279999, 47.06738283520002, 592.90930173, 231.44042967040002], [118.71246341279999, 47.06738283520002, 592.90930173, 231.44042967040002], [367.1125487979, 39.23065185280001, 527.9077148594, 68.46612546559999], [435.81591794310003, 141.0675659264, 553.2451172122, 159.0961303552], [84.7430419877, 0, 338.3087157938, 253.82739256320008], [327.6027832037, 18.847961446400006, 444.9920654257, 253.9953613312]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047215.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[309.09472656680003, 257.5006713856, 473.29052734259994, 364.456848128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047215_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[41.094726566800034, 27.500671385600015, 205.29052734259994, 134.456848128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047215.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two pillows, a lamp, a nightstand, a bed, and a telephone.", "boxes_value": [[309.09472656680003, 257.5006713856, 473.29052734259994, 364.456848128], [309.09472656680003, 259.6311035392, 400.925537127, 364.456848128], [271.8426513348, 286.487304704, 393.12854001860006, 372.2537841664], [398.18518062960004, 257.5006713856, 452.92297363640006, 335.7884521472], [388.6378173734, 328.787109376, 473.29052734259994, 361.2479247872], [117.21087646000001, 212.7406616064, 698.8997802554, 510.348937984], [385.65588377920005, 325.5152587776, 416.751098659, 341.8239746048]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047215_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two pillows, a lamp, a nightstand, a bed, and a telephone.", "boxes_value": [[41.094726566800034, 27.500671385600015, 205.29052734259994, 134.456848128], [41.094726566800034, 29.631103539200012, 132.92553712699998, 134.456848128], [3.8426513347999958, 56.487304703999996, 125.12854001860006, 142.2537841664], [130.18518062960004, 27.500671385600015, 184.92297363640006, 105.78845214720002], [120.63781737340003, 98.78710937599999, 205.29052734259994, 131.24792478720002], [0, 0, 246, 161], [117.65588377920005, 95.51525877760002, 148.75109865899998, 111.82397460480001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047216.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 0, 640.6820068228, 132.0196558336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047216_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 0, 640.6820068228, 132.0196558336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047216.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and two helmets.", "boxes_value": [[0, 0, 640.6820068228, 132.0196558336], [139.2781982646, 0, 284.1350708092, 94.8593750016], [0, 0, 98.1295165854, 96.2566528512], [346.5126952839, 0, 466.0026855572, 95.393981952], [535.7702636527, 0, 640.6820068228, 93.5717163008], [88.46624755469999, 80.8194313728, 142.2271553657, 132.0196558336], [455.19877260280003, 36.3491374592, 516.1061672352, 103.1130123264]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047216_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and two helmets.", "boxes_value": [[0, 0, 640.6820068228, 132.0196558336], [139.2781982646, 0, 284.1350708092, 94.8593750016], [0, 0, 98.1295165854, 96.2566528512], [346.5126952839, 0, 466.0026855572, 95.393981952], [535.7702636527, 0, 640.6820068228, 93.5717163008], [88.46624755469999, 80.8194313728, 142.2271553657, 132.0196558336], [455.19877260280003, 36.3491374592, 516.1061672352, 103.1130123264]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047217.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe.", "boxes_value": [[272.39886476839996, 294.744384768, 646.0711669632001, 376.0057373184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047217_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe.", "boxes_value": [[94.39886476839996, 20.744384767999975, 468.0711669632001, 102.00573731840001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047217.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and four cars.", "boxes_value": [[272.39886476839996, 294.744384768, 646.0711669632001, 376.0057373184], [564.7850341604, 298.2892455936, 592.5441894321, 414.4552002048], [455.1528320285, 289.8134765568, 467.2050780998, 316.5976562688], [378.1536865551, 295.4672851456, 394.576660127, 325.6207885824], [495.27050778309996, 302.1373291008, 668.6734619392, 385.0733032448], [335.76647952589997, 297.146240256, 491.4891357234, 376.0057373184], [272.39886476839996, 295.9337158144, 401.4749755628, 366.3389282304], [586.5599365162001, 294.744384768, 646.0711669632001, 329.062561024]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7]]}, {"image_path": "objects365_v1_00047217_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and four cars.", "boxes_value": [[94.39886476839996, 20.744384767999975, 468.0711669632001, 102.00573731840001], [386.78503416039996, 24.289245593600015, 414.5441894321, 122], [277.1528320285, 15.813476556800026, 289.2050780998, 42.597656268799994], [200.15368655510002, 21.467285145599988, 216.57666012700003, 51.620788582399996], [317.27050778309996, 28.137329100800002, 490.6734619392, 111.0733032448], [157.76647952589997, 23.146240256, 313.4891357234, 102.00573731840001], [94.39886476839996, 21.93371581439999, 223.4749755628, 92.33892823039997], [408.5599365162001, 20.744384767999975, 468.0711669632001, 55.06256102399999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7]]}, {"image_path": "objects365_v1_00047218.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[40.591186541, 420.1726074368, 563.1179199503999, 455.7823486464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047218_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[40.591186541, 9.172607436799979, 563.1179199503999, 44.78234864640001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047218.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five hats.", "boxes_value": [[40.591186541, 420.1726074368, 563.1179199503999, 455.7823486464], [294.0919799915, 429.869201664, 310.1110839841, 448.7151489024], [406.6965331701, 427.513427712, 429.31164552900003, 446.3593750016], [539.5604248181, 433.167236352, 563.1179199503999, 455.7823486464], [232.3291625923, 426.5952758784, 255.7404174528, 451.1845092864], [40.591186541, 420.1726074368, 58.849487319400005, 443.3544311296]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047218_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five hats.", "boxes_value": [[40.591186541, 9.172607436799979, 563.1179199503999, 44.78234864640001], [294.0919799915, 18.869201664000002, 310.1110839841, 37.71514890240002], [406.6965331701, 16.51342771200001, 429.31164552900003, 35.35937500159997], [539.5604248181, 22.167236351999975, 563.1179199503999, 44.78234864640001], [232.3291625923, 15.595275878400003, 255.7404174528, 40.18450928639999], [40.591186541, 9.172607436799979, 58.849487319400005, 32.354431129600016]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047219.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each object you identify.", "boxes_value": [[145.0181274156, 115.0481567232, 603.9019775276, 254.81030272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047219_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each object you identify.", "boxes_value": [[115.01812741559999, 35.048156723199995, 573.9019775276, 174.81030272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047219.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[145.0181274156, 115.0481567232, 603.9019775276, 254.81030272], [145.0181274156, 115.0481567232, 204.726623566, 229.4893798912], [233.0881347932, 118.0336303616, 291.3038330208, 197.147277824], [331.55834962119997, 116.262329088, 388.95153805160004, 197.4171142656], [415.637573264, 122.1113281024, 475.9553222632, 198.5137939456], [537.3696289139999, 126.8636474368, 603.9019775276, 254.81030272], [48.8728637536, 192.8281860096, 639.1795654548, 510.6091918848]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047219_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[115.01812741559999, 35.048156723199995, 573.9019775276, 174.81030272], [115.01812741559999, 35.048156723199995, 174.726623566, 149.4893798912], [203.0881347932, 38.033630361600004, 261.3038330208, 117.14727782400001], [301.55834962119997, 36.262329088, 358.95153805160004, 117.41711426559999], [385.637573264, 42.111328102399995, 445.9553222632, 118.51379394560001], [507.3696289139999, 46.863647436799994, 573.9019775276, 174.81030272], [18.8728637536, 112.82818600959999, 609.1795654548, 209]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047221.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify.", "boxes_value": [[581.6773681971, 248.8034057728, 745.0334472543001, 289.8870239232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047221_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify.", "boxes_value": [[41.67736819710001, 10.803405772800005, 205.03344725430009, 51.88702392319999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047221.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[581.6773681971, 248.8034057728, 745.0334472543001, 289.8870239232], [719.2747802294, 248.8034057728, 745.0334472543001, 288.2567138816], [694.8201903913, 256.6288452096, 718.2965088009, 289.8870239232], [654.7148437482, 251.4118652416, 686.6687011366, 285.9743042048], [629.9342040949, 257.6070556672, 652.1063232437, 285.9743042048], [609.0664062809001, 261.8458251776, 629.9342040949, 285.6481933824], [581.6773681971, 257.9331054592, 609.3924560961, 284.6700439552]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047221_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[41.67736819710001, 10.803405772800005, 205.03344725430009, 51.88702392319999], [179.2747802294, 10.803405772800005, 205.03344725430009, 50.25671388159998], [154.8201903913, 18.6288452096, 178.29650880090003, 51.88702392319999], [114.7148437482, 13.411865241599997, 146.66870113660002, 47.97430420479998], [89.9342040949, 19.607055667199973, 112.10632324369999, 47.97430420479998], [69.06640628090008, 23.845825177599977, 89.9342040949, 47.648193382399995], [41.67736819710001, 19.93310545920002, 69.39245609609998, 46.670043955200015]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047223.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object.", "boxes_value": [[245.58428954939998, 0.3897094656, 357.8894043268, 447.118408192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047223_crop.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object.", "boxes_value": [[28.584289549399983, 0.3897094656, 140.8894043268, 447.118408192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047223.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object. For your reference, objects involved in this region include a flower, two flags, a car, and a street lights.", "boxes_value": [[245.58428954939998, 0.3897094656, 357.8894043268, 447.118408192], [269.486511242, 330.6965942272, 328.0432739136, 370.7951660032], [245.58428954939998, 0.3897094656, 357.8894043268, 95.8184204288], [327.1776733172, 214.1549682688, 340.1458740233, 237.7637328896], [256.9154053016, 414.886840832, 386.14379880049995, 467.4611206144], [267.2960815441, 240.0769653248, 323.7173461588, 447.118408192]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047223_crop.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object. For your reference, objects involved in this region include a flower, two flags, a car, and a street lights.", "boxes_value": [[28.584289549399983, 0.3897094656, 140.8894043268, 447.118408192], [52.486511242000006, 330.6965942272, 111.04327391359999, 370.7951660032], [28.584289549399983, 0.3897094656, 140.8894043268, 95.8184204288], [110.1776733172, 214.1549682688, 123.1458740233, 237.7637328896], [39.9154053016, 414.886840832, 168, 467.4611206144], [50.29608154409999, 240.0769653248, 106.71734615880001, 447.118408192]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047225.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[76.3278808477, 27.5227661312, 186.8738403351, 230.1903076352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047225_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[28.3278808477, 27.5227661312, 138.8738403351, 230.1903076352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047225.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include six pictures.", "boxes_value": [[76.3278808477, 27.5227661312, 186.8738403351, 230.1903076352], [76.3278808477, 27.5227661312, 128.0183105147, 88.9371337728], [140.8129883091, 33.1524047872, 186.8738403351, 92.5196533248], [78.886840817, 99.1729125888, 129.0419311522, 158.5401611264], [141.83660887829998, 102.7554321408, 188.9209594897, 160.5872802816], [81.4457397261, 171.8466186752, 131.0890503068, 230.1903076352], [140.3012084863, 171.8466186752, 187.8973999124, 229.6785278464]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047225_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include six pictures.", "boxes_value": [[28.3278808477, 27.5227661312, 138.8738403351, 230.1903076352], [28.3278808477, 27.5227661312, 80.0183105147, 88.9371337728], [92.8129883091, 33.1524047872, 138.8738403351, 92.5196533248], [30.886840817000007, 99.1729125888, 81.0419311522, 158.5401611264], [93.83660887829998, 102.7554321408, 140.9209594897, 160.5872802816], [33.4457397261, 171.8466186752, 83.0890503068, 230.1903076352], [92.3012084863, 171.8466186752, 139.8973999124, 229.6785278464]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047226.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations.", "boxes_value": [[0, 181.57562255419998, 462.1242675712, 384.8944702298]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047226_crop.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations.", "boxes_value": [[0, 51.57562255419998, 462.1242675712, 254.8944702298]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047226.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, three cars, a pickup truck, and a sports car.", "boxes_value": [[0, 181.57562255419998, 462.1242675712, 384.8944702298], [195.9504394752, 283.6346435602, 258.8626098688, 313.6823120283], [294.7112427008, 147.5751342939, 479.0695190528, 209.3137207141], [247.163024896, 152.5559692464, 455.7111816192, 234.6963500682], [169.4492797952, 181.57562255419998, 432.5938110464, 235.6801147201], [93.7030029312, 202.2337036125, 452.7600097792, 296.6705932431], [0, 244.5039062562, 462.1242675712, 384.8944702298]], "boxes_seq": [[0], [0], [1], [2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00047226_crop.jpg", "text": "Kindly give an overview of the section in photo . Please mention the objects and their locations. For your reference, objects involved in this region include a chair, three cars, a pickup truck, and a sports car.", "boxes_value": [[0, 51.57562255419998, 462.1242675712, 254.8944702298], [195.9504394752, 153.6346435602, 258.8626098688, 183.68231202829998], [294.7112427008, 17.575134293899993, 479.0695190528, 79.31372071409999], [247.163024896, 22.555969246399997, 455.7111816192, 104.69635006819999], [169.4492797952, 51.57562255419998, 432.5938110464, 105.6801147201], [93.7030029312, 72.2337036125, 452.7600097792, 166.67059324309997], [0, 114.5039062562, 462.1242675712, 254.8944702298]], "boxes_seq": [[0], [0], [1], [2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00047231.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object.", "boxes_value": [[263.6552124196, 123.2781982208, 322.0039062444, 286.0687866368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047231_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object.", "boxes_value": [[14.65521241959999, 41.27819822079999, 73.00390624440001, 204.06878663679998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047231.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three street lights, two vans, and a car.", "boxes_value": [[263.6552124196, 123.2781982208, 322.0039062444, 286.0687866368], [298.6982421594, 157.079467776, 305.9700317621, 238.2326660096], [293.9131469597, 168.8473510912, 300.11053467, 237.4455566336], [273.90808105770003, 123.2781982208, 293.33746340299996, 275.534118656], [262.4149169987, 274.9087524352, 321.52636719230003, 297.0278320128], [263.6552124196, 264.9848022528, 322.0039062444, 286.0687866368], [274.5014648326, 257.5759887872, 317.2297973729, 274.807373056]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047231_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three street lights, two vans, and a car.", "boxes_value": [[14.65521241959999, 41.27819822079999, 73.00390624440001, 204.06878663679998], [49.698242159400024, 75.079467776, 56.97003176210001, 156.2326660096], [44.913146959699986, 86.84735109120001, 51.11053466999999, 155.4455566336], [24.90808105770003, 41.27819822079999, 44.33746340299996, 193.53411865599998], [13.414916998699994, 192.90875243519997, 72.52636719230003, 215.0278320128], [14.65521241959999, 182.98480225280002, 73.00390624440001, 204.06878663679998], [25.501464832599993, 175.5759887872, 68.22979737290001, 192.80737305600002]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047232.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[276.3613238904, 354.4711804416, 731.9418908388001, 439.3574829056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047232_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[114.3613238904, 21.471180441599984, 569.9418908388001, 106.35748290560002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047232.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, two people, a handbag, two sneakers, and a bottle.", "boxes_value": [[276.3613238904, 354.4711804416, 731.9418908388001, 439.3574829056], [195.0645141444, 295.8184204288, 722.9616698939999, 441.976806656], [458.9793700968, 235.6550903296, 640.6534423524, 437.4493408256], [688.6413574044, 371.5332031488, 731.6837158296, 439.3574829056], [597.6417638772, 354.4711804416, 712.2010787688, 436.9709855232], [687.8534684724, 395.3569376256, 731.9418908388001, 423.829107968], [276.3613238904, 391.1808518144, 303.081620028, 438.4552219136], [302.1885986208, 398.5324096512, 318.016235388, 430.1876831232]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047232_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, two people, a handbag, two sneakers, and a bottle.", "boxes_value": [[114.3613238904, 21.471180441599984, 569.9418908388001, 106.35748290560002], [33.06451414439999, 0, 560.9616698939999, 108.97680665600001], [296.9793700968, 0, 478.6534423524, 104.44934082560002], [526.6413574044, 38.5332031488, 569.6837158296, 106.35748290560002], [435.6417638772, 21.471180441599984, 550.2010787688, 103.9709855232], [525.8534684724, 62.35693762559998, 569.9418908388001, 90.82910796800002], [114.3613238904, 58.1808518144, 141.08162002799997, 105.45522191359998], [140.1885986208, 65.5324096512, 156.01623538799998, 97.18768312319997]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047233.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates.", "boxes_value": [[568.224487304, 211.7311401472, 725.5451188620001, 309.762671872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047233_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates.", "boxes_value": [[40.224487304000036, 24.731140147199994, 197.5451188620001, 122.762671872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047233.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people, and a gloves.", "boxes_value": [[568.224487304, 211.7311401472, 725.5451188620001, 309.762671872], [642.3635253688, 219.6733398528, 737.2346191104, 308.943725568], [649.5527343756, 220.3087158272, 670.2023925624001, 239.3699951104], [613.6539306464, 265.7381591552, 631.4444580376, 296.5538940416], [568.224487304, 213.6372680704, 586.332763642, 261.2905273344], [579.9790039256, 211.7311401472, 607.9355468476, 260.9728393728], [696.7419821348, 293.9674033152, 725.5451188620001, 309.762671872]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047233_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people, and a gloves.", "boxes_value": [[40.224487304000036, 24.731140147199994, 197.5451188620001, 122.762671872], [114.36352536879997, 32.67333985280001, 209.2346191104, 121.94372556799999], [121.55273437560004, 33.30871582719999, 142.20239256240006, 52.3699951104], [85.6539306464, 78.73815915519998, 103.44445803760004, 109.55389404160002], [40.224487304000036, 26.63726807040001, 58.33276364200003, 74.2905273344], [51.979003925599955, 24.731140147199994, 79.9355468476, 73.97283937280002], [168.74198213479997, 106.96740331519999, 197.5451188620001, 122.762671872]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047234.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[148.9118652672, 214.6503906304, 241.2709960704, 397.444580096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047234_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[23.9118652672, 46.65039063040001, 116.2709960704, 229.44458009599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047234.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a train.", "boxes_value": [[148.9118652672, 214.6503906304, 241.2709960704, 397.444580096], [185.4707031552, 296.106018048, 215.6156616192, 397.444580096], [173.2844238336, 242.5505371136, 241.2709960704, 392.9548950016], [148.9118652672, 214.6503906304, 172.322326656, 263.7161865216], [166.2291869952, 307.6509399552, 188.0361938688, 390.7100219904], [80.02966310400001, 62.1054076928, 766.6325683968, 498.1911010816]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047234_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a train.", "boxes_value": [[23.9118652672, 46.65039063040001, 116.2709960704, 229.44458009599998], [60.4707031552, 128.106018048, 90.61566161920001, 229.44458009599998], [48.28442383359999, 74.5505371136, 116.2709960704, 224.9548950016], [23.9118652672, 46.65039063040001, 47.322326656, 95.71618652159998], [41.229186995199996, 139.6509399552, 63.03619386880001, 222.71002199039998], [0, 0, 139, 275]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047236.jpg", "text": "Can you divulge the contents of the area within the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[314.6160888749, 0.325012224, 451.10559081270003, 434.7799072256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047236_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.616088874900015, 0.325012224, 171.10559081270003, 434.7799072256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047236.jpg", "text": "Can you divulge the contents of the area within the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a book, three flags, and a tie.", "boxes_value": [[314.6160888749, 0.325012224, 451.10559081270003, 434.7799072256], [268.6611327888, 115.8215942144, 414.8503418013, 512.0173339648], [403.14855955859997, 270.1762084864, 451.10559081270003, 331.633117696], [314.6160888749, 141.3261718528, 325.7993164129, 184.0258788864], [341.87182615620003, 0.325012224, 400.5572510018, 434.7799072256], [405.84472657829997, 2.2689208832, 451.09948731979995, 435.221191424], [357.4223632579, 199.0299072512, 383.2816162002, 289.5375366144]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047236_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a book, three flags, and a tie.", "boxes_value": [[34.616088874900015, 0.325012224, 171.10559081270003, 434.7799072256], [0, 115.8215942144, 134.85034180129998, 512], [123.14855955859997, 270.1762084864, 171.10559081270003, 331.633117696], [34.616088874900015, 141.3261718528, 45.7993164129, 184.0258788864], [61.87182615620003, 0.325012224, 120.55725100180001, 434.7799072256], [125.84472657829997, 2.2689208832, 171.09948731979995, 435.221191424], [77.42236325789997, 199.0299072512, 103.2816162002, 289.5375366144]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047237.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[214.74871827750002, 360.3727416832, 581.2055664170999, 511.1954956288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047237_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[91.74871827750002, 38.37274168319999, 458.20556641709993, 189.19549562880002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047237.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a chair, a person, a mouse, and a keyboard.", "boxes_value": [[214.74871827750002, 360.3727416832, 581.2055664170999, 511.1954956288], [214.74871827750002, 378.1943359488, 491.81018069299995, 511.1954956288], [527.7407226253999, 360.3727416832, 581.2055664170999, 510.7425537024], [218.8427124142, 194.3108520448, 580.3881835717, 511.5159301632], [221.4471435866, 395.424621568, 254.7070922732, 421.6824951296], [226.26770021459998, 418.862060544, 397.7652588089, 512.7604980224]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047237_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a chair, a person, a mouse, and a keyboard.", "boxes_value": [[91.74871827750002, 38.37274168319999, 458.20556641709993, 189.19549562880002], [91.74871827750002, 56.19433594880002, 368.81018069299995, 189.19549562880002], [404.74072262539994, 38.37274168319999, 458.20556641709993, 188.74255370240002], [95.84271241420001, 0, 457.3881835717, 189.51593016319998], [98.44714358659999, 73.42462156800002, 131.7070922732, 99.6824951296], [103.26770021459998, 96.86206054399997, 274.7652588089, 190]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047238.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[521.1461181656, 133.7449951232, 683.0021972792999, 379.252258304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047238_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[41.1461181656, 61.7449951232, 203, 307.252258304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047238.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a car, and a suv.", "boxes_value": [[521.1461181656, 133.7449951232, 683.0021972792999, 379.252258304], [643.028686501, 216.4554443264, 683.0021972792999, 379.252258304], [489.5827636765, 199.0069580288, 669.2087402404001, 511.8813476352], [558.4592285169, 153.6611328, 629.4525146426, 293.083251968], [521.1461181656, 162.2339477504, 547.5708007856, 210.9110107648], [659.8209228618, 133.7449951232, 682.3420410338999, 176.427978496]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047238_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a car, and a suv.", "boxes_value": [[41.1461181656, 61.7449951232, 203, 307.252258304], [163.02868650100004, 144.4554443264, 203, 307.252258304], [9.582763676500008, 127.0069580288, 189.20874024040006, 368], [78.4592285169, 81.66113279999999, 149.45251464260002, 221.083251968], [41.1461181656, 90.2339477504, 67.5708007856, 138.9110107648], [179.8209228618, 61.7449951232, 202.34204103389993, 104.42797849600001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047240.jpg", "text": "Explain the content within the rectangular region of the image . Give coordinates for the items you reference.", "boxes_value": [[39.5170158441, 188.6378173952, 669.9333496249001, 353.0395507712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047240_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give coordinates for the items you reference.", "boxes_value": [[39.5170158441, 41.63781739519999, 669.9333496249001, 206.03955077120003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047240.jpg", "text": "Explain the content within the rectangular region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a stool, a person, a glasses, a handbag, and a backpack.", "boxes_value": [[39.5170158441, 188.6378173952, 669.9333496249001, 353.0395507712], [255.0859374838, 282.6674194432, 304.7183227282, 353.0395507712], [156.5750121945, 188.6378173952, 176.28460690080001, 251.4105224704], [432.35304241169996, 200.8188798464, 463.0544410656, 223.298598912], [39.5170158441, 203.5765380608, 53.561279293700004, 226.3961181696], [583.2296142415, 288.9668579328, 669.9333496249001, 343.1242065408]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047240_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a stool, a person, a glasses, a handbag, and a backpack.", "boxes_value": [[39.5170158441, 41.63781739519999, 669.9333496249001, 206.03955077120003], [255.0859374838, 135.6674194432, 304.7183227282, 206.03955077120003], [156.5750121945, 41.63781739519999, 176.28460690080001, 104.4105224704], [432.35304241169996, 53.818879846399994, 463.0544410656, 76.29859891199999], [39.5170158441, 56.576538060800004, 53.561279293700004, 79.39611816959999], [583.2296142415, 141.9668579328, 669.9333496249001, 196.1242065408]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047241.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[422.9936523504, 408.6545410048, 489.111328108, 512.0458984448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047241_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[16.993652350399998, 26.654541004800024, 83.11132810800001, 130]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047241.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a person, a handbag, a sandals, a desk, and a chair.", "boxes_value": [[422.9936523504, 408.6545410048, 489.111328108, 512.0458984448], [284.007568343, 250.7670898688, 455.46362305659994, 512.2012939264], [422.9936523504, 466.9313965056, 468.1129150326, 512.0458984448], [450.2218017924, 408.6545410048, 489.111328108, 438.0963134976], [168.3388671972, 139.7187500032, 480.70788573560003, 457.1259155456], [294.6540527432, 389.380004864, 465.23339847259996, 511.376647936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047241_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a person, a handbag, a sandals, a desk, and a chair.", "boxes_value": [[16.993652350399998, 26.654541004800024, 83.11132810800001, 130], [0, 0, 49.46362305659994, 130], [16.993652350399998, 84.93139650559999, 62.11291503259997, 130], [44.221801792400015, 26.654541004800024, 83.11132810800001, 56.09631349760002], [0, 0, 74.70788573560003, 75.12591554559998], [0, 7.380004864, 59.23339847259996, 129.37664793599998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047242.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[279.7617187584, 72.3970947072, 508.49816893440004, 484.264221184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047242_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[57.76171875839998, 72.3970947072, 286.49816893440004, 484.264221184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047242.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a helmet, three gloves, three sneakers, and two hockey sticks.", "boxes_value": [[279.7617187584, 72.3970947072, 508.49816893440004, 484.264221184], [279.7617187584, 72.3970947072, 508.49816893440004, 484.264221184], [390.53869632, 73.678771968, 457.21630855679996, 133.5237426688], [360.3054199296, 255.8895263744, 408.3780517632, 299.9561767424], [456.7369384704, 152.8765869056, 503.9511718656, 204.9553222656], [449.7961425408, 284.6973266432, 483.15844723199996, 339.8046875136], [278.8978271232, 433.4436645376, 323.1112060416, 476.7423095808], [409.3596191232, 449.42840576, 436.2423095808, 470.9345702912], [438.73864742399996, 442.8997192192, 473.8781738496, 483.0318603264], [154.9963989504, 161.119140608, 499.48730465279993, 464.71478272], [238.86645504, 262.3176880128, 548.007202176, 451.5451049984]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6, 7, 8], [9, 10]]}, {"image_path": "objects365_v1_00047242_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a helmet, three gloves, three sneakers, and two hockey sticks.", "boxes_value": [[57.76171875839998, 72.3970947072, 286.49816893440004, 484.264221184], [57.76171875839998, 72.3970947072, 286.49816893440004, 484.264221184], [168.53869631999999, 73.678771968, 235.21630855679996, 133.5237426688], [138.30541992960002, 255.8895263744, 186.3780517632, 299.9561767424], [234.7369384704, 152.8765869056, 281.9511718656, 204.9553222656], [227.79614254080002, 284.6973266432, 261.15844723199996, 339.8046875136], [56.89782712319999, 433.4436645376, 101.11120604159998, 476.7423095808], [187.3596191232, 449.42840576, 214.2423095808, 470.9345702912], [216.73864742399996, 442.8997192192, 251.8781738496, 483.0318603264], [0, 161.119140608, 277.48730465279993, 464.71478272], [16.866455040000005, 262.3176880128, 326.00720217599996, 451.5451049984]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6, 7, 8], [9, 10]]}, {"image_path": "objects365_v1_00047245.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[133.3308798388, 189.3822220288, 179.7877724468, 264.3572071424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047245_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[12.330879838800001, 19.382222028799987, 58.787772446800005, 94.3572071424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047245.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, three people, and two ties.", "boxes_value": [[133.3308798388, 189.3822220288, 179.7877724468, 264.3572071424], [22.152648940800002, 216.0385742336, 642.6341552764001, 511.0755004928], [138.0635986156, 137.6148681728, 259.9276122912, 267.1471557632], [81.5415649636, 161.9609985536, 164.22698971839998, 317.8928833024], [4.6428222372, 185.0845947392, 173.1574096532, 470.31109616640003], [133.3308798388, 228.4796069376, 142.53026450599998, 264.3572071424], [161.389003048, 189.3822220288, 179.7877724468, 233.0792992768]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047245_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, three people, and two ties.", "boxes_value": [[12.330879838800001, 19.382222028799987, 58.787772446800005, 94.3572071424], [0, 46.0385742336, 70, 113], [17.0635986156, 0, 70, 97.14715576319998], [0, 0, 43.22698971839998, 113], [0, 15.084594739200014, 52.157409653200006, 113], [12.330879838800001, 58.47960693760001, 21.53026450599998, 94.3572071424], [40.389003048000006, 19.382222028799987, 58.787772446800005, 63.079299276799986]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047248.jpg", "text": "I request a description of the area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.13037107199999998, 301.2401122816, 767.8520507904, 512.1365966848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047248_crop.jpg", "text": "I request a description of the area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.13037107199999998, 53.24011228159998, 767.8520507904, 264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047248.jpg", "text": "I request a description of the area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, three people, a sandals, and a sneakers.", "boxes_value": [[0.13037107199999998, 301.2401122816, 767.8520507904, 512.1365966848], [382.9821777408, 271.3507690496, 485.12231447040006, 435.03704832], [667.1949462528, 446.1142578176, 767.8520507904, 512.1365966848], [517.509033216, 82.794250496, 678.3177490176, 512.4425048576], [438.04541015039996, 155.9700927488, 629.9464111104, 512.4425048576], [223.2219238656, 301.2401122816, 263.629028352, 443.9020385792], [0.13037107199999998, 413.155883776, 22.380554188799998, 440.0096435712], [136.865356416, 478.9984741376, 209.2810669056, 506.1543579136]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047248_crop.jpg", "text": "I request a description of the area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, three people, a sandals, and a sneakers.", "boxes_value": [[0.13037107199999998, 53.24011228159998, 767.8520507904, 264], [382.9821777408, 23.35076904959999, 485.12231447040006, 187.03704832], [667.1949462528, 198.1142578176, 767.8520507904, 264], [517.509033216, 0, 678.3177490176, 264], [438.04541015039996, 0, 629.9464111104, 264], [223.2219238656, 53.24011228159998, 263.629028352, 195.9020385792], [0.13037107199999998, 165.155883776, 22.380554188799998, 192.0096435712], [136.865356416, 230.9984741376, 209.2810669056, 258.1543579136]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047250.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[0.000610368, 320.42236329599996, 112.374511744, 441.018798816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047250_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[0.000610368, 30.422363295999958, 112.374511744, 151.01879881600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047250.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[0.000610368, 320.42236329599996, 112.374511744, 441.018798816], [0.000610368, 320.42236329599996, 11.74700928, 347.04754636800004], [22.318786624, 363.492553728, 54.42559814399999, 378.371276832], [53.250976576, 383.85296630399995, 73.611389184, 403.43029785600004], [39.546875008, 402.64721678399997, 93.971862784, 427.706176752], [64.605834944, 410.47814942400004, 112.374511744, 441.018798816]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047250_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[0.000610368, 30.422363295999958, 112.374511744, 151.01879881600001], [0.000610368, 30.422363295999958, 11.74700928, 57.04754636800004], [22.318786624, 73.49255372800002, 54.42559814399999, 88.37127683199998], [53.250976576, 93.85296630399995, 73.611389184, 113.43029785600004], [39.546875008, 112.64721678399997, 93.971862784, 137.70617675199998], [64.605834944, 120.47814942400004, 112.374511744, 151.01879881600001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047251.jpg", "text": "Can you break down the region in the image for me? Give coordinates for the items you reference.", "boxes_value": [[131.3225707776, 244.3183593984, 437.55334471680004, 512.5123291136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047251_crop.jpg", "text": "Can you break down the region in the image for me? Give coordinates for the items you reference.", "boxes_value": [[77.32257077759999, 67.31835939839999, 383.55334471680004, 335]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047251.jpg", "text": "Can you break down the region in the image for me? Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, three cabinets, and a vase.", "boxes_value": [[131.3225707776, 244.3183593984, 437.55334471680004, 512.5123291136], [193.86248778239997, 368.76843264, 437.55334471680004, 512.5123291136], [0.7065429504, 260.9604492288, 219.69146726399998, 512.5123291136], [131.3225707776, 244.3183593984, 200.4255981312, 275.4926757888], [271.22656250880004, 95.590393088, 307.56347658239997, 377.5848998912], [307.56347658239997, 91.4960937472, 435.5101318656, 376.0495605248]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047251_crop.jpg", "text": "Can you break down the region in the image for me? Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, three cabinets, and a vase.", "boxes_value": [[77.32257077759999, 67.31835939839999, 383.55334471680004, 335], [139.86248778239997, 191.76843264000001, 383.55334471680004, 335], [0, 83.96044922879997, 165.69146726399998, 335], [77.32257077759999, 67.31835939839999, 146.4255981312, 98.4926757888], [217.22656250880004, 0, 253.56347658239997, 200.5848998912], [253.56347658239997, 0, 381.5101318656, 199.04956052479997]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047252.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for each element you describe.", "boxes_value": [[10.874877952, 484.140747056, 165.1657104384, 603.2728271278]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047252_crop.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for each element you describe.", "boxes_value": [[10.874877952, 30.14074705600001, 165.1657104384, 149.27282712780004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047252.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a street lights.", "boxes_value": [[10.874877952, 484.140747056, 165.1657104384, 603.2728271278], [79.4757690368, 568.4299316189, 95.5290527232, 603.2728271278], [64.6082763776, 564.5991210823, 78.8373412864, 599.2595215183], [10.874877952, 553.6427002229001, 21.2534790144, 584.0095214916], [151.6694336, 484.140747056, 165.1657104384, 517.3817138749], [29.0804443136, 521.2266845724, 41.0238036992, 597.386962889]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047252_crop.jpg", "text": "Can you discuss the entities within the region of image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a street lights.", "boxes_value": [[10.874877952, 30.14074705600001, 165.1657104384, 149.27282712780004], [79.4757690368, 114.42993161890001, 95.5290527232, 149.27282712780004], [64.6082763776, 110.59912108230003, 78.8373412864, 145.25952151829995], [10.874877952, 99.64270022290009, 21.2534790144, 130.00952149160003], [151.6694336, 30.14074705600001, 165.1657104384, 63.381713874900015], [29.0804443136, 67.22668457240002, 41.0238036992, 143.38696288899996]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047255.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates.", "boxes_value": [[0.0005493369, 49.8624267776, 302.0084228324, 493.053222656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047255_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates.", "boxes_value": [[0.0005493369, 49.8624267776, 302.0084228324, 493.053222656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047255.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a desk, a chair, an extention cord, and a speaker.", "boxes_value": [[0.0005493369, 49.8624267776, 302.0084228324, 493.053222656], [0.1459350391, 49.8624267776, 30.339904785099996, 144.382751488], [0.2752075419, 295.3526001152, 683.2752685338, 511.3989868032], [36.3048706006, 224.8585205248, 358.72094726480003, 511.05328368640005], [0.0005493369, 477.9071655424, 27.566345191299998, 493.053222656], [285.9702759119, 261.2219848704, 302.0084228324, 297.800231936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047255_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a desk, a chair, an extention cord, and a speaker.", "boxes_value": [[0.0005493369, 49.8624267776, 302.0084228324, 493.053222656], [0.1459350391, 49.8624267776, 30.339904785099996, 144.382751488], [0.2752075419, 295.3526001152, 377, 511.3989868032], [36.3048706006, 224.8585205248, 358.72094726480003, 511.05328368640005], [0.0005493369, 477.9071655424, 27.566345191299998, 493.053222656], [285.9702759119, 261.2219848704, 302.0084228324, 297.800231936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047256.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe.", "boxes_value": [[74.0317993141, 159.6500243968, 625.7456054968001, 424.2239990272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047256_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe.", "boxes_value": [[74.0317993141, 66.6500243968, 625.7456054968001, 331.2239990272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047256.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bed, a nightstand, a lamp, two cabinets, a mirror, three pillows, and a moniter.", "boxes_value": [[74.0317993141, 159.6500243968, 625.7456054968001, 424.2239990272], [493.053466766, 197.8987426816, 679.7521972389, 511.55249024], [421.01184079850003, 269.899230976, 503.6938476798, 371.0757446144], [438.29943848169995, 217.4452514816, 476.1324463157, 277.9246825984], [192.35644532339998, 225.4004516352, 321.4636230558, 336.3873291264], [74.0317993141, 309.6777954304, 203.1389770465, 424.2239990272], [224.2167968985, 159.6500243968, 299.4583129722, 236.3179321344], [533.2340087775001, 218.6088257024, 625.7456054968001, 285.1523437568], [543.7487792979, 189.5443725824, 641.018554665, 284.03106688], [516.0195312302, 202.1218261504, 646.0484619259, 254.0489502208], [95.2886352419, 248.9560546816, 182.6577148704, 331.8865966592]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7, 8, 9], [10]]}, {"image_path": "objects365_v1_00047256_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bed, a nightstand, a lamp, two cabinets, a mirror, three pillows, and a moniter.", "boxes_value": [[74.0317993141, 66.6500243968, 625.7456054968001, 331.2239990272], [493.053466766, 104.8987426816, 679.7521972389, 397], [421.01184079850003, 176.899230976, 503.6938476798, 278.0757446144], [438.29943848169995, 124.4452514816, 476.1324463157, 184.9246825984], [192.35644532339998, 132.4004516352, 321.4636230558, 243.3873291264], [74.0317993141, 216.6777954304, 203.1389770465, 331.2239990272], [224.2167968985, 66.6500243968, 299.4583129722, 143.3179321344], [533.2340087775001, 125.6088257024, 625.7456054968001, 192.1523437568], [543.7487792979, 96.5443725824, 641.018554665, 191.03106688000003], [516.0195312302, 109.12182615040001, 646.0484619259, 161.0489502208], [95.2886352419, 155.9560546816, 182.6577148704, 238.8865966592]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7, 8, 9], [10]]}, {"image_path": "objects365_v1_00047257.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference.", "boxes_value": [[136.3104247808, 295.58642575, 510.6204223488, 440.541015607]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047257_crop.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference.", "boxes_value": [[94.31042478079999, 36.58642574999999, 468.6204223488, 181.54101560700002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047257.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three hats, a handbag, and a moniter.", "boxes_value": [[136.3104247808, 295.58642575, 510.6204223488, 440.541015607], [136.3104247808, 330.22485349699997, 157.6503906304, 344.35595702200004], [162.9495849472, 326.495727558, 187.0695800832, 339.253051753], [479.2694091776, 383.064208989, 510.6204223488, 440.541015607], [235.6025390592, 310.193481444, 259.1694336, 323.086547861], [229.0279541248, 295.58642575, 269.7872314368, 322.849243178]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047257_crop.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three hats, a handbag, and a moniter.", "boxes_value": [[94.31042478079999, 36.58642574999999, 468.6204223488, 181.54101560700002], [94.31042478079999, 71.22485349699997, 115.65039063040001, 85.35595702200004], [120.94958494720001, 67.495727558, 145.0695800832, 80.25305175300002], [437.2694091776, 124.06420898900001, 468.6204223488, 181.54101560700002], [193.6025390592, 51.193481443999985, 217.1694336, 64.08654786099999], [187.0279541248, 36.58642574999999, 227.78723143680003, 63.849243177999995]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047259.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[0.0061035008, 113.21539307520001, 170.7755737088, 493.59985351679995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047259_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[0.0061035008, 95.21539307520001, 170.7755737088, 475.59985351679995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047259.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a desk, a bench, two people, and two laptops.", "boxes_value": [[0.0061035008, 113.21539307520001, 170.7755737088, 493.59985351679995], [122.5136718848, 113.21539307520001, 170.7755737088, 202.71075440639999], [7.9514160128, 330.59240724480003, 198.3959961088, 486.1215820032], [12.1489257984, 430.3555908096, 199.5148315648, 487.4792480256], [40.87109376, 252.59130862080002, 137.9518432768, 493.59985351679995], [96.1359252992, 266.64404298240004, 165.4201659904, 333.9857177856], [0.0061035008, 296.8125000192, 34.4251708928, 328.9854736128], [110.0841674752, 301.4501952768, 148.3853149184, 334.1507568384]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047259_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a desk, a bench, two people, and two laptops.", "boxes_value": [[0.0061035008, 95.21539307520001, 170.7755737088, 475.59985351679995], [122.5136718848, 95.21539307520001, 170.7755737088, 184.71075440639999], [7.9514160128, 312.59240724480003, 198.3959961088, 468.1215820032], [12.1489257984, 412.3555908096, 199.5148315648, 469.4792480256], [40.87109376, 234.59130862080002, 137.9518432768, 475.59985351679995], [96.1359252992, 248.64404298240004, 165.4201659904, 315.9857177856], [0.0061035008, 278.8125000192, 34.4251708928, 310.9854736128], [110.0841674752, 283.4501952768, 148.3853149184, 316.1507568384]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047261.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[489.37145998280005, 256.5786743296, 771.992553674, 504.4373169152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047261_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[71.37145998280005, 62.578674329600005, 353.99255367399996, 310.4373169152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047261.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a chair, a picture, a cabinet, a book, and a handbag.", "boxes_value": [[489.37145998280005, 256.5786743296, 771.992553674, 504.4373169152], [367.94934080120004, 287.8287963648, 668.5476073972001, 502.5419311616], [711.1496581764001, 306.9071655424, 770.6068115124, 504.4373169152], [489.37145998280005, 256.5786743296, 547.404418918, 302.0523071488], [639.6348876632, 367.105285632, 727.756347654, 492.9931640832], [728.525268532, 372.8003539968, 771.992553674, 398.459899904], [663.314086922, 293.0687866368, 731.0266112956, 374.6406249984]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047261_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a chair, a picture, a cabinet, a book, and a handbag.", "boxes_value": [[71.37145998280005, 62.578674329600005, 353.99255367399996, 310.4373169152], [0, 93.82879636479998, 250.54760739720007, 308.5419311616], [293.14965817640007, 112.90716554239998, 352.6068115124, 310.4373169152], [71.37145998280005, 62.578674329600005, 129.40441891800003, 108.05230714880003], [221.63488766319995, 173.105285632, 309.756347654, 298.9931640832], [310.52526853200004, 178.80035399680003, 353.99255367399996, 204.459899904], [245.314086922, 99.06878663679998, 313.0266112956, 180.64062499840003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047264.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[538.3986816767999, 213.4545898496, 654.7822265856, 413.4976196096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047264_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[29.398681676799924, 50.45458984960001, 145.78222658560003, 250.4976196096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047264.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a potted plant, a storage box, a trash bin can, and a tissue.", "boxes_value": [[538.3986816767999, 213.4545898496, 654.7822265856, 413.4976196096], [539.4857177856, 241.8311767552, 769.055908224, 424.3809814528], [568.6374511872, 171.943725568, 671.4223633152, 260.0038452224], [580.9938964992, 361.7767944192, 654.7822265856, 413.4976196096], [538.3986816767999, 311.170471168, 600.5997314304, 396.9325561344], [548.254394496, 213.4545898496, 578.216186496, 254.686340352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047264_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a potted plant, a storage box, a trash bin can, and a tissue.", "boxes_value": [[29.398681676799924, 50.45458984960001, 145.78222658560003, 250.4976196096], [30.485717785599945, 78.8311767552, 174, 261.3809814528], [59.637451187199986, 8.94372556799999, 162.42236331519996, 97.0038452224], [71.99389649919999, 198.77679441919997, 145.78222658560003, 250.4976196096], [29.398681676799924, 148.170471168, 91.59973143039997, 233.9325561344], [39.25439449600003, 50.45458984960001, 69.21618649599998, 91.686340352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047265.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[415.02636715669996, 134.2587890688, 683.0222168289, 226.9579467776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047265_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[67.02636715669996, 23.258789068800013, 335, 115.9579467776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047265.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three potted plants, a piano, and a person.", "boxes_value": [[415.02636715669996, 134.2587890688, 683.0222168289, 226.9579467776], [454.3803711176, 199.784912128, 483.4943847469, 226.9579467776], [653.6075439359, 142.0032958976, 672.0008544854, 216.5444946432], [666.1923828207, 134.2587890688, 683.0222168289, 219.44866944], [622.6680908228, 152.0792846848, 682.6285400684, 216.1466064384], [415.02636715669996, 157.0823364096, 438.4580077914, 184.1877441536]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047265_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three potted plants, a piano, and a person.", "boxes_value": [[67.02636715669996, 23.258789068800013, 335, 115.9579467776], [106.38037111760002, 88.784912128, 135.49438474689998, 115.9579467776], [305.6075439359, 31.003295897599997, 324.00085448540005, 105.54449464320001], [318.1923828207, 23.258789068800013, 335, 108.44866944], [274.66809082279997, 41.0792846848, 334.62854006839996, 105.1466064384], [67.02636715669996, 46.0823364096, 90.45800779140001, 73.1877441536]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047266.jpg", "text": "What is taking place within the specified area in this capture ? Specify the location of each mentioned object.", "boxes_value": [[285.5136108424, 318.9743652352, 445.5532226335, 510.346740736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047266_crop.jpg", "text": "What is taking place within the specified area in this capture ? Specify the location of each mentioned object.", "boxes_value": [[40.5136108424, 47.974365235200025, 200.5532226335, 239.34674073600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047266.jpg", "text": "What is taking place within the specified area in this capture ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[285.5136108424, 318.9743652352, 445.5532226335, 510.346740736], [415.4824218974, 318.9743652352, 445.5532226335, 366.7654418944], [342.1884765672, 491.2497558528, 368.06188967040003, 510.346740736], [314.46710205520003, 392.068664576, 340.9564819185, 481.393249536], [296.6021728729, 378.5159911936, 320.0114135664, 474.0008544768], [285.5136108424, 404.3892822016, 310.1549072448, 488.1696166912]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047266_crop.jpg", "text": "What is taking place within the specified area in this capture ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[40.5136108424, 47.974365235200025, 200.5532226335, 239.34674073600002], [170.4824218974, 47.974365235200025, 200.5532226335, 95.7654418944], [97.18847656719998, 220.24975585279998, 123.06188967040003, 239.34674073600002], [69.46710205520003, 121.068664576, 95.9564819185, 210.39324953599998], [51.602172872899985, 107.51599119359997, 75.0114135664, 203.0008544768], [40.5136108424, 133.3892822016, 65.15490724479997, 217.16961669120002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047267.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference.", "boxes_value": [[179.6552734208, 480.4573974488, 328.2659301888, 592.9539794602999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047267_crop.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference.", "boxes_value": [[37.6552734208, 28.45739744880001, 186.26593018879998, 140.95397946029993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047267.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[179.6552734208, 480.4573974488, 328.2659301888, 592.9539794602999], [212.0100097536, 501.55615234360005, 224.660095232, 546.4063720485], [253.3555908096, 480.4573974488, 282.6002807808, 515.5510253995], [311.5941772288, 495.9714355769, 328.2659301888, 525.0501708902], [188.8220825088, 565.8354492294, 201.426513664, 592.9539794602999], [179.6552734208, 504.3411865423, 204.1001586688, 563.1617431428]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047267_crop.jpg", "text": "I would like a description of the content within the bbox in . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[37.6552734208, 28.45739744880001, 186.26593018879998, 140.95397946029993], [70.0100097536, 49.55615234360005, 82.660095232, 94.40637204849997], [111.35559080959999, 28.45739744880001, 140.6002807808, 63.55102539949996], [169.5941772288, 43.97143557689998, 186.26593018879998, 73.05017089019998], [46.82208250880001, 113.83544922939996, 59.426513664, 140.95397946029993], [37.6552734208, 52.34118654230002, 62.10015866879999, 111.16174314279999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047268.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[629.534057652, 414.2149658112, 720.788452155, 511.0421752832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047268_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[23.534057652, 24.21496581119999, 114.78845215499996, 121.04217528319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047268.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, and three bottles.", "boxes_value": [[629.534057652, 414.2149658112, 720.788452155, 511.0421752832], [679.577880888, 363.6528320512, 764.928344745, 505.2569580032], [638.066528298, 357.4455566336, 752.9017333859999, 497.4978637824], [629.534057652, 414.9115600384, 660.1843261844999, 511.0421752832], [655.308105489, 420.4843749888, 690.1380615285, 508.9523926016], [683.868652362, 414.2149658112, 720.788452155, 509.6489868288]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047268_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, and three bottles.", "boxes_value": [[23.534057652, 24.21496581119999, 114.78845215499996, 121.04217528319998], [73.57788088799998, 0, 137, 115.2569580032], [32.06652829799998, 0, 137, 107.49786378239997], [23.534057652, 24.91156003840001, 54.18432618449992, 121.04217528319998], [49.30810548900001, 30.484374988800027, 84.13806152849997, 118.95239260160002], [77.86865236200003, 24.21496581119999, 114.78845215499996, 119.64898682879999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047270.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[443.2266845952, 180.0108032, 637.5853271807999, 276.025512704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047270_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[49.22668459520003, 24.010803199999998, 243.58532718079994, 120.025512704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047270.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cars, a suv, and a pickup truck.", "boxes_value": [[443.2266845952, 180.0108032, 637.5853271807999, 276.025512704], [613.996948224, 203.904357888, 639.5606689536, 280.8703003136], [615.9477539328, 180.0108032, 637.5853271807999, 211.5151977472], [605.5616454912, 170.1440429568, 633.7770995712, 196.1091918848], [541.3463134464, 183.4555053568, 594.9550781184, 212.8877563392], [18.5515137024, 192.6809081856, 557.0017089792, 476.3090209792], [443.2266845952, 194.0101318144, 585.5820312576, 276.025512704]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3]]}, {"image_path": "objects365_v1_00047270_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cars, a suv, and a pickup truck.", "boxes_value": [[49.22668459520003, 24.010803199999998, 243.58532718079994, 120.025512704], [219.996948224, 47.90435788799999, 245.56066895360004, 124.87030031360001], [221.94775393279997, 24.010803199999998, 243.58532718079994, 55.515197747200006], [211.5616454912, 14.144042956800007, 239.77709957119998, 40.1091918848], [147.34631344640002, 27.45550535679999, 200.95507811840002, 56.887756339199996], [0, 36.6809081856, 163.00170897919998, 144], [49.22668459520003, 38.01013181440001, 191.58203125759997, 120.025512704]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3]]}, {"image_path": "objects365_v1_00047272.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[272.47619627800003, 216.4972534272, 491.318115258, 264.6193847808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047272_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[55.47619627800003, 12.497253427200008, 274.318115258, 60.619384780799976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047272.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a nightstand, a clock, a bed, and two pillows.", "boxes_value": [[272.47619627800003, 216.4972534272, 491.318115258, 264.6193847808], [436.3721923635, 243.1021117952, 491.318115258, 264.6193847808], [438.2596435285, 223.3421630976, 464.63427735700003, 249.4747924992], [107.1994628895, 113.7070922752, 640.2263183934999, 447.8052368384], [272.47619627800003, 216.4972534272, 425.87145999049994, 260.1559448064], [248.483520488, 205.484252928, 362.9400635095, 239.3099365376]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047272_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a nightstand, a clock, a bed, and two pillows.", "boxes_value": [[55.47619627800003, 12.497253427200008, 274.318115258, 60.619384780799976], [219.3721923635, 39.10211179519999, 274.318115258, 60.619384780799976], [221.25964352850002, 19.342163097600007, 247.63427735700003, 45.47479249919999], [0, 0, 329, 72], [55.47619627800003, 12.497253427200008, 208.87145999049994, 56.15594480639999], [31.48352048800001, 1.4842529279999894, 145.94006350950002, 35.30993653760001]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047276.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[16.1839599758, 275.9801025536, 288.7814330848, 372.132995584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047276_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[16.1839599758, 24.980102553599977, 288.7814330848, 121.13299558400001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047276.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, and a sneakers.", "boxes_value": [[16.1839599758, 275.9801025536, 288.7814330848, 372.132995584], [266.0615844517, 278.2760620032, 318.4638061203, 358.1619262464], [230.882446267, 300.6294555648, 288.7814330848, 358.1619262464], [173.0620727492, 293.3004150272, 236.5100708012, 357.6907958784], [88.8834839175, 304.2939452928, 161.7545166039, 374.338012672], [16.1839599758, 275.9801025536, 124.80297852980001, 363.4791869952], [144.9622192512, 354.1314086912, 163.1693725501, 372.132995584]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047276_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, and a sneakers.", "boxes_value": [[16.1839599758, 24.980102553599977, 288.7814330848, 121.13299558400001], [266.0615844517, 27.276062003200025, 318.4638061203, 107.16192624640001], [230.882446267, 49.629455564800026, 288.7814330848, 107.16192624640001], [173.0620727492, 42.30041502720002, 236.5100708012, 106.69079587840002], [88.8834839175, 53.293945292800004, 161.7545166039, 123.33801267199999], [16.1839599758, 24.980102553599977, 124.80297852980001, 112.4791869952], [144.9622192512, 103.13140869120002, 163.1693725501, 121.13299558400001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047277.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object.", "boxes_value": [[482.59887694649996, 196.824890112, 673.9569091817, 443.7230224384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047277_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object.", "boxes_value": [[48.598876946499956, 61.82489011199999, 239.95690918169998, 308.7230224384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047277.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two vases, three chairs, and a desk.", "boxes_value": [[482.59887694649996, 196.824890112, 673.9569091817, 443.7230224384], [635.4366455063999, 196.824890112, 657.1577148267, 245.4012451328], [482.59887694649996, 236.7127685632, 520.1171874762999, 285.2891235328], [598.8981933453, 271.757507328, 642.5943603808, 340.4732055552], [610.8793945079, 286.9101562368, 673.2521972947001, 361.2641601536], [620.3939208854, 318.2727661056, 673.9569091817, 443.7230224384], [318.4442749157, 268.713317888, 638.2475585751, 510.6253662208]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047277_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two vases, three chairs, and a desk.", "boxes_value": [[48.598876946499956, 61.82489011199999, 239.95690918169998, 308.7230224384], [201.43664550639994, 61.82489011199999, 223.15771482670004, 110.4012451328], [48.598876946499956, 101.7127685632, 86.11718747629993, 150.28912353279998], [164.8981933453, 136.75750732799997, 208.59436038080003, 205.47320555520002], [176.87939450789997, 151.9101562368, 239.25219729470007, 226.26416015360002], [186.3939208854, 183.2727661056, 239.95690918169998, 308.7230224384], [0, 133.713317888, 204.24755857510002, 370]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047278.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[144.4925537245, 0.7164306432, 419.258911163, 485.935974144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047278_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[69.4925537245, 0.7164306432, 344.258911163, 485.935974144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047278.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a slippers, two handbags, a leather shoes, and a moniter.", "boxes_value": [[144.4925537245, 0.7164306432, 419.258911163, 485.935974144], [144.4925537245, 0.7164306432, 185.7189941315, 27.4373169152], [345.27178957949997, 461.6797485568, 374.379272449, 485.935974144], [235.214538587, 211.017211904, 263.646911618, 232.9876708864], [270.7550049075, 224.9102782976, 298.5411376765, 240.7419433472], [382.71453854450004, 363.4706420736, 419.258911163, 377.113891584], [235.6420287785, 127.3780517376, 269.37591552699996, 147.7077636608]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047278_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a slippers, two handbags, a leather shoes, and a moniter.", "boxes_value": [[69.4925537245, 0.7164306432, 344.258911163, 485.935974144], [69.4925537245, 0.7164306432, 110.71899413150001, 27.4373169152], [270.27178957949997, 461.6797485568, 299.379272449, 485.935974144], [160.214538587, 211.017211904, 188.646911618, 232.9876708864], [195.7550049075, 224.9102782976, 223.5411376765, 240.7419433472], [307.71453854450004, 363.4706420736, 344.258911163, 377.113891584], [160.6420287785, 127.3780517376, 194.37591552699996, 147.7077636608]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047279.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[307.945312466, 277.0980224512, 678.4998778916, 364.40100096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047279_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[92.94531246600002, 22.098022451199995, 463.4998778916, 109.40100095999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047279.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three umbrellas, a person, and a cow.", "boxes_value": [[307.945312466, 277.0980224512, 678.4998778916, 364.40100096], [307.945312466, 333.9515380736, 353.3284911848, 361.2714233344], [269.7633056372, 314.8605346816, 353.0396728444, 360.942260736], [506.51977541019994, 277.0980224512, 520.3912353302001, 340.5369872896], [329.279052746, 315.7905883648, 441.22436526300004, 393.6880493056], [600.096923846, 335.72790528, 678.4998778916, 364.40100096]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047279_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three umbrellas, a person, and a cow.", "boxes_value": [[92.94531246600002, 22.098022451199995, 463.4998778916, 109.40100095999998], [92.94531246600002, 78.95153807359998, 138.3284911848, 106.27142333440003], [54.763305637200006, 59.86053468159997, 138.0396728444, 105.94226073599998], [291.51977541019994, 22.098022451199995, 305.39123533020006, 85.5369872896], [114.27905274599999, 60.79058836479999, 226.22436526300004, 131], [385.096923846, 80.72790528000002, 463.4998778916, 109.40100095999998]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047281.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[418.465576144, 0.79351808, 686.4057617340001, 449.6619262464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047281_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[67.46557614400001, 0.79351808, 335.40576173400007, 449.6619262464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047281.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a picture, a desk, a handbag, and a tripod.", "boxes_value": [[418.465576144, 0.79351808, 686.4057617340001, 449.6619262464], [418.465576144, 0.79351808, 477.98229979499996, 121.8972168192], [531.626953088, 191.4973144576, 551.024047882, 213.7100830208], [484.64990235199997, 207.607604992, 593.033935571, 251.4931640832], [603.7430419789999, 431.4794921984, 625.060424835, 449.6619262464], [645.912475593, 285.9075317248, 686.4057617340001, 343.3514404352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047281_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a picture, a desk, a handbag, and a tripod.", "boxes_value": [[67.46557614400001, 0.79351808, 335.40576173400007, 449.6619262464], [67.46557614400001, 0.79351808, 126.98229979499996, 121.8972168192], [180.62695308800005, 191.4973144576, 200.024047882, 213.7100830208], [133.64990235199997, 207.607604992, 242.03393557100003, 251.4931640832], [252.74304197899994, 431.4794921984, 274.06042483500005, 449.6619262464], [294.91247559299995, 285.9075317248, 335.40576173400007, 343.3514404352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047282.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[612.9544677619999, 139.257080064, 909.4372558179999, 442.99285888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047282_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[74.95446776199992, 76.25708006400001, 371.4372558179999, 379.99285888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047282.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two carpets, three lamps, and a storage box.", "boxes_value": [[612.9544677619999, 139.257080064, 909.4372558179999, 442.99285888], [572.7847900319999, 289.7182617088, 714.1564941419999, 325.1194458112], [612.9544677619999, 181.9532470784, 627.7674560429999, 204.898803712], [674.8203125299999, 165.1071166976, 690.2141113140001, 196.7661743104], [833.696533215, 139.257080064, 868.550537127, 191.2476806656], [717.3480224819999, 266.3814087168, 844.460327165, 339.0703124992], [796.672485336, 371.513549824, 909.4372558179999, 442.99285888]], "boxes_seq": [[0], [0], [1, 6], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047282_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two carpets, three lamps, and a storage box.", "boxes_value": [[74.95446776199992, 76.25708006400001, 371.4372558179999, 379.99285888], [34.78479003199993, 226.71826170880001, 176.15649414199993, 262.1194458112], [74.95446776199992, 118.95324707840001, 89.76745604299992, 141.898803712], [136.8203125299999, 102.10711669759999, 152.2141113140001, 133.7661743104], [295.69653321500004, 76.25708006400001, 330.550537127, 128.2476806656], [179.34802248199992, 203.38140871680002, 306.46032716499997, 276.0703124992], [258.672485336, 308.513549824, 371.4372558179999, 379.99285888]], "boxes_seq": [[0], [0], [1, 6], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047284.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates.", "boxes_value": [[164.3895874048, 411.9553222393, 281.0656128, 658.2115478173]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047284_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates.", "boxes_value": [[29.38958740480001, 61.95532223930002, 146.0656128, 308.2115478173]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047284.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a stool, a desk, two people, two sneakers, and a leather shoes.", "boxes_value": [[164.3895874048, 411.9553222393, 281.0656128, 658.2115478173], [165.5195922944, 548.6704101741, 231.9877929472, 575.4304199159], [147.3919067136, 486.51843259350005, 198.7581787136, 546.6822509547], [164.3895874048, 411.9553222393, 281.0656128, 658.2115478173], [174.0677490176, 362.4888916218, 249.3425903104, 615.197387707], [255.2475586048, 610.2468261673, 275.9476928512, 643.3847656483], [233.3054199296, 611.6364746092, 270.65631104, 655.5238036902], [206.84851072, 596.6960449293, 230.504089344, 614.1264648495]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047284_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a stool, a desk, two people, two sneakers, and a leather shoes.", "boxes_value": [[29.38958740480001, 61.95532223930002, 146.0656128, 308.2115478173], [30.519592294400013, 198.67041017409997, 96.9877929472, 225.43041991589996], [12.391906713600008, 136.51843259350005, 63.75817871359999, 196.68225095469995], [29.38958740480001, 61.95532223930002, 146.0656128, 308.2115478173], [39.06774901759999, 12.488891621800008, 114.3425903104, 265.19738770699996], [120.24755860479999, 260.2468261673, 140.94769285119997, 293.3847656483], [98.30541992959999, 261.63647460920004, 135.65631104, 305.5238036902], [71.84851072000001, 246.6960449293, 95.504089344, 264.12646484950005]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047285.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates.", "boxes_value": [[244.22912597439998, 264.3977050624, 848.5209961284, 477.2454834176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047285_crop.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates.", "boxes_value": [[151.22912597439998, 53.39770506240001, 755.5209961284, 266.2454834176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047285.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five chairs, and a bench.", "boxes_value": [[244.22912597439998, 264.3977050624, 848.5209961284, 477.2454834176], [712.7387695596, 287.6397094912, 848.5209961284, 458.8965454336], [608.7614746192, 291.3095092736, 727.417968746, 458.8965454336], [487.6584472368, 264.3977050624, 600.1986084416001, 454.0034789888], [366.5554199412, 296.2025756672, 483.9886474836, 477.2454834176], [244.22912597439998, 297.4258422784, 362.8856201012, 477.2454834176], [68.69091799639999, 272.348937984, 476.56176753919993, 453.741332992]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047285_crop.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five chairs, and a bench.", "boxes_value": [[151.22912597439998, 53.39770506240001, 755.5209961284, 266.2454834176], [619.7387695596, 76.6397094912, 755.5209961284, 247.8965454336], [515.7614746192, 80.30950927359999, 634.417968746, 247.8965454336], [394.6584472368, 53.39770506240001, 507.1986084416001, 243.0034789888], [273.5554199412, 85.2025756672, 390.9886474836, 266.2454834176], [151.22912597439998, 86.42584227840001, 269.8856201012, 266.2454834176], [0, 61.348937983999974, 383.56176753919993, 242.74133299200003]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047286.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[249.45007328640003, 295.2080078336, 527.9282226624, 488.0305175552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047286_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[70.45007328640003, 48.20800783359999, 348.9282226624, 241.0305175552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047286.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, and three storage boxes.", "boxes_value": [[249.45007328640003, 295.2080078336, 527.9282226624, 488.0305175552], [249.45007328640003, 342.8953247232, 378.2575683984, 488.0305175552], [392.7711181584, 345.6165771264, 527.9282226624, 472.6098632704], [265.4538574032, 295.2080078336, 312.1043701392, 341.8586425856], [330.7646484672, 325.5309448192, 357.9775390368, 340.303588864], [358.7550049008, 306.0931396608, 384.412841808, 338.7485961728]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047286_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, and three storage boxes.", "boxes_value": [[70.45007328640003, 48.20800783359999, 348.9282226624, 241.0305175552], [70.45007328640003, 95.89532472320002, 199.2575683984, 241.0305175552], [213.7711181584, 98.61657712639999, 348.9282226624, 225.6098632704], [86.4538574032, 48.20800783359999, 133.1043701392, 94.85864258560002], [151.7646484672, 78.53094481919999, 178.9775390368, 93.303588864], [179.75500490079997, 59.09313966079998, 205.412841808, 91.74859617279998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047288.jpg", "text": "Could you give me a description of the rectangular region found in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 48.7086792192, 327.5312499866, 512.0123291136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047288_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 48.7086792192, 327.5312499866, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047288.jpg", "text": "Could you give me a description of the rectangular region found in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a trash bin can, a bottle, and three dogs.", "boxes_value": [[0, 48.7086792192, 327.5312499866, 512.0123291136], [268.6828002808, 33.1253662208, 340.4844970907, 245.7687988224], [212.98107910299998, 48.7086792192, 327.5312499866, 254.645385728], [10.407531734199999, 148.6171264512, 49.881835968400004, 231.0408935424], [137.7009277683, 132.076416, 150.0407104194, 167.1215820288], [0, 364.673217792, 75.0111084142, 512.0123291136], [0.010253879, 153.1879882752, 284.3983154178, 444.88635253760003], [122.02716066480001, 231.6505737216, 594.2530517439, 477.6449585152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047288_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a trash bin can, a bottle, and three dogs.", "boxes_value": [[0, 48.7086792192, 327.5312499866, 512], [268.6828002808, 33.1253662208, 340.4844970907, 245.7687988224], [212.98107910299998, 48.7086792192, 327.5312499866, 254.645385728], [10.407531734199999, 148.6171264512, 49.881835968400004, 231.0408935424], [137.7009277683, 132.076416, 150.0407104194, 167.1215820288], [0, 364.673217792, 75.0111084142, 512], [0.010253879, 153.1879882752, 284.3983154178, 444.88635253760003], [122.02716066480001, 231.6505737216, 409, 477.6449585152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047290.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[223.2450561536, 170.783264149, 373.1174316544, 540.3925781532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047290_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[38.24505615359999, 92.78326414899999, 188.11743165439998, 462.3925781532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047290.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two leather shoes, a belt, a tie, a glasses, and a hat.", "boxes_value": [[223.2450561536, 170.783264149, 373.1174316544, 540.3925781532], [223.2450561536, 170.783264149, 373.1174316544, 540.3925781532], [308.971557632, 206.7191772617, 367.36279296, 276.6801757531], [285.6996804096, 510.0012056299, 307.963397632, 531.9058306538], [326.2771004928, 513.592127784, 340.281696768, 536.9331216149], [307.9433619456, 313.597681713, 344.7906566656, 335.7060585456], [263.7266082816, 229.9703432741, 280.067582464, 330.8998897224], [242.1674951168, 198.1547795606, 267.5911476224, 214.3976687081], [234.3991568384, 175.0851689497, 277.9489319936, 210.8666058351]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00047290_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two leather shoes, a belt, a tie, a glasses, and a hat.", "boxes_value": [[38.24505615359999, 92.78326414899999, 188.11743165439998, 462.3925781532], [38.24505615359999, 92.78326414899999, 188.11743165439998, 462.3925781532], [123.97155763199999, 128.7191772617, 182.36279295999998, 198.6801757531], [100.69968040959998, 432.0012056299, 122.96339763200001, 453.90583065379997], [141.2771004928, 435.592127784, 155.28169676800002, 458.93312161489996], [122.94336194559997, 235.597681713, 159.7906566656, 257.7060585456], [78.72660828160002, 151.9703432741, 95.067582464, 252.89988972240002], [57.16749511680001, 120.15477956059999, 82.5911476224, 136.3976687081], [49.39915683839999, 97.0851689497, 92.94893199360001, 132.8666058351]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00047294.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[335.0291137536, 673.5080566272, 436.64434816, 767.4024658176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047294_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[26.029113753599972, 23.50805662719995, 127.64434815999999, 117.40246581760005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047294.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[335.0291137536, 673.5080566272, 436.64434816, 767.4024658176], [386.1187744256, 650.0344238592, 422.0195922944, 767.86279296], [335.0291137536, 673.5080566272, 385.658508288, 767.4024658176], [325.8237304832, 665.2232666112, 353.9000244224, 766.9422607104], [400.1994018304, 715.0654296575999, 436.64434816, 748.9093017600001], [353.3373412864, 699.4859619072, 390.5579833856, 744.2758788864]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047294_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two handbags.", "boxes_value": [[26.029113753599972, 23.50805662719995, 127.64434815999999, 117.40246581760005], [77.11877442560001, 0.034423859200046536, 113.01959229440001, 117.86279295999998], [26.029113753599972, 23.50805662719995, 76.65850828800001, 117.40246581760005], [16.823730483199995, 15.22326661119996, 44.90002442240001, 116.94226071039998], [91.1994018304, 65.0654296575999, 127.64434815999999, 98.90930176000006], [44.337341286399976, 49.48596190720002, 81.55798338559998, 94.27587888640005]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047297.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[581.1545410154, 129.9806518784, 724.8369140496, 326.9204711936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047297_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[36.154541015400014, 49.98065187840001, 179.83691404959995, 246.9204711936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047297.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, two chairs, a stool, and a carpet.", "boxes_value": [[581.1545410154, 129.9806518784, 724.8369140496, 326.9204711936], [678.5924071927, 129.9806518784, 724.8369140496, 298.7731933696], [610.6391601387, 187.8797607424, 691.3310546527, 285.5595092992], [581.7598877046, 222.704711936, 606.3922119023999, 272.8186645504], [569.0537109147, 245.3248290816, 795.3132323927999, 451.1246338048], [581.1545410154, 277.5225829888, 631.6263427639, 326.9204711936]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047297_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, two chairs, a stool, and a carpet.", "boxes_value": [[36.154541015400014, 49.98065187840001, 179.83691404959995, 246.9204711936], [133.59240719269997, 49.98065187840001, 179.83691404959995, 218.7731933696], [65.63916013870005, 107.87976074240001, 146.3310546527, 205.5595092992], [36.75988770460003, 142.704711936, 61.392211902399936, 192.8186645504], [24.05371091469999, 165.3248290816, 215, 296], [36.154541015400014, 197.52258298880002, 86.62634276389997, 246.9204711936]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047299.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify.", "boxes_value": [[11.515014663299999, 363.619567872, 544.3306884679, 512.057739264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047299_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify.", "boxes_value": [[11.515014663299999, 37.619567872000005, 544.3306884679, 186]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047299.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, a bench, a cabinet, two people, and a handbag.", "boxes_value": [[11.515014663299999, 363.619567872, 544.3306884679, 512.057739264], [11.515014663299999, 419.3941650432, 157.8258056669, 512.057739264], [108.44592285879999, 429.1482544128, 234.6389770703, 511.4480590848], [36.5097656147, 401.1053466624, 206.59606931199997, 511.4480590848], [441.56506344480005, 363.619567872, 544.3306884679, 474.606445312], [381.3129882935, 252.720336896, 479.74121095690003, 512.100463872], [296.5095825243, 243.5829467648, 380.32910159080006, 512.0758056448], [389.2803955016, 405.4077148672, 440.229125963, 486.1141357568]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047299_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, a bench, a cabinet, two people, and a handbag.", "boxes_value": [[11.515014663299999, 37.619567872000005, 544.3306884679, 186], [11.515014663299999, 93.39416504320002, 157.8258056669, 186], [108.44592285879999, 103.14825441279999, 234.6389770703, 185.44805908479998], [36.5097656147, 75.1053466624, 206.59606931199997, 185.44805908479998], [441.56506344480005, 37.619567872000005, 544.3306884679, 148.606445312], [381.3129882935, 0, 479.74121095690003, 186], [296.5095825243, 0, 380.32910159080006, 186], [389.2803955016, 79.40771486720001, 440.229125963, 160.1141357568]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047300.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[0, 289.756286634, 346.59205496650003, 497.99023436939996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047300_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[0, 52.75628663399999, 346.59205496650003, 260.99023436939996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047300.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a hat, and a camera.", "boxes_value": [[0, 289.756286634, 346.59205496650003, 497.99023436939996], [0.0852050535, 381.33331301159996, 143.5000000105, 497.99023436939996], [0, 289.756286634, 185.3638915765, 484.7561645442], [191.499999992, 319.99999998780004, 408.833374019, 498.04382325299997], [266.2106345605, 319.1239867134, 346.59205496650003, 377.5832015004], [77.4954833975, 329.0331421086, 98.387268035, 354.6473999034]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047300_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a hat, and a camera.", "boxes_value": [[0, 52.75628663399999, 346.59205496650003, 260.99023436939996], [0.0852050535, 144.33331301159996, 143.5000000105, 260.99023436939996], [0, 52.75628663399999, 185.3638915765, 247.75616454419998], [191.499999992, 82.99999998780004, 408.833374019, 261], [266.2106345605, 82.12398671339997, 346.59205496650003, 140.58320150039998], [77.4954833975, 92.03314210859998, 98.387268035, 117.64739990340001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047302.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[105.3854980608, 211.3175048562, 318.827636736, 351.0047607624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047302_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[53.3854980608, 35.3175048562, 266.827636736, 175.0047607624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047302.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five hats.", "boxes_value": [[105.3854980608, 211.3175048562, 318.827636736, 351.0047607624], [179.5128784384, 250.42993167420002, 203.3528442368, 274.2698974698], [105.3854980608, 211.3175048562, 137.047973632, 229.94238283739998], [121.7755126784, 221.3749999728, 160.51544192, 244.09741211999997], [254.0127563264, 334.9873046952, 277.1077270528, 351.0047607624], [283.4402465792, 316.7347412466, 318.827636736, 335.359741221]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047302_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five hats.", "boxes_value": [[53.3854980608, 35.3175048562, 266.827636736, 175.0047607624], [127.51287843840001, 74.42993167420002, 151.3528442368, 98.26989746980001], [53.3854980608, 35.3175048562, 85.04797363200001, 53.942382837399975], [69.7755126784, 45.3749999728, 108.51544192, 68.09741211999997], [202.0127563264, 158.9873046952, 225.1077270528, 175.0047607624], [231.4402465792, 140.73474124659998, 266.827636736, 159.35974122099998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047304.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each object you identify.", "boxes_value": [[112.136678336, 158.835571296, 350.957275392, 418.565570784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047304_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each object you identify.", "boxes_value": [[60.136678336, 65.83557129600001, 298.957275392, 325.565570784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047304.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two pictures, a person, two sneakers, and a bottle.", "boxes_value": [[112.136678336, 158.835571296, 350.957275392, 418.565570784], [292.873229952, 276.23876952, 350.957275392, 354.89422608], [170.302978496, 157.856994624, 213.088134784, 215.422424304], [229.150268544, 158.835571296, 269.8239136, 216.178771968], [67.512512192, 158.44647215999998, 196.19000243199997, 418.607482896], [112.136678336, 392.80346471999997, 149.23411104, 405.512770368], [164.69137472, 391.772980512, 195.26240723200002, 418.565570784], [249.01605222400002, 370.81689451200003, 269.14959718399996, 414.01245115200004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047304_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two pictures, a person, two sneakers, and a bottle.", "boxes_value": [[60.136678336, 65.83557129600001, 298.957275392, 325.565570784], [240.87322995199997, 183.23876952, 298.957275392, 261.89422608], [118.30297849600001, 64.85699462400001, 161.088134784, 122.422424304], [177.150268544, 65.83557129600001, 217.82391360000003, 123.178771968], [15.512512192000003, 65.44647215999998, 144.19000243199997, 325.607482896], [60.136678336, 299.80346471999997, 97.23411103999999, 312.512770368], [112.69137472, 298.772980512, 143.26240723200002, 325.565570784], [197.01605222400002, 277.81689451200003, 217.14959718399996, 321.01245115200004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047307.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[84.7410888704, 209.69689942040003, 259.5922241024, 292.6328125154]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047307_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[43.741088870400006, 21.69689942040003, 218.5922241024, 104.63281251540002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047307.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include a cup, a desk, a candle, and three napkins.", "boxes_value": [[84.7410888704, 209.69689942040003, 259.5922241024, 292.6328125154], [109.3000488448, 257.746520969, 140.9731445248, 292.6328125154], [71.7333373952, 208.1666259806, 417.3333129728, 389.76660157960004], [246.9974975488, 249.7366332944, 259.5922241024, 281.4754028338], [84.7410888704, 260.7750244456, 172.4622192128, 282.4277343762], [108.0593871872, 232.45996092480001, 179.1246338048, 244.11914064159998], [132.48803712, 209.69689942040003, 183.5661621248, 219.6904297028]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047307_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include a cup, a desk, a candle, and three napkins.", "boxes_value": [[43.741088870400006, 21.69689942040003, 218.5922241024, 104.63281251540002], [68.3000488448, 69.74652096900002, 99.9731445248, 104.63281251540002], [30.733337395199996, 20.166625980600003, 262, 125], [205.9974975488, 61.73663329440001, 218.5922241024, 93.47540283379999], [43.741088870400006, 72.77502444560002, 131.4622192128, 94.42773437620002], [67.0593871872, 44.459960924800015, 138.1246338048, 56.119140641599984], [91.48803712, 21.69689942040003, 142.5661621248, 31.690429702800003]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047308.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[300.1215820584, 198.975646976, 682.4193115559, 511.8925781504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047308_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[96.12158205840001, 78.97564697600001, 478.4193115559, 391.8925781504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047308.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a baseball glove, a baseball, two people, and two sneakers.", "boxes_value": [[300.1215820584, 198.975646976, 682.4193115559, 511.8925781504], [344.3582763867, 198.975646976, 379.6606445049, 239.407409664], [541.3925781236, 411.9232787968, 571.8012695462, 437.1004028416], [297.88415526709997, 61.2247924736, 471.3281250075, 351.7300414976], [537.0970459178, 410.2450561536, 682.4193115559, 511.8925781504], [300.1215820584, 313.3510131712, 339.32098391510004, 341.2738647552], [391.4079590109, 324.0905761792, 411.8131103307, 351.4765014528]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047308_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a baseball glove, a baseball, two people, and two sneakers.", "boxes_value": [[96.12158205840001, 78.97564697600001, 478.4193115559, 391.8925781504], [140.35827638670003, 78.97564697600001, 175.6606445049, 119.407409664], [337.39257812359995, 291.9232787968, 367.80126954620005, 317.1004028416], [93.88415526709997, 0, 267.3281250075, 231.73004149759998], [333.09704591779996, 290.2450561536, 478.4193115559, 391.8925781504], [96.12158205840001, 193.3510131712, 135.32098391510004, 221.2738647552], [187.40795901090002, 204.0905761792, 207.8131103307, 231.4765014528]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047309.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[644.6557616939999, 371.6854247936, 773.2517089518, 511.3546753024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047309_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[32.65576169399992, 35.68542479360002, 161.25170895179997, 175.35467530239998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047309.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a handbag, a bottle, a chair, and a desk.", "boxes_value": [[644.6557616939999, 371.6854247936, 773.2517089518, 511.3546753024], [695.550659175, 301.5219116032, 773.8784179902, 426.7991332864], [510.4676513556, 294.0600585728, 733.7896728246, 511.3757324288], [690.6606445134, 396.6710815232, 757.2392577882, 468.0975952384], [644.6557616939999, 371.6854247936, 664.7370605784, 400.095214848], [685.3652343486, 392.3027343872, 768.2652587664, 429.0779419136], [719.0238036924001, 430.32458496, 773.2517089518, 511.3546753024]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047309_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a handbag, a bottle, a chair, and a desk.", "boxes_value": [[32.65576169399992, 35.68542479360002, 161.25170895179997, 175.35467530239998], [83.55065917499996, 0, 161.87841799019998, 90.79913328639998], [0, 0, 121.78967282459996, 175.3757324288], [78.66064451340003, 60.67108152319997, 145.2392577882, 132.0975952384], [32.65576169399992, 35.68542479360002, 52.73706057840002, 64.09521484800001], [73.36523434859998, 56.30273438720002, 156.26525876640005, 93.0779419136], [107.02380369240007, 94.32458495999998, 161.25170895179997, 175.35467530239998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047313.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[370.3825683562, 336.6353759744, 472.49963379089996, 379.6320800768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047313_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[26.38256835620001, 11.635375974400006, 128.49963379089996, 54.63208007679998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047313.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[370.3825683562, 336.6353759744, 472.49963379089996, 379.6320800768], [370.3825683562, 337.449707008, 388.7863769572, 379.6320800768], [395.3010254073, 342.498535168, 416.4736328189, 379.3063354368], [410.9362793019, 336.6353759744, 429.8287353353, 378.329101568], [430.6430664001, 339.8927002112, 453.6071777575, 378.8177490432], [447.9068603722, 340.2184448, 472.49963379089996, 378.654846208]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047313_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[26.38256835620001, 11.635375974400006, 128.49963379089996, 54.63208007679998], [26.38256835620001, 12.449707008000019, 44.78637695719999, 54.63208007679998], [51.30102540730002, 17.49853516799999, 72.4736328189, 54.3063354368], [66.93627930190002, 11.635375974400006, 85.8287353353, 53.329101568], [86.6430664001, 14.892700211200008, 109.60717775749998, 53.817749043200024], [103.90686037220001, 15.218444799999986, 128.49963379089996, 53.65484620799998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047314.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[564.569702155, 214.2799072256, 683.1350097711, 293.660400384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047314_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[30.56970215499996, 20.2799072256, 149, 99.66040038400001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047314.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a hat, and a bicycle.", "boxes_value": [[564.569702155, 214.2799072256, 683.1350097711, 293.660400384], [608.7026366942, 222.3521728512, 637.3166504066, 253.8276977664], [641.0364990557, 222.6383056896, 683.0992431784, 260.981201152], [564.569702155, 215.8059692544, 585.8024902675, 255.748901376], [604.7420654464, 252.2021484544, 645.2471924165001, 293.660400384], [629.9158935337999, 214.2799072256, 683.1350097711, 251.65515136]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047314_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a hat, and a bicycle.", "boxes_value": [[30.56970215499996, 20.2799072256, 149, 99.66040038400001], [74.70263669420001, 28.35217285120001, 103.31665040660005, 59.82769776640001], [107.0364990557, 28.638305689600003, 149, 66.98120115199998], [30.56970215499996, 21.805969254399997, 51.80249026750005, 61.74890137599999], [70.74206544640003, 58.2021484544, 111.24719241650007, 99.66040038400001], [95.91589353379993, 20.2799072256, 149, 57.65515135999999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047316.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[302.7288207706, 406.2929077248, 436.3256835672, 482.8820801024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047316_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[33.72882077060001, 19.29290772479999, 167.32568356719997, 95.88208010239998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047316.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, an umbrella, a person, a trash bin can, and a car.", "boxes_value": [[302.7288207706, 406.2929077248, 436.3256835672, 482.8820801024], [339.2827758706, 456.3072509952, 436.3256835672, 482.8820801024], [375.13952634640003, 406.2929077248, 409.5856933825, 432.1886596608], [381.7530517503, 419.5548706304, 401.8553466637, 477.9772948992], [302.7288207706, 423.9687499776, 316.0952759081, 448.509765632], [402.8062743818, 421.685180672, 481.60388186349996, 448.9851074048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047316_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, an umbrella, a person, a trash bin can, and a car.", "boxes_value": [[33.72882077060001, 19.29290772479999, 167.32568356719997, 95.88208010239998], [70.2827758706, 69.30725099519998, 167.32568356719997, 95.88208010239998], [106.13952634640003, 19.29290772479999, 140.58569338249998, 45.1886596608], [112.75305175030002, 32.55487063039999, 132.8553466637, 90.97729489919999], [33.72882077060001, 36.9687499776, 47.09527590810001, 61.50976563199998], [133.8062743818, 34.685180672, 200, 61.985107404799976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047318.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[124.304016128, 353.7641601398, 239.5590210048, 569.8559570056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047318_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[29.304016128, 54.764160139800026, 144.5590210048, 270.8559570056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047318.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a desk, two people, and a bakset.", "boxes_value": [[124.304016128, 353.7641601398, 239.5590210048, 569.8559570056], [181.4337158144, 380.5262451013, 393.5654907392, 610.2950439544001], [156.7645874176, 425.89648434099996, 210.957702656, 485.5437011808], [124.304016128, 353.7641601398, 171.5384521728, 494.4833984346], [219.1860351488, 368.5830078107, 239.5590210048, 391.0532226758], [175.3550414848, 506.1639404018, 190.9953613312, 569.8559570056]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047318_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a desk, two people, and a bakset.", "boxes_value": [[29.304016128, 54.764160139800026, 144.5590210048, 270.8559570056], [86.43371581439999, 81.52624510129999, 173, 311.29504395440006], [61.76458741760001, 126.89648434099996, 115.95770265600001, 186.54370118079999], [29.304016128, 54.764160139800026, 76.53845217279999, 195.48339843460002], [124.18603514879999, 69.5830078107, 144.5590210048, 92.05322267579999], [80.35504148480001, 207.1639404018, 95.9953613312, 270.8559570056]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047323.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[362.181274402, 140.9773559808, 511.6807861576, 255.241332992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047323_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[38.181274401999985, 28.977355980800013, 187.68078615759998, 143.241332992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047323.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[362.181274402, 140.9773559808, 511.6807861576, 255.241332992], [319.61328125200004, 143.5844726784, 631.3713379032, 417.2199707136], [323.0019530956, 93.60150144, 501.75451661640005, 370.6256713728], [460.33752443720005, 143.4942016512, 511.6807861576, 182.7567138816], [416.5447998388, 140.9773559808, 457.317382774, 185.2734985216], [362.181274402, 205.40814208, 411.00769044080005, 255.241332992]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047323_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[38.181274401999985, 28.977355980800013, 187.68078615759998, 143.241332992], [0, 31.584472678400004, 225, 171], [0, 0, 177.75451661640005, 171], [136.33752443720005, 31.4942016512, 187.68078615759998, 70.7567138816], [92.54479983879997, 28.977355980800013, 133.317382774, 73.2734985216], [38.181274401999985, 93.40814208, 87.00769044080005, 143.241332992]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047324.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give coordinates for the items you reference.", "boxes_value": [[526.1102295094, 58.6346435584, 668.0549316069, 305.4656982528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047324_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give coordinates for the items you reference.", "boxes_value": [[36.11022950940003, 58.6346435584, 178.05493160690003, 305.4656982528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047324.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a leather shoes, and two trolleys.", "boxes_value": [[526.1102295094, 58.6346435584, 668.0549316069, 305.4656982528], [526.1102295094, 58.6346435584, 607.5518799096, 305.4656982528], [603.7930908351001, 135.4821777408, 668.0549316069, 211.076843264], [576.4547119198, 280.021301248, 607.1193847758001, 291.7943725568], [599.4275548858, 110.5597568, 642.9636975484, 169.1083623936], [584.4150919458, 117.690676736, 611.4375253061, 169.1083623936]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047324_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a leather shoes, and two trolleys.", "boxes_value": [[36.11022950940003, 58.6346435584, 178.05493160690003, 305.4656982528], [36.11022950940003, 58.6346435584, 117.55187990959996, 305.4656982528], [113.79309083510009, 135.4821777408, 178.05493160690003, 211.076843264], [86.45471191980005, 280.021301248, 117.11938477580009, 291.7943725568], [109.42755488579996, 110.5597568, 152.96369754839998, 169.1083623936], [94.41509194579999, 117.690676736, 121.43752530610004, 169.1083623936]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047325.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 247.55485499050002, 512.2501625856, 683.0187988237001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047325_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 109.55485499050002, 512, 545]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047325.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four drums, a cymbal, a person, and a sneakers.", "boxes_value": [[0, 247.55485499050002, 512.2501625856, 683.0187988237001], [0, 130.0111432584, 164.980651776, 367.24799176420004], [0, 346.5897268597, 92.7787480064, 618.6695873942], [154.4679445504, 154.898953075, 411.1390660096, 400.5541465619], [68.431260672, 330.70419130330004, 301.5193036288, 569.2551611781], [383.0493924352, 247.55485499050002, 512.2501625856, 387.84089834969996], [64.2558593536, 373.1191406314, 511.9616699392, 683.0187988237001], [287.611999488, 482.3577880549, 340.376342784, 581.4630126806001]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2], [6], [7]]}, {"image_path": "objects365_v1_00047325_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four drums, a cymbal, a person, and a sneakers.", "boxes_value": [[0, 109.55485499050002, 512, 545], [0, 0, 164.980651776, 229.24799176420004], [0, 208.58972685970002, 92.7787480064, 480.6695873942], [154.4679445504, 16.89895307500001, 411.1390660096, 262.5541465619], [68.431260672, 192.70419130330004, 301.5193036288, 431.2551611781], [383.0493924352, 109.55485499050002, 512, 249.84089834969996], [64.2558593536, 235.1191406314, 511.9616699392, 545], [287.611999488, 344.3577880549, 340.376342784, 443.46301268060006]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2], [6], [7]]}, {"image_path": "objects365_v1_00047326.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[0.29180908629999996, 97.4198608384, 450.8580322036, 381.1206054912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047326_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[0.29180908629999996, 71.4198608384, 450.8580322036, 355.1206054912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047326.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include three pictures, two people, and a belt.", "boxes_value": [[0.29180908629999996, 97.4198608384, 450.8580322036, 381.1206054912], [0.29180908629999996, 97.4198608384, 26.4296264472, 261.7147216896], [128.4915771286, 98.6645507584, 259.18066406969996, 179.5672607232], [350.0406493813, 99.9091796992, 450.8580322036, 211.92840576], [275.6895141685, 136.6406860288, 447.08251953260003, 511.84576414720004], [92.4804077292, 53.736938496, 303.91564938060003, 511.8465575936], [146.4512939388, 346.572021504, 248.9325561465, 381.1206054912]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047326_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include three pictures, two people, and a belt.", "boxes_value": [[0.29180908629999996, 71.4198608384, 450.8580322036, 355.1206054912], [0.29180908629999996, 71.4198608384, 26.4296264472, 235.71472168960003], [128.4915771286, 72.6645507584, 259.18066406969996, 153.5672607232], [350.0406493813, 73.9091796992, 450.8580322036, 185.92840576], [275.6895141685, 110.64068602879999, 447.08251953260003, 426], [92.4804077292, 27.736938496, 303.91564938060003, 426], [146.4512939388, 320.572021504, 248.9325561465, 355.1206054912]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047327.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[139.488647492, 120.3059692544, 254.27117917060002, 224.7432861184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047327_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[29.488647492000013, 26.305969254399997, 144.27117917060002, 130.7432861184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047327.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three cabinets, a picture, and a plate.", "boxes_value": [[139.488647492, 120.3059692544, 254.27117917060002, 224.7432861184], [107.46777347039999, 154.2973633024, 174.4653320566, 226.221191424], [172.0021362668, 161.6868285952, 231.11761475420002, 204.052917504], [223.7282104588, 165.6278076416, 254.27117917060002, 224.7432861184], [139.488647492, 120.3059692544, 200.0820312126, 160.2089233408], [203.4979247952, 135.6257324032, 235.1986083656, 163.494445824]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047327_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three cabinets, a picture, and a plate.", "boxes_value": [[29.488647492000013, 26.305969254399997, 144.27117917060002, 130.7432861184], [0, 60.29736330239999, 64.4653320566, 132.221191424], [62.002136266799994, 67.68682859520001, 121.11761475420002, 110.05291750399999], [113.72821045879999, 71.62780764159999, 144.27117917060002, 130.7432861184], [29.488647492000013, 26.305969254399997, 90.08203121259999, 66.2089233408], [93.49792479519999, 41.625732403200004, 125.1986083656, 69.494445824]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047328.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[251.65570071, 235.7712298496, 428.9139830537, 386.0357055488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047328_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[44.65570070999999, 37.77122984959999, 221.9139830537, 188.03570554880002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047328.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a hat, a pie, and three breads.", "boxes_value": [[251.65570071, 235.7712298496, 428.9139830537, 386.0357055488], [378.111083952, 236.1343994368, 499.8261718767, 383.9824828928], [386.6719105193, 235.7712298496, 428.9139830537, 273.2821902336], [238.1468505907, 365.5852050944, 292.598327633, 381.8961181696], [317.916992154, 358.8672485376, 405.5737304827, 386.0357055488], [279.33679201859997, 349.9178466816, 325.6184082222, 365.442688], [251.65570071, 335.1253051904, 285.3416748129, 348.7461548032]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047328_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a hat, a pie, and three breads.", "boxes_value": [[44.65570070999999, 37.77122984959999, 221.9139830537, 188.03570554880002], [171.111083952, 38.13439943680001, 266, 185.98248289280002], [179.6719105193, 37.77122984959999, 221.9139830537, 75.28219023359998], [31.146850590700012, 167.5852050944, 85.598327633, 183.89611816960002], [110.91699215400001, 160.8672485376, 198.5737304827, 188.03570554880002], [72.33679201859997, 151.9178466816, 118.6184082222, 167.44268799999998], [44.65570070999999, 137.1253051904, 78.34167481290001, 150.7461548032]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047330.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[29.2396850829, 158.2663574016, 416.2993163978, 226.6491088896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047330_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[29.2396850829, 17.266357401600004, 416.2993163978, 85.6491088896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047330.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a cabinet, three people, a hat, and two bowls.", "boxes_value": [[29.2396850829, 158.2663574016, 416.2993163978, 226.6491088896], [369.06567380229995, 184.6022339072, 429.8659668264, 231.3716430848], [29.2396850829, 158.2663574016, 416.2993163978, 226.6491088896], [325.8451538406, 194.1109008896, 341.2385254025, 220.2438354432], [387.4187011484, 167.2108154368, 445.412353542, 232.7733154304], [0, 29.9492187648, 449.88854983429997, 487.352722176], [402.16625973810005, 166.7931518464, 426.7388916234, 182.7991332864], [39.591308622700005, 177.5277709824, 77.0390014603, 190.9465332224], [76.4148559801, 177.8398437376, 114.17462155780001, 190.6344604672]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7, 8]]}, {"image_path": "objects365_v1_00047330_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a cabinet, three people, a hat, and two bowls.", "boxes_value": [[29.2396850829, 17.266357401600004, 416.2993163978, 85.6491088896], [369.06567380229995, 43.6022339072, 429.8659668264, 90.37164308480001], [29.2396850829, 17.266357401600004, 416.2993163978, 85.6491088896], [325.8451538406, 53.1109008896, 341.2385254025, 79.2438354432], [387.4187011484, 26.210815436800004, 445.412353542, 91.7733154304], [0, 0, 449.88854983429997, 102], [402.16625973810005, 25.793151846400008, 426.7388916234, 41.79913328640001], [39.591308622700005, 36.5277709824, 77.0390014603, 49.94653322240001], [76.4148559801, 36.839843737600006, 114.17462155780001, 49.6344604672]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7, 8]]}, {"image_path": "objects365_v1_00047331.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[192.087768565, 0, 341.368042015, 161.730834944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047331_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[38.087768565000005, 0, 187.36804201500001, 161.730834944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047331.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, a hat, and two plates.", "boxes_value": [[192.087768565, 0, 341.368042015, 161.730834944], [192.087768565, 0, 260.96020504399996, 16.53704832], [299.63745119, 6.6981201408, 341.368042015, 91.8557128704], [312.496948225, 145.0642700288, 337.699951204, 161.730834944], [301.292114277, 122.2119140864, 327.466308631, 150.0568237056], [251.673584008, 101.8970947072, 280.838623065, 125.7270508032]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047331_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, a hat, and two plates.", "boxes_value": [[38.087768565000005, 0, 187.36804201500001, 161.730834944], [38.087768565000005, 0, 106.96020504399996, 16.53704832], [145.63745118999998, 6.6981201408, 187.36804201500001, 91.8557128704], [158.49694822499998, 145.0642700288, 183.699951204, 161.730834944], [147.292114277, 122.2119140864, 173.466308631, 150.0568237056], [97.673584008, 101.8970947072, 126.83862306499998, 125.7270508032]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047334.jpg", "text": "Please interpret and describe the area inside the given picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[168.856689456, 427.838745088, 407.36694336, 602.92309568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047334_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[59.856689456, 43.838745087999996, 298.36694336, 218.92309567999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047334.jpg", "text": "Please interpret and describe the area inside the given picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a tricycle, and two motorcycles.", "boxes_value": [[168.856689456, 427.838745088, 407.36694336, 602.92309568], [251.92858886399998, 468.841918976, 274.531738272, 554.8498535040001], [168.856689456, 444.77514649600005, 206.472106944, 511.89733888], [374.44244385599995, 476.557739264, 395.271850608, 499.87133792000003], [207.39978028800002, 427.838745088, 407.36694336, 602.92309568], [151.302856464, 457.00683596799996, 214.411621104, 527.591186496], [287.0909424, 536.831665024, 464.421997056, 639.642089856]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047334_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a tricycle, and two motorcycles.", "boxes_value": [[59.856689456, 43.838745087999996, 298.36694336, 218.92309567999996], [142.92858886399998, 84.84191897599999, 165.53173827199998, 170.84985350400007], [59.856689456, 60.77514649600005, 97.47210694399999, 127.89733888], [265.44244385599995, 92.55773926400002, 286.271850608, 115.87133792000003], [98.39978028800002, 43.838745087999996, 298.36694336, 218.92309567999996], [42.302856464, 73.00683596799996, 105.411621104, 143.59118649599998], [178.09094240000002, 152.83166502400002, 355.421997056, 255.64208985599998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047335.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[123.88043616000002, 424.9543653376, 305.483458656, 500.2584590336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047335_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference.", "boxes_value": [[45.880436160000016, 18.954365337599995, 227.48345865599998, 94.25845903359999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047335.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[123.88043616000002, 424.9543653376, 305.483458656, 500.2584590336], [151.182506016, 478.970764032, 168.97518431999998, 500.2584590336], [188.53078550400002, 474.5223565824, 215.85075360000002, 495.22956544], [248.390653344, 468.6624614912, 269.393751744, 492.8630273024], [282.502734816, 470.9895503872, 305.483458656, 493.8319564288], [123.88043616000002, 424.9543653376, 147.349012704, 437.8531249664]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047335_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[45.880436160000016, 18.954365337599995, 227.48345865599998, 94.25845903359999], [73.18250601599999, 72.97076403199998, 90.97518431999998, 94.25845903359999], [110.53078550400002, 68.5223565824, 137.85075360000002, 89.22956543999999], [170.390653344, 62.66246149120002, 191.39375174399999, 86.86302730239998], [204.502734816, 64.98955038719998, 227.48345865599998, 87.83195642880003], [45.880436160000016, 18.954365337599995, 69.34901270399999, 31.853124966400003]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047339.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates.", "boxes_value": [[267.489807104, 582.8365478861999, 438.2129516544, 636.7956542676]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047339_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates.", "boxes_value": [[43.48980710400002, 13.836547886199924, 214.2129516544, 67.79565426759996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047339.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a sneakers, and two boots.", "boxes_value": [[267.489807104, 582.8365478861999, 438.2129516544, 636.7956542676], [213.1989135872, 209.103210423, 396.9696045056, 630.382446297], [363.7979736576, 404.81567379539996, 479.8985595904, 637.0168457088], [267.489807104, 601.4125976219999, 319.6797485568, 628.8344726255999], [391.3303833088, 582.8365478861999, 410.7910766592, 632.3728027032], [416.540832512, 603.1817626608, 438.2129516544, 636.7956542676]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047339_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a sneakers, and two boots.", "boxes_value": [[43.48980710400002, 13.836547886199924, 214.2129516544, 67.79565426759996], [0, 0, 172.9696045056, 61.382446297], [139.79797365759998, 0, 255.8985595904, 68.01684570880002], [43.48980710400002, 32.41259762199991, 95.6797485568, 59.834472625599915], [167.33038330879998, 13.836547886199924, 186.79107665919997, 63.37280270320002], [192.540832512, 34.18176266080002, 214.2129516544, 67.79565426759996]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047340.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[489.465332032, 253.26977539199999, 536.718505856, 479.333190912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047340_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[12.465332031999992, 57.269775391999985, 59.71850585599998, 283.333190912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047340.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a couch, a vase, a desk, a stool, and two books.", "boxes_value": [[489.465332032, 253.26977539199999, 536.718505856, 479.333190912], [399.068237312, 221.471130384, 563.3099365119999, 410.24902344000003], [509.012329088, 253.26977539199999, 536.718505856, 308.096862816], [488.721801728, 293.25201417600005, 638.732910144, 479.809326192], [489.465332032, 355.691833488, 531.621582016, 479.333190912], [513.895141632, 329.191589376, 565.4638672, 357.089416512], [514.846191424, 313.823608416, 558.866821312, 332.30139158400004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047340_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a couch, a vase, a desk, a stool, and two books.", "boxes_value": [[12.465332031999992, 57.269775391999985, 59.71850585599998, 283.333190912], [0, 25.47113038399999, 71, 214.24902344000003], [32.012329088, 57.269775391999985, 59.71850585599998, 112.096862816], [11.721801728000003, 97.25201417600005, 71, 283.809326192], [12.465332031999992, 159.691833488, 54.62158201600005, 283.333190912], [36.89514163199999, 133.19158937600002, 71, 161.089416512], [37.84619142400004, 117.82360841600001, 71, 136.30139158400004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047342.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[475.2620849664, 79.9005127168, 681.0085449216, 177.1264037888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047342_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[52.26208496639998, 24.900512716799994, 258.0085449216, 122.12640378879999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047342.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two hats, and three glasses.", "boxes_value": [[475.2620849664, 79.9005127168, 681.0085449216, 177.1264037888], [491.46630858239996, 130.6043701248, 589.7376708864, 177.1264037888], [475.2620849664, 143.149597184, 515.5114745856, 164.5811156992], [536.4202881024, 111.7864379904, 573.5334472704, 126.4226074112], [516.5568847872, 79.9005127168, 614.8282470912, 154.1267700224], [651.1154785536, 128.2076416, 681.0085449216, 139.4717407232]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00047342_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two hats, and three glasses.", "boxes_value": [[52.26208496639998, 24.900512716799994, 258.0085449216, 122.12640378879999], [68.46630858239996, 75.6043701248, 166.7376708864, 122.12640378879999], [52.26208496639998, 88.14959718399999, 92.51147458560001, 109.58111569920001], [113.42028810240004, 56.7864379904, 150.53344727039996, 71.4226074112], [93.55688478720003, 24.900512716799994, 191.82824709119996, 99.1267700224], [228.11547855360004, 73.20764159999999, 258.0085449216, 84.47174072320001]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00047343.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[0, 237.0462035968, 443.610473667, 472.5952758784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047343_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[0, 59.04620359680001, 443.610473667, 294.5952758784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047343.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, two pillows, a nightstand, a power outlet, and a laptop.", "boxes_value": [[0, 237.0462035968, 443.610473667, 472.5952758784], [48.945739705499996, 243.4503783936, 900.9005127345, 511.56707763199995], [281.33386233, 244.6920165888, 443.610473667, 358.643615744], [228.2359618755, 350.2911376896, 672.1103515545, 431.4295044096], [0, 334.1828002816, 187.6667480565, 472.5952758784], [102.434509311, 237.0462035968, 134.4537353865, 265.7531738112], [33.033569363999995, 332.1283569152, 161.03613284399998, 387.427673344]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047343_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, two pillows, a nightstand, a power outlet, and a laptop.", "boxes_value": [[0, 59.04620359680001, 443.610473667, 294.5952758784], [48.945739705499996, 65.4503783936, 554, 333.56707763199995], [281.33386233, 66.69201658879999, 443.610473667, 180.643615744], [228.2359618755, 172.29113768960002, 554, 253.42950440959999], [0, 156.1828002816, 187.6667480565, 294.5952758784], [102.434509311, 59.04620359680001, 134.4537353865, 87.75317381119999], [33.033569363999995, 154.12835691520002, 161.03613284399998, 209.42767334400003]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047345.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[308.694763195, 214.04632566240002, 650.134521465, 393.8740844928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047345_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[85.69476319500001, 45.04632566240002, 427, 224.87408449280002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047345.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a flower, a vase, a desk, and a potted plant.", "boxes_value": [[308.694763195, 214.04632566240002, 650.134521465, 393.8740844928], [449.76818848, 216.4210815264, 515.508056635, 326.3557739184], [370.16143801, 223.05633544480003, 462.00427247000005, 275.2397460728], [378.96813968, 264.50408933200004, 451.04516599999994, 282.52337645759997], [308.694763195, 270.98858643520003, 525.58300782, 382.8832397664], [558.524169905, 214.04632566240002, 650.134521465, 393.8740844928], [542.2816162400001, 225.88909912160003, 649.612304705, 363.2393799008]], "boxes_seq": [[0], [0], [1, 6], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047345_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a flower, a vase, a desk, and a potted plant.", "boxes_value": [[85.69476319500001, 45.04632566240002, 427, 224.87408449280002], [226.76818848, 47.4210815264, 292.508056635, 157.35577391840002], [147.16143800999998, 54.05633544480003, 239.00427247000005, 106.23974607280002], [155.96813967999998, 95.50408933200004, 228.04516599999994, 113.52337645759997], [85.69476319500001, 101.98858643520003, 302.58300782000003, 213.88323976639998], [335.524169905, 45.04632566240002, 427, 224.87408449280002], [319.28161624000006, 56.889099121600026, 426.61230470500004, 194.23937990079997]], "boxes_seq": [[0], [0], [1, 6], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047346.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[425.0611572415, 129.749206528, 683.8309326092001, 376.8585815552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047346_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[65.06115724149998, 62.749206528, 323, 309.8585815552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047346.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two people, a leather shoes, and a motorcycle.", "boxes_value": [[425.0611572415, 129.749206528, 683.8309326092001, 376.8585815552], [592.2918701080999, 213.5642089984, 683.8309326092001, 312.0264892416], [594.2142333706, 129.749206528, 631.4893798499, 202.8378906112], [531.3579101592, 133.403625472, 623.4497070072, 370.2110595584], [548.6538086146, 330.0480956928, 563.9360351767, 365.7728881664], [425.0611572415, 165.4244995072, 527.3879394654, 376.8585815552]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047346_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two people, a leather shoes, and a motorcycle.", "boxes_value": [[65.06115724149998, 62.749206528, 323, 309.8585815552], [232.29187010809994, 146.5642089984, 323, 245.02648924160002], [234.21423337060003, 62.749206528, 271.48937984990005, 135.8378906112], [171.35791015919995, 66.40362547199999, 263.4497070072, 303.2110595584], [188.65380861460005, 263.0480956928, 203.9360351767, 298.7728881664], [65.06115724149998, 98.42449950720001, 167.3879394654, 309.8585815552]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047347.jpg", "text": "Can you discuss the entities within the region of image ? Specify the location of each mentioned object.", "boxes_value": [[30.0507201918, 176.609069824, 139.8572387674, 439.1557006848001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047347_crop.jpg", "text": "Can you discuss the entities within the region of image ? Specify the location of each mentioned object.", "boxes_value": [[28.0507201918, 66.60906982399999, 137.8572387674, 329.1557006848001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047347.jpg", "text": "Can you discuss the entities within the region of image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a sneakers, two bowls, a plate, two desks, and three chairs.", "boxes_value": [[30.0507201918, 176.609069824, 139.8572387674, 439.1557006848001], [30.0507201918, 411.867309568, 57.103149389, 430.5785522688], [28.3931274209, 252.7571410944, 75.1199951205, 274.475219712], [37.6068725384, 233.6471557632, 73.5843505584, 252.2940063232], [0.1098632601, 272.0621338112, 51.2081299059, 295.5352173056], [51.025634756799995, 152.9206542848, 293.1738891367, 329.925781248], [0.3587646744, 238.4621581824, 73.39807127290001, 356.2462158336], [70.7659912036, 176.609069824, 139.8572387674, 439.1557006848001], [97.0864868127, 165.4229126144, 192.4981079054, 345.7180786176], [9.5709228659, 245.7003173888, 164.86163327260002, 511.5369872896]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7, 8, 9]]}, {"image_path": "objects365_v1_00047347_crop.jpg", "text": "Can you discuss the entities within the region of image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a sneakers, two bowls, a plate, two desks, and three chairs.", "boxes_value": [[28.0507201918, 66.60906982399999, 137.8572387674, 329.1557006848001], [28.0507201918, 301.867309568, 55.103149389, 320.5785522688], [26.3931274209, 142.7571410944, 73.1199951205, 164.475219712], [35.6068725384, 123.6471557632, 71.5843505584, 142.2940063232], [0, 162.0621338112, 49.2081299059, 185.53521730559999], [49.025634756799995, 42.92065428480001, 165, 219.92578124800002], [0, 128.4621581824, 71.39807127290001, 246.2462158336], [68.7659912036, 66.60906982399999, 137.8572387674, 329.1557006848001], [95.0864868127, 55.42291261439999, 165, 235.71807861759999], [7.5709228659, 135.7003173888, 162.86163327260002, 394]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6], [7, 8, 9]]}, {"image_path": "objects365_v1_00047349.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[154.7346801664, 57.121459967999996, 511.29949952, 726.910888704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047349_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[89.73468016640001, 57.121459967999996, 446.29949952, 726.910888704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047349.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, and four people.", "boxes_value": [[154.7346801664, 57.121459967999996, 511.29949952, 726.910888704], [339.7664795136, 68.1726074112, 511.29949952, 361.8579101184], [154.7346801664, 157.856506368, 304.2935791104, 689.0819091456001], [263.8277587968, 129.1369628928, 392.2228393472, 649.0319824128001], [332.5357055488, 103.12719728639999, 511.1461791744, 726.910888704], [466.493591296, 57.121459967999996, 511.1461791744, 201.72625735679998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047349_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, and four people.", "boxes_value": [[89.73468016640001, 57.121459967999996, 446.29949952, 726.910888704], [274.7664795136, 68.1726074112, 446.29949952, 361.8579101184], [89.73468016640001, 157.856506368, 239.2935791104, 689.0819091456001], [198.82775879680003, 129.1369628928, 327.2228393472, 649.0319824128001], [267.5357055488, 103.12719728639999, 446.1461791744, 726.910888704], [401.493591296, 57.121459967999996, 446.1461791744, 201.72625735679998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047354.jpg", "text": "In the submitted image , please give a synopsis of the area . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[267.248535168, 319.7541504, 450.2769775104, 407.4525756928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047354_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[46.24853516799999, 22.754150400000015, 229.2769775104, 110.4525756928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047354.jpg", "text": "In the submitted image , please give a synopsis of the area . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and four horses.", "boxes_value": [[267.248535168, 319.7541504, 450.2769775104, 407.4525756928], [427.1871337728, 322.6413574144, 450.2769775104, 399.126281728], [400.18029788160004, 320.9920654336, 416.05456542720003, 398.92010496], [267.248535168, 331.7526244864, 371.78649899519996, 407.4525756928], [266.0468749824, 310.12408448, 309.9047851776, 405.049438464], [345.83593751039996, 333.7021484544, 386.44470213119996, 406.5330200064], [371.47546383360003, 319.7541504, 447.7514648064, 402.9226074112]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047354_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and four horses.", "boxes_value": [[46.24853516799999, 22.754150400000015, 229.2769775104, 110.4525756928], [206.1871337728, 25.641357414399977, 229.2769775104, 102.12628172799998], [179.18029788160004, 23.992065433599976, 195.05456542720003, 101.92010496], [46.24853516799999, 34.75262448640001, 150.78649899519996, 110.4525756928], [45.04687498240003, 13.124084480000022, 88.90478517759999, 108.04943846399999], [124.83593751039996, 36.7021484544, 165.44470213119996, 109.53302000640002], [150.47546383360003, 22.754150400000015, 226.7514648064, 105.92260741119998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047356.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[70.474182144, 0, 355.6706542848, 362.9596557824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047356_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[70.474182144, 0, 355.6706542848, 362.9596557824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047356.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two vases, two dinning tables, and two flags.", "boxes_value": [[70.474182144, 0, 355.6706542848, 362.9596557824], [278.3493652224, 105.8183593984, 355.6706542848, 144.479003904], [239.2071533568, 271.526977536, 375.9564208896, 379.416015616], [82.0722045696, 238.03936768, 240.62487790079996, 362.9596557824], [70.474182144, 0, 176.93957521919998, 37.7161865216], [145.713684096, 0, 239.39135738880003, 65.968200704], [291.137084928, 128.7173462016, 338.1246337536, 155.1849975808]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4, 6]]}, {"image_path": "objects365_v1_00047356_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two vases, two dinning tables, and two flags.", "boxes_value": [[70.474182144, 0, 355.6706542848, 362.9596557824], [278.3493652224, 105.8183593984, 355.6706542848, 144.479003904], [239.2071533568, 271.526977536, 375.9564208896, 379.416015616], [82.0722045696, 238.03936768, 240.62487790079996, 362.9596557824], [70.474182144, 0, 176.93957521919998, 37.7161865216], [145.713684096, 0, 239.39135738880003, 65.968200704], [291.137084928, 128.7173462016, 338.1246337536, 155.1849975808]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4, 6]]}, {"image_path": "objects365_v1_00047357.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations.", "boxes_value": [[340.9240112106, 185.9588012544, 466.4544677932, 369.8634643456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047357_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations.", "boxes_value": [[31.924011210599986, 46.95880125439999, 157.45446779320002, 230.8634643456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047357.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two hockey sticks.", "boxes_value": [[340.9240112106, 185.9588012544, 466.4544677932, 369.8634643456], [434.44824217089996, 219.0556030464, 489.81811523150003, 347.869506816], [362.60559083690003, 221.8127441408, 419.7476806746, 358.3961792], [370.9440918203, 204.5905151488, 424.7795410423, 263.6200561664], [340.9240112106, 291.4129028096, 380.5377197062, 369.8634643456], [447.05187987939996, 185.9588012544, 466.4544677932, 241.8839111168]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047357_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two hockey sticks.", "boxes_value": [[31.924011210599986, 46.95880125439999, 157.45446779320002, 230.8634643456], [125.44824217089996, 80.0556030464, 180.81811523150003, 208.869506816], [53.605590836900035, 82.81274414079999, 110.74768067460002, 219.3961792], [61.9440918203, 65.5905151488, 115.77954104230002, 124.62005616639999], [31.924011210599986, 152.41290280959998, 71.5377197062, 230.8634643456], [138.05187987939996, 46.95880125439999, 157.45446779320002, 102.8839111168]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047359.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[183.3543090688, 336.4424438746, 295.1415405056, 643.1243896221]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047359_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[28.354309068800006, 77.44244387459997, 140.1415405056, 384.1243896221]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047359.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[183.3543090688, 336.4424438746, 295.1415405056, 643.1243896221], [183.3543090688, 336.4424438746, 202.4942016512, 367.36083982450003], [191.4519653376, 357.05468748780004, 225.3148803584, 413.0020752281], [229.2480468992, 562.4384765363, 295.1415405056, 643.1243896221], [186.8879394304, 545.6289062502001, 232.6099853312, 629.6766357652], [254.330686208, 563.2947006355, 273.995206912, 578.6152343454]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047359_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[28.354309068800006, 77.44244387459997, 140.1415405056, 384.1243896221], [28.354309068800006, 77.44244387459997, 47.4942016512, 108.36083982450003], [36.4519653376, 98.05468748780004, 70.31488035839999, 154.0020752281], [74.2480468992, 303.43847653629996, 140.1415405056, 384.1243896221], [31.887939430399996, 286.6289062502001, 77.6099853312, 370.6766357652], [99.330686208, 304.2947006355, 118.99520691200001, 319.61523434540004]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047360.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[293.8123168768, 367.9812011509, 389.9972534272, 438.5412597678]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047360_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[24.812316876800026, 17.981201150900006, 120.99725342720001, 88.5412597678]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047360.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, three storage boxes, a moniter, and a keyboard.", "boxes_value": [[293.8123168768, 367.9812011509, 389.9972534272, 438.5412597678], [104.1098632704, 395.8557128869, 466.6801757696, 521.6716308799], [293.8123168768, 367.9812011509, 343.1995849728, 402.2861328066], [331.6660156416, 399.3287963969, 351.4801025536, 416.1855468864], [297.2509155328, 390.4951781987, 351.5496216064, 415.0833130048], [348.1764526592, 366.63067625660005, 415.7581176832, 428.7816772346], [354.576843264, 426.89733886930003, 389.9972534272, 438.5412597678]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047360_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, three storage boxes, a moniter, and a keyboard.", "boxes_value": [[24.812316876800026, 17.981201150900006, 120.99725342720001, 88.5412597678], [0, 45.855712886899994, 145, 106], [24.812316876800026, 17.981201150900006, 74.19958497279998, 52.286132806599994], [62.666015641599984, 49.328796396899975, 82.48010255359998, 66.1855468864], [28.25091553279998, 40.49517819869999, 82.54962160640002, 65.0833130048], [79.17645265919998, 16.630676256600054, 145, 78.78167723460001], [85.57684326399999, 76.89733886930003, 120.99725342720001, 88.5412597678]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047361.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each object you identify.", "boxes_value": [[172.2805786169, 332.5702514688, 323.7854614073, 439.6771850752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047361_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each object you identify.", "boxes_value": [[38.28057861689999, 27.570251468799995, 189.78546140729998, 134.6771850752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047361.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[172.2805786169, 332.5702514688, 323.7854614073, 439.6771850752], [280.920715326, 391.9243774464, 323.7854614073, 439.6771850752], [215.8715820251, 388.1643066368, 262.8724365012, 433.2850952192], [196.56097413109998, 363.4485473792, 231.3684082117, 410.1807861248], [172.2805786169, 332.5702514688, 210.8765869239, 371.6398925824], [189.09234618230002, 349.618835456, 218.2169799835, 388.4516601344]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047361_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[38.28057861689999, 27.570251468799995, 189.78546140729998, 134.6771850752], [146.920715326, 86.9243774464, 189.78546140729998, 134.6771850752], [81.8715820251, 83.1643066368, 128.8724365012, 128.2850952192], [62.56097413109998, 58.44854737920002, 97.3684082117, 105.18078612480002], [38.28057861689999, 27.570251468799995, 76.87658692389999, 66.63989258240002], [55.09234618230002, 44.618835456, 84.21697998350001, 83.45166013440002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047362.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[5.73870848, 395.26916502819995, 257.909606912, 682.2174071938999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047362_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[5.73870848, 72.26916502819995, 257.909606912, 359.21740719389993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047362.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a cup, a plate, a desk, a napkin, and two chairs.", "boxes_value": [[5.73870848, 395.26916502819995, 257.909606912, 682.2174071938999], [101.8248290816, 380.0866699212, 131.7680664064, 408.7647704798], [94.6552734208, 395.26916502819995, 143.1549682688, 412.56030270040003], [67.7649536, 329.8265991393, 493.9500732416, 672.1783447392], [151.5211791872, 382.8043213206, 222.8045043712, 433.1219482393], [5.73870848, 415.0235595486, 257.909606912, 682.2174071938999], [0.1434326016, 357.5186767761, 62.6879272448, 629.7712402125001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047362_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a cup, a plate, a desk, a napkin, and two chairs.", "boxes_value": [[5.73870848, 72.26916502819995, 257.909606912, 359.21740719389993], [101.8248290816, 57.086669921199984, 131.7680664064, 85.7647704798], [94.6552734208, 72.26916502819995, 143.1549682688, 89.56030270040003], [67.7649536, 6.826599139300015, 320, 349.17834473920004], [151.5211791872, 59.804321320600025, 222.8045043712, 110.12194823930002], [5.73870848, 92.02355954860002, 257.909606912, 359.21740719389993], [0.1434326016, 34.518676776099994, 62.6879272448, 306.77124021250006]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047363.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify.", "boxes_value": [[593.837646488, 106.0106811392, 839.8121338122, 511.7606201344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047363_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify.", "boxes_value": [[61.83764648800002, 102.0106811392, 307.81213381220005, 507.7606201344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047363.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a stool, three bottles, and a cup.", "boxes_value": [[593.837646488, 106.0106811392, 839.8121338122, 511.7606201344], [740.9881591744, 264.7006835712, 839.8121338122, 511.7606201344], [630.9414062540001, 343.6899414016, 723.348144494, 503.9874877952], [639.3947753876, 228.531921408, 655.4376220758, 259.1960449024], [758.802246072, 106.0106811392, 784.9987792644, 145.2039184384], [593.837646488, 183.1923828224, 614.3099365634, 212.8169555456], [782.6931152558, 123.1463623168, 796.4295653954, 145.7246704128]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047363_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a stool, three bottles, and a cup.", "boxes_value": [[61.83764648800002, 102.0106811392, 307.81213381220005, 507.7606201344], [208.98815917440004, 260.7006835712, 307.81213381220005, 507.7606201344], [98.94140625400007, 339.6899414016, 191.34814449400005, 499.9874877952], [107.39477538760002, 224.531921408, 123.43762207580005, 255.1960449024], [226.802246072, 102.0106811392, 252.9987792644, 141.2039184384], [61.83764648800002, 179.1923828224, 82.30993656340002, 208.8169555456], [250.69311525579997, 119.1463623168, 264.4295653954, 141.7246704128]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047364.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object.", "boxes_value": [[284.25701904150003, 435.875610368, 407.3234863267, 511.7153930752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047364_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object.", "boxes_value": [[31.25701904150003, 19.875610368000025, 154.3234863267, 95.71539307519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047364.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[284.25701904150003, 435.875610368, 407.3234863267, 511.7153930752], [306.3822631738, 92.6156616192, 398.10693358860004, 476.675720192], [308.7772216612, 446.3206176768, 338.7498169008, 476.2932128768], [367.35998532319996, 435.875610368, 407.3234863267, 511.7153930752], [364.01745606270003, 456.1425170944, 382.81481930380005, 481.988769536], [284.25701904150003, 453.8994140672, 303.2133788771, 472.8557739008], [291.9007568525, 481.416748032, 310.5513916134, 505.8765869056]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047364_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[31.25701904150003, 19.875610368000025, 154.3234863267, 95.71539307519998], [53.38226317380003, 0, 145.10693358860004, 60.67572019200003], [55.777221661199974, 30.320617676799998, 85.7498169008, 60.2932128768], [114.35998532319996, 19.875610368000025, 154.3234863267, 95.71539307519998], [111.01745606270003, 40.14251709439998, 129.81481930380005, 65.988769536], [31.25701904150003, 37.899414067199984, 50.21337887710001, 56.85577390079999], [38.900756852500024, 65.41674803199999, 57.55139161340003, 89.87658690559999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047365.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[534.6480712572, 274.4971313664, 604.2304687196, 458.771667456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047365_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[17.648071257199945, 46.497131366400026, 87.23046871960003, 230.771667456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047365.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, a nightstand, a flower, a vase, two chairs, and a telephone.", "boxes_value": [[534.6480712572, 274.4971313664, 604.2304687196, 458.771667456], [49.169677732800004, 198.688842752, 578.4281006244, 445.2377929728], [525.7657470536, 290.2404785152, 591.8626708976, 346.5781250048], [485.5982665964, 317.8959350784, 663.5466308463999, 458.771667456], [534.6480712572, 428.5432739328, 604.2304687196, 458.771667456], [516.3969726804, 268.2757568512, 771.3421630511999, 436.528137216], [72.6669921704, 345.842956544, 599.0972900252, 511.8139648512], [554.7222900624, 274.4971313664, 593.8004150611999, 291.0785522688]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047365_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, a nightstand, a flower, a vase, two chairs, and a telephone.", "boxes_value": [[17.648071257199945, 46.497131366400026, 87.23046871960003, 230.771667456], [0, 0, 61.42810062440003, 217.23779297279998], [8.765747053599966, 62.24047851519998, 74.86267089759997, 118.57812500479997], [0, 89.89593507839999, 104, 230.771667456], [17.648071257199945, 200.5432739328, 87.23046871960003, 230.771667456], [0, 40.275756851200015, 104, 208.528137216], [0, 117.842956544, 82.09729002519998, 276], [37.72229006240002, 46.497131366400026, 76.80041506119994, 63.078552268800024]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047367.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please mention the objects and their locations.", "boxes_value": [[94.9260254208, 283.88262938139997, 345.4698486272, 550.4163818415001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047367_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please mention the objects and their locations.", "boxes_value": [[62.9260254208, 66.88262938139997, 313.4698486272, 333.4163818415001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047367.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, two people, a hat, and a sneakers.", "boxes_value": [[94.9260254208, 283.88262938139997, 345.4698486272, 550.4163818415001], [94.9260254208, 391.55175783090004, 132.7287597568, 413.2528076016], [236.7256469504, 283.7506103307, 287.9808349696, 495.0052490241], [295.5998535168, 374.48620604309997, 345.4698486272, 550.4163818415001], [246.4915771392, 283.88262938139997, 281.5322875904, 300.75408937099996], [243.6499633664, 471.7828369019, 280.8961792, 492.8740234054]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047367_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, two people, a hat, and a sneakers.", "boxes_value": [[62.9260254208, 66.88262938139997, 313.4698486272, 333.4163818415001], [62.9260254208, 174.55175783090004, 100.7287597568, 196.2528076016], [204.7256469504, 66.7506103307, 255.98083496959998, 278.0052490241], [263.5998535168, 157.48620604309997, 313.4698486272, 333.4163818415001], [214.4915771392, 66.88262938139997, 249.53228759040002, 83.75408937099996], [211.6499633664, 254.7828369019, 248.8961792, 275.8740234054]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047368.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[0.005104512, 301.7963867136, 547.165819776, 511.9937055232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047368_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[0.005104512, 52.796386713599986, 547.165819776, 262.9937055232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047368.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, a helmet, and a hockey stick.", "boxes_value": [[0.005104512, 301.7963867136, 547.165819776, 511.9937055232], [329.6678466816, 52.2960205312, 549.0964355328, 511.55133056], [0.6109009152, 301.7963867136, 86.3825073408, 511.9696045056], [71.4613224192, 380.3472622592, 158.1112686336, 427.9125518336], [0.005104512, 302.098949888, 84.89335526400001, 389.644366592], [477.17747880959996, 478.3515626496, 547.165819776, 511.9937055232], [448.8719482368, 267.3738403328, 481.28503418880007, 386.7907715072]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00047368_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, a helmet, and a hockey stick.", "boxes_value": [[0.005104512, 52.796386713599986, 547.165819776, 262.9937055232], [329.6678466816, 0, 549.0964355328, 262.55133056], [0.6109009152, 52.796386713599986, 86.3825073408, 262.9696045056], [71.4613224192, 131.34726225920002, 158.1112686336, 178.9125518336], [0.005104512, 53.09894988799999, 84.89335526400001, 140.64436659199998], [477.17747880959996, 229.3515626496, 547.165819776, 262.9937055232], [448.8719482368, 18.3738403328, 481.28503418880007, 137.79077150720002]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00047370.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each object you identify.", "boxes_value": [[154.67834475799998, 334.9600219648, 348.81311035140004, 471.9667358208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047370_crop.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each object you identify.", "boxes_value": [[48.67834475799998, 34.96002196479998, 242.81311035140004, 171.9667358208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047370.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each object you identify. For your reference, objects involved in this region include three suvs, three cars, and a van.", "boxes_value": [[154.67834475799998, 334.9600219648, 348.81311035140004, 471.9667358208], [224.1771850841, 413.069396992, 326.6585082837, 471.9667358208], [186.0902099837, 391.47375488, 311.7378539988, 450.3710327296], [154.67834475799998, 373.8045043712, 269.3317870857, 425.634155264], [289.675354003, 341.883483904, 382.2763671699, 397.8479614464], [251.3079223691, 334.9600219648, 348.81311035140004, 382.8471679488], [234.864746067, 314.4781494272, 327.4657593022, 362.3652954112], [210.6326904242, 310.1510009856, 295.73333738360003, 351.6916504064]], "boxes_seq": [[0], [0], [1, 2, 6], [3, 5, 7], [4]]}, {"image_path": "objects365_v1_00047370_crop.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each object you identify. For your reference, objects involved in this region include three suvs, three cars, and a van.", "boxes_value": [[48.67834475799998, 34.96002196479998, 242.81311035140004, 171.9667358208], [118.17718508409999, 113.06939699200001, 220.65850828369997, 171.9667358208], [80.09020998369999, 91.47375488, 205.7378539988, 150.37103272960002], [48.67834475799998, 73.80450437119998, 163.3317870857, 125.63415526400001], [183.675354003, 41.883483904, 276.2763671699, 97.8479614464], [145.3079223691, 34.96002196479998, 242.81311035140004, 82.8471679488], [128.864746067, 14.47814942719998, 221.4657593022, 62.36529541120001], [104.6326904242, 10.151000985599978, 189.73333738360003, 51.691650406400015]], "boxes_seq": [[0], [0], [1, 2, 6], [3, 5, 7], [4]]}, {"image_path": "objects365_v1_00047379.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates.", "boxes_value": [[0, 246.6066894732, 234.3436279296, 398.0560913224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047379_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates.", "boxes_value": [[0, 38.60668947319999, 234.3436279296, 190.0560913224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047379.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[0, 246.6066894732, 234.3436279296, 398.0560913224], [17.733215308800002, 246.6066894732, 58.959533721599996, 323.3587036225], [0, 261.76220704130003, 40.0863647232, 398.0560913224], [66.7037353728, 251.17938232449998, 105.5074463232, 395.8112182444], [107.3738403072, 260.60858156620003, 126.70343016959998, 336.9750976732], [200.84851077119998, 255.93481444789998, 234.3436279296, 356.42016600750003]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047379_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[0, 38.60668947319999, 234.3436279296, 190.0560913224], [17.733215308800002, 38.60668947319999, 58.959533721599996, 115.3587036225], [0, 53.76220704130003, 40.0863647232, 190.0560913224], [66.7037353728, 43.17938232449998, 105.5074463232, 187.81121824439998], [107.3738403072, 52.60858156620003, 126.70343016959998, 128.97509767320003], [200.84851077119998, 47.93481444789998, 234.3436279296, 148.42016600750003]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047380.jpg", "text": "Describe what can be found within the bounds of in the image . Provide the coordinates for each element you describe.", "boxes_value": [[158.0057983488, 219.5207519366, 389.1484985344, 589.2189941301999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047380_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Provide the coordinates for each element you describe.", "boxes_value": [[58.0057983488, 92.52075193659999, 289.1484985344, 462.2189941301999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047380.jpg", "text": "Describe what can be found within the bounds of in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two hats, a backpack, and a car.", "boxes_value": [[158.0057983488, 219.5207519366, 389.1484985344, 589.2189941301999], [162.210998528, 277.982055684, 349.8912964096, 802.0096435687999], [156.4030761472, 275.2089843742, 225.6265259008, 639.1083984498], [158.0057983488, 277.8483886698, 195.235351552, 302.9708251774], [225.8059692544, 281.4805908506, 277.8668212736, 346.8592529482], [185.8955688448, 357.66931150019997, 331.0518798848, 589.2189941301999], [353.7806396416, 219.5207519366, 389.1484985344, 269.0357666162]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047380_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two hats, a backpack, and a car.", "boxes_value": [[58.0057983488, 92.52075193659999, 289.1484985344, 462.2189941301999], [62.210998528000005, 150.982055684, 249.8912964096, 554], [56.40307614720001, 148.20898437419999, 125.6265259008, 512.1083984498], [58.0057983488, 150.8483886698, 95.235351552, 175.9708251774], [125.8059692544, 154.48059085059998, 177.8668212736, 219.85925294819998], [85.89556884480001, 230.66931150019997, 231.05187988479997, 462.2189941301999], [253.78063964159998, 92.52075193659999, 289.1484985344, 142.03576661620002]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047381.jpg", "text": "For the image , can you assess and describe what's happening at ? Please point out the objects and their coordinates.", "boxes_value": [[284.3855590968, 266.1874389504, 718.4069824057, 392.0111694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047381_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please point out the objects and their coordinates.", "boxes_value": [[109.38555909680002, 32.18743895040001, 543.4069824057, 158.0111694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047381.jpg", "text": "For the image , can you assess and describe what's happening at ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, three people, and a trash bin can.", "boxes_value": [[284.3855590968, 266.1874389504, 718.4069824057, 392.0111694336], [617.0451660422, 307.5260009984, 664.6485595904, 358.9547729408], [284.3855590968, 288.9927978496, 322.3179931944, 357.0238036992], [381.60217284349994, 279.6275024384, 423.65759280810005, 392.0111694336], [697.5130615439, 266.1874389504, 718.4069824057, 306.2178955264], [643.7503662268, 338.7708129792, 661.1862793053, 372.997070336]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047381_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, three people, and a trash bin can.", "boxes_value": [[109.38555909680002, 32.18743895040001, 543.4069824057, 158.0111694336], [442.0451660422, 73.52600099839998, 489.64855959040005, 124.95477294080001], [109.38555909680002, 54.99279784959998, 147.3179931944, 123.02380369920002], [206.60217284349994, 45.62750243839997, 248.65759280810005, 158.0111694336], [522.5130615439, 32.18743895040001, 543.4069824057, 72.21789552640001], [468.7503662268, 104.7708129792, 486.1862793053, 138.99707033599998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047382.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for all objects that you mention.", "boxes_value": [[146.1500243968, 309.09600831, 417.8281860096, 416.0814208908]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047382_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for all objects that you mention.", "boxes_value": [[68.1500243968, 27.096008310000002, 339.8281860096, 134.08142089080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047382.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[146.1500243968, 309.09600831, 417.8281860096, 416.0814208908], [2.4089965568, 258.9460449096, 485.0282592768, 556.0159912272001], [146.1500243968, 328.54791261639997, 246.65148928, 382.3648071056], [154.5792236544, 355.13214111, 277.7745361408, 423.21380612679997], [248.59667968, 321.4155273248, 334.1850586112, 405.7070922844], [288.7973022208, 309.09600831, 365.9564819456, 400.5198974568], [327.0526733312, 333.7350463952, 417.8281860096, 416.0814208908]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047382_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[68.1500243968, 27.096008310000002, 339.8281860096, 134.08142089080002], [0, 0, 407, 160], [68.1500243968, 46.54791261639997, 168.65148928, 100.3648071056], [76.57922365440001, 73.13214111000002, 199.77453614080002, 141.21380612679997], [170.59667968, 39.415527324799996, 256.1850586112, 123.7070922844], [210.79730222080002, 27.096008310000002, 287.9564819456, 118.51989745679998], [249.05267333120003, 51.735046395200015, 339.8281860096, 134.08142089080002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047383.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[172.5343017216, 42.7286987264, 328.48400878079997, 427.0722045952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047383_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[39.5343017216, 42.7286987264, 195.48400878079997, 427.0722045952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047383.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a binoculars, a train, and two street lights.", "boxes_value": [[172.5343017216, 42.7286987264, 328.48400878079997, 427.0722045952], [215.1928100352, 261.6718750208, 370.6636962816, 510.7509155328], [279.99597166079997, 390.3948364288, 328.48400878079997, 427.0722045952], [0, 130.06445312, 279.9710693376, 392.7648315392], [172.5343017216, 42.7286987264, 184.31768801279998, 218.093322752], [236.6282348544, 166.6907958784, 246.2353515264, 251.0776977408]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047383_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a binoculars, a train, and two street lights.", "boxes_value": [[39.5343017216, 42.7286987264, 195.48400878079997, 427.0722045952], [82.19281003520001, 261.6718750208, 234, 510.7509155328], [146.99597166079997, 390.3948364288, 195.48400878079997, 427.0722045952], [0, 130.06445312, 146.9710693376, 392.7648315392], [39.5343017216, 42.7286987264, 51.31768801279998, 218.093322752], [103.6282348544, 166.6907958784, 113.2353515264, 251.0776977408]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047384.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[149.3602295168, 252.6173095936, 491.6983642668, 511.4590454272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047384_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[86.36022951679999, 65.6173095936, 428.6983642668, 324.4590454272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047384.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include two guitars, a cymbal, and two people.", "boxes_value": [[149.3602295168, 252.6173095936, 491.6983642668, 511.4590454272], [379.9559326222, 298.640563968, 491.6983642668, 427.14434816], [149.3602295168, 401.7483520512, 267.1976318542, 511.4590454272], [209.29998781199998, 375.7096557568, 301.8450927594, 401.4580688384], [359.9143066306, 252.6173095936, 478.82397461159997, 510.4855346688], [92.3673096066, 357.009033216, 209.894409174, 511.8682250752]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047384_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include two guitars, a cymbal, and two people.", "boxes_value": [[86.36022951679999, 65.6173095936, 428.6983642668, 324.4590454272], [316.9559326222, 111.64056396799998, 428.6983642668, 240.14434816], [86.36022951679999, 214.74835205120002, 204.19763185419998, 324.4590454272], [146.29998781199998, 188.70965575679998, 238.84509275940002, 214.45806883839998], [296.9143066306, 65.6173095936, 415.82397461159997, 323.4855346688], [29.367309606600003, 170.00903321599998, 146.894409174, 324.8682250752]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047387.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[265.1797775224, 72.7399902208, 332.5560945974, 422.4432983552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047387_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[17.179777522400002, 72.7399902208, 84.55609459739998, 422.4432983552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047387.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, two people, a hat, and a sneakers.", "boxes_value": [[265.1797775224, 72.7399902208, 332.5560945974, 422.4432983552], [265.1797775224, 153.7486416896, 332.5560945974, 207.1009289216], [199.1567382836, 189.99517824, 667.1334228724, 434.0518188544], [265.7839355641, 16.0141601792, 511.67810055369995, 504.1275024384], [276.455383272, 72.7399902208, 308.6144409058, 96.5460205056], [272.239562963, 343.8118286336, 307.2697143583, 422.4432983552]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047387_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, two people, a hat, and a sneakers.", "boxes_value": [[17.179777522400002, 72.7399902208, 84.55609459739998, 422.4432983552], [17.179777522400002, 153.7486416896, 84.55609459739998, 207.1009289216], [0, 189.99517824, 101, 434.0518188544], [17.783935564099977, 16.0141601792, 101, 504.1275024384], [28.455383272000006, 72.7399902208, 60.614440905799995, 96.5460205056], [24.239562963000026, 343.8118286336, 59.26971435830001, 422.4432983552]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047388.jpg", "text": "Can you provide some context for the area within the picture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[527.1191406491, 197.5901489152, 652.3137207122, 248.4412841984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047388_crop.jpg", "text": "Can you provide some context for the area within the picture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.11914064910002, 13.59014891519999, 157.31372071220005, 64.44128419840001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047388.jpg", "text": "Can you provide some context for the area within the picture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a barrel, and three pots.", "boxes_value": [[527.1191406491, 197.5901489152, 652.3137207122, 248.4412841984], [421.5062255675, 74.8293456896, 681.4689941666, 343.7231445504], [565.0209961125, 226.6552734208, 595.7054443456, 248.4412841984], [559.0952148266, 226.043457024, 594.0521240035999, 248.5350951936], [585.3807372894, 197.5901489152, 652.3137207122, 238.7797241344], [527.1191406491, 204.635742208, 563.7020263997, 247.1801757696]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047388_crop.jpg", "text": "Can you provide some context for the area within the picture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a barrel, and three pots.", "boxes_value": [[32.11914064910002, 13.59014891519999, 157.31372071220005, 64.44128419840001], [0, 0, 186.46899416660005, 77], [70.02099611250003, 42.6552734208, 100.70544434559997, 64.44128419840001], [64.0952148266, 42.04345702399999, 99.05212400359994, 64.5350951936], [90.38073728940003, 13.59014891519999, 157.31372071220005, 54.7797241344], [32.11914064910002, 20.63574220800001, 68.7020263997, 63.1801757696]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047390.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[290.4476928618, 347.9104004096, 464.162719758, 454.0695800832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047390_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[43.44769286180002, 26.910400409600015, 217.16271975799998, 133.06958008319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047390.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[290.4476928618, 347.9104004096, 464.162719758, 454.0695800832], [436.52624515120004, 348.3490600448, 464.162719758, 427.3104247808], [375.9891357272, 365.8960571392, 443.9836425993, 454.0695800832], [290.4476928618, 347.9104004096, 320.2775268267, 397.4805907968], [264.12719724740003, 358.438598656, 330.3670654402, 426.8717651456], [311.5040283341, 364.5800171008, 414.59252931320003, 427.3104247808], [126.6027831795, 355.794250496, 390.5594482355999, 488.2559814656]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047390_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[43.44769286180002, 26.910400409600015, 217.16271975799998, 133.06958008319998], [189.52624515120004, 27.349060044800012, 217.16271975799998, 106.31042478080002], [128.9891357272, 44.896057139200025, 196.98364259930003, 133.06958008319998], [43.44769286180002, 26.910400409600015, 73.27752682670001, 76.48059079680002], [17.127197247400034, 37.43859865600001, 83.36706544020001, 105.87176514560002], [64.50402833409998, 43.58001710079998, 167.59252931320003, 106.31042478080002], [0, 34.79425049600002, 143.55944823559992, 159]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047391.jpg", "text": "In the displayed image , help me understand the region defined by . Give coordinates for the items you reference.", "boxes_value": [[27.0382690304, 546.9451904256, 322.2529296896, 602.2006835712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047391_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Give coordinates for the items you reference.", "boxes_value": [[27.0382690304, 13.945190425600003, 322.2529296896, 69.20068357119999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047391.jpg", "text": "In the displayed image , help me understand the region defined by . Give coordinates for the items you reference. For your reference, objects involved in this region include two potted plants, two vases, and three cars.", "boxes_value": [[27.0382690304, 546.9451904256, 322.2529296896, 602.2006835712], [286.4278564352, 576.2584228608, 322.2529296896, 602.2006835712], [186.516906752, 577.6693115136, 214.1339111424, 594.7840576512], [113.3900756992, 566.0001220608, 141.3961181696, 585.448730496], [27.0382690304, 567.5560303104, 54.6552734208, 581.5589599488001], [271.6072387584, 553.1800537344, 362.7055053824, 579.1586913792], [161.1116333056, 546.9451904256, 221.728332544, 567.0354004224], [53.450134272, 541.9062500352, 105.6990966784, 557.9829101568]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047391_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Give coordinates for the items you reference. For your reference, objects involved in this region include two potted plants, two vases, and three cars.", "boxes_value": [[27.0382690304, 13.945190425600003, 322.2529296896, 69.20068357119999], [286.4278564352, 43.25842286080001, 322.2529296896, 69.20068357119999], [186.516906752, 44.66931151359995, 214.1339111424, 61.78405765119999], [113.3900756992, 33.00012206079998, 141.3961181696, 52.44873049600005], [27.0382690304, 34.55603031040005, 54.6552734208, 48.558959948800066], [271.6072387584, 20.180053734399962, 362.7055053824, 46.158691379200036], [161.1116333056, 13.945190425600003, 221.728332544, 34.035400422400016], [53.450134272, 8.906250035199946, 105.6990966784, 24.982910156800017]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00047392.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[299.8645629952, 210.5217895244, 488.1143798784, 383.3630371208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047392_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[47.864562995200004, 43.52178952439999, 236.11437987839997, 216.36303712080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047392.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a book, and a necklace.", "boxes_value": [[299.8645629952, 210.5217895244, 488.1143798784, 383.3630371208], [421.5988158976, 156.0504760446, 510.9229736448, 287.62988283960004], [464.3888549888, 184.9337768386, 510.3881225728, 299.9320068402], [397.3383789056, 210.5217895244, 488.1143798784, 383.3630371208], [243.7433471488, 158.949218745, 430.1794433536, 441.63476561240003], [327.4526367232, 300.856811509, 444.381164544, 368.150390642], [299.8645629952, 244.5610351694, 344.6891479552, 324.163330116]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047392_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a book, and a necklace.", "boxes_value": [[47.864562995200004, 43.52178952439999, 236.11437987839997, 216.36303712080002], [169.5988158976, 0, 258.9229736448, 120.62988283960004], [212.3888549888, 17.93377683860001, 258.3881225728, 132.93200684020002], [145.3383789056, 43.52178952439999, 236.11437987839997, 216.36303712080002], [0, 0, 178.1794433536, 259], [75.45263672319999, 133.856811509, 192.381164544, 201.150390642], [47.864562995200004, 77.56103516939999, 92.68914795519999, 157.163330116]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047393.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Specify the location of each mentioned object.", "boxes_value": [[187.0132445952, 349.7901001216, 681.222656256, 480.6885986304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047393_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Specify the location of each mentioned object.", "boxes_value": [[124.0132445952, 32.790100121600005, 618.222656256, 163.6885986304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047393.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include a soccer, and four sneakers.", "boxes_value": [[187.0132445952, 349.7901001216, 681.222656256, 480.6885986304], [537.4088134656, 424.0879516672, 593.1708984576, 480.6885986304], [187.0132445952, 444.4423217664, 234.839965824, 464.5433959936], [228.60168460799997, 406.3195800576, 288.2117919744, 454.1463012864], [609.135986304, 435.739563008, 650.0312500224001, 460.692626944], [652.8038330112, 349.7901001216, 681.222656256, 419.7973022208]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047393_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include a soccer, and four sneakers.", "boxes_value": [[124.0132445952, 32.790100121600005, 618.222656256, 163.6885986304], [474.4088134656, 107.0879516672, 530.1708984576, 163.6885986304], [124.0132445952, 127.44232176640003, 171.839965824, 147.54339599359997], [165.60168460799997, 89.31958005759998, 225.2117919744, 137.1463012864], [546.135986304, 118.739563008, 587.0312500224001, 143.69262694399998], [589.8038330112, 32.790100121600005, 618.222656256, 102.79730222080002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047395.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates.", "boxes_value": [[185.6099243008, 287.6898193192, 258.900268544, 368.22387698529997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047395_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates.", "boxes_value": [[18.609924300800003, 20.689819319200012, 91.90026854400003, 101.22387698529997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047395.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a guitar, two people, and two necklaces.", "boxes_value": [[185.6099243008, 287.6898193192, 258.900268544, 368.22387698529997], [83.009765632, 323.3547363363, 377.9415893504, 621.3911132477], [49.0222168064, 219.7027587863, 343.0700683776, 646.7722168091], [147.0381469696, 155.1366577016, 373.4083251712, 649.1059570503], [185.6099243008, 317.0911254924, 194.9843139584, 368.22387698529997], [206.0630493184, 287.6898193192, 258.900268544, 293.6553344716]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047395_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a guitar, two people, and two necklaces.", "boxes_value": [[18.609924300800003, 20.689819319200012, 91.90026854400003, 101.22387698529997], [0, 56.35473633629999, 110, 121], [0, 0, 110, 121], [0, 0, 110, 121], [18.609924300800003, 50.09112549240001, 27.98431395840001, 101.22387698529997], [39.063049318400004, 20.689819319200012, 91.90026854400003, 26.655334471599986]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047396.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[150.952304128, 109.9538574303, 282.4991455232, 382.3945312241]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047396_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[32.95230412800001, 68.9538574303, 164.4991455232, 341.3945312241]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047396.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[150.952304128, 109.9538574303, 282.4991455232, 382.3945312241], [236.2567138816, 163.99627682530001, 282.4991455232, 382.3945312241], [190.0142212096, 138.3679199558, 245.7280273408, 346.1805419695], [226.228210432, 109.9538574303, 263.5564575232, 218.0386962886], [157.7399902208, 71.5908813469, 200.7192382976, 189.54504397800002], [145.3237304832, 142.2678832862, 189.7356567552, 322.3031616016], [150.952304128, 256.8472739343, 194.2748521472, 304.4171305879]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047396_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[32.95230412800001, 68.9538574303, 164.4991455232, 341.3945312241], [118.2567138816, 122.99627682530001, 164.4991455232, 341.3945312241], [72.01422120960001, 97.3679199558, 127.7280273408, 305.1805419695], [108.228210432, 68.9538574303, 145.55645752319998, 177.0386962886], [39.739990220799996, 30.590881346900005, 82.71923829759999, 148.54504397800002], [27.323730483199995, 101.2678832862, 71.73565675520001, 281.3031616016], [32.95230412800001, 215.8472739343, 76.2748521472, 263.4171305879]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047399.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify.", "boxes_value": [[175.4155273604, 392.6514281984, 428.585937518, 445.7426147328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047399_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify.", "boxes_value": [[63.415527360400006, 13.651428198400026, 316.585937518, 66.7426147328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047399.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[175.4155273604, 392.6514281984, 428.585937518, 445.7426147328], [130.2170410192, 398.3352660992, 337.228881858, 511.9165039104], [175.4155273604, 418.1000976384, 194.2826538364, 445.7426147328], [224.55780032840002, 416.3449707008, 251.76153563359998, 437.8447265792], [352.2399902328, 399.2329712128, 372.8621825896, 425.9979248128], [402.6984862992, 392.6514281984, 428.585937518, 419.8551635968]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047399_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[63.415527360400006, 13.651428198400026, 316.585937518, 66.7426147328], [18.217041019199996, 19.3352660992, 225.22888185800002, 80], [63.415527360400006, 39.10009763839997, 82.28265383639999, 66.7426147328], [112.55780032840002, 37.34497070079999, 139.76153563359998, 58.84472657920003], [240.23999023279998, 20.23297121280001, 260.8621825896, 46.99792481280002], [290.6984862992, 13.651428198400026, 316.585937518, 40.855163596800026]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047401.jpg", "text": "Please elucidate the area of the image . Please mention the objects and their locations.", "boxes_value": [[343.2401122768, 108.7274169856, 596.857421886, 443.86462402560005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047401_crop.jpg", "text": "Please elucidate the area of the image . Please mention the objects and their locations.", "boxes_value": [[64.2401122768, 84.7274169856, 317.857421886, 419.86462402560005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047401.jpg", "text": "Please elucidate the area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[343.2401122768, 108.7274169856, 596.857421886, 443.86462402560005], [284.0162353388, 152.6227417088, 504.1895752284, 463.3736572416], [343.2401122768, 108.7274169856, 596.857421886, 443.86462402560005], [357.1884765736, 156.8308716032, 375.5631103588, 181.3303833088], [343.9130859068, 393.652526848, 394.5665283396, 424.3939209216], [563.2947997808, 394.70050048, 596.8308105828, 443.6072387584]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047401_crop.jpg", "text": "Please elucidate the area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[64.2401122768, 84.7274169856, 317.857421886, 419.86462402560005], [5.016235338800016, 128.6227417088, 225.18957522839997, 439.3736572416], [64.2401122768, 84.7274169856, 317.857421886, 419.86462402560005], [78.18847657359998, 132.8308716032, 96.56311035879997, 157.3303833088], [64.9130859068, 369.652526848, 115.56652833959998, 400.3939209216], [284.29479978079996, 370.70050048, 317.83081058280004, 419.6072387584]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047404.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Specify the location of each mentioned object.", "boxes_value": [[100.8743896576, 200.16320799460001, 283.109497088, 303.68041994600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047404_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Specify the location of each mentioned object.", "boxes_value": [[45.874389657600005, 26.163207994600015, 228.109497088, 129.68041994600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047404.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a handbag, a telephone, a folder, and a laptop.", "boxes_value": [[100.8743896576, 200.16320799460001, 283.109497088, 303.68041994600003], [54.0861206016, 196.5614624078, 514.0657958912, 574.4018554969], [113.1997070336, 243.5284424061, 236.4865112064, 303.68041994600003], [100.8743896576, 200.16320799460001, 126.2526244864, 251.7382812838], [182.4461059584, 213.4384765534, 283.109497088, 256.38201904700003], [184.1710204928, 206.9998169075, 370.8017577984, 350.3662109616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047404_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a handbag, a telephone, a folder, and a laptop.", "boxes_value": [[45.874389657600005, 26.163207994600015, 228.109497088, 129.68041994600003], [0, 22.561462407800008, 273, 155], [58.199707033600006, 69.5284424061, 181.4865112064, 129.68041994600003], [45.874389657600005, 26.163207994600015, 71.2526244864, 77.7382812838], [127.44610595840001, 39.438476553399994, 228.109497088, 82.38201904700003], [129.1710204928, 32.99981690749999, 273, 155]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047405.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations.", "boxes_value": [[12.5173950253, 90.3836059648, 203.11010738779999, 325.5032958976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047405_crop.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations.", "boxes_value": [[12.5173950253, 59.3836059648, 203.11010738779999, 294.5032958976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047405.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two potted plants, a stool, a lamp, a cabinet, a person, and a street lights.", "boxes_value": [[12.5173950253, 90.3836059648, 203.11010738779999, 325.5032958976], [175.28662109740003, 285.9152221696, 203.11010738779999, 316.4665527296], [12.5173950253, 279.8958129664, 48.075805637100004, 325.5032958976], [49.6150512508, 298.9066162176, 100.7282714708, 325.1934203904], [14.9276122791, 90.3836059648, 43.3054809271, 126.9660644352], [36.782043428099996, 242.4144897536, 68.174987778, 270.8176880128], [191.9562377852, 224.1636352512, 208.81378171810002, 251.6924438528], [143.2860717515, 0.5305175552, 204.3121948325, 509.6984252928]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047405_crop.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two potted plants, a stool, a lamp, a cabinet, a person, and a street lights.", "boxes_value": [[12.5173950253, 59.3836059648, 203.11010738779999, 294.5032958976], [175.28662109740003, 254.91522216959999, 203.11010738779999, 285.4665527296], [12.5173950253, 248.8958129664, 48.075805637100004, 294.5032958976], [49.6150512508, 267.9066162176, 100.7282714708, 294.1934203904], [14.9276122791, 59.3836059648, 43.3054809271, 95.9660644352], [36.782043428099996, 211.4144897536, 68.174987778, 239.81768801279998], [191.9562377852, 193.1636352512, 208.81378171810002, 220.6924438528], [143.2860717515, 0, 204.3121948325, 353]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047406.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[458.76611325, 110.8866577, 721.374999975, 421.25860595]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047406_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[65.76611324999999, 77.8866577, 328.37499997500004, 388.25860595]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047406.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a glasses, and a briefcase.", "boxes_value": [[458.76611325, 110.8866577, 721.374999975, 421.25860595], [619.458862275, 111.34991455, 721.374999975, 208.6334839], [596.29614255, 110.8866577, 669.9536133, 181.30145265], [506.42456055, 122.00476075, 586.1044921499999, 308.2333374], [624.321533175, 136.0774536, 657.6413574000001, 146.52093505], [458.76611325, 307.15374755000005, 515.8186035, 421.25860595]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047406_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a glasses, and a briefcase.", "boxes_value": [[65.76611324999999, 77.8866577, 328.37499997500004, 388.25860595], [226.458862275, 78.34991455, 328.37499997500004, 175.6334839], [203.29614255, 77.8866577, 276.95361330000003, 148.30145265], [113.42456055000002, 89.00476075, 193.10449214999994, 275.2333374], [231.321533175, 103.07745360000001, 264.64135740000006, 113.52093504999999], [65.76611324999999, 274.15374755000005, 122.8186035, 388.25860595]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047408.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Provide the coordinates for all objects that you mention.", "boxes_value": [[317.7037964034, 346.1752319488, 569.844360371, 511.8978882048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047408_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Provide the coordinates for all objects that you mention.", "boxes_value": [[63.70379640340002, 42.17523194879999, 315.844360371, 207.89788820479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047408.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three sneakers, and two handbags.", "boxes_value": [[317.7037964034, 346.1752319488, 569.844360371, 511.8978882048], [317.7037964034, 492.6126708736, 353.7587890958, 511.8978882048], [368.8515625124, 483.9483032064, 404.6269531414, 510.7799072256], [470.6905517908, 467.21246336, 496.446655254, 481.9751586816], [495.53601073019996, 346.1752319488, 505.6885986152, 375.9708862464], [548.3391113486, 364.1633911296, 569.844360371, 403.15631104]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047408_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three sneakers, and two handbags.", "boxes_value": [[63.70379640340002, 42.17523194879999, 315.844360371, 207.89788820479998], [63.70379640340002, 188.6126708736, 99.75878909580001, 207.89788820479998], [114.8515625124, 179.9483032064, 150.6269531414, 206.77990722560003], [216.6905517908, 163.21246336000002, 242.446655254, 177.97515868160002], [241.53601073019996, 42.17523194879999, 251.68859861520002, 71.97088624640003], [294.3391113486, 60.16339112959997, 315.844360371, 99.15631103999999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047409.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047409_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047409.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, two helmets, and two gloves.", "boxes_value": [[1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232], [1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232], [0.6586914304, 71.25280764200001, 120.6534424064, 408.0755005135], [1.2454834176, 244.9419555415, 76.9884643328, 380.4886474533], [100.185485824, 227.1431274245, 148.514587392, 373.40240481179995], [41.8903808512, 71.7151489334, 102.930297856, 134.1190796175], [0.6078491136, 217.0877685285, 75.5803222528, 342.22973634090005], [148.9824218624, 24.0599365465, 245.6171264512, 131.8076172046], [256.2469482496, 228.9255371083, 312.7782593024, 298.98565673779996]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 7], [6, 8]]}, {"image_path": "objects365_v1_00047409_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, two helmets, and two gloves.", "boxes_value": [[1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232], [1.1031494144, 23.5917358638, 314.3807983616, 567.7055664232], [0.6586914304, 71.25280764200001, 120.6534424064, 408.0755005135], [1.2454834176, 244.9419555415, 76.9884643328, 380.4886474533], [100.185485824, 227.1431274245, 148.514587392, 373.40240481179995], [41.8903808512, 71.7151489334, 102.930297856, 134.1190796175], [0.6078491136, 217.0877685285, 75.5803222528, 342.22973634090005], [148.9824218624, 24.0599365465, 245.6171264512, 131.8076172046], [256.2469482496, 228.9255371083, 312.7782593024, 298.98565673779996]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 7], [6, 8]]}, {"image_path": "objects365_v1_00047412.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[89.42095947, 255.0480956928, 321.5150146466, 424.9832763904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047412_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[58.42095947, 43.04809569279999, 290.5150146466, 212.98327639040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047412.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five benches, and a desk.", "boxes_value": [[89.42095947, 255.0480956928, 321.5150146466, 424.9832763904], [250.82281492459998, 334.7501220864, 331.860229485, 424.9832763904], [91.046814003, 360.0383910912, 192.77465824019998, 424.9832763904], [258.8690185664, 288.7714233344, 321.5150146466, 350.842651392], [123.2318725412, 315.7838745088, 186.4526367214, 360.0383910912], [146.221252441, 282.4493408256, 278.4100341436, 361.1878662144], [89.42095947, 255.0480956928, 216.6746215592, 288.6747436544]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047412_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five benches, and a desk.", "boxes_value": [[58.42095947, 43.04809569279999, 290.5150146466, 212.98327639040002], [219.82281492459998, 122.75012208639998, 300.860229485, 212.98327639040002], [60.046814002999994, 148.03839109120003, 161.77465824019998, 212.98327639040002], [227.86901856639997, 76.77142333440003, 290.5150146466, 138.842651392], [92.2318725412, 103.78387450880001, 155.4526367214, 148.03839109120003], [115.22125244099999, 70.44934082560002, 247.4100341436, 149.1878662144], [58.42095947, 43.04809569279999, 185.6746215592, 76.6747436544]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047413.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[314.91284183979997, 393.197875968, 697.4110107124, 511.7078857216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047413_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[95.91284183979997, 30.197875968000005, 478.41101071239996, 148.7078857216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047413.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a sneakers, a handbag, and three bottles.", "boxes_value": [[314.91284183979997, 393.197875968, 697.4110107124, 511.7078857216], [314.91284183979997, 478.4863891456, 344.7260742452, 506.3120727552], [592.4998779312, 393.197875968, 630.9774169783001, 475.283325184], [409.8258056475, 432.5839233536, 432.26965332130004, 456.9297485312], [450.1486815994, 454.6473388544, 485.14575197900007, 511.7078857216], [633.5031738616, 428.0190429696, 697.4110107124, 510.1862793216]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047413_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a sneakers, a handbag, and three bottles.", "boxes_value": [[95.91284183979997, 30.197875968000005, 478.41101071239996, 148.7078857216], [95.91284183979997, 115.48638914560001, 125.72607424519998, 143.3120727552], [373.4998779312, 30.197875968000005, 411.9774169783001, 112.28332518399998], [190.82580564749998, 69.58392335360003, 213.26965332130004, 93.9297485312], [231.1486815994, 91.6473388544, 266.14575197900007, 148.7078857216], [414.5031738616, 65.01904296959998, 478.41101071239996, 147.1862793216]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047414.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[302.5855712768, 383.622558627, 423.0819506176, 766.5490722889999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047414_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[30.58557127680001, 96.62255862699999, 151.0819506176, 479.5490722889999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047414.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a baseball bat, a person, a helmet, a gloves, and two sneakers.", "boxes_value": [[302.5855712768, 383.622558627, 423.0819506176, 766.5490722889999], [380.1472504832, 573.538378028, 423.0819506176, 736.4062863080001], [235.1853027328, 370.495727525, 405.8058471424, 768.1204833610001], [302.5855712768, 383.622558627, 347.1565551616, 436.25415040900003], [376.8919677952, 552.1524657880001, 396.400268544, 588.7304687879999], [312.6617431552, 744.4471435170001, 338.960327168, 766.5490722889999], [364.9790649344, 729.619140635, 392.3967285248, 762.6322021780001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047414_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a baseball bat, a person, a helmet, a gloves, and two sneakers.", "boxes_value": [[30.58557127680001, 96.62255862699999, 151.0819506176, 479.5490722889999], [108.14725048320003, 286.538378028, 151.0819506176, 449.40628630800006], [0, 83.495727525, 133.8058471424, 481.1204833610001], [30.58557127680001, 96.62255862699999, 75.15655516160001, 149.25415040900003], [104.8919677952, 265.1524657880001, 124.40026854400003, 301.73046878799994], [40.661743155199986, 457.4471435170001, 66.96032716799999, 479.5490722889999], [92.97906493440001, 442.619140635, 120.39672852479998, 475.63220217800006]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047416.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[97.5742797842, 76.9223022592, 453.63696290940004, 400.5950927872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047416_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.5742797842, 76.9223022592, 445.63696290940004, 400.5950927872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047416.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a bracelet, a helmet, and a sneakers.", "boxes_value": [[97.5742797842, 76.9223022592, 453.63696290940004, 400.5950927872], [97.5742797842, 112.429138176, 120.850646994, 147.8039550976], [259.1423950333, 76.9404907008, 470.0231933352, 400.0359497216], [428.39343262299997, 273.6681518592, 453.63696290940004, 288.3752441344], [279.4792480606, 76.9223022592, 329.2202148692, 116.7150268416], [385.2326660091, 372.6125488128, 438.2741699405, 400.5950927872]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047416_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a bracelet, a helmet, and a sneakers.", "boxes_value": [[89.5742797842, 76.9223022592, 445.63696290940004, 400.5950927872], [89.5742797842, 112.429138176, 112.850646994, 147.8039550976], [251.14239503329998, 76.9404907008, 462.0231933352, 400.0359497216], [420.39343262299997, 273.6681518592, 445.63696290940004, 288.3752441344], [271.4792480606, 76.9223022592, 321.2202148692, 116.7150268416], [377.2326660091, 372.6125488128, 430.2741699405, 400.5950927872]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047417.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[176.367980984, 248.4446411264, 519.0328369376, 365.26611328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047417_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[86.36798098400001, 29.4446411264, 429.0328369376, 146.26611328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047417.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two barrels, a bottle, a fork, two plates, two napkins, and a desk.", "boxes_value": [[176.367980984, 248.4446411264, 519.0328369376, 365.26611328], [378.1998290789, 277.4078979584, 433.86145016560005, 332.1210937344], [263.57788083739996, 257.0808715776, 289.965637199, 280.9948120064], [397.3261719055, 248.4446411264, 422.756469703, 289.2026366976], [461.0085448908, 339.7006225408, 519.0328369376, 365.26611328], [477.841064455, 340.1726074368, 568.7781982365, 360.3062744064], [170.1563110607, 327.2460937728, 258.46154784929996, 352.2480468992], [176.367980984, 296.970397952, 236.90502927949998, 341.7151489024], [282.4488525594, 305.1367187456, 384.5346679438, 356.4616699392], [139.8348999349, 310.2329712128, 623.5136718696, 511.8262329344]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7, 8], [9]]}, {"image_path": "objects365_v1_00047417_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two barrels, a bottle, a fork, two plates, two napkins, and a desk.", "boxes_value": [[86.36798098400001, 29.4446411264, 429.0328369376, 146.26611328], [288.1998290789, 58.407897958399985, 343.86145016560005, 113.1210937344], [173.57788083739996, 38.08087157760002, 199.965637199, 61.994812006400025], [307.3261719055, 29.4446411264, 332.756469703, 70.20263669759998], [371.0085448908, 120.7006225408, 429.0328369376, 146.26611328], [387.841064455, 121.17260743679998, 478.7781982365, 141.3062744064], [80.1563110607, 108.24609377280001, 168.46154784929996, 133.2480468992], [86.36798098400001, 77.97039795199998, 146.90502927949998, 122.71514890240002], [192.4488525594, 86.13671874559998, 294.5346679438, 137.46166993920002], [49.834899934899994, 91.23297121280001, 514, 175]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7, 8], [9]]}, {"image_path": "objects365_v1_00047418.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates.", "boxes_value": [[152.8015746948, 238.1210937344, 262.87207031319997, 482.2533569536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047418_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates.", "boxes_value": [[27.801574694799996, 61.121093734400006, 137.87207031319997, 305.2533569536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047418.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[152.8015746948, 238.1210937344, 262.87207031319997, 482.2533569536], [218.5554809512, 99.8151855616, 504.5710449008001, 511.6324463104], [223.27276610720003, 259.2624511488, 267.06555177079997, 465.1389159936], [232.836669888, 199.8652954112, 285.6900635052, 449.5346069504], [152.8015746948, 238.1210937344, 223.27276610720003, 482.2533569536], [232.4714965952, 258.7682494976, 262.87207031319997, 290.3269653504]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047418_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[27.801574694799996, 61.121093734400006, 137.87207031319997, 305.2533569536], [93.5554809512, 0, 165, 334.6324463104], [98.27276610720003, 82.26245114879998, 142.06555177079997, 288.1389159936], [107.83666988799999, 22.86529541120001, 160.6900635052, 272.5346069504], [27.801574694799996, 61.121093734400006, 98.27276610720003, 305.2533569536], [107.4714965952, 81.76824949759998, 137.87207031319997, 113.32696535039997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047419.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[524.312377948, 183.0362548736, 641.1528320275, 511.9896240128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047419_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[29.31237794799995, 83.0362548736, 146, 411.9896240128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047419.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a hat, a gloves, and a camera.", "boxes_value": [[524.312377948, 183.0362548736, 641.1528320275, 511.9896240128], [257.1528320009, 144.5699462656, 609.9818115464, 512.6528320512], [524.312377948, 183.0362548736, 641.1528320275, 511.9896240128], [623.579834014, 350.4537964032, 642.2719726447, 450.1450195456], [554.5844726568, 183.0225219584, 628.3714599694999, 225.1332397568], [541.5413818372, 231.468505856, 577.6895752106, 274.3245239296], [568.376953115, 222.6966552576, 614.5452881026, 256.910034176]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047419_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a hat, a gloves, and a camera.", "boxes_value": [[29.31237794799995, 83.0362548736, 146, 411.9896240128], [0, 44.56994626560001, 114.98181154639997, 412], [29.31237794799995, 83.0362548736, 146, 411.9896240128], [128.57983401399997, 250.4537964032, 146, 350.1450195456], [59.58447265680002, 83.0225219584, 133.37145996949994, 125.13323975680001], [46.54138183719999, 131.468505856, 82.68957521059997, 174.3245239296], [73.37695311499999, 122.69665525760001, 119.54528810260001, 156.910034176]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047422.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify.", "boxes_value": [[0, 402.4910278144, 470.46716310859995, 511.7019042816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047422_crop.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify.", "boxes_value": [[0, 27.491027814400013, 470.46716310859995, 136.70190428159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047422.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three suvs, and a car.", "boxes_value": [[0, 402.4910278144, 470.46716310859995, 511.7019042816], [0, 402.4910278144, 46.8088989494, 511.7019042816], [45.451965343400005, 420.8819579904, 187.644165016, 492.9414672896], [184.67578121440002, 430.0324096512, 245.20373535910002, 485.2562866176], [226.7957152979, 413.4964599808, 470.46716310859995, 510.5282592768], [403.08898926100005, 424.246582016, 487.8636474656, 476.1276244992]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047422_crop.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three suvs, and a car.", "boxes_value": [[0, 27.491027814400013, 470.46716310859995, 136.70190428159998], [0, 27.491027814400013, 46.8088989494, 136.70190428159998], [45.451965343400005, 45.8819579904, 187.644165016, 117.94146728959998], [184.67578121440002, 55.0324096512, 245.20373535910002, 110.25628661759998], [226.7957152979, 38.49645998080001, 470.46716310859995, 135.5282592768], [403.08898926100005, 49.24658201599999, 487.8636474656, 101.12762449920001]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047423.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference.", "boxes_value": [[510.31091307270003, 254.6200561664, 682.8632812262999, 479.3383789056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047423_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference.", "boxes_value": [[43.31091307270003, 56.62005616639999, 215.86328122629993, 281.3383789056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047423.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference. For your reference, objects involved in this region include a hat, a glasses, a backpack, a handbag, a bottle, and a cup.", "boxes_value": [[510.31091307270003, 254.6200561664, 682.8632812262999, 479.3383789056], [579.3293456784, 254.6200561664, 667.4505615038, 311.5805053952], [611.1601562741, 290.1365356544, 655.3883056577, 305.8844604416], [653.0428467074, 328.6686401536, 682.8632812262999, 415.1145019392], [549.3563232223, 331.1385498112, 570.8820800868, 354.714355456], [510.31091307270003, 442.2775268352, 536.2963867372, 479.3383789056], [630.8431396612, 386.3652343808, 663.2404785401001, 425.0330810368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047423_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference. For your reference, objects involved in this region include a hat, a glasses, a backpack, a handbag, a bottle, and a cup.", "boxes_value": [[43.31091307270003, 56.62005616639999, 215.86328122629993, 281.3383789056], [112.32934567840005, 56.62005616639999, 200.45056150380003, 113.58050539520002], [144.16015627410002, 92.1365356544, 188.38830565770002, 107.88446044160003], [186.04284670740003, 130.6686401536, 215.86328122629993, 217.1145019392], [82.35632322230003, 133.1385498112, 103.88208008679999, 156.71435545600002], [43.31091307270003, 244.27752683519998, 69.29638673720001, 281.3383789056], [163.84313966119998, 188.36523438080002, 196.24047854010007, 227.0330810368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047425.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[0, 339.6365356544, 680.2332763483, 509.6100463616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047425_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[0, 42.63653565440001, 680.2332763483, 212.61004636159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047425.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two glasses, and three chairs.", "boxes_value": [[0, 339.6365356544, 680.2332763483, 509.6100463616], [465.72509762550004, 268.7601928704, 682.6934814159, 471.762756352], [205.7996215777, 370.4264526336, 297.6970214631, 416.0296630784], [552.8208007668, 339.6365356544, 673.3496093501, 366.6140747264], [302.9018554739, 449.84851072, 680.2332763483, 509.6100463616], [54.127197248099996, 328.2407836672, 136.8204345618, 462.3567504896], [0, 417.1881713664, 140.2949218811, 508.9151611392]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047425_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two glasses, and three chairs.", "boxes_value": [[0, 42.63653565440001, 680.2332763483, 212.61004636159998], [465.72509762550004, 0, 682.6934814159, 174.762756352], [205.7996215777, 73.42645263359998, 297.6970214631, 119.0296630784], [552.8208007668, 42.63653565440001, 673.3496093501, 69.61407472640002], [302.9018554739, 152.84851071999998, 680.2332763483, 212.61004636159998], [54.127197248099996, 31.24078366719999, 136.8204345618, 165.35675048960002], [0, 120.18817136640001, 140.2949218811, 211.9151611392]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047427.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations.", "boxes_value": [[209.2418213014, 185.81860352, 462.5716552758, 396.764648448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047427_crop.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations.", "boxes_value": [[64.2418213014, 52.81860352000001, 317.5716552758, 263.764648448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047427.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, two cups, a plate, a knife, and a fork.", "boxes_value": [[209.2418213014, 185.81860352, 462.5716552758, 396.764648448], [137.76623534040002, 1.326782208, 540.3916015315, 313.5992431616], [344.6585693575, 226.566345216, 454.0380859408, 396.764648448], [415.8101806887, 280.0145263616, 462.5716552758, 320.5411377152], [323.6696777447, 185.81860352, 381.11950682410003, 262.8636474368], [188.3013916121, 193.1295776256, 314.925476059, 237.6279907328], [209.2418213014, 194.7655639552, 307.4000244226, 245.4806518784]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5], [6]]}, {"image_path": "objects365_v1_00047427_crop.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, two cups, a plate, a knife, and a fork.", "boxes_value": [[64.2418213014, 52.81860352000001, 317.5716552758, 263.764648448], [0, 0, 380, 180.5992431616], [199.65856935750003, 93.566345216, 309.0380859408, 263.764648448], [270.8101806887, 147.01452636160002, 317.5716552758, 187.54113771520002], [178.66967774469998, 52.81860352000001, 236.11950682410003, 129.86364743680002], [43.30139161209999, 60.12957762560001, 169.925476059, 104.62799073279999], [64.2418213014, 61.76556395520001, 162.40002442259998, 112.48065187840001]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5], [6]]}, {"image_path": "objects365_v1_00047428.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[3.7550659049, 377.5445556736, 200.6951294191, 418.4310913024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047428_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[3.7550659049, 10.544555673599973, 200.6951294191, 51.43109130239998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047428.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bus, two suvs, and two cars.", "boxes_value": [[3.7550659049, 377.5445556736, 200.6951294191, 418.4310913024], [0, 363.2828368896, 66.0686645442, 390.663024896], [3.7550659049, 378.578002944, 51.5288085767, 398.971557632], [0, 383.1721191424, 55.0368042175, 433.7635497984], [66.83099362819999, 383.3118286336, 138.26037600520002, 412.7305908224], [103.9827270617, 377.5445556736, 200.6951294191, 418.4310913024]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00047428_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bus, two suvs, and two cars.", "boxes_value": [[3.7550659049, 10.544555673599973, 200.6951294191, 51.43109130239998], [0, 0, 66.0686645442, 23.663024896000024], [3.7550659049, 11.57800294399999, 51.5288085767, 31.971557631999985], [0, 16.172119142399993, 55.0368042175, 61], [66.83099362819999, 16.311828633599987, 138.26037600520002, 45.73059082240002], [103.9827270617, 10.544555673599973, 200.6951294191, 51.43109130239998]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00047430.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[113.79632568480001, 363.869201664, 644.1026611248001, 414.4618530304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047430_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[113.79632568480001, 12.869201664000002, 644.1026611248001, 63.46185303039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047430.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, a hanger, a handbag, a hat, and a fire extinguisher.", "boxes_value": [[113.79632568480001, 363.869201664, 644.1026611248001, 414.4618530304], [550.0534668084, 307.4353637888, 609.0455322432, 429.2119751168], [535.8463134804, 320.41046144, 551.6766357444001, 407.362487808], [532.41882327, 360.0702514688, 553.1964111036, 434.0039672832], [113.79632568480001, 363.869201664, 159.01391601359998, 387.4609985536], [374.2297363584, 398.9423827968, 403.25366211840003, 414.4618530304], [117.0173340108, 365.2954712064, 159.297485382, 390.2559814656], [628.842407238, 368.8344116224, 644.1026611248001, 403.0946655232]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047430_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, a hanger, a handbag, a hat, and a fire extinguisher.", "boxes_value": [[113.79632568480001, 12.869201664000002, 644.1026611248001, 63.46185303039999], [550.0534668084, 0, 609.0455322432, 76], [535.8463134804, 0, 551.6766357444001, 56.362487808000026], [532.41882327, 9.070251468799995, 553.1964111036, 76], [113.79632568480001, 12.869201664000002, 159.01391601359998, 36.46099855360001], [374.2297363584, 47.94238279680002, 403.25366211840003, 63.46185303039999], [117.0173340108, 14.29547120640001, 159.297485382, 39.25598146559997], [628.842407238, 17.83441162240001, 644.1026611248001, 52.09466552319998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047431.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 86.6130371072, 126.31988526250001, 277.0163574272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047431_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 47.6130371072, 126.31988526250001, 238.01635742719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047431.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a cabinet, and three cameras.", "boxes_value": [[0, 86.6130371072, 126.31988526250001, 277.0163574272], [0, 179.53222656, 126.31988526250001, 277.0163574272], [0.06530764039999999, 17.80548096, 233.7194824465, 290.7511596544], [91.9326172095, 133.729370112, 123.49383547949999, 158.5274658304], [40.3075561748, 86.6130371072, 71.1924438715, 113.2145996288], [8.3555908045, 130.92010496, 34.9940185477, 155.2308959744]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047431_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a mirror, a cabinet, and three cameras.", "boxes_value": [[0, 47.6130371072, 126.31988526250001, 238.01635742719998], [0, 140.53222656, 126.31988526250001, 238.01635742719998], [0.06530764039999999, 0, 157, 251.7511596544], [91.9326172095, 94.729370112, 123.49383547949999, 119.52746583039999], [40.3075561748, 47.6130371072, 71.1924438715, 74.2145996288], [8.3555908045, 91.92010496, 34.9940185477, 116.2308959744]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047434.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for each element you describe.", "boxes_value": [[595.3981933568, 300.4735107584, 728.8770751488, 505.8942260736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047434_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for each element you describe.", "boxes_value": [[33.39819335679999, 51.47351075839998, 166.87707514880003, 256.8942260736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047434.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[595.3981933568, 300.4735107584, 728.8770751488, 505.8942260736], [595.4279785472, 300.4735107584, 671.8831787008, 505.7442627072], [670.956542976, 309.7407836672, 712.6593017856, 445.6126098432], [706.8757323776, 330.5922241024, 728.8770751488, 416.7781371904], [625.1716308992, 491.2438354432, 655.8901366784, 505.1853027328], [595.3981933568, 491.2438354432, 625.8804931584, 505.8942260736]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047434_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[33.39819335679999, 51.47351075839998, 166.87707514880003, 256.8942260736], [33.42797854720004, 51.47351075839998, 109.88317870080004, 256.7442627072], [108.95654297600004, 60.74078366719999, 150.65930178559995, 196.6126098432], [144.8757323776, 81.5922241024, 166.87707514880003, 167.7781371904], [63.171630899199954, 242.2438354432, 93.89013667840004, 256.1853027328], [33.39819335679999, 242.2438354432, 63.880493158399986, 256.8942260736]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047437.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[91.6472168008, 90.4159545856, 221.85034182520002, 191.7465209856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047437_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.647216800799995, 25.415954585600005, 162.85034182520002, 126.7465209856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047437.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, and a bicycle.", "boxes_value": [[91.6472168008, 90.4159545856, 221.85034182520002, 191.7465209856], [97.16381832760001, 115.0949707264, 138.3279419068, 149.9949340672], [112.63031004800001, 90.4159545856, 221.85034182520002, 191.7465209856], [0.08404540120000001, 144.1521606656, 203.28680419479997, 447.3003539968], [128.5326538228, 49.4710083072, 620.1848144915999, 436.42724608], [91.6472168008, 124.8308716032, 114.8303222976, 171.5922241024]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047437_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, and a bicycle.", "boxes_value": [[32.647216800799995, 25.415954585600005, 162.85034182520002, 126.7465209856], [38.16381832760001, 50.09497072640001, 79.32794190679999, 84.9949340672], [53.63031004800001, 25.415954585600005, 162.85034182520002, 126.7465209856], [0, 79.15216066560001, 144.28680419479997, 152], [69.5326538228, 0, 195, 152], [32.647216800799995, 59.830871603199995, 55.830322297600006, 106.5922241024]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047439.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[201.9312744391, 384.8293456896, 610.2537841794, 512.2375488512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047439_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[102.93127443910001, 32.82934568960002, 511.2537841794, 160]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047439.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a high heels, a handbag, two leather shoes, and a boots.", "boxes_value": [[201.9312744391, 384.8293456896, 610.2537841794, 512.2375488512], [473.4111328359, 464.7889404416, 498.45349121059996, 512.2375488512], [271.31542970180004, 384.8293456896, 366.21252440439997, 445.0187378176], [201.9312744391, 445.709106432, 249.75640868379998, 476.9649048064], [230.6778564665, 410.987304704, 280.7547607368, 436.1640625152], [566.4866943167, 387.2841186304, 610.2537841794, 462.575683584]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047439_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a high heels, a handbag, two leather shoes, and a boots.", "boxes_value": [[102.93127443910001, 32.82934568960002, 511.2537841794, 160], [374.4111328359, 112.7889404416, 399.45349121059996, 160], [172.31542970180004, 32.82934568960002, 267.21252440439997, 93.01873781760003], [102.93127443910001, 93.709106432, 150.75640868379998, 124.9649048064], [131.6778564665, 58.987304703999996, 181.7547607368, 84.16406251519999], [467.4866943167, 35.28411863039997, 511.2537841794, 110.57568358399999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047442.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object.", "boxes_value": [[519.6546630864, 93.6715698176, 642.989868196, 330.4409179648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047442_crop.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object.", "boxes_value": [[31.654663086399978, 59.6715698176, 154.98986819599997, 296.4409179648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047442.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object. For your reference, objects involved in this region include three lamps, a picture, and a trash bin can.", "boxes_value": [[519.6546630864, 93.6715698176, 642.989868196, 330.4409179648], [519.6546630864, 124.2497558528, 537.6694335897, 203.6004028416], [542.1405029425, 102.540100096, 562.5450439335, 197.1174926848], [611.270751937, 93.6715698176, 635.6280517486, 194.9760131584], [614.0429687528, 252.6826171904, 642.989868196, 285.84002688], [554.8571777657, 294.0408935424, 576.8937988232, 330.4409179648]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047442_crop.jpg", "text": "Can you break down the region in the image for me? Specify the location of each mentioned object. For your reference, objects involved in this region include three lamps, a picture, and a trash bin can.", "boxes_value": [[31.654663086399978, 59.6715698176, 154.98986819599997, 296.4409179648], [31.654663086399978, 90.2497558528, 49.66943358970002, 169.6004028416], [54.14050294250001, 68.540100096, 74.54504393349998, 163.1174926848], [123.270751937, 59.6715698176, 147.6280517486, 160.9760131584], [126.0429687528, 218.6826171904, 154.98986819599997, 251.84002687999998], [66.85717776570004, 260.0408935424, 88.89379882319997, 296.4409179648]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047443.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[545.5001221061, 243.867004416, 728.8834228634, 415.5516967936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047443_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[46.50012210609998, 43.867004415999986, 229.88342286340003, 215.5516967936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047443.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two desks, a handbag, and two sandals.", "boxes_value": [[545.5001221061, 243.867004416, 728.8834228634, 415.5516967936], [635.1552734296, 295.8784179712, 719.0909424111, 364.0532836864], [545.5001221061, 353.2575683584, 597.6651611091, 415.5516967936], [702.888671856, 243.867004416, 728.8834228634, 306.4815063552], [648.6872558474, 385.404785152, 677.3641357084, 399.743225088], [611.0488281198, 383.6124877824, 633.4526367428, 397.5924682752]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047443_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two desks, a handbag, and two sandals.", "boxes_value": [[46.50012210609998, 43.867004415999986, 229.88342286340003, 215.5516967936], [136.15527342960002, 95.87841797120001, 220.09094241109995, 164.0532836864], [46.50012210609998, 153.25756835840002, 98.66516110910004, 215.5516967936], [203.88867185599997, 43.867004415999986, 229.88342286340003, 106.4815063552], [149.68725584740002, 185.404785152, 178.3641357084, 199.74322508799997], [112.04882811979996, 183.61248778240002, 134.4526367428, 197.59246827520002]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047444.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[5.9830932365, 144.0249633792, 254.511840788, 464.3671875072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047444_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[5.9830932365, 81.02496337919999, 254.511840788, 401.3671875072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047444.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, a paddle, and a car.", "boxes_value": [[5.9830932365, 144.0249633792, 254.511840788, 464.3671875072], [47.0526122734, 142.1996460032, 152.0079956204, 364.8876953088], [128.2789916884, 144.0249633792, 198.553466806, 184.181823744], [5.9830932365, 192.395751936, 199.4661254648, 464.3671875072], [120.06506349799999, 202.4349365248, 323.58734129960004, 498.13549803520004], [0.16717531360000001, 312.7112427008, 653.248779287, 511.1741943296], [233.26385498160002, 170.9014892544, 254.511840788, 199.1068115456]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047444_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, a paddle, and a car.", "boxes_value": [[5.9830932365, 81.02496337919999, 254.511840788, 401.3671875072], [47.0526122734, 79.1996460032, 152.0079956204, 301.8876953088], [128.2789916884, 81.02496337919999, 198.553466806, 121.18182374400001], [5.9830932365, 129.395751936, 199.4661254648, 401.3671875072], [120.06506349799999, 139.4349365248, 316, 435.13549803520004], [0.16717531360000001, 249.71124270080003, 316, 448.1741943296], [233.26385498160002, 107.90148925439999, 254.511840788, 136.1068115456]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047447.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[213.56677248, 216.9086914048, 717.9752197632, 512.0528564224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047447_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[126.56677248, 73.90869140480001, 630.9752197632, 369]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047447.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two guitars, two people, and a speaker.", "boxes_value": [[213.56677248, 216.9086914048, 717.9752197632, 512.0528564224], [213.56677248, 234.9881591808, 580.7333984256001, 402.5301513728], [499.33886722560004, 372.824157696, 717.9752197632, 483.3305664], [215.83465574400003, 47.210571264, 443.98266600960005, 511.9001464832], [484.8687744, 216.9086914048, 643.4077148160001, 511.8065185792], [338.0847168, 464.8031616, 520.350463872, 512.0528564224]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047447_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two guitars, two people, and a speaker.", "boxes_value": [[126.56677248, 73.90869140480001, 630.9752197632, 369], [126.56677248, 91.98815918080001, 493.73339842560006, 259.5301513728], [412.33886722560004, 229.824157696, 630.9752197632, 340.3305664], [128.83465574400003, 0, 356.98266600960005, 368.9001464832], [397.8687744, 73.90869140480001, 556.4077148160001, 368.8065185792], [251.08471680000002, 321.8031616, 433.350463872, 369]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047454.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[140.80914302940002, 272.5848388608, 266.65783690430004, 402.3058471424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047454_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[31.80914302940002, 32.584838860800005, 157.65783690430004, 162.3058471424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047454.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, and four pillows.", "boxes_value": [[140.80914302940002, 272.5848388608, 266.65783690430004, 402.3058471424], [36.257873537500004, 271.6167602688, 296.6679687796, 486.5276489216], [120.4797362921, 292.9142455808, 188.2444458366, 384.8806152192], [185.3402099551, 286.1377563648, 221.1586913768, 356.80664064], [140.80914302940002, 272.5848388608, 241.48809811409998, 317.1159057408], [199.8612060377, 279.3612670976, 266.65783690430004, 402.3058471424]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047454_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, and four pillows.", "boxes_value": [[31.80914302940002, 32.584838860800005, 157.65783690430004, 162.3058471424], [0, 31.61676026880002, 187.66796877960002, 194], [11.4797362921, 52.914245580800014, 79.2444458366, 144.88061521920002], [76.3402099551, 46.1377563648, 112.1586913768, 116.80664064000001], [31.80914302940002, 32.584838860800005, 132.48809811409998, 77.11590574079997], [90.86120603769999, 39.36126709759998, 157.65783690430004, 162.3058471424]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047455.jpg", "text": "What can you share about the area in the presented image ? Specify the location of each mentioned object.", "boxes_value": [[47.793273933600005, 227.6142578176, 199.3917846778, 314.2247314432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047455_crop.jpg", "text": "What can you share about the area in the presented image ? Specify the location of each mentioned object.", "boxes_value": [[38.793273933600005, 22.61425781759999, 190.3917846778, 109.22473144320003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047455.jpg", "text": "What can you share about the area in the presented image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a barrel, a cup, a plate, a bottle, a tissue, and a toiletry.", "boxes_value": [[47.793273933600005, 227.6142578176, 199.3917846778, 314.2247314432], [87.0999145302, 248.5766601728, 393.28576658540004, 487.5509643776], [80.72247313129999, 272.690429696, 125.7179565629, 314.2247314432], [166.9470825061, 227.6142578176, 197.4285278355, 270.2882690048], [164.0333862622, 265.5657958912, 199.3917846778, 276.5390624768], [49.82867433569999, 256.4274292224, 74.3418578864, 302.7130126848], [119.46661379899999, 221.4930419712, 174.9022216591, 277.4371948032], [47.793273933600005, 243.130798336, 75.01660159240001, 303.6755981312]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047455_crop.jpg", "text": "What can you share about the area in the presented image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a barrel, a cup, a plate, a bottle, a tissue, and a toiletry.", "boxes_value": [[38.793273933600005, 22.61425781759999, 190.3917846778, 109.22473144320003], [78.0999145302, 43.57666017279999, 228, 130], [71.72247313129999, 67.69042969600002, 116.7179565629, 109.22473144320003], [157.9470825061, 22.61425781759999, 188.4285278355, 65.28826900479999], [155.0333862622, 60.565795891200025, 190.3917846778, 71.53906247679998], [40.82867433569999, 51.42742922240001, 65.3418578864, 97.71301268479999], [110.46661379899999, 16.4930419712, 165.9022216591, 72.43719480319999], [38.793273933600005, 38.130798336, 66.01660159240001, 98.67559813119999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047456.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[170.56439208, 53.9299927, 505.34716793999996, 139.6045532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047456_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[84.56439208, 21.9299927, 419.34716793999996, 107.6045532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047456.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a vase, four bottles, two tea pots, and a plate.", "boxes_value": [[170.56439208, 53.9299927, 505.34716793999996, 139.6045532], [281.37097170000004, 0, 338.63372802, 101.40313719999999], [239.17468259999998, 112.12420655, 255.16381836, 146.82403564999998], [241.14971922, 113.63879395, 253.75653078, 145.57598875], [491.0316162, 124.2536621, 510.76782227999996, 140.7391968], [365.02423097999997, 59.250793449999996, 391.5993042, 84.58428955], [466.28564454, 111.29913330000001, 505.34716793999996, 139.6045532], [454.92956544000003, 53.9299927, 496.40466306, 81.0005493], [201.85699463999998, 67.34106445, 236.12982179999997, 89.19622805], [170.56439208, 69.824646, 201.36029052, 89.69293215]], "boxes_seq": [[0], [0], [1], [2], [3, 5, 7, 8], [4, 9], [6]]}, {"image_path": "objects365_v1_00047456_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a vase, four bottles, two tea pots, and a plate.", "boxes_value": [[84.56439208, 21.9299927, 419.34716793999996, 107.6045532], [195.37097170000004, 0, 252.63372801999998, 69.40313719999999], [153.17468259999998, 80.12420655, 169.16381836, 114.82403564999998], [155.14971922, 81.63879395, 167.75653078, 113.57598875], [405.0316162, 92.2536621, 424.76782227999996, 108.7391968], [279.02423097999997, 27.250793449999996, 305.5993042, 52.584289549999994], [380.28564454, 79.29913330000001, 419.34716793999996, 107.6045532], [368.92956544000003, 21.9299927, 410.40466306, 49.0005493], [115.85699463999998, 35.341064450000005, 150.12982179999997, 57.19622805], [84.56439208, 37.824646, 115.36029052, 57.692932150000004]], "boxes_seq": [[0], [0], [1], [2], [3, 5, 7, 8], [4, 9], [6]]}, {"image_path": "objects365_v1_00047457.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 356.4843749888, 232.3764648501, 474.5733032448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047457_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 30.484374988800027, 232.3764648501, 148.5733032448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047457.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two vans, and a bus.", "boxes_value": [[0, 356.4843749888, 232.3764648501, 474.5733032448], [9.1619262615, 372.1500243968, 34.7238769534, 452.0787963904], [25.5673218039, 378.8266601472, 55.7075195063, 450.9342041088], [0, 365.3973999104, 15.5988769731, 474.5733032448], [42.5906372132, 329.4926147584, 199.48022459150002, 448.425048832], [191.8887939257, 356.4843749888, 232.3764648501, 421.4332885504]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047457_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two vans, and a bus.", "boxes_value": [[0, 30.484374988800027, 232.3764648501, 148.5733032448], [9.1619262615, 46.150024396800006, 34.7238769534, 126.07879639039999], [25.5673218039, 52.826660147200016, 55.7075195063, 124.93420410879997], [0, 39.3973999104, 15.5988769731, 148.5733032448], [42.5906372132, 3.492614758400009, 199.48022459150002, 122.42504883200002], [191.8887939257, 30.484374988800027, 232.3764648501, 95.4332885504]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047458.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[487.95581051519997, 175.1220703232, 777.8742676007, 373.862487808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047458_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[72.95581051519997, 50.122070323200006, 362.8742676007, 248.86248780800003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047458.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five cabinets, and a bowl.", "boxes_value": [[487.95581051519997, 175.1220703232, 777.8742676007, 373.862487808], [487.95581051519997, 175.1220703232, 664.6014403931, 305.575683584], [711.9378661825, 149.7300414976, 786.8757323852001, 258.8564453376], [498.39916995190003, 304.017639168, 574.3116455431, 373.4368286208], [575.2593994222999, 305.5697631744, 641.0687256032, 372.3103637504], [641.3791503554, 304.017639168, 678.6297607243, 373.862487808], [751.3775634844001, 236.7948608512, 777.8742676007, 251.085144064]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047458_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five cabinets, and a bowl.", "boxes_value": [[72.95581051519997, 50.122070323200006, 362.8742676007, 248.86248780800003], [72.95581051519997, 50.122070323200006, 249.6014403931, 180.575683584], [296.9378661825, 24.730041497600013, 371.8757323852001, 133.8564453376], [83.39916995190003, 179.01763916800002, 159.31164554309998, 248.4368286208], [160.2593994222999, 180.5697631744, 226.06872560320005, 247.31036375039997], [226.37915035540004, 179.01763916800002, 263.6297607243, 248.86248780800003], [336.37756348440007, 111.79486085120001, 362.8742676007, 126.08514406399999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047459.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[614.9555663743, 292.9904785408, 665.0179443088, 417.5936889856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047459_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[12.95556637430002, 31.990478540799984, 63.0179443088, 156.5936889856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047459.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a bottle, a plate, a bowl, and a chopsticks.", "boxes_value": [[614.9555663743, 292.9904785408, 665.0179443088, 417.5936889856], [545.7506103617, 328.9653320192, 682.4320068184, 510.8862304768], [614.9555663743, 333.9758910976, 653.2805175938, 417.5936889856], [630.2855224790001, 327.7045287936, 665.0179443088, 348.098876928], [630.2855224790001, 309.9357299712, 682.5970459138999, 337.8083495936], [622.272216783, 292.9904785408, 656.0677490035, 304.8363037184]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047459_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a bottle, a plate, a bowl, and a chopsticks.", "boxes_value": [[12.95556637430002, 31.990478540799984, 63.0179443088, 156.5936889856], [0, 67.96533201919999, 75, 187], [12.95556637430002, 72.97589109760003, 51.28051759380003, 156.5936889856], [28.285522479000065, 66.70452879359999, 63.0179443088, 87.09887692799998], [28.285522479000065, 48.935729971199976, 75, 76.80834959359998], [20.272216782999976, 31.990478540799984, 54.06774900350001, 43.83630371840002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047460.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[66.1578369024, 48.8925170688, 277.3693237248, 169.5336303616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047460_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[53.15783690240001, 30.892517068799997, 264.3693237248, 151.5336303616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047460.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three microphones.", "boxes_value": [[66.1578369024, 48.8925170688, 277.3693237248, 169.5336303616], [1.4631347712, 7.6639404544, 169.7720336896, 169.4902953984], [172.1419067392, 0.943725568, 336.4854736384, 169.7431030272], [66.1578369024, 48.8925170688, 98.0185546752, 82.543151872], [134.5330810368, 128.7232666112, 170.3316650496, 150.202392576], [240.8547973632, 96.8625488384, 277.3693237248, 169.5336303616]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047460_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three microphones.", "boxes_value": [[53.15783690240001, 30.892517068799997, 264.3693237248, 151.5336303616], [0, 0, 156.7720336896, 151.4902953984], [159.1419067392, 0, 317, 151.7431030272], [53.15783690240001, 30.892517068799997, 85.0185546752, 64.543151872], [121.53308103680001, 110.72326661119999, 157.3316650496, 132.202392576], [227.8547973632, 78.8625488384, 264.3693237248, 151.5336303616]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047462.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations.", "boxes_value": [[76.5780639744, 246.4520263528, 433.3662719488, 751.982910148]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047462_crop.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations.", "boxes_value": [[76.5780639744, 126.4520263528, 433.3662719488, 631.982910148]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047462.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations. For your reference, objects involved in this region include two benches, a desk, four people, a glasses, a hat, and a stroller.", "boxes_value": [[76.5780639744, 246.4520263528, 433.3662719488, 751.982910148], [336.0304565248, 577.3079834280001, 512.0346679808, 646.3336181944001], [84.079772928, 579.1286621096, 344.2335204864, 643.9425048448], [281.5507812352, 491.109497068, 353.2770996224, 527.0590820328], [76.5780639744, 246.4520263528, 433.3662719488, 751.982910148], [316.9312743936, 518.9029541312, 407.4296875008, 634.0440673936], [83.761596672, 465.2797851864, 171.9990234624, 614.4062500384], [141.0385132032, 458.05566403439997, 175.0950927872, 531.8448486608], [163.2399902208, 254.43640133039997, 200.8499756032, 296.624999988], [331.4840088064, 519.284667956, 363.3806152192, 536.1442871184], [229.4205932544, 502.29382321040003, 351.1528320512, 640.1282959199999]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6, 7], [8], [9], [10]]}, {"image_path": "objects365_v1_00047462_crop.jpg", "text": "Describe the bbox in the provided photo . Please mention the objects and their locations. For your reference, objects involved in this region include two benches, a desk, four people, a glasses, a hat, and a stroller.", "boxes_value": [[76.5780639744, 126.4520263528, 433.3662719488, 631.982910148], [336.0304565248, 457.30798342800006, 512, 526.3336181944001], [84.079772928, 459.1286621096, 344.2335204864, 523.9425048448], [281.5507812352, 371.109497068, 353.2770996224, 407.0590820328], [76.5780639744, 126.4520263528, 433.3662719488, 631.982910148], [316.9312743936, 398.9029541312, 407.4296875008, 514.0440673936], [83.761596672, 345.2797851864, 171.9990234624, 494.4062500384], [141.0385132032, 338.05566403439997, 175.0950927872, 411.84484866080004], [163.2399902208, 134.43640133039997, 200.8499756032, 176.624999988], [331.4840088064, 399.284667956, 363.3806152192, 416.14428711840003], [229.4205932544, 382.29382321040003, 351.1528320512, 520.1282959199999]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6, 7], [8], [9], [10]]}, {"image_path": "objects365_v1_00047463.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates.", "boxes_value": [[77.5717163008, 217.2263793908, 511.7207031296, 457.8707275321]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047463_crop.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates.", "boxes_value": [[77.5717163008, 60.22637939079999, 511.7207031296, 300.8707275321]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047463.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, four pots, a cabinet, and a giraffe.", "boxes_value": [[77.5717163008, 217.2263793908, 511.7207031296, 457.8707275321], [150.3125610496, 170.8064575313, 456.539672832, 631.7587890633], [283.6968994304, 217.2263793908, 456.6961059328, 431.2382812297], [478.9854736384, 235.1403198335, 502.9949340672, 275.84381104], [467.238403328, 334.119995126, 477.5766601728, 371.30139159099997], [449.2985839616, 391.24816897529996, 511.7207031296, 428.0772704887], [432.444580096, 423.3956299137, 511.7207031296, 452.10974123750003], [223.2183227392, 254.31835940820002, 261.7471313408, 277.9640502622], [77.5717163008, 428.8477782928, 134.5808715776, 457.8707275321]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 7, 8], [4], [6]]}, {"image_path": "objects365_v1_00047463_crop.jpg", "text": "I'd like a thorough description of the area in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, four pots, a cabinet, and a giraffe.", "boxes_value": [[77.5717163008, 60.22637939079999, 511.7207031296, 300.8707275321], [150.3125610496, 13.806457531299998, 456.539672832, 361], [283.6968994304, 60.22637939079999, 456.6961059328, 274.2382812297], [478.9854736384, 78.1403198335, 502.9949340672, 118.84381103999999], [467.238403328, 177.119995126, 477.5766601728, 214.30139159099997], [449.2985839616, 234.24816897529996, 511.7207031296, 271.0772704887], [432.444580096, 266.3956299137, 511.7207031296, 295.10974123750003], [223.2183227392, 97.31835940820002, 261.7471313408, 120.96405026219998], [77.5717163008, 271.8477782928, 134.5808715776, 300.8707275321]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 7, 8], [4], [6]]}, {"image_path": "objects365_v1_00047464.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each object you identify.", "boxes_value": [[334.4130859506, 293.8388061696, 461.3781737946, 340.165344256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047464_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each object you identify.", "boxes_value": [[32.41308595060002, 11.83880616959999, 159.3781737946, 58.165344256000026]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047464.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a chair, a pillow, a desk, and a stool.", "boxes_value": [[334.4130859506, 293.8388061696, 461.3781737946, 340.165344256], [334.4130859506, 318.1864013824, 405.8940429699, 340.165344256], [375.01538084279997, 281.9848022528, 576.9443359422, 448.4283447296], [432.7534179855, 303.5044555776, 476.9143066764, 347.6654052864], [383.0057373033, 305.1006469632, 444.1926269448, 333.299804672], [417.96276851880003, 293.8388061696, 461.3781737946, 307.4436035072]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047464_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a chair, a pillow, a desk, and a stool.", "boxes_value": [[32.41308595060002, 11.83880616959999, 159.3781737946, 58.165344256000026], [32.41308595060002, 36.18640138239999, 103.8940429699, 58.165344256000026], [73.01538084279997, 0, 191, 69], [130.7534179855, 21.504455577600027, 174.91430667639997, 65.66540528640002], [81.00573730330001, 23.100646963200006, 142.1926269448, 51.29980467199999], [115.96276851880003, 11.83880616959999, 159.3781737946, 25.44360350720001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047465.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference.", "boxes_value": [[459.0136453891, 58.4154662912, 538.3277587804, 257.028273408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047465_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference.", "boxes_value": [[20.01364538910002, 50.4154662912, 99.32775878040002, 249.02827340800002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047465.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two sneakers, and a storage box.", "boxes_value": [[459.0136453891, 58.4154662912, 538.3277587804, 257.028273408], [138.2281494015, 116.106018048, 514.1491699485, 265.35681152], [506.2896728473, 58.4154662912, 538.3277587804, 151.5682983424], [459.0136453891, 233.350566656, 477.584395787, 257.028273408], [496.3872805367, 230.5649540608, 512.4045527916, 254.9390639616], [491.9321289087, 122.1589965824, 533.7279052698, 146.2719116288]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047465_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two sneakers, and a storage box.", "boxes_value": [[20.01364538910002, 50.4154662912, 99.32775878040002, 249.02827340800002], [0, 108.106018048, 75.1491699485, 257.35681152], [67.28967284729998, 50.4154662912, 99.32775878040002, 143.5682983424], [20.01364538910002, 225.350566656, 38.584395787000005, 249.02827340800002], [57.38728053670002, 222.5649540608, 73.40455279160005, 246.9390639616], [52.93212890870001, 114.1589965824, 94.72790526979998, 138.2719116288]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047466.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Provide the coordinates for all objects that you mention.", "boxes_value": [[256.47888184320004, 478.36163328, 751.6905517824, 512.8946533376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047466_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Provide the coordinates for all objects that you mention.", "boxes_value": [[124.47888184320004, 9.361633279999978, 619.6905517824, 43]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047466.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a leather shoes, and four sneakers.", "boxes_value": [[256.47888184320004, 478.36163328, 751.6905517824, 512.8946533376], [699.33288576, 487.5843505664, 751.6905517824, 503.6486816256], [569.2202148096, 485.1284179456, 604.8995361023999, 505.428710912], [554.148803712, 478.36163328, 599.055542016, 503.8908081152], [371.9871826176, 461.675720192, 396.96203612159997, 509.2238769664], [256.47888184320004, 493.1343383552, 305.4678955008, 512.8946533376]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047466_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a leather shoes, and four sneakers.", "boxes_value": [[124.47888184320004, 9.361633279999978, 619.6905517824, 43], [567.33288576, 18.58435056640002, 619.6905517824, 34.64868162559998], [437.22021480959995, 16.128417945600006, 472.8995361023999, 36.428710911999985], [422.148803712, 9.361633279999978, 467.055542016, 34.8908081152], [239.9871826176, 0, 264.96203612159997, 40.223876966399985], [124.47888184320004, 24.134338355199986, 173.4678955008, 43]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047468.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[145.16503904, 0.637207008, 472.525024384, 318.162048336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047468_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[82.16503904000001, 0.637207008, 409.525024384, 318.162048336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047468.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an elephant, a speaker, and three lamps.", "boxes_value": [[145.16503904, 0.637207008, 472.525024384, 318.162048336], [362.025390656, 224.969543472, 472.525024384, 296.369506848], [413.54260256000003, 290.562133776, 437.807128896, 318.162048336], [145.16503904, 52.011230448000006, 160.14916992, 85.832397456], [343.811157248, 31.46160888, 361.792053248, 56.72052], [199.53588864, 0.637207008, 222.22607424, 17.333770752]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047468_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an elephant, a speaker, and three lamps.", "boxes_value": [[82.16503904000001, 0.637207008, 409.525024384, 318.162048336], [299.025390656, 224.969543472, 409.525024384, 296.369506848], [350.54260256000003, 290.562133776, 374.807128896, 318.162048336], [82.16503904000001, 52.011230448000006, 97.14916991999999, 85.832397456], [280.811157248, 31.46160888, 298.792053248, 56.72052], [136.53588864, 0.637207008, 159.22607424, 17.333770752]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047469.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[0.0591430656, 90.1727905305, 170.5502319104, 482.00231933889995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047469_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[0.0591430656, 90.1727905305, 170.5502319104, 482.00231933889995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047469.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, a necklace, and a sneakers.", "boxes_value": [[0.0591430656, 90.1727905305, 170.5502319104, 482.00231933889995], [0.0591430656, 90.1727905305, 43.7161254912, 342.2282714939], [0.2969360384, 328.2311401668, 106.9667968512, 481.7020263681], [85.9095458816, 141.2038573932, 226.598876928, 450.5803222863], [51.312194816, 87.00799562799999, 252.89685058559996, 669.8903808655], [120.9320678912, 168.2720337223, 170.5502319104, 206.2774657942], [28.5863647232, 462.3547363199, 81.798767104, 482.00231933889995]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047469_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, a necklace, and a sneakers.", "boxes_value": [[0.0591430656, 90.1727905305, 170.5502319104, 482.00231933889995], [0.0591430656, 90.1727905305, 43.7161254912, 342.2282714939], [0.2969360384, 328.2311401668, 106.9667968512, 481.7020263681], [85.9095458816, 141.2038573932, 213, 450.5803222863], [51.312194816, 87.00799562799999, 213, 579], [120.9320678912, 168.2720337223, 170.5502319104, 206.2774657942], [28.5863647232, 462.3547363199, 81.798767104, 482.00231933889995]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047470.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give coordinates for the items you reference.", "boxes_value": [[620.229492155, 81.0377197056, 726.534667976, 182.0943603712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047470_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give coordinates for the items you reference.", "boxes_value": [[27.229492155000003, 26.037719705599997, 133.53466797600004, 127.0943603712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047470.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a helmet, two gloves, and a hockey stick.", "boxes_value": [[620.229492155, 81.0377197056, 726.534667976, 182.0943603712], [621.066162145, 80.147277824, 697.689331053, 229.6782836736], [633.8933105635, 81.0377197056, 656.7978515799999, 103.1521606656], [620.229492155, 127.0034789888, 636.479370111, 152.1907958784], [661.395751939, 140.2742309376, 679.2706298755, 170.6072997888], [633.7110596115, 143.5267944448, 726.534667976, 182.0943603712]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047470_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a helmet, two gloves, and a hockey stick.", "boxes_value": [[27.229492155000003, 26.037719705599997, 133.53466797600004, 127.0943603712], [28.06616214500002, 25.147277824, 104.68933105300005, 152], [40.893310563499995, 26.037719705599997, 63.79785157999993, 48.15216066559999], [27.229492155000003, 72.0034789888, 43.47937011099998, 97.1907958784], [68.39575193899998, 85.27423093760001, 86.27062987550005, 115.60729978879999], [40.71105961149999, 88.5267944448, 133.53466797600004, 127.0943603712]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047472.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[91.20697021139999, 207.1749267456, 226.7399902068, 510.502624512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047472_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[34.20697021139999, 76.1749267456, 169.7399902068, 379.502624512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047472.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, three bottles, and a cup.", "boxes_value": [[91.20697021139999, 207.1749267456, 226.7399902068, 510.502624512], [91.20697021139999, 308.0427856384, 210.6219482685, 510.502624512], [105.3557129223, 199.734313984, 118.64251707790001, 244.3778686464], [114.9221801521, 207.1749267456, 129.2719115952, 243.8463745024], [96.93688963630001, 461.265686016, 118.14538576099999, 489.0264282112], [198.42047120499998, 466.391235328, 226.7399902068, 502.4694824448]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047472_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, three bottles, and a cup.", "boxes_value": [[34.20697021139999, 76.1749267456, 169.7399902068, 379.502624512], [34.20697021139999, 177.0427856384, 153.6219482685, 379.502624512], [48.3557129223, 68.73431398400001, 61.64251707790001, 113.3778686464], [57.9221801521, 76.1749267456, 72.27191159520001, 112.84637450240001], [39.93688963630001, 330.265686016, 61.145385760999986, 358.0264282112], [141.42047120499998, 335.391235328, 169.7399902068, 371.4694824448]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047473.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference.", "boxes_value": [[204.05548094990002, 387.3634643456, 472.8336181955, 467.1447143424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047473_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference.", "boxes_value": [[68.05548094990002, 20.363464345599994, 336.8336181955, 100.1447143424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047473.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference. For your reference, objects involved in this region include a glasses, a hat, and three cups.", "boxes_value": [[204.05548094990002, 387.3634643456, 472.8336181955, 467.1447143424], [235.6281738248, 387.3634643456, 268.5797729409, 422.489074688], [204.05548094990002, 413.0604248064, 231.6144409305, 435.0728759808], [390.8352050461, 442.5452270592, 412.7014160327, 467.1447143424], [425.0012207131, 426.1455688704, 447.32299802709997, 454.8449707008], [448.6896972825, 414.7568969728, 472.8336181955, 436.6231078912]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047473_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Give coordinates for the items you reference. For your reference, objects involved in this region include a glasses, a hat, and three cups.", "boxes_value": [[68.05548094990002, 20.363464345599994, 336.8336181955, 100.1447143424], [99.6281738248, 20.363464345599994, 132.5797729409, 55.48907468800002], [68.05548094990002, 46.06042480640002, 95.6144409305, 68.0728759808], [254.8352050461, 75.54522705919999, 276.7014160327, 100.1447143424], [289.0012207131, 59.145568870399984, 311.32299802709997, 87.84497070079999], [312.6896972825, 47.75689697280001, 336.8336181955, 69.6231078912]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047475.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference.", "boxes_value": [[142.5814819584, 140.6926879744, 494.9462401536, 235.4125366272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047475_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference.", "boxes_value": [[88.58148195839999, 23.692687974400002, 440.9462401536, 118.41253662720001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047475.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference. For your reference, objects involved in this region include two paddles, three people, and two helmets.", "boxes_value": [[142.5814819584, 140.6926879744, 494.9462401536, 235.4125366272], [142.5814819584, 163.2086792192, 333.7670898432, 191.1444702208], [445.07373043199993, 175.4305419776, 504.43737792, 233.4846191616], [312.09985351679995, 162.0810546688, 407.66186526719997, 286.4826660352], [241.82391359999997, 140.6926879744, 348.7656250368, 235.4125366272], [394.2128906496, 173.695983872, 422.82763668480004, 278.7119751168], [468.56584358399994, 207.600421632, 494.9462401536, 224.5592480256], [307.860774528, 141.1110546944, 333.1644201984, 159.6850074112]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047475_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give coordinates for the items you reference. For your reference, objects involved in this region include two paddles, three people, and two helmets.", "boxes_value": [[88.58148195839999, 23.692687974400002, 440.9462401536, 118.41253662720001], [88.58148195839999, 46.20867921920001, 279.7670898432, 74.1444702208], [391.07373043199993, 58.4305419776, 450.43737792, 116.4846191616], [258.09985351679995, 45.08105466879999, 353.66186526719997, 142], [187.82391359999997, 23.692687974400002, 294.7656250368, 118.41253662720001], [340.2128906496, 56.695983872, 368.82763668480004, 142], [414.56584358399994, 90.600421632, 440.9462401536, 107.5592480256], [253.86077452799998, 24.111054694399996, 279.1644201984, 42.68500741119999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047476.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[111.37194823680001, 247.3604736512, 676.9000243968001, 299.0841064448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047476_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[111.37194823680001, 13.36047365120001, 676.9000243968001, 65.0841064448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047476.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, and four sneakers.", "boxes_value": [[111.37194823680001, 247.3604736512, 676.9000243968001, 299.0841064448], [308.3596191744, 266.7946777088, 329.9683838208, 287.26611328], [111.37194823680001, 250.3209839104, 122.9703369216, 281.3745727488], [246.93200686080002, 247.3604736512, 265.6982421504, 274.1198119936], [573.02026368, 285.7502441472, 600.3079834368, 299.0841064448], [660.155273472, 260.3229370368, 676.9000243968001, 294.4327392768]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047476_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, and four sneakers.", "boxes_value": [[111.37194823680001, 13.36047365120001, 676.9000243968001, 65.0841064448], [308.3596191744, 32.79467770880001, 329.9683838208, 53.26611328000001], [111.37194823680001, 16.320983910400003, 122.9703369216, 47.374572748800006], [246.93200686080002, 13.36047365120001, 265.6982421504, 40.119811993600024], [573.02026368, 51.75024414720002, 600.3079834368, 65.0841064448], [660.155273472, 26.3229370368, 676.9000243968001, 60.43273927680002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047478.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[447.06591795869997, 124.6676635648, 779.4309081697, 389.4575195136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047478_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[84.06591795869997, 66.6676635648, 416.43090816970005, 331.4575195136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047478.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, four cars, and a suv.", "boxes_value": [[447.06591795869997, 124.6676635648, 779.4309081697, 389.4575195136], [761.4965820036, 124.6676635648, 779.4309081697, 170.4998169088], [447.06591795869997, 166.5917968896, 500.970458964, 200.5652465664], [450.2366943105, 246.3161620992, 536.3027343609, 306.109374976], [661.3250732058, 213.2486572032, 728.3659668235, 258.0935668736], [620.1038818085, 296.1438598656, 760.9804687136001, 389.4575195136], [545.3623046924, 319.6987304448, 732.8957519493999, 422.9779662848]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047478_crop.jpg", "text": "Can you provide a description of the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, four cars, and a suv.", "boxes_value": [[84.06591795869997, 66.6676635648, 416.43090816970005, 331.4575195136], [398.4965820036, 66.6676635648, 416.43090816970005, 112.4998169088], [84.06591795869997, 108.5917968896, 137.970458964, 142.5652465664], [87.23669431050001, 188.3161620992, 173.3027343609, 248.10937497600003], [298.3250732058, 155.2486572032, 365.3659668235, 200.09356687360003], [257.1038818085, 238.1438598656, 397.9804687136001, 331.4575195136], [182.36230469240002, 261.6987304448, 369.89575194939994, 364.9779662848]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00047480.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[251.59344483840002, 152.5578002944, 507.5333251584, 302.1043701248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047480_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[64.59344483840002, 37.55780029440001, 320.5333251584, 187.10437012480003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047480.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, and four lanterns.", "boxes_value": [[251.59344483840002, 152.5578002944, 507.5333251584, 302.1043701248], [269.6872558848, 256.0480956928, 294.5131835904, 302.1043701248], [251.59344483840002, 152.5578002944, 294.7104492288, 215.4852905472], [324.6674804736, 153.8963622912, 365.1208495872, 217.053222656], [394.84167482879997, 155.547546368, 436.1207275008, 219.5299682816], [465.42871096319993, 156.7858886656, 507.5333251584, 220.768310528]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047480_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, and four lanterns.", "boxes_value": [[64.59344483840002, 37.55780029440001, 320.5333251584, 187.10437012480003], [82.68725588479998, 141.04809569280002, 107.51318359039999, 187.10437012480003], [64.59344483840002, 37.55780029440001, 107.71044922879997, 100.48529054720001], [137.6674804736, 38.896362291200006, 178.12084958719998, 102.053222656], [207.84167482879997, 40.54754636800001, 249.1207275008, 104.52996828159999], [278.42871096319993, 41.7858886656, 320.5333251584, 105.768310528]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047483.jpg", "text": "Can you generate a description for the selected region in the image ? Please mention the objects and their locations.", "boxes_value": [[93.93743896859999, 30.9232787968, 717.7457275518, 510.8806762496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047483_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Please mention the objects and their locations.", "boxes_value": [[93.93743896859999, 30.9232787968, 717.7457275518, 510.8806762496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047483.jpg", "text": "Can you generate a description for the selected region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include five cabinets, and a person.", "boxes_value": [[93.93743896859999, 30.9232787968, 717.7457275518, 510.8806762496], [344.8242187678, 36.377380352, 573.8947754228, 269.5384521728], [573.2130126674, 30.9232787968, 717.7457275518, 283.1736450048], [93.93743896859999, 32.286804224, 342.77893064520003, 242.2531127808], [273.2396850392, 266.1296996864, 717.7457275518, 510.8806762496], [0, 259.3121337856, 271.876159672, 506.1083984384], [401.4010009702, 120.4140014592, 521.7329101696, 376.8262939648]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047483_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include five cabinets, and a person.", "boxes_value": [[93.93743896859999, 30.9232787968, 717.7457275518, 510.8806762496], [344.8242187678, 36.377380352, 573.8947754228, 269.5384521728], [573.2130126674, 30.9232787968, 717.7457275518, 283.1736450048], [93.93743896859999, 32.286804224, 342.77893064520003, 242.2531127808], [273.2396850392, 266.1296996864, 717.7457275518, 510.8806762496], [0, 259.3121337856, 271.876159672, 506.1083984384], [401.4010009702, 120.4140014592, 521.7329101696, 376.8262939648]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047484.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[317.8477173081, 195.6779174912, 436.5185547078, 291.1580200448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047484_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[29.847717308100016, 24.67791749119999, 148.51855470779998, 120.15802004480003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047484.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a vase, a person, a telephone, and a moniter.", "boxes_value": [[317.8477173081, 195.6779174912, 436.5185547078, 291.1580200448], [313.3978271706, 237.1936035328, 342.0726318444, 262.6567993344], [317.8477173081, 262.731689472, 333.69018554819996, 287.1696166912], [353.19738768409997, 195.6779174912, 436.5185547078, 291.1580200448], [382.35534669450004, 215.155578624, 395.2355957148, 244.8361206272], [294.1218262036, 204.806213376, 353.53588869939995, 269.1260376064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047484_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a vase, a person, a telephone, and a moniter.", "boxes_value": [[29.847717308100016, 24.67791749119999, 148.51855470779998, 120.15802004480003], [25.39782717060001, 66.19360353280001, 54.07263184440001, 91.65679933439998], [29.847717308100016, 91.73168947200003, 45.69018554819996, 116.16961669120002], [65.19738768409997, 24.67791749119999, 148.51855470779998, 120.15802004480003], [94.35534669450004, 44.155578623999986, 107.23559571480001, 73.83612062719999], [6.121826203599994, 33.80621337599999, 65.53588869939995, 98.12603760640002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047485.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[382.6053466596, 297.6132812288, 512.0671386897, 436.2994384896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047485_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[32.6053466596, 35.61328122880002, 162.06713868969996, 174.2994384896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047485.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a carpet, a cabinet, two barrels, and a cup.", "boxes_value": [[382.6053466596, 297.6132812288, 512.0671386897, 436.2994384896], [401.48681640030003, 297.6132812288, 512.0671386897, 436.2994384896], [174.84344482050003, 364.5595092992, 513.2873535129, 509.99517824], [357.96356202690004, 288.8730468864, 471.1608886653, 366.8569335808], [381.2351073975, 317.8972168192, 401.1862182534, 338.0386962944], [382.6053466596, 336.4431762944, 400.3779907035, 353.3794555904], [381.5173950354, 318.9139404288, 399.2526855486, 338.4227905024]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047485_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a carpet, a cabinet, two barrels, and a cup.", "boxes_value": [[32.6053466596, 35.61328122880002, 162.06713868969996, 174.2994384896], [51.48681640030003, 35.61328122880002, 162.06713868969996, 174.2994384896], [0, 102.55950929919999, 163, 208], [7.963562026900036, 26.873046886400004, 121.16088866529998, 104.85693358079999], [31.235107397499974, 55.897216819200025, 51.1862182534, 76.03869629439998], [32.6053466596, 74.44317629440002, 50.37799070350002, 91.37945559040003], [31.5173950354, 56.913940428800004, 49.25268554860003, 76.4227905024]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047486.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[304.792663552, 206.3815308, 511.6148071424, 492.93005367999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047486_crop.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[51.79266355200002, 72.38153080000001, 258.6148071424, 358.93005367999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047486.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, three storage boxes, and a trash bin can.", "boxes_value": [[304.792663552, 206.3815308, 511.6148071424, 492.93005367999996], [298.0455932416, 150.33831784, 371.4017944576, 487.2333984], [373.2815552, 267.59252928, 511.4551391744, 375.49206544], [304.792663552, 388.00427248, 387.840332032, 492.93005367999996], [460.2746582016, 206.3815308, 511.6148071424, 241.16027832000003], [451.7573852672, 235.95532224000002, 511.1416626176, 274.28295896], [387.4711913984, 304.67199704, 465.3258056704, 400.086792]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047486_crop.jpg", "text": "Explain the content within the rectangular region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, three storage boxes, and a trash bin can.", "boxes_value": [[51.79266355200002, 72.38153080000001, 258.6148071424, 358.93005367999996], [45.04559324159999, 16.338317840000002, 118.40179445759998, 353.2333984], [120.28155520000001, 133.59252928, 258.4551391744, 241.49206543999998], [51.79266355200002, 254.00427248, 134.840332032, 358.93005367999996], [207.2746582016, 72.38153080000001, 258.6148071424, 107.16027832000003], [198.7573852672, 101.95532224000002, 258.1416626176, 140.28295895999997], [134.47119139839998, 170.67199704, 212.32580567039997, 266.086792]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047488.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[506.9421386565, 225.1394653184, 770.0042724666, 351.1053466624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047488_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[65.94213865649999, 32.1394653184, 329.0042724666, 158.1053466624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047488.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include two storage boxes, a desk, a handbag, and a trolley.", "boxes_value": [[506.9421386565, 225.1394653184, 770.0042724666, 351.1053466624], [593.5404053085, 268.3209228288, 659.0985107136, 316.6938476544], [666.5284424193001, 282.9341430784, 770.0042724666, 351.1053466624], [652.5069580086, 263.662597632, 770.7772217088001, 357.969665536], [506.9421386565, 312.1815185408, 530.2066650495, 348.4467163136], [541.3001708706, 225.1394653184, 573.6235351641, 254.920532224]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047488_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include two storage boxes, a desk, a handbag, and a trolley.", "boxes_value": [[65.94213865649999, 32.1394653184, 329.0042724666, 158.1053466624], [152.5404053085, 75.32092282880001, 218.09851071360004, 123.69384765439997], [225.52844241930006, 89.93414307839998, 329.0042724666, 158.1053466624], [211.5069580086, 70.66259763199997, 329.7772217088001, 164.96966553599998], [65.94213865649999, 119.18151854080003, 89.20666504949997, 155.4467163136], [100.30017087060003, 32.1394653184, 132.62353516409996, 61.920532224]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047489.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[448.06774900799996, 470.088073728, 634.6383056640001, 501.9326171648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047489_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[47.067749007999964, 8.088073727999983, 233.6383056640001, 39.93261716479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047489.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include five rickshaws.", "boxes_value": [[448.06774900799996, 470.088073728, 634.6383056640001, 501.9326171648], [581.244995144, 471.911926272, 600.034423824, 490.701232896], [448.06774900799996, 472.118408192, 476.35510251999995, 499.5797118976], [478.41979979200005, 472.118408192, 500.925781224, 496.4826050048], [554.387573256, 472.2688598528, 578.522460912, 491.7977905152], [602.007446304, 470.088073728, 634.6383056640001, 501.9326171648]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047489_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include five rickshaws.", "boxes_value": [[47.067749007999964, 8.088073727999983, 233.6383056640001, 39.93261716479998], [180.24499514399997, 9.911926272000017, 199.034423824, 28.701232896000022], [47.067749007999964, 10.118408192000004, 75.35510251999995, 37.57971189760002], [77.41979979200005, 10.118408192000004, 99.92578122399999, 34.48260500480001], [153.387573256, 10.268859852800006, 177.52246091200004, 29.797790515200006], [201.00744630400004, 8.088073727999983, 233.6383056640001, 39.93261716479998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047490.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[234.5597958144, 498.2741550509, 337.7145302528, 550.7749022698]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047490_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[26.55979581439999, 13.274155050899992, 129.7145302528, 65.77490226980001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047490.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two leather shoes, and a high heels.", "boxes_value": [[234.5597958144, 498.2741550509, 337.7145302528, 550.7749022698], [70.4216308736, 214.755187995, 365.0317382656, 552.7163085968], [177.4721679872, 192.2994384552, 366.8011474432, 534.1373290734], [318.9831078912, 505.925017713, 337.7145302528, 532.3073027665], [234.5597958144, 498.2741550509, 278.0905660928, 515.1588174988], [246.9594697728, 515.4226403186, 282.57555456, 550.7749022698]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047490_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two leather shoes, and a high heels.", "boxes_value": [[26.55979581439999, 13.274155050899992, 129.7145302528, 65.77490226980001], [0, 0, 155, 67.71630859679999], [0, 0, 155, 49.1373290734], [110.9831078912, 20.92501771299999, 129.7145302528, 47.307302766500015], [26.55979581439999, 13.274155050899992, 70.09056609279997, 30.158817498799976], [38.95946977279999, 30.422640318599974, 74.57555456, 65.77490226980001]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047492.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[323.0654906938, 46.7934570496, 557.5584716672, 300.8986206208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047492_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[59.06549069379997, 46.7934570496, 293.5584716672, 300.8986206208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047492.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a person, a glasses, a tie, and two microphones.", "boxes_value": [[323.0654906938, 46.7934570496, 557.5584716672, 300.8986206208], [510.5870361321, 163.221191424, 590.784301754, 253.4430541824], [323.0654906938, 46.7934570496, 557.5584716672, 300.8986206208], [420.3483276426, 100.7029419008, 476.80651853, 114.6777344], [418.3445434615, 178.0225219584, 465.9829101449, 270.3676758016], [315.6356811424, 49.9327392768, 335.6120605196, 174.7849731584], [524.5906982206, 171.0399170048, 538.1818847492, 251.844421376]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047492_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a person, a glasses, a tie, and two microphones.", "boxes_value": [[59.06549069379997, 46.7934570496, 293.5584716672, 300.8986206208], [246.58703613210002, 163.221191424, 326.784301754, 253.4430541824], [59.06549069379997, 46.7934570496, 293.5584716672, 300.8986206208], [156.3483276426, 100.7029419008, 212.80651853, 114.6777344], [154.3445434615, 178.0225219584, 201.98291014490002, 270.3676758016], [51.63568114240002, 49.9327392768, 71.61206051959999, 174.7849731584], [260.59069822059996, 171.0399170048, 274.1818847492, 251.844421376]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047495.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[189.8031006116, 171.7616577024, 638.1347656028, 423.7824096768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047495_crop.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[112.8031006116, 63.76165770239999, 561.1347656028, 315.7824096768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047495.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a helmet, and three hockey sticks.", "boxes_value": [[189.8031006116, 171.7616577024, 638.1347656028, 423.7824096768], [260.1036376896, 171.7616577024, 638.1347656028, 423.7824096768], [370.7222727684, 170.8550432256, 421.59002820160003, 239.67612416], [189.8031006116, 301.7512817152, 405.34716795279996, 383.9896240128], [57.823852506, 273.233154304, 362.9016113096, 356.797912576], [295.4481811804, 358.6212768768, 344.3132324232, 423.7746582016]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047495_crop.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a helmet, and three hockey sticks.", "boxes_value": [[112.8031006116, 63.76165770239999, 561.1347656028, 315.7824096768], [183.10363768960002, 63.76165770239999, 561.1347656028, 315.7824096768], [293.7222727684, 62.855043225600014, 344.59002820160003, 131.67612416], [112.8031006116, 193.75128171519998, 328.34716795279996, 275.9896240128], [0, 165.23315430399998, 285.9016113096, 248.797912576], [218.4481811804, 250.62127687679998, 267.3132324232, 315.7746582016]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047496.jpg", "text": "I'd like some information about the specific region in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[36.5738525481, 207.6306152448, 122.8775024526, 306.9121703936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047496_crop.jpg", "text": "I'd like some information about the specific region in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[22.5738525481, 25.630615244799998, 108.8775024526, 124.91217039359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047496.jpg", "text": "I'd like some information about the specific region in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a lamp, and three pigs.", "boxes_value": [[36.5738525481, 207.6306152448, 122.8775024526, 306.9121703936], [36.5738525481, 207.6306152448, 122.8775024526, 306.9121703936], [0.10894775429999999, 229.7542114304, 186.57482912039998, 280.2292480512], [48.2362060761, 235.8986816512, 82.4281005555, 289.3597412352], [79.965087867, 233.1459350528, 95.3225097279, 279.3629760512], [94.0185546993, 234.884521472, 112.56329341950001, 280.0874023424]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047496_crop.jpg", "text": "I'd like some information about the specific region in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a lamp, and three pigs.", "boxes_value": [[22.5738525481, 25.630615244799998, 108.8775024526, 124.91217039359998], [22.5738525481, 25.630615244799998, 108.8775024526, 124.91217039359998], [0, 47.754211430400005, 130, 98.22924805119999], [34.2362060761, 53.89868165120001, 68.4281005555, 107.35974123519998], [65.965087867, 51.14593505280001, 81.3225097279, 97.36297605120001], [80.0185546993, 52.88452147199999, 98.56329341950001, 98.08740234240003]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047497.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations.", "boxes_value": [[40.397399890399996, 217.1095059456, 163.91674803979998, 319.7153320448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047497_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations.", "boxes_value": [[31.397399890399996, 26.109505945600006, 154.91674803979998, 128.7153320448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047497.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, two people, a tie, and a cup.", "boxes_value": [[40.397399890399996, 217.1095059456, 163.91674803979998, 319.7153320448], [0.5747070084, 249.0869750784, 915.4483642906, 364.1032714752], [35.656616170700005, 152.2030639616, 164.7648926189, 479.9395141632], [40.397399890399996, 274.3408813568, 163.91674803979998, 319.7153320448], [96.64097178889999, 217.1095059456, 108.0912735798, 246.2805128192], [65.00433350530001, 256.8206176768, 83.8528442094, 286.921997056]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047497_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, two people, a tie, and a cup.", "boxes_value": [[31.397399890399996, 26.109505945600006, 154.91674803979998, 128.7153320448], [0, 58.0869750784, 185, 154], [26.656616170700005, 0, 155.7648926189, 154], [31.397399890399996, 83.34088135680003, 154.91674803979998, 128.7153320448], [87.64097178889999, 26.109505945600006, 99.0912735798, 55.2805128192], [56.00433350530001, 65.8206176768, 74.8528442094, 95.92199705600001]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047498.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[335.205932655, 65.786804224, 754.899780294, 304.1660156416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047498_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[105.20593265500003, 59.786804223999994, 524.899780294, 298.1660156416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047498.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a hat, two backpacks, and a bottle.", "boxes_value": [[335.205932655, 65.786804224, 754.899780294, 304.1660156416], [320.2934570055, 139.9039306752, 451.1870117172, 420.0731201024], [638.7663574083, 65.786804224, 754.899780294, 304.1660156416], [403.6093749666, 141.0524292096, 450.03161622539994, 168.0072631808], [335.205932655, 179.0334472704, 424.73474120550003, 274.0173339648], [636.3443603253, 107.9168090624, 744.2783203284, 208.6935425024], [666.7487792823, 149.5762329088, 680.5371093675001, 176.201965312]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047498_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a hat, two backpacks, and a bottle.", "boxes_value": [[105.20593265500003, 59.786804223999994, 524.899780294, 298.1660156416], [90.29345700549999, 133.9039306752, 221.1870117172, 357], [408.7663574083, 59.786804223999994, 524.899780294, 298.1660156416], [173.6093749666, 135.0524292096, 220.03161622539994, 162.0072631808], [105.20593265500003, 173.0334472704, 194.73474120550003, 268.0173339648], [406.34436032530004, 101.9168090624, 514.2783203284, 202.6935425024], [436.7487792823, 143.5762329088, 450.5371093675001, 170.201965312]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047501.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[0.5924682752, 331.3106078793, 189.5234985472, 468.25781250020003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047501_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[0.5924682752, 34.310607879299994, 189.5234985472, 171.25781250020003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047501.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cymbal, two drums, two people, a tripod, and a speaker.", "boxes_value": [[0.5924682752, 331.3106078793, 189.5234985472, 468.25781250020003], [13.4904174592, 362.81445312479997, 92.3112182784, 379.5340576264], [0.5924682752, 388.6103515335, 47.4072265728, 451.1893310522], [49.3180541952, 392.43200686489996, 126.2280273408, 458.35485842209994], [85.4267578368, 76.5714111228, 284.1520385536, 637.2169189515], [5.8513794048, 331.3106078793, 60.2111816192, 448.99670407499997], [157.2583618048, 420.7299804951, 192.6895752192, 476.060913062], [159.167541504, 402.4365234528, 189.5234985472, 468.25781250020003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047501_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cymbal, two drums, two people, a tripod, and a speaker.", "boxes_value": [[0.5924682752, 34.310607879299994, 189.5234985472, 171.25781250020003], [13.4904174592, 65.81445312479997, 92.3112182784, 82.5340576264], [0.5924682752, 91.61035153350002, 47.4072265728, 154.18933105219998], [49.3180541952, 95.43200686489996, 126.2280273408, 161.35485842209994], [85.4267578368, 0, 236, 205], [5.8513794048, 34.310607879299994, 60.2111816192, 151.99670407499997], [157.2583618048, 123.7299804951, 192.6895752192, 179.060913062], [159.167541504, 105.43652345279997, 189.5234985472, 171.25781250020003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047502.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[35.1313476637, 117.2734374912, 468.70837406059997, 390.3809204224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047502_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object.", "boxes_value": [[35.1313476637, 69.2734374912, 468.70837406059997, 342.3809204224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047502.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a hat, two glasses, a handbag, and a bowl.", "boxes_value": [[35.1313476637, 117.2734374912, 468.70837406059997, 390.3809204224], [35.1313476637, 117.2734374912, 177.4102172708, 201.946777344], [112.86419681090001, 186.6777954304, 160.7531738065, 207.4990844928], [239.18005368200002, 229.0144653312, 269.7180175436, 247.0595703296], [396.55078121919996, 309.7784423936, 418.98681638880004, 355.1603393536], [450.18359377779996, 373.9832153088, 468.70837406059997, 390.3809204224]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047502_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a hat, two glasses, a handbag, and a bowl.", "boxes_value": [[35.1313476637, 69.2734374912, 468.70837406059997, 342.3809204224], [35.1313476637, 69.2734374912, 177.4102172708, 153.946777344], [112.86419681090001, 138.6777954304, 160.7531738065, 159.4990844928], [239.18005368200002, 181.0144653312, 269.7180175436, 199.0595703296], [396.55078121919996, 261.7784423936, 418.98681638880004, 307.1603393536], [450.18359377779996, 325.9832153088, 468.70837406059997, 342.3809204224]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047503.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations.", "boxes_value": [[390.866882304, 404.2023315456, 498.8870849536, 457.4822998016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047503_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations.", "boxes_value": [[27.866882304, 14.202331545599975, 135.88708495359998, 67.48229980159999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047503.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a bicycle, a motorcycle, and a van.", "boxes_value": [[390.866882304, 404.2023315456, 498.8870849536, 457.4822998016], [360.5308837888, 416.961059584, 453.7623901184, 512.0423583744], [434.2569580032, 409.1936645632, 453.84484864, 449.0567016448], [432.2130126848, 427.0059204096, 454.4841918976, 457.4822998016], [481.806945792, 430.78753664, 498.8870849536, 457.2450561536], [390.866882304, 404.2023315456, 413.9606933504, 419.0709838848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047503_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a bicycle, a motorcycle, and a van.", "boxes_value": [[27.866882304, 14.202331545599975, 135.88708495359998, 67.48229980159999], [0, 26.961059583999997, 90.76239011839999, 80], [71.2569580032, 19.193664563200002, 90.84484864000001, 59.0567016448], [69.21301268479999, 37.00592040959998, 91.4841918976, 67.48229980159999], [118.80694579200002, 40.787536639999985, 135.88708495359998, 67.24505615359999], [27.866882304, 14.202331545599975, 50.96069335039999, 29.0709838848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047504.jpg", "text": "In the image , elaborate on the details found within the section . Please point out the objects and their coordinates.", "boxes_value": [[150.2833862615, 359.1530151424, 390.5728759473, 432.1516723712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047504_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please point out the objects and their coordinates.", "boxes_value": [[60.2833862615, 19.153015142400022, 300.5728759473, 92.1516723712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047504.jpg", "text": "In the image , elaborate on the details found within the section . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a luggage, three handbags, and a trash bin can.", "boxes_value": [[150.2833862615, 359.1530151424, 390.5728759473, 432.1516723712], [277.14044187179996, 314.6701049856, 342.12597653489996, 495.7262573056], [316.241149887, 315.9311523328, 333.2514038092, 432.6107788288], [344.14855959749997, 303.705017088, 384.0163574116, 421.9793701376], [150.2833862615, 359.1530151424, 164.9011230798, 396.323852544], [178.4747314343, 392.5820312576, 199.95385743, 411.9132690432], [269.6076049564, 365.8865356288, 302.440002473, 409.4584961024], [329.4423217565, 375.7055664128, 350.3077392633, 407.3106079232], [361.51708984379997, 368.9124145664, 390.5728759473, 432.1516723712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6, 7], [8]]}, {"image_path": "objects365_v1_00047504_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a luggage, three handbags, and a trash bin can.", "boxes_value": [[60.2833862615, 19.153015142400022, 300.5728759473, 92.1516723712], [187.14044187179996, 0, 252.12597653489996, 110], [226.241149887, 0, 243.25140380919999, 92.6107788288], [254.14855959749997, 0, 294.0163574116, 81.97937013759997], [60.2833862615, 19.153015142400022, 74.90112307979999, 56.323852543999976], [88.4747314343, 52.58203125760002, 109.95385743, 71.91326904319999], [179.6076049564, 25.886535628800004, 212.440002473, 69.45849610239998], [239.4423217565, 35.70556641280001, 260.3077392633, 67.3106079232], [271.51708984379997, 28.912414566400003, 300.5728759473, 92.1516723712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6, 7], [8]]}, {"image_path": "objects365_v1_00047510.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object.", "boxes_value": [[166.065673818, 186.5086669824, 334.3244629065, 423.715515136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047510_crop.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object.", "boxes_value": [[42.06567381799999, 59.5086669824, 210.3244629065, 296.715515136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047510.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[166.065673818, 186.5086669824, 334.3244629065, 423.715515136], [149.1439208721, 246.9124755968, 189.1142578347, 409.7113037312], [166.065673818, 245.161987328, 260.5939941231, 423.715515136], [203.7853393554, 186.5086669824, 334.3244629065, 399.6666259968], [267.0495605415, 305.6055908352, 321.4299926574, 384.3307494912], [150.6010131945, 390.3192138752, 177.6534423972, 410.4152832]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047510_crop.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[42.06567381799999, 59.5086669824, 210.3244629065, 296.715515136], [25.14392087210001, 119.9124755968, 65.1142578347, 282.7113037312], [42.06567381799999, 118.16198732800001, 136.5939941231, 296.715515136], [79.7853393554, 59.5086669824, 210.3244629065, 272.6666259968], [143.0495605415, 178.60559083520002, 197.42999265740002, 257.3307494912], [26.601013194500013, 263.3192138752, 53.653442397199996, 283.4152832]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047511.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[83.1365356634, 213.8800658944, 395.7011718853, 302.0208130048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047511_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[79.1365356634, 22.88006589439999, 391.7011718853, 111.0208130048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047511.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a flower, a vase, a bowl, and a bottle.", "boxes_value": [[83.1365356634, 213.8800658944, 395.7011718853, 302.0208130048], [47.90643311150001, 37.2442016768, 315.3146362088, 312.8663330304], [59.8030395333, 200.1610717696, 137.2987060631, 257.291931136], [83.1365356634, 251.3153076224, 124.4188842593, 302.0208130048], [206.1196289337, 257.4317016576, 292.2411499144, 293.0681762816], [369.68750001940003, 213.8800658944, 395.7011718853, 262.1527710208]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047511_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a flower, a vase, a bowl, and a bottle.", "boxes_value": [[79.1365356634, 22.88006589439999, 391.7011718853, 111.0208130048], [43.90643311150001, 0, 311.3146362088, 121.86633303040003], [55.8030395333, 9.1610717696, 133.2987060631, 66.29193113600002], [79.1365356634, 60.31530762240001, 120.4188842593, 111.0208130048], [202.1196289337, 66.4317016576, 288.2411499144, 102.06817628160002], [365.68750001940003, 22.88006589439999, 391.7011718853, 71.1527710208]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047512.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[561.5913085860001, 0.3622436352, 634.4307861016999, 285.1347656192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047512_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[18.591308586000082, 0.3622436352, 91.43078610169994, 285.1347656192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047512.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, and four lanterns.", "boxes_value": [[561.5913085860001, 0.3622436352, 634.4307861016999, 285.1347656192], [349.3209228586, 103.3560180736, 610.2475586158, 323.3530883584], [567.6298828298001, 0.3622436352, 632.0672607102, 76.613159168], [571.3083496029, 90.9618530304, 634.4307861016999, 164.604553216], [561.5913085860001, 172.5378418176, 627.7120361449, 225.2427978752], [589.3812255891, 239.6168823296, 627.7120361449, 285.1347656192]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047512_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, and four lanterns.", "boxes_value": [[18.591308586000082, 0.3622436352, 91.43078610169994, 285.1347656192], [0, 103.3560180736, 67.24755861580002, 323.3530883584], [24.629882829800067, 0.3622436352, 89.0672607102, 76.613159168], [28.308349602899966, 90.9618530304, 91.43078610169994, 164.604553216], [18.591308586000082, 172.5378418176, 84.71203614490003, 225.2427978752], [46.38122558910004, 239.6168823296, 84.71203614490003, 285.1347656192]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047513.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[9.5852660957, 0.247314432, 311.7929687319, 110.4118042112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047513_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[9.5852660957, 0.247314432, 311.7929687319, 110.4118042112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047513.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two flags.", "boxes_value": [[9.5852660957, 0.247314432, 311.7929687319, 110.4118042112], [9.5852660957, 65.3638916096, 58.422241221899995, 104.5177001984], [109.99578859159999, 86.4143066624, 131.0462036489, 110.4118042112], [40.2903442695, 0.3776245248, 83.4555053736, 62.6936035328], [125.5568237263, 0.247314432, 166.9849853174, 76.850402816], [286.1962890402, 66.640136704, 311.7929687319, 100.7689819136]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4]]}, {"image_path": "objects365_v1_00047513_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two flags.", "boxes_value": [[9.5852660957, 0.247314432, 311.7929687319, 110.4118042112], [9.5852660957, 65.3638916096, 58.422241221899995, 104.5177001984], [109.99578859159999, 86.4143066624, 131.0462036489, 110.4118042112], [40.2903442695, 0.3776245248, 83.4555053736, 62.6936035328], [125.5568237263, 0.247314432, 166.9849853174, 76.850402816], [286.1962890402, 66.640136704, 311.7929687319, 100.7689819136]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4]]}, {"image_path": "objects365_v1_00047514.jpg", "text": "Tell me about the region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[229.3519286806, 172.2501220864, 591.3022461004999, 511.8616943104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047514_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[91.3519286806, 85.25012208640001, 453.30224610049993, 424.8616943104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047514.jpg", "text": "Tell me about the region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a person, a hat, a handbag, a fork, and a cell phone.", "boxes_value": [[229.3519286806, 172.2501220864, 591.3022461004999, 511.8616943104], [315.8591308862, 268.5108032, 769.2089843823001, 511.8297119232], [195.7592773499, 388.7299194368, 332.4868163936, 512.6213379072], [229.3519286806, 172.2501220864, 591.3022461004999, 511.8616943104], [302.7864990238, 219.0421752832, 369.51037596329996, 272.2127685632], [527.4094237929, 436.3651733504, 634.2733154274, 511.7235107328], [221.58386230199997, 366.7708129792, 254.58630373440002, 461.0634765824], [201.441589371, 477.4604492288, 264.8144531186, 496.2376098816]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047514_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a person, a hat, a handbag, a fork, and a cell phone.", "boxes_value": [[91.3519286806, 85.25012208640001, 453.30224610049993, 424.8616943104], [177.85913088619998, 181.5108032, 543, 424.8297119232], [57.7592773499, 301.7299194368, 194.4868163936, 425], [91.3519286806, 85.25012208640001, 453.30224610049993, 424.8616943104], [164.7864990238, 132.0421752832, 231.51037596329996, 185.21276856319997], [389.40942379290004, 349.3651733504, 496.2733154274, 424.7235107328], [83.58386230199997, 279.7708129792, 116.58630373440002, 374.0634765824], [63.44158937099999, 390.4604492288, 126.8144531186, 409.2376098816]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047515.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[177.9916381579, 136.488769536, 680.3382568633, 339.2095947264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047515_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[125.99163815790001, 51.48876953600001, 628.3382568633, 254.20959472639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047515.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, and three cabinets.", "boxes_value": [[177.9916381579, 136.488769536, 680.3382568633, 339.2095947264], [651.3781738125999, 207.7752075264, 680.3382568633, 339.2095947264], [499.8945312294, 136.488769536, 656.9473877087, 332.5264892416], [239.2534179583, 134.2610473472, 543.3347167631999, 429.43145753600004], [177.9916381579, 179.9289550848, 265.98583982, 291.3140258816], [247.8460083285, 154.5717773312, 282.2162475523, 198.7620849664]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00047515_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, and three cabinets.", "boxes_value": [[125.99163815790001, 51.48876953600001, 628.3382568633, 254.20959472639998], [599.3781738125999, 122.77520752640001, 628.3382568633, 254.20959472639998], [447.8945312294, 51.48876953600001, 604.9473877087, 247.52648924160002], [187.2534179583, 49.26104734719999, 491.33471676319994, 304], [125.99163815790001, 94.92895508480001, 213.98583982000002, 206.3140258816], [195.8460083285, 69.5717773312, 230.2162475523, 113.76208496640001]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00047516.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[79.56225585, 129.5914917205, 155.97961425, 262.1456908958]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047516_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[19.56225585, 33.59149172049999, 95.97961425, 166.1456908958]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047516.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, three flowers, a picture, and a person.", "boxes_value": [[79.56225585, 129.5914917205, 155.97961425, 262.1456908958], [130.7182007, 221.7386474708, 173.27038575, 269.98095704459996], [93.69439695, 223.7639160358, 142.25476074999997, 262.1456908958], [74.18542479999999, 232.6701660135, 96.66314695, 253.8755492938], [79.56225585, 129.5914917205, 155.97961425, 229.1443481626], [51.0564575, 176.20318603779998, 122.04284669999998, 241.53887942080001], [98.4944458, 152.4857177634, 138.50463865, 215.5559692283]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00047516_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, three flowers, a picture, and a person.", "boxes_value": [[19.56225585, 33.59149172049999, 95.97961425, 166.1456908958], [70.71820070000001, 125.7386474708, 113.27038575, 173.98095704459996], [33.69439695, 127.76391603580001, 82.25476074999997, 166.1456908958], [14.185424799999993, 136.6701660135, 36.66314695, 157.8755492938], [19.56225585, 33.59149172049999, 95.97961425, 133.1443481626], [0, 80.20318603779998, 62.042846699999984, 145.53887942080001], [38.494445799999994, 56.485717763400004, 78.50463865, 119.5559692283]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00047517.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object.", "boxes_value": [[131.27886960749998, 258.3124389888, 319.3232422185, 384.257080064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047517_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object.", "boxes_value": [[47.278869607499985, 32.31243898880001, 235.3232422185, 158.25708006399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047517.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object. For your reference, objects involved in this region include two flowers, a vase, a cabinet, and a plate.", "boxes_value": [[131.27886960749998, 258.3124389888, 319.3232422185, 384.257080064], [189.73510744950002, 258.3124389888, 226.9913329755, 343.023620608], [204.26806642949998, 305.0391235584, 220.8799438155, 345.5559081984], [289.2808837665, 328.2066039808, 319.3232422185, 341.5587158016], [192.5261230455, 359.7485961728, 232.2899169795, 384.257080064], [131.27886960749998, 339.398376448, 185.92333980749999, 354.9660034048]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00047517_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object. For your reference, objects involved in this region include two flowers, a vase, a cabinet, and a plate.", "boxes_value": [[47.278869607499985, 32.31243898880001, 235.3232422185, 158.25708006399998], [105.73510744950002, 32.31243898880001, 142.9913329755, 117.02362060799999], [120.26806642949998, 79.03912355839998, 136.8799438155, 119.5559081984], [205.28088376649998, 102.20660398080003, 235.3232422185, 115.55871580159999], [108.52612304549999, 133.74859617279998, 148.2899169795, 158.25708006399998], [47.278869607499985, 113.39837644800002, 101.92333980749999, 128.9660034048]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00047518.jpg", "text": "What's the story in the section of the included visual ? Please point out the objects and their coordinates.", "boxes_value": [[104.86385177, 218.0319824384, 681.9644775411, 294.7258405888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047518_crop.jpg", "text": "What's the story in the section of the included visual ? Please point out the objects and their coordinates.", "boxes_value": [[104.86385177, 20.031982438400007, 681.9644775411, 96.7258405888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047518.jpg", "text": "What's the story in the section of the included visual ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a necklace, two gloves, a bottle, and a bicycle.", "boxes_value": [[104.86385177, 218.0319824384, 681.9644775411, 294.7258405888], [319.0181274177, 224.0826416128, 351.5690917791, 275.6325683712], [631.6861572041, 218.0319824384, 681.9644775411, 268.9160766464], [104.86385177, 222.1074496, 131.8363969662, 255.3044283392], [111.91820975700001, 256.1343527936, 145.5301507579, 294.7258405888], [544.907836923, 250.9603881984, 586.3128661838, 346.7938842624], [315.28216550089996, 236.2403564544, 335.0725097946, 280.0189209088]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047518_crop.jpg", "text": "What's the story in the section of the included visual ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a necklace, two gloves, a bottle, and a bicycle.", "boxes_value": [[104.86385177, 20.031982438400007, 681.9644775411, 96.7258405888], [319.0181274177, 26.08264161279999, 351.5690917791, 77.63256837120002], [631.6861572041, 20.031982438400007, 681.9644775411, 70.91607664639997], [104.86385177, 24.107449599999995, 131.8363969662, 57.3044283392], [111.91820975700001, 58.13435279359999, 145.5301507579, 96.7258405888], [544.907836923, 52.96038819840001, 586.3128661838, 115], [315.28216550089996, 38.2403564544, 335.0725097946, 82.0189209088]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047519.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates.", "boxes_value": [[168.4702148526, 121.9176635904, 312.2622070536, 307.0051879936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047519_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates.", "boxes_value": [[36.47021485260001, 46.9176635904, 180.26220705359998, 232.00518799359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047519.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a cabinet, and three people.", "boxes_value": [[168.4702148526, 121.9176635904, 312.2622070536, 307.0051879936], [168.4702148526, 138.97607424, 196.63214109479998, 170.5864258048], [222.1309814298, 121.9176635904, 258.131591769, 150.3581542912], [195.4501342602, 227.4301757952, 312.2622070536, 307.0051879936], [268.5387573378, 199.7824707072, 344.5214233488, 301.5639037952], [195.5023803582, 201.5927734272, 238.8633422682, 258.19567872], [172.91308596119998, 203.4102782976, 205.3688964876, 254.8203125248]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047519_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a cabinet, and three people.", "boxes_value": [[36.47021485260001, 46.9176635904, 180.26220705359998, 232.00518799359998], [36.47021485260001, 63.97607424, 64.63214109479998, 95.5864258048], [90.13098142979999, 46.9176635904, 126.13159176900001, 75.35815429120001], [63.4501342602, 152.4301757952, 180.26220705359998, 232.00518799359998], [136.5387573378, 124.78247070719999, 212.5214233488, 226.56390379520002], [63.502380358200014, 126.5927734272, 106.8633422682, 183.19567872], [40.91308596119998, 128.4102782976, 73.36889648760001, 179.8203125248]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047520.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe.", "boxes_value": [[490.40307616300004, 232.7976684544, 567.1943634280001, 278.3894882304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047520_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe.", "boxes_value": [[19.403076163000037, 11.797668454399997, 96.19436342800009, 57.38948823039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047520.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two cups, and two plates.", "boxes_value": [[490.40307616300004, 232.7976684544, 567.1943634280001, 278.3894882304], [295.771606438, 176.3660278272, 678.555908184, 510.7523803648], [490.40307616300004, 236.7023315456, 514.612182623, 273.4063720448], [517.345459015, 232.7976684544, 541.554443353, 269.8921508864], [512.825707933, 246.7512311296, 567.1943634280001, 278.3894882304], [469.207916562, 256.916557056, 525.1124097449999, 288.554814208]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047520_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two cups, and two plates.", "boxes_value": [[19.403076163000037, 11.797668454399997, 96.19436342800009, 57.38948823039999], [0, 0, 115, 68], [19.403076163000037, 15.702331545600003, 43.61218262299997, 52.40637204479998], [46.34545901499996, 11.797668454399997, 70.55444335300001, 48.892150886399975], [41.82570793299999, 25.7512311296, 96.19436342800009, 57.38948823039999], [0, 35.91655705599999, 54.11240974499992, 67.55481420799998]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047521.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[447.3610839552, 3.5855712768, 767.1816406272, 511.3276977664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047521_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[80.3610839552, 3.5855712768, 400.18164062719995, 511.3276977664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047521.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a necklace, and a glasses.", "boxes_value": [[447.3610839552, 3.5855712768, 767.1816406272, 511.3276977664], [447.3610839552, 63.846740736, 767.1816406272, 511.3276977664], [609.1546630656001, 329.9786376704, 655.5260009472, 365.9060058624], [693.6883544832, 3.5855712768, 766.5361327872, 235.0692138496], [614.1721191168, 0.5211791872, 698.71777344, 99.8522338816], [501.56677248, 153.14801024, 642.27441408, 218.9963989504]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00047521_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a necklace, and a glasses.", "boxes_value": [[80.3610839552, 3.5855712768, 400.18164062719995, 511.3276977664], [80.3610839552, 63.846740736, 400.18164062719995, 511.3276977664], [242.15466306560006, 329.9786376704, 288.52600094720003, 365.9060058624], [326.6883544832, 3.5855712768, 399.5361327872, 235.0692138496], [247.1721191168, 0.5211791872, 331.71777344, 99.8522338816], [134.56677248, 153.14801024, 275.27441408000004, 218.9963989504]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00047524.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations.", "boxes_value": [[179.62109376, 217.92840576, 294.331787136, 316.9264526336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047524_crop.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations.", "boxes_value": [[29.621093760000008, 24.928405760000004, 144.331787136, 123.92645263359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047524.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a paddle, two people, and two boats.", "boxes_value": [[179.62109376, 217.92840576, 294.331787136, 316.9264526336], [227.94152832000003, 249.1530761728, 294.331787136, 316.9264526336], [179.62109376, 217.92840576, 229.38171386879998, 284.5253296128], [236.116210944, 222.0439453184, 271.2854003712, 283.777038592], [126.35925296639999, 244.6724242944, 296.9567870976, 306.2912597504], [147.1779174912, 279.6921997312, 295.218994176, 326.3447876096]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047524_crop.jpg", "text": "Can you give me a description of the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a paddle, two people, and two boats.", "boxes_value": [[29.621093760000008, 24.928405760000004, 144.331787136, 123.92645263359998], [77.94152832000003, 56.15307617280001, 144.331787136, 123.92645263359998], [29.621093760000008, 24.928405760000004, 79.38171386879998, 91.52532961280002], [86.11621094399999, 29.043945318400006, 121.28540037120001, 90.777038592], [0, 51.6724242944, 146.9567870976, 113.2912597504], [0, 86.69219973119999, 145.21899417600002, 133.34478760960002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047526.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates.", "boxes_value": [[143.61279295999998, 300.131958, 292.938354496, 449.45758056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047526_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates.", "boxes_value": [[37.61279295999998, 38.131958, 186.938354496, 187.45758056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047526.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a nightstand, a pillow, and two beds.", "boxes_value": [[143.61279295999998, 300.131958, 292.938354496, 449.45758056], [181.205200192, 300.131958, 229.24005126400002, 393.068908704], [164.49743654399998, 361.74182131199996, 292.938354496, 449.45758056], [143.61279295999998, 318.928222656, 184.17474368, 420.619689936], [232.372741696, 265.67224123200003, 640.668579072, 479.740417488], [0, 316.126220688, 383.78680422400004, 478.696166976]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047526_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a nightstand, a pillow, and two beds.", "boxes_value": [[37.61279295999998, 38.131958, 186.938354496, 187.45758056], [75.205200192, 38.131958, 123.24005126400002, 131.06890870400002], [58.49743654399998, 99.74182131199996, 186.938354496, 187.45758056], [37.61279295999998, 56.928222656, 78.17474368, 158.619689936], [126.37274169599999, 3.672241232000033, 224, 217.740417488], [0, 54.12622068799999, 224, 216.69616697599997]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047527.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[171.0767211777, 138.7833862144, 602.8048095738001, 456.2101440512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047527_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify.", "boxes_value": [[108.0767211777, 79.7833862144, 539.8048095738001, 397.2101440512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047527.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a necklace, three people, and a suv.", "boxes_value": [[171.0767211777, 138.7833862144, 602.8048095738001, 456.2101440512], [171.0767211777, 154.9107055616, 202.34387209020002, 181.6520385536], [134.7211913743, 90.4793701376, 327.4510498044, 503.39654543359995], [243.3125610638, 138.6249389568, 284.6198120446, 206.470397952], [281.1583252205, 173.2399902208, 298.9273071092, 205.3165283328], [222.5338745169, 138.7833862144, 602.8048095738001, 456.2101440512]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047527_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a necklace, three people, and a suv.", "boxes_value": [[108.0767211777, 79.7833862144, 539.8048095738001, 397.2101440512], [108.0767211777, 95.9107055616, 139.34387209020002, 122.6520385536], [71.72119137429999, 31.4793701376, 264.4510498044, 444.39654543359995], [180.3125610638, 79.62493895680001, 221.6198120446, 147.470397952], [218.1583252205, 114.2399902208, 235.9273071092, 146.3165283328], [159.5338745169, 79.7833862144, 539.8048095738001, 397.2101440512]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047529.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Please mention the objects and their locations.", "boxes_value": [[122.5170287878, 228.6079711744, 249.8522338654, 453.2800903168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047529_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Please mention the objects and their locations.", "boxes_value": [[32.5170287878, 56.60797117440001, 159.8522338654, 281.2800903168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047529.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, two people, a handbag, and two sneakers.", "boxes_value": [[122.5170287878, 228.6079711744, 249.8522338654, 453.2800903168], [91.32269284280001, 199.9548950016, 192.3015746704, 279.0627441152], [173.03582764799998, 241.7836303872, 249.8522338654, 451.1229858304], [122.5170287878, 228.6079711744, 176.3686523792, 394.901855488], [174.8681029964, 339.6687622144, 219.95684810720002, 374.2014770688], [208.5173339738, 433.3707885568, 246.6768188252, 447.7497558528], [182.8012085036, 438.6246338048, 215.1537475278, 453.2800903168]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047529_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, two people, a handbag, and two sneakers.", "boxes_value": [[32.5170287878, 56.60797117440001, 159.8522338654, 281.2800903168], [1.3226928428000093, 27.954895001599994, 102.30157467039999, 107.06274411520002], [83.03582764799998, 69.78363038719999, 159.8522338654, 279.1229858304], [32.5170287878, 56.60797117440001, 86.3686523792, 222.90185548800002], [84.86810299640001, 167.66876221439998, 129.95684810720002, 202.2014770688], [118.51733397379999, 261.3707885568, 156.6768188252, 275.7497558528], [92.80120850360001, 266.6246338048, 125.1537475278, 281.2800903168]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047533.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for all objects that you mention.", "boxes_value": [[97.9765014528, 433.1453857135, 357.1376342528, 653.6358642690001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047533_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for all objects that you mention.", "boxes_value": [[64.9765014528, 55.14538571349999, 324.1376342528, 275.6358642690001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047533.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cups, a plate, a wine glass, and a lemon.", "boxes_value": [[97.9765014528, 433.1453857135, 357.1376342528, 653.6358642690001], [274.3688964608, 480.635620133, 357.1376342528, 615.6436767929999], [169.2119140864, 560.012207012, 398.5219726336, 600.0396728764999], [183.4589843968, 469.10229495, 282.510070784, 653.6358642690001], [97.9765014528, 433.1453857135, 232.98455808, 629.8907470965], [245.4258422784, 466.0407714625, 310.7528076288, 501.970581026]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047533_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cups, a plate, a wine glass, and a lemon.", "boxes_value": [[64.9765014528, 55.14538571349999, 324.1376342528, 275.6358642690001], [241.3688964608, 102.63562013299997, 324.1376342528, 237.64367679299994], [136.2119140864, 182.01220701199998, 365.5219726336, 222.03967287649994], [150.4589843968, 91.10229494999999, 249.510070784, 275.6358642690001], [64.9765014528, 55.14538571349999, 199.98455808, 251.89074709650004], [212.4258422784, 88.0407714625, 277.7528076288, 123.97058102599999]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047534.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.505432128, 372.545471184, 163.583618176, 480.00762940799996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047534_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.505432128, 27.545471184000007, 163.583618176, 135]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047534.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a cabinet, a person, an electric drill, a brush, and a cup.", "boxes_value": [[0.505432128, 372.545471184, 163.583618176, 480.00762940799996], [1.046691904, 225.249389664, 334.019409152, 421.97479248], [0, 362.951049792, 170.34600831999998, 480.1368408], [48.772705087999995, 430.78863523200005, 65.776367168, 479.935241712], [97.90917971200001, 450.41223144, 163.583618176, 469.396240224], [35.567443839999996, 372.545471184, 96.37005612799999, 402.373168944], [0.505432128, 463.00286865600003, 38.086059584, 480.00762940799996]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047534_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a cabinet, a person, an electric drill, a brush, and a cup.", "boxes_value": [[0.505432128, 27.545471184000007, 163.583618176, 135], [1.046691904, 0, 204, 76.97479248000002], [0, 17.951049791999992, 170.34600831999998, 135], [48.772705087999995, 85.78863523200005, 65.776367168, 134.935241712], [97.90917971200001, 105.41223144000003, 163.583618176, 124.396240224], [35.567443839999996, 27.545471184000007, 96.37005612799999, 57.373168943999985], [0.505432128, 118.00286865600003, 38.086059584, 135]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047535.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[351.9898681948, 247.1377563648, 481.9565429668, 346.481323264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047535_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[32.98986819480001, 25.137756364799998, 162.9565429668, 124.48132326400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047535.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include four lamps, two cabinets, two vases, and a person.", "boxes_value": [[351.9898681948, 247.1377563648, 481.9565429668, 346.481323264], [351.9898681948, 250.3994750976, 367.5909424176, 278.7650756608], [364.6413574584, 247.1377563648, 418.8852538772, 308.8513794048], [453.3626708952, 274.9507446272, 486.885986312, 318.2263794176], [469.2460937252, 249.3030395392, 481.9565429668, 276.7849731584], [382.6984863152, 304.2285156352, 418.238159152, 346.481323264], [351.9898681948, 250.3994750976, 367.5909424176, 278.7650756608], [364.6413574584, 247.1377563648, 418.8852538772, 308.8513794048], [453.3626708952, 274.9507446272, 486.885986312, 318.2263794176], [469.2460937252, 249.3030395392, 481.9565429668, 276.7849731584]], "boxes_seq": [[0], [0], [1, 4, 6, 9], [2, 7], [3, 8], [5]]}, {"image_path": "objects365_v1_00047535_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include four lamps, two cabinets, two vases, and a person.", "boxes_value": [[32.98986819480001, 25.137756364799998, 162.9565429668, 124.48132326400003], [32.98986819480001, 28.399475097600003, 48.59094241759999, 56.765075660799994], [45.64135745840002, 25.137756364799998, 99.8852538772, 86.85137940480001], [134.36267089519998, 52.95074462719998, 167.885986312, 96.22637941760001], [150.2460937252, 27.3030395392, 162.9565429668, 54.78497315840002], [63.6984863152, 82.22851563519998, 99.23815915199998, 124.48132326400003], [32.98986819480001, 28.399475097600003, 48.59094241759999, 56.765075660799994], [45.64135745840002, 25.137756364799998, 99.8852538772, 86.85137940480001], [134.36267089519998, 52.95074462719998, 167.885986312, 96.22637941760001], [150.2460937252, 27.3030395392, 162.9565429668, 54.78497315840002]], "boxes_seq": [[0], [0], [1, 4, 6, 9], [2, 7], [3, 8], [5]]}, {"image_path": "objects365_v1_00047537.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe.", "boxes_value": [[7.692993153000001, 266.8209228288, 349.050048804, 383.2667846656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047537_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe.", "boxes_value": [[7.692993153000001, 29.820922828800008, 349.050048804, 146.26678466560003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047537.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a potted plant, two books, and a stuffed toy.", "boxes_value": [[7.692993153000001, 266.8209228288, 349.050048804, 383.2667846656], [168.3485717625, 285.4522704896, 240.5450439135, 383.2667846656], [226.571533182, 266.8209228288, 283.241821302, 351.4382934528], [296.811889623, 275.9300537344, 349.050048804, 382.0088500736], [7.692993153000001, 355.0478515712, 57.351379401, 374.1870117376], [294.114624012, 310.1299438592, 355.9945068225, 326.2844238336], [52.7677612605, 302.3432006656, 96.4200439215, 355.2490234368]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047537_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a potted plant, two books, and a stuffed toy.", "boxes_value": [[7.692993153000001, 29.820922828800008, 349.050048804, 146.26678466560003], [168.3485717625, 48.45227048959998, 240.5450439135, 146.26678466560003], [226.571533182, 29.820922828800008, 283.241821302, 114.4382934528], [296.811889623, 38.93005373440002, 349.050048804, 145.0088500736], [7.692993153000001, 118.0478515712, 57.351379401, 137.18701173760002], [294.114624012, 73.12994385920001, 355.9945068225, 89.28442383359999], [52.7677612605, 65.34320066560002, 96.4200439215, 118.24902343679997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047538.jpg", "text": "Tell me about the region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[526.7390136524, 154.5891723776, 682.9771728423, 510.6504516608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047538_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each mentioned object.", "boxes_value": [[39.73901365239999, 89.5891723776, 195.97717284229998, 445.6504516608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047538.jpg", "text": "Tell me about the region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a desk, two ties, and a wine glass.", "boxes_value": [[526.7390136524, 154.5891723776, 682.9771728423, 510.6504516608], [574.9606933366999, 297.0591430656, 658.2250976647, 367.0494994944], [598.4919433744, 355.5855712768, 682.9631347335, 510.6504516608], [526.7390136524, 154.5891723776, 548.3675536843, 246.4001464832], [602.6596679758001, 165.6241455104, 623.4053955381, 263.1733398528], [666.0225830103, 342.5872802816, 682.9771728423, 412.7902221824]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047538_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a desk, two ties, and a wine glass.", "boxes_value": [[39.73901365239999, 89.5891723776, 195.97717284229998, 445.6504516608], [87.96069333669993, 232.05914306559998, 171.2250976647, 302.0494994944], [111.49194337439997, 290.5855712768, 195.96313473350006, 445.6504516608], [39.73901365239999, 89.5891723776, 61.36755368429999, 181.4001464832], [115.65966797580006, 100.62414551040001, 136.40539553810004, 198.17333985279998], [179.02258301029997, 277.5872802816, 195.97717284229998, 347.7902221824]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047541.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[18.4966430976, 72.4425659392, 158.6618652672, 196.0867919872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047541_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[18.4966430976, 31.442565939199994, 158.6618652672, 155.0867919872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047541.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, two traffic cones, and two tripods.", "boxes_value": [[18.4966430976, 72.4425659392, 158.6618652672, 196.0867919872], [24.326171904, 62.7073974784, 74.0333862144, 200.925476096], [51.905639654400005, 61.7453613056, 106.743896448, 194.511657728], [80.51318361599999, 81.0765380608, 563.7451171584, 497.8445434368], [18.4966430976, 172.7614135808, 32.3976440064, 196.0867919872], [136.0661010432, 168.284790016, 153.5012206848, 192.317016576], [83.7378540288, 108.2989501952, 158.6618652672, 191.4692993024], [49.8275146752, 72.4425659392, 101.23858644479999, 188.9521484288]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047541_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, two traffic cones, and two tripods.", "boxes_value": [[18.4966430976, 31.442565939199994, 158.6618652672, 155.0867919872], [24.326171904, 21.707397478399997, 74.0333862144, 159.925476096], [51.905639654400005, 20.7453613056, 106.743896448, 153.511657728], [80.51318361599999, 40.076538060800004, 193, 185], [18.4966430976, 131.7614135808, 32.3976440064, 155.0867919872], [136.0661010432, 127.28479001599999, 153.5012206848, 151.317016576], [83.7378540288, 67.2989501952, 158.6618652672, 150.4692993024], [49.8275146752, 31.442565939199994, 101.23858644479999, 147.9521484288]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047546.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[329.0598754759, 330.3749389824, 442.9145507862, 397.0894164992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047546_crop.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[29.059875475900014, 17.37493898240001, 142.91455078619998, 84.08941649920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047546.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, three tripods, and a speaker.", "boxes_value": [[329.0598754759, 330.3749389824, 442.9145507862, 397.0894164992], [329.2927856463, 192.0612182528, 426.1743163839, 393.9400024576], [366.69866941519996, 330.3749389824, 377.9019165238, 393.3452148224], [392.19787594459996, 350.1170043904, 442.9145507862, 397.0894164992], [329.0598754759, 343.5273437696, 347.692932111, 390.855346688], [362.226745607, 261.914550784, 405.0827636695, 397.1905517568]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047546_crop.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, three tripods, and a speaker.", "boxes_value": [[29.059875475900014, 17.37493898240001, 142.91455078619998, 84.08941649920001], [29.29278564629999, 0, 126.17431638390002, 80.94000245759997], [66.69866941519996, 17.37493898240001, 77.90191652380003, 80.34521482240001], [92.19787594459996, 37.117004390399984, 142.91455078619998, 84.08941649920001], [29.059875475900014, 30.52734376960001, 47.692932111000005, 77.855346688], [62.226745607, 0, 105.08276366950003, 84.1905517568]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047547.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify.", "boxes_value": [[139.747314432, 169.4368286208, 315.8037247488, 321.2346801664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047547_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify.", "boxes_value": [[44.747314431999996, 38.436828620799986, 220.8037247488, 190.23468016639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047547.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a glasses, and two hats.", "boxes_value": [[139.747314432, 169.4368286208, 315.8037247488, 321.2346801664], [261.7401123072, 204.0938720768, 348.3826904064, 324.0072021504], [139.747314432, 169.4368286208, 267.9783935232, 321.2346801664], [185.497018368, 199.987371264, 224.3926952448, 212.2702166016], [166.7002553856, 169.1139088896, 242.3556525312, 241.5474353152], [269.62930106880003, 205.0278111744, 315.8037247488, 235.053802496]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047547_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a glasses, and two hats.", "boxes_value": [[44.747314431999996, 38.436828620799986, 220.8037247488, 190.23468016639998], [166.74011230719998, 73.09387207680001, 253.3826904064, 193.00720215040002], [44.747314431999996, 38.436828620799986, 172.9783935232, 190.23468016639998], [90.497018368, 68.98737126399999, 129.3926952448, 81.2702166016], [71.7002553856, 38.1139088896, 147.3556525312, 110.5474353152], [174.62930106880003, 74.0278111744, 220.8037247488, 104.053802496]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047549.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe.", "boxes_value": [[8.7301635584, 93.1256103168, 337.0697631744, 399.4859619072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047549_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe.", "boxes_value": [[8.7301635584, 77.1256103168, 337.0697631744, 383.4859619072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047549.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a sneakers, a suv, a car, and a stuffed toy.", "boxes_value": [[8.7301635584, 93.1256103168, 337.0697631744, 399.4859619072], [90.9805297664, 144.4473877248, 224.7210082816, 412.520141568], [202.4087524352, 360.218139648, 223.4793091072, 399.4859619072], [219.7410888704, 93.1256103168, 337.0697631744, 150.32855224320002], [8.7301635584, 104.2366943232, 107.8148803584, 153.31890869760002], [111.22558592, 115.20874022400001, 255.92456053760003, 375.85852047360004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047549_crop.jpg", "text": "In the provided image , please explain the content within the region . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a sneakers, a suv, a car, and a stuffed toy.", "boxes_value": [[8.7301635584, 77.1256103168, 337.0697631744, 383.4859619072], [90.9805297664, 128.4473877248, 224.7210082816, 396.520141568], [202.4087524352, 344.218139648, 223.4793091072, 383.4859619072], [219.7410888704, 77.1256103168, 337.0697631744, 134.32855224320002], [8.7301635584, 88.2366943232, 107.8148803584, 137.31890869760002], [111.22558592, 99.20874022400001, 255.92456053760003, 359.85852047360004]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047551.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[307.16918947349996, 209.884521472, 499.84350584000003, 289.071289088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047551_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[48.169189473499955, 19.88452147199999, 240.84350584000003, 99.07128908800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047551.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five pillows, and a bed.", "boxes_value": [[307.16918947349996, 209.884521472, 499.84350584000003, 289.071289088], [306.123901383, 208.3164673024, 376.42492678729997, 277.8335571456], [390.79882816180003, 209.884521472, 467.3720703359, 289.071289088], [307.16918947349996, 230.5305786368, 394.1962890835, 278.6175537152], [391.06018069860005, 230.007873536, 486.972778333, 287.7645874176], [479.6827392449, 229.522888192, 499.84350584000003, 287.1248779264], [74.9337158219, 193.6374511616, 539.1126708781, 512.066650368]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047551_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five pillows, and a bed.", "boxes_value": [[48.169189473499955, 19.88452147199999, 240.84350584000003, 99.07128908800001], [47.123901382999975, 18.316467302400014, 117.42492678729997, 87.83355714560003], [131.79882816180003, 19.88452147199999, 208.3720703359, 99.07128908800001], [48.169189473499955, 40.53057863679999, 135.1962890835, 88.61755371520002], [132.06018069860005, 40.007873536000005, 227.972778333, 97.76458741760001], [220.6827392449, 39.52288819200001, 240.84350584000003, 97.12487792640002], [0, 3.637451161600012, 280.11267087809995, 118]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047555.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[293.7462158336, 365.302001922, 498.5818481664, 691.9526367325]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047555_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[51.74621583359999, 82.30200192199999, 256.5818481664, 408.95263673249997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047555.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, a plate, and an oven.", "boxes_value": [[293.7462158336, 365.302001922, 498.5818481664, 691.9526367325], [329.4024047616, 442.432250975, 483.1316528128, 691.9526367325], [374.9804077056, 413.849365215, 498.5818481664, 634.7869873345], [14.1337280512, 364.9914550425, 496.841491712, 714.1610107775], [388.5310058496, 375.92419435999994, 455.51196288, 381.63000489899997], [293.7462158336, 365.302001922, 321.997375488, 398.9060058555]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047555_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, a plate, and an oven.", "boxes_value": [[51.74621583359999, 82.30200192199999, 256.5818481664, 408.95263673249997], [87.4024047616, 159.432250975, 241.13165281279998, 408.95263673249997], [132.9804077056, 130.84936521499998, 256.5818481664, 351.78698733450005], [0, 81.99145504249998, 254.841491712, 431.1610107775], [146.53100584959998, 92.92419435999994, 213.51196288, 98.63000489899997], [51.74621583359999, 82.30200192199999, 79.99737548799999, 115.90600585549998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047556.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[174.16900635, 341.4633178604, 275.64801025, 435.22363279480004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047556_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations.", "boxes_value": [[26.16900634999999, 23.463317860400025, 127.64801025000003, 117.22363279480004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047556.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a flower, a vase, and a bed.", "boxes_value": [[174.16900635, 341.4633178604, 275.64801025, 435.22363279480004], [181.88769530000002, 341.4633178604, 226.0755615, 399.37524413660003], [217.15533445, 317.3780517302, 327.2592163, 418.8801269832], [252.42297365, 388.7735595822, 275.64801025, 435.22363279480004], [0, 314.7730102828, 499.42858885000004, 645.5843506154], [174.16900635, 385.29431152880005, 223.6665039, 423.7923583996]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00047556_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a flower, a vase, and a bed.", "boxes_value": [[26.16900634999999, 23.463317860400025, 127.64801025000003, 117.22363279480004], [33.88769530000002, 23.463317860400025, 78.07556149999999, 81.37524413660003], [69.15533445, 0, 153, 100.8801269832], [104.42297364999999, 70.7735595822, 127.64801025000003, 117.22363279480004], [0, 0, 153, 140], [26.16900634999999, 67.29431152880005, 75.66650390000001, 105.7923583996]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00047558.jpg", "text": "In , what elements can be found within the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[198.155212375, 275.2296753152, 466.47143553750004, 381.0216674816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047558_crop.jpg", "text": "In , what elements can be found within the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[67.15521237499999, 27.229675315199984, 335.47143553750004, 133.0216674816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047558.jpg", "text": "In , what elements can be found within the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a sink, two faucets, a gas stove, an oven, and two cabinets.", "boxes_value": [[198.155212375, 275.2296753152, 466.47143553750004, 381.0216674816], [184.1275634975, 299.9564209152, 274.23767088, 316.5994872832], [198.155212375, 275.2296753152, 226.448364295, 309.2290039296], [199.10626222750003, 311.8442993152, 226.9238891825, 334.9067993088], [342.651611335, 284.4141845504, 414.64074707000003, 305.0794067456], [341.9548340225, 305.7761230336, 416.5037842175, 381.0216674816], [304.67077636, 288.9275512832, 345.19445802250004, 368.6536254976], [414.34912108000003, 294.7922363392, 466.47143553750004, 378.3440551936]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047558_crop.jpg", "text": "In , what elements can be found within the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a sink, two faucets, a gas stove, an oven, and two cabinets.", "boxes_value": [[67.15521237499999, 27.229675315199984, 335.47143553750004, 133.0216674816], [53.127563497500006, 51.9564209152, 143.23767088, 68.5994872832], [67.15521237499999, 27.229675315199984, 95.448364295, 61.229003929600026], [68.10626222750003, 63.844299315199976, 95.9238891825, 86.90679930879998], [211.65161133499998, 36.41418455040002, 283.64074707000003, 57.07940674560001], [210.9548340225, 57.776123033600015, 285.5037842175, 133.0216674816], [173.67077636, 40.92755128319999, 214.19445802250004, 120.65362549759999], [283.34912108000003, 46.792236339199974, 335.47143553750004, 130.3440551936]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047560.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[501.4495849414, 142.1099014656, 606.1930440992, 292.5293579264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047560_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[26.44958494140002, 38.10990146559999, 131.19304409920005, 188.5293579264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047560.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[501.4495849414, 142.1099014656, 606.1930440992, 292.5293579264], [516.8474120734, 234.6425781248, 668.2499999674, 316.2202148352], [501.4495849414, 165.4042358272, 595.0520019868001, 292.5293579264], [568.0596923782, 141.4594116096, 647.2952881112, 276.4210205184], [505.3229596148, 165.6434214912, 537.2254853342, 199.2045175296], [575.0150179551999, 142.1099014656, 606.1930440992, 166.563255296], [254.45806884639998, 213.7254028288, 676.705932643, 378.3268432384]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047560_crop.jpg", "text": "Help me understand the details within the area in photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[26.44958494140002, 38.10990146559999, 131.19304409920005, 188.5293579264], [41.84741207340005, 130.6425781248, 157, 212.2202148352], [26.44958494140002, 61.40423582720001, 120.05200198680006, 188.5293579264], [93.0596923782, 37.45941160960001, 157, 172.42102051839998], [30.322959614800027, 61.6434214912, 62.225485334200016, 95.2045175296], [100.01501795519994, 38.10990146559999, 131.19304409920005, 62.563255295999994], [0, 109.72540282879999, 157, 226]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047561.jpg", "text": "Within the input image , what can be found in the region defined by ? Include the coordinates for each mentioned object.", "boxes_value": [[167.100341808, 12.3499756032, 369.16906741919996, 230.4053955072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047561_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Include the coordinates for each mentioned object.", "boxes_value": [[51.100341807999996, 12.3499756032, 253.16906741919996, 230.4053955072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047561.jpg", "text": "Within the input image , what can be found in the region defined by ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a leather shoes, and a handbag.", "boxes_value": [[167.100341808, 12.3499756032, 369.16906741919996, 230.4053955072], [340.390991232, 20.2639770624, 369.16906741919996, 83.9354858496], [234.6314697408, 14.868041984, 272.402709984, 133.937438976], [169.52111812799998, 12.3499756032, 242.545532232, 146.1680908288], [167.100341808, 217.0008544768, 192.8784179808, 230.4053955072], [259.1359863408, 88.0548095488, 272.1846923712, 114.6216430592]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047561_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a leather shoes, and a handbag.", "boxes_value": [[51.100341807999996, 12.3499756032, 253.16906741919996, 230.4053955072], [224.39099123199998, 20.2639770624, 253.16906741919996, 83.9354858496], [118.63146974079999, 14.868041984, 156.402709984, 133.937438976], [53.52111812799998, 12.3499756032, 126.545532232, 146.1680908288], [51.100341807999996, 217.0008544768, 76.87841798080001, 230.4053955072], [143.1359863408, 88.0548095488, 156.18469237120001, 114.6216430592]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047562.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object.", "boxes_value": [[82.7954101523, 12.765808128, 324.1428832984, 368.8068847616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047562_crop.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object.", "boxes_value": [[60.79541015229999, 12.765808128, 302.1428832984, 368.8068847616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047562.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include a carpet, a cabinet, a couch, a storage box, a person, and a bakset.", "boxes_value": [[82.7954101523, 12.765808128, 324.1428832984, 368.8068847616], [19.8370361544, 212.2708129792, 623.6240234283, 512.3967285248], [82.7954101523, 12.765808128, 180.9685058836, 76.7536010752], [0.40014647799999997, 89.9017944576, 230.05505371509997, 295.8900146688], [284.8091430596, 172.0817870848, 324.1428832984, 206.7575683584], [141.1163940524, 99.2161254912, 228.1823120418, 264.2084960768], [200.55261231690002, 296.4826660352, 268.35656737979997, 368.8068847616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047562_crop.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include a carpet, a cabinet, a couch, a storage box, a person, and a bakset.", "boxes_value": [[60.79541015229999, 12.765808128, 302.1428832984, 368.8068847616], [0, 212.2708129792, 362, 457], [60.79541015229999, 12.765808128, 158.9685058836, 76.7536010752], [0, 89.9017944576, 208.05505371509997, 295.8900146688], [262.8091430596, 172.0817870848, 302.1428832984, 206.7575683584], [119.11639405240001, 99.2161254912, 206.1823120418, 264.2084960768], [178.55261231690002, 296.4826660352, 246.35656737979997, 368.8068847616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047563.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[479.62451167999996, 228.6365966848, 717.0020752, 412.2755126784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047563_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[59.624511679999955, 46.6365966848, 297.00207520000004, 230.2755126784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047563.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four pictures, and a sneakers.", "boxes_value": [[479.62451167999996, 228.6365966848, 717.0020752, 412.2755126784], [631.67517088, 229.7819213824, 666.89404296, 288.4799194112], [664.0306396799999, 228.6365966848, 717.0020752, 289.0526123008], [645.77136232, 290.0537109504, 673.11889648, 327.7092284928], [668.91162112, 288.7915039232, 704.8842773599999, 323.0811767808], [479.62451167999996, 378.923584, 499.51867672, 412.2755126784]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047563_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four pictures, and a sneakers.", "boxes_value": [[59.624511679999955, 46.6365966848, 297.00207520000004, 230.2755126784], [211.67517088, 47.781921382399986, 246.89404295999998, 106.4799194112], [244.03063967999992, 46.6365966848, 297.00207520000004, 107.05261230079998], [225.77136231999998, 108.05371095039999, 253.11889648, 145.70922849279998], [248.91162111999995, 106.79150392320003, 284.88427735999994, 141.08117678079998], [59.624511679999955, 196.923584, 79.51867671999997, 230.2755126784]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047564.jpg", "text": "Please provide information about the area within the bounding box in the picture . Please point out the objects and their coordinates.", "boxes_value": [[47.8193359321, 198.1065063424, 425.8770751971, 346.5734253056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047564_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Please point out the objects and their coordinates.", "boxes_value": [[47.8193359321, 38.106506342399996, 425.8770751971, 186.57342530559998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047564.jpg", "text": "Please provide information about the area within the bounding box in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, two desks, a chair, a bed, and a person.", "boxes_value": [[47.8193359321, 198.1065063424, 425.8770751971, 346.5734253056], [71.3455810626, 219.78637696, 208.675048818, 316.1729125888], [47.8193359321, 280.7722778112, 166.9924926613, 346.5734253056], [278.7645263447, 198.1065063424, 351.14575194139996, 299.756347648], [254.7940673809, 215.614318848, 425.8770751971, 305.3859252736], [249.1466674526, 117.8813476352, 601.3801269737, 512.3150635008], [277.2677612177, 228.865417472, 578.5892333895, 311.0997924864]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5], [6]]}, {"image_path": "objects365_v1_00047564_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, two desks, a chair, a bed, and a person.", "boxes_value": [[47.8193359321, 38.106506342399996, 425.8770751971, 186.57342530559998], [71.3455810626, 59.78637696000001, 208.675048818, 156.1729125888], [47.8193359321, 120.77227781120001, 166.9924926613, 186.57342530559998], [278.7645263447, 38.106506342399996, 351.14575194139996, 139.75634764799997], [254.7940673809, 55.61431884800001, 425.8770751971, 145.38592527359998], [249.1466674526, 0, 520, 223], [277.2677612177, 68.86541747199999, 520, 151.09979248640002]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5], [6]]}, {"image_path": "objects365_v1_00047565.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[89.3787842048, 652.6948241939999, 245.7726440448, 683.0266113191999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047565_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[39.3787842048, 7.694824193999921, 195.7726440448, 38]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047565.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[89.3787842048, 652.6948241939999, 245.7726440448, 683.0266113191999], [160.8881836032, 182.10229489920002, 295.9905395712, 683.2744140325], [0.7901001216, 292.2913818324, 159.3930053632, 682.6352538901], [89.3787842048, 652.6948241939999, 123.1832275456, 682.9641113552], [212.5260009984, 666.4033203358, 245.7726440448, 683.0266113191999], [160.8881836032, 182.10229489920002, 295.9905395712, 683.2744140325], [0.7901001216, 292.2913818324, 159.3930053632, 682.6352538901]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3, 4]]}, {"image_path": "objects365_v1_00047565_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[39.3787842048, 7.694824193999921, 195.7726440448, 38], [110.88818360319999, 0, 234, 38], [0, 0, 109.39300536319999, 37.63525389009999], [39.3787842048, 7.694824193999921, 73.1832275456, 37.964111355199975], [162.5260009984, 21.403320335800004, 195.7726440448, 38], [110.88818360319999, 0, 234, 38], [0, 0, 109.39300536319999, 37.63525389009999]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3, 4]]}, {"image_path": "objects365_v1_00047566.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference.", "boxes_value": [[133.14099123649999, 259.6729736192, 467.97363279710004, 465.811157248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047566_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference.", "boxes_value": [[84.14099123649999, 51.67297361919998, 418.97363279710004, 257.811157248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047566.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference. For your reference, objects involved in this region include six chairs, and a desk.", "boxes_value": [[133.14099123649999, 259.6729736192, 467.97363279710004, 465.811157248], [264.6426391796, 285.4039917056, 383.10278323669996, 493.5242919936], [385.8197021186, 272.9058838016, 528.7326659961, 466.8979492352], [133.14099123649999, 272.362487808, 265.7294311701, 465.811157248], [398.87133789980004, 261.100463872, 515.8454589816, 406.8190918144], [160.01464841429998, 260.7366943232, 266.5394287365, 371.3962402304], [374.93750000060004, 259.6729736192, 389.2678222912, 301.5327148544], [179.9659424093, 302.8052978688, 467.97363279710004, 444.3847656448]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047566_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Give coordinates for the items you reference. For your reference, objects involved in this region include six chairs, and a desk.", "boxes_value": [[84.14099123649999, 51.67297361919998, 418.97363279710004, 257.811157248], [215.6426391796, 77.40399170559999, 334.10278323669996, 285.5242919936], [336.8197021186, 64.9058838016, 479.73266599609997, 258.8979492352], [84.14099123649999, 64.36248780800003, 216.72943117009999, 257.811157248], [349.87133789980004, 53.10046387199998, 466.8454589816, 198.8190918144], [111.01464841429998, 52.73669432320003, 217.53942873649999, 163.3962402304], [325.93750000060004, 51.67297361919998, 340.2678222912, 93.53271485440001], [130.9659424093, 94.80529786879998, 418.97363279710004, 236.38476564479998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047567.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[95.1288452096, 443.885864251, 225.460327168, 470.4732666351]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047567_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[33.1288452096, 6.885864250999987, 163.460327168, 33.4732666351]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047567.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a person, a cup, a bowl, and a wine glass.", "boxes_value": [[95.1288452096, 443.885864251, 225.460327168, 470.4732666351], [82.176147456, 362.1583252134, 186.7898559488, 520.1882324533], [146.2872314368, 279.1660156548, 299.5280761856, 682.9980468932], [203.636779776, 443.885864251, 225.460327168, 470.4732666351], [95.1288452096, 444.1125488092, 127.7982177792, 459.81896973290003], [136.8720092672, 444.0230713014, 161.7255248896, 460.03039552469994]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047567_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a person, a cup, a bowl, and a wine glass.", "boxes_value": [[33.1288452096, 6.885864250999987, 163.460327168, 33.4732666351], [20.176147455999995, 0, 124.78985594880001, 40], [84.2872314368, 0, 196, 40], [141.636779776, 6.885864250999987, 163.460327168, 33.4732666351], [33.1288452096, 7.112548809200007, 65.7982177792, 22.81896973290003], [74.87200926720001, 7.023071301400023, 99.7255248896, 23.03039552469994]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047572.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention.", "boxes_value": [[488.76647227100005, 340.771150592, 746.82372688, 450.5736579584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047572_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention.", "boxes_value": [[64.76647227100005, 27.771150592000026, 322.82372688, 137.57365795840002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047572.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three sneakers, a leather shoes, and a boots.", "boxes_value": [[488.76647227100005, 340.771150592, 746.82372688, 450.5736579584], [488.76647227100005, 422.8025957888, 549.008314855, 450.5736579584], [528.0732064299999, 353.5885639168, 569.516176152, 381.7868731904], [579.3428596340001, 371.1056954368, 608.822910311, 405.7127113728], [623.776559175, 392.895298048, 695.126826548, 426.220572672], [688.290872808, 340.771150592, 746.82372688, 397.5950162432]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047572_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three sneakers, a leather shoes, and a boots.", "boxes_value": [[64.76647227100005, 27.771150592000026, 322.82372688, 137.57365795840002], [64.76647227100005, 109.80259578879998, 125.00831485499998, 137.57365795840002], [104.07320642999991, 40.58856391680001, 145.51617615199996, 68.7868731904], [155.3428596340001, 58.10569543679998, 184.822910311, 92.71271137280002], [199.776559175, 79.89529804799997, 271.12682654800005, 113.220572672], [264.290872808, 27.771150592000026, 322.82372688, 84.5950162432]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047573.jpg", "text": "What insights can you provide about the area in the selected picture ? Specify the location of each mentioned object.", "boxes_value": [[267.9783935232, 291.6842040832, 767.7540283392, 425.106750464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047573_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Specify the location of each mentioned object.", "boxes_value": [[124.9783935232, 33.68420408319997, 624.7540283392, 167.10675046400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047573.jpg", "text": "What insights can you provide about the area in the selected picture ? Specify the location of each mentioned object. For your reference, objects involved in this region include five chairs.", "boxes_value": [[267.9783935232, 291.6842040832, 767.7540283392, 425.106750464], [267.9783935232, 294.7396850688, 370.5632324352, 424.5126342656], [386.8378906368, 294.2304687616, 486.65014648320005, 424.5975341568], [486.1408691712, 293.721191424, 592.0640868864, 425.106750464], [593.0826415872, 293.721191424, 706.6445312256, 418.486572288], [702.0612793344, 291.6842040832, 767.7540283392, 420.5235595776]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047573_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Specify the location of each mentioned object. For your reference, objects involved in this region include five chairs.", "boxes_value": [[124.9783935232, 33.68420408319997, 624.7540283392, 167.10675046400002], [124.9783935232, 36.739685068799986, 227.5632324352, 166.5126342656], [243.8378906368, 36.23046876159998, 343.65014648320005, 166.5975341568], [343.1408691712, 35.72119142399998, 449.0640868864, 167.10675046400002], [450.0826415872, 35.72119142399998, 563.6445312256, 160.486572288], [559.0612793344, 33.68420408319997, 624.7540283392, 162.5235595776]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047574.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[389.85021973790003, 178.6121216, 465.9942626664, 481.3919067136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047574_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[19.850219737900034, 76.6121216, 95.9942626664, 379.3919067136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047574.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a hat, and two leather shoes.", "boxes_value": [[389.85021973790003, 178.6121216, 465.9942626664, 481.3919067136], [362.2128906298, 178.866882304, 503.054565417, 481.162597632], [279.2774047588, 141.7052612096, 467.7830810562, 511.9783935488], [408.792724577, 178.6121216, 465.9942626664, 221.1934203904], [396.9554443548, 437.4497680896, 442.3997802838, 481.3919067136], [389.85021973790003, 438.50115968, 409.7172851484, 479.0631103488]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047574_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a hat, and two leather shoes.", "boxes_value": [[19.850219737900034, 76.6121216, 95.9942626664, 379.3919067136], [0, 76.866882304, 115, 379.162597632], [0, 39.705261209599996, 97.78308105619999, 409.9783935488], [38.792724577, 76.6121216, 95.9942626664, 119.19342039040001], [26.955444354800022, 335.4497680896, 72.3997802838, 379.3919067136], [19.850219737900034, 336.50115968, 39.71728514839998, 377.0631103488]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047575.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each mentioned object.", "boxes_value": [[152.6901855425, 206.5023803904, 359.87792966250004, 396.3601074176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047575_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each mentioned object.", "boxes_value": [[52.69018554249999, 47.50238039039999, 259.87792966250004, 237.36010741759998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047575.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two cabinets, a plate, and a tea pot.", "boxes_value": [[152.6901855425, 206.5023803904, 359.87792966250004, 396.3601074176], [292.863281226, 261.0410766848, 382.050659173, 433.7828979712], [150.7355956735, 100.5637817344, 357.27172848500004, 281.038635264], [152.6901855425, 269.311035136, 359.87792966250004, 396.3601074176], [231.00732418849998, 237.4620361216, 259.306762667, 258.3026123264], [311.8759765945, 206.5023803904, 327.348632823, 229.020568832]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047575_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two cabinets, a plate, and a tea pot.", "boxes_value": [[52.69018554249999, 47.50238039039999, 259.87792966250004, 237.36010741759998], [192.86328122600003, 102.04107668479998, 282.050659173, 274.7828979712], [50.7355956735, 0, 257.27172848500004, 122.03863526399999], [52.69018554249999, 110.31103513599999, 259.87792966250004, 237.36010741759998], [131.00732418849998, 78.4620361216, 159.306762667, 99.30261232639998], [211.8759765945, 47.50238039039999, 227.348632823, 70.02056883200001]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047577.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[402.140136704, 105.592723632, 597.0645973759999, 480.5523681599999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047577_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object.", "boxes_value": [[49.140136703999985, 94.592723632, 244.06459737599994, 469]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047577.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cabinets, a person, and a trolley.", "boxes_value": [[402.140136704, 105.592723632, 597.0645973759999, 480.5523681599999], [402.140136704, 226.114624032, 534.3874512, 459.191345232], [511.659790016, 308.64135744, 639.418823232, 480.55236815999996], [442.14392089599994, 115.12402344, 583.99401856, 480.5523681599999], [335.65631104, 121.205322288, 468.657348608, 480.03515625600005], [451.001028608, 105.592723632, 597.0645973759999, 478.470600864]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047577_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cabinets, a person, and a trolley.", "boxes_value": [[49.140136703999985, 94.592723632, 244.06459737599994, 469], [49.140136703999985, 215.114624032, 181.3874512, 448.191345232], [158.659790016, 297.64135744, 286.418823232, 469], [89.14392089599994, 104.12402344, 230.99401855999997, 469], [0, 110.205322288, 115.657348608, 469], [98.00102860800001, 94.592723632, 244.06459737599994, 467.470600864]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047578.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[325.578369123, 350.2633056768, 590.4428710857, 512.487182592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047578_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[66.57836912300002, 41.26330567679997, 331.44287108569995, 203]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047578.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a storage box, a desk, and three people.", "boxes_value": [[325.578369123, 350.2633056768, 590.4428710857, 512.487182592], [457.8228760057, 480.839904768, 538.0375976365, 512.487182592], [403.7446288895, 410.877807616, 555.7691650584, 504.8489990144], [325.578369123, 363.598266624, 399.5023193221, 512.07275392], [420.7576904198, 350.2633056768, 465.3269043226, 499.0062866432], [555.5393066675, 354.022155776, 590.4428710857, 415.237670912]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047578_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a storage box, a desk, and three people.", "boxes_value": [[66.57836912300002, 41.26330567679997, 331.44287108569995, 203], [198.82287600569998, 171.839904768, 279.0375976365, 203], [144.7446288895, 101.87780761599998, 296.76916505839995, 195.84899901440002], [66.57836912300002, 54.59826662400002, 140.5023193221, 203], [161.75769041979999, 41.26330567679997, 206.3269043226, 190.00628664319999], [296.5393066675, 45.02215577599998, 331.44287108569995, 106.237670912]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047579.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[440.131530752, 489.7248534939, 511.7284546048, 614.5866699466]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047579_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference.", "boxes_value": [[18.131530752000003, 31.724853493900014, 89.72845460479999, 156.58666994659995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047579.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a couch, a chair, two cups, and a bottle.", "boxes_value": [[440.131530752, 489.7248534939, 511.7284546048, 614.5866699466], [0.6651000832, 404.2778320318, 511.2778930688, 590.9111328122], [440.131530752, 489.7248534939, 510.8978271232, 573.2609862984], [428.3401489408, 548.1152343603001, 479.1712036352, 621.3404541132], [494.8115234304, 553.0916748103, 511.7284546048, 614.5866699466], [473.1136474624, 502.14733883919996, 511.469665536, 612.4959717038]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047579_crop.jpg", "text": "Please enlighten me about the area in the photograph . Give coordinates for the items you reference. For your reference, objects involved in this region include a couch, a chair, two cups, and a bottle.", "boxes_value": [[18.131530752000003, 31.724853493900014, 89.72845460479999, 156.58666994659995], [0, 0, 89.27789306879998, 132.91113281219998], [18.131530752000003, 31.724853493900014, 88.89782712319999, 115.26098629839998], [6.34014894080002, 90.11523436030006, 57.171203635200015, 163.34045411320005], [72.81152343039997, 95.09167481029999, 89.72845460479999, 156.58666994659995], [51.113647462400024, 44.14733883919996, 89.46966553599998, 154.49597170380002]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047580.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object.", "boxes_value": [[90.00439455829999, 227.0137329152, 602.1328124989, 448.6723022336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047580_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object.", "boxes_value": [[90.00439455829999, 56.013732915199995, 602.1328124989, 277.6723022336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047580.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a pillow, a flower, a vase, a lamp, and a desk.", "boxes_value": [[90.00439455829999, 227.0137329152, 602.1328124989, 448.6723022336], [84.5541992177, 232.8496093696, 219.0994872887, 427.9754638848], [90.00439455829999, 270.9511108608, 185.10186766220002, 331.6323852288], [387.97644045100003, 305.3673705984, 533.792480461, 419.4843139584], [435.3881835608, 386.0747680768, 493.6334228292, 401.5788574208], [500.3083495916, 227.0137329152, 602.1328124989, 422.3503417856], [386.0156249715, 395.3356933632, 588.9718017806999, 448.6723022336]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047580_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a pillow, a flower, a vase, a lamp, and a desk.", "boxes_value": [[90.00439455829999, 56.013732915199995, 602.1328124989, 277.6723022336], [84.5541992177, 61.84960936959999, 219.0994872887, 256.9754638848], [90.00439455829999, 99.95111086079999, 185.10186766220002, 160.6323852288], [387.97644045100003, 134.3673705984, 533.792480461, 248.48431395839998], [435.3881835608, 215.0747680768, 493.6334228292, 230.57885742079998], [500.3083495916, 56.013732915199995, 602.1328124989, 251.3503417856], [386.0156249715, 224.3356933632, 588.9718017806999, 277.6723022336]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047581.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[49.7529296896, 334.29602048140003, 499.5275268608, 563.8090820053]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047581_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations.", "boxes_value": [[49.7529296896, 58.296020481400035, 499.5275268608, 287.80908200529996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047581.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include three vases, a person, and two pumpkins.", "boxes_value": [[49.7529296896, 334.29602048140003, 499.5275268608, 563.8090820053], [49.7529296896, 334.29602048140003, 146.42242432, 491.4761962884], [414.4774170112, 358.69140621710005, 499.5275268608, 512.1175537327999], [58.53222656, 300.861267097, 113.9586181632, 360.0611572531], [213.386840832, 135.9974365002, 354.0044555776, 552.6099853721], [331.730834944, 528.2749023211, 369.2390747136, 563.8090820053], [193.049133312, 519.3914794863, 223.6479492096, 555.9125976537999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047581_crop.jpg", "text": "Please describe the content within the area displayed in the image . Please mention the objects and their locations. For your reference, objects involved in this region include three vases, a person, and two pumpkins.", "boxes_value": [[49.7529296896, 58.296020481400035, 499.5275268608, 287.80908200529996], [49.7529296896, 58.296020481400035, 146.42242432, 215.4761962884], [414.4774170112, 82.69140621710005, 499.5275268608, 236.11755373279993], [58.53222656, 24.861267096999995, 113.9586181632, 84.06115725310002], [213.386840832, 0, 354.0044555776, 276.6099853721], [331.730834944, 252.27490232109994, 369.2390747136, 287.80908200529996], [193.049133312, 243.3914794863, 223.6479492096, 279.91259765379993]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047584.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[420.42419430300004, 193.5737304576, 624.5916747966, 433.4476318208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047584_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[51.424194303000036, 60.57373045759999, 255.59167479660005, 300.4476318208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047584.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two people, a glasses, a bottle, a cup, and a canned.", "boxes_value": [[420.42419430300004, 193.5737304576, 624.5916747966, 433.4476318208], [583.991332995, 351.3239135744, 619.7645264033999, 402.879272448], [288.5852050818, 73.636779776, 536.6989746297, 423.3765869056], [420.42419430300004, 193.5737304576, 624.5916747966, 433.4476318208], [460.7738037396, 219.9938964992, 501.05798337869993, 236.735351552], [483.58618164809997, 344.6798706176, 511.1065673718, 424.1057739264], [417.0495605094, 375.335510272, 447.0084228477, 435.601623552], [588.6870117384, 395.9516601344, 614.2476806822999, 445.27923584]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047584_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two people, a glasses, a bottle, a cup, and a canned.", "boxes_value": [[51.424194303000036, 60.57373045759999, 255.59167479660005, 300.4476318208], [214.991332995, 218.32391357440002, 250.76452640339994, 269.879272448], [0, 0, 167.69897462970005, 290.3765869056], [51.424194303000036, 60.57373045759999, 255.59167479660005, 300.4476318208], [91.7738037396, 86.99389649919999, 132.05798337869993, 103.735351552], [114.58618164809997, 211.6798706176, 142.1065673718, 291.1057739264], [48.04956050940001, 242.33551027200002, 78.00842284769999, 302.601623552], [219.68701173839997, 262.9516601344, 245.24768068229992, 312.27923584]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047585.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[69.3952636416, 184.2993774592, 218.95533980160002, 510.9237060608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047585_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[37.395263641599996, 82.2993774592, 186.95533980160002, 408.9237060608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047585.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a handbag, and a hat.", "boxes_value": [[69.3952636416, 184.2993774592, 218.95533980160002, 510.9237060608], [96.8316039936, 184.2993774592, 217.4918212608, 510.9237060608], [69.3952636416, 193.8284912128, 128.35675046400002, 281.6521606656], [21.4133911296, 190.2814941184, 106.2914428416, 394.6943359488], [163.2815533824, 300.2291261952, 218.95533980160002, 358.8854368768], [89.1648503808, 194.9379185664, 127.87679808000001, 217.5807559168]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047585_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a handbag, and a hat.", "boxes_value": [[37.395263641599996, 82.2993774592, 186.95533980160002, 408.9237060608], [64.8316039936, 82.2993774592, 185.4918212608, 408.9237060608], [37.395263641599996, 91.8284912128, 96.35675046400002, 179.65216066559998], [0, 88.28149411839999, 74.2914428416, 292.6943359488], [131.2815533824, 198.22912619520002, 186.95533980160002, 256.8854368768], [57.164850380800004, 92.93791856639999, 95.87679808000001, 115.5807559168]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047587.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[315.7768554496, 161.897399928, 498.4904174592, 526.5891113472001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047587_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[45.77685544960002, 91.897399928, 228.4904174592, 456.58911134720006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047587.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a bracelet, a hat, two gloves, a handbag, a street lights, and two motorcycles.", "boxes_value": [[315.7768554496, 161.897399928, 498.4904174592, 526.5891113472001], [315.7768554496, 161.897399928, 498.4904174592, 526.5891113472001], [341.4038086144, 422.41870117919996, 370.0930176, 443.75170899840003], [358.7681274368, 162.00549314399998, 435.6483154432, 225.3186035232], [346.215515136, 430.94921876160004, 382.2889404416, 482.6860962], [411.7172851712, 326.5261230528, 456.8090820096, 358.8023071248], [441.6202392576, 404.36877439200003, 508.5459594752, 477.93957520320004], [483.7485961728, 155.7945556464, 511.6052246016, 225.7324829232], [337.186340352, 239.3312988528, 370.7729491968, 264.20080568640003], [290.524047872, 233.6908569072, 336.417175296, 270.0976562592]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00047587_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a bracelet, a hat, two gloves, a handbag, a street lights, and two motorcycles.", "boxes_value": [[45.77685544960002, 91.897399928, 228.4904174592, 456.58911134720006], [45.77685544960002, 91.897399928, 228.4904174592, 456.58911134720006], [71.40380861440002, 352.41870117919996, 100.0930176, 373.75170899840003], [88.7681274368, 92.00549314399998, 165.64831544319998, 155.3186035232], [76.21551513600002, 360.94921876160004, 112.2889404416, 412.6860962], [141.7172851712, 256.5261230528, 186.8090820096, 288.8023071248], [171.62023925760002, 334.36877439200003, 238.5459594752, 407.93957520320004], [213.74859617279998, 85.7945556464, 241.6052246016, 155.7324829232], [67.186340352, 169.3312988528, 100.77294919680003, 194.20080568640003], [20.524047871999983, 163.6908569072, 66.41717529599998, 200.0976562592]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7], [8, 9]]}, {"image_path": "objects365_v1_00047589.jpg", "text": "What can I find in the bbox of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[233.3358028288, 577.5075683328, 363.5106201088, 663.294591673]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047589_crop.jpg", "text": "What can I find in the bbox of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[33.33580282880001, 21.50756833280002, 163.51062010880003, 107.29459167300001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047589.jpg", "text": "What can I find in the bbox of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a sneakers, two cups, and a speaker.", "boxes_value": [[233.3358028288, 577.5075683328, 363.5106201088, 663.294591673], [16.3717651456, 88.21502685920001, 319.568664576, 665.7772217089998], [233.3358028288, 598.7022561412, 318.1985221632, 663.294591673], [337.4699096576, 577.5075683328, 363.5106201088, 604.9847412068], [316.8186645504, 590.9042968762, 355.77441408, 620.9425048836], [185.5343627776, 443.1806640546, 388.9705200128, 618.1643066294]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047589_crop.jpg", "text": "What can I find in the bbox of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a sneakers, two cups, and a speaker.", "boxes_value": [[33.33580282880001, 21.50756833280002, 163.51062010880003, 107.29459167300001], [0, 0, 119.568664576, 109.77722170899983], [33.33580282880001, 42.702256141199996, 118.19852216319998, 107.29459167300001], [137.4699096576, 21.50756833280002, 163.51062010880003, 48.98474120679998], [116.8186645504, 34.904296876199965, 155.77441407999999, 64.94250488360001], [0, 0, 188.97052001280002, 62.164306629400016]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047590.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[456.16308594619994, 251.4804077056, 606.0708007615, 452.8319702016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047590_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[38.16308594619994, 50.48040770559999, 188.07080076149998, 251.83197020159997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047590.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two plates, six chairs, two napkins, and a desk.", "boxes_value": [[456.16308594619994, 251.4804077056, 606.0708007615, 452.8319702016], [435.9683837965, 344.5742797824, 511.47949219590004, 366.7426757632], [549.1534423752, 280.089233408, 599.5385742136, 290.9102172672], [457.4031982548, 410.6687622144, 600.634155253, 511.1164550656], [550.4102783486, 375.9461059584, 603.7343750326, 452.8319702016], [456.16308594619994, 312.0812378112, 528.7086181808, 354.2444457984], [389.8179931342, 370.98571776, 502.66662599309996, 441.0510864384], [446.86230469189996, 350.5241699328, 543.5897217114999, 415.0090942464], [316.14892576299997, 238.0092773376, 682.2119140298, 447.1044921856], [559.2147216587999, 262.0230102528, 606.0708007615, 286.036743168], [500.0589599278, 251.4804077056, 535.2010498019999, 268.4657592832], [460.8170165965, 244.452026368, 491.27343749339997, 264.365844736]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 9, 10, 11], [6, 7], [8]]}, {"image_path": "objects365_v1_00047590_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two plates, six chairs, two napkins, and a desk.", "boxes_value": [[38.16308594619994, 50.48040770559999, 188.07080076149998, 251.83197020159997], [17.968383796500007, 143.57427978240003, 93.47949219590004, 165.7426757632], [131.15344237520003, 79.08923340799998, 181.5385742136, 89.91021726719998], [39.403198254799975, 209.66876221439998, 182.63415525300002, 302], [132.41027834859995, 174.94610595839998, 185.73437503260004, 251.83197020159997], [38.16308594619994, 111.08123781120003, 110.70861818080004, 153.24444579840002], [0, 169.98571776, 84.66662599309996, 240.05108643839998], [28.862304691899965, 149.52416993280002, 125.58972171149992, 214.00909424640002], [0, 37.0092773376, 225, 246.1044921856], [141.21472165879993, 61.02301025280002, 188.07080076149998, 85.03674316799999], [82.0589599278, 50.48040770559999, 117.20104980199994, 67.46575928319999], [42.817016596500025, 43.45202636799999, 73.27343749339997, 63.365844735999985]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 9, 10, 11], [6, 7], [8]]}, {"image_path": "objects365_v1_00047595.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[71.8347778384, 122.2910156288, 322.974975592, 510.5828857344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047595_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[62.8347778384, 97.2910156288, 313.974975592, 485.5828857344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047595.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a flower, a vase, a lamp, and a plate.", "boxes_value": [[71.8347778384, 122.2910156288, 322.974975592, 510.5828857344], [118.3846435264, 352.632995584, 244.74450682239998, 510.5828857344], [230.383789048, 278.8418578944, 382.1569824464, 373.8194579968], [276.67938232, 355.2058105344, 322.974975592, 437.7742309376], [71.8347778384, 122.2910156288, 146.60925294720002, 486.5990600704], [235.79211425440002, 441.8175659008, 336.2460937312, 480.4499511808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047595_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a flower, a vase, a lamp, and a plate.", "boxes_value": [[62.8347778384, 97.2910156288, 313.974975592, 485.5828857344], [109.3846435264, 327.632995584, 235.74450682239998, 485.5828857344], [221.383789048, 253.8418578944, 373.1569824464, 348.8194579968], [267.67938232, 330.2058105344, 313.974975592, 412.7742309376], [62.8347778384, 97.2910156288, 137.60925294720002, 461.5990600704], [226.79211425440002, 416.8175659008, 327.2460937312, 455.4499511808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047596.jpg", "text": "Please provide details for the area marked as in this photographic . Provide the coordinates for all objects that you mention.", "boxes_value": [[68.54846190559999, 44.8816528384, 413.7193603306, 512.0183105536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047596_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Provide the coordinates for all objects that you mention.", "boxes_value": [[68.54846190559999, 44.8816528384, 413.7193603306, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047596.jpg", "text": "Please provide details for the area marked as in this photographic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a blackboard, a bottle, and a coffee machine.", "boxes_value": [[68.54846190559999, 44.8816528384, 413.7193603306, 512.0183105536], [68.54846190559999, 44.8816528384, 413.7193603306, 512.0183105536], [76.0569458383, 41.2338256896, 313.13244630649996, 230.0294799872], [59.4655151458, 99.8398437376, 135.2098388758, 229.3058471424], [280.5889892734, 38.3225097728, 349.4267578225, 118.88818360320002], [141.15924075040002, 345.338134784, 191.6063232379, 450.7498779136], [310.7375488375, 123.6754760704, 432.5703125017, 244.3825683456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047596_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a blackboard, a bottle, and a coffee machine.", "boxes_value": [[68.54846190559999, 44.8816528384, 413.7193603306, 512], [68.54846190559999, 44.8816528384, 413.7193603306, 512], [76.0569458383, 41.2338256896, 313.13244630649996, 230.0294799872], [59.4655151458, 99.8398437376, 135.2098388758, 229.3058471424], [280.5889892734, 38.3225097728, 349.4267578225, 118.88818360320002], [141.15924075040002, 345.338134784, 191.6063232379, 450.7498779136], [310.7375488375, 123.6754760704, 432.5703125017, 244.3825683456]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047598.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[294.0283813376, 590.5498046976, 428.1470337024, 697.7579345664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047598_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[34.028381337600024, 27.549804697600052, 168.1470337024, 134.75793456639997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047598.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two leather shoes, and a sneakers.", "boxes_value": [[294.0283813376, 590.5498046976, 428.1470337024, 697.7579345664], [259.7885131776, 246.3812256, 412.0263671808, 701.94152832], [264.4017944576, 49.164001459199994, 512.6840820224, 693.8682861312], [294.0283813376, 590.5498046976, 333.2384643584, 637.4262695424], [335.2622680576, 637.4262695424, 366.9472656384, 697.7579345664], [360.870666496, 631.3496093952, 428.1470337024, 695.1536865024]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047598_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two leather shoes, and a sneakers.", "boxes_value": [[34.028381337600024, 27.549804697600052, 168.1470337024, 134.75793456639997], [0, 0, 152.02636718079998, 138.94152831999997], [4.401794457599976, 0, 201, 130.86828613119997], [34.028381337600024, 27.549804697600052, 73.2384643584, 74.42626954239995], [75.26226805760001, 74.42626954239995, 106.94726563839998, 134.75793456639997], [100.87066649600001, 68.34960939519999, 168.1470337024, 132.15368650239998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047600.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[59.30578611199999, 180.1555176, 432.778686528, 481.19830319999994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047600_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[59.30578611199999, 76.1555176, 432.778686528, 376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047600.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bed, a nightstand, a lamp, a chair, a flower, and a vase.", "boxes_value": [[59.30578611199999, 180.1555176, 432.778686528, 481.19830319999994], [59.30578611199999, 180.1555176, 432.778686528, 481.19830319999994], [263.030029312, 259.46881104, 312.70697024000003, 284.92309569599996], [253.587280256, 195.422485344, 297.10595705599997, 270.964294416], [315.815063488, 225.67065431999998, 378.85589600000003, 289.656372048], [276.473632832, 205.012695312, 313.32366944, 264.466552752], [280.652526848, 251.740051248, 310.284484864, 269.785156272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047600_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bed, a nightstand, a lamp, a chair, a flower, and a vase.", "boxes_value": [[59.30578611199999, 76.1555176, 432.778686528, 376], [59.30578611199999, 76.1555176, 432.778686528, 376], [263.030029312, 155.46881104, 312.70697024000003, 180.92309569599996], [253.587280256, 91.422485344, 297.10595705599997, 166.96429441599997], [315.815063488, 121.67065431999998, 378.85589600000003, 185.65637204799998], [276.473632832, 101.012695312, 313.32366944, 160.46655275199998], [280.652526848, 147.740051248, 310.284484864, 165.785156272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047601.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[122.6995849728, 257.0272826823, 321.2512817152, 435.3073730498]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047601_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference.", "boxes_value": [[49.6995849728, 45.02728268229998, 248.25128171519998, 223.3073730498]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047601.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a handbag, a boots, and a hat.", "boxes_value": [[122.6995849728, 257.0272826823, 321.2512817152, 435.3073730498], [211.706176768, 257.798156711, 373.1039428608, 657.0152587847], [154.5769042944, 322.83459474939997, 194.2731933696, 435.3073730498], [122.6995849728, 311.4068603829, 160.8922119168, 426.8869628816], [126.0779419136, 352.0577392798, 141.7158813696, 378.8226318494], [154.947937024, 404.98620602820006, 171.7888183808, 434.15686036939996], [257.4967041024, 257.0272826823, 321.2512817152, 293.7162475534]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047601_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a handbag, a boots, and a hat.", "boxes_value": [[49.6995849728, 45.02728268229998, 248.25128171519998, 223.3073730498], [138.706176768, 45.79815671099999, 297, 267], [81.57690429440001, 110.83459474939997, 121.2731933696, 223.3073730498], [49.6995849728, 99.40686038289999, 87.8922119168, 214.88696288160003], [53.0779419136, 140.0577392798, 68.7158813696, 166.8226318494], [81.947937024, 192.98620602820006, 98.7888183808, 222.15686036939996], [184.49670410239997, 45.02728268229998, 248.25128171519998, 81.71624755340002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047602.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[152.0914306665, 209.6594848768, 503.40454100700003, 316.91760256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047602_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[88.0914306665, 27.659484876800008, 439.40454100700003, 134.91760255999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047602.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, and three people.", "boxes_value": [[152.0914306665, 209.6594848768, 503.40454100700003, 316.91760256], [152.0914306665, 218.1865234432, 208.3697509875, 265.9378051584], [279.1439819085, 209.6594848768, 335.4223022295, 254.0], [479.528808594, 221.5973510656, 503.40454100700003, 283.8445434368], [289.660644537, 128.6528320512, 478.0129394625, 473.5233154048], [180.230590827, 169.7720336896, 343.380859383, 474.1865234432], [259.598510724, 238.518798848, 299.6929321605, 316.91760256]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047602_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pictures, and three people.", "boxes_value": [[88.0914306665, 27.659484876800008, 439.40454100700003, 134.91760255999998], [88.0914306665, 36.1865234432, 144.3697509875, 83.93780515840001], [215.1439819085, 27.659484876800008, 271.4223022295, 72.0], [415.528808594, 39.59735106560001, 439.40454100700003, 101.8445434368], [225.660644537, 0, 414.0129394625, 161], [116.23059082699999, 0, 279.380859383, 161], [195.598510724, 56.51879884799999, 235.6929321605, 134.91760255999998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047604.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[39.7655029234, 140.8080444416, 482.133300776, 243.2941894656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047604_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[39.7655029234, 25.808044441600003, 482.133300776, 128.2941894656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047604.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a paddle, two helmets, a hat, and a boat.", "boxes_value": [[39.7655029234, 140.8080444416, 482.133300776, 243.2941894656], [71.0579223771, 215.6365966848, 85.7864990308, 238.6499633664], [426.68066402930003, 140.8080444416, 479.84667968030004, 173.3936157184], [434.684204123, 167.1051635712, 482.133300776, 184.2554321408], [238.59906004139998, 166.5334472704, 285.4765625319, 204.835815424], [39.7655029234, 210.3959961088, 80.37420657029999, 243.2941894656]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047604_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a paddle, two helmets, a hat, and a boat.", "boxes_value": [[39.7655029234, 25.808044441600003, 482.133300776, 128.2941894656], [71.0579223771, 100.6365966848, 85.7864990308, 123.64996336639999], [426.68066402930003, 25.808044441600003, 479.84667968030004, 58.393615718400014], [434.684204123, 52.105163571199995, 482.133300776, 69.2554321408], [238.59906004139998, 51.53344727039999, 285.4765625319, 89.835815424], [39.7655029234, 95.3959961088, 80.37420657029999, 128.2941894656]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00047605.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[202.02020259840003, 297.2299804672, 283.95239255039996, 507.6231078912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047605_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[21.020202598400033, 53.229980467199994, 102.95239255039996, 263.6231078912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047605.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two people, a bracelet, a necklace, and a bottle.", "boxes_value": [[202.02020259840003, 297.2299804672, 283.95239255039996, 507.6231078912], [24.052856448, 375.5880126976, 767.7667236096001, 510.9534912], [188.528869632, 216.4583740416, 346.4783935488, 439.5687256064], [33.0025024512, 197.0781250048, 285.6087646464, 475.289733888], [224.906616192, 396.2915039232, 241.29772945919999, 428.8737793024], [246.63183590399998, 297.2299804672, 283.95239255039996, 339.9879150592], [202.02020259840003, 444.3073120256, 234.961486848, 507.6231078912]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047605_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two people, a bracelet, a necklace, and a bottle.", "boxes_value": [[21.020202598400033, 53.229980467199994, 102.95239255039996, 263.6231078912], [0, 131.5880126976, 123, 266.9534912], [7.52886963200001, 0, 123, 195.5687256064], [0, 0, 104.6087646464, 231.289733888], [43.906616192, 152.29150392320003, 60.297729459199985, 184.8737793024], [65.63183590399998, 53.229980467199994, 102.95239255039996, 95.98791505920002], [21.020202598400033, 200.30731202560003, 53.96148684799999, 263.6231078912]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047606.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[426.4677734074, 9.3043823104, 682.2659912378, 136.3934936576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047606_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[64.46777340739999, 9.3043823104, 320.2659912378, 136.3934936576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047606.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, two cups, and a cake.", "boxes_value": [[426.4677734074, 9.3043823104, 682.2659912378, 136.3934936576], [566.8994140622, 51.0396118016, 637.5545653972, 92.9911499264], [642.5224609708, 24.543884288, 682.2659912378, 62.079467776], [584.6326904184, 12.3207397376, 612.6457519646, 54.9229126144], [650.1352538959, 9.3043823104, 674.7171631068001, 26.2385253888], [426.4677734074, 84.9889526272, 514.8073730188, 136.3934936576]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047606_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, two cups, and a cake.", "boxes_value": [[64.46777340739999, 9.3043823104, 320.2659912378, 136.3934936576], [204.89941406219998, 51.0396118016, 275.5545653972, 92.9911499264], [280.5224609708, 24.543884288, 320.2659912378, 62.079467776], [222.6326904184, 12.3207397376, 250.64575196459998, 54.9229126144], [288.13525389589995, 9.3043823104, 312.71716310680006, 26.2385253888], [64.46777340739999, 84.9889526272, 152.80737301880004, 136.3934936576]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047608.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.9796142496000001, 91.4003906048, 291.5894775744, 216.0754394624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047608_crop.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.9796142496000001, 31.400390604799995, 291.5894775744, 156.0754394624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047608.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five glasses.", "boxes_value": [[0.9796142496000001, 91.4003906048, 291.5894775744, 216.0754394624], [87.95599365119999, 185.7934570496, 176.6389160016, 216.0754394624], [209.304443376, 144.2595214848, 257.7073974816, 160.3937988096], [249.1024170312, 111.4530639872, 291.5894775744, 127.587341312], [119.49005122800001, 114.1420898304, 168.9686279232, 129.2008056832], [0.9796142496000001, 91.4003906048, 36.4750976352, 102.4639282176]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047608_crop.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five glasses.", "boxes_value": [[0.9796142496000001, 31.400390604799995, 291.5894775744, 156.0754394624], [87.95599365119999, 125.7934570496, 176.6389160016, 156.0754394624], [209.304443376, 84.25952148479999, 257.7073974816, 100.39379880960001], [249.1024170312, 51.4530639872, 291.5894775744, 67.587341312], [119.49005122800001, 54.142089830399996, 168.9686279232, 69.2008056832], [0.9796142496000001, 31.400390604799995, 36.4750976352, 42.4639282176]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047609.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[389.13519288, 103.249206528, 479.83862304, 405.001098624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047609_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[23.135192879999977, 76.249206528, 113.83862304000002, 378.001098624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047609.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include three handbags, a hat, and a sneakers.", "boxes_value": [[389.13519288, 103.249206528, 479.83862304, 405.001098624], [389.13519288, 217.73156736, 409.40716550400003, 264.250305152], [398.097534192, 136.430358912, 448.24389648000005, 226.05371091199999], [447.16186521599997, 103.249206528, 472.01525879999997, 123.204467776], [404.197509744, 318.42773439999996, 426.04998777599997, 336.783813504], [415.437133776, 317.82348633600003, 479.83862304, 405.001098624]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047609_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include three handbags, a hat, and a sneakers.", "boxes_value": [[23.135192879999977, 76.249206528, 113.83862304000002, 378.001098624], [23.135192879999977, 190.73156736, 43.407165504000034, 237.250305152], [32.09753419200001, 109.430358912, 82.24389648000005, 199.05371091199999], [81.16186521599997, 76.249206528, 106.01525879999997, 96.204467776], [38.197509744, 291.42773439999996, 60.049987775999966, 309.783813504], [49.437133775999996, 290.82348633600003, 113.83862304000002, 378.001098624]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047610.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[72.1507568686, 206.2346801664, 298.88610841260004, 411.724548352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047610_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[57.1507568686, 52.23468016640001, 283.88610841260004, 257.724548352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047610.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three sneakers, a leather shoes, a handbag, and a backpack.", "boxes_value": [[72.1507568686, 206.2346801664, 298.88610841260004, 411.724548352], [180.0276489322, 379.0976562688, 209.615966789, 395.1972045824], [200.07562253339998, 356.2905273344, 220.3526000925, 373.2793579008], [280.0874023252, 390.2681274368, 302.5564575246, 403.6947631616], [271.93035886940004, 276.437744128, 298.88610841260004, 312.3787842048], [141.8704834244, 399.6677856256, 169.6535644593, 411.724548352], [72.1507568686, 206.2346801664, 129.8137206741, 298.4954223616]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4], [6]]}, {"image_path": "objects365_v1_00047610_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three sneakers, a leather shoes, a handbag, and a backpack.", "boxes_value": [[57.1507568686, 52.23468016640001, 283.88610841260004, 257.724548352], [165.0276489322, 225.0976562688, 194.615966789, 241.1972045824], [185.07562253339998, 202.2905273344, 205.3526000925, 219.2793579008], [265.0874023252, 236.2681274368, 287.5564575246, 249.6947631616], [256.93035886940004, 122.43774412800002, 283.88610841260004, 158.3787842048], [126.87048342439999, 245.6677856256, 154.6535644593, 257.724548352], [57.1507568686, 52.23468016640001, 114.81372067410001, 144.4954223616]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4], [6]]}, {"image_path": "objects365_v1_00047611.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations.", "boxes_value": [[542.8206787080001, 242.8952026112, 624.885498083, 459.5481567232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047611_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations.", "boxes_value": [[20.82067870800006, 54.895202611200006, 102.88549808300002, 271.5481567232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047611.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include a handbag, a sneakers, two boots, and a street lights.", "boxes_value": [[542.8206787080001, 242.8952026112, 624.885498083, 459.5481567232], [542.8206787080001, 330.1874389504, 562.057495153, 367.2618408448], [558.539917012, 437.3023071232, 575.279418945, 459.5481567232], [594.333862276, 371.3478393344, 605.790771509, 400.9674072064], [609.609741202, 368.9609985536, 624.885498083, 401.1984252928], [566.8156738089999, 242.8952026112, 574.6491699410001, 281.6505737216]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047611_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include a handbag, a sneakers, two boots, and a street lights.", "boxes_value": [[20.82067870800006, 54.895202611200006, 102.88549808300002, 271.5481567232], [20.82067870800006, 142.1874389504, 40.05749515299999, 179.26184084480002], [36.539917012000046, 249.30230712320002, 53.279418944999975, 271.5481567232], [72.33386227599999, 183.34783933440002, 83.79077150900002, 212.96740720640003], [87.60974120200001, 180.9609985536, 102.88549808300002, 213.19842529279998], [44.81567380899992, 54.895202611200006, 52.64916994100008, 93.65057372159998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047614.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each mentioned object.", "boxes_value": [[0.9804077056, 198.366726456, 149.6032714752, 569.275268541]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047614_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each mentioned object.", "boxes_value": [[0.9804077056, 93.36672645600001, 149.6032714752, 464.27526854099995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047614.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two stools, a person, a sneakers, and two cars.", "boxes_value": [[0.9804077056, 198.366726456, 149.6032714752, 569.275268541], [24.3832397312, 344.69128418400004, 194.2608032256, 527.045776353], [0.9804077056, 402.276916491, 33.0210571264, 462.741821262], [0.9804077056, 474.258911118, 57.0150756864, 569.275268541], [42.3068237312, 282.894531264, 211.2816162304, 539.837524434], [119.0503540224, 508.404785172, 149.6032714752, 540.809326155], [56.808676352, 198.366726456, 126.58584191999998, 280.260884853], [2.4911223296, 212.57285600699998, 63.9117411328, 284.02133091900004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047614_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two stools, a person, a sneakers, and two cars.", "boxes_value": [[0.9804077056, 93.36672645600001, 149.6032714752, 464.27526854099995], [24.3832397312, 239.69128418400004, 186, 422.04577635299995], [0.9804077056, 297.276916491, 33.0210571264, 357.741821262], [0.9804077056, 369.258911118, 57.0150756864, 464.27526854099995], [42.3068237312, 177.89453126400002, 186, 434.837524434], [119.0503540224, 403.404785172, 149.6032714752, 435.809326155], [56.808676352, 93.36672645600001, 126.58584191999998, 175.260884853], [2.4911223296, 107.57285600699998, 63.9117411328, 179.02133091900004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00047615.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[344.73315433240003, 50.3496704, 736.207641638, 291.3259277312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047615_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[98.73315433240003, 50.3496704, 490.20764163800004, 291.3259277312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047615.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bracelet, and four people.", "boxes_value": [[344.73315433240003, 50.3496704, 736.207641638, 291.3259277312], [407.4406738592, 255.4930419712, 427.1488037024, 291.3259277312], [474.62744137439995, 69.1619262464, 531.96008298, 232.20166016000002], [344.73315433240003, 50.3496704, 400.2741699128, 176.6606445568], [686.9373779408, 54.8287963648, 736.207641638, 193.6812744192], [659.166870112, 139.9319457792, 755.019897442, 423.90771486719996]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047615_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bracelet, and four people.", "boxes_value": [[98.73315433240003, 50.3496704, 490.20764163800004, 291.3259277312], [161.4406738592, 255.4930419712, 181.14880370240002, 291.3259277312], [228.62744137439995, 69.1619262464, 285.96008298000004, 232.20166016000002], [98.73315433240003, 50.3496704, 154.2741699128, 176.6606445568], [440.93737794080005, 54.8287963648, 490.20764163800004, 193.6812744192], [413.16687011199997, 139.9319457792, 509.019897442, 351]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047616.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[488.1348877056, 28.3693237248, 689.1182861568, 144.805053696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047616_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify.", "boxes_value": [[51.13488770560002, 28.3693237248, 252.11828615679997, 144.805053696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047616.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two storage boxes, a person, and a moniter.", "boxes_value": [[488.1348877056, 28.3693237248, 689.1182861568, 144.805053696], [497.64794918399997, 55.5502319104, 562.8071288832, 131.533691392], [511.71386718720004, 40.5256957952, 555.1026611712, 61.6090087936], [488.1348877056, 44.8670654464, 513.032592768, 61.6247558656], [616.2579345408001, 28.3693237248, 689.1182861568, 133.3551025152], [579.2004394752, 92.4254150144, 633.8249511936, 144.805053696]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047616_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, two storage boxes, a person, and a moniter.", "boxes_value": [[51.13488770560002, 28.3693237248, 252.11828615679997, 144.805053696], [60.64794918399997, 55.5502319104, 125.8071288832, 131.533691392], [74.71386718720004, 40.5256957952, 118.1026611712, 61.6090087936], [51.13488770560002, 44.8670654464, 76.03259276799997, 61.6247558656], [179.25793454080008, 28.3693237248, 252.11828615679997, 133.3551025152], [142.20043947520003, 92.4254150144, 196.8249511936, 144.805053696]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047618.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[158.9352417028, 197.6975097856, 416.44311139300004, 425.9379883008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047618_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[64.9352417028, 57.697509785600005, 322.44311139300004, 285.9379883008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047618.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a necklace, a glasses, a bicycle, and a bus.", "boxes_value": [[158.9352417028, 197.6975097856, 416.44311139300004, 425.9379883008], [181.0715332235, 177.900695808, 483.6588134453, 512.4382324224], [303.7218628151, 363.379272448, 386.9080810596, 425.9379883008], [301.9490154924, 276.1116279808, 416.44311139300004, 304.4076516864], [158.9352417028, 205.7286987264, 179.2557372899, 228.028442368], [276.5927124123, 197.6975097856, 304.560180635, 209.333618176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047618_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a necklace, a glasses, a bicycle, and a bus.", "boxes_value": [[64.9352417028, 57.697509785600005, 322.44311139300004, 285.9379883008], [87.07153322350001, 37.900695807999995, 386, 342], [209.7218628151, 223.379272448, 292.9080810596, 285.9379883008], [207.9490154924, 136.1116279808, 322.44311139300004, 164.40765168640002], [64.9352417028, 65.72869872640001, 85.2557372899, 88.02844236799999], [182.5927124123, 57.697509785600005, 210.560180635, 69.33361817599999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047624.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[0.1212768768, 292.2386474496, 93.8819579904, 370.8015136768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047624_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[0.1212768768, 20.238647449600023, 93.8819579904, 98.80151367680003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047624.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[0.1212768768, 292.2386474496, 93.8819579904, 370.8015136768], [24.1681518592, 302.1907958784, 93.8819579904, 370.8015136768], [0.1212768768, 301.3083495936, 38.7285766656, 370.3602905088], [72.4824218624, 294.6654052864, 115.722656256, 354.6722412032], [0.3418578944, 297.7540283392, 122.782287616, 365.2617187328], [31.8895874048, 292.2386474496, 60.3486938624, 360.8494262784]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00047624_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[0.1212768768, 20.238647449600023, 93.8819579904, 98.80151367680003], [24.1681518592, 30.190795878400024, 93.8819579904, 98.80151367680003], [0.1212768768, 29.308349593599985, 38.7285766656, 98.3602905088], [72.4824218624, 22.665405286400016, 115.722656256, 82.67224120319997], [0.3418578944, 25.754028339199976, 117, 93.26171873279998], [31.8895874048, 20.238647449600023, 60.3486938624, 88.84942627840002]], "boxes_seq": [[0], [0], [1, 2, 3, 5], [4]]}, {"image_path": "objects365_v1_00047626.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[618.7485351593, 453.8190918144, 682.2623291284, 497.2950439424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047626_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[16.74853515929999, 11.819091814399997, 80.26232912839998, 55.295043942400014]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047626.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cars, a van, and a suv.", "boxes_value": [[618.7485351593, 453.8190918144, 682.2623291284, 497.2950439424], [666.9250488049, 463.63836672, 682.2623291284, 497.2950439424], [655.5042724325999, 463.4774780416, 673.4796142338, 491.1113281024], [647.8580322462, 461.5994872832, 665.5650635078, 486.2821044736], [632.0289306576, 453.8190918144, 669.7236328357, 482.123657216], [618.7485351593, 454.6239624192, 633.7728271812, 475.6846923776]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047626_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cars, a van, and a suv.", "boxes_value": [[16.74853515929999, 11.819091814399997, 80.26232912839998, 55.295043942400014], [64.92504880490003, 21.638366720000022, 80.26232912839998, 55.295043942400014], [53.50427243259992, 21.477478041600023, 71.47961423380002, 49.11132810240002], [45.85803224619997, 19.599487283200006, 63.565063507800005, 44.282104473599986], [30.02893065759997, 11.819091814399997, 67.72363283569996, 40.123657216000026], [16.74853515929999, 12.623962419199984, 31.7728271812, 33.684692377600015]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047627.jpg", "text": "Please, can you help me understand what's inside the region in image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 0, 342.2821044736, 337.1154784965]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047627_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 0, 342.2821044736, 337.1154784965]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047627.jpg", "text": "Please, can you help me understand what's inside the region in image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a storage box, a picture, a leather shoes, and a bottle.", "boxes_value": [[0, 0, 342.2821044736, 337.1154784965], [164.4461669888, 217.835632359, 219.3410034176, 325.687866192], [190.9248047104, 285.0010986105, 218.6951904256, 326.333740239], [199.966308608, 313.41723635700004, 258.736083968, 347.645874015], [0, 0, 342.2821044736, 337.1154784965], [54.6231078912, 280.6354980495, 99.612426752, 351.9671630985], [285.3038329856, 316.219238271, 304.2709960704, 352.7310790935]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047627_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a storage box, a picture, a leather shoes, and a bottle.", "boxes_value": [[0, 0, 342.2821044736, 337.1154784965], [164.4461669888, 217.835632359, 219.3410034176, 325.687866192], [190.9248047104, 285.0010986105, 218.6951904256, 326.333740239], [199.966308608, 313.41723635700004, 258.736083968, 347.645874015], [0, 0, 342.2821044736, 337.1154784965], [54.6231078912, 280.6354980495, 99.612426752, 351.9671630985], [285.3038329856, 316.219238271, 304.2709960704, 352.7310790935]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047628.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[405.086181668, 207.8745117184, 687.116455053, 436.7332763648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047628_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[71.086181668, 57.87451171839999, 352, 286.7332763648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047628.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a desk, two guitars, two people, and a barrel.", "boxes_value": [[405.086181668, 207.8745117184, 687.116455053, 436.7332763648], [405.086181668, 318.6290893312, 685.825683627, 436.7332763648], [450.9079590148, 288.9417114112, 687.116455053, 410.2727661056], [414.7158916446, 237.2761137664, 434.6518032948, 276.0986784768], [433.77742117779997, 233.4288326144, 452.6640742876, 286.2415107072], [427.0616454748, 207.8745117184, 447.50476076039996, 264.731994624], [494.0705566258, 195.1001586688, 528.7264404604, 267.515319808], [562.0834960733999, 293.6591796736, 590.5650634561999, 316.4096679936]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047628_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a desk, two guitars, two people, and a barrel.", "boxes_value": [[71.086181668, 57.87451171839999, 352, 286.7332763648], [71.086181668, 168.62908933120002, 351.825683627, 286.7332763648], [116.90795901479999, 138.9417114112, 352, 260.2727661056], [80.71589164459999, 87.27611376639999, 100.6518032948, 126.09867847679999], [99.77742117779997, 83.4288326144, 118.66407428759999, 136.2415107072], [93.0616454748, 57.87451171839999, 113.50476076039996, 114.73199462399998], [160.0705566258, 45.10015866879999, 194.7264404604, 117.51531980800002], [228.08349607339994, 143.65917967360002, 256.56506345619994, 166.4096679936]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047629.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[222.22714528699998, 133.2530491904, 503.909912117, 324.5436460032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047629_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[71.22714528699998, 48.25304919039999, 352.909912117, 239.54364600320002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047629.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a tent, and two hats.", "boxes_value": [[222.22714528699998, 133.2530491904, 503.909912117, 324.5436460032], [364.1871337705, 202.9421997056, 503.909912117, 315.3149413888], [201.870971654, 131.5944214016, 321.9730834725, 427.09313966080003], [302.858642562, 186.0269165056, 407.666748078, 289.5463867392], [222.22714528699998, 133.2530491904, 260.3321625995, 176.853982464], [378.49772927600003, 284.4668808704, 416.11579714500004, 324.5436460032]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047629_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a tent, and two hats.", "boxes_value": [[71.22714528699998, 48.25304919039999, 352.909912117, 239.54364600320002], [213.1871337705, 117.94219970559999, 352.909912117, 230.31494138879998], [50.87097165399999, 46.59442140159999, 170.97308347249998, 287], [151.858642562, 101.02691650560001, 256.666748078, 204.5463867392], [71.22714528699998, 48.25304919039999, 109.33216259950001, 91.85398246400001], [227.49772927600003, 199.46688087040002, 265.11579714500004, 239.54364600320002]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047631.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[461.9333496347, 113.5226440192, 683.1726074185, 366.1774369792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047631_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[55.933349634699994, 63.5226440192, 277, 316.1774369792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047631.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a pillow, a lamp, a cabinet, a handbag, and a stuffed toy.", "boxes_value": [[461.9333496347, 113.5226440192, 683.1726074185, 366.1774369792], [380.36755373949995, 215.2659912192, 604.103759737, 401.1812744192], [461.9333496347, 226.6578369024, 546.688842749, 299.5657958912], [570.662597627, 268.0309448192, 681.9774169761, 399.2651977728], [532.1516113456, 113.5226440192, 574.7832031512, 251.78692628480002], [583.8734131150001, 91.6463012864, 682.4925537439, 269.4146728448], [631.1360504948, 323.6572377088, 682.9510579275, 366.1774369792], [614.2640380948, 235.97375488, 683.1726074185, 307.775451648]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047631_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a pillow, a lamp, a cabinet, a handbag, and a stuffed toy.", "boxes_value": [[55.933349634699994, 63.5226440192, 277, 316.1774369792], [0, 165.2659912192, 198.10375973700002, 351.1812744192], [55.933349634699994, 176.6578369024, 140.68884274899995, 249.56579589120003], [164.66259762699997, 218.0309448192, 275.9774169761, 349.2651977728], [126.15161134560003, 63.5226440192, 168.78320315120004, 201.78692628480002], [177.87341311500006, 41.6463012864, 276.4925537439, 219.4146728448], [225.13605049479997, 273.6572377088, 276.95105792749996, 316.1774369792], [208.26403809479996, 185.97375488, 277, 257.775451648]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00047632.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[2.5846557696, 91.72076416, 267.4387206912, 190.3265380864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047632_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[2.5846557696, 24.72076416, 267.4387206912, 123.3265380864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047632.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a blackboard, a moniter, and two chairs.", "boxes_value": [[2.5846557696, 91.72076416, 267.4387206912, 190.3265380864], [222.22668456959997, 149.0198974464, 304.3199463168, 190.0273437696], [2.5846557696, 91.72076416, 223.32733155839998, 180.76611328], [246.1353759744, 114.728637696, 267.4387206912, 135.0976562688], [237.88641354240002, 168.6927490048, 271.98352051200004, 189.470703104], [142.87255856640002, 170.6141357568, 181.2318115584, 190.3265380864]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047632_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a blackboard, a moniter, and two chairs.", "boxes_value": [[2.5846557696, 24.72076416, 267.4387206912, 123.3265380864], [222.22668456959997, 82.0198974464, 304.3199463168, 123.02734376960001], [2.5846557696, 24.72076416, 223.32733155839998, 113.76611328000001], [246.1353759744, 47.72863769600001, 267.4387206912, 68.0976562688], [237.88641354240002, 101.69274900479999, 271.98352051200004, 122.470703104], [142.87255856640002, 103.61413575680001, 181.2318115584, 123.3265380864]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047633.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe.", "boxes_value": [[102.492187488, 334.4232787968, 238.617370584, 404.6095581184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047633_crop.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe.", "boxes_value": [[34.492187488, 18.42327879679999, 170.617370584, 88.6095581184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047633.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a glasses, and three leather shoes.", "boxes_value": [[102.492187488, 334.4232787968, 238.617370584, 404.6095581184], [0.372375468, 321.0327758848, 192.8790893736, 512.7340087808], [102.492187488, 354.5538330112, 147.02795410320002, 367.9913330176], [145.386474588, 393.0446777344, 184.87628172720002, 404.6095581184], [180.38330076239998, 349.4648437248, 216.891235332, 360.3446655488], [206.25244138079998, 334.4232787968, 238.617370584, 346.1685791232]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047633_crop.jpg", "text": "Please describe the content within the area displayed in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a glasses, and three leather shoes.", "boxes_value": [[34.492187488, 18.42327879679999, 170.617370584, 88.6095581184], [0, 5.032775884800003, 124.87908937360001, 106], [34.492187488, 38.55383301120003, 79.02795410320002, 51.99133301760003], [77.386474588, 77.04467773440001, 116.87628172720002, 88.6095581184], [112.38330076239998, 33.464843724800005, 148.891235332, 44.34466554879998], [138.25244138079998, 18.42327879679999, 170.617370584, 30.168579123200004]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047634.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object.", "boxes_value": [[350.38403321600003, 196.180114752, 456.76013184, 308.534790048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047634_crop.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object.", "boxes_value": [[27.384033216000034, 28.18011475200001, 133.76013183999999, 140.534790048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047634.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three lamps, and two flowers.", "boxes_value": [[350.38403321600003, 196.180114752, 456.76013184, 308.534790048], [350.38403321600003, 196.180114752, 431.79675296, 230.73657225600002], [408.196655296, 272.561828592, 437.57458496, 307.33569336], [430.97949216000006, 280.655761728, 456.76013184, 308.534790048], [355.39550784, 267.519348144, 368.660522432, 294.786315936], [449.577026368, 262.950317376, 462.84204102399997, 288.301208496]], "boxes_seq": [[0], [0], [1, 4, 5], [2, 3]]}, {"image_path": "objects365_v1_00047634_crop.jpg", "text": "Analyze and describe the region in the included photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three lamps, and two flowers.", "boxes_value": [[27.384033216000034, 28.18011475200001, 133.76013183999999, 140.534790048], [27.384033216000034, 28.18011475200001, 108.79675295999999, 62.73657225600002], [85.19665529600002, 104.56182859199998, 114.57458495999998, 139.33569336], [107.97949216000006, 112.65576172800002, 133.76013183999999, 140.534790048], [32.39550783999999, 99.51934814399999, 45.66052243199999, 126.786315936], [126.57702636800002, 94.95031737599999, 139.84204102399997, 120.30120849600002]], "boxes_seq": [[0], [0], [1, 4, 5], [2, 3]]}, {"image_path": "objects365_v1_00047635.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[74.53802491260001, 415.5626830848, 638.9317627041, 490.6688842752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047635_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe.", "boxes_value": [[74.53802491260001, 19.562683084800028, 638.9317627041, 94.66888427520001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047635.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a desk, two people, a book, and a glasses.", "boxes_value": [[74.53802491260001, 415.5626830848, 638.9317627041, 490.6688842752], [154.2161865244, 353.3387451392, 304.5469970958, 511.556274432], [58.4708252211, 434.824157696, 387.8077392460999, 512.9143066624], [377.622070295, 347.9063720448, 479.47875973770005, 511.556274432], [611.8961181431, 415.5626830848, 638.9317627041, 451.6102905344], [544.041870102, 418.7433471488, 571.077514663, 453.7307128832], [74.53802491260001, 429.2406616064, 173.1603393742, 468.263916032], [139.1840210151, 462.2638550016, 168.03991699190001, 490.6688842752]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047635_crop.jpg", "text": "What information can you give me about the coordinates in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a desk, two people, a book, and a glasses.", "boxes_value": [[74.53802491260001, 19.562683084800028, 638.9317627041, 94.66888427520001], [154.2161865244, 0, 304.5469970958, 113], [58.4708252211, 38.824157695999986, 387.8077392460999, 113], [377.622070295, 0, 479.47875973770005, 113], [611.8961181431, 19.562683084800028, 638.9317627041, 55.61029053440001], [544.041870102, 22.743347148800012, 571.077514663, 57.7307128832], [74.53802491260001, 33.24066160640001, 173.1603393742, 72.263916032], [139.1840210151, 66.2638550016, 168.03991699190001, 94.66888427520001]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047636.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object.", "boxes_value": [[224.8856548352, 281.6602783112, 362.7082519552, 374.1699219014]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047636_crop.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object.", "boxes_value": [[34.8856548352, 23.66027831119999, 172.7082519552, 116.16992190140002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047636.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[224.8856548352, 281.6602783112, 362.7082519552, 374.1699219014], [292.2760620032, 298.2178954724, 362.7082519552, 363.81115721239996], [274.3861694464, 281.6602783112, 343.1902466048, 374.1699219014], [182.1655883776, 285.4184570022, 273.8079834112, 372.1462402128], [224.8856548352, 285.3465096862, 246.064991744, 302.8951031922], [305.9722590208, 282.62345207320004, 324.4285383168, 298.9617977512], [161.878112768, 298.3426513646, 360.0032958976, 433.37072750339996]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047636_crop.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[34.8856548352, 23.66027831119999, 172.7082519552, 116.16992190140002], [102.27606200320002, 40.217895472400016, 172.7082519552, 105.81115721239996], [84.3861694464, 23.66027831119999, 153.1902466048, 116.16992190140002], [0, 27.4184570022, 83.80798341119998, 114.14624021280002], [34.8856548352, 27.346509686200022, 56.064991744, 44.8951031922], [115.97225902079998, 24.623452073200042, 134.42853831679997, 40.96179775119998], [0, 40.34265136459999, 170.00329589760003, 139]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047637.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[286.7141723968, 73.6062011904, 542.1054687307001, 512.1175537152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047637_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[64.7141723968, 73.6062011904, 320.10546873070007, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047637.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, three people, and two hats.", "boxes_value": [[286.7141723968, 73.6062011904, 542.1054687307001, 512.1175537152], [279.3846193668, 53.8436903936, 432.8009085971, 130.5518349824], [286.7141723968, 181.5533447168, 542.1054687307001, 512.1175537152], [359.638061497, 69.929138176, 440.56958008510003, 262.0252075008], [411.2667236492, 116.4414672896, 526.6174316613, 389.3552246272], [360.39123537579997, 73.6062011904, 424.0595703206, 127.3264160256], [295.39636232690003, 181.709838848, 381.6140136452, 255.3264160256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047637_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, three people, and two hats.", "boxes_value": [[64.7141723968, 73.6062011904, 320.10546873070007, 512], [57.384619366799996, 53.8436903936, 210.80090859709998, 130.5518349824], [64.7141723968, 181.5533447168, 320.10546873070007, 512], [137.63806149700002, 69.929138176, 218.56958008510003, 262.0252075008], [189.2667236492, 116.4414672896, 304.6174316613, 389.3552246272], [138.39123537579997, 73.6062011904, 202.05957032060002, 127.3264160256], [73.39636232690003, 181.709838848, 159.61401364519998, 255.3264160256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047640.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object.", "boxes_value": [[111.4412841984, 388.6235351552, 673.4904784896, 431.2809448448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047640_crop.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object.", "boxes_value": [[111.4412841984, 11.623535155199988, 673.4904784896, 54.28094484479999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047640.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[111.4412841984, 388.6235351552, 673.4904784896, 431.2809448448], [129.53387450879998, 386.8423461888, 167.4780273408, 415.4889526272], [111.4412841984, 388.8526611456, 151.1444702208, 413.478698752], [176.524291968, 370.7600708096, 197.632324224, 404.6836548096], [479.7664794624, 407.0654297088, 498.93713379839994, 431.2809448448], [650.0317383168, 388.6235351552, 673.4904784896, 428.4782104576]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047640_crop.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[111.4412841984, 11.623535155199988, 673.4904784896, 54.28094484479999], [129.53387450879998, 9.84234618879998, 167.4780273408, 38.48895262719998], [111.4412841984, 11.852661145599996, 151.1444702208, 36.478698752000014], [176.524291968, 0, 197.632324224, 27.6836548096], [479.7664794624, 30.065429708800025, 498.93713379839994, 54.28094484479999], [650.0317383168, 11.623535155199988, 673.4904784896, 51.47821045760003]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047641.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.0672607375, 80.105712896, 86.8876342884, 446.0390624768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047641_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.0672607375, 80.105712896, 86.8876342884, 446.0390624768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047641.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a helmet.", "boxes_value": [[0.0672607375, 80.105712896, 86.8876342884, 446.0390624768], [0.27319336990000004, 152.0274048, 135.78692628710002, 474.4806518784], [0.19879149070000002, 189.0482177536, 86.8876342884, 446.0390624768], [59.003906260200004, 75.8632812544, 97.93371579200002, 100.7643432448], [0.0672607375, 80.105712896, 16.4109497277, 127.36993408], [1.0314331148, 153.3812255744, 54.467407228300004, 194.8793334784]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047641_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a helmet.", "boxes_value": [[0.0672607375, 80.105712896, 86.8876342884, 446.0390624768], [0.27319336990000004, 152.0274048, 108, 474.4806518784], [0.19879149070000002, 189.0482177536, 86.8876342884, 446.0390624768], [59.003906260200004, 75.8632812544, 97.93371579200002, 100.7643432448], [0.0672607375, 80.105712896, 16.4109497277, 127.36993408], [1.0314331148, 153.3812255744, 54.467407228300004, 194.8793334784]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047643.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[277.0358886912, 354.0679931411, 378.056274432, 567.2180176032999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047643_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[26.0358886912, 54.067993141099976, 127.05627443200001, 267.2180176032999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047643.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include six bottles.", "boxes_value": [[277.0358886912, 354.0679931411, 378.056274432, 567.2180176032999], [326.0758666752, 354.0679931411, 378.056274432, 506.543945313], [362.1156005888, 358.91955569469997, 398.1553955328, 430.30603030310004], [277.0358886912, 469.0823974753, 301.154113792, 551.3316650254], [288.9709472768, 449.9967041115, 324.4099731456, 560.2081299038], [299.4857787904, 494.00341799340003, 325.5782470656, 567.2180176032999], [315.2127685632, 438.1293945637, 372.136779776, 559.0130615216]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047643_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include six bottles.", "boxes_value": [[26.0358886912, 54.067993141099976, 127.05627443200001, 267.2180176032999], [75.07586667520002, 54.067993141099976, 127.05627443200001, 206.543945313], [111.11560058880002, 58.919555694699966, 147.15539553280001, 130.30603030310004], [26.0358886912, 169.0823974753, 50.154113791999976, 251.33166502539996], [37.97094727680002, 149.99670411149998, 73.40997314560002, 260.2081299038], [48.48577879039999, 194.00341799340003, 74.57824706560001, 267.2180176032999], [64.21276856319997, 138.12939456369998, 121.13677977600003, 259.0130615216]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047644.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object.", "boxes_value": [[569.733276372, 164.210266112, 758.778198204, 362.9995117056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047644_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object.", "boxes_value": [[47.73327637199998, 50.210266112, 236.77819820399998, 248.9995117056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047644.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a desk, a flower, and two moniters.", "boxes_value": [[569.733276372, 164.210266112, 758.778198204, 362.9995117056], [640.815063486, 277.9563598848, 758.778198204, 362.9995117056], [523.511230476, 210.6972045824, 778.7044677360001, 338.8507080192], [596.557983372, 164.210266112, 648.0876465179999, 207.1516113408], [627.409790046, 216.6893920768, 743.705444352, 300.6807250944], [569.733276372, 169.0390624768, 598.1975097540001, 200.7460937728]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047644_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a desk, a flower, and two moniters.", "boxes_value": [[47.73327637199998, 50.210266112, 236.77819820399998, 248.9995117056], [118.81506348599999, 163.9563598848, 236.77819820399998, 248.9995117056], [1.511230476000037, 96.69720458239999, 256.7044677360001, 224.8507080192], [74.55798337199997, 50.210266112, 126.08764651799993, 93.1516113408], [105.40979004600001, 102.6893920768, 221.70544435199997, 186.68072509439997], [47.73327637199998, 55.03906247680001, 76.19750975400007, 86.74609377280001]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047645.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe.", "boxes_value": [[206.3918456832, 79.653015168, 369.0903320064, 543.9504394752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047645_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe.", "boxes_value": [[41.39184568319999, 79.653015168, 204.0903320064, 543.9504394752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047645.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three lamps, a chair, and a person.", "boxes_value": [[206.3918456832, 79.653015168, 369.0903320064, 543.9504394752], [206.3918456832, 79.653015168, 222.4276123136, 120.70452879359999], [216.0133056512, 174.5847168, 240.3876953088, 379.84240719359997], [314.1521606656, 207.29766842879997, 336.6022339072, 401.65100098560004], [209.686645504, 505.1384277504, 252.5841674752, 543.9504394752], [331.3538208256, 453.2185058304, 369.0903320064, 499.2792968448]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047645_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three lamps, a chair, and a person.", "boxes_value": [[41.39184568319999, 79.653015168, 204.0903320064, 543.9504394752], [41.39184568319999, 79.653015168, 57.42761231360001, 120.70452879359999], [51.0133056512, 174.5847168, 75.3876953088, 379.84240719359997], [149.15216066559998, 207.29766842879997, 171.6022339072, 401.65100098560004], [44.68664550400001, 505.1384277504, 87.58416747519999, 543.9504394752], [166.3538208256, 453.2185058304, 204.0903320064, 499.2792968448]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047647.jpg", "text": "What can be observed in the rectangular region in the photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[186.74377439120002, 294.107604992, 393.1506347403, 376.3540649472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047647_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[51.74377439120002, 21.107604992000006, 258.1506347403, 103.35406494720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047647.jpg", "text": "What can be observed in the rectangular region in the photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four umbrellas, and a chair.", "boxes_value": [[186.74377439120002, 294.107604992, 393.1506347403, 376.3540649472], [213.7011108166, 298.3341675008, 227.4675903117, 350.6770629632], [259.00866698000004, 298.7966308352, 277.7489624293, 366.9599609344], [323.9689941621, 299.1896362496, 338.55944827220003, 347.8790893568], [377.0847167666, 294.107604992, 393.1506347403, 354.2727050752], [186.74377439120002, 349.072021504, 207.963134792, 376.3540649472]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047647_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four umbrellas, and a chair.", "boxes_value": [[51.74377439120002, 21.107604992000006, 258.1506347403, 103.35406494720002], [78.70111081659999, 25.334167500799992, 92.46759031170001, 77.6770629632], [124.00866698000004, 25.796630835200006, 142.74896242929998, 93.95996093439999], [188.9689941621, 26.189636249600028, 203.55944827220003, 74.87908935680002], [242.08471676660002, 21.107604992000006, 258.1506347403, 81.27270507520001], [51.74377439120002, 76.07202150400002, 72.963134792, 103.35406494720002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047648.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[139.2694091652, 168.8168334848, 410.55334474529997, 265.3773193216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047648_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[68.26940916519999, 24.816833484799986, 339.55334474529997, 121.3773193216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047648.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five pictures.", "boxes_value": [[139.2694091652, 168.8168334848, 410.55334474529997, 265.3773193216], [139.2694091652, 179.810729984, 191.5968628128, 265.3773193216], [191.9197998036, 179.7969360384, 241.9761962625, 265.3773193216], [242.62207031280002, 174.3068847616, 296.553833016, 263.7625732608], [296.2308960252, 173.0151367168, 351.4544677659, 261.5019531264], [354.6839599599, 168.8168334848, 410.55334474529997, 260.2102050816]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047648_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five pictures.", "boxes_value": [[68.26940916519999, 24.816833484799986, 339.55334474529997, 121.3773193216], [68.26940916519999, 35.810729984000005, 120.5968628128, 121.3773193216], [120.9197998036, 35.79693603839999, 170.9761962625, 121.3773193216], [171.62207031280002, 30.306884761600003, 225.553833016, 119.76257326080002], [225.23089602520002, 29.015136716799987, 280.4544677659, 117.5019531264], [283.6839599599, 24.816833484799986, 339.55334474529997, 116.21020508160001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047650.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify.", "boxes_value": [[492.3359374961, 70.376647936, 683.1629638683, 284.0239868416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047650_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify.", "boxes_value": [[48.33593749609997, 54.376647936, 239, 268.0239868416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047650.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, and four people.", "boxes_value": [[492.3359374961, 70.376647936, 683.1629638683, 284.0239868416], [608.9552001907, 251.2946166784, 641.5363769205, 282.91760256], [492.3359374961, 85.7675170816, 551.9754638925, 214.6659546112], [552.9374999908999, 70.376647936, 642.3968505774, 257.3195190272], [635.479003937, 214.9776611328, 666.3781738467, 284.0239868416], [665.6152343656, 152.4163208192, 683.1629638683, 281.7351684608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047650_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, and four people.", "boxes_value": [[48.33593749609997, 54.376647936, 239, 268.0239868416], [164.9552001907, 235.2946166784, 197.53637692049995, 266.91760256], [48.33593749609997, 69.7675170816, 107.97546389249999, 198.6659546112], [108.93749999089994, 54.376647936, 198.39685057739996, 241.3195190272], [191.47900393700002, 198.9776611328, 222.37817384669995, 268.0239868416], [221.61523436560003, 136.4163208192, 239, 265.7351684608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047651.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object.", "boxes_value": [[248.0683594, 376.9561767756, 359.7911377, 475.71118164660004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047651_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object.", "boxes_value": [[28.06835939999999, 24.9561767756, 139.79113769999998, 123.71118164660004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047651.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a cup, and three wine glasses.", "boxes_value": [[248.0683594, 376.9561767756, 359.7911377, 475.71118164660004], [135.11279295, 370.35510252780006, 453.8973999, 536.0343017484], [325.8753052, 376.9561767756, 359.7911377, 426.8323974732], [289.96441649999997, 387.43017578160004, 327.37158205, 455.2618408458], [248.0683594, 392.417846679, 287.96936035, 475.71118164660004], [229.1154175, 395.9091797028, 265.02630615, 473.71606446]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047651_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a cup, and three wine glasses.", "boxes_value": [[28.06835939999999, 24.9561767756, 139.79113769999998, 123.71118164660004], [0, 18.355102527800057, 167, 148], [105.87530520000001, 24.9561767756, 139.79113769999998, 74.83239747319999], [69.96441649999997, 35.43017578160004, 107.37158204999997, 103.26184084580001], [28.06835939999999, 40.41784667899998, 67.96936034999999, 123.71118164660004], [9.115417500000007, 43.909179702799975, 45.02630614999998, 121.71606445999998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047652.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[18.4605102336, 0, 638.472534144, 131.24700928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047652_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[18.4605102336, 0, 638.472534144, 131.24700928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047652.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[18.4605102336, 0, 638.472534144, 131.24700928], [18.4605102336, 0, 110.4268798464, 67.3545532416], [186.90417477120002, 37.3444824064, 249.82855226879997, 108.01336668160002], [264.349609344, 66.3864745984, 315.65710448640004, 131.24700928], [603.7237549056, 37.4579467776, 638.472534144, 91.4100952064], [587.2406006016, 98.9385375744, 619.3833007872, 125.6486816256]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047652_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[18.4605102336, 0, 638.472534144, 131.24700928], [18.4605102336, 0, 110.4268798464, 67.3545532416], [186.90417477120002, 37.3444824064, 249.82855226879997, 108.01336668160002], [264.349609344, 66.3864745984, 315.65710448640004, 131.24700928], [603.7237549056, 37.4579467776, 638.472534144, 91.4100952064], [587.2406006016, 98.9385375744, 619.3833007872, 125.6486816256]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047653.jpg", "text": "Please describe the section of the picture defined by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[381.9145507885, 202.8361816576, 623.471801784, 347.3108520448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047653_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[60.91455078849998, 36.83618165760001, 302.47180178400004, 181.31085204480001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047653.jpg", "text": "Please describe the section of the picture defined by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a clock, a potted plant, a luggage, a backpack, and a trolley.", "boxes_value": [[381.9145507885, 202.8361816576, 623.471801784, 347.3108520448], [447.5235595428, 202.8361816576, 467.099975601, 223.1655883776], [404.95288088729995, 265.2882080256, 444.6658935532, 332.69567872], [381.9145507885, 306.716247552, 413.5703125139, 344.4115600384], [566.9760741871, 279.4019165184, 623.471801784, 347.3108520448], [503.75244143019995, 309.3767089664, 530.0151367353001, 335.978271488]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047653_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a clock, a potted plant, a luggage, a backpack, and a trolley.", "boxes_value": [[60.91455078849998, 36.83618165760001, 302.47180178400004, 181.31085204480001], [126.5235595428, 36.83618165760001, 146.09997560099998, 57.16558837759999], [83.95288088729995, 99.2882080256, 123.66589355320002, 166.69567872], [60.91455078849998, 140.71624755200003, 92.57031251389998, 178.4115600384], [245.97607418710004, 113.40191651840001, 302.47180178400004, 181.31085204480001], [182.75244143019995, 143.37670896639997, 209.01513673530008, 169.97827148800002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047654.jpg", "text": "What's inside the area of the provided graphic ? Please point out the objects and their coordinates.", "boxes_value": [[94.15655516059999, 137.6398925824, 243.01995846810001, 202.5216674816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047654_crop.jpg", "text": "What's inside the area of the provided graphic ? Please point out the objects and their coordinates.", "boxes_value": [[38.15655516059999, 16.639892582399995, 187.01995846810001, 81.52166748159999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047654.jpg", "text": "What's inside the area of the provided graphic ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a storage box, and three pictures.", "boxes_value": [[94.15655516059999, 137.6398925824, 243.01995846810001, 202.5216674816], [148.9207153401, 84.2311401472, 207.3358154108, 177.695251456], [133.5867919616, 170.3933715968, 198.57354735060002, 202.5216674816], [130.4573974272, 137.899963392, 168.6358032105, 174.2008056832], [94.15655516059999, 157.9280395264, 136.0902710213, 197.3582153216], [221.4891967845, 137.6398925824, 243.01995846810001, 157.9041137664]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047654_crop.jpg", "text": "What's inside the area of the provided graphic ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a storage box, and three pictures.", "boxes_value": [[38.15655516059999, 16.639892582399995, 187.01995846810001, 81.52166748159999], [92.9207153401, 0, 151.3358154108, 56.695251455999994], [77.5867919616, 49.393371596799994, 142.57354735060002, 81.52166748159999], [74.4573974272, 16.89996339199999, 112.6358032105, 53.2008056832], [38.15655516059999, 36.9280395264, 80.0902710213, 76.3582153216], [165.4891967845, 16.639892582399995, 187.01995846810001, 36.9041137664]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047655.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[0.07531737599999999, 264.5687866368, 113.5672607232, 407.7916870144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047655_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[0.07531737599999999, 36.568786636799985, 113.5672607232, 179.7916870144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047655.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a potted plant, a vase, a person, and a parking meter.", "boxes_value": [[0.07531737599999999, 264.5687866368, 113.5672607232, 407.7916870144], [52.483093248, 264.5687866368, 113.5672607232, 305.1283569152], [25.264099123199998, 292.2070312448, 47.8945923072, 310.953186048], [69.1242065664, 285.013549824, 98.60272220160002, 305.2205810688], [0.07531737599999999, 318.055847168, 10.4027099904, 352.2670898688], [89.2179565056, 329.2948608512, 101.68676759040001, 407.7916870144]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047655_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a potted plant, a vase, a person, and a parking meter.", "boxes_value": [[0.07531737599999999, 36.568786636799985, 113.5672607232, 179.7916870144], [52.483093248, 36.568786636799985, 113.5672607232, 77.12835691520002], [25.264099123199998, 64.20703124480002, 47.8945923072, 82.95318604800002], [69.1242065664, 57.013549823999995, 98.60272220160002, 77.22058106880002], [0.07531737599999999, 90.05584716800001, 10.4027099904, 124.26708986879999], [89.2179565056, 101.29486085119999, 101.68676759040001, 179.7916870144]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047656.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[198.611389184, 149.9763183405, 504.5914916864, 682.5235595534]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047656_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations.", "boxes_value": [[76.61138918399999, 133.9763183405, 382.5914916864, 666.5235595534]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047656.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include a carpet, two people, a van, and a bus.", "boxes_value": [[198.611389184, 149.9763183405, 504.5914916864, 682.5235595534], [208.6679077376, 443.33886717240006, 350.42724608, 651.9506836049], [198.611389184, 244.8394775713, 222.9446411264, 320.2290649715], [234.5991821312, 149.9763183405, 504.5914916864, 682.5235595534], [182.5855712768, 215.1399535862, 243.6447143424, 261.99926758180004], [239.9248657408, 219.7631225312, 286.54418944, 253.3037109319]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047656_crop.jpg", "text": "What's inside the area of the provided graphic ? Please mention the objects and their locations. For your reference, objects involved in this region include a carpet, two people, a van, and a bus.", "boxes_value": [[76.61138918399999, 133.9763183405, 382.5914916864, 666.5235595534], [86.66790773759999, 427.33886717240006, 228.42724607999997, 635.9506836049], [76.61138918399999, 228.8394775713, 100.9446411264, 304.2290649715], [112.5991821312, 133.9763183405, 382.5914916864, 666.5235595534], [60.58557127680001, 199.1399535862, 121.6447143424, 245.99926758180004], [117.92486574079999, 203.7631225312, 164.54418944000003, 237.3037109319]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047657.jpg", "text": "Please enlighten me about the area in the photograph . Specify the location of each mentioned object.", "boxes_value": [[39.1588134624, 222.0691528192, 187.752990738, 292.7554321408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047657_crop.jpg", "text": "Please enlighten me about the area in the photograph . Specify the location of each mentioned object.", "boxes_value": [[37.1588134624, 18.069152819200013, 185.752990738, 88.7554321408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047657.jpg", "text": "Please enlighten me about the area in the photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, a car, and a sports car.", "boxes_value": [[39.1588134624, 222.0691528192, 187.752990738, 292.7554321408], [161.7508544764, 222.0691528192, 187.752990738, 289.6502075392], [155.618286129, 225.5034179584, 168.2514037852, 284.3761596928], [140.6547851334, 233.4757690368, 156.9674682794, 281.6777954304], [127.4084472496, 233.9663696384, 140.9000854498, 280.0833740288], [6.1250000376000004, 237.9676513792, 64.9412842104, 279.2598877184], [39.1588134624, 243.204711936, 133.8288574086, 292.7554321408]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047657_crop.jpg", "text": "Please enlighten me about the area in the photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, a car, and a sports car.", "boxes_value": [[37.1588134624, 18.069152819200013, 185.752990738, 88.7554321408], [159.7508544764, 18.069152819200013, 185.752990738, 85.65020753919998], [153.618286129, 21.503417958400007, 166.2514037852, 80.3761596928], [138.6547851334, 29.47576903679999, 154.9674682794, 77.67779543040001], [125.4084472496, 29.96636963840001, 138.9000854498, 76.0833740288], [4.1250000376000004, 33.96765137919999, 62.9412842104, 75.25988771840002], [37.1588134624, 39.204711935999995, 131.8288574086, 88.7554321408]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047658.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[358.482299791, 127.4127807488, 554.99914547, 351.3917236224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047658_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[49.482299791, 56.4127807488, 245.99914547000003, 280.3917236224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047658.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a helmet, a hat, and a sneakers.", "boxes_value": [[358.482299791, 127.4127807488, 554.99914547, 351.3917236224], [333.41064456699996, 127.1227417088, 416.50634764200004, 326.3736572416], [471.903442395, 153.9277954048, 554.99914547, 351.3917236224], [358.482299791, 127.4127807488, 383.647338844, 151.3629760512], [507.02368162600004, 154.5304565248, 530.484619126, 180.0819091968], [520.722534178, 336.5298461696, 553.943481476, 350.1850586112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047658_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a helmet, a hat, and a sneakers.", "boxes_value": [[49.482299791, 56.4127807488, 245.99914547000003, 280.3917236224], [24.410644566999963, 56.12274170880001, 107.50634764200004, 255.37365724159997], [162.903442395, 82.92779540480001, 245.99914547000003, 280.3917236224], [49.482299791, 56.4127807488, 74.64733884399999, 80.36297605120001], [198.02368162600004, 83.5304565248, 221.48461912599998, 109.08190919680001], [211.72253417800005, 265.5298461696, 244.943481476, 279.1850586112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047661.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[42.5676879872, 193.6900024164, 111.0845947392, 338.5098266484]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047661_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[17.567687987200003, 36.690002416400006, 86.0845947392, 181.5098266484]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047661.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a slippers, three chairs, and two desks.", "boxes_value": [[42.5676879872, 193.6900024164, 111.0845947392, 338.5098266484], [69.726318336, 125.6572875888, 446.92840576, 537.4782715068], [42.5676879872, 193.6900024164, 111.0845947392, 338.5098266484], [97.8656616448, 225.1275634824, 110.8339843584, 256.3132934724], [69.2163085824, 310.9061279436, 81.4831542784, 339.21429445800004], [50.4395751936, 203.1627197448, 85.9677124096, 257.4418335024], [90.2442627072, 215.00543213280002, 107.0214843904, 254.81011960080002], [28.7279663104, 202.5047607288, 109.98211671040002, 250.86254880479999], [40.5706787328, 240.33569336760002, 110.96905518079998, 339.3539428884], [90.9022216704, 254.4811401492, 110.9690551808, 274.5479736372]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6, 8], [7, 9]]}, {"image_path": "objects365_v1_00047661_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a slippers, three chairs, and two desks.", "boxes_value": [[17.567687987200003, 36.690002416400006, 86.0845947392, 181.5098266484], [44.726318336000006, 0, 103, 217], [17.567687987200003, 36.690002416400006, 86.0845947392, 181.5098266484], [72.8656616448, 68.1275634824, 85.8339843584, 99.3132934724], [44.2163085824, 153.90612794359998, 56.483154278399994, 182.21429445800004], [25.4395751936, 46.1627197448, 60.9677124096, 100.44183350240002], [65.2442627072, 58.005432132800024, 82.0214843904, 97.81011960080002], [3.7279663103999994, 45.504760728799994, 84.98211671040002, 93.86254880479999], [15.570678732799998, 83.33569336760002, 85.96905518079998, 182.3539428884], [65.9022216704, 97.4811401492, 85.9690551808, 117.54797363720002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6, 8], [7, 9]]}, {"image_path": "objects365_v1_00047665.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[0.1337890452, 218.1208496128, 569.500122065, 360.6735944192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047665_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[0.1337890452, 36.12084961279999, 569.500122065, 178.67359441920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047665.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a handbag, and four buses.", "boxes_value": [[0.1337890452, 218.1208496128, 569.500122065, 360.6735944192], [269.4978299863, 336.7659804672, 282.3711606264, 360.6735944192], [178.3342284991, 218.1208496128, 322.1138915785, 294.0337524224], [0.1337890452, 230.547973632, 170.92956541270001, 325.596374528], [434.16162108429995, 226.7905883648, 569.500122065, 355.3052368384], [318.5715331832, 261.6132201984, 370.74609374019997, 290.1420898304]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047665_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a handbag, and four buses.", "boxes_value": [[0.1337890452, 36.12084961279999, 569.500122065, 178.67359441920001], [269.4978299863, 154.7659804672, 282.3711606264, 178.67359441920001], [178.3342284991, 36.12084961279999, 322.1138915785, 112.03375242240003], [0.1337890452, 48.54797363200001, 170.92956541270001, 143.596374528], [434.16162108429995, 44.79058836479999, 569.500122065, 173.3052368384], [318.5715331832, 79.61322019839997, 370.74609374019997, 108.14208983039998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047667.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[110.87164305149999, 111.6346435584, 306.314636202, 205.4532470784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047667_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object.", "boxes_value": [[48.87164305149999, 23.6346435584, 244.31463620199997, 117.45324707840001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047667.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, two sports cars, and a suv.", "boxes_value": [[110.87164305149999, 111.6346435584, 306.314636202, 205.4532470784], [245.51330563259998, 144.7768554496, 286.418762193, 196.590393088], [184.8368530074, 153.6396484608, 238.69573971589998, 205.4532470784], [3.2758789104000003, 130.3809204224, 667.0933837852, 401.1180420096], [110.87164305149999, 111.6346435584, 306.314636202, 164.5671386624], [14.778808571699999, 83.1325683712, 222.4370117301, 180.8540649472]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047667_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, two sports cars, and a suv.", "boxes_value": [[48.87164305149999, 23.6346435584, 244.31463620199997, 117.45324707840001], [183.51330563259998, 56.77685544959999, 224.41876219300002, 108.59039308800001], [122.83685300740001, 65.6396484608, 176.69573971589998, 117.45324707840001], [0, 42.38092042240001, 293, 140], [48.87164305149999, 23.6346435584, 244.31463620199997, 76.5671386624], [0, 0, 160.4370117301, 92.85406494719999]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047668.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[139.3016357327, 195.4431762944, 408.5214844074, 297.9583740416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047668_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[68.30163573269999, 26.44317629439999, 337.5214844074, 128.9583740416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047668.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two glasses, and a car.", "boxes_value": [[139.3016357327, 195.4431762944, 408.5214844074, 297.9583740416], [20.0663452269, 201.85198976, 265.1619873301, 511.26354979840005], [205.1917724976, 165.3483886592, 318.17919921059996, 433.9106445312], [139.3016357327, 229.4710083072, 184.9048461715, 248.8178100736], [370.0816650286, 234.9986572288, 408.5214844074, 255.7274169856], [267.4438476626, 195.4431762944, 397.72985840280006, 297.9583740416]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047668_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two glasses, and a car.", "boxes_value": [[68.30163573269999, 26.44317629439999, 337.5214844074, 128.9583740416], [0, 32.85198976000001, 194.16198733009998, 154], [134.1917724976, 0, 247.17919921059996, 154], [68.30163573269999, 60.47100830720001, 113.9048461715, 79.81781007359999], [299.0816650286, 65.9986572288, 337.5214844074, 86.7274169856], [196.44384766259998, 26.44317629439999, 326.72985840280006, 128.9583740416]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047669.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[601.5281982645, 311.772827136, 794.9598389055, 512.1535644672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047669_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[48.52819826450002, 50.77282713599999, 241.95983890549996, 251]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047669.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, and two people.", "boxes_value": [[601.5281982645, 311.772827136, 794.9598389055, 512.1535644672], [601.5281982645, 358.6066284032, 710.5895995815, 512.1535644672], [696.2393798955, 347.1264648192, 736.4199218445, 510.7185058816], [697.5947265825, 333.4938354688, 726.143798829, 361.4766845952], [571.2811278915001, 214.2848510976, 699.651855465, 478.7594604544], [762.818237343, 311.772827136, 794.9598389055, 343.600891136]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047669_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, and two people.", "boxes_value": [[48.52819826450002, 50.77282713599999, 241.95983890549996, 251], [48.52819826450002, 97.60662840319998, 157.5895995815, 251], [143.23937989549995, 86.12646481920001, 183.41992184449998, 249.71850588159998], [144.59472658250002, 72.4938354688, 173.14379882900005, 100.47668459520003], [18.28112789150009, 0, 146.65185546500004, 217.75946045440003], [209.81823734299996, 50.77282713599999, 241.95983890549996, 82.60089113599997]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047670.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe.", "boxes_value": [[388.90527346060003, 165.1295165952, 683.3756103177, 512.66345216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047670_crop.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe.", "boxes_value": [[73.90527346060003, 87.12951659519999, 368, 434]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047670.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two glasses, and a bottle.", "boxes_value": [[388.90527346060003, 165.1295165952, 683.3756103177, 512.66345216], [449.2617187298, 165.1295165952, 683.3756103177, 512.6528320512], [334.4575805798, 138.6078491136, 526.1917724696, 434.19799802879993], [388.90527346060003, 182.3890991104, 457.52307131159995, 196.3270874112], [524.5325928072, 167.378967296, 615.6656494416, 232.7802734592], [431.4028320485, 435.793823232, 502.41577149920005, 512.66345216]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047670_crop.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two glasses, and a bottle.", "boxes_value": [[73.90527346060003, 87.12951659519999, 368, 434], [134.2617187298, 87.12951659519999, 368, 434], [19.45758057979998, 60.6078491136, 211.19177246959998, 356.19799802879993], [73.90527346060003, 104.3890991104, 142.52307131159995, 118.32708741120001], [209.5325928072, 89.37896729600001, 300.66564944159995, 154.7802734592], [116.40283204849999, 357.793823232, 187.41577149920005, 434]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047671.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object.", "boxes_value": [[268.1992797696, 438.3739013376, 343.052917504, 485.672729472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047671_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object.", "boxes_value": [[19.199279769600025, 12.373901337599989, 94.05291750399999, 59.672729472000015]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047671.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cars, two suvs, and a truck.", "boxes_value": [[268.1992797696, 438.3739013376, 343.052917504, 485.672729472], [314.3607177728, 465.60253908479996, 337.4414672896, 485.672729472], [268.1992797696, 459.07971194879997, 291.2800292864, 476.13940431360004], [298.8165893632, 453.54064942080004, 319.9868164096, 472.18322757120006], [253.948242176, 431.7385254144, 284.5977172992, 460.49218752], [314.2993164288, 438.3739013376, 343.052917504, 451.01293946879997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047671_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cars, two suvs, and a truck.", "boxes_value": [[19.199279769600025, 12.373901337599989, 94.05291750399999, 59.672729472000015], [65.3607177728, 39.60253908479996, 88.44146728959998, 59.672729472000015], [19.199279769600025, 33.07971194879997, 42.28002928640001, 50.13940431360004], [49.81658936320002, 27.540649420800037, 70.98681640960001, 46.183227571200064], [4.948242176000008, 5.738525414399987, 35.597717299199985, 34.492187520000016], [65.29931642880001, 12.373901337599989, 94.05291750399999, 25.01293946879997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047673.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[185.78753660499999, 83.554382336, 398.01556399, 375.3678588928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047673_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[53.787536604999985, 73.554382336, 266.01556399, 365.3678588928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047673.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include an american football, two people, two helmets, two gloves, and three sneakers.", "boxes_value": [[185.78753660499999, 83.554382336, 398.01556399, 375.3678588928], [313.42065430499997, 123.945617664, 349.834533675, 159.8895874048], [185.78753660499999, 83.554382336, 398.01556399, 375.3678588928], [175.83935545, 273.233154304, 385.394226085, 392.3741454848], [206.75738522999998, 86.32342528, 261.26953127, 147.0458374144], [328.76544189500004, 112.5585327104, 356.648193385, 146.524780288], [245.62420652, 247.4095458816, 271.479125995, 289.9940795904], [279.38500975, 275.0705566208, 332.061401375, 345.9442748928], [366.54052731999997, 310.1881713664, 397.18859862, 338.2822265856], [318.972106915, 354.8833007616, 354.08972170000004, 373.0805664256], [176.26696779999997, 345.9442748928, 195.102783205, 372.7613525504]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 7], [5, 6], [8, 9, 10]]}, {"image_path": "objects365_v1_00047673_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include an american football, two people, two helmets, two gloves, and three sneakers.", "boxes_value": [[53.787536604999985, 73.554382336, 266.01556399, 365.3678588928], [181.42065430499997, 113.945617664, 217.834533675, 149.8895874048], [53.787536604999985, 73.554382336, 266.01556399, 365.3678588928], [43.83935545, 263.233154304, 253.394226085, 382.3741454848], [74.75738522999998, 76.32342528, 129.26953127000002, 137.0458374144], [196.76544189500004, 102.5585327104, 224.648193385, 136.524780288], [113.62420652, 237.4095458816, 139.479125995, 279.9940795904], [147.38500975, 265.0705566208, 200.061401375, 335.9442748928], [234.54052731999997, 300.1881713664, 265.18859862, 328.2822265856], [186.97210691499998, 344.8833007616, 222.08972170000004, 363.0805664256], [44.266967799999975, 335.9442748928, 63.10278320500001, 362.7613525504]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 7], [5, 6], [8, 9, 10]]}, {"image_path": "objects365_v1_00047676.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates.", "boxes_value": [[134.076049792, 219.69543456, 239.622619648, 409.944702144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047676_crop.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates.", "boxes_value": [[27.076049791999992, 47.695434559999995, 132.622619648, 237.94470214400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047676.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include four beds, and a desk.", "boxes_value": [[134.076049792, 219.69543456, 239.622619648, 409.944702144], [168.032836928, 226.3347168, 184.398986816, 310.3475952], [223.67773440000002, 228.516845712, 241.134948736, 305.255920416], [167.899658176, 350.76068116799996, 184.952636736, 409.944702144], [224.07427980800003, 331.701416016, 239.622619648, 396.40264891199996], [134.076049792, 219.69543456, 143.42956544, 314.299438464]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047676_crop.jpg", "text": "Please share details about the rectangular region within the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include four beds, and a desk.", "boxes_value": [[27.076049791999992, 47.695434559999995, 132.622619648, 237.94470214400002], [61.032836927999995, 54.334716799999995, 77.39898681599999, 138.3475952], [116.67773440000002, 56.51684571199999, 134.134948736, 133.25592041599998], [60.899658176, 178.76068116799996, 77.95263673599999, 237.94470214400002], [117.07427980800003, 159.701416016, 132.622619648, 224.40264891199996], [27.076049791999992, 47.695434559999995, 36.429565440000005, 142.299438464]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047680.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[244.7282715113, 64.782592768, 517.9143066201, 511.840759296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047680_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[68.7282715113, 64.782592768, 341.9143066201, 511.840759296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047680.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a cabinet, three people, and a bracelet.", "boxes_value": [[244.7282715113, 64.782592768, 517.9143066201, 511.840759296], [169.5739746059, 6.4738769408, 435.7924804932, 261.0755615232], [466.7706298592, 29.7075195392, 553.8966064291, 186.534423808], [408.5625000298, 248.1384277504, 439.6589355704, 325.8793945088], [244.7282715113, 122.2778930688, 404.7926025067, 511.840759296], [378.7565918281, 149.962097152, 641.3389892375, 511.6170043904], [372.69152833289996, 301.2761230336, 395.690063505, 324.9786376704], [483.2135009969, 64.782592768, 517.9143066201, 149.2429809664]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 7], [6]]}, {"image_path": "objects365_v1_00047680_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a cabinet, three people, and a bracelet.", "boxes_value": [[68.7282715113, 64.782592768, 341.9143066201, 511.840759296], [0, 6.4738769408, 259.7924804932, 261.0755615232], [290.7706298592, 29.7075195392, 377.89660642909996, 186.534423808], [232.5625000298, 248.1384277504, 263.6589355704, 325.8793945088], [68.7282715113, 122.2778930688, 228.7926025067, 511.840759296], [202.7565918281, 149.962097152, 410, 511.6170043904], [196.69152833289996, 301.2761230336, 219.690063505, 324.9786376704], [307.2135009969, 64.782592768, 341.9143066201, 149.2429809664]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 7], [6]]}, {"image_path": "objects365_v1_00047681.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[574.2569580439999, 75.4923705856, 765.993774392, 319.4863891456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047681_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[48.25695804399993, 61.4923705856, 239.99377439199998, 305.4863891456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047681.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two lamps, an air conditioner, a person, and a street lights.", "boxes_value": [[574.2569580439999, 75.4923705856, 765.993774392, 319.4863891456], [739.107543944, 204.5433349632, 765.993774392, 268.8080444416], [574.2569580439999, 186.9113769472, 618.0993652679999, 221.11810304], [735.718872042, 75.4923705856, 759.021118163, 103.9728393728], [621.731079123, 255.2784423936, 664.619262692, 319.1729736192], [610.7807617450001, 137.8854980608, 658.6573486570001, 319.4863891456]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047681_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two lamps, an air conditioner, a person, and a street lights.", "boxes_value": [[48.25695804399993, 61.4923705856, 239.99377439199998, 305.4863891456], [213.10754394399999, 190.5433349632, 239.99377439199998, 254.80804444159998], [48.25695804399993, 172.9113769472, 92.09936526799993, 207.11810304], [209.71887204200004, 61.4923705856, 233.02111816299998, 89.9728393728], [95.73107912299997, 241.2784423936, 138.619262692, 305.1729736192], [84.78076174500006, 123.88549806079999, 132.65734865700006, 305.4863891456]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047682.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each object you identify.", "boxes_value": [[296.8205566589, 354.2009887744, 683.3670654412999, 511.8001709056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047682_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each object you identify.", "boxes_value": [[96.82055665889999, 40.2009887744, 483, 197.8001709056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047682.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a glasses, a suv, a street lights, a traffic sign, and a truck.", "boxes_value": [[296.8205566589, 354.2009887744, 683.3670654412999, 511.8001709056], [521.9916992124, 405.5200195072, 629.1884765412, 511.8001709056], [527.8848661554999, 426.7167348224, 565.9428311041, 441.6553752576], [408.2446288929, 402.032958976, 544.0469970415, 481.321228032], [467.2891845639, 287.3179321344, 493.4373779242, 404.563415552], [296.8205566589, 354.2009887744, 328.2359619419, 363.7622680576], [654.7670898377, 408.1861572096, 683.3670654412999, 483.449401856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047682_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a glasses, a suv, a street lights, a traffic sign, and a truck.", "boxes_value": [[96.82055665889999, 40.2009887744, 483, 197.8001709056], [321.9916992124, 91.5200195072, 429.1884765412, 197.8001709056], [327.88486615549994, 112.71673482239999, 365.94283110410004, 127.65537525759999], [208.24462889289998, 88.03295897599997, 344.04699704150005, 167.32122803200002], [267.2891845639, 0, 293.4373779242, 90.56341555199998], [96.82055665889999, 40.2009887744, 128.23596194189997, 49.76226805760001], [454.76708983770004, 94.18615720960003, 483, 169.449401856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047683.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.3419799828, 291.3070068224, 681.6872558322, 510.7732544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047683_crop.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.3419799828, 55.30700682240001, 681.6872558322, 274.7732544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047683.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, two storage boxes, a person, two plates, a pie, and two baksets.", "boxes_value": [[1.3419799828, 291.3070068224, 681.6872558322, 510.7732544], [1.3419799828, 291.3070068224, 681.6872558322, 510.7732544], [614.5922851628, 348.9548339712, 681.2274169983, 413.3687744], [567.1606445555, 374.4817504768, 609.0455321998, 458.2515869184], [0.0299682642, 209.6594848768, 48.9182129195, 391.5692138496], [18.6721802008, 371.5369872896, 84.6770629949, 397.1307373056], [14.0136718796, 288.1683349504, 74.44708250640001, 305.1013793792], [20.7217407389, 281.6024780288, 68.0774536022, 300.8406982656], [16.2131347989, 231.560363776, 312.7117919869, 367.0248413184], [147.7110595724, 357.4981689344, 252.0257568034, 432.6540527104], [247.5081787022, 350.1057739264, 351.412231472, 409.6555175936]], "boxes_seq": [[0], [0], [1, 8], [2, 3], [4], [5, 6], [7], [9, 10]]}, {"image_path": "objects365_v1_00047683_crop.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, two storage boxes, a person, two plates, a pie, and two baksets.", "boxes_value": [[1.3419799828, 55.30700682240001, 681.6872558322, 274.7732544], [1.3419799828, 55.30700682240001, 681.6872558322, 274.7732544], [614.5922851628, 112.9548339712, 681.2274169983, 177.3687744], [567.1606445555, 138.48175047680002, 609.0455321998, 222.2515869184], [0.0299682642, 0, 48.9182129195, 155.56921384959998], [18.6721802008, 135.5369872896, 84.6770629949, 161.1307373056], [14.0136718796, 52.16833495039998, 74.44708250640001, 69.10137937920001], [20.7217407389, 45.60247802880002, 68.0774536022, 64.8406982656], [16.2131347989, 0, 312.7117919869, 131.02484131839998], [147.7110595724, 121.49816893439998, 252.0257568034, 196.65405271039998], [247.5081787022, 114.10577392639999, 351.412231472, 173.6555175936]], "boxes_seq": [[0], [0], [1, 8], [2, 3], [4], [5, 6], [7], [9, 10]]}, {"image_path": "objects365_v1_00047684.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[422.11779783730003, 0.9738159104, 599.6668700947, 195.9514770432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047684_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object.", "boxes_value": [[45.11779783730003, 0.9738159104, 222.66687009470002, 195.9514770432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047684.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, four pictures, a lamp, and a blackboard.", "boxes_value": [[422.11779783730003, 0.9738159104, 599.6668700947, 195.9514770432], [491.73718260920003, 108.9538574336, 517.3897704746, 224.2921142784], [553.7652587674, 137.9958496256, 583.0120849568, 211.845581056], [422.11779783730003, 130.303588864, 450.9106445132, 156.2172241408], [446.87963868549997, 172.917175296, 466.45886228349997, 195.9514770432], [419.2384032955, 172.3413085696, 445.7280273313, 195.375610368], [424.8803711029, 0.9738159104, 599.6668700947, 61.1154174976], [507.226684577, 139.5841064448, 552.0260009749, 242.6838989312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047684_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, four pictures, a lamp, and a blackboard.", "boxes_value": [[45.11779783730003, 0.9738159104, 222.66687009470002, 195.9514770432], [114.73718260920003, 108.9538574336, 140.38977047460003, 224.2921142784], [176.76525876740004, 137.9958496256, 206.01208495679998, 211.845581056], [45.11779783730003, 130.303588864, 73.9106445132, 156.2172241408], [69.87963868549997, 172.917175296, 89.45886228349997, 195.9514770432], [42.23840329550001, 172.3413085696, 68.72802733129998, 195.375610368], [47.88037110290003, 0.9738159104, 222.66687009470002, 61.1154174976], [130.22668457700001, 139.5841064448, 175.02600097489994, 242.6838989312]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047685.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[257.0936889548, 242.6024780288, 512.8951416076, 325.6134033408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047685_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[64.09368895479997, 21.602478028799993, 319.89514160759995, 104.6134033408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047685.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, three pillows, and a desk.", "boxes_value": [[257.0936889548, 242.6024780288, 512.8951416076, 325.6134033408], [43.38482665040001, 240.4234619392, 541.9394531004, 510.5142212096], [388.98486331400005, 245.26379392, 496.440307642, 325.6134033408], [314.4611206124, 242.6024780288, 398.3865966848, 312.3033447424], [486.57946777079997, 246.1586303488, 512.8951416076, 320.838134784], [257.0936889548, 284.72558592, 297.344360344, 302.5108032]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047685_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a bed, three pillows, and a desk.", "boxes_value": [[64.09368895479997, 21.602478028799993, 319.89514160759995, 104.6134033408], [0, 19.423461939199996, 348.9394531004, 125], [195.98486331400005, 24.263793920000012, 303.440307642, 104.6134033408], [121.46112061240001, 21.602478028799993, 205.3865966848, 91.30334474239999], [293.57946777079997, 25.15863034879999, 319.89514160759995, 99.83813478399998], [64.09368895479997, 63.725585920000015, 104.344360344, 81.5108032]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047686.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[115.86816404300001, 136.896606464, 338.5056762969, 414.2936401408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047686_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[55.86816404300001, 69.896606464, 278.5056762969, 347.2936401408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047686.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a barrel, two leather shoes, and a horse.", "boxes_value": [[115.86816404300001, 136.896606464, 338.5056762969, 414.2936401408], [166.7331542649, 29.8341675008, 425.3859863043, 354.145080576], [115.86816404300001, 136.896606464, 164.0052490548, 204.2404174848], [165.4309692088, 295.2786254848, 201.4707641627, 321.6798706176], [296.1798706262, 366.5200195072, 338.5056762969, 414.2936401408], [98.8696288982, 145.3416748032, 396.3701171796, 474.8109130752]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047686_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a barrel, two leather shoes, and a horse.", "boxes_value": [[55.86816404300001, 69.896606464, 278.5056762969, 347.2936401408], [106.73315426490001, 0, 334, 287.145080576], [55.86816404300001, 69.896606464, 104.0052490548, 137.2404174848], [105.43096920880001, 228.2786254848, 141.4707641627, 254.6798706176], [236.1798706262, 299.5200195072, 278.5056762969, 347.2936401408], [38.869628898200006, 78.3416748032, 334, 407.8109130752]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047687.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify.", "boxes_value": [[219.4735717853, 373.1801147392, 371.6435547064, 459.7528076288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047687_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify.", "boxes_value": [[38.47357178530001, 22.180114739200008, 190.64355470639998, 108.75280762879999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047687.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three bottles, and a cup.", "boxes_value": [[219.4735717853, 373.1801147392, 371.6435547064, 459.7528076288], [90.2145995753, 148.47607424, 323.3173828317, 478.877502464], [219.4735717853, 373.1801147392, 269.0042724237, 438.8210449408], [227.4941406618, 423.2218017792, 279.8223877075, 451.8542480384], [290.6828613569, 429.6394653184, 326.7202148296, 459.7528076288], [330.1759033504, 407.424621568, 371.6435547064, 440.9936523264]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047687_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, three bottles, and a cup.", "boxes_value": [[38.47357178530001, 22.180114739200008, 190.64355470639998, 108.75280762879999], [0, 0, 142.3173828317, 127.87750246399997], [38.47357178530001, 22.180114739200008, 88.0042724237, 87.8210449408], [46.4941406618, 72.2218017792, 98.82238770750001, 100.85424803839999], [109.68286135689999, 78.63946531840003, 145.72021482960002, 108.75280762879999], [149.1759033504, 56.42462156800002, 190.64355470639998, 89.99365232640002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047688.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object.", "boxes_value": [[0.0988769538, 156.0053100544, 243.17218018070002, 344.5]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047688_crop.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object.", "boxes_value": [[0.0988769538, 48.00531005440001, 243.17218018070002, 236.5]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047688.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two storage boxes, a picture, a moniter, and a remote.", "boxes_value": [[0.0988769538, 156.0053100544, 243.17218018070002, 344.5], [190.5629272789, 245.96545408, 230.1287231196, 283.4872436736], [191.6917114071, 277.9962768384, 233.3317871268, 313.6877441536], [189.5280761407, 156.0053100544, 243.17218018070002, 220.7756958208], [0.0988769538, 178.245117184, 107.4486694363, 344.5], [100.0502929551, 293.6692505088, 128.7192993004, 307.2996826112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047688_crop.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two storage boxes, a picture, a moniter, and a remote.", "boxes_value": [[0.0988769538, 48.00531005440001, 243.17218018070002, 236.5], [190.5629272789, 137.96545408, 230.1287231196, 175.4872436736], [191.6917114071, 169.99627683839998, 233.3317871268, 205.68774415360002], [189.5280761407, 48.00531005440001, 243.17218018070002, 112.7756958208], [0.0988769538, 70.24511718400001, 107.4486694363, 236.5], [100.0502929551, 185.66925050880002, 128.7192993004, 199.2996826112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047690.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[196.54296873380002, 13.3809204224, 281.6459960795, 96.3129882624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047690_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[21.542968733800024, 13.3809204224, 106.64599607949998, 96.3129882624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047690.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[196.54296873380002, 13.3809204224, 281.6459960795, 96.3129882624], [196.54296873380002, 31.1831054848, 207.8321533418, 96.3129882624], [220.8580932493, 15.5519409152, 229.9763183354, 84.58959959039998], [232.5814819263, 13.3809204224, 242.1338501049, 80.2476196352], [257.3308105108, 18.5913085952, 267.7515869041, 70.695251456], [271.2252197629, 21.1965331968, 281.6459960795, 65.919067392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047690_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five lamps.", "boxes_value": [[21.542968733800024, 13.3809204224, 106.64599607949998, 96.3129882624], [21.542968733800024, 31.1831054848, 32.832153341799994, 96.3129882624], [45.8580932493, 15.5519409152, 54.97631833540001, 84.58959959039998], [57.5814819263, 13.3809204224, 67.1338501049, 80.2476196352], [82.3308105108, 18.5913085952, 92.75158690410001, 70.695251456], [96.2252197629, 21.1965331968, 106.64599607949998, 65.919067392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047691.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[41.2738036992, 187.6832275456, 392.7507323904, 511.3854980608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047691_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[41.2738036992, 81.6832275456, 392.7507323904, 405.3854980608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047691.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three plates, and two bowls.", "boxes_value": [[41.2738036992, 187.6832275456, 392.7507323904, 511.3854980608], [41.2738036992, 361.6156005888, 98.70434572799999, 390.8939209216], [78.9978027264, 394.8352660992, 166.2697143552, 447.7614135808], [153.31970211840002, 449.450561536, 230.8355712768, 511.3854980608], [312.79907228160005, 187.6832275456, 365.2514648064, 211.1085204992], [354.5572509696, 197.8681030144, 392.7507323904, 217.2194824192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047691_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three plates, and two bowls.", "boxes_value": [[41.2738036992, 81.6832275456, 392.7507323904, 405.3854980608], [41.2738036992, 255.61560058880002, 98.70434572799999, 284.8939209216], [78.9978027264, 288.8352660992, 166.2697143552, 341.7614135808], [153.31970211840002, 343.450561536, 230.8355712768, 405.3854980608], [312.79907228160005, 81.6832275456, 365.2514648064, 105.10852049920001], [354.5572509696, 91.86810301439999, 392.7507323904, 111.2194824192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047694.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[143.205444305, 228.722961408, 307.190917957, 341.8137206784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047694_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[41.205444304999986, 28.722961408000003, 205.190917957, 141.8137206784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047694.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cup, three bottles, and a laptop.", "boxes_value": [[143.205444305, 228.722961408, 307.190917957, 341.8137206784], [293.279541016, 319.7723999232, 307.190917957, 341.8137206784], [228.78155515599997, 261.7783203328, 245.764221164, 321.03704832], [143.205444305, 243.5892944384, 157.40808106100002, 288.3209228288], [279.922363259, 228.722961408, 289.877441392, 262.5703735296], [223.171508791, 231.859863296, 268.728881806, 260.8141479424]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047694_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cup, three bottles, and a laptop.", "boxes_value": [[41.205444304999986, 28.722961408000003, 205.190917957, 141.8137206784], [191.279541016, 119.7723999232, 205.190917957, 141.8137206784], [126.78155515599997, 61.77832033279998, 143.764221164, 121.03704832], [41.205444304999986, 43.5892944384, 55.40808106100002, 88.32092282880001], [177.922363259, 28.722961408000003, 187.87744139199998, 62.570373529599976], [121.17150879100001, 31.859863295999986, 166.728881806, 60.814147942399984]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047696.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[459.58605957, 32.787841792, 658.3050536931, 458.308410624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047696_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.586059569999975, 32.787841792, 249.30505369310004, 458.308410624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047696.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, two people, a sneakers, and a handbag.", "boxes_value": [[459.58605957, 32.787841792, 658.3050536931, 458.308410624], [482.1086425419, 396.0206908928, 549.4604492322001, 458.308410624], [459.58605957, 32.787841792, 495.5650635078, 196.0770263552], [265.8531493827, 56.7738037248, 597.9667968945, 448.8524169728], [460.9057617252, 425.7554931712, 485.41882328279996, 449.8732299776], [603.6235351938, 155.23272704, 658.3050536931, 181.3387451392]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047696_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, two people, a sneakers, and a handbag.", "boxes_value": [[50.586059569999975, 32.787841792, 249.30505369310004, 458.308410624], [73.1086425419, 396.0206908928, 140.46044923220006, 458.308410624], [50.586059569999975, 32.787841792, 86.5650635078, 196.0770263552], [0, 56.7738037248, 188.96679689450002, 448.8524169728], [51.90576172520002, 425.7554931712, 76.41882328279996, 449.8732299776], [194.62353519379997, 155.23272704, 249.30505369310004, 181.3387451392]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047697.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[44.0612792832, 368.1654052842, 373.6022949376, 497.52343750859995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047697_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[44.0612792832, 33.165405284200006, 373.6022949376, 162.52343750859995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047697.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three sneakers, a belt, and a gloves.", "boxes_value": [[44.0612792832, 368.1654052842, 373.6022949376, 497.52343750859995], [44.0612792832, 448.5141601434, 84.3103637504, 471.95751951540007], [110.8011474432, 368.1654052842, 188.010498048, 418.3117675974], [289.9431762944, 436.6221924078, 359.4404907008, 497.52343750859995], [345.414550784, 472.2908935446, 373.6022949376, 494.21472164280004], [59.1622314496, 417.2727051054, 82.5986938368, 445.2404785362]], "boxes_seq": [[0], [0], [1, 4, 5], [2], [3]]}, {"image_path": "objects365_v1_00047697_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three sneakers, a belt, and a gloves.", "boxes_value": [[44.0612792832, 33.165405284200006, 373.6022949376, 162.52343750859995], [44.0612792832, 113.51416014339998, 84.3103637504, 136.95751951540007], [110.8011474432, 33.165405284200006, 188.010498048, 83.31176759739998], [289.9431762944, 101.62219240780001, 359.4404907008, 162.52343750859995], [345.414550784, 137.2908935446, 373.6022949376, 159.21472164280004], [59.1622314496, 82.27270510540001, 82.5986938368, 110.24047853619999]], "boxes_seq": [[0], [0], [1, 4, 5], [2], [3]]}, {"image_path": "objects365_v1_00047699.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[187.71875000720001, 191.014587392, 450.9940185504, 365.2025146368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047699_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[66.71875000720001, 44.01458739200001, 329.9940185504, 218.2025146368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047699.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a flower, a vase, a radiator, a desk, a cabinet, a lamp, a potted plant, two books, and a moniter.", "boxes_value": [[187.71875000720001, 191.014587392, 450.9940185504, 365.2025146368], [387.6074218844, 235.705444352, 450.9940185504, 269.717834496], [405.6442871416, 263.5337524224, 426.257812486, 291.362060544], [334.5274657988, 241.8895263744, 530.356323232, 306.8222656], [341.7421874756, 281.0552978432, 483.9757080308, 336.7119140864], [246.1407470712, 259.3851318272, 314.352417, 361.3435058688], [187.71875000720001, 211.9901733376, 206.7820434588, 247.2925414912], [195.48522950240002, 226.817138688, 255.499267586, 365.2025146368], [424.6707763424, 314.4274902528, 452.35595701840003, 328.018371584], [426.30676269, 304.360168448, 456.76037597320004, 316.189270016], [231.14050289800002, 191.014587392, 306.90063473559997, 286.6380615168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8, 9], [10]]}, {"image_path": "objects365_v1_00047699_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a flower, a vase, a radiator, a desk, a cabinet, a lamp, a potted plant, two books, and a moniter.", "boxes_value": [[66.71875000720001, 44.01458739200001, 329.9940185504, 218.2025146368], [266.6074218844, 88.705444352, 329.9940185504, 122.71783449600002], [284.6442871416, 116.53375242240003, 305.257812486, 144.36206054399997], [213.5274657988, 94.88952637439999, 395, 159.82226559999998], [220.74218747560002, 134.05529784319998, 362.9757080308, 189.7119140864], [125.1407470712, 112.38513182719998, 193.352417, 214.34350586879998], [66.71875000720001, 64.9901733376, 85.78204345879999, 100.29254149120001], [74.48522950240002, 79.817138688, 134.499267586, 218.2025146368], [303.6707763424, 167.4274902528, 331.35595701840003, 181.01837158400002], [305.30676269, 157.36016844800002, 335.76037597320004, 169.18927001600002], [110.14050289800002, 44.01458739200001, 185.90063473559997, 139.6380615168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7], [8, 9], [10]]}, {"image_path": "objects365_v1_00047700.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[236.7294311406, 134.0165405184, 340.5032348542, 334.5741577216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047700_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[26.729431140600013, 51.01654051840001, 130.5032348542, 251.5741577216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047700.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two helmets, a sneakers, and a gloves.", "boxes_value": [[236.7294311406, 134.0165405184, 340.5032348542, 334.5741577216], [236.7294311406, 134.0165405184, 340.5032348542, 334.5741577216], [276.1936035479, 79.2018432512, 355.6411132594, 307.3618774528], [251.7565307706, 134.338439936, 287.8318481447, 172.6485595648], [246.3488159127, 223.383361792, 274.9641113429, 257.3933715968], [324.4209594403, 298.2053833216, 338.8961792247, 333.857238784], [227.72894285709998, 310.6249389568, 245.8967895743, 331.0084228608]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047700_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two helmets, a sneakers, and a gloves.", "boxes_value": [[26.729431140600013, 51.01654051840001, 130.5032348542, 251.5741577216], [26.729431140600013, 51.01654051840001, 130.5032348542, 251.5741577216], [66.19360354790001, 0, 145.64111325940002, 224.3618774528], [41.756530770599994, 51.338439935999986, 77.83184814470002, 89.6485595648], [36.34881591269999, 140.383361792, 64.96411134290003, 174.39337159680002], [114.42095944030001, 215.20538332159998, 128.8961792247, 250.857238784], [17.72894285709998, 227.6249389568, 35.89678957429999, 248.0084228608]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047707.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object.", "boxes_value": [[0, 279.6895141376, 237.37768556460003, 362.8531493888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047707_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object.", "boxes_value": [[0, 21.689514137599986, 237.37768556460003, 104.85314938879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047707.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, a bed, two pillows, and a nightstand.", "boxes_value": [[0, 279.6895141376, 237.37768556460003, 362.8531493888], [155.3048095696, 231.296447744, 215.5096435233, 317.7015380992], [0, 239.7686767616, 388.2022705079, 511.6777954304], [5.024536148899999, 279.6895141376, 128.7128906431, 356.5101318144], [0, 309.6425170944, 24.7583007909, 362.8531493888], [139.8531494217, 303.2040405504, 237.37768556460003, 339.5745849856]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047707_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object. For your reference, objects involved in this region include a vase, a bed, two pillows, and a nightstand.", "boxes_value": [[0, 21.689514137599986, 237.37768556460003, 104.85314938879998], [155.3048095696, 0, 215.5096435233, 59.70153809919998], [0, 0, 296, 125], [5.024536148899999, 21.689514137599986, 128.7128906431, 98.51013181439998], [0, 51.64251709439998, 24.7583007909, 104.85314938879998], [139.8531494217, 45.20404055040001, 237.37768556460003, 81.57458498559998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047710.jpg", "text": "Kindly describe what I should be seeing in the area of image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[248.82586668800002, 455.6879272448, 478.837280264, 497.8457169408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047710_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[57.82586668800002, 10.687927244799994, 287.837280264, 52.8457169408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047710.jpg", "text": "Kindly describe what I should be seeing in the area of image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two bicycles.", "boxes_value": [[248.82586668800002, 455.6879272448, 478.837280264, 497.8457169408], [248.82586668800002, 455.9485473792, 262.9795532, 493.1788940288], [396.153930648, 455.6879272448, 410.056457496, 490.4442748928], [466.754760752, 458.8400879104, 478.837280264, 487.613891584], [319.00968310400003, 475.9704137216, 353.10942044, 497.8457169408], [400.08353716799996, 474.7198088192, 418.518724344, 493.1549382656]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047710_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two bicycles.", "boxes_value": [[57.82586668800002, 10.687927244799994, 287.837280264, 52.8457169408], [57.82586668800002, 10.948547379200022, 71.9795532, 48.178894028800016], [205.15393064800003, 10.687927244799994, 219.056457496, 45.444274892800024], [275.754760752, 13.840087910399973, 287.837280264, 42.61389158399999], [128.00968310400003, 30.970413721599982, 162.10942044, 52.8457169408], [209.08353716799996, 29.71980881920001, 227.51872434400002, 48.15493826559998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047711.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[452.435058624, 240.210021984, 588.1455078399999, 325.06140134400005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047711_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[34.43505862400002, 22.210021984000008, 170.14550783999994, 107.06140134400005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047711.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two bottles, two pots, a plate, and a bakset.", "boxes_value": [[452.435058624, 240.210021984, 588.1455078399999, 325.06140134400005], [490.33129881599996, 240.210021984, 513.5985107199999, 303.875000016], [438.9049072, 270.765747072, 511.923461888, 332.76232910399995], [526.430053696, 298.03808592, 586.682006848, 325.06140134400005], [518.2532958720001, 254.228698752, 563.299438464, 297.718505856], [452.435058624, 243.891723648, 472.80017088, 280.599121104], [513.434326144, 277.805358864, 588.1455078399999, 300.243347184]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4], [6]]}, {"image_path": "objects365_v1_00047711_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two bottles, two pots, a plate, and a bakset.", "boxes_value": [[34.43505862400002, 22.210021984000008, 170.14550783999994, 107.06140134400005], [72.33129881599996, 22.210021984000008, 95.59851071999992, 85.875000016], [20.904907200000025, 52.76574707200001, 93.92346188800002, 114.76232910399995], [108.43005369599996, 80.03808592000001, 168.68200684800001, 107.06140134400005], [100.25329587200008, 36.228698752000014, 145.299438464, 79.71850585599998], [34.43505862400002, 25.89172364800001, 54.800170879999996, 62.599121104000005], [95.43432614400001, 59.80535886400003, 170.14550783999994, 82.24334718400002]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4], [6]]}, {"image_path": "objects365_v1_00047712.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify.", "boxes_value": [[327.9733886706, 297.6987915264, 555.5101318444, 511.8909912064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047712_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify.", "boxes_value": [[56.9733886706, 53.69879152639999, 284.5101318444, 267.8909912064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047712.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, four desks, a bench, and a toilet paper.", "boxes_value": [[327.9733886706, 297.6987915264, 555.5101318444, 511.8909912064], [383.88073731170005, 362.4592285184, 555.5101318444, 511.4349975552], [217.2304077248, 467.2575683584, 449.2525634563, 511.2775878656], [498.0395507662, 308.7344970752, 577.7343749826, 398.4328613376], [483.367675748, 310.0682983424, 526.0494384797, 364.7542724608], [392.57714846289997, 282.4216308736, 436.67431640940003, 339.2580566528], [312.2597045883, 328.2068481536, 384.0751953345, 383.8233032192], [340.6079101365, 320.6473388544, 388.6649170182, 358.9849243136], [421.0628662259, 297.6987915264, 436.4517822373, 342.785949696], [327.9733886706, 489.5112914944, 351.87890624799996, 511.8909912064]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4, 6, 8], [7], [9]]}, {"image_path": "objects365_v1_00047712_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, four desks, a bench, and a toilet paper.", "boxes_value": [[56.9733886706, 53.69879152639999, 284.5101318444, 267.8909912064], [112.88073731170005, 118.45922851839998, 284.5101318444, 267.4349975552], [0, 223.25756835840002, 178.25256345629998, 267.2775878656], [227.03955076620002, 64.73449707520001, 306.7343749826, 154.4328613376], [212.367675748, 66.0682983424, 255.04943847970003, 120.7542724608], [121.57714846289997, 38.42163087360001, 165.67431640940003, 95.25805665280001], [41.259704588299996, 84.20684815359999, 113.0751953345, 139.8233032192], [69.60791013649998, 76.6473388544, 117.66491701820001, 114.9849243136], [150.0628662259, 53.69879152639999, 165.4517822373, 98.78594969599999], [56.9733886706, 245.51129149439998, 80.87890624799996, 267.8909912064]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4, 6, 8], [7], [9]]}, {"image_path": "objects365_v1_00047713.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[266.0760497664, 139.7493285888, 348.4044189696, 226.6812744192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047713_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[21.07604976639999, 21.74932858880001, 103.40441896959999, 108.68127441920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047713.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[266.0760497664, 139.7493285888, 348.4044189696, 226.6812744192], [281.2067870976, 174.0322875904, 305.116210944, 226.6812744192], [309.7048339968, 168.4507446272, 326.6104736256, 200.8129882624], [324.97094730239996, 161.4534301696, 338.6588134656, 183.5061035008], [266.0760497664, 141.2282714624, 292.204223616, 163.6591186432], [324.2481689088, 139.7493285888, 348.4044189696, 164.3985595904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047713_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[21.07604976639999, 21.74932858880001, 103.40441896959999, 108.68127441920001], [36.2067870976, 56.03228759039999, 60.11621094399999, 108.68127441920001], [64.7048339968, 50.45074462720001, 81.61047362559998, 82.81298826240001], [79.97094730239996, 43.45343016960001, 93.65881346560002, 65.50610350080001], [21.07604976639999, 23.228271462399988, 47.20422361599998, 45.6591186432], [79.24816890879998, 21.74932858880001, 103.40441896959999, 46.3985595904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047720.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[156.6400756992, 364.2104492032, 400.3294677504, 466.7227172864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047720_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[61.6400756992, 26.210449203200028, 305.3294677504, 128.72271728639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047720.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, a trash bin can, and a motorcycle.", "boxes_value": [[156.6400756992, 364.2104492032, 400.3294677504, 466.7227172864], [156.6400756992, 396.4122925056, 182.6385497856, 466.7227172864], [229.2396240384, 395.758239744, 258.99890135040005, 464.5970459136], [272.00817868800004, 359.9089355264, 286.90454100479997, 388.3801880064], [379.0246582272, 393.6640625152, 400.3294677504, 450.0466308608], [277.0751953152, 422.8610229248, 297.2573242368, 463.2037353472], [266.5732422144, 364.2104492032, 298.3610840064, 387.8475952128]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047720_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, a trash bin can, and a motorcycle.", "boxes_value": [[61.6400756992, 26.210449203200028, 305.3294677504, 128.72271728639998], [61.6400756992, 58.41229250560002, 87.63854978559999, 128.72271728639998], [134.2396240384, 57.75823974399998, 163.99890135040005, 126.59704591360003], [177.00817868800004, 21.9089355264, 191.90454100479997, 50.380188006399976], [284.0246582272, 55.66406251519999, 305.3294677504, 112.04663086080001], [182.0751953152, 84.86102292480001, 202.2573242368, 125.2037353472], [171.5732422144, 26.210449203200028, 203.3610840064, 49.8475952128]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047722.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[575.8116455214, 166.5101318144, 763.8531493972, 296.1549682688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047722_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[47.81164552140001, 32.51013181440001, 235.8531493972, 162.15496826880002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047722.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, three people, and a gloves.", "boxes_value": [[575.8116455214, 166.5101318144, 763.8531493972, 296.1549682688], [658.2937011806, 277.0977783296, 675.3492431896, 294.9113159168], [591.8696289284001, 168.1874389504, 623.7554931397999, 231.1485595648], [614.297851595, 175.4833374208, 648.8858642394, 240.6062622208], [745.5187988298, 166.5101318144, 763.8531493972, 202.1159668224], [575.8116455214, 258.95465088, 601.1755371332, 296.1549682688]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047722_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a soccer, three people, and a gloves.", "boxes_value": [[47.81164552140001, 32.51013181440001, 235.8531493972, 162.15496826880002], [130.2937011806, 143.09777832959998, 147.34924318959997, 160.9113159168], [63.86962892840006, 34.18743895040001, 95.75549313979991, 97.1485595648], [86.297851595, 41.48333742080001, 120.88586423940001, 106.6062622208], [217.51879882979995, 32.51013181440001, 235.8531493972, 68.1159668224], [47.81164552140001, 124.95465087999997, 73.17553713320001, 162.15496826880002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047724.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[314.5202636624, 244.9672851456, 669.7252197578, 414.576110848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047724_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention.", "boxes_value": [[89.5202636624, 42.96728514559999, 444.7252197578, 212.57611084799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047724.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a fire truck.", "boxes_value": [[314.5202636624, 244.9672851456, 669.7252197578, 414.576110848], [619.1145019266, 258.4345092608, 669.7252197578, 412.6452026368], [547.104492197, 249.2171631104, 607.7618408241999, 411.0989380096], [487.6062011498, 244.9672851456, 543.2409668282, 411.8716430848], [394.10888673460005, 261.194091776, 452.061767573, 414.576110848], [314.5202636624, 261.9667968512, 369.3823242364, 413.4170532352], [272.6860351826, 171.6887206912, 514.4560546744, 397.2052612096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047724_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a fire truck.", "boxes_value": [[89.5202636624, 42.96728514559999, 444.7252197578, 212.57611084799998], [394.11450192660004, 56.434509260799985, 444.7252197578, 210.64520263679998], [322.104492197, 47.21716311040001, 382.7618408241999, 209.09893800959998], [262.6062011498, 42.96728514559999, 318.24096682820004, 209.87164308479998], [169.10888673460005, 59.19409177599999, 227.061767573, 212.57611084799998], [89.5202636624, 59.9667968512, 144.3823242364, 211.4170532352], [47.686035182599994, 0, 289.4560546744, 195.2052612096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047725.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[107.077636736, 267.8829956096, 445.403686528, 356.6099853312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047725_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[85.077636736, 22.882995609600016, 423.403686528, 111.6099853312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047725.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five dogs.", "boxes_value": [[107.077636736, 267.8829956096, 445.403686528, 356.6099853312], [107.077636736, 275.1031494144, 135.77301024000002, 341.1950683648], [158.359069824, 267.8829956096, 206.122985856, 352.3029785088], [285.699462912, 273.1421508608, 379.36187744, 356.6099853312], [333.00531008, 308.2899780096, 377.94451904, 355.5697631744], [376.66351315199995, 272.5878296064, 445.403686528, 353.6797485568]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047725_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five dogs.", "boxes_value": [[85.077636736, 22.882995609600016, 423.403686528, 111.6099853312], [85.077636736, 30.10314941439998, 113.77301024000002, 96.19506836480002], [136.359069824, 22.882995609600016, 184.122985856, 107.30297850879998], [263.699462912, 28.142150860799973, 357.36187744, 111.6099853312], [311.00531008, 63.28997800960002, 355.94451904, 110.56976317440001], [354.66351315199995, 27.58782960640002, 423.403686528, 108.6797485568]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047726.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[421.9947509478, 305.3831787008, 626.5589599399, 454.9267578368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047726_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[51.994750947800014, 38.38317870079999, 256.5589599399, 187.9267578368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047726.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[421.9947509478, 305.3831787008, 626.5589599399, 454.9267578368], [485.71972653619997, 409.462097152, 520.7498779012, 454.9267578368], [478.6391601317, 408.7167968768, 512.1787109188, 448.9641723392], [421.9947509478, 393.0650024448, 457.3974609182, 444.4922485248], [462.0557861271, 312.7299194368, 482.49890138719996, 337.3255615488], [615.0598144401, 305.3831787008, 626.5589599399, 340.2003784192]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047726_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[51.994750947800014, 38.38317870079999, 256.5589599399, 187.9267578368], [115.71972653619997, 142.462097152, 150.74987790119997, 187.9267578368], [108.63916013170001, 141.7167968768, 142.1787109188, 181.9641723392], [51.994750947800014, 126.06500244479997, 87.39746091820001, 177.4922485248], [92.05578612710002, 45.7299194368, 112.49890138719996, 70.32556154880001], [245.0598144401, 38.38317870079999, 256.5589599399, 73.20037841919998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047732.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[364.51867676160003, 37.3861694464, 473.5097124864, 416.401424896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047732_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[27.518676761600034, 37.3861694464, 136.50971248640002, 416.401424896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047732.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a sneakers, and a hat.", "boxes_value": [[364.51867676160003, 37.3861694464, 473.5097124864, 416.401424896], [403.3094482176, 92.8880615424, 424.68225100800004, 123.122619648], [412.1713867008, 77.7707519488, 492.9707031552, 131.9844970496], [364.51867676160003, 37.3861694464, 393.6226806528, 87.22192384], [382.59864575999995, 353.5369345536, 412.3470206976, 416.401424896], [447.270232704, 77.428530432, 473.5097124864, 98.2030029312]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047732_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a sneakers, and a hat.", "boxes_value": [[27.518676761600034, 37.3861694464, 136.50971248640002, 416.401424896], [66.30944821759999, 92.8880615424, 87.68225100800004, 123.122619648], [75.17138670079999, 77.7707519488, 155.9707031552, 131.9844970496], [27.518676761600034, 37.3861694464, 56.6226806528, 87.22192384], [45.598645759999954, 353.5369345536, 75.34702069759999, 416.401424896], [110.27023270400002, 77.428530432, 136.50971248640002, 98.2030029312]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047733.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[211.6604614478, 108.399108864, 451.288208006, 419.259704576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047733_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[60.6604614478, 78.399108864, 300.288208006, 389.259704576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047733.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[211.6604614478, 108.399108864, 451.288208006, 419.259704576], [355.66467286629995, 219.7302856192, 451.288208006, 419.259704576], [219.1269531092, 232.7772216832, 282.1321410882, 379.3471679488], [303.3549194259, 281.854919424, 366.36010740489996, 388.6321411072], [211.6604614478, 166.4406128128, 239.0932616981, 217.3159179776], [262.6528930495, 108.399108864, 283.9069213692, 142.112426752]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047733_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[60.6604614478, 78.399108864, 300.288208006, 389.259704576], [204.66467286629995, 189.7302856192, 300.288208006, 389.259704576], [68.1269531092, 202.7772216832, 131.1321410882, 349.3471679488], [152.3549194259, 251.854919424, 215.36010740489996, 358.6321411072], [60.6604614478, 136.4406128128, 88.09326169810001, 187.3159179776], [111.6528930495, 78.399108864, 132.9069213692, 112.112426752]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047734.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[0, 202.68896485500002, 253.9112548864, 394.5309448246]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047734_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[0, 48.68896485500002, 253.9112548864, 240.5309448246]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047734.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a soccer, four people, and a sneakers.", "boxes_value": [[0, 202.68896485500002, 253.9112548864, 394.5309448246], [0, 321.4010009494, 118.5465088, 393.5803833146], [220.520996096, 235.4578857468, 280.6250610176, 293.45300290480003], [6.368041984, 174.7405395656, 269.083251968, 514.1143798732], [14.5488281088, 202.68896485500002, 126.6033325056, 394.5309448246], [100.2030639616, 202.68896485500002, 155.9369506816, 354.6372070466], [211.6708374016, 216.1824340554, 253.9112548864, 388.077514666], [211.3619384832, 352.4658203342, 241.8532104704, 409.0574951092]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047734_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a soccer, four people, and a sneakers.", "boxes_value": [[0, 48.68896485500002, 253.9112548864, 240.5309448246], [0, 167.4010009494, 118.5465088, 239.5803833146], [220.520996096, 81.4578857468, 280.6250610176, 139.45300290480003], [6.368041984, 20.74053956559999, 269.083251968, 288], [14.5488281088, 48.68896485500002, 126.6033325056, 240.5309448246], [100.2030639616, 48.68896485500002, 155.9369506816, 200.6372070466], [211.6708374016, 62.182434055399995, 253.9112548864, 234.077514666], [211.3619384832, 198.46582033419998, 241.8532104704, 255.05749510919998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00047740.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[168.12932850779998, 111.183752704, 342.96907305900004, 417.504767488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047740_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[44.12932850779998, 77.183752704, 218.96907305900004, 383.504767488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047740.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a hat, and three sneakers.", "boxes_value": [[168.12932850779998, 111.183752704, 342.96907305900004, 417.504767488], [167.8590698524, 111.76611328, 304.914184552, 416.7730713088], [250.2952250062, 111.183752704, 301.351809926, 141.8611452416], [168.12932850779998, 364.0326524928, 199.08686877429997, 411.524333568], [257.4840470362, 394.9901927424, 301.4578258314, 417.504767488], [314.1222741256, 341.8698679296, 342.96907305900004, 391.120500224]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047740_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a hat, and three sneakers.", "boxes_value": [[44.12932850779998, 77.183752704, 218.96907305900004, 383.504767488], [43.85906985240001, 77.76611328, 180.914184552, 382.7730713088], [126.29522500620001, 77.183752704, 177.351809926, 107.86114524160001], [44.12932850779998, 330.0326524928, 75.08686877429997, 377.524333568], [133.48404703620002, 360.9901927424, 177.4578258314, 383.504767488], [190.12227412559997, 307.8698679296, 218.96907305900004, 357.120500224]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047741.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference.", "boxes_value": [[92.1508178832, 109.1506957824, 458.38732913, 335.117895936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047741_crop.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference.", "boxes_value": [[92.1508178832, 57.15069578240001, 458.38732913, 283.117895936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047741.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a person, a handbag, a trash bin can, a bottle, and a camera.", "boxes_value": [[92.1508178832, 109.1506957824, 458.38732913, 335.117895936], [155.1419067615, 109.1506957824, 294.0694579982, 172.6874999808], [287.34039303590004, 55.4626464768, 490.53222656450004, 375.3933105664], [394.3041203831, 286.9282742784, 448.0407488622, 335.117895936], [92.1508178832, 119.5515136512, 126.4356689251, 160.9169311744], [255.00738524399998, 324.7235717632, 273.2292480407, 347.5009155072], [428.0823974282, 167.3666381824, 458.38732913, 198.7013549568]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047741_crop.jpg", "text": "What information can you give me about the coordinates in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a person, a handbag, a trash bin can, a bottle, and a camera.", "boxes_value": [[92.1508178832, 57.15069578240001, 458.38732913, 283.117895936], [155.1419067615, 57.15069578240001, 294.0694579982, 120.6874999808], [287.34039303590004, 3.4626464768000034, 490.53222656450004, 323.3933105664], [394.3041203831, 234.9282742784, 448.0407488622, 283.117895936], [92.1508178832, 67.5515136512, 126.4356689251, 108.91693117439999], [255.00738524399998, 272.7235717632, 273.2292480407, 295.5009155072], [428.0823974282, 115.36663818240001, 458.38732913, 146.7013549568]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047744.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[545.0556640228999, 5.784790016, 716.7443847273, 90.5635376128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047744_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[43.05566402289992, 5.784790016, 214.74438472730003, 90.5635376128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047744.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two gloves, and two helmets.", "boxes_value": [[545.0556640228999, 5.784790016, 716.7443847273, 90.5635376128], [544.4107666111, 26.5643310592, 719.0659179561001, 476.6374511616], [634.6173095696, 7.3714599424, 791.998901329, 477.597106944], [547.6605224896999, 49.5950317568, 580.1037597807, 104.061828608], [545.0556640228999, 27.5715332096, 612.5471191368999, 72.5658569216], [648.068847636, 5.784790016, 716.7443847273, 58.120300288], [650.200195344, 67.5927734272, 669.6187744362, 90.5635376128]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5]]}, {"image_path": "objects365_v1_00047744_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two gloves, and two helmets.", "boxes_value": [[43.05566402289992, 5.784790016, 214.74438472730003, 90.5635376128], [42.41076661110003, 26.5643310592, 217.06591795610007, 111], [132.61730956960002, 7.3714599424, 257, 111], [45.66052248969993, 49.5950317568, 78.10375978069999, 104.061828608], [43.05566402289992, 27.5715332096, 110.54711913689994, 72.5658569216], [146.068847636, 5.784790016, 214.74438472730003, 58.120300288], [148.200195344, 67.5927734272, 167.61877443620006, 90.5635376128]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5]]}, {"image_path": "objects365_v1_00047745.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference.", "boxes_value": [[594.9565429733, 299.5099487232, 682.502929675, 392.6290893312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047745_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference.", "boxes_value": [[21.956542973299975, 23.50994872320001, 109.50292967500002, 116.62908933120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047745.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference. For your reference, objects involved in this region include a knife, two chairs, a desk, and a napkin.", "boxes_value": [[594.9565429733, 299.5099487232, 682.502929675, 392.6290893312], [594.9565429733, 361.9562377728, 677.5518798866, 377.0307006976], [598.5867919942, 249.307678208, 675.1118164207, 357.9770507776], [311.6940917983, 318.0540161024, 682.2485351238, 511.7913818112], [629.5063476780999, 362.832153344, 681.3940429759999, 392.6290893312], [661.0013427462, 299.5099487232, 682.502929675, 357.2956542976]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047745_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference. For your reference, objects involved in this region include a knife, two chairs, a desk, and a napkin.", "boxes_value": [[21.956542973299975, 23.50994872320001, 109.50292967500002, 116.62908933120002], [21.956542973299975, 85.95623777280002, 104.55187988659998, 101.03070069760003], [25.586791994199984, 0, 102.11181642070005, 81.97705077760003], [0, 42.0540161024, 109.24853512380002, 139], [56.50634767809993, 86.832153344, 108.39404297599992, 116.62908933120002], [88.00134274619995, 23.50994872320001, 109.50292967500002, 81.29565429759998]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047748.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[562.1586913968, 264.7014770688, 896.653564416, 346.2745971712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047748_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[84.15869139680001, 20.70147706879999, 418.653564416, 102.27459717120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047748.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, and four people.", "boxes_value": [[562.1586913968, 264.7014770688, 896.653564416, 346.2745971712], [853.9028319888, 264.7014770688, 896.653564416, 317.002929664], [745.4669189088, 271.1984252928, 797.3674316016001, 346.2745971712], [781.0465087872, 264.017211904, 810.7504883232, 339.0933837824], [562.1586913968, 266.8030395392, 576.3886719024, 326.3442382848], [803.1563720304, 287.6421508608, 816.8658447696, 316.460205056]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047748_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, and four people.", "boxes_value": [[84.15869139680001, 20.70147706879999, 418.653564416, 102.27459717120001], [375.9028319888, 20.70147706879999, 418.653564416, 73.00292966400002], [267.46691890880004, 27.198425292799982, 319.36743160160006, 102.27459717120001], [303.04650878719997, 20.01721190400002, 332.7504883232, 95.0933837824], [84.15869139680001, 22.803039539199972, 98.3886719024, 82.34423828479999], [325.1563720304, 43.64215086079997, 338.8658447696, 72.460205056]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047749.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[497.8583984, 259.0088500868, 624.4112548639999, 313.4976806522]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047749_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[31.8583984, 14.00885008680001, 158.41125486399994, 68.49768065220002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047749.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a pot, a bowl, two bottles, and a bakset.", "boxes_value": [[497.8583984, 259.0088500868, 624.4112548639999, 313.4976806522], [529.607177772, 291.4035034108, 595.487915028, 310.2839355528], [581.8297119040001, 279.7538452214, 624.4112548639999, 313.4976806522], [497.8583984, 259.0088500868, 510.84436031999996, 292.6542968536], [509.79919433599997, 259.8256836098, 533.7052002199999, 295.83221437000003], [539.810302716, 278.163146987, 583.14208986, 299.829101539]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047749_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a pot, a bowl, two bottles, and a bakset.", "boxes_value": [[31.8583984, 14.00885008680001, 158.41125486399994, 68.49768065220002], [63.607177772, 46.4035034108, 129.48791502799997, 65.28393555280002], [115.82971190400008, 34.75384522140001, 158.41125486399994, 68.49768065220002], [31.8583984, 14.00885008680001, 44.844360319999964, 47.65429685359999], [43.79919433599997, 14.825683609800024, 67.70520021999994, 50.83221437000003], [73.81030271600002, 33.163146987000005, 117.14208986000006, 54.82910153900002]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047752.jpg", "text": "Analyze and describe the region in the included photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[7.0280151127999995, 0, 311.4527587952, 231.6399536128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047752_crop.jpg", "text": "Analyze and describe the region in the included photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[7.0280151127999995, 0, 311.4527587952, 231.6399536128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047752.jpg", "text": "Analyze and describe the region in the included photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, two lamps, a clock, and a storage box.", "boxes_value": [[7.0280151127999995, 0, 311.4527587952, 231.6399536128], [121.62030027620001, 105.6934203904, 283.1604003558, 231.6399536128], [159.03924560040002, 0, 243.9162597872, 52.7593383936], [277.68444824119996, 60.9732665856, 311.4527587952, 111.1693115392], [62.29766843080001, 74.6630859264, 94.24066164039999, 117.5579223552], [7.0280151127999995, 141.814514176, 61.589172354599995, 209.4052734464]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047752_crop.jpg", "text": "Analyze and describe the region in the included photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, two lamps, a clock, and a storage box.", "boxes_value": [[7.0280151127999995, 0, 311.4527587952, 231.6399536128], [121.62030027620001, 105.6934203904, 283.1604003558, 231.6399536128], [159.03924560040002, 0, 243.9162597872, 52.7593383936], [277.68444824119996, 60.9732665856, 311.4527587952, 111.1693115392], [62.29766843080001, 74.6630859264, 94.24066164039999, 117.5579223552], [7.0280151127999995, 141.814514176, 61.589172354599995, 209.4052734464]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047753.jpg", "text": "Kindly share your observations about the rectangular region within . Give coordinates for the items you reference.", "boxes_value": [[124.176147456, 141.7405395456, 366.911926272, 687.4093017600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047753_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Give coordinates for the items you reference.", "boxes_value": [[61.176147455999995, 136.7405395456, 303.911926272, 682.4093017600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047753.jpg", "text": "Kindly share your observations about the rectangular region within . Give coordinates for the items you reference. For your reference, objects involved in this region include an american football, three people, two helmets, three sneakers, and two gloves.", "boxes_value": [[124.176147456, 141.7405395456, 366.911926272, 687.4093017600001], [167.9529418752, 288.0008544768, 256.4536742912, 329.38171384320003], [140.0932617216, 74.60101317120001, 258.4766845952, 598.8704834304001], [23.1440429568, 117.3782348544, 455.4508056576, 664.5284424192], [124.176147456, 141.7405395456, 366.911926272, 687.4093017600001], [204.8393554432, 119.435485824, 297.3125610496, 208.0755614976], [241.2536621056, 143.39227292159998, 344.2678833152, 265.09277345280003], [152.613586432, 492.2030029056, 200.0480346624, 589.9467773184], [308.3327026176, 641.6933594112, 365.3498534912, 683.85729984], [340.9138793984, 599.0502929664001, 403.2015380992, 666.1292724479999], [204.6490478592, 295.150390656, 265.3281860096, 355.0788574464], [283.9323120128, 281.3652344064, 327.074951168, 325.1588134656]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7, 8, 9], [10, 11]]}, {"image_path": "objects365_v1_00047753_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Give coordinates for the items you reference. For your reference, objects involved in this region include an american football, three people, two helmets, three sneakers, and two gloves.", "boxes_value": [[61.176147455999995, 136.7405395456, 303.911926272, 682.4093017600001], [104.9529418752, 283.0008544768, 193.4536742912, 324.38171384320003], [77.09326172159999, 69.60101317120001, 195.47668459520003, 593.8704834304001], [0, 112.3782348544, 364, 659.5284424192], [61.176147455999995, 136.7405395456, 303.911926272, 682.4093017600001], [141.8393554432, 114.435485824, 234.3125610496, 203.0755614976], [178.2536621056, 138.39227292159998, 281.2678833152, 260.09277345280003], [89.613586432, 487.2030029056, 137.0480346624, 584.9467773184], [245.33270261759998, 636.6933594112, 302.3498534912, 678.85729984], [277.9138793984, 594.0502929664001, 340.2015380992, 661.1292724479999], [141.6490478592, 290.150390656, 202.32818600960002, 350.0788574464], [220.93231201280003, 276.3652344064, 264.074951168, 320.1588134656]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7, 8, 9], [10, 11]]}, {"image_path": "objects365_v1_00047754.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[195.3130582865, 290.294982912, 664.9776611016, 392.4291992064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047754_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[118.3130582865, 26.294982912000023, 587.9776611016, 128.42919920640003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047754.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, two helmets, a ladder, and a pickup truck.", "boxes_value": [[195.3130582865, 290.294982912, 664.9776611016, 392.4291992064], [614.7709960777, 303.3498534912, 644.0673828204, 344.6124877824], [215.3029784906, 290.1253662208, 266.11157228039997, 448.6261596672], [261.6934814571, 297.8571167232, 314.7111816002, 449.7306518528], [289.85913082279995, 293.9912109568, 345.0859374886, 453.5965576192], [195.3130582865, 297.6399504896, 216.62043456130002, 311.9314833408], [412.5443576331, 295.3013360128, 434.37142600389996, 311.9314833408], [636.1495361067, 290.294982912, 664.9776611016, 392.4291992064], [135.0461425467, 175.7013549568, 653.6680908065, 457.6976928768]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7], [8]]}, {"image_path": "objects365_v1_00047754_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, two helmets, a ladder, and a pickup truck.", "boxes_value": [[118.3130582865, 26.294982912000023, 587.9776611016, 128.42919920640003], [537.7709960777, 39.34985349120001, 567.0673828204, 80.61248778240002], [138.3029784906, 26.125366220800004, 189.11157228039997, 153], [184.6934814571, 33.85711672320002, 237.7111816002, 153], [212.85913082279995, 29.99121095679999, 268.0859374886, 153], [118.3130582865, 33.63995048959998, 139.62043456130002, 47.931483340800014], [335.5443576331, 31.301336012799993, 357.37142600389996, 47.931483340800014], [559.1495361067, 26.294982912000023, 587.9776611016, 128.42919920640003], [58.0461425467, 0, 576.6680908065, 153]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7], [8]]}, {"image_path": "objects365_v1_00047755.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Remember to mention the objects and their corresponding locations.", "boxes_value": [[121.3314819584, 106.38806153280001, 151.1861572096, 268.5616455]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047755_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Remember to mention the objects and their corresponding locations.", "boxes_value": [[8.331481958400005, 41.38806153280001, 38.1861572096, 203.5616455]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047755.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, and four wine glasses.", "boxes_value": [[121.3314819584, 106.38806153280001, 151.1861572096, 268.5616455], [88.3386840576, 62.8644409272, 206.5901489152, 288.9211425712], [132.7573242368, 106.38806153280001, 148.2375488512, 134.3998413376], [133.8630981632, 149.5114745848, 150.0804443136, 178.260437012], [134.2316284416, 194.10919189359998, 151.1861572096, 222.12097169839998], [121.3314819584, 237.2326050088, 137.5488281088, 268.5616455]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047755_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, and four wine glasses.", "boxes_value": [[8.331481958400005, 41.38806153280001, 38.1861572096, 203.5616455], [0, 0, 45, 223.9211425712], [19.757324236800002, 41.38806153280001, 35.23754885119999, 69.39984133760001], [20.863098163199993, 84.5114745848, 37.0804443136, 113.26043701200001], [21.23162844160001, 129.10919189359998, 38.1861572096, 157.12097169839998], [8.331481958400005, 172.2326050088, 24.548828108799995, 203.5616455]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047756.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates.", "boxes_value": [[109.7562866424, 0.4400024576, 714.5504150415001, 267.9901123072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047756_crop.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates.", "boxes_value": [[109.7562866424, 0.4400024576, 714.5504150415001, 267.9901123072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047756.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a glasses, a tie, and a microphone.", "boxes_value": [[109.7562866424, 0.4400024576, 714.5504150415001, 267.9901123072], [553.1016846039, 0.4400024576, 579.7280273544001, 55.805908224], [679.048583973, 0.8626098688, 714.5504150415001, 24.5305175552], [109.7562866424, 99.003784192, 152.9504394519, 112.90100096], [582.1873779576, 237.5564575232, 596.0745849312, 267.9901123072], [168.2849731767, 161.0613403136, 185.6112060669, 228.6337890816]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047756_crop.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a glasses, a tie, and a microphone.", "boxes_value": [[109.7562866424, 0.4400024576, 714.5504150415001, 267.9901123072], [553.1016846039, 0.4400024576, 579.7280273544001, 55.805908224], [679.048583973, 0.8626098688, 714.5504150415001, 24.5305175552], [109.7562866424, 99.003784192, 152.9504394519, 112.90100096], [582.1873779576, 237.5564575232, 596.0745849312, 267.9901123072], [168.2849731767, 161.0613403136, 185.6112060669, 228.6337890816]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047758.jpg", "text": "Please enlighten me about the region in the given photo . Give coordinates for the items you reference.", "boxes_value": [[0.044128435199999996, 338.2351074304, 548.975341824, 458.9753417728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047758_crop.jpg", "text": "Please enlighten me about the region in the given photo . Give coordinates for the items you reference.", "boxes_value": [[0.044128435199999996, 30.235107430399978, 548.975341824, 150.9753417728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047758.jpg", "text": "Please enlighten me about the region in the given photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, a trash bin can, a bicycle, a car, and a suv.", "boxes_value": [[0.044128435199999996, 338.2351074304, 548.975341824, 458.9753417728], [473.8231201536, 433.37475584, 548.975341824, 450.5147704832], [84.9137573376, 345.7493285888, 141.7543335168, 425.1101074432], [114.10101319679998, 371.053955072, 157.317871104, 431.6013793792], [0.044128435199999996, 349.8679199232, 62.5619506944, 458.9753417728], [219.3246459648, 338.2351074304, 287.6550292992, 365.4042358272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047758_crop.jpg", "text": "Please enlighten me about the region in the given photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, a trash bin can, a bicycle, a car, and a suv.", "boxes_value": [[0.044128435199999996, 30.235107430399978, 548.975341824, 150.9753417728], [473.8231201536, 125.37475583999998, 548.975341824, 142.51477048319998], [84.9137573376, 37.74932858879998, 141.7543335168, 117.11010744319998], [114.10101319679998, 63.05395507200001, 157.317871104, 123.60137937920001], [0.044128435199999996, 41.86791992320002, 62.5619506944, 150.9753417728], [219.3246459648, 30.235107430399978, 287.6550292992, 57.40423582720001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047762.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe.", "boxes_value": [[452.343505864, 226.2137451008, 583.2880859482, 251.9014892544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047762_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe.", "boxes_value": [[33.34350586400001, 7.213745100799997, 164.28808594819998, 32.90148925439999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047762.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two glasses, and a hat.", "boxes_value": [[452.343505864, 226.2137451008, 583.2880859482, 251.9014892544], [397.497924785, 204.0147704832, 524.7401123338, 511.1713867264], [524.9066162372, 215.385376, 632.2424316312, 510.5588379136], [452.343505864, 226.2137451008, 484.2965087524, 238.744384768], [555.7208252044001, 239.3709106688, 583.2880859482, 251.9014892544], [550.7086181653999, 216.8157959168, 589.5534668345999, 243.13006592]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047762_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two glasses, and a hat.", "boxes_value": [[33.34350586400001, 7.213745100799997, 164.28808594819998, 32.90148925439999], [0, 0, 105.74011233379997, 39], [105.90661623719996, 0, 197, 39], [33.34350586400001, 7.213745100799997, 65.29650875239997, 19.744384768000003], [136.72082520440006, 20.370910668800008, 164.28808594819998, 32.90148925439999], [131.7086181653999, 0, 170.55346683459993, 24.130065919999993]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047768.jpg", "text": "In , what elements can be found within the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[0.4948730457, 179.936279296, 418.3999023633, 511.7719726592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047768_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[0.4948730457, 83.93627929600001, 418.3999023633, 415.7719726592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047768.jpg", "text": "In , what elements can be found within the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, two people, and a handbag.", "boxes_value": [[0.4948730457, 179.936279296, 418.3999023633, 511.7719726592], [206.7228393711, 235.5415038976, 252.0159912011, 281.455078144], [360.934082, 260.4805297664, 455.03906253289995, 343.4009399296], [357.0773925934, 257.0094604288, 418.3999023633, 319.8746948096], [0.4948730457, 201.8987426816, 13.411132831200002, 256.9053955072], [31.3319091477, 179.936279296, 149.6099853484, 511.7719726592], [40.1282734558, 481.1940827136, 105.2851095731, 511.3719857664]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047768_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, two people, and a handbag.", "boxes_value": [[0.4948730457, 83.93627929600001, 418.3999023633, 415.7719726592], [206.7228393711, 139.5415038976, 252.0159912011, 185.45507814400003], [360.934082, 164.48052976640002, 455.03906253289995, 247.40093992959999], [357.0773925934, 161.00946042880003, 418.3999023633, 223.8746948096], [0.4948730457, 105.8987426816, 13.411132831200002, 160.9053955072], [31.3319091477, 83.93627929600001, 149.6099853484, 415.7719726592], [40.1282734558, 385.1940827136, 105.2851095731, 415.3719857664]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047771.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for each element you describe.", "boxes_value": [[489.459716787, 294.2941894656, 653.4278564711, 358.3211670016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047771_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for each element you describe.", "boxes_value": [[41.45971678699999, 16.294189465600027, 205.42785647109997, 80.32116700159997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047771.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, and a gloves.", "boxes_value": [[489.459716787, 294.2941894656, 653.4278564711, 358.3211670016], [488.6376952788, 190.100036608, 631.4201659959, 392.295288064], [580.6660156094, 110.2896728576, 682.1058349776, 358.3450927616], [489.459716787, 307.2932128768, 511.73950196330003, 358.3211670016], [633.0611572317, 339.5259399168, 653.4278564711, 356.724548352], [535.707031228, 294.2941894656, 564.1374511937, 322.724548352]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047771_crop.jpg", "text": "I would like a description of the content within the bbox in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, and a gloves.", "boxes_value": [[41.45971678699999, 16.294189465600027, 205.42785647109997, 80.32116700159997], [40.63769527879998, 0, 183.4201659959, 96], [132.6660156094, 0, 234.1058349776, 80.34509276159997], [41.45971678699999, 29.2932128768, 63.73950196330003, 80.32116700159997], [185.0611572317, 61.525939916799985, 205.42785647109997, 78.724548352], [87.707031228, 16.294189465600027, 116.13745119370003, 44.724548352]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047775.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[473.04956051880004, 190.7843017728, 555.3276417741, 421.5367309312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047775_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[21.049560518800035, 57.784301772800006, 103.32764177410002, 288.5367309312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047775.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a gloves, and a sneakers.", "boxes_value": [[473.04956051880004, 190.7843017728, 555.3276417741, 421.5367309312], [509.7630614887, 190.7843017728, 527.5202636867, 219.1958618112], [417.78027341919994, 159.2591552512, 556.3059081942, 511.2772216832], [314.006103519, 153.8690796032, 506.6557617446, 512.7779540992], [473.04956051880004, 342.608056064, 506.4875379762, 406.2718505984], [530.1230860424, 373.5569741824, 555.3276417741, 421.5367309312]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047775_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a gloves, and a sneakers.", "boxes_value": [[21.049560518800035, 57.784301772800006, 103.32764177410002, 288.5367309312], [57.763061488699975, 57.784301772800006, 75.52026368669999, 86.19586181119999], [0, 26.259155251200013, 104.30590819420001, 346], [0, 20.869079603199992, 54.6557617446, 346], [21.049560518800035, 209.60805606399998, 54.487537976199974, 273.2718505984], [78.12308604240002, 240.55697418239998, 103.32764177410002, 288.5367309312]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047777.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[399.08471679999997, 180.66467284799998, 640.014892608, 256.274230944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047777_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[61.08471679999997, 19.66467284799998, 302, 95.27423094400001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047777.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cars, a suv, and a van.", "boxes_value": [[399.08471679999997, 180.66467284799998, 640.014892608, 256.274230944], [399.08471679999997, 180.66467284799998, 471.041870144, 217.27447512], [596.785034176, 182.674011216, 640.014892608, 203.46813964799998], [501.02270508799995, 182.263610832, 606.361328128, 226.040649408], [582.831054656, 195.533508288, 640.014892608, 256.274230944], [392.87133792, 208.125, 639.820800768, 335.67041016]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047777_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cars, a suv, and a van.", "boxes_value": [[61.08471679999997, 19.66467284799998, 302, 95.27423094400001], [61.08471679999997, 19.66467284799998, 133.04187014399997, 56.274475120000005], [258.78503417599995, 21.674011215999997, 302, 42.468139647999976], [163.02270508799995, 21.263610832000012, 268.36132812799997, 65.04064940800001], [244.831054656, 34.53350828800001, 302, 95.27423094400001], [54.871337919999974, 47.125, 301.820800768, 114]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047781.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[98.04486082559998, 295.6629638656, 228.2186889984, 358.8233032192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047781_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[33.04486082559998, 16.662963865599977, 163.2186889984, 79.8233032192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047781.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a chair, a person, and two bowls.", "boxes_value": [[98.04486082559998, 295.6629638656, 228.2186889984, 358.8233032192], [27.4331665152, 0, 766.684448256, 412.3458251776], [86.7557983488, 287.3119506944, 422.613159168, 510.912658688], [124.1516113152, 236.6715087872, 385.8122558976, 510.46209715199996], [166.3327636992, 299.424316416, 228.2186889984, 358.3742675968], [98.04486082559998, 295.6629638656, 160.7658081024, 358.8233032192]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047781_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a chair, a person, and two bowls.", "boxes_value": [[33.04486082559998, 16.662963865599977, 163.2186889984, 79.8233032192], [0, 0, 195, 95], [21.7557983488, 8.311950694400025, 195, 95], [59.1516113152, 0, 195, 95], [101.3327636992, 20.42431641600001, 163.2186889984, 79.3742675968], [33.04486082559998, 16.662963865599977, 95.7658081024, 79.8233032192]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047783.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify.", "boxes_value": [[597.816406272, 174.3286132736, 767.7877197312, 447.312927232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047783_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify.", "boxes_value": [[42.816406271999995, 68.32861327360001, 212.7877197312, 341.312927232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047783.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a gloves, and a sneakers.", "boxes_value": [[597.816406272, 174.3286132736, 767.7877197312, 447.312927232], [597.816406272, 174.3286132736, 767.7877197312, 447.312927232], [605.2280273664, 147.4979858432, 741.9538574592, 340.5227051008], [582.9183349248001, 164.8228759552, 611.1152343552, 228.045654272], [607.4243164416, 226.9573974528, 627.5031738624, 249.2187500032], [731.8259277312, 415.524047872, 745.35729984, 437.7854003712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047783_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a gloves, and a sneakers.", "boxes_value": [[42.816406271999995, 68.32861327360001, 212.7877197312, 341.312927232], [42.816406271999995, 68.32861327360001, 212.7877197312, 341.312927232], [50.228027366400056, 41.49798584320001, 186.95385745919998, 234.5227051008], [27.918334924800092, 58.822875955200004, 56.11523435519996, 122.04565427200001], [52.42431644160001, 120.9573974528, 72.50317386239999, 143.2187500032], [176.82592773119995, 309.524047872, 190.35729984, 331.7854003712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047784.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.1755067904, 45.194588304899995, 89.324279808, 237.2512817301]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047784_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.1755067904, 45.194588304899995, 89.324279808, 237.2512817301]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047784.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a hat, and a glasses.", "boxes_value": [[0.1755067904, 45.194588304899995, 89.324279808, 237.2512817301], [0, 15.0079345817, 490.301696768, 682.4080810552], [0, 41.0881347456, 80.4093627904, 477.881713883], [58.6609497088, 179.9971313311, 89.324279808, 237.2512817301], [0.3094995968, 45.194588304899995, 67.4542033408, 126.6075416405], [0.1755067904, 122.9682709409, 48.2730394624, 139.6174168893]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047784_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a hat, and a glasses.", "boxes_value": [[0.1755067904, 45.194588304899995, 89.324279808, 237.2512817301], [0, 15.0079345817, 111, 285], [0, 41.0881347456, 80.4093627904, 285], [58.6609497088, 179.9971313311, 89.324279808, 237.2512817301], [0.3094995968, 45.194588304899995, 67.4542033408, 126.6075416405], [0.1755067904, 122.9682709409, 48.2730394624, 139.6174168893]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047785.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[41.295776384, 223.059448224, 551.147949248, 474.471435552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047785_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[41.295776384, 63.05944822399999, 551.147949248, 314.471435552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047785.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a chair, a cabinet, a book, and a bottle.", "boxes_value": [[41.295776384, 223.059448224, 551.147949248, 474.471435552], [348.829040512, 227.284362816, 634.139770496, 408.048583968], [425.04675296000005, 258.311035152, 621.3243408000001, 478.870361328], [41.295776384, 233.04852292799998, 353.133728, 474.471435552], [430.10998534400005, 266.271606432, 498.242797824, 287.7871704], [537.6993408, 223.059448224, 551.147949248, 248.895019536]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047785_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a chair, a cabinet, a book, and a bottle.", "boxes_value": [[41.295776384, 63.05944822399999, 551.147949248, 314.471435552], [348.829040512, 67.284362816, 634.139770496, 248.048583968], [425.04675296000005, 98.31103515199999, 621.3243408000001, 318.870361328], [41.295776384, 73.04852292799998, 353.133728, 314.471435552], [430.10998534400005, 106.271606432, 498.242797824, 127.78717039999998], [537.6993408, 63.05944822399999, 551.147949248, 88.895019536]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047787.jpg", "text": "I need details about the area located within image . Provide the coordinates for each element you describe.", "boxes_value": [[1.6495971749999998, 383.3183593984, 312.3037109484, 485.3101196288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047787_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for each element you describe.", "boxes_value": [[1.6495971749999998, 26.31835939839999, 312.3037109484, 128.3101196288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047787.jpg", "text": "I need details about the area located within image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two boots, and two hockey sticks.", "boxes_value": [[1.6495971749999998, 383.3183593984, 312.3037109484, 485.3101196288], [189.5411987478, 244.665222144, 311.671264635, 488.4205932544], [189.31304932199998, 456.6285400576, 236.71063229220002, 485.3101196288], [290.4278564538, 427.4607543808, 312.3037109484, 463.1912841728], [1.6495971749999998, 384.8580932608, 84.2788085904, 438.233459456], [109.9400024232, 383.3183593984, 205.9130859138, 450.5508422656]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047787_crop.jpg", "text": "I need details about the area located within image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two boots, and two hockey sticks.", "boxes_value": [[1.6495971749999998, 26.31835939839999, 312.3037109484, 128.3101196288], [189.5411987478, 0, 311.671264635, 131.4205932544], [189.31304932199998, 99.62854005759999, 236.71063229220002, 128.3101196288], [290.4278564538, 70.46075438079998, 312.3037109484, 106.19128417280001], [1.6495971749999998, 27.85809326079999, 84.2788085904, 81.23345945599999], [109.9400024232, 26.31835939839999, 205.9130859138, 93.55084226560001]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047788.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[260.6484375211, 223.4380493, 551.7958984518, 367.92736815]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047788_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[73.64843752109999, 36.43804929999999, 364, 180.92736815]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047788.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, two pillows, two desks, and a cup.", "boxes_value": [[260.6484375211, 223.4380493, 551.7958984518, 367.92736815], [344.24182131640003, 223.4380493, 511.7012939595, 367.92736815], [406.4445190499, 238.92889405, 465.23834227329996, 289.0125122], [500.3513183527, 257.01464845, 551.7958984518, 361.80914305], [307.9459838932, 256.03143309999996, 350.7006835945, 338.67578125], [209.2737426805, 230.0214844, 327.3610839745, 346.52111814999995], [241.651489246, 241.8231201, 285.30328367339996, 281.2026367], [260.6484375211, 292.27319335000004, 281.05657961149996, 311.01538085000004]], "boxes_seq": [[0], [0], [1, 5], [2, 6], [3, 4], [7]]}, {"image_path": "objects365_v1_00047788_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, two pillows, two desks, and a cup.", "boxes_value": [[73.64843752109999, 36.43804929999999, 364, 180.92736815], [157.24182131640003, 36.43804929999999, 324.7012939595, 180.92736815], [219.4445190499, 51.92889405, 278.23834227329996, 102.0125122], [313.3513183527, 70.01464844999998, 364, 174.80914305], [120.94598389319998, 69.03143309999996, 163.7006835945, 151.67578125], [22.273742680499993, 43.02148439999999, 140.3610839745, 159.52111814999995], [54.65148924600001, 54.82312010000001, 98.30328367339996, 94.20263670000003], [73.64843752109999, 105.27319335000004, 94.05657961149996, 124.01538085000004]], "boxes_seq": [[0], [0], [1, 5], [2, 6], [3, 4], [7]]}, {"image_path": "objects365_v1_00047789.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[262.3529052672, 136.8708495921, 487.63488768, 496.80395506829996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047789_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[56.352905267200015, 90.8708495921, 281.63488768, 450.80395506829996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047789.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a handbag, and two cups.", "boxes_value": [[262.3529052672, 136.8708495921, 487.63488768, 496.80395506829996], [339.1566772224, 245.1726074063, 468.6232300032, 638.2033691581], [322.4767456256, 136.8708495921, 487.63488768, 496.80395506829996], [345.5081176576, 299.7527465543, 368.296875008, 369.6384277671], [262.3529052672, 359.3697509574, 284.8863525376, 382.5860595867], [320.0522461184, 361.41821289, 340.8786620928, 384.2930908438]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047789_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a handbag, and two cups.", "boxes_value": [[56.352905267200015, 90.8708495921, 281.63488768, 450.80395506829996], [133.1566772224, 199.1726074063, 262.6232300032, 540], [116.47674562560002, 90.8708495921, 281.63488768, 450.80395506829996], [139.5081176576, 253.75274655430002, 162.29687500799997, 323.6384277671], [56.352905267200015, 313.3697509574, 78.88635253759998, 336.5860595867], [114.05224611839998, 315.41821289, 134.87866209280003, 338.2930908438]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047790.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[233.6826171927, 187.9395141632, 399.9764404298, 361.4489135616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047790_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[41.68261719270001, 43.93951416319999, 207.9764404298, 217.44891356160002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047790.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a cabinet, and three people.", "boxes_value": [[233.6826171927, 187.9395141632, 399.9764404298, 361.4489135616], [223.5429687725, 299.8865356288, 241.24169925180001, 362.636474624], [304.568847622, 187.9395141632, 359.4826659957, 361.4489135616], [233.6826171927, 211.2747802624, 252.59960936079997, 254.7643432448], [247.52911379559998, 226.4863891456, 260.0103759396, 256.3245239296], [361.3828124673, 194.5830688256, 399.9764404298, 322.1098632704]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047790_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a cabinet, and three people.", "boxes_value": [[41.68261719270001, 43.93951416319999, 207.9764404298, 217.44891356160002], [31.54296877249999, 155.8865356288, 49.241699251800014, 218.63647462400002], [112.56884762200002, 43.93951416319999, 167.48266599570002, 217.44891356160002], [41.68261719270001, 67.27478026239999, 60.599609360799974, 110.76434324479999], [55.52911379559998, 82.48638914559999, 68.01037593960001, 112.32452392959999], [169.38281246730003, 50.58306882560001, 207.9764404298, 178.1098632704]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047791.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[449.0213622808, 98.6056518656, 675.0522460658, 164.6354980352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047791_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[57.02136228080002, 16.605651865599995, 283.0522460658, 82.63549803519999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047791.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four pictures, and a bowl.", "boxes_value": [[449.0213622808, 98.6056518656, 675.0522460658, 164.6354980352], [442.1546630675, 97.813659648, 484.9224853744, 146.6534424064], [479.114501987, 111.2775878656, 502.8743896441, 144.5414428672], [498.12243654570005, 98.6056518656, 531.6502685359001, 142.4294433792], [633.2569579827, 107.7835082752, 675.0522460658, 164.6354980352], [449.0213622808, 128.2504272384, 478.38354492120004, 143.7246704128]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047791_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four pictures, and a bowl.", "boxes_value": [[57.02136228080002, 16.605651865599995, 283.0522460658, 82.63549803519999], [50.154663067499996, 15.813659647999998, 92.92248537440003, 64.65344240639999], [87.11450198699998, 29.277587865599997, 110.8743896441, 62.541442867200004], [106.12243654570005, 16.605651865599995, 139.65026853590007, 60.429443379199995], [241.2569579827, 25.783508275200006, 283.0522460658, 82.63549803519999], [57.02136228080002, 46.25042723839999, 86.38354492120004, 61.72467041280001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047792.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[632.7192382667, 163.730957056, 769.1345214776001, 263.8072509952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047792_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[34.71923826670002, 25.730957055999994, 171, 125.80725099519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047792.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, three people, and a helmet.", "boxes_value": [[632.7192382667, 163.730957056, 769.1345214776001, 263.8072509952], [568.85058592, 99.228332544, 768.4376220717, 301.2940674048], [697.3217773813, 171.8062744064, 769.1345214776001, 263.8072509952], [679.1523437702, 164.5961303552, 711.1651611199001, 262.3652343808], [632.7192382667, 163.730957056, 671.3653564136, 259.1928100352], [719.3857422055, 172.774719232, 742.8057861608, 197.43670656]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047792_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, three people, and a helmet.", "boxes_value": [[34.71923826670002, 25.730957055999994, 171, 125.80725099519998], [0, 0, 170.43762207170005, 150], [99.32177738129997, 33.80627440640001, 171, 125.80725099519998], [81.15234377019999, 26.59613035519999, 113.16516111990006, 124.36523438080002], [34.71923826670002, 25.730957055999994, 73.36535641360001, 121.19281003520001], [121.38574220550004, 34.774719231999995, 144.80578616080004, 59.436706560000005]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047793.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[0, 131.1846923776, 414.4260254213, 511.058776832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047793_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[0, 95.18469237759999, 414.4260254213, 475.058776832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047793.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, a glasses, and a microphone.", "boxes_value": [[0, 131.1846923776, 414.4260254213, 511.058776832], [0, 251.9178466816, 117.02844239310001, 510.4650268672], [20.3071288793, 257.6680908288, 74.1846313166, 431.6862793216], [5.444335901400001, 151.1516113408, 132.3971557801, 323.3119506944], [50.703796373299994, 188.4719238144, 212.33154296000004, 511.058776832], [159.33471677900002, 78.471557632, 431.3663330347, 511.259704576], [293.9942627153, 131.1846923776, 377.4737548907, 158.2827148288], [350.1773681872, 221.0117187584, 414.4260254213, 409.16827392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047793_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, a glasses, and a microphone.", "boxes_value": [[0, 95.18469237759999, 414.4260254213, 475.058776832], [0, 215.9178466816, 117.02844239310001, 474.4650268672], [20.3071288793, 221.66809082880002, 74.1846313166, 395.6862793216], [5.444335901400001, 115.1516113408, 132.3971557801, 287.3119506944], [50.703796373299994, 152.4719238144, 212.33154296000004, 475.058776832], [159.33471677900002, 42.471557632, 431.3663330347, 475.259704576], [293.9942627153, 95.18469237759999, 377.4737548907, 122.28271482880001], [350.1773681872, 185.0117187584, 414.4260254213, 373.16827392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047795.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[176.147277824, 37.8519287296, 389.1008300544, 317.5313720832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047795_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[54.147277824000014, 37.8519287296, 267.1008300544, 317.5313720832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047795.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a helmet, a belt, and two gloves.", "boxes_value": [[176.147277824, 37.8519287296, 389.1008300544, 317.5313720832], [109.0361328128, 37.7536621056, 389.6129150464, 498.4809570304001], [231.2110595584, 37.8519287296, 309.806213376, 103.45104977919999], [202.8990478336, 230.3956298752, 280.870361344, 264.9193115136], [356.531494144, 208.7997436416, 389.1008300544, 253.8958129664], [176.147277824, 273.437438976, 204.2070922752, 317.5313720832]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047795_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a helmet, a belt, and two gloves.", "boxes_value": [[54.147277824000014, 37.8519287296, 267.1008300544, 317.5313720832], [0, 37.7536621056, 267.6129150464, 387], [109.2110595584, 37.8519287296, 187.80621337600002, 103.45104977919999], [80.89904783360001, 230.3956298752, 158.870361344, 264.9193115136], [234.53149414400002, 208.7997436416, 267.1008300544, 253.8958129664], [54.147277824000014, 273.437438976, 82.20709227520001, 317.5313720832]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047796.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[0, 188.4566650368, 99.8948364179, 508.5531616256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047796_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[0, 80.45666503679999, 99.8948364179, 400.5531616256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047796.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, a pillow, a desk, a flower, and a vase.", "boxes_value": [[0, 188.4566650368, 99.8948364179, 508.5531616256], [52.2982788355, 223.1882934784, 502.9052734329, 509.547546368], [54.2096557698, 282.7783813632, 79.7523803532, 337.2137451008], [0, 344.6998901248, 99.8948364179, 508.5531616256], [0, 188.4566650368, 84.1989135457, 313.3813476352], [0, 306.0147704832, 38.247009308500004, 358.093750016]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047796_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, a pillow, a desk, a flower, and a vase.", "boxes_value": [[0, 80.45666503679999, 99.8948364179, 400.5531616256], [52.2982788355, 115.1882934784, 124, 401.547546368], [54.2096557698, 174.77838136320003, 79.7523803532, 229.2137451008], [0, 236.6998901248, 99.8948364179, 400.5531616256], [0, 80.45666503679999, 84.1989135457, 205.38134763519997], [0, 198.01477048319998, 38.247009308500004, 250.093750016]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047798.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[0.5680542208, 400.10046388020004, 471.9574584832, 682.7248535183]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047798_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[0.5680542208, 71.10046388020004, 471.9574584832, 353.72485351830005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047798.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two bottles, three plates, a chopsticks, and a bowl.", "boxes_value": [[0.5680542208, 400.10046388020004, 471.9574584832, 682.7248535183], [422.2387695104, 477.7897949234, 511.0219726336, 682.5048827818], [404.5225829888, 313.6791382054, 469.9516601344, 519.9119872978], [141.9848632832, 474.15515134390006, 471.9574584832, 682.7248535183], [145.9131469824, 419.1597900638, 268.3434448384, 470.22692868359997], [183.8861694464, 400.82800292490003, 229.7156982272, 487.2493896599], [0.5680542208, 400.10046388020004, 50.9805297664, 462.29772951999996], [1.2227783168, 483.97583008090004, 155.079040512, 645.0338134496]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 7], [5], [6]]}, {"image_path": "objects365_v1_00047798_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two bottles, three plates, a chopsticks, and a bowl.", "boxes_value": [[0.5680542208, 71.10046388020004, 471.9574584832, 353.72485351830005], [422.2387695104, 148.7897949234, 511.0219726336, 353.5048827818], [404.5225829888, 0, 469.9516601344, 190.9119872978], [141.9848632832, 145.15515134390006, 471.9574584832, 353.72485351830005], [145.9131469824, 90.15979006380002, 268.3434448384, 141.22692868359997], [183.8861694464, 71.82800292490003, 229.7156982272, 158.24938965989998], [0.5680542208, 71.10046388020004, 50.9805297664, 133.29772951999996], [1.2227783168, 154.97583008090004, 155.079040512, 316.03381344959996]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 7], [5], [6]]}, {"image_path": "objects365_v1_00047801.jpg", "text": "In the submitted image , please give a synopsis of the area . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.9405517317000001, 301.864257792, 149.2088623109, 465.51312256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047801_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.9405517317000001, 41.86425779199999, 149.2088623109, 205.51312256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047801.jpg", "text": "In the submitted image , please give a synopsis of the area . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a desk, a lamp, and a napkin.", "boxes_value": [[0.9405517317000001, 301.864257792, 149.2088623109, 465.51312256], [75.3082885884, 305.0927734272, 218.7801513947, 459.1715087872], [0.9405517317000001, 301.864257792, 149.2088623109, 465.51312256], [1.5997314269999998, 267.203124992, 108.7235717932, 422.2388916224], [71.6596069175, 299.0529785344, 94.32415772009999, 319.9740600832], [2.5237426921, 306.4183960064, 36.1859130745, 318.94384768]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047801_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a desk, a lamp, and a napkin.", "boxes_value": [[0.9405517317000001, 41.86425779199999, 149.2088623109, 205.51312256], [75.3082885884, 45.09277342719997, 186, 199.17150878720003], [0.9405517317000001, 41.86425779199999, 149.2088623109, 205.51312256], [1.5997314269999998, 7.203124992000028, 108.7235717932, 162.2388916224], [71.6596069175, 39.05297853439998, 94.32415772009999, 59.974060083200015], [2.5237426921, 46.41839600639997, 36.1859130745, 58.943847679999976]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047802.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[383.1416233728, 108.2411499008, 700.8028564223999, 406.7374169088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047802_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[80.14162337279998, 75.2411499008, 397.80285642239994, 373.7374169088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047802.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, and three sneakers.", "boxes_value": [[383.1416233728, 108.2411499008, 700.8028564223999, 406.7374169088], [382.4455566336, 173.428710912, 530.8612060416, 459.240600576], [592.299926784, 163.6015625216, 696.2573242368, 408.8392944128], [599.4970702848, 108.2411499008, 700.8028564223999, 353.0319824384], [383.1416233728, 328.8082844672, 413.71302144000003, 363.5867556864], [669.9967122432, 328.980854016, 693.3585495551999, 353.0400595456], [627.1085632511999, 383.7242637312, 656.398030848, 406.7374169088]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047802_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, and three sneakers.", "boxes_value": [[80.14162337279998, 75.2411499008, 397.80285642239994, 373.7374169088], [79.4455566336, 140.428710912, 227.86120604159998, 426.240600576], [289.29992678400004, 130.6015625216, 393.25732423679995, 375.8392944128], [296.4970702848, 75.2411499008, 397.80285642239994, 320.0319824384], [80.14162337279998, 295.8082844672, 110.71302144000003, 330.5867556864], [366.9967122432, 295.980854016, 390.35854955519994, 320.0400595456], [324.10856325119994, 350.7242637312, 353.39803084799996, 373.7374169088]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047808.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object.", "boxes_value": [[0.3524780032, 211.38745114079998, 194.9275512832, 428.7423095964]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047808_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object.", "boxes_value": [[0.3524780032, 54.38745114079998, 194.9275512832, 271.7423095964]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047808.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pillows, a lamp, two cabinets, and a moniter.", "boxes_value": [[0.3524780032, 211.38745114079998, 194.9275512832, 428.7423095964], [108.402893056, 313.39544674679996, 170.3363037184, 372.5965576296], [174.8902587904, 316.1278075932, 194.9275512832, 363.4886474892], [54.6665649664, 211.38745114079998, 151.209838848, 368.0426025252], [127.7261352448, 127.49395749480001, 226.2305908224, 365.310180666], [0.3524780032, 290.6293945104, 139.4380493312, 428.7423095964], [10.9652099584, 206.93084715720002, 121.0051269632, 309.70281980519997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047808_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pillows, a lamp, two cabinets, and a moniter.", "boxes_value": [[0.3524780032, 54.38745114079998, 194.9275512832, 271.7423095964], [108.402893056, 156.39544674679996, 170.3363037184, 215.5965576296], [174.8902587904, 159.12780759319998, 194.9275512832, 206.4886474892], [54.6665649664, 54.38745114079998, 151.209838848, 211.04260252519998], [127.7261352448, 0, 226.2305908224, 208.310180666], [0.3524780032, 133.6293945104, 139.4380493312, 271.7423095964], [10.9652099584, 49.93084715720002, 121.0051269632, 152.70281980519997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047816.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[274.941894514, 179.5973510656, 407.3099364946, 418.552062976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047816_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object.", "boxes_value": [[33.94189451400001, 60.59735106560001, 166.30993649459998, 299.552062976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047816.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two street lights, and a machinery vehicle.", "boxes_value": [[274.941894514, 179.5973510656, 407.3099364946, 418.552062976], [381.6014404348, 310.281127936, 399.8988036907, 327.6858520576], [274.941894514, 302.4713134592, 293.6854247879, 326.347045888], [395.6564941233, 300.44635008, 407.3099364946, 391.4888915968], [356.6903076025, 262.9368286208, 373.07800292670004, 408.6048584192], [293.2126464507, 179.5973510656, 358.5913086057, 418.552062976]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047816_crop.jpg", "text": "In the provided image , please explain the content within the region . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two street lights, and a machinery vehicle.", "boxes_value": [[33.94189451400001, 60.59735106560001, 166.30993649459998, 299.552062976], [140.6014404348, 191.28112793600002, 158.8988036907, 208.68585205760002], [33.94189451400001, 183.47131345920002, 52.68542478789999, 207.34704588800003], [154.65649412329998, 181.44635008, 166.30993649459998, 272.4888915968], [115.69030760250001, 143.9368286208, 132.07800292670004, 289.6048584192], [52.21264645069999, 60.59735106560001, 117.59130860570002, 299.552062976]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047817.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[258.408813504, 103.443115248, 526.733398464, 265.944946272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047817_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[67.40881350400002, 41.443115248, 335.73339846399995, 203.94494627199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047817.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a picture, a vase, a speaker, and a bakset.", "boxes_value": [[258.408813504, 103.443115248, 526.733398464, 265.944946272], [235.56512448, 179.374023456, 353.35498048, 263.57537841600004], [494.415771456, 103.443115248, 526.733398464, 156.96105955200002], [257.780761728, 234.37530518399998, 343.091735808, 268.257690432], [359.356872576, 231.65136719999998, 372.416870144, 264.664306656], [258.408813504, 238.55517576, 340.57800294400005, 265.944946272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047817_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a picture, a vase, a speaker, and a bakset.", "boxes_value": [[67.40881350400002, 41.443115248, 335.73339846399995, 203.94494627199998], [44.56512448000001, 117.374023456, 162.35498048, 201.57537841600004], [303.415771456, 41.443115248, 335.73339846399995, 94.96105955200002], [66.78076172800002, 172.37530518399998, 152.091735808, 206.257690432], [168.356872576, 169.65136719999998, 181.41687014399997, 202.664306656], [67.40881350400002, 176.55517576, 149.57800294400005, 203.94494627199998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047819.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[286.7941284065, 54.9616089088, 536.3721923608, 406.55242920960006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047819_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[62.79412840650002, 54.9616089088, 312.3721923608, 406.55242920960006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047819.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[286.7941284065, 54.9616089088, 536.3721923608, 406.55242920960006], [36.0319823901, 18.2966919168, 418.3951415801, 475.2992553472], [286.7941284065, 54.9616089088, 508.74804686019996, 406.55242920960006], [289.1298217545, 352.1974487552, 309.8876342505, 399.9402465792], [474.8870849472, 294.276611328, 500.3809814508, 349.388305664], [511.6281738512, 380.1309203968, 536.3721923608, 404.5]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047819_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[62.79412840650002, 54.9616089088, 312.3721923608, 406.55242920960006], [0, 18.2966919168, 194.39514158010002, 475.2992553472], [62.79412840650002, 54.9616089088, 284.74804686019996, 406.55242920960006], [65.12982175450003, 352.1974487552, 85.88763425050001, 399.9402465792], [250.88708494719998, 294.276611328, 276.3809814508, 349.388305664], [287.6281738512, 380.1309203968, 312.3721923608, 404.5]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047820.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object.", "boxes_value": [[83.79931637, 0, 396.255615217, 337.607910144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047820_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object.", "boxes_value": [[78.79931637, 0, 391.255615217, 337.607910144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047820.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two pictures, three vases, and a desk.", "boxes_value": [[83.79931637, 0, 396.255615217, 337.607910144], [312.860107406, 235.2364502016, 429.908813488, 349.6046752768], [83.79931637, 11.3478393344, 211.244445797, 163.269165056], [96.45941158699999, 0, 205.336425756, 55.23620608], [361.97363279, 201.96063232, 396.255615217, 234.6481323008], [238.39910887099998, 261.7547607552, 282.248168972, 314.3735962112], [244.26464847399998, 256.9712524288, 318.067626954, 337.607910144], [218.501831086, 153.7966919168, 244.058837869, 179.786804224]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 7], [6]]}, {"image_path": "objects365_v1_00047820_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two pictures, three vases, and a desk.", "boxes_value": [[78.79931637, 0, 391.255615217, 337.607910144], [307.860107406, 235.2364502016, 424.908813488, 349.6046752768], [78.79931637, 11.3478393344, 206.244445797, 163.269165056], [91.45941158699999, 0, 200.336425756, 55.23620608], [356.97363279, 201.96063232, 391.255615217, 234.6481323008], [233.39910887099998, 261.7547607552, 277.248168972, 314.3735962112], [239.26464847399998, 256.9712524288, 313.067626954, 337.607910144], [213.501831086, 153.7966919168, 239.058837869, 179.786804224]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 7], [6]]}, {"image_path": "objects365_v1_00047822.jpg", "text": "Please detail the contents of the chosen region in the visual input . Please point out the objects and their coordinates.", "boxes_value": [[193.436340352, 278.484436032, 327.333435072, 380.85247804799997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047822_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Please point out the objects and their coordinates.", "boxes_value": [[34.436340352, 26.48443603200002, 168.333435072, 128.85247804799997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047822.jpg", "text": "Please detail the contents of the chosen region in the visual input . Please point out the objects and their coordinates. For your reference, objects involved in this region include four chairs, a desk, and a barrel.", "boxes_value": [[193.436340352, 278.484436032, 327.333435072, 380.85247804799997], [212.219482432, 278.484436032, 265.75134278400003, 354.556091328], [271.70190432, 285.369995136, 327.333435072, 377.10675048], [193.436340352, 292.571777328, 270.447143552, 368.173828128], [223.01977535999998, 298.67633054400005, 291.108581568, 380.85247804799997], [177.940307648, 302.902526832, 264.447082496, 404.05493164800004], [312.782592768, 327.002563488, 333.03839110399997, 352.782592752]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6]]}, {"image_path": "objects365_v1_00047822_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Please point out the objects and their coordinates. For your reference, objects involved in this region include four chairs, a desk, and a barrel.", "boxes_value": [[34.436340352, 26.48443603200002, 168.333435072, 128.85247804799997], [53.21948243200001, 26.48443603200002, 106.75134278400003, 102.55609132799998], [112.70190431999998, 33.369995136, 168.333435072, 125.10675048000002], [34.436340352, 40.571777327999996, 111.447143552, 116.17382812800003], [64.01977535999998, 46.67633054400005, 132.10858156799998, 128.85247804799997], [18.940307647999987, 50.90252683199998, 105.44708249600001, 152.05493164800004], [153.78259276799997, 75.00256348800002, 174.03839110399997, 100.78259275200003]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6]]}, {"image_path": "objects365_v1_00047823.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[437.86584474240004, 227.9310913024, 610.9359130994001, 318.4694213632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047823_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[43.86584474240004, 22.931091302400006, 216.93591309940007, 113.46942136320001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047823.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include five people, three barrels, and a trolley.", "boxes_value": [[437.86584474240004, 227.9310913024, 610.9359130994001, 318.4694213632], [490.7369384663, 210.394287104, 548.9821777347, 350.5254516736], [484.64318849299997, 196.8363647488, 513.7167968668, 255.864685056], [532.4453124785, 198.3473510912, 587.4735107594, 281.9004516352], [556.9111327941, 227.9310913024, 593.2972411905, 291.654724096], [588.0716552492, 195.9454956032, 627.4279785415, 300.6481323008], [437.86584474240004, 270.6152953856, 457.8134765597, 290.562988288], [444.5568847725, 289.56445312, 474.5659179938, 313.97857664], [585.7781982114, 300.3984374784, 610.9359130994001, 318.4694213632], [447.5920410159, 245.0051879936, 498.056274431, 295.8576659968]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7, 8], [9]]}, {"image_path": "objects365_v1_00047823_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include five people, three barrels, and a trolley.", "boxes_value": [[43.86584474240004, 22.931091302400006, 216.93591309940007, 113.46942136320001], [96.73693846629999, 5.394287104, 154.98217773470003, 136], [90.64318849299997, 0, 119.7167968668, 50.86468505600001], [138.4453124785, 0, 193.47351075940003, 76.9004516352], [162.9111327941, 22.931091302400006, 199.29724119050002, 86.654724096], [194.07165524920003, 0, 233.4279785415, 95.6481323008], [43.86584474240004, 65.6152953856, 63.81347655970001, 85.56298828799999], [50.556884772499984, 84.56445312, 80.56591799379999, 108.97857664000003], [191.77819821139997, 95.39843747840001, 216.93591309940007, 113.46942136320001], [53.59204101590001, 40.0051879936, 104.05627443100002, 90.8576659968]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7, 8], [9]]}, {"image_path": "objects365_v1_00047824.jpg", "text": "Regarding the image , what's going on in the section ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[214.59472657499998, 237.495056128, 642.3015136775, 315.8091430912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047824_crop.jpg", "text": "Regarding the image , what's going on in the section ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[107.59472657499998, 20.495056127999987, 535.3015136775, 98.80914309119999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047824.jpg", "text": "Regarding the image , what's going on in the section ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, two pillows, a bed, and a lamp.", "boxes_value": [[214.59472657499998, 237.495056128, 642.3015136775, 315.8091430912], [214.59472657499998, 271.4353027584, 281.5858154625, 303.4083251712], [316.40246579850003, 283.317871104, 460.63208009150003, 315.8091430912], [462.6132812545, 283.317871104, 599.710693356, 313.8280029184], [228.440307641, 239.4332885504, 682.356079118, 433.9686279168], [622.5024414430001, 237.495056128, 642.3015136775, 310.3270263808]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047824_crop.jpg", "text": "Regarding the image , what's going on in the section ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, two pillows, a bed, and a lamp.", "boxes_value": [[107.59472657499998, 20.495056127999987, 535.3015136775, 98.80914309119999], [107.59472657499998, 54.435302758399985, 174.58581546250002, 86.40832517119998], [209.40246579850003, 66.317871104, 353.63208009150003, 98.80914309119999], [355.6132812545, 66.317871104, 492.710693356, 96.82800291839999], [121.440307641, 22.433288550399993, 575.356079118, 118], [515.5024414430001, 20.495056127999987, 535.3015136775, 93.32702638080002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047825.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates.", "boxes_value": [[456.59411618160004, 311.4152221696, 681.5131835835999, 511.9177246208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047825_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates.", "boxes_value": [[56.59411618160004, 50.415222169599986, 281.5131835835999, 250.91772462080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047825.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a desk, a person, a moniter, and a keyboard.", "boxes_value": [[456.59411618160004, 311.4152221696, 681.5131835835999, 511.9177246208], [456.59411618160004, 445.0495605248, 645.5771484188, 511.9177246208], [490.02209475240005, 422.2901001216, 681.5131835835999, 472.238403328], [275.6998901679, 267.561401344, 548.1601562265, 511.0496216064], [458.783447274, 311.4152221696, 591.2364502199999, 446.710571264], [535.5804443495, 423.2332153344, 640.9218749905, 442.3302002176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047825_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a desk, a person, a moniter, and a keyboard.", "boxes_value": [[56.59411618160004, 50.415222169599986, 281.5131835835999, 250.91772462080002], [56.59411618160004, 184.04956052479997, 245.5771484188, 250.91772462080002], [90.02209475240005, 161.2901001216, 281.5131835835999, 211.238403328], [0, 6.561401343999989, 148.16015622650002, 250.04962160640002], [58.783447274000025, 50.415222169599986, 191.23645021999994, 185.710571264], [135.5804443495, 162.23321533439997, 240.9218749905, 181.3302002176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047826.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each mentioned object.", "boxes_value": [[735.0410156208, 182.6356811776, 819.32727048, 294.5788574208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047826_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each mentioned object.", "boxes_value": [[22.041015620799953, 28.63568117759999, 106.32727048000004, 140.57885742079998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047826.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a glasses, a bottle, a cup, and a bowl.", "boxes_value": [[735.0410156208, 182.6356811776, 819.32727048, 294.5788574208], [717.7807617168, 156.6684570112, 821.3369140992, 447.5070800896], [735.0410156208, 182.6356811776, 762.003051744, 197.0796508672], [757.65991212, 237.6393432576, 768.7600097664, 269.7364502016], [793.221435552, 261.4840698368, 810.2827148591999, 281.2176513536], [789.5213622672, 282.997802752, 819.32727048, 294.5788574208]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047826_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a glasses, a bottle, a cup, and a bowl.", "boxes_value": [[22.041015620799953, 28.63568117759999, 106.32727048000004, 140.57885742079998], [4.780761716800043, 2.6684570111999903, 108.33691409920004, 168], [22.041015620799953, 28.63568117759999, 49.003051744000004, 43.0796508672], [44.659912119999944, 83.63934325759999, 55.7600097664, 115.73645020160001], [80.221435552, 107.48406983680002, 97.28271485919993, 127.21765135359999], [76.52136226719995, 128.99780275199998, 106.32727048000004, 140.57885742079998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047829.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[223.14801022229997, 365.0019531264, 417.30078126660004, 432.5783691264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047829_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[49.14801022229997, 17.001953126399997, 243.30078126660004, 84.57836912639999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047829.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a bottle, a plate, a bowl, and two canneds.", "boxes_value": [[223.14801022229997, 365.0019531264, 417.30078126660004, 432.5783691264], [223.14801022229997, 365.0019531264, 264.3224487113, 432.5783691264], [306.62420656669997, 387.5697631744, 338.9073486652, 404.1475830272], [334.76287843800003, 398.0399780352, 374.462402332, 419.1985473536], [399.6760254144, 370.0717773312, 417.30078126660004, 407.3356323328], [379.5334472976, 374.6038818304, 398.66894532939995, 412.3712768512]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047829_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a bottle, a plate, a bowl, and two canneds.", "boxes_value": [[49.14801022229997, 17.001953126399997, 243.30078126660004, 84.57836912639999], [49.14801022229997, 17.001953126399997, 90.32244871130001, 84.57836912639999], [132.62420656669997, 39.56976317440001, 164.9073486652, 56.14758302720003], [160.76287843800003, 50.03997803520002, 200.462402332, 71.19854735360002], [225.6760254144, 22.071777331199996, 243.30078126660004, 59.3356323328], [205.5334472976, 26.603881830399985, 224.66894532939995, 64.37127685119998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047830.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[329.72436523709996, 191.7910766592, 487.5972900699, 392.1854247936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047830_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[39.72436523709996, 50.7910766592, 197.5972900699, 251.18542479360002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047830.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a handbag, and two trash bin cans.", "boxes_value": [[329.72436523709996, 191.7910766592, 487.5972900699, 392.1854247936], [414.8792724833, 191.7910766592, 479.12780760019996, 392.1854247936], [329.72436523709996, 193.8306884608, 399.5819091464, 379.9476318208], [463.04675082650004, 251.9817122304, 484.5427896181, 292.1287258624], [465.797241201, 271.8728027136, 487.5972900699, 310.0461425664], [384.57177730650005, 279.18621824, 397.0753173496, 303.4856567296]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047830_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a handbag, and two trash bin cans.", "boxes_value": [[39.72436523709996, 50.7910766592, 197.5972900699, 251.18542479360002], [124.87927248329999, 50.7910766592, 189.12780760019996, 251.18542479360002], [39.72436523709996, 52.83068846079999, 109.5819091464, 238.94763182079998], [173.04675082650004, 110.9817122304, 194.54278961810002, 151.12872586240002], [175.79724120100002, 130.87280271359998, 197.5972900699, 169.04614256640002], [94.57177730650005, 138.18621824000002, 107.07531734960003, 162.4856567296]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047832.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Provide the coordinates for each element you describe.", "boxes_value": [[162.7602538768, 296.487609856, 437.1379394424, 457.7356567552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047832_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Provide the coordinates for each element you describe.", "boxes_value": [[68.7602538768, 40.487609856000006, 343.1379394424, 201.7356567552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047832.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three sneakers, and two glasses.", "boxes_value": [[162.7602538768, 296.487609856, 437.1379394424, 457.7356567552], [264.628051786, 411.6647949312, 299.43713382, 457.7356567552], [162.7602538768, 313.3802490368, 215.485839806, 328.7371826176], [279.984985358, 344.0941772288, 308.651367226, 371.2247924736], [340.3890380732, 296.487609856, 393.62658691919995, 310.308837888], [420.24523925040006, 394.2601928704, 437.1379394424, 440.3311157248]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00047832_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three sneakers, and two glasses.", "boxes_value": [[68.7602538768, 40.487609856000006, 343.1379394424, 201.7356567552], [170.62805178600001, 155.6647949312, 205.43713381999999, 201.7356567552], [68.7602538768, 57.38024903680002, 121.485839806, 72.73718261760001], [185.98498535800002, 88.09417722879999, 214.651367226, 115.22479247360002], [246.38903807320003, 40.487609856000006, 299.62658691919995, 54.30883788800003], [326.24523925040006, 138.26019287039998, 343.1379394424, 184.3311157248]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00047833.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[321.5583495936, 158.1924438528, 377.192626944, 251.6596069376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047833_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations.", "boxes_value": [[14.558349593599985, 24.19244385280001, 70.19262694399998, 117.65960693759999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047833.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a gloves, a sneakers, and two horses.", "boxes_value": [[321.5583495936, 158.1924438528, 377.192626944, 251.6596069376], [297.6989746176, 93.024780288, 379.29675294720005, 248.77807616], [321.5583495936, 158.1924438528, 343.107666048, 172.6464843776], [355.739501952, 231.0881957888, 377.192626944, 251.6596069376], [252.14294430720003, 127.5815429632, 451.52819827200005, 378.1077880832], [203.5913696256, 117.2238769664, 375.7583007744, 352.21362304]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047833_crop.jpg", "text": "Can you break down the region in the image for me? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a gloves, a sneakers, and two horses.", "boxes_value": [[14.558349593599985, 24.19244385280001, 70.19262694399998, 117.65960693759999], [0, 0, 72.29675294720005, 114.77807616000001], [14.558349593599985, 24.19244385280001, 36.107666048, 38.64648437759999], [48.73950195200001, 97.0881957888, 70.19262694399998, 117.65960693759999], [0, 0, 84, 141], [0, 0, 68.75830077440003, 141]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047834.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference.", "boxes_value": [[24.7397461, 259.6381225711, 199.07843019999999, 460.53118895570003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047834_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference.", "boxes_value": [[24.7397461, 50.638122571099984, 199.07843019999999, 251.53118895570003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047834.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a desk, a cup, a bottle, and a bakset.", "boxes_value": [[24.7397461, 259.6381225711, 199.07843019999999, 460.53118895570003], [101.23529055, 190.12835694860001, 415.22283934999996, 450.7468872248], [24.7397461, 339.5615234351, 199.07843019999999, 460.53118895570003], [131.72601319999998, 323.8282470471, 157.9170532, 357.00360107169996], [66.77423095, 259.6381225711, 87.59436035, 323.795898424], [51.561171599999994, 304.6700439397, 127.7876587, 360.1488632411]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047834_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a bench, a desk, a cup, a bottle, and a bakset.", "boxes_value": [[24.7397461, 50.638122571099984, 199.07843019999999, 251.53118895570003], [101.23529055, 0, 242, 241.7468872248], [24.7397461, 130.5615234351, 199.07843019999999, 251.53118895570003], [131.72601319999998, 114.82824704709998, 157.9170532, 148.00360107169996], [66.77423095, 50.638122571099984, 87.59436035, 114.79589842399997], [51.561171599999994, 95.67004393970001, 127.7876587, 151.1488632411]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047835.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[0, 299.7310791168, 60.3237915173, 449.7237549056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047835_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[0, 37.731079116800004, 60.3237915173, 187.7237549056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047835.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[0, 299.7310791168, 60.3237915173, 449.7237549056], [0.20806884679999998, 299.7310791168, 26.1511230205, 449.7237549056], [14.6074218791, 320.8619995136, 49.0324096999, 442.037902848], [33.0592041269, 326.3699951104, 60.3237915173, 433.7759399424], [45.1274414138, 327.4118042112, 62.2969360347, 424.451721216], [0, 300.5389467136, 22.136595114200002, 316.6902099456]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047835_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[0, 37.731079116800004, 60.3237915173, 187.7237549056], [0.20806884679999998, 37.731079116800004, 26.1511230205, 187.7237549056], [14.6074218791, 58.86199951359998, 49.0324096999, 180.037902848], [33.0592041269, 64.3699951104, 60.3237915173, 171.7759399424], [45.1274414138, 65.41180421119998, 62.2969360347, 162.451721216], [0, 38.53894671360001, 22.136595114200002, 54.690209945599975]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047836.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[129.005329408, 159.5698829361, 440.1975097856, 447.0839843706]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047836_crop.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[78.005329408, 72.56988293609999, 389.1975097856, 360.0839843706]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047836.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three sneakers, two gloves, and two hockey sticks.", "boxes_value": [[129.005329408, 159.5698829361, 440.1975097856, 447.0839843706], [227.3351788032, 378.58765506569995, 264.6075369472, 437.08630373010004], [395.3918797824, 410.8592807784, 440.1975097856, 447.0839843706], [233.6600160256, 239.8582471365, 291.6640907264, 288.7246936188], [173.1034793984, 280.574887545, 200.2221791744, 296.1725710572], [129.005329408, 159.5698829361, 182.9171743232, 224.0296975161], [254.46063232, 269.20263671910004, 511.498535168, 474.6486816558], [414.7638550016, 163.2551269734, 463.59185792, 353.03930664119997]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5], [6, 7]]}, {"image_path": "objects365_v1_00047836_crop.jpg", "text": "In the image , please describe the bounding box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three sneakers, two gloves, and two hockey sticks.", "boxes_value": [[78.005329408, 72.56988293609999, 389.1975097856, 360.0839843706], [176.3351788032, 291.58765506569995, 213.6075369472, 350.08630373010004], [344.3918797824, 323.8592807784, 389.1975097856, 360.0839843706], [182.6600160256, 152.8582471365, 240.6640907264, 201.72469361880002], [122.1034793984, 193.57488754500002, 149.2221791744, 209.1725710572], [78.005329408, 72.56988293609999, 131.9171743232, 137.0296975161], [203.46063232, 182.20263671910004, 460.498535168, 387.6486816558], [363.7638550016, 76.2551269734, 412.59185792, 266.03930664119997]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5], [6, 7]]}, {"image_path": "objects365_v1_00047837.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[126.45617675140001, 320.7689208832, 201.46533205880002, 484.9509887488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047837_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[19.456176751400008, 41.768920883199996, 94.46533205880002, 205.9509887488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047837.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two handbags, and a sneakers.", "boxes_value": [[126.45617675140001, 320.7689208832, 201.46533205880002, 484.9509887488], [131.35687256970002, 320.7689208832, 173.81793211570002, 484.6132812288], [170.2358398539, 329.1188964864, 201.46533205880002, 474.9366455296], [126.45617675140001, 366.1080932864, 141.7107543807, 406.8746337792], [133.0126342952, 472.1010742272, 160.0650634446, 484.9509887488], [163.1412964008, 376.7418212864, 177.3804321127, 417.7841186304]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047837_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two handbags, and a sneakers.", "boxes_value": [[19.456176751400008, 41.768920883199996, 94.46533205880002, 205.9509887488], [24.356872569700016, 41.768920883199996, 66.81793211570002, 205.61328122880002], [63.23583985389999, 50.11889648639999, 94.46533205880002, 195.9366455296], [19.456176751400008, 87.10809328639999, 34.710754380699996, 127.8746337792], [26.0126342952, 193.1010742272, 53.065063444600014, 205.9509887488], [56.1412964008, 97.74182128640001, 70.38043211269999, 138.78411863039997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00047839.jpg", "text": "Please provide insights on the specified area within the graphic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[135.8082885888, 379.9650268672, 239.830200192, 418.7397460992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047839_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[26.808288588799996, 9.965026867199981, 130.830200192, 48.73974609919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047839.jpg", "text": "Please provide insights on the specified area within the graphic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two sandals, and two leather shoes.", "boxes_value": [[135.8082885888, 379.9650268672, 239.830200192, 418.7397460992], [172.4996338176, 156.5406494208, 240.37774656000002, 407.6347045888], [104.62145994240001, 131.7071533056, 187.3997192448, 419.7755126784], [135.8082885888, 391.6445312512, 157.0132445952, 418.7397460992], [149.961730944, 384.2059936768, 180.8603515392, 407.834350592], [199.8437499648, 388.4470214656, 217.8174438144, 406.82458496], [222.4623412992, 379.9650268672, 239.830200192, 402.9874877952]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047839_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two sandals, and two leather shoes.", "boxes_value": [[26.808288588799996, 9.965026867199981, 130.830200192, 48.73974609919998], [63.4996338176, 0, 131.37774656000002, 37.63470458879999], [0, 0, 78.3997192448, 49.77551267839999], [26.808288588799996, 21.64453125120002, 48.01324459520001, 48.73974609919998], [40.96173094400001, 14.205993676800006, 71.8603515392, 37.83435059200002], [90.8437499648, 18.447021465600017, 108.81744381440001, 36.82458495999998], [113.4623412992, 9.965026867199981, 130.830200192, 32.987487795200025]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047840.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[463.92590330639996, 281.242492672, 625.2517089504, 401.1696777216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047840_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[40.92590330639996, 30.242492672000026, 202.25170895040003, 150.1696777216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047840.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[463.92590330639996, 281.242492672, 625.2517089504, 401.1696777216], [577.2014160012, 294.3471069184, 625.2517089504, 401.1696777216], [520.8117675552, 288.3904418816, 545.8297118832, 376.5488281088], [499.7650146408, 281.242492672, 538.2846679392, 389.256347648], [469.9818115344, 297.9210815488, 500.1621094044, 385.6823730688], [463.92590330639996, 332.3036498944, 484.9605713088, 363.9384155136]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047840_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[40.92590330639996, 30.242492672000026, 202.25170895040003, 150.1696777216], [154.20141600119996, 43.347106918400016, 202.25170895040003, 150.1696777216], [97.81176755520005, 37.3904418816, 122.82971188320005, 125.54882810880002], [76.76501464080002, 30.242492672000026, 115.28466793919995, 138.25634764799997], [46.98181153439998, 46.921081548799975, 77.16210940439998, 134.68237306880002], [40.92590330639996, 81.3036498944, 61.96057130880001, 112.93841551359998]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047847.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[374.39111328, 154.892639184, 518.843750016, 275.335449216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047847_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[36.39111328000001, 30.89263918399999, 180.84375001599994, 151.33544921599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047847.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[374.39111328, 154.892639184, 518.843750016, 275.335449216], [374.39111328, 188.34899904, 454.68627929599995, 275.335449216], [386.986450176, 154.892639184, 455.86706540800003, 240.30468748799998], [412.57067872, 170.243225088, 518.843750016, 262.74011232], [472.79211424000005, 197.795471184, 560.172119168, 284.38830566400003], [456.260742208, 163.1583252, 535.768676736, 247.783142112]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047847_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[36.39111328000001, 30.89263918399999, 180.84375001599994, 151.33544921599997], [36.39111328000001, 64.34899904, 116.68627929599995, 151.33544921599997], [48.986450176000005, 30.89263918399999, 117.86706540800003, 116.30468748799998], [74.57067871999999, 46.243225088, 180.84375001599994, 138.74011231999998], [134.79211424000005, 73.79547118400001, 216, 160.38830566400003], [118.26074220800001, 39.15832520000001, 197.76867673599997, 123.78314211200001]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047848.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference.", "boxes_value": [[433.0119629052, 186.7041015808, 570.8770751994, 303.9206542848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047848_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference.", "boxes_value": [[35.011962905199994, 29.7041015808, 172.8770751994, 146.92065428479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047848.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference. For your reference, objects involved in this region include a hat, three street lights, and a moniter.", "boxes_value": [[433.0119629052, 186.7041015808, 570.8770751994, 303.9206542848], [511.8991699468, 257.747314432, 540.0917969057, 278.7603149312], [546.9204101424999, 186.7041015808, 570.8770751994, 303.9206542848], [496.7442627124, 196.1058349568, 514.7556152105, 276.694946304], [433.0119629052, 193.7966919168, 449.1759033314, 263.0709838848], [415.75122071370004, 176.6012573184, 457.09997555549995, 215.1226806784]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047848_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give coordinates for the items you reference. For your reference, objects involved in this region include a hat, three street lights, and a moniter.", "boxes_value": [[35.011962905199994, 29.7041015808, 172.8770751994, 146.92065428479998], [113.89916994679999, 100.747314432, 142.0917969057, 121.76031493120001], [148.92041014249992, 29.7041015808, 172.8770751994, 146.92065428479998], [98.74426271239997, 39.10583495680001, 116.75561521049997, 119.69494630399998], [35.011962905199994, 36.7966919168, 51.17590333139998, 106.0709838848], [17.751220713700036, 19.601257318400002, 59.09997555549995, 58.1226806784]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047849.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for all objects that you mention.", "boxes_value": [[467.5933837824, 68.9694213632, 734.0672607744, 384.2812499968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047849_crop.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for all objects that you mention.", "boxes_value": [[67.5933837824, 68.9694213632, 334.0672607744, 384.2812499968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047849.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, two desks, and a chair.", "boxes_value": [[467.5933837824, 68.9694213632, 734.0672607744, 384.2812499968], [659.0686035456, 262.958435072, 745.9056396288, 407.344116224], [636.9942627072, 299.8276977664, 734.0672607744, 384.2812499968], [467.5933837824, 68.9694213632, 523.4338378752, 236.71667481600002], [534.9843750144, 273.1880493056, 603.2702636544, 344.9937744384], [583.6296387072, 273.6283569152, 637.2714843648, 332.1467895296]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00047849_crop.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, two desks, and a chair.", "boxes_value": [[67.5933837824, 68.9694213632, 334.0672607744, 384.2812499968], [259.0686035456, 262.958435072, 345.90563962880003, 407.344116224], [236.99426270720005, 299.8276977664, 334.0672607744, 384.2812499968], [67.5933837824, 68.9694213632, 123.43383787519997, 236.71667481600002], [134.98437501440003, 273.1880493056, 203.27026365439997, 344.9937744384], [183.6296387072, 273.6283569152, 237.27148436480002, 332.1467895296]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00047851.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[275.248901376, 116.2480468992, 376.58312985599997, 159.7888794112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047851_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[26.248901375999992, 11.248046899200006, 127.58312985599997, 54.788879411200014]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047851.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include a mirror, three pictures, and a person.", "boxes_value": [[275.248901376, 116.2480468992, 376.58312985599997, 159.7888794112], [253.94189452800003, 91.4533691392, 326.26416015359996, 214.2422485504], [316.6307373312, 116.2480468992, 338.0661620736, 159.4539794944], [354.8127441408, 118.9274902528, 376.58312985599997, 159.7888794112], [275.248901376, 118.6973876736, 296.2924804608, 147.924621568], [322.0433349888, 125.0758056448, 332.4404296704, 154.880859392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047851_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include a mirror, three pictures, and a person.", "boxes_value": [[26.248901375999992, 11.248046899200006, 127.58312985599997, 54.788879411200014], [4.941894528000034, 0, 77.26416015359996, 65], [67.63073733120001, 11.248046899200006, 89.06616207360003, 54.45397949439999], [105.81274414080002, 13.927490252799998, 127.58312985599997, 54.788879411200014], [26.248901375999992, 13.697387673600005, 47.29248046079999, 42.92462156799999], [73.04333498879998, 20.0758056448, 83.44042967040002, 49.88085939199999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047852.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[216.49438479559998, 158.592163072, 628.2275390483, 219.9866943488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047852_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[103.49438479559998, 15.592163072000005, 515.2275390483, 76.9866943488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047852.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five helmets.", "boxes_value": [[216.49438479559998, 158.592163072, 628.2275390483, 219.9866943488], [216.49438479559998, 184.9041137664, 261.80938722990004, 211.2160034304], [292.9938965084, 166.8755493376, 336.3598633008, 191.725708032], [350.4903564566, 158.592163072, 389.47106936290004, 184.9041137664], [534.1866454982, 194.161987328, 574.6291503839, 219.9866943488], [585.8360595809, 161.5156860416, 628.2275390483, 205.8562011648]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047852_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five helmets.", "boxes_value": [[103.49438479559998, 15.592163072000005, 515.2275390483, 76.9866943488], [103.49438479559998, 41.9041137664, 148.80938722990004, 68.21600343040001], [179.99389650839998, 23.875549337600006, 223.3598633008, 48.725708032], [237.4903564566, 15.592163072000005, 276.47106936290004, 41.9041137664], [421.18664549820005, 51.16198732800001, 461.62915038389997, 76.9866943488], [472.83605958090004, 18.51568604159999, 515.2275390483, 62.85620116480001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047853.jpg", "text": "Please give me some details about the rectangle in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[192.4192504832, 394.8422851789, 281.3461303808, 638.7452392289]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047853_crop.jpg", "text": "Please give me some details about the rectangle in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[22.41925048319999, 61.8422851789, 111.34613038079999, 305.7452392289]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047853.jpg", "text": "Please give me some details about the rectangle in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a gloves, a belt, and two sneakers.", "boxes_value": [[192.4192504832, 394.8422851789, 281.3461303808, 638.7452392289], [192.061828608, 294.8850707808, 292.8010864128, 638.1574706701999], [254.8031005696, 394.8422851789, 273.1924438528, 423.805541994], [213.8469238272, 459.66491696559996, 254.765808128, 470.82470701709997], [233.603088384, 586.1822509712999, 281.3461303808, 610.1871337791999], [192.4192504832, 603.2768554618, 237.4896240128, 638.7452392289]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047853_crop.jpg", "text": "Please give me some details about the rectangle in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a gloves, a belt, and two sneakers.", "boxes_value": [[22.41925048319999, 61.8422851789, 111.34613038079999, 305.7452392289], [22.061828608000013, 0, 122.80108641279998, 305.1574706701999], [84.80310056959999, 61.8422851789, 103.19244385280001, 90.80554199400001], [43.84692382719999, 126.66491696559996, 84.765808128, 137.82470701709997], [63.60308838399999, 253.18225097129994, 111.34613038079999, 277.18713377919994], [22.41925048319999, 270.2768554618, 67.4896240128, 305.7452392289]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047854.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each mentioned object.", "boxes_value": [[113.25622556159999, 135.9210815488, 181.39318848, 260.4011841024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047854_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each mentioned object.", "boxes_value": [[17.25622556159999, 31.921081548800004, 85.39318847999999, 156.4011841024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047854.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a bracelet, and two gloves.", "boxes_value": [[113.25622556159999, 135.9210815488, 181.39318848, 260.4011841024], [122.49993899519998, 57.2201538048, 376.6937256192, 414.6995849728], [56.9414062848, 70.2081298944, 181.2551880192, 425.83215334400006], [162.45324710399998, 135.9210815488, 179.6220703488, 154.8067627008], [113.25622556159999, 157.7521362432, 174.44750976, 201.0707397632], [144.2630005248, 211.3269653504, 181.39318848, 260.4011841024]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047854_crop.jpg", "text": "Please give me some details about the rectangle in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a bracelet, and two gloves.", "boxes_value": [[17.25622556159999, 31.921081548800004, 85.39318847999999, 156.4011841024], [26.499938995199983, 0, 102, 187], [0, 0, 85.2551880192, 187], [66.45324710399998, 31.921081548800004, 83.62207034880001, 50.80676270079999], [17.25622556159999, 53.7521362432, 78.44750976, 97.07073976320001], [48.26300052479999, 107.3269653504, 85.39318847999999, 156.4011841024]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047855.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[0, 227.15301512119999, 289.4299859968, 408.0711670196]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047855_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference.", "boxes_value": [[0, 46.153015121199985, 289.4299859968, 227.0711670196]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047855.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a slide, a person, a hat, and two cars.", "boxes_value": [[0, 227.15301512119999, 289.4299859968, 408.0711670196], [78.5865478656, 227.15301512119999, 225.5382690304, 408.0711670196], [221.1398925824, 325.3471679682, 249.119140608, 382.4249267482], [266.1115997184, 316.40246829939997, 289.4299859968, 335.4811479698], [0, 318.4225463566, 26.236083968, 340.15478515899997], [91.7151489024, 325.7607422076, 148.4448242176, 351.9735107084]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047855_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give coordinates for the items you reference. For your reference, objects involved in this region include a slide, a person, a hat, and two cars.", "boxes_value": [[0, 46.153015121199985, 289.4299859968, 227.0711670196], [78.5865478656, 46.153015121199985, 225.5382690304, 227.0711670196], [221.1398925824, 144.34716796819998, 249.119140608, 201.4249267482], [266.1115997184, 135.40246829939997, 289.4299859968, 154.48114796980002], [0, 137.42254635659998, 26.236083968, 159.15478515899997], [91.7151489024, 144.7607422076, 148.4448242176, 170.9735107084]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047857.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.20294190730000003, 327.8326415872, 597.2565917807, 512.2401122816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047857_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.20294190730000003, 46.83264158719999, 597.2565917807, 231]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047857.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[0.20294190730000003, 327.8326415872, 597.2565917807, 512.2401122816], [149.00830080129998, 327.8326415872, 265.6933593447, 511.5633544704], [0.20294190730000003, 344.2924804608, 122.9725952188, 511.929870592], [0, 352.4409789952, 45.6813964866, 479.2564697088], [312.9847411893, 338.6721191424, 338.745849627, 357.5436401152], [459.9312743984, 350.498107904, 482.8192138393, 375.3666992128], [530.5178222765, 475.4187011584, 597.2565917807, 512.2401122816]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047857_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[0.20294190730000003, 46.83264158719999, 597.2565917807, 231], [149.00830080129998, 46.83264158719999, 265.6933593447, 230.5633544704], [0.20294190730000003, 63.29248046079999, 122.9725952188, 230.929870592], [0, 71.4409789952, 45.6813964866, 198.2564697088], [312.9847411893, 57.67211914239999, 338.745849627, 76.54364011519999], [459.9312743984, 69.498107904, 482.8192138393, 94.36669921279997], [530.5178222765, 194.41870115839998, 597.2565917807, 231]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047860.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference.", "boxes_value": [[340.762573256, 125.1490478592, 458.7662353631, 213.699035648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047860_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference.", "boxes_value": [[29.762573255999996, 22.149047859199996, 147.7662353631, 110.699035648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047860.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[340.762573256, 125.1490478592, 458.7662353631, 213.699035648], [396.1187744047, 95.9984741376, 475.505126991, 307.6954345472], [67.9984741486, 92.5409545728, 390.7203368915, 292.6614990336], [340.762573256, 125.1490478592, 363.41918941930004, 174.9938354688], [360.3983154683, 129.680358912, 383.9991454887, 164.9871215616], [445.3610839479, 135.7221679616, 458.7662353631, 213.699035648]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047860_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[29.762573255999996, 22.149047859199996, 147.7662353631, 110.699035648], [85.11877440469999, 0, 164.505126991, 132], [0, 0, 79.72033689149998, 132], [29.762573255999996, 22.149047859199996, 52.41918941930004, 71.9938354688], [49.398315468299984, 26.680358912000003, 72.99914548869998, 61.98712156159999], [134.3610839479, 32.72216796160001, 147.7662353631, 110.699035648]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047861.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates.", "boxes_value": [[20.7321777408, 322.9623413248, 182.0359497216, 407.3679809536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047861_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates.", "boxes_value": [[20.7321777408, 21.96234132479998, 182.0359497216, 106.36798095360001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047861.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, two cups, a tea pot, and a stuffed toy.", "boxes_value": [[20.7321777408, 322.9623413248, 182.0359497216, 407.3679809536], [20.7321777408, 355.9341430784, 87.95269777920001, 407.3679809536], [98.5709839104, 325.931335424, 113.2179565056, 355.225280768], [105.3006591744, 322.9623413248, 118.95800778240002, 353.2459716608], [146.67968747519998, 327.5860595712, 182.0359497216, 361.1963501056], [111.55426022399999, 22.4767456256, 472.9415283456001, 511.4473266688]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047861_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, two cups, a tea pot, and a stuffed toy.", "boxes_value": [[20.7321777408, 21.96234132479998, 182.0359497216, 106.36798095360001], [20.7321777408, 54.934143078399984, 87.95269777920001, 106.36798095360001], [98.5709839104, 24.931335423999997, 113.2179565056, 54.225280768000005], [105.3006591744, 21.96234132479998, 118.95800778240002, 52.24597166080002], [146.67968747519998, 26.586059571199996, 182.0359497216, 60.196350105600004], [111.55426022399999, 0, 222, 127]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047862.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[195.6229247829, 35.7374877696, 460.30700686689994, 420.140136704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047862_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[66.62292478289999, 35.7374877696, 331.30700686689994, 420.140136704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047862.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, and two sneakers.", "boxes_value": [[195.6229247829, 35.7374877696, 460.30700686689994, 420.140136704], [431.2596435781, 188.1231078912, 478.13354492530004, 319.2434692608], [362.8489990487, 181.1553344512, 412.2567138426, 318.6100463616], [260.8664550604, 278.07043456, 293.1715087591, 321.7772216832], [336.5162353722, 68.3139648512, 676.1263428054, 420.140136704], [195.6229247829, 35.7374877696, 460.30700686689994, 420.140136704], [216.4205932341, 369.2846069248, 282.7198486452, 417.8477172736], [421.2302246431, 390.3989868032, 460.92529297949994, 418.692321792]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047862_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people, and two sneakers.", "boxes_value": [[66.62292478289999, 35.7374877696, 331.30700686689994, 420.140136704], [302.2596435781, 188.1231078912, 349.13354492530004, 319.2434692608], [233.84899904870002, 181.1553344512, 283.2567138426, 318.6100463616], [131.86645506040003, 278.07043456, 164.1715087591, 321.7772216832], [207.51623537220001, 68.3139648512, 397, 420.140136704], [66.62292478289999, 35.7374877696, 331.30700686689994, 420.140136704], [87.42059323410001, 369.2846069248, 153.7198486452, 417.8477172736], [292.2302246431, 390.3989868032, 331.92529297949994, 418.692321792]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00047863.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[574.1627197247, 262.5830688256, 675.7667236072, 460.6802978304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047863_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[26.16271972469997, 49.58306882559998, 127.76672360719999, 247.68029783039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047863.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include four cabinets, a coffee machine, and a stuffed toy.", "boxes_value": [[574.1627197247, 262.5830688256, 675.7667236072, 460.6802978304], [616.4913330223, 287.863525376, 658.1340332325001, 460.6802978304], [596.3641357627, 307.2967529472, 620.6556396630999, 410.0151367168], [656.0518799120999, 288.5575561728, 703.2468261794, 464.8445434368], [631.9934082349, 241.0018310656, 669.9583740196, 289.3208617984], [644.8796386672, 262.5830688256, 675.7667236072, 296.1870117376], [574.1627197247, 347.0935668736, 601.515136691, 397.2396240384]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047863_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include four cabinets, a coffee machine, and a stuffed toy.", "boxes_value": [[26.16271972469997, 49.58306882559998, 127.76672360719999, 247.68029783039998], [68.49133302229995, 74.86352537599998, 110.13403323250009, 247.68029783039998], [48.364135762700016, 94.29675294719999, 72.65563966309992, 197.0151367168], [108.05187991209993, 75.55755617279999, 153, 251.8445434368], [83.99340823490002, 28.001831065599987, 121.9583740196, 76.32086179840002], [96.87963866719997, 49.58306882559998, 127.76672360719999, 83.18701173760002], [26.16271972469997, 134.09356687360003, 53.51513669099995, 184.2396240384]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047864.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[274.9840088064, 403.0275878963, 491.1745605632, 570.8326416099]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047864_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[54.98400880640003, 42.02758789630002, 271.1745605632, 209.83264160989995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047864.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three hats, a handbag, and a boots.", "boxes_value": [[274.9840088064, 403.0275878963, 491.1745605632, 570.8326416099], [274.9840088064, 422.2041015807, 319.7251587072, 440.1005859094], [407.6241455104, 403.0275878963, 447.2606811648, 422.1329345516], [470.9285888512, 418.7110595419, 491.1745605632, 440.6680908143], [427.55963136, 546.5903320198, 440.8929443328, 570.8326416099], [325.9780273664, 451.4637450847, 362.9579467776, 473.65173342180003]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047864_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three hats, a handbag, and a boots.", "boxes_value": [[54.98400880640003, 42.02758789630002, 271.1745605632, 209.83264160989995], [54.98400880640003, 61.20410158070001, 99.72515870720002, 79.10058590940002], [187.6241455104, 42.02758789630002, 227.26068116480002, 61.13293455159999], [250.9285888512, 57.7110595419, 271.1745605632, 79.6680908143], [207.55963136000003, 185.59033201980003, 220.89294433280003, 209.83264160989995], [105.9780273664, 90.46374508470001, 142.9579467776, 112.65173342180003]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00047868.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[296.7075195392, 170.676757812, 414.0863036928, 666.7835693163]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047868_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[29.707519539200007, 124.676757812, 147.08630369280002, 620.7835693163]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047868.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a necklace, a glasses, and two leather shoes.", "boxes_value": [[296.7075195392, 170.676757812, 414.0863036928, 666.7835693163], [223.3701171712, 135.1486206288, 485.7811279360001, 665.4565429355], [337.2008056832, 252.8444824268, 380.9603881984, 292.57360842869997], [330.6104125952, 170.676757812, 381.4180297728, 183.9918212975], [351.109069824, 609.7021484562, 414.0863036928, 661.9598388556], [296.7075195392, 615.8658447327, 329.4020385792, 666.7835693163]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047868_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a necklace, a glasses, and two leather shoes.", "boxes_value": [[29.707519539200007, 124.676757812, 147.08630369280002, 620.7835693163], [0, 89.14862062879999, 176, 619.4565429355], [70.20080568319997, 206.8444824268, 113.96038819839998, 246.57360842869997], [63.61041259519999, 124.676757812, 114.41802977280003, 137.9918212975], [84.10906982400002, 563.7021484562, 147.08630369280002, 615.9598388556], [29.707519539200007, 569.8658447327, 62.402038579199996, 620.7835693163]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047869.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe.", "boxes_value": [[463.41650394420003, 167.5255737344, 757.9484862951999, 385.5613403136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047869_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe.", "boxes_value": [[74.41650394420003, 54.52557373440001, 368.9484862951999, 272.5613403136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047869.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, 13 barrels, and a fire truck.", "boxes_value": [[463.41650394420003, 167.5255737344, 757.9484862951999, 385.5613403136], [463.41650394420003, 167.5255737344, 757.9484862951999, 385.5613403136], [694.4931640661, 342.9829101568, 720.1818847526, 381.1948852736], [668.1622314087, 338.4873657344, 693.8509521721, 376.0571289088], [706.6953124769, 339.4506835968, 736.2374267215, 377.3415527424], [681.96997069, 335.9185180672, 706.6953124769, 373.8093872128], [641.5101318226, 334.6340942336, 668.4833984143, 373.1671752704], [616.1425781417, 331.4229736448, 641.1890869342, 367.0661010944], [652.749023451, 331.1018676736, 677.1533203495, 369.3138427904], [627.0603027645001, 328.8541259776, 652.106811557, 365.7816772608], [585.9582519262, 327.2485351424, 610.3625488247, 363.5338745344], [606.1882324185, 325.3218994176, 621.6014404531, 360.3228149248], [565.4073486627, 324.3585815552, 586.2794189318, 358.3961181696], [575.361694309, 320.505249024, 597.1971435546, 357.4328003072], [627.3809814551, 269.4434814464, 653.2729492261, 300.7033691648], [407.52075193269997, 138.9962768384, 767.4438476781, 346.4929809408]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15]]}, {"image_path": "objects365_v1_00047869_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, 13 barrels, and a fire truck.", "boxes_value": [[74.41650394420003, 54.52557373440001, 368.9484862951999, 272.5613403136], [74.41650394420003, 54.52557373440001, 368.9484862951999, 272.5613403136], [305.4931640661, 229.98291015680002, 331.1818847526, 268.1948852736], [279.1622314087, 225.4873657344, 304.85095217210005, 263.0571289088], [317.69531247689997, 226.4506835968, 347.2374267215, 264.3415527424], [292.96997068999997, 222.9185180672, 317.69531247689997, 260.8093872128], [252.5101318226, 221.63409423360002, 279.48339841430004, 260.1671752704], [227.14257814170003, 218.42297364479998, 252.18908693419996, 254.06610109439998], [263.74902345099997, 218.1018676736, 288.15332034949995, 256.3138427904], [238.06030276450008, 215.8541259776, 263.106811557, 252.7816772608], [196.95825192619998, 214.2485351424, 221.36254882469996, 250.5338745344], [217.18823241849998, 212.32189941759998, 232.60144045310005, 247.3228149248], [176.40734866269997, 211.35858155519998, 197.27941893180002, 245.39611816960002], [186.36169430899997, 207.50524902400002, 208.19714355459996, 244.4328003072], [238.38098145510003, 156.44348144640003, 264.27294922609997, 187.7033691648], [18.520751932699966, 25.996276838400007, 378.44384767810004, 233.4929809408]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15]]}, {"image_path": "objects365_v1_00047871.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[454.7181396154, 171.0786743296, 567.8859863209, 449.9956054528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047871_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[28.71813961539999, 70.0786743296, 141.88598632089997, 348.9956054528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047871.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a desk, a chair, and a person.", "boxes_value": [[454.7181396154, 171.0786743296, 567.8859863209, 449.9956054528], [454.7181396154, 171.0786743296, 553.9555664041, 295.7958984192], [492.267456023, 263.6108398592, 521.0998535214, 289.7612304896], [512.4652099533, 287.4280395264, 567.8859863209, 449.9956054528], [300.5996093872, 263.7291259904, 559.6520996279, 498.7131347456], [367.68652345690003, 189.0051879936, 529.5103759669, 511.98962401279994]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047871_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a desk, a chair, and a person.", "boxes_value": [[28.71813961539999, 70.0786743296, 141.88598632089997, 348.9956054528], [28.71813961539999, 70.0786743296, 127.95556640409995, 194.7958984192], [66.26745602300002, 162.61083985919998, 95.09985352139995, 188.7612304896], [86.46520995330002, 186.42803952640003, 141.88598632089997, 348.9956054528], [0, 162.7291259904, 133.65209962790004, 397.7131347456], [0, 88.0051879936, 103.51037596690003, 410.98962401279994]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047872.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.220397952, 217.07397460800001, 185.85827635200002, 449.945983872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047872_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.220397952, 59.073974608000015, 185.85827635200002, 291.945983872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047872.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include an umbrella, three hats, and a cup.", "boxes_value": [[0.220397952, 217.07397460800001, 185.85827635200002, 449.945983872], [83.00311276800001, 217.07397460800001, 120.591552704, 297.341064432], [163.116516096, 295.186706544, 185.85827635200002, 308.78930664], [97.75390623999999, 306.7420044, 119.537109376, 328.876525872], [0.220397952, 393.98699952, 80.59851072000001, 449.945983872], [15.358093248, 375.73986816, 31.048156736000003, 399.06860352]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047872_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include an umbrella, three hats, and a cup.", "boxes_value": [[0.220397952, 59.073974608000015, 185.85827635200002, 291.945983872], [83.00311276800001, 59.073974608000015, 120.591552704, 139.341064432], [163.116516096, 137.186706544, 185.85827635200002, 150.78930664], [97.75390623999999, 148.74200439999998, 119.537109376, 170.876525872], [0.220397952, 235.98699951999998, 80.59851072000001, 291.945983872], [15.358093248, 217.73986816000001, 31.048156736000003, 241.06860352]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047873.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[11.161560046000002, 62.9746093568, 138.5118408279, 128.815551744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047873_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[11.161560046000002, 16.974609356800002, 138.5118408279, 82.815551744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047873.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two pictures, and two people.", "boxes_value": [[11.161560046000002, 62.9746093568, 138.5118408279, 128.815551744], [0, 33.5194702336, 174.0313110143, 433.76312253440005], [85.6658325329, 69.9052734464, 138.5118408279, 128.815551744], [11.161560046000002, 62.9746093568, 69.2055664097, 128.815551744], [97.2380981686, 85.5700683776, 121.9913940721, 117.2745971712], [26.8778076322, 80.4837646336, 54.3437499794, 115.2400512512]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047873_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two pictures, and two people.", "boxes_value": [[11.161560046000002, 16.974609356800002, 138.5118408279, 82.815551744], [0, 0, 170, 99], [85.6658325329, 23.905273446400003, 138.5118408279, 82.815551744], [11.161560046000002, 16.974609356800002, 69.2055664097, 82.815551744], [97.2380981686, 39.570068377599995, 121.9913940721, 71.2745971712], [26.8778076322, 34.4837646336, 54.3437499794, 69.2400512512]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047875.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference.", "boxes_value": [[56.965270978, 105.0429687296, 162.81695553199998, 243.68969728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047875_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference.", "boxes_value": [[26.965270978, 35.042968729600005, 132.81695553199998, 173.68969728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047875.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference. For your reference, objects involved in this region include a cup, a bowl, two plates, and a bottle.", "boxes_value": [[56.965270978, 105.0429687296, 162.81695553199998, 243.68969728], [97.51080323599999, 196.2689208832, 114.05792236299999, 224.66229248], [62.880554198, 136.539794944, 125.87872312399999, 159.7496948224], [127.84265139799999, 143.8864745984, 162.81695553199998, 153.5435790848], [87.06799316600001, 236.2874756096, 141.445861788, 243.68969728], [56.965270978, 105.0429687296, 89.626709018, 131.8253784064]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047875_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give coordinates for the items you reference. For your reference, objects involved in this region include a cup, a bowl, two plates, and a bottle.", "boxes_value": [[26.965270978, 35.042968729600005, 132.81695553199998, 173.68969728], [67.51080323599999, 126.2689208832, 84.05792236299999, 154.66229248], [32.880554198, 66.539794944, 95.87872312399999, 89.74969482239999], [97.84265139799999, 73.88647459840001, 132.81695553199998, 83.5435790848], [57.06799316600001, 166.2874756096, 111.445861788, 173.68969728], [26.965270978, 35.042968729600005, 59.626709018, 61.825378406400006]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047876.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[84.164733884, 389.8964233216, 412.66333007900005, 501.8651733504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047876_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[82.164733884, 28.896423321600025, 410.66333007900005, 140.86517335040003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047876.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three flowers, and three vases.", "boxes_value": [[84.164733884, 389.8964233216, 412.66333007900005, 501.8651733504], [75.550720245, 382.243225088, 144.79376219, 433.2644653568], [166.295532249, 389.8964233216, 255.947143529, 446.7485961728], [320.167968736, 390.4529418752, 438.55798340200005, 456.6763915776], [84.164733884, 430.856384256, 119.52313234399999, 461.6755371008], [188.806457532, 443.1469116416, 224.88159178100003, 478.2663574016], [365.83740231499996, 453.1279297024, 412.66333007900005, 501.8651733504]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047876_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three flowers, and three vases.", "boxes_value": [[82.164733884, 28.896423321600025, 410.66333007900005, 140.86517335040003], [73.550720245, 21.243225087999974, 142.79376219, 72.26446535679997], [164.295532249, 28.896423321600025, 253.947143529, 85.74859617279998], [318.167968736, 29.452941875199997, 436.55798340200005, 95.67639157759999], [82.164733884, 69.85638425600001, 117.52313234399999, 100.6755371008], [186.806457532, 82.14691164160001, 222.88159178100003, 117.26635740159998], [363.83740231499996, 92.12792970240002, 410.66333007900005, 140.86517335040003]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00047878.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates.", "boxes_value": [[343.20117186, 0, 533.64685056, 417.5709838922]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047878_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates.", "boxes_value": [[48.20117185999999, 0, 238.64685055999996, 417.5709838922]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047878.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a vase, a cabinet, three pictures, and a person.", "boxes_value": [[343.20117186, 0, 533.64685056, 417.5709838922], [480.26318358000003, 234.20977781219997, 533.64685056, 417.5709838922], [286.84429932, 120.4794922014, 476.39489747999994, 419.89196779480005], [433.09826658, 37.031799334400006, 490.77978515999996, 96.24731444999999], [430.33703616, 0, 490.16613768, 29.3613891382], [343.20117186, 0, 381.0437622, 29.9750060826], [335.28564456, 34.160034176399996, 385.83709716000004, 103.1989135636]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047878_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a vase, a cabinet, three pictures, and a person.", "boxes_value": [[48.20117185999999, 0, 238.64685055999996, 417.5709838922], [185.26318358000003, 234.20977781219997, 238.64685055999996, 417.5709838922], [0, 120.4794922014, 181.39489747999994, 419.89196779480005], [138.09826657999997, 37.031799334400006, 195.77978515999996, 96.24731444999999], [135.33703616000003, 0, 195.16613768000002, 29.3613891382], [48.20117185999999, 0, 86.0437622, 29.9750060826], [40.28564455999998, 34.160034176399996, 90.83709716000004, 103.1989135636]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047880.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[212.4692382906, 400.4641723392, 429.068542475, 469.6367187456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047880_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[54.4692382906, 17.46417233919999, 271.068542475, 86.63671874559998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047880.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a carpet, four people, and a luggage.", "boxes_value": [[212.4692382906, 400.4641723392, 429.068542475, 469.6367187456], [176.3665161392, 448.1211547648, 306.4906616212, 466.6646728704], [212.4692382906, 403.6459350528, 232.4752807744, 466.1646728704], [405.86224366619996, 400.4641723392, 429.068542475, 469.6367187456], [406.01098631440004, 405.5219116032, 416.27532959, 457.5872802816], [354.8381957904, 408.3483276288, 367.7801513762, 455.6534424064], [386.2839355456, 441.4086303744, 408.0224609156, 469.0166625792]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047880_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a carpet, four people, and a luggage.", "boxes_value": [[54.4692382906, 17.46417233919999, 271.068542475, 86.63671874559998], [18.3665161392, 65.1211547648, 148.4906616212, 83.66467287040001], [54.4692382906, 20.645935052799985, 74.4752807744, 83.16467287040001], [247.86224366619996, 17.46417233919999, 271.068542475, 86.63671874559998], [248.01098631440004, 22.52191160320001, 258.27532959, 74.58728028159999], [196.8381957904, 25.348327628800007, 209.78015137620002, 72.65344240640002], [228.2839355456, 58.40863037439999, 250.0224609156, 86.01666257919999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047881.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for each element you describe.", "boxes_value": [[271.338378906, 141.2212524544, 488.53540039639995, 353.9187011584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047881_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for each element you describe.", "boxes_value": [[54.338378906, 53.2212524544, 271.53540039639995, 265.9187011584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047881.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, a clock, and a bowl.", "boxes_value": [[271.338378906, 141.2212524544, 488.53540039639995, 353.9187011584], [271.338378906, 252.5109863424, 312.67932128020004, 292.2977905152], [332.1522827056, 244.2723999232, 401.15747071680005, 295.9597168128], [431.5396728678, 249.9118042112, 488.53540039639995, 353.9187011584], [347.2431640314, 141.2212524544, 390.30261230940005, 208.460327168], [288.9853515752, 285.2981567488, 333.00653074499996, 310.2841186304]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047881_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, a clock, and a bowl.", "boxes_value": [[54.338378906, 53.2212524544, 271.53540039639995, 265.9187011584], [54.338378906, 164.5109863424, 95.67932128020004, 204.2977905152], [115.15228270559999, 156.2723999232, 184.15747071680005, 207.95971681280002], [214.5396728678, 161.9118042112, 271.53540039639995, 265.9187011584], [130.2431640314, 53.2212524544, 173.30261230940005, 120.46032716799999], [71.98535157520001, 197.2981567488, 116.00653074499996, 222.28411863039997]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047882.jpg", "text": "Can you discuss the entities within the region of image ? Please point out the objects and their coordinates.", "boxes_value": [[379.36828614999996, 52.5178222592, 632.3839111652, 270.5195922944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047882_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please point out the objects and their coordinates.", "boxes_value": [[63.36828614999996, 52.5178222592, 316.38391116519995, 270.5195922944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047882.jpg", "text": "Can you discuss the entities within the region of image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a cabinet, a vase, a wild bird, and a moniter.", "boxes_value": [[379.36828614999996, 52.5178222592, 632.3839111652, 270.5195922944], [379.36828614999996, 52.5178222592, 488.5947265877, 143.4176635904], [501.71386722029996, 46.6531372032, 627.8696288805, 314.1586914304], [552.3787842032, 123.0767822336, 568.0019530958, 144.5028076032], [583.4929198937, 67.7872924672, 600.2739257538, 112.37225344], [527.7672118925, 185.6502685696, 632.3839111652, 270.5195922944]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047882_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a cabinet, a vase, a wild bird, and a moniter.", "boxes_value": [[63.36828614999996, 52.5178222592, 316.38391116519995, 270.5195922944], [63.36828614999996, 52.5178222592, 172.5947265877, 143.4176635904], [185.71386722029996, 46.6531372032, 311.86962888050004, 314.1586914304], [236.37878420319998, 123.0767822336, 252.0019530958, 144.5028076032], [267.4929198937, 67.7872924672, 284.2739257538, 112.37225344], [211.76721189249997, 185.6502685696, 316.38391116519995, 270.5195922944]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047884.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[341.41552737020004, 0.0346069504, 769.4229736440001, 176.1650390528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047884_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[107.41552737020004, 0.0346069504, 535.4229736440001, 176.1650390528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047884.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three flags, two glasses, and a street lights.", "boxes_value": [[341.41552737020004, 0.0346069504, 769.4229736440001, 176.1650390528], [447.5546875314, 97.4107055616, 539.5339355798, 163.4573974528], [518.8690185621999, 84.444457984, 577.2169189378001, 174.397644032], [341.41552737020004, 0.0346069504, 419.0666503802, 29.0717163008], [729.8393554372, 159.5399780352, 769.4229736440001, 176.1650390528], [692.630737348, 144.893981952, 721.9226074286, 163.1024780288], [644.5662841506, 24.654724096, 711.3839111264, 184.6872558592]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047884_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three flags, two glasses, and a street lights.", "boxes_value": [[107.41552737020004, 0.0346069504, 535.4229736440001, 176.1650390528], [213.55468753140002, 97.4107055616, 305.53393557979996, 163.4573974528], [284.8690185621999, 84.444457984, 343.2169189378001, 174.397644032], [107.41552737020004, 0.0346069504, 185.06665038019997, 29.0717163008], [495.83935543719997, 159.5399780352, 535.4229736440001, 176.1650390528], [458.630737348, 144.893981952, 487.92260742860003, 163.1024780288], [410.56628415060004, 24.654724096, 477.3839111264, 184.6872558592]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00047885.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[257.108398426, 138.865844736, 373.45458986610004, 235.79119872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047885_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[29.108398426000008, 24.865844736000014, 145.45458986610004, 121.79119872000001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047885.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two machinery vehicles, three street lights, and a truck.", "boxes_value": [[257.108398426, 138.865844736, 373.45458986610004, 235.79119872], [274.52764893529996, 180.1524658176, 321.446899391, 235.79119872], [257.108398426, 186.6506957824, 292.1462402344, 218.8122558464], [351.7628173527, 136.4472656384, 367.23400877349997, 177.766845696], [352.7899169873, 176.168701184, 373.45458986610004, 190.7006225408], [337.8665771464, 138.865844736, 347.6895752148, 205.9262695424], [319.35412598930003, 132.2542724608, 328.0436401382, 187.036010752]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6], [4]]}, {"image_path": "objects365_v1_00047885_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two machinery vehicles, three street lights, and a truck.", "boxes_value": [[29.108398426000008, 24.865844736000014, 145.45458986610004, 121.79119872000001], [46.527648935299965, 66.15246581759999, 93.44689939099999, 121.79119872000001], [29.108398426000008, 72.65069578239999, 64.14624023440001, 104.8122558464], [123.76281735269998, 22.44726563840001, 139.23400877349997, 63.76684569599999], [124.78991698729999, 62.168701184000014, 145.45458986610004, 76.7006225408], [109.8665771464, 24.865844736000014, 119.68957521480002, 91.92626954240001], [91.35412598930003, 18.254272460799996, 100.04364013819998, 73.03601075200001]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6], [4]]}, {"image_path": "objects365_v1_00047888.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Please mention the objects and their locations.", "boxes_value": [[194.35430909939998, 319.7073974796, 337.6801147686, 505.51641847589997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047888_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Please mention the objects and their locations.", "boxes_value": [[36.354309099399984, 46.70739747959999, 179.6801147686, 232.51641847589997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047888.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Please mention the objects and their locations. For your reference, objects involved in this region include a couch, a carpet, a desk, a power outlet, and a telephone.", "boxes_value": [[194.35430909939998, 319.7073974796, 337.6801147686, 505.51641847589997], [0, 301.80395506170004, 255.7988891418, 505.05883789530003], [194.35430909939998, 478.9053955023, 337.6801147686, 505.51641847589997], [190.34716799159997, 336.2795409948, 285.116760258, 460.2090453909], [304.1818237494, 340.94738768220003, 322.218139659, 363.7145385939], [208.7898559812, 319.7073974796, 228.3070068666, 352.6186523535]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047888_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Please mention the objects and their locations. For your reference, objects involved in this region include a couch, a carpet, a desk, a power outlet, and a telephone.", "boxes_value": [[36.354309099399984, 46.70739747959999, 179.6801147686, 232.51641847589997], [0, 28.803955061700037, 97.7988891418, 232.05883789530003], [36.354309099399984, 205.9053955023, 179.6801147686, 232.51641847589997], [32.347167991599974, 63.279540994800016, 127.116760258, 187.20904539089997], [146.1818237494, 67.94738768220003, 164.21813965899997, 90.71453859389999], [50.78985598119999, 46.70739747959999, 70.3070068666, 79.61865235350001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047890.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each mentioned object.", "boxes_value": [[194.0639648572, 93.3246459904, 663.8974609528, 377.365844736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047890_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each mentioned object.", "boxes_value": [[118.0639648572, 71.3246459904, 587.8974609528, 355.365844736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047890.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, two potted plants, a wine glass, and a plate.", "boxes_value": [[194.0639648572, 93.3246459904, 663.8974609528, 377.365844736], [326.81573488370003, 96.4697265664, 376.7343749901, 148.843383808], [194.0639648572, 93.3246459904, 253.03350827740002, 176.809570304], [437.23034670939995, 107.238830592, 505.47595217960003, 190.0611572224], [497.3101806668, 268.5721435648, 514.8283691217999, 301.9241333248], [578.7392577883, 355.0428466688, 663.8974609528, 377.365844736]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047890_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, two potted plants, a wine glass, and a plate.", "boxes_value": [[118.0639648572, 71.3246459904, 587.8974609528, 355.365844736], [250.81573488370003, 74.4697265664, 300.7343749901, 126.843383808], [118.0639648572, 71.3246459904, 177.03350827740002, 154.809570304], [361.23034670939995, 85.238830592, 429.47595217960003, 168.0611572224], [421.3101806668, 246.5721435648, 438.82836912179994, 279.9241333248], [502.73925778830005, 333.0428466688, 587.8974609528, 355.365844736]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047891.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify.", "boxes_value": [[26.083007784000003, 262.1136474624, 133.3591918622, 510.345581056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047891_crop.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify.", "boxes_value": [[26.083007784000003, 62.113647462400024, 133.3591918622, 310.345581056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047891.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and two napkins.", "boxes_value": [[26.083007784000003, 262.1136474624, 133.3591918622, 510.345581056], [26.083007784000003, 384.9578247168, 133.3591918622, 510.345581056], [82.5075073596, 313.2081909248, 190.4802856332, 450.4381103616], [0.7630615474000001, 299.276184064, 70.6652831714, 501.2897949184], [47.677551244, 263.749633792, 115.944213881, 384.2612304896], [80.4630737352, 262.1136474624, 118.17474368180001, 279.4488525312], [88.0662231302, 274.8869628928, 117.87060545, 289.1808471552]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047891_crop.jpg", "text": "I'd like some information about the specific region in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and two napkins.", "boxes_value": [[26.083007784000003, 62.113647462400024, 133.3591918622, 310.345581056], [26.083007784000003, 184.95782471680002, 133.3591918622, 310.345581056], [82.5075073596, 113.20819092480002, 160, 250.43811036160002], [0.7630615474000001, 99.276184064, 70.6652831714, 301.2897949184], [47.677551244, 63.749633792, 115.944213881, 184.2612304896], [80.4630737352, 62.113647462400024, 118.17474368180001, 79.44885253119998], [88.0662231302, 74.8869628928, 117.87060545, 89.18084715520001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047892.jpg", "text": "I'd like a thorough description of the area in the image . Specify the location of each mentioned object.", "boxes_value": [[303.8835449088, 153.11578368, 401.03051758079994, 385.9679565312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047892_crop.jpg", "text": "I'd like a thorough description of the area in the image . Specify the location of each mentioned object.", "boxes_value": [[24.88354490879999, 59.11578367999999, 122.03051758079994, 291.9679565312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047892.jpg", "text": "I'd like a thorough description of the area in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a baseball, two people, a hat, and a belt.", "boxes_value": [[303.8835449088, 153.11578368, 401.03051758079994, 385.9679565312], [377.93652341760003, 147.8977050624, 396.8994140928, 165.9576416256], [303.8835449088, 153.11578368, 401.03051758079994, 385.9679565312], [254.4474487296, 130.9302368256, 641.9713135104, 493.6409301504], [335.596435584, 150.1655273472, 365.71472171519997, 171.1510009856], [327.8165283072, 243.3750610432, 373.2556152576, 256.5523681792]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047892_crop.jpg", "text": "I'd like a thorough description of the area in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a baseball, two people, a hat, and a belt.", "boxes_value": [[24.88354490879999, 59.11578367999999, 122.03051758079994, 291.9679565312], [98.93652341760003, 53.89770506240001, 117.89941409279999, 71.95764162559999], [24.88354490879999, 59.11578367999999, 122.03051758079994, 291.9679565312], [0, 36.93023682559999, 146, 350], [56.596435584000005, 56.1655273472, 86.71472171519997, 77.1510009856], [48.816528307199974, 149.3750610432, 94.25561525760003, 162.55236817920002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047893.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference.", "boxes_value": [[651.4967041057, 357.0268554752, 736.4906005926999, 434.1563720704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047893_crop.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference.", "boxes_value": [[21.49670410570002, 20.026855475200023, 106.49060059269993, 97.15637207039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047893.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[651.4967041057, 357.0268554752, 736.4906005926999, 434.1563720704], [646.5897216422001, 245.0567016448, 769.1600342060999, 454.98803712], [717.8668213262, 366.4614867968, 736.4906005926999, 434.1563720704], [687.2092285179, 363.5963134976, 707.2656250241, 425.7995605504], [668.6878662094, 359.9738769408, 684.4055175759, 420.388610816], [651.4967041057, 357.0268554752, 668.6878662094, 414.0032958976]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047893_crop.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, and four people.", "boxes_value": [[21.49670410570002, 20.026855475200023, 106.49060059269993, 97.15637207039998], [16.58972164220006, 0, 127, 116], [87.86682132620001, 29.46148679679999, 106.49060059269993, 97.15637207039998], [57.20922851789999, 26.596313497600022, 77.26562502410002, 88.79956055039997], [38.6878662094, 22.973876940799983, 54.405517575900035, 83.38861081599998], [21.49670410570002, 20.026855475200023, 38.6878662094, 77.00329589760003]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047896.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object.", "boxes_value": [[282.6295166114, 397.3217773568, 471.48242189719997, 434.5090942464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047896_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object.", "boxes_value": [[47.62951661139999, 9.321777356799998, 236.48242189719997, 46.509094246400025]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047896.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a tea pot, three cups, and a plate.", "boxes_value": [[282.6295166114, 397.3217773568, 471.48242189719997, 434.5090942464], [404.11853030500004, 401.2843628032, 448.01171873920003, 429.022460928], [450.75500488759997, 397.3217773568, 471.48242189719997, 428.1080322048], [385.2199707048, 404.3325195264, 404.42333986180006, 434.5090942464], [313.893493683, 407.9902953984, 335.8400878658, 422.926208512], [282.6295166114, 404.4641723392, 317.216308584, 420.3296508928]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047896_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a tea pot, three cups, and a plate.", "boxes_value": [[47.62951661139999, 9.321777356799998, 236.48242189719997, 46.509094246400025], [169.11853030500004, 13.284362803199997, 213.01171873920003, 41.02246092799999], [215.75500488759997, 9.321777356799998, 236.48242189719997, 40.1080322048], [150.2199707048, 16.332519526400006, 169.42333986180006, 46.509094246400025], [78.89349368299997, 19.990295398400008, 100.8400878658, 34.92620851200002], [47.62951661139999, 16.46417233919999, 82.21630858399999, 32.329650892799975]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047897.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[325.150390656, 131.9937133568, 420.86096190719996, 239.9786376704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047897_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[24.150390656000013, 27.993713356799987, 119.86096190719996, 135.9786376704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047897.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four benches, and two street lights.", "boxes_value": [[325.150390656, 131.9937133568, 420.86096190719996, 239.9786376704], [395.5651855488, 206.134582528, 420.86096190719996, 239.9786376704], [362.767944303, 203.6922607616, 378.99218748600003, 229.8602905088], [345.2547607776, 201.3531493888, 361.4041748016, 223.544921856], [325.150390656, 201.6827392512, 344.0462646546, 221.677307136], [368.8035888726, 127.998107904, 381.3933105648, 216.9299926528], [350.70361326899996, 131.9937133568, 362.708496072, 211.5010376192]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047897_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four benches, and two street lights.", "boxes_value": [[24.150390656000013, 27.993713356799987, 119.86096190719996, 135.9786376704], [94.5651855488, 102.13458252800001, 119.86096190719996, 135.9786376704], [61.76794430299998, 99.69226076160001, 77.99218748600003, 125.8602905088], [44.254760777599984, 97.3531493888, 60.40417480159999, 119.544921856], [24.150390656000013, 97.68273925119999, 43.04626465460001, 117.677307136], [67.80358887260002, 23.998107903999994, 80.3933105648, 112.9299926528], [49.70361326899996, 27.993713356799987, 61.708496072, 107.50103761919999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047898.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[549.6665038974, 204.789306624, 699.4724350150001, 465.6603718656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047898_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[37.66650389740005, 65.789306624, 187.4724350150001, 326.6603718656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047898.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a slippers, a handbag, two bottles, and a laptop.", "boxes_value": [[549.6665038974, 204.789306624, 699.4724350150001, 465.6603718656], [543.0935058354, 370.9039917056, 574.1340332002001, 382.0544433664], [549.6665038974, 214.4578857472, 585.3919677614, 228.97857664], [687.911560305, 252.8546413056, 699.4724350150001, 279.4018350592], [644.6653253285999, 394.796491776, 675.2802342392, 465.6603718656], [610.5192870854, 204.789306624, 637.2149657926, 220.4926147584]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047898_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a slippers, a handbag, two bottles, and a laptop.", "boxes_value": [[37.66650389740005, 65.789306624, 187.4724350150001, 326.6603718656], [31.093505835400038, 231.9039917056, 62.134033200200065, 243.0544433664], [37.66650389740005, 75.45788574720001, 73.3919677614, 89.97857664], [175.91156030499997, 113.85464130560001, 187.4724350150001, 140.40183505919998], [132.66532532859992, 255.79649177599998, 163.2802342392, 326.6603718656], [98.51928708540004, 65.789306624, 125.21496579259997, 81.49261475840001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047899.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[87.6270751744, 366.7456054815, 264.1452026368, 526.3245849364]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047899_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[44.627075174400005, 40.74560548149998, 221.14520263679998, 200.32458493640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047899.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two moniters, a keyboard, three speakers, and a remote.", "boxes_value": [[87.6270751744, 366.7456054815, 264.1452026368, 526.3245849364], [87.6270751744, 453.4655761779, 133.4558715904, 515.6441650447], [117.6693115392, 511.5444335892, 154.619567872, 526.3245849364], [96.390930176, 424.3480224245, 152.01544192, 455.5930175808], [209.517211904, 427.2670898649, 250.574646016, 453.1690673557], [94.8648681472, 366.7456054815, 183.6010742272, 430.0460205136], [189.1123657216, 377.0263671527, 261.8063354368, 447.6433105831], [236.6479492096, 461.70300292300004, 264.1452026368, 475.45166013939996]], "boxes_seq": [[0], [0], [1, 5], [2], [3, 4, 6], [7]]}, {"image_path": "objects365_v1_00047899_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two moniters, a keyboard, three speakers, and a remote.", "boxes_value": [[44.627075174400005, 40.74560548149998, 221.14520263679998, 200.32458493640002], [44.627075174400005, 127.4655761779, 90.4558715904, 189.6441650447], [74.6693115392, 185.54443358920003, 111.619567872, 200.32458493640002], [53.390930176, 98.34802242450002, 109.01544192, 129.5930175808], [166.517211904, 101.26708986490002, 207.574646016, 127.16906735570001], [51.8648681472, 40.74560548149998, 140.6010742272, 104.04602051360001], [146.1123657216, 51.026367152700004, 218.8063354368, 121.6433105831], [193.6479492096, 135.70300292300004, 221.14520263679998, 149.45166013939996]], "boxes_seq": [[0], [0], [1, 5], [2], [3, 4, 6], [7]]}, {"image_path": "objects365_v1_00047902.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.2155151177, 187.9534912, 145.12854006450002, 302.950561536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047902_crop.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.2155151177, 28.953491200000002, 145.12854006450002, 143.950561536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047902.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, a bench, and three people.", "boxes_value": [[0.2155151177, 187.9534912, 145.12854006450002, 302.950561536], [0.2155151177, 187.9534912, 103.47601318650001, 290.5435180544], [113.3109741365, 270.0593261568, 163.9813842835, 305.1063232512], [102.94665526589999, 256.8858642432, 145.12854006450002, 302.950561536], [96.5972290236, 256.2626953216, 135.5979004193, 300.7390136832], [5.0263061434, 255.5621948416, 43.346801744100006, 278.869506816]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047902_crop.jpg", "text": "I need details about the area located within image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, a bench, and three people.", "boxes_value": [[0.2155151177, 28.953491200000002, 145.12854006450002, 143.950561536], [0.2155151177, 28.953491200000002, 103.47601318650001, 131.5435180544], [113.3109741365, 111.05932615680001, 163.9813842835, 146.10632325120002], [102.94665526589999, 97.88586424319999, 145.12854006450002, 143.950561536], [96.5972290236, 97.2626953216, 135.5979004193, 141.73901368320003], [5.0263061434, 96.56219484159999, 43.346801744100006, 119.86950681600001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047905.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[326.2574462976, 273.1452026368, 506.2609863168, 485.1729736192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047905_crop.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[45.257446297599984, 53.14520263679998, 225.26098631679997, 265.1729736192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047905.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two street lights, and three cars.", "boxes_value": [[326.2574462976, 273.1452026368, 506.2609863168, 485.1729736192], [326.2574462976, 273.1452026368, 370.0155028992, 437.9414062592], [277.47045895680003, 453.5696411136, 384.07617185280003, 496.3009033216], [401.65832517120003, 446.4477538816, 506.2609863168, 485.1729736192], [430.76123043840005, 397.662597632, 452.064575232, 417.9848632832], [467.079101568, 276.4781494272, 502.82446287360005, 435.5447997952]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00047905_crop.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two street lights, and three cars.", "boxes_value": [[45.257446297599984, 53.14520263679998, 225.26098631679997, 265.1729736192], [45.257446297599984, 53.14520263679998, 89.01550289919999, 217.9414062592], [0, 233.56964111360003, 103.07617185280003, 276.3009033216], [120.65832517120003, 226.44775388160002, 225.26098631679997, 265.1729736192], [149.76123043840005, 177.66259763199997, 171.06457523199998, 197.9848632832], [186.079101568, 56.47814942719998, 221.82446287360005, 215.5447997952]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00047906.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[0.1113281325, 330.2792968704, 440.5819091622, 510.9851684352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047906_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[0.1113281325, 45.2792968704, 440.5819091622, 225.98516843520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047906.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, two people, a barrel, a van, and a bakset.", "boxes_value": [[0.1113281325, 330.2792968704, 440.5819091622, 510.9851684352], [375.4801025768, 375.7039794688, 419.1914062687, 425.37591552], [43.7018432838, 364.6027221504, 148.9785766508, 426.649902336], [73.7082519186, 353.4138793984, 130.6695556998, 423.5983886848], [317.66979982270004, 308.9946899456, 366.2911376943, 348.669738752], [0.1113281325, 330.2792968704, 440.5819091622, 510.9851684352], [201.512451166, 321.5171508736, 244.0687255695, 361.0480956928]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047906_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, two people, a barrel, a van, and a bakset.", "boxes_value": [[0.1113281325, 45.2792968704, 440.5819091622, 225.98516843520002], [375.4801025768, 90.70397946880001, 419.1914062687, 140.37591551999998], [43.7018432838, 79.60272215039998, 148.9785766508, 141.64990233600003], [73.7082519186, 68.41387939840001, 130.6695556998, 138.5983886848], [317.66979982270004, 23.994689945599987, 366.2911376943, 63.669738752], [0.1113281325, 45.2792968704, 440.5819091622, 225.98516843520002], [201.512451166, 36.517150873599974, 244.0687255695, 76.04809569280002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047907.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[329.8386230784, 172.3751220736, 668.1236572416001, 449.550598144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047907_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference.", "boxes_value": [[84.83862307840002, 69.37512207360001, 423.1236572416001, 346.550598144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047907.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two gloves, and two boots.", "boxes_value": [[329.8386230784, 172.3751220736, 668.1236572416001, 449.550598144], [328.97473144319997, 16.252685568, 676.9313965056, 462.6353759744], [329.8386230784, 231.3021850624, 392.69421388800004, 308.9985961984], [426.30444334079993, 172.3751220736, 490.906005888, 239.1591186432], [428.923461888, 363.1242065408, 514.4768066304, 433.4002075136], [593.9191894272001, 373.1636352512, 668.1236572416001, 449.550598144]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047907_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two gloves, and two boots.", "boxes_value": [[84.83862307840002, 69.37512207360001, 423.1236572416001, 346.550598144], [83.97473144319997, 0, 431.9313965056, 359.6353759744], [84.83862307840002, 128.3021850624, 147.69421388800004, 205.99859619839998], [181.30444334079993, 69.37512207360001, 245.90600588799998, 136.1591186432], [183.92346188800002, 260.1242065408, 269.4768066304, 330.4002075136], [348.9191894272001, 270.1636352512, 423.1236572416001, 346.550598144]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047909.jpg", "text": "What information can you give me about the coordinates in image ? Please point out the objects and their coordinates.", "boxes_value": [[232.06304931840003, 176.6123046912, 377.979858432, 255.3148803584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047909_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please point out the objects and their coordinates.", "boxes_value": [[37.06304931840003, 20.612304691199995, 182.97985843200001, 99.31488035839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047909.jpg", "text": "What information can you give me about the coordinates in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two paddles, three people, and a boat.", "boxes_value": [[232.06304931840003, 176.6123046912, 377.979858432, 255.3148803584], [232.06304931840003, 182.8778686464, 270.9736328448, 216.1763305472], [340.8713379072, 216.3508911104, 377.979858432, 255.3148803584], [307.3376464896, 189.368041984, 345.2298584064, 249.9579467776], [308.27563476480003, 174.7364502016, 334.1623534848, 213.3789062656], [338.4768066048, 176.6123046912, 372.0544433664, 236.639404288], [186.4357910016, 206.4830322176, 416.4696044544, 281.996948224]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047909_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two paddles, three people, and a boat.", "boxes_value": [[37.06304931840003, 20.612304691199995, 182.97985843200001, 99.31488035839999], [37.06304931840003, 26.877868646400003, 75.97363284480002, 60.176330547199996], [145.87133790719997, 60.3508911104, 182.97985843200001, 99.31488035839999], [112.33764648959999, 33.368041984, 150.2298584064, 93.9579467776], [113.27563476480003, 18.736450201600007, 139.1623534848, 57.378906265599994], [143.4768066048, 20.612304691199995, 177.0544433664, 80.63940428800001], [0, 50.4830322176, 219, 118]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00047910.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[70.4972534143, 428.8914795008, 240.8758544746, 512.5537109504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047910_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[43.497253414300005, 21.891479500800017, 213.8758544746, 105]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047910.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bottle, three cups, two bowls, and a plate.", "boxes_value": [[70.4972534143, 428.8914795008, 240.8758544746, 512.5537109504], [217.6170043707, 474.2044677632, 240.8758544746, 512.4484863488], [161.9843139346, 501.9334716928, 197.186950666, 511.991394048], [103.5228881595, 472.31860352, 131.4963989515, 512.3156738048], [166.51330564879999, 414.147705088, 182.2186278987, 445.8788452352], [125.9680175957, 424.7247924736, 186.0648193221, 438.0261840896], [92.63427731590001, 428.8914795008, 124.68591308110001, 440.4301147648], [70.4972534143, 497.792663552, 155.2541503668, 512.5537109504]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047910_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bottle, three cups, two bowls, and a plate.", "boxes_value": [[43.497253414300005, 21.891479500800017, 213.8758544746, 105], [190.6170043707, 67.2044677632, 213.8758544746, 105], [134.9843139346, 94.93347169280003, 170.186950666, 104.99139404800002], [76.5228881595, 65.31860352000001, 104.4963989515, 105], [139.51330564879999, 7.147705088000009, 155.2186278987, 38.878845235200004], [98.9680175957, 17.72479247360002, 159.0648193221, 31.026184089600008], [65.63427731590001, 21.891479500800017, 97.68591308110001, 33.43011476480001], [43.497253414300005, 90.79266355200002, 128.2541503668, 105]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00047911.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[139.886474637, 191.2488403092, 418.73852540660005, 292.121093766]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047911_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[69.88647463699999, 25.248840309200006, 348.73852540660005, 126.121093766]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047911.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a chair, a picture, and a globe.", "boxes_value": [[139.886474637, 191.2488403092, 418.73852540660005, 292.121093766], [210.82550049960003, 220.14556885439998, 420.1976318526, 339.23187254280003], [226.0631713606, 191.2488403092, 285.8808593438, 233.84155272959998], [140.4375000238, 223.6685790852, 270.9000244384, 292.121093766], [139.886474637, 194.630249004, 168.53302004839998, 226.38305663879999], [384.39611814520003, 197.2225952328, 418.73852540660005, 230.9603881836]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047911_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a chair, a picture, and a globe.", "boxes_value": [[69.88647463699999, 25.248840309200006, 348.73852540660005, 126.121093766], [140.82550049960003, 54.14556885439998, 350.1976318526, 151], [156.0631713606, 25.248840309200006, 215.8808593438, 67.84155272959998], [70.4375000238, 57.66857908520001, 200.9000244384, 126.121093766], [69.88647463699999, 28.630249004000007, 98.53302004839998, 60.383056638799985], [314.39611814520003, 31.22259523279999, 348.73852540660005, 64.9603881836]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00047913.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[387.9091186688, 376.4248047156, 494.9091796992, 444.12548830760005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047913_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[26.909118668799977, 17.424804715599976, 133.90917969920002, 85.12548830760005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047913.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a barrel, a bottle, and two cups.", "boxes_value": [[387.9091186688, 376.4248047156, 494.9091796992, 444.12548830760005], [133.6885986304, 174.5383300856, 433.2828979712, 675.9552002272], [452.4315795968, 381.24804687159997, 494.9091796992, 415.960937474], [387.9091186688, 376.4248047156, 407.9824218624, 434.9720458872], [427.0432739328, 409.98559571400006, 456.9534912, 444.12548830760005], [450.140991232, 382.2246093472, 493.9830322176, 418.35363768120004]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047913_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a barrel, a bottle, and two cups.", "boxes_value": [[26.909118668799977, 17.424804715599976, 133.90917969920002, 85.12548830760005], [0, 0, 72.28289797119999, 102], [91.43157959680002, 22.248046871599968, 133.90917969920002, 56.96093747399999], [26.909118668799977, 17.424804715599976, 46.982421862399974, 75.97204588720001], [66.04327393279999, 50.985595714000056, 95.95349119999997, 85.12548830760005], [89.14099123199998, 23.224609347199987, 132.9830322176, 59.35363768120004]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047914.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[242.3493042, 113.7968750025, 500.30578614999996, 572.5963134792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047914_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[65.3493042, 113.7968750025, 323, 572.5963134792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047914.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a person, a tie, a pen, a cup, and a microphone.", "boxes_value": [[242.3493042, 113.7968750025, 500.30578614999996, 572.5963134792], [0.43255615000000003, 365.56726075089995, 499.4697876, 665.0439453281], [449.27752685, 249.3774414075, 499.975708, 383.3656006068], [389.7414551, 113.7968750025, 406.32769774999997, 155.6394653089], [207.40777590000002, 438.7427978347, 283.2860718, 446.1510009518], [472.16900634999996, 466.9392089947, 500.30578614999996, 547.2652588148], [242.3493042, 283.4691161929, 449.07055665, 572.5963134792]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047914_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a person, a tie, a pen, a cup, and a microphone.", "boxes_value": [[65.3493042, 113.7968750025, 323, 572.5963134792], [0, 365.56726075089995, 322.4697876, 665.0439453281], [272.27752685, 249.3774414075, 322.975708, 383.3656006068], [212.7414551, 113.7968750025, 229.32769774999997, 155.6394653089], [30.40777590000002, 438.7427978347, 106.2860718, 446.1510009518], [295.16900634999996, 466.9392089947, 323, 547.2652588148], [65.3493042, 283.4691161929, 272.07055665, 572.5963134792]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047915.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[554.1423339897, 26.5233764864, 771.8193359175, 358.9552002048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047915_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[55.14233398969998, 26.5233764864, 272, 358.9552002048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047915.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a gloves, three street lights, and a bus.", "boxes_value": [[554.1423339897, 26.5233764864, 771.8193359175, 358.9552002048], [599.0512695183, 324.7100219904, 630.2924804937, 358.9552002048], [554.1423339897, 78.9328003072, 566.6530762053, 129.828674304], [743.6180420147999, 26.5233764864, 771.8193359175, 252.1340332032], [556.5491943732001, 130.7638550016, 573.9400635087001, 228.05847168], [564.5350341951, 166.6098632704, 734.4337158459, 234.9837036032]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047915_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a gloves, three street lights, and a bus.", "boxes_value": [[55.14233398969998, 26.5233764864, 272, 358.9552002048], [100.05126951830005, 324.7100219904, 131.29248049370005, 358.9552002048], [55.14233398969998, 78.9328003072, 67.65307620529995, 129.828674304], [244.61804201479993, 26.5233764864, 272, 252.1340332032], [57.549194373200066, 130.7638550016, 74.94006350870006, 228.05847168], [65.53503419510002, 166.6098632704, 235.4337158459, 234.9837036032]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00047916.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[6.6467895424000005, 35.5162353664, 489.32312011519997, 330.9896240128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047916_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[6.6467895424000005, 35.5162353664, 489.32312011519997, 330.9896240128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047916.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a lamp, a person, and two moniters.", "boxes_value": [[6.6467895424000005, 35.5162353664, 489.32312011519997, 330.9896240128], [453.3920898304, 280.516479488, 489.32312011519997, 330.9896240128], [6.6467895424000005, 35.5162353664, 229.1450195328, 192.8591919104], [371.7121581824, 258.0718383616, 385.0410156544, 288.3645629952], [178.6394043392, 261.6823119872, 200.0828857216, 282.9155884032], [115.5701904, 248.64801024, 130.7067870976, 272.404052736]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047916_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a lamp, a person, and two moniters.", "boxes_value": [[6.6467895424000005, 35.5162353664, 489.32312011519997, 330.9896240128], [453.3920898304, 280.516479488, 489.32312011519997, 330.9896240128], [6.6467895424000005, 35.5162353664, 229.1450195328, 192.8591919104], [371.7121581824, 258.0718383616, 385.0410156544, 288.3645629952], [178.6394043392, 261.6823119872, 200.0828857216, 282.9155884032], [115.5701904, 248.64801024, 130.7067870976, 272.404052736]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047917.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[385.74182127520004, 201.743652352, 585.7093505592001, 336.2166137856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047917_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[50.74182127520004, 33.743652352, 250.70935055920006, 168.21661378559998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047917.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include a baseball bat, a baseball, a person, a helmet, and a gloves.", "boxes_value": [[385.74182127520004, 201.743652352, 585.7093505592001, 336.2166137856], [385.74182127520004, 201.743652352, 585.7093505592001, 336.2166137856], [379.96875, 245.0417480704, 412.218383764, 268.333190912], [373.8970947552, 234.490966784, 612.185913048, 511.7978515456], [493.9616699244, 233.1891479552, 564.9956054872, 310.7688598528], [543.1762695652, 303.7382202368, 585.3602294624, 351.0133056512]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047917_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include a baseball bat, a baseball, a person, a helmet, and a gloves.", "boxes_value": [[50.74182127520004, 33.743652352, 250.70935055920006, 168.21661378559998], [50.74182127520004, 33.743652352, 250.70935055920006, 168.21661378559998], [44.96875, 77.04174807039999, 77.21838376400001, 100.33319091200002], [38.89709475519999, 66.490966784, 277.185913048, 201], [158.96166992439998, 65.18914795520001, 229.9956054872, 142.7688598528], [208.17626956519996, 135.73822023679998, 250.36022946239996, 183.01330565120003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047918.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[393.12023928400004, 64.5791015424, 582.260375966, 482.54443361280005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047918_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[48.120239284000036, 64.5791015424, 237.26037596599997, 482.54443361280005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047918.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a sneakers, a handbag, and a hat.", "boxes_value": [[393.12023928400004, 64.5791015424, 582.260375966, 482.54443361280005], [393.12023928400004, 64.8598022656, 582.260375966, 482.54443361280005], [329.08837887, 82.5916748288, 481.77966311200004, 484.5146484224], [522.665527358, 459.026977536, 583.847534182, 482.6654663168], [382.90441896, 290.121459968, 415.572509798, 334.5941772288], [543.527954122, 64.5791015424, 569.302856456, 79.7217406976]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047918_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a sneakers, a handbag, and a hat.", "boxes_value": [[48.120239284000036, 64.5791015424, 237.26037596599997, 482.54443361280005], [48.120239284000036, 64.8598022656, 237.26037596599997, 482.54443361280005], [0, 82.5916748288, 136.77966311200004, 484.5146484224], [177.66552735799996, 459.026977536, 238.847534182, 482.6654663168], [37.90441895999999, 290.121459968, 70.572509798, 334.5941772288], [198.52795412199998, 64.5791015424, 224.30285645599997, 79.7217406976]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047920.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe.", "boxes_value": [[0.003423808, 97.9958496, 547.616821312, 435.51850632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047920_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe.", "boxes_value": [[0.003423808, 84.9958496, 547.616821312, 422.51850632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047920.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, two leather shoes, a glasses, and a moniter.", "boxes_value": [[0.003423808, 97.9958496, 547.616821312, 435.51850632], [463.43420409600003, 181.81848144, 547.616821312, 318.46826169599996], [422.60888672, 189.47021486399998, 440.85522464, 212.154785136], [181.012390144, 48.362670912, 359.34716799999995, 230.964111312], [0.003423808, 176.53159684800002, 207.63006009600002, 327.334101072], [0.549809664, 325.148557536, 160.094488128, 435.51850632], [219.3773376, 139.416365136, 288.690289856, 163.769564592], [465.911987328, 97.9958496, 504.04602048000004, 129.210998544]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047920_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, two leather shoes, a glasses, and a moniter.", "boxes_value": [[0.003423808, 84.9958496, 547.616821312, 422.51850632], [463.43420409600003, 168.81848144, 547.616821312, 305.46826169599996], [422.60888672, 176.47021486399998, 440.85522464, 199.154785136], [181.012390144, 35.362670912, 359.34716799999995, 217.964111312], [0.003423808, 163.53159684800002, 207.63006009600002, 314.334101072], [0.549809664, 312.148557536, 160.094488128, 422.51850632], [219.3773376, 126.416365136, 288.690289856, 150.769564592], [465.911987328, 84.9958496, 504.04602048000004, 116.210998544]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00047921.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[50.6200561695, 32.0612792832, 246.01062012350002, 444.1636439552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047921_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[49.6200561695, 32.0612792832, 245.01062012350002, 444.1636439552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047921.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two lamps, a cabinet, a person, and a handbag.", "boxes_value": [[50.6200561695, 32.0612792832, 246.01062012350002, 444.1636439552], [229.7136230579, 65.7922363392, 246.01062012350002, 103.6922607616], [147.8497314214, 32.0612792832, 178.169677696, 85.1212768768], [50.6200561695, 147.314514176, 184.2694091978, 337.0966186496], [70.9144286775, 245.4290771456, 214.17700198109998, 511.9943847424], [150.6611402804, 357.7820776448, 240.06304110870002, 444.1636439552]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047921_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two lamps, a cabinet, a person, and a handbag.", "boxes_value": [[49.6200561695, 32.0612792832, 245.01062012350002, 444.1636439552], [228.7136230579, 65.7922363392, 245.01062012350002, 103.6922607616], [146.8497314214, 32.0612792832, 177.169677696, 85.1212768768], [49.6200561695, 147.314514176, 183.2694091978, 337.0966186496], [69.9144286775, 245.4290771456, 213.17700198109998, 511.9943847424], [149.6611402804, 357.7820776448, 239.06304110870002, 444.1636439552]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047922.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for each element you describe.", "boxes_value": [[82.89318850059999, 124.6948242432, 200.96252439719999, 375.6463012864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047922_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for each element you describe.", "boxes_value": [[29.89318850059999, 63.6948242432, 147.96252439719999, 314.6463012864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047922.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, and a hat.", "boxes_value": [[82.89318850059999, 124.6948242432, 200.96252439719999, 375.6463012864], [107.921203601, 124.669006336, 243.0646362301, 289.8443603456], [82.4774780235, 168.882629376, 327.7379150635, 376.1860351488], [101.8298950112, 342.7700805632, 134.7061157202, 375.6463012864], [82.89318850059999, 339.0879516672, 116.2954101852, 375.6463012864], [147.03912355769998, 124.6948242432, 200.96252439719999, 172.4112548864]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047922_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two sneakers, and a hat.", "boxes_value": [[29.89318850059999, 63.6948242432, 147.96252439719999, 314.6463012864], [54.921203601, 63.669006335999995, 177, 228.84436034560002], [29.477478023499998, 107.88262937600001, 177, 315.1860351488], [48.829895011199994, 281.7700805632, 81.70611572019999, 314.6463012864], [29.89318850059999, 278.0879516672, 63.2954101852, 314.6463012864], [94.03912355769998, 63.6948242432, 147.96252439719999, 111.4112548864]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047923.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[420.43273927679996, 140.3480224768, 767.9205321984, 510.7461548032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047923_crop.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[87.43273927679996, 93.3480224768, 434.9205321984, 463.7461548032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047923.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a couch, two pillows, a lamp, and a bottle.", "boxes_value": [[420.43273927679996, 140.3480224768, 767.9205321984, 510.7461548032], [305.0084228352, 238.91760256, 767.7445068288, 509.97644042239995], [462.0903320064, 324.4030761472, 622.8289794816001, 473.0507812352], [594.3796386816, 335.07159424, 767.9205321984, 510.7461548032], [420.43273927679996, 140.3480224768, 489.839965824, 269.9405517824], [479.3126220288, 234.2662963712, 497.52685547519997, 257.5566406144]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047923_crop.jpg", "text": "What does the selected region in the image encompass? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a couch, two pillows, a lamp, and a bottle.", "boxes_value": [[87.43273927679996, 93.3480224768, 434.9205321984, 463.7461548032], [0, 191.91760256, 434.7445068288, 462.97644042239995], [129.0903320064, 277.4030761472, 289.8289794816001, 426.0507812352], [261.3796386816, 288.07159424, 434.9205321984, 463.7461548032], [87.43273927679996, 93.3480224768, 156.839965824, 222.9405517824], [146.31262202879998, 187.2662963712, 164.52685547519997, 210.5566406144]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047925.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[630.3986816256, 377.89733888, 710.7547607040001, 510.4540405248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047925_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[20.398681625600034, 33.89733888000001, 100.75476070400009, 166.4540405248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047925.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two bottles, and three people.", "boxes_value": [[630.3986816256, 377.89733888, 710.7547607040001, 510.4540405248], [671.061157248, 404.8120727552, 710.7547607040001, 431.0098266624], [642.7977295104, 377.89733888, 680.1258544896, 401.519104], [599.8176269568, 378.401123072, 678.6361083648, 476.795410176], [622.9996337664, 402.6133423104, 727.0605468672001, 500.4924926976], [630.3986816256, 417.510192896, 707.7708740352, 510.4540405248]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047925_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two bottles, and three people.", "boxes_value": [[20.398681625600034, 33.89733888000001, 100.75476070400009, 166.4540405248], [61.06115724799997, 60.812072755200006, 100.75476070400009, 87.00982666239997], [32.79772951040002, 33.89733888000001, 70.12585448959999, 57.51910400000003], [0, 34.40112307200002, 68.63610836479995, 132.79541017600002], [12.999633766400052, 58.613342310400014, 117.06054686720006, 156.49249269760003], [20.398681625600034, 73.51019289599998, 97.7708740352, 166.4540405248]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047926.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[442.90417482759995, 40.1178588672, 680.4941406170001, 436.4592895488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047926_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[59.904174827599945, 40.1178588672, 297.4941406170001, 436.4592895488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047926.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a carpet, two cabinets, a bench, a desk, and a picture.", "boxes_value": [[442.90417482759995, 40.1178588672, 680.4941406170001, 436.4592895488], [403.2475585645, 366.7915039232, 670.4987793026, 509.9002075136], [442.90417482759995, 78.8498534912, 545.4941406516, 334.0316772352], [564.254394522, 110.47491456, 638.2429199123, 289.8551025152], [476.5886230402, 260.59069824, 680.4941406170001, 436.4592895488], [653.7315673788, 352.985412608, 680.4941406170001, 435.8220825088], [553.0532226616, 40.1178588672, 574.718139615, 62.4199828992]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047926_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a carpet, two cabinets, a bench, a desk, and a picture.", "boxes_value": [[59.904174827599945, 40.1178588672, 297.4941406170001, 436.4592895488], [20.247558564500025, 366.7915039232, 287.49877930260004, 509.9002075136], [59.904174827599945, 78.8498534912, 162.4941406516, 334.0316772352], [181.25439452199998, 110.47491456, 255.24291991229995, 289.8551025152], [93.58862304019999, 260.59069824, 297.4941406170001, 436.4592895488], [270.73156737880004, 352.985412608, 297.4941406170001, 435.8220825088], [170.0532226616, 40.1178588672, 191.71813961500004, 62.4199828992]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047927.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[145.735900849, 379.441223168, 246.999206531, 453.8392944128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047927_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[25.73590084899999, 19.441223168000022, 126.999206531, 93.83929441279997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047927.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a flower, a person, and two bottles.", "boxes_value": [[145.735900849, 379.441223168, 246.999206531, 453.8392944128], [0, 294.6013793792, 191.171936032, 500.2543334912], [23.886596657000002, 352.7493285888, 200.214416479, 504.7755737088], [145.735900849, 434.7051391488, 187.420959495, 453.8392944128], [227.114013665, 386.2982177792, 246.999206531, 434.9826050048], [213.74291992899998, 379.441223168, 233.97094728700003, 429.8398437376]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047927_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a flower, a person, and two bottles.", "boxes_value": [[25.73590084899999, 19.441223168000022, 126.999206531, 93.83929441279997], [0, 0, 71.17193603199999, 112], [0, 0, 80.214416479, 112], [25.73590084899999, 74.70513914880001, 67.420959495, 93.83929441279997], [107.11401366499999, 26.2982177792, 126.999206531, 74.9826050048], [93.74291992899998, 19.441223168000022, 113.97094728700003, 69.8398437376]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047928.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention.", "boxes_value": [[389.7214355712, 253.9129638912, 740.2175292672, 382.2281494016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047928_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention.", "boxes_value": [[87.7214355712, 32.91296389120001, 438.2175292672, 161.22814940159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047928.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, a moniter, a laptop, a keyboard, and a speaker.", "boxes_value": [[389.7214355712, 253.9129638912, 740.2175292672, 382.2281494016], [556.7735596032, 265.4082031104, 740.2175292672, 382.2281494016], [372.41711424000005, 300.0891113472, 747.5186767872001, 509.087341312], [444.5019531264, 215.9702148608, 511.28588866559994, 334.6973266432], [413.0740966656, 223.3906860544, 454.54138183680004, 298.9046020608], [483.35009763840003, 319.4199828992, 572.8319091456001, 355.64916992], [389.7214355712, 253.9129638912, 419.4541015296, 297.7009887744]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047928_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a desk, a moniter, a laptop, a keyboard, and a speaker.", "boxes_value": [[87.7214355712, 32.91296389120001, 438.2175292672, 161.22814940159998], [254.77355960320006, 44.408203110399995, 438.2175292672, 161.22814940159998], [70.41711424000005, 79.0891113472, 445.5186767872001, 193], [142.5019531264, 0, 209.28588866559994, 113.69732664319997], [111.0740966656, 2.3906860543999926, 152.54138183680004, 77.90460206080002], [181.35009763840003, 98.41998289920002, 270.8319091456001, 134.64916992000002], [87.7214355712, 32.91296389120001, 117.4541015296, 76.7009887744]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047929.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[317.7796020596, 143.8999023616, 408.24682617220003, 339.0777587712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047929_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[22.779602059599995, 48.8999023616, 113.24682617220003, 244.07775877120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047929.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, a cabinet, a barrel, a microwave, and a toiletry.", "boxes_value": [[317.7796020596, 143.8999023616, 408.24682617220003, 339.0777587712], [77.8009033278, 244.99517824, 420.7249756077, 511.56243896319995], [317.7796020596, 208.1802978304, 401.63586427219997, 339.0777587712], [335.63494875189997, 143.8999023616, 370.7861328394, 195.7941894656], [337.8004760649, 165.101623552, 400.4385986026, 218.5397338624], [396.4616699146, 247.8268432384, 408.24682617220003, 277.5703124992]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047929_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, a cabinet, a barrel, a microwave, and a toiletry.", "boxes_value": [[22.779602059599995, 48.8999023616, 113.24682617220003, 244.07775877120002], [0, 149.99517824, 125.7249756077, 292], [22.779602059599995, 113.18029783040001, 106.63586427219997, 244.07775877120002], [40.63494875189997, 48.8999023616, 75.78613283940001, 100.7941894656], [42.80047606490001, 70.101623552, 105.4385986026, 123.5397338624], [101.46166991460001, 152.8268432384, 113.24682617220003, 182.57031249919999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047930.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 208.3357543936, 299.93817135570004, 512.061523456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047930_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 76.33575439360001, 299.93817135570004, 380]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047930.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a lamp, a couch, a desk, and two cabinets.", "boxes_value": [[0, 208.3357543936, 299.93817135570004, 512.061523456], [0, 208.3357543936, 18.6117553957, 251.6912231424], [107.57635498859999, 248.7550659072, 199.4999389767, 426.889465344], [0, 354.7008667136, 147.5656738019, 512.061523456], [79.0125122023, 407.154418944, 236.37322997540002, 511.5422363136], [195.82318113910003, 143.0114135552, 272.0854492503, 321.3539428864], [198.82257078979998, 297.1475219968, 299.93817135570004, 469.4185790976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047930_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a lamp, a couch, a desk, and two cabinets.", "boxes_value": [[0, 76.33575439360001, 299.93817135570004, 380], [0, 76.33575439360001, 18.6117553957, 119.69122314239999], [107.57635498859999, 116.75506590719999, 199.4999389767, 294.889465344], [0, 222.70086671360002, 147.5656738019, 380], [79.0125122023, 275.154418944, 236.37322997540002, 379.5422363136], [195.82318113910003, 11.011413555199994, 272.0854492503, 189.35394288639998], [198.82257078979998, 165.14752199679998, 299.93817135570004, 337.4185790976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047931.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[253.6456298841, 189.0078125056, 411.3841552542, 496.4452514816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047931_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[39.6456298841, 77.00781250559999, 197.38415525419998, 384.4452514816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047931.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a person, a book, a sneakers, and a glasses.", "boxes_value": [[253.6456298841, 189.0078125056, 411.3841552542, 496.4452514816], [240.9910278084, 250.1674194432, 506.1793212886, 509.8691406336], [256.26684571320004, 161.6239624192, 449.830078136, 511.23168947199997], [253.6456298841, 203.6021728768, 411.3841552542, 320.4631958016], [260.406982442, 454.5282592768, 329.4467163151, 496.4452514816], [350.3823242293, 189.0078125056, 387.648193383, 200.6533813248]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047931_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a person, a book, a sneakers, and a glasses.", "boxes_value": [[39.6456298841, 77.00781250559999, 197.38415525419998, 384.4452514816], [26.99102780839999, 138.1674194432, 236, 397.8691406336], [42.26684571320004, 49.62396241920001, 235.830078136, 399.23168947199997], [39.6456298841, 91.60217287680001, 197.38415525419998, 208.46319580160002], [46.406982442000015, 342.5282592768, 115.44671631509999, 384.4452514816], [136.38232422930002, 77.00781250559999, 173.64819338299998, 88.6533813248]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047932.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[469.3146972672, 205.4086303744, 606.4418945279999, 273.8098755072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047932_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[34.31469726720002, 17.40863037439999, 171.44189452799992, 85.80987550719999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047932.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two pillows, two beds, a nightstand, and a telephone.", "boxes_value": [[469.3146972672, 205.4086303744, 606.4418945279999, 273.8098755072], [515.6838379008, 205.4086303744, 606.4418945279999, 273.8098755072], [483.948974592, 254.7080078336, 606.9653319936, 286.2932739072], [342.914062464, 210.9109496832, 754.7803954944, 500.1258545152], [286.16027834880003, 206.746582016, 607.0478515968, 419.5455932416], [459.30676270080005, 257.139831552, 500.5639648512, 266.2485961728], [469.3146972672, 251.0867919872, 501.97741701120003, 264.110534656]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047932_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two pillows, two beds, a nightstand, and a telephone.", "boxes_value": [[34.31469726720002, 17.40863037439999, 171.44189452799992, 85.80987550719999], [80.68383790079997, 17.40863037439999, 171.44189452799992, 85.80987550719999], [48.94897459200001, 66.70800783359999, 171.9653319936, 98.29327390719999], [0, 22.910949683199988, 205, 102], [0, 18.74658201599999, 172.0478515968, 102], [24.30676270080005, 69.13983155199998, 65.56396485120001, 78.24859617279998], [34.31469726720002, 63.0867919872, 66.97741701120003, 76.11053465600003]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00047935.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object.", "boxes_value": [[0.2026977792, 0, 356.6253662208, 308.76226805920004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047935_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object.", "boxes_value": [[0.2026977792, 0, 356.6253662208, 308.76226805920004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047935.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two pictures, and four people.", "boxes_value": [[0.2026977792, 0, 356.6253662208, 308.76226805920004], [151.2949218816, 0, 356.6253662208, 308.76226805920004], [0.2026977792, 0.1209106704, 108.6791382016, 301.013977018], [281.8685302784, 35.1580200296, 339.3750000128, 183.790161108], [204.0136108544, 159.018127441, 332.2973022208, 299.68780520679996], [173.0485839872, 42.235778826, 347.3374633984, 298.803100554], [0, 0.9121093447999999, 37.6871948288, 144.8626709084]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047935_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two pictures, and four people.", "boxes_value": [[0.2026977792, 0, 356.6253662208, 308.76226805920004], [151.2949218816, 0, 356.6253662208, 308.76226805920004], [0.2026977792, 0.1209106704, 108.6791382016, 301.013977018], [281.8685302784, 35.1580200296, 339.3750000128, 183.790161108], [204.0136108544, 159.018127441, 332.2973022208, 299.68780520679996], [173.0485839872, 42.235778826, 347.3374633984, 298.803100554], [0, 0.9121093447999999, 37.6871948288, 144.8626709084]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047936.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe.", "boxes_value": [[135.42376707719998, 361.574218728, 226.0612792908, 464.425415064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047936_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe.", "boxes_value": [[23.423767077199983, 26.574218728000005, 114.0612792908, 129.425415064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047936.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a potted plant, a storage box, and two people.", "boxes_value": [[135.42376707719998, 361.574218728, 226.0612792908, 464.425415064], [130.4524535928, 393.35058590399996, 203.3345947272, 528.587402328], [171.96508790879997, 361.574218728, 198.762023922, 396.2885742], [135.42376707719998, 378.626953104, 175.6192016748, 399.94262697600004], [202.84680176999998, 418.214599632, 214.5719604336, 464.425415064], [214.2543945072, 420.439331088, 226.0612792908, 458.732055672]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047936_crop.jpg", "text": "Detail the chosen region in the depicted scene . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a potted plant, a storage box, and two people.", "boxes_value": [[23.423767077199983, 26.574218728000005, 114.0612792908, 129.425415064], [18.452453592799998, 58.350585903999956, 91.3345947272, 155], [59.96508790879997, 26.574218728000005, 86.762023922, 61.28857420000003], [23.423767077199983, 43.626953103999995, 63.619201674799996, 64.94262697600004], [90.84680176999998, 83.21459963199999, 102.5719604336, 129.425415064], [102.2543945072, 85.43933108800002, 114.0612792908, 123.732055672]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047937.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for each element you describe.", "boxes_value": [[426.35888674710003, 308.8326415872, 682.1972655922, 482.8738403328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047937_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for each element you describe.", "boxes_value": [[64.35888674710003, 43.83264158719999, 320.1972655922, 217.8738403328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047937.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a van, and three cars.", "boxes_value": [[426.35888674710003, 308.8326415872, 682.1972655922, 482.8738403328], [423.1152343721, 302.4814453248, 436.61633298780004, 347.2808227328], [426.35888674710003, 308.8326415872, 580.3640137043, 375.3574829056], [547.7601318689, 336.1062622208, 644.9814453125, 409.0935668736], [583.6614990465, 337.087646464, 667.0607910252, 448.0887451136], [630.2583007627001, 338.2747802624, 682.1972655922, 482.8738403328]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047937_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a van, and three cars.", "boxes_value": [[64.35888674710003, 43.83264158719999, 320.1972655922, 217.8738403328], [61.11523437210002, 37.481445324800006, 74.61633298780004, 82.2808227328], [64.35888674710003, 43.83264158719999, 218.36401370429996, 110.35748290560002], [185.76013186889998, 71.10626222079998, 282.9814453125, 144.09356687360003], [221.6614990465, 72.08764646399999, 305.06079102520005, 183.0887451136], [268.25830076270006, 73.27478026239999, 320.1972655922, 217.8738403328]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047938.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference.", "boxes_value": [[427.82360836600003, 370.0795288064, 769.644042937, 512.6137695232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047938_crop.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference.", "boxes_value": [[85.82360836600003, 36.07952880639999, 427.644042937, 178]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047938.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include four cars, and a van.", "boxes_value": [[427.82360836600003, 370.0795288064, 769.644042937, 512.6137695232], [684.8394775529999, 386.5507812352, 749.805297857, 424.5953369088], [571.5118408230001, 376.7897949184, 689.19201659, 436.6051635712], [427.82360836600003, 426.1620483584, 753.3480224269999, 512.6137695232], [690.011962871, 403.2456665088, 769.644042937, 481.5337524224], [719.8519287190001, 370.0795288064, 742.0238036750001, 385.722228992]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2]]}, {"image_path": "objects365_v1_00047938_crop.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include four cars, and a van.", "boxes_value": [[85.82360836600003, 36.07952880639999, 427.644042937, 178], [342.83947755299994, 52.55078123520002, 407.805297857, 90.5953369088], [229.51184082300006, 42.78979491839999, 347.19201659, 102.60516357120002], [85.82360836600003, 92.1620483584, 411.34802242699993, 178], [348.01196287100004, 69.24566650880001, 427.644042937, 147.53375242240003], [377.85192871900006, 36.07952880639999, 400.02380367500007, 51.722228992]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2]]}, {"image_path": "objects365_v1_00047942.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[199.3676757535, 85.4273681408, 352.2225342028, 162.1820678656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047942_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates.", "boxes_value": [[38.36767575350001, 19.4273681408, 191.22253420279998, 96.1820678656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047942.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a fan, and three plates.", "boxes_value": [[199.3676757535, 85.4273681408, 352.2225342028, 162.1820678656], [231.5352173017, 85.4273681408, 263.7202758875, 120.486145024], [250.5395507841, 90.791931136, 310.8251953171, 124.3796386816], [199.3676757535, 119.7975463936, 241.30126956209998, 150.0078125056], [285.94030764769997, 133.3245239296, 318.8560180507, 156.7713012736], [320.6596069302, 136.9317016576, 352.2225342028, 162.1820678656]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047942_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a fan, and three plates.", "boxes_value": [[38.36767575350001, 19.4273681408, 191.22253420279998, 96.1820678656], [70.53521730169999, 19.4273681408, 102.72027588750001, 54.486145023999995], [89.53955078409999, 24.791931136000002, 149.8251953171, 58.3796386816], [38.36767575350001, 53.7975463936, 80.30126956209998, 84.00781250559999], [124.94030764769997, 67.32452392959999, 157.85601805070002, 90.77130127359999], [159.65960693020003, 70.9317016576, 191.22253420279998, 96.1820678656]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047944.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.110351552, 238.026428208, 186.1760864, 480.219421392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047944_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.110351552, 61.026428208, 186.1760864, 303]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047944.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a person, a sneakers, and a wine glass.", "boxes_value": [[1.110351552, 238.026428208, 186.1760864, 480.219421392], [66.544311552, 254.835815424, 186.1760864, 480.219421392], [1.110351552, 248.887268064, 106.20129395199999, 443.867248512], [91.80065920000001, 162.464721696, 266.558349632, 479.78930663999995], [103.847900416, 458.99755857599996, 131.299560576, 479.973388656], [4.701416, 238.026428208, 20.40124512, 269.373596208]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047944_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a person, a sneakers, and a wine glass.", "boxes_value": [[1.110351552, 61.026428208, 186.1760864, 303], [66.544311552, 77.835815424, 186.1760864, 303], [1.110351552, 71.88726806400001, 106.20129395199999, 266.867248512], [91.80065920000001, 0, 232, 302.78930663999995], [103.847900416, 281.99755857599996, 131.299560576, 302.973388656], [4.701416, 61.026428208, 20.40124512, 92.37359620799998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047947.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe.", "boxes_value": [[327.06823732, 119.3776855552, 391.54455566419995, 283.6640014848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047947_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe.", "boxes_value": [[17.06823731999998, 41.3776855552, 81.54455566419995, 205.6640014848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047947.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, two people, a gloves, and a bottle.", "boxes_value": [[327.06823732, 119.3776855552, 391.54455566419995, 283.6640014848], [327.06823732, 181.5941161984, 366.076660151, 229.3735962112], [353.55737307910005, 119.3776855552, 384.0975341472, 227.404357888], [349.2312011518, 128.94036864, 519.0214843848, 512.0430908416], [356.8623046922, 236.816711424, 386.9331054966, 274.0471801856], [368.7774658335, 228.738830592, 391.54455566419995, 283.6640014848]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047947_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, two people, a gloves, and a bottle.", "boxes_value": [[17.06823731999998, 41.3776855552, 81.54455566419995, 205.6640014848], [17.06823731999998, 103.5941161984, 56.076660151, 151.3735962112], [43.557373079100046, 41.3776855552, 74.09753414720001, 149.404357888], [39.23120115180001, 50.94036864, 97, 246], [46.862304692199984, 158.816711424, 76.93310549659998, 196.04718018559998], [58.7774658335, 150.738830592, 81.54455566419995, 205.6640014848]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047951.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object.", "boxes_value": [[52.076171886400004, 228.9293212672, 176.5172118904, 365.1802978304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047951_crop.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object.", "boxes_value": [[32.076171886400004, 34.92932126720001, 156.5172118904, 171.18029783039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047951.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a sneakers, and a car.", "boxes_value": [[52.076171886400004, 228.9293212672, 176.5172118904, 365.1802978304], [52.076171886400004, 242.9743652352, 69.5344848264, 269.97375488], [67.7074585096, 239.7262573056, 79.8876343152, 270.3797607424], [151.7508545168, 230.591125504, 176.5172118904, 265.9136963072], [144.99090576519998, 326.86395264, 175.3405151312, 365.1802978304], [112.37573239439999, 228.9293212672, 159.7547607592, 256.6363525632]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047951_crop.jpg", "text": "I request a description of the area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a sneakers, and a car.", "boxes_value": [[32.076171886400004, 34.92932126720001, 156.5172118904, 171.18029783039998], [32.076171886400004, 48.9743652352, 49.5344848264, 75.97375488], [47.7074585096, 45.7262573056, 59.8876343152, 76.37976074239998], [131.7508545168, 36.59112550399999, 156.5172118904, 71.91369630719998], [124.99090576519998, 132.86395263999998, 155.3405151312, 171.18029783039998], [92.37573239439999, 34.92932126720001, 139.7547607592, 62.63635256319998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00047954.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[246.1267700224, 587.4016113494, 473.2017211904, 682.7750244444]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047954_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[57.126770022399995, 24.401611349400014, 284.2017211904, 119.77502444439995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047954.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[246.1267700224, 587.4016113494, 473.2017211904, 682.7750244444], [449.7514038272, 587.4016113494, 473.2017211904, 607.5018310355999], [454.004089344, 626.3879394719, 463.5673217536, 675.1298827951999], [381.45465088, 618.1468506122001, 393.467041024, 654.6544189476], [348.3883666944, 628.7535400239, 371.5628662272, 677.8288574229999], [246.1267700224, 620.1604003850999, 262.9769287168, 682.7750244444]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047954_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[57.126770022399995, 24.401611349400014, 284.2017211904, 119.77502444439995], [260.7514038272, 24.401611349400014, 284.2017211904, 44.501831035599935], [265.004089344, 63.38793947190004, 274.5673217536, 112.12988279519993], [192.45465087999997, 55.14685061220007, 204.46704102400003, 91.65441894759999], [159.38836669440002, 65.75354002389997, 182.5628662272, 114.82885742299993], [57.126770022399995, 57.16040038509993, 73.97692871679999, 119.77502444439995]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00047955.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify.", "boxes_value": [[0.13073727359999998, 127.3557739008, 342.294921874, 510.2109374976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047955_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify.", "boxes_value": [[0.13073727359999998, 96.3557739008, 342.294921874, 479.2109374976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047955.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two pictures, three people, a tie, and a canned.", "boxes_value": [[0.13073727359999998, 127.3557739008, 342.294921874, 510.2109374976], [124.6092529368, 353.2867431424, 203.07135011160003, 510.2109374976], [0.6651000791999999, 197.8755493376, 125.2382812388, 510.04125977599995], [39.9046630932, 127.3557739008, 82.9743652244, 168.2720336896], [36.6744384596, 165.5801391616, 83.512756326, 215.1103515648], [282.07971193879996, 229.7215576064, 388.97583009839997, 512.1579589632], [170.5797729372, 246.2650146304, 311.1617431616, 511.7088623104], [0.13073727359999998, 328.6702270464, 34.806579596, 457.8471679488], [317.7954101604, 309.7329712128, 342.294921874, 414.5005493248], [261.02343749199997, 294.7599487488, 308.2181396628, 335.4232177664]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6, 7], [8], [9]]}, {"image_path": "objects365_v1_00047955_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two pictures, three people, a tie, and a canned.", "boxes_value": [[0.13073727359999998, 96.3557739008, 342.294921874, 479.2109374976], [124.6092529368, 322.2867431424, 203.07135011160003, 479.2109374976], [0.6651000791999999, 166.8755493376, 125.2382812388, 479.04125977599995], [39.9046630932, 96.3557739008, 82.9743652244, 137.2720336896], [36.6744384596, 134.5801391616, 83.512756326, 184.1103515648], [282.07971193879996, 198.7215576064, 388.97583009839997, 481], [170.5797729372, 215.2650146304, 311.1617431616, 480.7088623104], [0.13073727359999998, 297.6702270464, 34.806579596, 426.8471679488], [317.7954101604, 278.7329712128, 342.294921874, 383.5005493248], [261.02343749199997, 263.7599487488, 308.2181396628, 304.4232177664]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6, 7], [8], [9]]}, {"image_path": "objects365_v1_00047956.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[533.5429687451, 205.4088134656, 600.8570556497, 426.200195328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047956_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[17.542968745099984, 55.40881346559999, 84.85705564969999, 276.200195328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047956.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two people, a book, and two sneakers.", "boxes_value": [[533.5429687451, 205.4088134656, 600.8570556497, 426.200195328], [422.5018310476, 137.9660644352, 645.0235595706, 415.4174194176], [542.1114501715, 159.1606445568, 626.3393554963, 373.37823488], [469.8212890415, 127.3264160256, 621.6968993826, 426.4352416768], [533.5429687451, 205.4088134656, 600.8570556497, 273.7584838656], [548.4093017861, 400.262023936, 578.3380127048, 426.200195328], [579.1361083815, 393.8772582912, 602.2808837571, 424.2049560576]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047956_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two people, a book, and two sneakers.", "boxes_value": [[17.542968745099984, 55.40881346559999, 84.85705564969999, 276.200195328], [0, 0, 101, 265.4174194176], [26.111450171499996, 9.160644556800008, 101, 223.37823487999998], [0, 0, 101, 276.4352416768], [17.542968745099984, 55.40881346559999, 84.85705564969999, 123.7584838656], [32.409301786099945, 250.262023936, 62.33801270480001, 276.200195328], [63.13610838149998, 243.8772582912, 86.28088375710001, 274.2049560576]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047958.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[11.1658935596, 207.7350464, 131.3664550458, 329.4144287232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047958_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[11.1658935596, 30.735046399999987, 131.3664550458, 152.4144287232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047958.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four potted plants, and a trash bin can.", "boxes_value": [[11.1658935596, 207.7350464, 131.3664550458, 329.4144287232], [54.0237426794, 207.7350464, 107.2276611588, 328.9218139648], [89.9856567142, 214.1392211968, 131.3664550458, 329.4144287232], [11.1658935596, 276.7713012736, 33.108764613599995, 316.3712768512], [54.0237426794, 207.7350464, 107.2276611588, 328.9218139648], [89.9856567142, 214.1392211968, 131.3664550458, 329.4144287232]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047958_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four potted plants, and a trash bin can.", "boxes_value": [[11.1658935596, 30.735046399999987, 131.3664550458, 152.4144287232], [54.0237426794, 30.735046399999987, 107.2276611588, 151.92181396479998], [89.9856567142, 37.13922119680001, 131.3664550458, 152.4144287232], [11.1658935596, 99.77130127359999, 33.108764613599995, 139.37127685119998], [54.0237426794, 30.735046399999987, 107.2276611588, 151.92181396479998], [89.9856567142, 37.13922119680001, 131.3664550458, 152.4144287232]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047961.jpg", "text": "What insights can you provide about the area in the selected picture ? Please point out the objects and their coordinates.", "boxes_value": [[363.0064697567, 166.6777954304, 476.27490235380003, 362.0338745344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047961_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Please point out the objects and their coordinates.", "boxes_value": [[29.006469756700028, 49.67779543040001, 142.27490235380003, 245.0338745344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047961.jpg", "text": "What insights can you provide about the area in the selected picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two flowers, and three bowls.", "boxes_value": [[363.0064697567, 166.6777954304, 476.27490235380003, 362.0338745344], [449.4128417811, 175.1841430528, 476.27490235380003, 196.6738281472], [363.0064697567, 166.6777954304, 380.9145507583, 189.5105590784], [411.280517553, 333.1194458112, 437.60778813170003, 347.8810424832], [449.9344482606, 333.2716064256, 472.6094970453, 347.1201171968], [435.6295165667, 347.7288207872, 463.174316392, 362.0338745344]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047961_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two flowers, and three bowls.", "boxes_value": [[29.006469756700028, 49.67779543040001, 142.27490235380003, 245.0338745344], [115.41284178109998, 58.18414305280001, 142.27490235380003, 79.6738281472], [29.006469756700028, 49.67779543040001, 46.91455075829998, 72.51055907840001], [77.28051755299998, 216.11944581120002, 103.60778813170003, 230.88104248320002], [115.93444826059999, 216.2716064256, 138.60949704529997, 230.12011719679998], [101.62951656669998, 230.7288207872, 129.17431639199998, 245.0338745344]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00047962.jpg", "text": "Fill me in about the selected portion within the presented image . Give coordinates for the items you reference.", "boxes_value": [[69.5268554752, 340.8894042959, 348.7973022208, 483.191528305]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047962_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Give coordinates for the items you reference.", "boxes_value": [[69.5268554752, 35.88940429590002, 348.7973022208, 178.19152830500002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047962.jpg", "text": "Fill me in about the selected portion within the presented image . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a nightstand, two beds, and two pillows.", "boxes_value": [[69.5268554752, 340.8894042959, 348.7973022208, 483.191528305], [148.750793472, 340.8894042959, 176.9256591872, 368.59722899540003], [142.904357888, 362.21093748019996, 179.0823364096, 401.110229512], [169.0895995904, 353.11236571940003, 348.7973022208, 483.191528305], [34.8311767552, 352.589965841, 173.7912597504, 487.370727551], [69.5268554752, 359.4702758661, 142.5593261568, 374.44348146849995], [177.0893554688, 357.6368408285, 242.176879872, 374.7490844609]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047962_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a nightstand, two beds, and two pillows.", "boxes_value": [[69.5268554752, 35.88940429590002, 348.7973022208, 178.19152830500002], [148.750793472, 35.88940429590002, 176.9256591872, 63.59722899540003], [142.904357888, 57.21093748019996, 179.0823364096, 96.11022951199999], [169.0895995904, 48.11236571940003, 348.7973022208, 178.19152830500002], [34.8311767552, 47.58996584099998, 173.7912597504, 182.37072755100002], [69.5268554752, 54.470275866099996, 142.5593261568, 69.44348146849995], [177.0893554688, 52.6368408285, 242.176879872, 69.74908446090001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00047963.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[415.72570797720005, 158.5040283136, 544.438232406, 381.1141357568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047963_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.725707977200045, 56.5040283136, 161.438232406, 279.1141357568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047963.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[415.72570797720005, 158.5040283136, 544.438232406, 381.1141357568], [440.434814472, 158.5040283136, 544.438232406, 381.1141357568], [406.4876709156, 260.3731079168, 426.28344723720005, 294.2459716608], [415.72570797720005, 304.8037109248, 439.9206543216, 321.9600829952], [447.8389892622, 364.6310424576, 471.5938721016, 380.0277709824], [468.9544677822, 352.7535400448, 503.7071533092, 375.188842752], [508.5461425626, 277.5294799872, 534.500610336, 294.6858520576]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047963_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, and five sneakers.", "boxes_value": [[32.725707977200045, 56.5040283136, 161.438232406, 279.1141357568], [57.43481447200003, 56.5040283136, 161.438232406, 279.1141357568], [23.487670915600006, 158.3731079168, 43.28344723720005, 192.24597166080002], [32.725707977200045, 202.8037109248, 56.92065432160001, 219.96008299520003], [64.83898926220002, 262.6310424576, 88.59387210160003, 278.0277709824], [85.95446778220003, 250.7535400448, 120.70715330920001, 273.188842752], [125.54614256259998, 175.52947998719998, 151.50061033600002, 192.68585205760002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047969.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations.", "boxes_value": [[549.567504864, 179.3803710976, 656.1455077908, 489.3065796096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047969_crop.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations.", "boxes_value": [[27.567504864000057, 78.3803710976, 134.14550779080002, 388.3065796096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047969.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[549.567504864, 179.3803710976, 656.1455077908, 489.3065796096], [532.3332519756, 212.2968139776, 567.2707519841999, 377.9680175616], [628.1295166368, 220.185913088, 659.5834961303999, 390.7409057792], [549.567504864, 179.3803710976, 656.1455077908, 489.3065796096], [582.269775381, 409.7817382912, 597.3094482516, 444.783020032], [585.9405517284, 459.778808576, 606.3463135026, 488.7084350464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047969_crop.jpg", "text": "Analyze and describe the region in the included photo . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[27.567504864000057, 78.3803710976, 134.14550779080002, 388.3065796096], [10.333251975599978, 111.29681397760001, 45.27075198419993, 276.9680175616], [106.12951663679996, 119.185913088, 137.5834961303999, 289.7409057792], [27.567504864000057, 78.3803710976, 134.14550779080002, 388.3065796096], [60.269775381000045, 308.7817382912, 75.30944825159997, 343.783020032], [63.94055172840001, 358.778808576, 84.34631350259997, 387.7084350464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047971.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[265.9866943545, 210.7985839616, 435.39465334050004, 512.647705088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047971_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[42.9866943545, 75.7985839616, 212.39465334050004, 377]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047971.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cabinets, two mirrors, and a stool.", "boxes_value": [[265.9866943545, 210.7985839616, 435.39465334050004, 512.647705088], [260.06872557599996, 198.9625854464, 337.0026855195, 262.8770141696], [351.7977295215, 202.513427712, 431.0988769305, 261.101623552], [353.5731201015, 210.7985839616, 426.364501923, 254.0], [265.9866943545, 211.9821777408, 328.7175292755, 236.2460326912], [339.746948217, 380.7199096832, 435.39465334050004, 512.647705088]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047971_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two cabinets, two mirrors, and a stool.", "boxes_value": [[42.9866943545, 75.7985839616, 212.39465334050004, 377], [37.06872557599996, 63.9625854464, 114.0026855195, 127.87701416959999], [128.7977295215, 67.51342771200001, 208.0988769305, 126.10162355199998], [130.57312010150002, 75.7985839616, 203.36450192299998, 119.0], [42.9866943545, 76.98217774080001, 105.7175292755, 101.24603269120001], [116.74694821700001, 245.7199096832, 212.39465334050004, 377]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047972.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[274.69696045, 291.19958495, 352.4920044, 381.36120605]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047972_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[19.696960450000006, 23.199584949999974, 97.49200439999998, 113.36120605000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047972.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, a vase, a couch, and two pillows.", "boxes_value": [[274.69696045, 291.19958495, 352.4920044, 381.36120605], [287.0678711, 319.93432615, 322.39630125, 353.03485105], [292.4785156, 349.8521118, 316.34906005, 381.36120605], [240.5316162, 306.50592040000004, 499.45947265000007, 414.84814455000003], [274.69696045, 291.19958495, 343.90771485, 343.51000975], [316.5453491, 303.27124025, 352.4920044, 348.87524414999996]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047972_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, a vase, a couch, and two pillows.", "boxes_value": [[19.696960450000006, 23.199584949999974, 97.49200439999998, 113.36120605000002], [32.06787109999999, 51.934326150000004, 67.39630125000002, 85.03485104999999], [37.47851559999998, 81.85211179999999, 61.34906004999999, 113.36120605000002], [0, 38.505920400000036, 116, 135], [19.696960450000006, 23.199584949999974, 88.90771484999999, 75.51000975], [61.54534910000001, 35.271240250000005, 97.49200439999998, 80.87524414999996]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047973.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[619.2205810876, 182.0856323072, 678.5122070472, 300.6864624128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047973_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[15.220581087599953, 30.0856323072, 74.51220704720004, 148.68646241279998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047973.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, a cabinet, a person, and two bottles.", "boxes_value": [[619.2205810876, 182.0856323072, 678.5122070472, 300.6864624128], [620.2371826484, 263.7374877696, 661.1350097684, 300.6864624128], [619.2205810876, 182.0856323072, 678.5122070472, 254.9763183616], [649.2854004256001, 175.1519164928, 771.8023681544, 511.6804809728], [640.8896484448001, 222.3942260736, 654.709838894, 250.543884288], [645.1157226616, 193.2448730624, 657.80944823, 217.0219116032]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047973_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, a cabinet, a person, and two bottles.", "boxes_value": [[15.220581087599953, 30.0856323072, 74.51220704720004, 148.68646241279998], [16.237182648399994, 111.73748776960002, 57.13500976839998, 148.68646241279998], [15.220581087599953, 30.0856323072, 74.51220704720004, 102.9763183616], [45.28540042560007, 23.151916492800012, 89, 178], [36.88964844480006, 70.39422607360001, 50.70983889399997, 98.54388428799999], [41.11572266159999, 41.24487306239999, 53.809448230000044, 65.02191160320001]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047974.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[202.31774905480003, 142.980468736, 310.0711669892, 284.4656372224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047974_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[27.317749054800032, 35.980468736000006, 135.07116698919998, 177.4656372224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047974.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two barrels, and a helmet.", "boxes_value": [[202.31774905480003, 142.980468736, 310.0711669892, 284.4656372224], [202.31774905480003, 171.933959936, 277.8524170134, 262.5755004928], [245.6651611276, 142.980468736, 304.261840837, 211.1145629696], [225.6127319316, 249.1154174976, 258.24365233640003, 284.4656372224], [292.75683596939996, 193.5836181504, 310.0711669892, 214.560668928], [203.2405654174, 171.5185975296, 227.31206907440003, 199.0347404288]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047974_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two barrels, and a helmet.", "boxes_value": [[27.317749054800032, 35.980468736000006, 135.07116698919998, 177.4656372224], [27.317749054800032, 64.93395993600001, 102.85241701339999, 155.57550049280002], [70.66516112759999, 35.980468736000006, 129.26184083700002, 104.1145629696], [50.612731931599996, 142.1154174976, 83.24365233640003, 177.4656372224], [117.75683596939996, 86.58361815040001, 135.07116698919998, 107.56066892800001], [28.240565417400006, 64.51859752959999, 52.31206907440003, 92.0347404288]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00047975.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[676.0201416192, 136.1209716736, 757.4858398464, 184.179931648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047975_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for all objects that you mention.", "boxes_value": [[21.02014161919999, 12.120971673599996, 102.48583984640004, 60.17993164800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047975.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two flags, a cabinet, and two vases.", "boxes_value": [[676.0201416192, 136.1209716736, 757.4858398464, 184.179931648], [676.0201416192, 146.377502464, 717.0460204799999, 184.179931648], [712.35729984, 136.1209716736, 757.4858398464, 180.0773315584], [678.3026122752, 147.3501586944, 715.5297851904, 181.1931152384], [676.0201416192, 146.377502464, 717.0460204799999, 184.179931648], [712.35729984, 136.1209716736, 757.4858398464, 180.0773315584]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047975_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two flags, a cabinet, and two vases.", "boxes_value": [[21.02014161919999, 12.120971673599996, 102.48583984640004, 60.17993164800001], [21.02014161919999, 22.377502464000003, 62.046020479999925, 60.17993164800001], [57.357299839999996, 12.120971673599996, 102.48583984640004, 56.077331558400004], [23.302612275199976, 23.350158694399994, 60.52978519040005, 57.1931152384], [21.02014161919999, 22.377502464000003, 62.046020479999925, 60.17993164800001], [57.357299839999996, 12.120971673599996, 102.48583984640004, 56.077331558400004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00047979.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[319.8662719684, 92.4896240128, 383.1237792714, 374.14801024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047979_crop.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[15.866271968399985, 70.4896240128, 79.1237792714, 352.14801024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047979.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a pen, and five bottles.", "boxes_value": [[319.8662719684, 92.4896240128, 383.1237792714, 374.14801024], [355.8284912359, 351.2395019776, 383.1237792714, 374.14801024], [360.69177245080004, 190.9511718912, 404.0147704838, 275.6281127936], [320.3225097531, 193.9050292736, 354.7840576077, 277.5973510656], [366.5993652418, 86.5819702272, 404.99938963569997, 173.2280883712], [321.307128905, 92.4896240128, 356.7532959115, 182.0896606208], [319.8662719684, 282.3689575424, 353.86560059749996, 367.005676288]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047979_crop.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a pen, and five bottles.", "boxes_value": [[15.866271968399985, 70.4896240128, 79.1237792714, 352.14801024], [51.8284912359, 329.2395019776, 79.1237792714, 352.14801024], [56.691772450800045, 168.9511718912, 94, 253.6281127936], [16.322509753100007, 171.9050292736, 50.78405760769999, 255.59735106559998], [62.5993652418, 64.5819702272, 94, 151.2280883712], [17.307128905000013, 70.4896240128, 52.753295911500004, 160.0896606208], [15.866271968399985, 260.3689575424, 49.86560059749996, 345.005676288]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047980.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[45.9758301129, 206.2655029248, 305.8728027189, 349.9481201152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047980_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[45.9758301129, 36.26550292479999, 305.8728027189, 179.94812011520003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047980.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, a cabinet, a person, and a blackboard.", "boxes_value": [[45.9758301129, 206.2655029248, 305.8728027189, 349.9481201152], [137.6195678559, 206.2655029248, 185.9974975617, 232.3475341824], [77.0419311648, 223.5133056512, 121.63378906919999, 240.3404540928], [45.9758301129, 286.4373169152, 166.1386718592, 336.2959594496], [139.3322143236, 242.424621568, 168.65679933959998, 349.9481201152], [272.8579101825, 218.1184692224, 305.8728027189, 276.2876587008]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047980_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two lamps, a cabinet, a person, and a blackboard.", "boxes_value": [[45.9758301129, 36.26550292479999, 305.8728027189, 179.94812011520003], [137.6195678559, 36.26550292479999, 185.9974975617, 62.34753418240001], [77.0419311648, 53.5133056512, 121.63378906919999, 70.3404540928], [45.9758301129, 116.43731691519997, 166.1386718592, 166.2959594496], [139.3322143236, 72.42462156799999, 168.65679933959998, 179.94812011520003], [272.8579101825, 48.118469222399995, 305.8728027189, 106.28765870080002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00047982.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[441.8308716032, 231.9289550592, 512.3214111232, 362.9094238464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047982_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe.", "boxes_value": [[17.830871603200023, 32.92895505920001, 88, 163.9094238464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047982.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, and five people.", "boxes_value": [[441.8308716032, 231.9289550592, 512.3214111232, 362.9094238464], [469.0022582784, 291.6247558656, 512.3214111232, 362.9094238464], [419.182861312, 235.79736330240002, 462.2545165824, 276.8469238272], [421.6094360576, 247.7280273408, 471.7586059776, 306.37023928319996], [441.8308716032, 253.3900146432, 511.5948486144, 362.58581544960003], [456.6109008896, 231.9289550592, 488.3970336768, 275.0822754048], [500.1852417024, 248.55877685759998, 511.55249024, 282.0289306368]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047982_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, and five people.", "boxes_value": [[17.830871603200023, 32.92895505920001, 88, 163.9094238464], [45.00225827840001, 92.62475586559998, 88, 163.9094238464], [0, 36.797363302400015, 38.254516582400015, 77.84692382719999], [0, 48.7280273408, 47.758605977599984, 107.37023928319996], [17.830871603200023, 54.390014643200004, 87.59484861440001, 163.58581544960003], [32.610900889599975, 32.92895505920001, 64.39703367679999, 76.08227540479999], [76.18524170239999, 49.55877685759998, 87.55249024, 83.0289306368]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00047986.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[219.15490721279997, 439.1298828288, 264.1306152192, 496.0572509696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047986_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[12.15490721279997, 15.129882828800021, 57.130615219200024, 72.05725096959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047986.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two leather shoes.", "boxes_value": [[219.15490721279997, 439.1298828288, 264.1306152192, 496.0572509696], [215.975341824, 90.4539794944, 444.1072998144, 512.2034912256], [0.20086671359999997, 106.4484863488, 253.85699466239998, 512.2034912256], [187.1805420288, 139.4258422784, 275.1052246272, 499.3034668032], [219.15490721279997, 473.0975952384, 236.7677612544, 496.0572509696], [232.99359129600003, 439.1298828288, 264.1306152192, 459.5734253056]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047986_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two leather shoes.", "boxes_value": [[12.15490721279997, 15.129882828800021, 57.130615219200024, 72.05725096959998], [8.975341823999997, 0, 68, 86], [0, 0, 46.856994662399984, 86], [0, 0, 68, 75.30346680320002], [12.15490721279997, 49.097595238400004, 29.7677612544, 72.05725096959998], [25.993591296000034, 15.129882828800021, 57.130615219200024, 35.57342530559998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047993.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe.", "boxes_value": [[410.9925537024, 341.1791382016, 529.1555175936, 512.4443359232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047993_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe.", "boxes_value": [[29.992553702400016, 43.17913820159998, 148.15551759360005, 214]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047993.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, two people, and two handbags.", "boxes_value": [[410.9925537024, 341.1791382016, 529.1555175936, 512.4443359232], [480.50183109119996, 403.5511474688, 536.0430908159999, 488.9202881024], [460.42907712, 341.1791382016, 497.66369625600004, 512.0190429696], [413.46044920319997, 341.2587280384, 480.88684085759996, 511.9882812416], [494.07678220800005, 467.454956032, 529.1555175936, 512.4443359232], [410.9925537024, 427.6416626176, 473.7153320448, 512.2745361408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047993_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, two people, and two handbags.", "boxes_value": [[29.992553702400016, 43.17913820159998, 148.15551759360005, 214], [99.50183109119996, 105.55114746880002, 155.0430908159999, 190.92028810239998], [79.42907711999999, 43.17913820159998, 116.66369625600004, 214], [32.46044920319997, 43.25872803840002, 99.88684085759996, 213.98828124160002], [113.07678220800005, 169.45495603199998, 148.15551759360005, 214], [29.992553702400016, 129.6416626176, 92.7153320448, 214]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00047994.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[175.4494628864, 113.66241457529999, 351.68139648, 347.9187011709]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047994_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[44.4494628864, 58.66241457529999, 220.68139648, 292.9187011709]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047994.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a stool, a couch, a chair, a cabinet, and a trolley.", "boxes_value": [[175.4494628864, 113.66241457529999, 351.68139648, 347.9187011709], [175.4494628864, 113.66241457529999, 283.73870848, 169.5999145344], [210.323120128, 315.606079095, 233.9282226688, 335.51397705150004], [309.2938842624, 297.1201782441, 346.8345336832, 336.6515502684], [272.5079955968, 297.7124633676, 297.2233276416, 329.5916747895], [215.6817016832, 248.2919311431, 245.3861694464, 330.0367431648], [324.9086914048, 244.4787597438, 351.68139648, 347.9187011709]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047994_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a stool, a couch, a chair, a cabinet, and a trolley.", "boxes_value": [[44.4494628864, 58.66241457529999, 220.68139648, 292.9187011709], [44.4494628864, 58.66241457529999, 152.73870848, 114.5999145344], [79.323120128, 260.606079095, 102.9282226688, 280.51397705150004], [178.2938842624, 242.12017824409998, 215.8345336832, 281.6515502684], [141.50799559680001, 242.71246336759998, 166.2233276416, 274.5916747895], [84.6817016832, 193.2919311431, 114.3861694464, 275.0367431648], [193.90869140479998, 189.4787597438, 220.68139648, 292.9187011709]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00047995.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[457.2973632495, 261.6974487552, 560.552490209, 297.740295424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047995_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference.", "boxes_value": [[26.297363249499995, 9.697448755200014, 129.55249020899998, 45.74029542400001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047995.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, two pictures, a book, and two plates.", "boxes_value": [[457.2973632495, 261.6974487552, 560.552490209, 297.740295424], [479.059570327, 261.6974487552, 530.5800781152, 279.3057250816], [526.6671142468, 262.3496093696, 563.8400878732999, 299.5225829888], [447.75610354419996, 147.5700073472, 604.9259033533, 298.2182617088], [487.35327149209996, 286.6152343552, 518.0446777518999, 297.2745361408], [527.5618896193, 265.6103515648, 560.552490209, 297.740295424], [457.2973632495, 266.789428736, 483.60791013040006, 292.3622436352]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047995_crop.jpg", "text": "Please describe the content within the area displayed in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, two pictures, a book, and two plates.", "boxes_value": [[26.297363249499995, 9.697448755200014, 129.55249020899998, 45.74029542400001], [48.05957032700002, 9.697448755200014, 99.58007811519997, 27.305725081599974], [95.6671142468, 10.349609369599989, 132.84008787329992, 47.522582988800025], [16.75610354419996, 0, 155, 46.218261708800014], [56.353271492099964, 34.615234355200016, 87.04467775189994, 45.27453614080002], [96.56188961930002, 13.610351564799998, 129.55249020899998, 45.74029542400001], [26.297363249499995, 14.78942873599999, 52.60791013040006, 40.3622436352]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047996.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.033386255999999996, 245.731689472, 436.32336425399996, 288.7716064256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047996_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.033386255999999996, 11.731689472, 436.32336425399996, 54.7716064256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047996.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three buses, and a stroller.", "boxes_value": [[0.033386255999999996, 245.731689472, 436.32336425399996, 288.7716064256], [428.83764650800003, 254.1187744256, 444.10021974, 294.818908672], [0.033386255999999996, 250.4375610368, 55.203613258, 272.009338368], [54.143493672, 270.3325195264, 71.080017118, 288.7716064256], [124.47460937999999, 245.731689472, 195.64385987400001, 273.20050048], [377.356262212, 246.785644544, 436.32336425399996, 271.1364135936]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047996_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, three buses, and a stroller.", "boxes_value": [[0.033386255999999996, 11.731689472, 436.32336425399996, 54.7716064256], [428.83764650800003, 20.11877442560001, 444.10021974, 60.81890867200002], [0.033386255999999996, 16.43756103679999, 55.203613258, 38.00933836799999], [54.143493672, 36.332519526400006, 71.080017118, 54.7716064256], [124.47460937999999, 11.731689472, 195.64385987400001, 39.20050048000002], [377.356262212, 12.785644544000007, 436.32336425399996, 37.136413593600025]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00047999.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[245.65216062539997, 27.4985961984, 629.8870849353, 249.9500122112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047999_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[96.65216062539997, 27.4985961984, 480.8870849353, 249.9500122112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00047999.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two pillows, a clock, a flower, and two stuffed toys.", "boxes_value": [[245.65216062539997, 27.4985961984, 629.8870849353, 249.9500122112], [424.936767574, 98.2437133824, 557.5919189179, 197.7350464], [376.46667477930004, 147.0782470656, 511.6728515449, 214.4991455232], [245.65216062539997, 117.5880126976, 282.578247046, 150.2144164864], [457.2954101317, 27.4985961984, 518.6201171808999, 65.4874267648], [204.6308593422, 136.8712158208, 378.5571289179, 202.5127563264], [484.101440406, 178.5535888896, 629.8870849353, 249.9500122112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00047999_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two pillows, a clock, a flower, and two stuffed toys.", "boxes_value": [[96.65216062539997, 27.4985961984, 480.8870849353, 249.9500122112], [275.936767574, 98.2437133824, 408.5919189179, 197.7350464], [227.46667477930004, 147.0782470656, 362.6728515449, 214.4991455232], [96.65216062539997, 117.5880126976, 133.578247046, 150.2144164864], [308.2954101317, 27.4985961984, 369.6201171808999, 65.4874267648], [55.63085934220001, 136.8712158208, 229.5571289179, 202.5127563264], [335.101440406, 178.5535888896, 480.8870849353, 249.9500122112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048001.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[38.7417602338, 27.4748535296, 673.1043701407, 184.6193847808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048001_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[38.7417602338, 27.4748535296, 673.1043701407, 184.6193847808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048001.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, a hat, and a trash bin can.", "boxes_value": [[38.7417602338, 27.4748535296, 673.1043701407, 184.6193847808], [217.4946899304, 68.1937255936, 386.5413818429, 271.8743896576], [647.4073486175, 27.4748535296, 663.7651367377, 47.9220580864], [323.88745120190003, 79.1350097408, 357.0267333906, 102.2799072256], [632.0356445476, 155.3236694528, 673.1043701407, 184.6193847808], [38.7417602338, 149.7624511488, 56.4041747936, 175.0856323072]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048001_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a helmet, a hat, and a trash bin can.", "boxes_value": [[38.7417602338, 27.4748535296, 673.1043701407, 184.6193847808], [217.4946899304, 68.1937255936, 386.5413818429, 223], [647.4073486175, 27.4748535296, 663.7651367377, 47.9220580864], [323.88745120190003, 79.1350097408, 357.0267333906, 102.2799072256], [632.0356445476, 155.3236694528, 673.1043701407, 184.6193847808], [38.7417602338, 149.7624511488, 56.4041747936, 175.0856323072]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048002.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[133.88671873500002, 141.5676879872, 789.986938455, 512.1099853312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048002_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[133.88671873500002, 93.5676879872, 789.986938455, 464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048002.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two helmets, two boots, and two horses.", "boxes_value": [[133.88671873500002, 141.5676879872, 789.986938455, 512.1099853312], [133.2018432525, 142.0151367168, 221.7678222375, 282.710998528], [653.0291747924999, 172.7621459968, 750.4924316175001, 332.998535168], [709.4508056625, 172.095642112, 742.2125243775, 195.9223632896], [169.62683108250002, 141.5676879872, 199.4101562475, 166.8835449344], [651.3731689575001, 278.571289088, 683.3903808599999, 330.6922607616], [133.88671873500002, 236.8745727488, 156.96887208750002, 266.6579589632], [582.0302734125, 225.632690432, 789.986938455, 512.1099853312], [111.83843995499998, 173.1118163968, 219.5277099675, 430.6579589632]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048002_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two helmets, two boots, and two horses.", "boxes_value": [[133.88671873500002, 93.5676879872, 789.986938455, 464], [133.2018432525, 94.01513671679999, 221.7678222375, 234.710998528], [653.0291747924999, 124.7621459968, 750.4924316175001, 284.998535168], [709.4508056625, 124.09564211200001, 742.2125243775, 147.9223632896], [169.62683108250002, 93.5676879872, 199.4101562475, 118.88354493439999], [651.3731689575001, 230.57128908800001, 683.3903808599999, 282.6922607616], [133.88671873500002, 188.8745727488, 156.96887208750002, 218.65795896319997], [582.0302734125, 177.632690432, 789.986938455, 464], [111.83843995499998, 125.11181639680001, 219.5277099675, 382.6579589632]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048004.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[337.44885255950004, 73.950561536, 406.8839111611, 402.588806144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048004_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations.", "boxes_value": [[17.44885255950004, 73.950561536, 86.8839111611, 402.588806144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048004.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a slippers, a sneakers, and a hat.", "boxes_value": [[337.44885255950004, 73.950561536, 406.8839111611, 402.588806144], [333.24487306689997, 152.1469116416, 414.5561523767, 419.1903076352], [293.8731689354, 74.2592773632, 399.1497802495, 392.6571044864], [339.87329102909996, 376.8528442368, 366.5804443375, 396.2762450944], [364.6380615179, 388.5068969472, 406.8839111611, 402.588806144], [337.44885255950004, 73.950561536, 392.0418701002, 125.7581787136]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048004_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a slippers, a sneakers, and a hat.", "boxes_value": [[17.44885255950004, 73.950561536, 86.8839111611, 402.588806144], [13.24487306689997, 152.1469116416, 94.55615237670003, 419.1903076352], [0, 74.2592773632, 79.1497802495, 392.6571044864], [19.87329102909996, 376.8528442368, 46.58044433750001, 396.2762450944], [44.638061517899985, 388.5068969472, 86.8839111611, 402.588806144], [17.44885255950004, 73.950561536, 72.04187010020001, 125.7581787136]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048006.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention.", "boxes_value": [[806.7940673424, 423.4850463744, 911.9299316784001, 512.0343017472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048006_crop.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention.", "boxes_value": [[26.794067342400012, 22.485046374399985, 131.92993167840007, 111]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048006.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a traffic light.", "boxes_value": [[806.7940673424, 423.4850463744, 911.9299316784001, 512.0343017472], [806.7940673424, 477.5150756864, 828.2092285392, 512.0343017472], [869.8360595904, 454.9155273216, 884.5135498320001, 490.3629150208], [879.5288086319999, 452.9769897472, 892.8215332128, 489.5321044992], [903.8988036815999, 449.6538085888, 911.9299316784001, 493.132263168], [817.0270995936, 423.4850463744, 832.1108398704, 456.3258056704]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048006_crop.jpg", "text": "Regarding the image , what's going on in the section ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and a traffic light.", "boxes_value": [[26.794067342400012, 22.485046374399985, 131.92993167840007, 111], [26.794067342400012, 76.5150756864, 48.20922853920001, 111], [89.83605959040005, 53.915527321599996, 104.51354983200008, 89.36291502080002], [99.52880863199994, 51.97698974719998, 112.82153321279998, 88.53210449919999], [123.89880368159993, 48.65380858880002, 131.92993167840007, 92.13226316800001], [37.02709959360004, 22.485046374399985, 52.110839870400014, 55.32580567039997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048007.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[118.4786377164, 291.896484352, 404.5194092001, 502.9270629888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048007_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify.", "boxes_value": [[72.4786377164, 52.896484352000016, 358.5194092001, 263.9270629888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048007.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and a pillow.", "boxes_value": [[118.4786377164, 291.896484352, 404.5194092001, 502.9270629888], [56.616210914199996, 309.8565673984, 181.83764646900002, 492.9492797952], [210.77331541150002, 315.843261696, 348.4670410131, 502.9270629888], [224.24334715749998, 291.896484352, 318.0347289917, 448.049133312], [118.4786377164, 323.3265991168, 299.07690432600003, 480.9758910976], [367.1268310475, 317.7799072256, 404.5194092001, 379.0623169024]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048007_crop.jpg", "text": "Help me understand the details within the area in photograph . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and a pillow.", "boxes_value": [[72.4786377164, 52.896484352000016, 358.5194092001, 263.9270629888], [10.616210914199996, 70.85656739839999, 135.83764646900002, 253.94927979520003], [164.77331541150002, 76.84326169600001, 302.4670410131, 263.9270629888], [178.24334715749998, 52.896484352000016, 272.0347289917, 209.04913331199998], [72.4786377164, 84.32659911680003, 253.07690432600003, 241.97589109760003], [321.1268310475, 78.77990722560003, 358.5194092001, 140.06231690240003]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048008.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[176.04656985809999, 477.7975463936, 580.2830810729, 512.0170898432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048008_crop.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[102.04656985809999, 8.797546393599987, 506.2830810729, 43]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048008.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, a person, a marker, and a pen.", "boxes_value": [[176.04656985809999, 477.7975463936, 580.2830810729, 512.0170898432], [475.79553225089995, 479.7209472512, 515.6907958855, 511.3838500864], [517.5905761628, 485.4202881024, 580.2830810729, 512.0170898432], [166.0699463072, 136.611389184, 485.7382812556999, 511.98962401280005], [148.73736569759998, 479.9454956032, 203.969421379, 498.0493164032], [176.04656985809999, 477.7975463936, 246.92773436230001, 492.2192382976]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048008_crop.jpg", "text": "I request a description of the area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, a person, a marker, and a pen.", "boxes_value": [[102.04656985809999, 8.797546393599987, 506.2830810729, 43], [401.79553225089995, 10.720947251200016, 441.69079588550005, 42.3838500864], [443.59057616279995, 16.42028810239998, 506.2830810729, 43], [92.06994630720001, 0, 411.7382812556999, 42.98962401280005], [74.73736569759998, 10.945495603200015, 129.969421379, 29.04931640320001], [102.04656985809999, 8.797546393599987, 172.92773436230001, 23.219238297599986]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048009.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[180.9462279945, 200.017639168, 259.6737060607, 282.5682983424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048009_crop.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[19.946227994499992, 21.017639167999988, 98.6737060607, 103.5682983424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048009.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a chair, two people, and a hat.", "boxes_value": [[180.9462279945, 200.017639168, 259.6737060607, 282.5682983424], [0, 242.6823119872, 445.4731445034, 510.83691407360004], [180.9462279945, 245.3408813568, 215.3099975691, 282.5682983424], [209.1220702967, 200.017639168, 259.6737060607, 271.8292236288], [193.2951659874, 114.032653824, 265.5792236245, 248.9156494336], [212.83367916549997, 200.0827636736, 249.6641235252, 232.3962402304]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048009_crop.jpg", "text": "What can you share about the area in the presented image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a chair, two people, and a hat.", "boxes_value": [[19.946227994499992, 21.017639167999988, 98.6737060607, 103.5682983424], [0, 63.682311987199995, 118, 124], [19.946227994499992, 66.3408813568, 54.309997569100005, 103.5682983424], [48.1220702967, 21.017639167999988, 98.6737060607, 92.82922362879998], [32.2951659874, 0, 104.5792236245, 69.91564943360001], [51.83367916549997, 21.0827636736, 88.66412352520001, 53.3962402304]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048010.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[145.24737550769999, 272.7772827136, 654.078857443, 404.316406272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048010_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention.", "boxes_value": [[127.24737550769999, 33.777282713600016, 636.078857443, 165.316406272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048010.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four cabinets, a stool, and a handbag.", "boxes_value": [[145.24737550769999, 272.7772827136, 654.078857443, 404.316406272], [572.1256103323, 272.7772827136, 654.078857443, 404.316406272], [530.2078857555, 291.1430664192, 573.689208994, 380.5534057472], [456.8854980514, 301.8401489408, 495.9383544719, 327.6920776192], [145.24737550769999, 297.7377319424, 198.8356323452, 330.9439087104], [119.5317993333, 323.2931518464, 171.81604006, 366.9234619392], [418.783569349, 320.9131469824, 443.2841797039, 335.8753661952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048010_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four cabinets, a stool, and a handbag.", "boxes_value": [[127.24737550769999, 33.777282713600016, 636.078857443, 165.316406272], [554.1256103323, 33.777282713600016, 636.078857443, 165.316406272], [512.2078857555, 52.14306641920001, 555.689208994, 141.55340574719997], [438.8854980514, 62.84014894080002, 477.9383544719, 88.6920776192], [127.24737550769999, 58.73773194239999, 180.8356323452, 91.94390871040002], [101.5317993333, 84.29315184640001, 153.81604006, 127.92346193920002], [400.783569349, 81.91314698240001, 425.2841797039, 96.8753661952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048012.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[337.4832153088, 113.4212646764, 498.7810058752, 492.79846190190005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048012_crop.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[40.48321530880003, 95.4212646764, 201.78100587519998, 474.79846190190005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048012.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, a person, two high heels, and a hat.", "boxes_value": [[337.4832153088, 113.4212646764, 498.7810058752, 492.79846190190005], [409.6703491072, 113.4212646764, 436.6262206976, 184.3359374947], [337.4832153088, 152.10369874059998, 498.7810058752, 492.79846190190005], [381.3453369344, 463.6483154119, 414.1993408, 492.07043454120003], [405.9342041088, 460.1971435581, 448.7064208896, 493.516845676], [353.0863647232, 158.4103393755, 421.172790528, 197.12170407609997]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048012_crop.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, a person, two high heels, and a hat.", "boxes_value": [[40.48321530880003, 95.4212646764, 201.78100587519998, 474.79846190190005], [112.67034910720002, 95.4212646764, 139.6262206976, 166.3359374947], [40.48321530880003, 134.10369874059998, 201.78100587519998, 474.79846190190005], [84.3453369344, 445.6483154119, 117.19934080000002, 474.07043454120003], [108.93420410879997, 442.1971435581, 151.7064208896, 475.516845676], [56.086364723200006, 140.4103393755, 124.17279052800001, 179.12170407609997]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048016.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[343.5632323863, 114.9938354688, 409.49145510780005, 336.36163328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048016_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[16.5632323863, 55.9938354688, 82.49145510780005, 277.36163328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048016.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a flower, a vase, a fan, and a book.", "boxes_value": [[343.5632323863, 114.9938354688, 409.49145510780005, 336.36163328], [343.5632323863, 218.640563968, 382.8059081925, 245.494323712], [375.0295410522, 167.8394775552, 414.64685061570003, 224.3272705024], [393.8538818376, 223.0175171072, 409.49145510780005, 243.4299316224], [346.68603512010003, 114.9938354688, 382.8245849736, 145.5725707776], [354.01013180999996, 323.667236352, 396.1185302688, 336.36163328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048016_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a flower, a vase, a fan, and a book.", "boxes_value": [[16.5632323863, 55.9938354688, 82.49145510780005, 277.36163328], [16.5632323863, 159.640563968, 55.8059081925, 186.494323712], [48.02954105219999, 108.8394775552, 87.64685061570003, 165.3272705024], [66.8538818376, 164.0175171072, 82.49145510780005, 184.4299316224], [19.68603512010003, 55.9938354688, 55.8245849736, 86.57257077759999], [27.01013180999996, 264.667236352, 69.11853026879999, 277.36163328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048019.jpg", "text": "In the displayed image , help me understand the region defined by . Provide the coordinates for all objects that you mention.", "boxes_value": [[527.4206543216, 250.1915283456, 687.1586913759, 353.3749389824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048019_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Provide the coordinates for all objects that you mention.", "boxes_value": [[40.420654321599955, 26.191528345600005, 200.15869137590005, 129.3749389824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048019.jpg", "text": "In the displayed image , help me understand the region defined by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, four people, a handbag, and a backpack.", "boxes_value": [[527.4206543216, 250.1915283456, 687.1586913759, 353.3749389824], [525.219604527, 301.931335424, 643.515136756, 357.7425537024], [527.4206543216, 253.2640991232, 561.6381835977, 353.3749389824], [565.0600585733, 250.1915283456, 598.2440185517, 299.129638656], [591.5401611586, 268.9622802944, 629.2493896105999, 305.1630859264], [662.8941650721, 242.968200704, 691.2268066519999, 379.9998169088], [542.9272461019, 321.1588745216, 566.5286865449999, 349.6305542144], [662.433349589, 275.454345728, 687.1586913759, 303.9260253696]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048019_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, four people, a handbag, and a backpack.", "boxes_value": [[40.420654321599955, 26.191528345600005, 200.15869137590005, 129.3749389824], [38.219604527, 77.931335424, 156.51513675599995, 133.74255370240002], [40.420654321599955, 29.264099123199998, 74.63818359770005, 129.3749389824], [78.06005857330001, 26.191528345600005, 111.24401855170004, 75.129638656], [104.54016115859997, 44.96228029439999, 142.2493896105999, 81.16308592640002], [175.8941650721, 18.968200703999997, 204.22680665199994, 155], [55.92724610189998, 97.15887452160001, 79.52868654499991, 125.63055421439998], [175.43334958900004, 51.45434572800002, 200.15869137590005, 79.92602536959998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048020.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.8594360198, 283.1813354496, 424.71740721140003, 458.9057006592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048020_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.8594360198, 44.1813354496, 424.71740721140003, 219.90570065920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048020.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a tie, a pen, two wine glasses, and a microphone.", "boxes_value": [[0.8594360198, 283.1813354496, 424.71740721140003, 458.9057006592], [47.611389186, 90.1865234432, 389.82897946139997, 447.6580200448001], [190.8652953796, 283.1813354496, 230.65802004740002, 363.4300537344], [334.86230472, 435.6921997312, 424.71740721140003, 444.298034688], [139.3604736496, 366.164489728, 226.20581053720002, 452.0115966976], [0.8594360198, 350.6699218944, 46.207214320599995, 458.9057006592], [148.0339355486, 244.8464965632, 189.79895021239997, 418.588806144]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048020_crop.jpg", "text": "Can you give me a description of the region in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a tie, a pen, two wine glasses, and a microphone.", "boxes_value": [[0.8594360198, 44.1813354496, 424.71740721140003, 219.90570065920002], [47.611389186, 0, 389.82897946139997, 208.65802004480008], [190.8652953796, 44.1813354496, 230.65802004740002, 124.43005373440002], [334.86230472, 196.6921997312, 424.71740721140003, 205.29803468799997], [139.3604736496, 127.16448972799998, 226.20581053720002, 213.0115966976], [0.8594360198, 111.66992189439998, 46.207214320599995, 219.90570065920002], [148.0339355486, 5.846496563199992, 189.79895021239997, 179.588806144]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048022.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[470.734497095, 81.1475830272, 565.145629919, 408.9987182592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048022_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[23.734497094999995, 81.1475830272, 118.14562991900004, 408.9987182592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048022.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a knife, a spoon, a wine glass, a bottle, and a fork.", "boxes_value": [[470.734497095, 81.1475830272, 565.145629919, 408.9987182592], [445.35070802300004, 118.5044555776, 560.2344970940001, 203.1557006848], [493.268676798, 293.7987060736, 546.43798826, 408.9987182592], [532.653320289, 303.6448974848, 583.853271495, 396.1987304448], [493.268676798, 163.829528832, 565.145629919, 309.5525512704], [501.755493198, 81.1475830272, 558.51135252, 219.3151855616], [470.734497095, 132.2058715648, 503.202270514, 182.4223022592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048022_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a knife, a spoon, a wine glass, a bottle, and a fork.", "boxes_value": [[23.734497094999995, 81.1475830272, 118.14562991900004, 408.9987182592], [0, 118.5044555776, 113.23449709400006, 203.1557006848], [46.268676798, 293.7987060736, 99.43798826, 408.9987182592], [85.65332028900002, 303.6448974848, 136.85327149499994, 396.1987304448], [46.268676798, 163.829528832, 118.14562991900004, 309.5525512704], [54.75549319800001, 81.1475830272, 111.51135251999995, 219.3151855616], [23.734497094999995, 132.2058715648, 56.20227051400002, 182.4223022592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048023.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for each element you describe.", "boxes_value": [[398.6390380571, 202.1903686656, 656.4879150311, 279.7586059776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048023_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for each element you describe.", "boxes_value": [[64.63903805709998, 20.190368665600005, 322.4879150311, 97.75860597759998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048023.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two trucks, a suv, and a car.", "boxes_value": [[398.6390380571, 202.1903686656, 656.4879150311, 279.7586059776], [398.6390380571, 232.1054687744, 417.01953122369997, 279.7586059776], [449.03930660979995, 202.1903686656, 510.4299316282, 276.058776832], [435.8438720769, 220.1347045888, 452.5795898462, 241.6519775232], [588.45727541, 203.9967041024, 620.3347168061999, 236.870361344], [632.7978515795, 217.4301147648, 656.4879150311, 233.2234497024]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048023_crop.jpg", "text": "Analyze and describe the region in the included photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two trucks, a suv, and a car.", "boxes_value": [[64.63903805709998, 20.190368665600005, 322.4879150311, 97.75860597759998], [64.63903805709998, 50.10546877440001, 83.01953122369997, 97.75860597759998], [115.03930660979995, 20.190368665600005, 176.4299316282, 94.05877683199998], [101.84387207690003, 38.13470458879999, 118.57958984620001, 59.6519775232], [254.45727540999997, 21.996704102400003, 286.33471680619994, 54.870361344], [298.79785157950005, 35.43011476480001, 322.4879150311, 51.22344970239999]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048024.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for each element you describe.", "boxes_value": [[152.9397582945, 216.0972290048, 529.2789306346, 281.9400024576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048024_crop.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for each element you describe.", "boxes_value": [[94.9397582945, 17.0972290048, 471.2789306346, 82.94000245759997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048024.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a glasses, and a canned.", "boxes_value": [[152.9397582945, 216.0972290048, 529.2789306346, 281.9400024576], [38.8323363864, 142.9528198144, 386.5605468522, 511.85534668799994], [243.43994137509998, 215.9727783424, 548.8157959028, 511.20233154560003], [342.3107910475, 174.4885254144, 697.6925048600999, 511.2023315456], [152.9397582945, 216.0972290048, 226.46350099460003, 230.7396850688], [496.55493165390004, 247.1707763712, 529.2789306346, 281.9400024576]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048024_crop.jpg", "text": "Please share details about the rectangular region within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a glasses, and a canned.", "boxes_value": [[94.9397582945, 17.0972290048, 471.2789306346, 82.94000245759997], [0, 0, 328.5605468522, 99], [185.43994137509998, 16.972778342400005, 490.8157959028, 99], [284.3107910475, 0, 565, 99], [94.9397582945, 17.0972290048, 168.46350099460003, 31.739685068799986], [438.55493165390004, 48.17077637119999, 471.2789306346, 82.94000245759997]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048025.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[283.6866455366, 257.0830688256, 399.234252955, 367.8356323328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048025_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[29.686645536599997, 28.08306882559998, 145.234252955, 138.8356323328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048025.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[283.6866455366, 257.0830688256, 399.234252955, 367.8356323328], [288.647949204, 298.3771362304, 403.9986572161, 413.7278442496], [283.6866455366, 296.5166625792, 347.87377929030004, 367.8356323328], [299.6461792254, 288.5495605248, 344.6700439624, 338.42724608], [341.9920654376, 257.0830688256, 399.234252955, 338.5946655232], [341.7341308291, 303.0599975424, 380.9600830131, 327.0206908928]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048025_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[29.686645536599997, 28.08306882559998, 145.234252955, 138.8356323328], [34.647949203999985, 69.37713623040003, 149.9986572161, 166], [29.686645536599997, 67.51666257919999, 93.87377929030004, 138.8356323328], [45.646179225399976, 59.54956052479997, 90.67004396239997, 109.42724607999997], [87.99206543759999, 28.08306882559998, 145.234252955, 109.59466552319998], [87.73413082910002, 74.05999754240003, 126.96008301310002, 98.02069089280002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048026.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[110.1749267667, 369.234619136, 742.6676025588, 511.5115966976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048026_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[110.1749267667, 36.23461913599999, 742.6676025588, 178.5115966976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048026.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include a watch, a handbag, a sandals, and two bottles.", "boxes_value": [[110.1749267667, 369.234619136, 742.6676025588, 511.5115966976], [444.99841309500005, 369.234619136, 479.27441407019995, 387.4964599808], [110.1749267667, 459.9948730368, 188.1224975523, 511.5115966976], [719.9572753605, 444.505249024, 742.6676025588, 460.615051264], [683.512451154, 401.573486336, 703.0069580150999, 429.760864256], [214.90673825009998, 451.6173095936, 243.1460571018, 491.5925293056]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048026_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include a watch, a handbag, a sandals, and two bottles.", "boxes_value": [[110.1749267667, 36.23461913599999, 742.6676025588, 178.5115966976], [444.99841309500005, 36.23461913599999, 479.27441407019995, 54.49645998080001], [110.1749267667, 126.99487303680002, 188.1224975523, 178.5115966976], [719.9572753605, 111.50524902400002, 742.6676025588, 127.61505126399999], [683.512451154, 68.57348633599997, 703.0069580150999, 96.76086425599999], [214.90673825009998, 118.6173095936, 243.1460571018, 158.5925293056]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048029.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object.", "boxes_value": [[110.43164064289999, 121.1397094912, 358.7353515602, 316.5072021504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048029_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object.", "boxes_value": [[62.43164064289999, 49.139709491199994, 310.7353515602, 244.50720215040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048029.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four pictures, a couch, and a cup.", "boxes_value": [[110.43164064289999, 121.1397094912, 358.7353515602, 316.5072021504], [110.43164064289999, 121.1397094912, 228.4963379124, 213.5999755776], [69.3875732505, 239.8071899648, 281.5092773349, 344.2021484544], [334.3715820383, 287.234313984, 358.7353515602, 316.5072021504], [268.5253905929, 144.132690432, 306.6301269723, 189.1655273472], [230.9979858082, 145.2873534976, 266.2160644448, 189.7428589056], [233.21984866469998, 192.3038940672, 264.3055420043, 216.8452148224]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3]]}, {"image_path": "objects365_v1_00048029_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four pictures, a couch, and a cup.", "boxes_value": [[62.43164064289999, 49.139709491199994, 310.7353515602, 244.50720215040002], [62.43164064289999, 49.139709491199994, 180.4963379124, 141.5999755776], [21.387573250499997, 167.8071899648, 233.5092773349, 272.2021484544], [286.3715820383, 215.23431398399998, 310.7353515602, 244.50720215040002], [220.5253905929, 72.132690432, 258.6301269723, 117.1655273472], [182.9979858082, 73.28735349760001, 218.2160644448, 117.7428589056], [185.21984866469998, 120.30389406719999, 216.30554200429998, 144.8452148224]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3]]}, {"image_path": "objects365_v1_00048032.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 504.1765137046, 498.2655639552, 680.0000000133999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048032_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 44.176513704599984, 498.2655639552, 220.00000001339993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048032.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a vase, a person, and two high heels.", "boxes_value": [[0, 504.1765137046, 498.2655639552, 680.0000000133999], [0, 212.145019537, 512.1610107392, 680.9891357125999], [0, 504.1765137046, 17.3471679488, 584.6624756104], [1.7836914176, 114.7177123708, 499.4112548864, 679.0606689328], [430.2422485504, 551.0207519212, 498.2655639552, 617.277221671], [227.9391479296, 597.8419189644, 312.747436544, 680.0000000133999]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048032_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a vase, a person, and two high heels.", "boxes_value": [[0, 44.176513704599984, 498.2655639552, 220.00000001339993], [0, 0, 512, 220.98913571259993], [0, 44.176513704599984, 17.3471679488, 124.66247561039995], [1.7836914176, 0, 499.4112548864, 219.0606689328], [430.2422485504, 91.02075192120003, 498.2655639552, 157.277221671], [227.9391479296, 137.84191896439995, 312.747436544, 220.00000001339993]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048035.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object.", "boxes_value": [[197.1245116939, 426.3704834048, 331.60476307060003, 478.2086757376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048035_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object.", "boxes_value": [[34.12451169389999, 13.370483404799984, 168.60476307060003, 65.2086757376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048035.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a soccer, three people, and a sneakers.", "boxes_value": [[197.1245116939, 426.3704834048, 331.60476307060003, 478.2086757376], [197.1245116939, 426.3704834048, 224.3549805023, 446.3703003136], [246.86999511320002, 262.9999389696, 331.23742675150004, 483.404785152], [213.24987791930002, 244.777038592, 287.46783448400004, 468.46893312], [164.463256834, 265.0183105536, 270.3406372335, 500.6473388544], [299.8602496615, 461.762723072, 331.60476307060003, 478.2086757376]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048035_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a soccer, three people, and a sneakers.", "boxes_value": [[34.12451169389999, 13.370483404799984, 168.60476307060003, 65.2086757376], [34.12451169389999, 13.370483404799984, 61.35498050230001, 33.37030031360001], [83.86999511320002, 0, 168.23742675150004, 70.40478515199999], [50.24987791930002, 0, 124.46783448400004, 55.468933119999974], [1.463256833999992, 0, 107.34063723349999, 78], [136.8602496615, 48.76272307199997, 168.60476307060003, 65.2086757376]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048037.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[610.5609131159999, 131.8308716032, 768.564331073, 384.0068359168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048037_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates.", "boxes_value": [[39.56091311599994, 63.830871603199995, 197.56433107299995, 316.0068359168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048037.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a person, a glasses, and two leather shoes.", "boxes_value": [[610.5609131159999, 131.8308716032, 768.564331073, 384.0068359168], [625.701782244, 244.080627456, 769.5119629120001, 389.24102784], [610.5609131159999, 131.8308716032, 768.564331073, 384.0068359168], [692.979736295, 152.3741454848, 726.779663071, 164.880126976], [668.3345946969999, 350.1658324992, 715.767822231, 383.8178100736], [636.926025428, 343.7559204352, 688.846191418, 379.3308716032]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048037_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a person, a glasses, and two leather shoes.", "boxes_value": [[39.56091311599994, 63.830871603199995, 197.56433107299995, 316.0068359168], [54.701782244000015, 176.080627456, 198.51196291200006, 321.24102784], [39.56091311599994, 63.830871603199995, 197.56433107299995, 316.0068359168], [121.97973629499995, 84.37414548480001, 155.77966307099996, 96.88012697600001], [97.33459469699994, 282.1658324992, 144.76782223099997, 315.8178100736], [65.926025428, 275.7559204352, 117.84619141799999, 311.3308716032]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048039.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[471.3182372848, 316.8051757568, 680.6280517826, 440.4149170176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048039_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.31823728479998, 31.805175756799997, 262.6280517826, 155.41491701759998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048039.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two street lights, and a motorcycle.", "boxes_value": [[471.3182372848, 316.8051757568, 680.6280517826, 440.4149170176], [494.38623048339997, 391.6405029376, 510.0654296908, 422.9989013504], [518.0487060624, 389.0296630784, 533.2825927531001, 416.3941650432], [569.0198974569, 316.8051757568, 593.0551758138, 398.587341312], [471.3182372848, 398.587341312, 525.9437255838, 440.4149170176], [661.8992919983999, 353.2467040768, 680.6280517826, 393.914855936]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048039_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two street lights, and a motorcycle.", "boxes_value": [[53.31823728479998, 31.805175756799997, 262.6280517826, 155.41491701759998], [76.38623048339997, 106.64050293759999, 92.06542969079999, 137.9989013504], [100.04870606240002, 104.0296630784, 115.28259275310006, 131.39416504320002], [151.01989745690003, 31.805175756799997, 175.05517581380002, 113.58734131199998], [53.31823728479998, 113.58734131199998, 107.94372558379996, 155.41491701759998], [243.89929199839992, 68.24670407679997, 262.6280517826, 108.91485593599998]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048040.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates.", "boxes_value": [[8.5605468788, 0, 645.310790986, 134.7142944256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048040_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates.", "boxes_value": [[8.5605468788, 0, 645.310790986, 134.7142944256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048040.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, an umbrella, and three buses.", "boxes_value": [[8.5605468788, 0, 645.310790986, 134.7142944256], [602.4720458951, 116.9879150592, 645.310790986, 134.7142944256], [478.87976074659997, 49.0368041984, 683.2255859526999, 135.6990966784], [8.5605468788, 0, 289.53216551180003, 132.6342773248], [379.06921383680003, 13.6920165888, 497.5092773524, 91.1459350528], [306.6233520654, 11.8949584896, 352.56494143170005, 96.578308096]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048040_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, an umbrella, and three buses.", "boxes_value": [[8.5605468788, 0, 645.310790986, 134.7142944256], [602.4720458951, 116.9879150592, 645.310790986, 134.7142944256], [478.87976074659997, 49.0368041984, 683, 135.6990966784], [8.5605468788, 0, 289.53216551180003, 132.6342773248], [379.06921383680003, 13.6920165888, 497.5092773524, 91.1459350528], [306.6233520654, 11.8949584896, 352.56494143170005, 96.578308096]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048042.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[230.1312866, 130.0552978885, 335.93853759999996, 704.7711181735]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048042_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify.", "boxes_value": [[27.13128660000001, 130.0552978885, 132.93853759999996, 704.7711181735]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048042.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a necklace, a person, a glasses, and two sandals.", "boxes_value": [[230.1312866, 130.0552978885, 335.93853759999996, 704.7711181735], [285.86724855, 206.6428222365, 321.90686035, 278.7220458885], [213.1468506, 88.29254151800001, 396.73724365000004, 705.934448221], [280.1063843, 130.0552978885, 332.85217285, 150.22283939099998], [230.1312866, 625.757080073, 279.65509035, 698.3266601435], [302.3156128, 636.684570308, 335.93853759999996, 704.7711181735]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048042_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include a necklace, a person, a glasses, and two sandals.", "boxes_value": [[27.13128660000001, 130.0552978885, 132.93853759999996, 704.7711181735], [82.86724855, 206.6428222365, 118.90686034999999, 278.7220458885], [10.146850599999993, 88.29254151800001, 159, 705.934448221], [77.1063843, 130.0552978885, 129.85217285, 150.22283939099998], [27.13128660000001, 625.757080073, 76.65509035000002, 698.3266601435], [99.3156128, 636.684570308, 132.93853759999996, 704.7711181735]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048043.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[360.01489260889997, 246.015747072, 676.3924560369, 288.5210571264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048043_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[80.01489260889997, 11.01574707200001, 396.39245603689994, 53.521057126400024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048043.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a kettle, a cup, a bowl, a gas stove, and a plate.", "boxes_value": [[360.01489260889997, 246.015747072, 676.3924560369, 288.5210571264], [360.01489260889997, 246.015747072, 388.4632568287, 288.5210571264], [404.86291503449996, 271.1173095936, 424.9442138449, 288.5210571264], [567.9221191382, 253.1456909312, 624.3438720979001, 280.022888192], [419.98779294450003, 276.0791015424, 514.3837890543, 286.828369152], [622.08154299, 264.3840942592, 676.3924560369, 274.1778564608]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048043_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a kettle, a cup, a bowl, a gas stove, and a plate.", "boxes_value": [[80.01489260889997, 11.01574707200001, 396.39245603689994, 53.521057126400024], [80.01489260889997, 11.01574707200001, 108.46325682870003, 53.521057126400024], [124.86291503449996, 36.1173095936, 144.9442138449, 53.521057126400024], [287.92211913819995, 18.145690931199994, 344.3438720979001, 45.02288819199998], [139.98779294450003, 41.0791015424, 234.3837890543, 51.82836915199999], [342.08154299, 29.384094259200026, 396.39245603689994, 39.1778564608]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048046.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[608.6500244118, 152.1985473536, 676.9747314648, 256.9356689408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048046_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[17.65002441180002, 26.19854735359999, 85.97473146480002, 130.93566894079999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048046.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two lamps, a cabinet, and a mirror.", "boxes_value": [[608.6500244118, 152.1985473536, 676.9747314648, 256.9356689408], [608.6500244118, 207.5655517696, 676.9747314648, 256.9356689408], [589.9125976802, 155.7015991296, 656.1518554856, 237.8636474368], [622.713745116, 152.1985473536, 673.9854736084, 223.2145996288], [645.0351562403999, 175.4910888448, 676.5690917986, 219.2526244864], [380.2845459328, 15.3224487424, 684.7873535178, 254.00482176]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048046_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two lamps, a cabinet, and a mirror.", "boxes_value": [[17.65002441180002, 26.19854735359999, 85.97473146480002, 130.93566894079999], [17.65002441180002, 81.5655517696, 85.97473146480002, 130.93566894079999], [0, 29.701599129599998, 65.15185548559998, 111.8636474368], [31.71374511600004, 26.19854735359999, 82.98547360839996, 97.21459962879999], [54.03515624039994, 49.491088844800004, 85.56909179859997, 93.25262448640001], [0, 0, 93.78735351780006, 128.00482176]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048047.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.2639160493, 161.9082031104, 343.3371581929, 512.3686523392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048047_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.2639160493, 87.9082031104, 343.3371581929, 438]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048047.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[0.2639160493, 161.9082031104, 343.3371581929, 512.3686523392], [303.2601928582, 288.1080932864, 393.6466064598, 511.5159301632], [139.5414428379, 312.8364257792, 343.3371581929, 511.5159301632], [79.8523559808, 290.6661987328, 199.2305908235, 512.3686523392], [71.6820678573, 161.9082031104, 136.2089233522, 389.3185424896], [0.2639160493, 279.685485824, 35.346496611599996, 512.1075439616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048047_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[0.2639160493, 87.9082031104, 343.3371581929, 438], [303.2601928582, 214.1080932864, 393.6466064598, 437.5159301632], [139.5414428379, 238.8364257792, 343.3371581929, 437.5159301632], [79.8523559808, 216.6661987328, 199.2305908235, 438], [71.6820678573, 87.9082031104, 136.2089233522, 315.3185424896], [0.2639160493, 205.685485824, 35.346496611599996, 438]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048049.jpg", "text": "Regarding the image , what's going on in the section ? Specify the location of each mentioned object.", "boxes_value": [[0, 373.4638061568, 428.4179687833, 431.4739379712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048049_crop.jpg", "text": "Regarding the image , what's going on in the section ? Specify the location of each mentioned object.", "boxes_value": [[0, 15.46380615679999, 428.4179687833, 73.47393797119997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048049.jpg", "text": "Regarding the image , what's going on in the section ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people, and a car.", "boxes_value": [[0, 373.4638061568, 428.4179687833, 431.4739379712], [410.8929443179, 378.1370239488, 428.4179687833, 431.4739379712], [380.4022216584, 378.3909912064, 403.88745115610004, 435.7916870144], [165.9826049502, 375.9151001088, 177.57073974940002, 407.3367920128], [142.8063354201, 375.0237426688, 152.83453366700002, 405.1082763776], [97.56805417090001, 373.4638061568, 109.3790283066, 405.999694848], [0, 376.393981952, 35.9256591654, 401.6406249984]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048049_crop.jpg", "text": "Regarding the image , what's going on in the section ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people, and a car.", "boxes_value": [[0, 15.46380615679999, 428.4179687833, 73.47393797119997], [410.8929443179, 20.137023948799992, 428.4179687833, 73.47393797119997], [380.4022216584, 20.390991206399974, 403.88745115610004, 77.7916870144], [165.9826049502, 17.915100108800004, 177.57073974940002, 49.336792012800004], [142.8063354201, 17.023742668800026, 152.83453366700002, 47.10827637760002], [97.56805417090001, 15.46380615679999, 109.3790283066, 47.99969484799999], [0, 18.39398195199999, 35.9256591654, 43.64062499840003]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048051.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Specify the location of each mentioned object.", "boxes_value": [[549.3045654243, 253.9496459776, 770.6950683435, 353.4136963072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048051_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Specify the location of each mentioned object.", "boxes_value": [[56.304565424299994, 24.9496459776, 277.6950683435, 124.41369630719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048051.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Specify the location of each mentioned object. For your reference, objects involved in this region include a flower, three people, a pumpkin, and a bakset.", "boxes_value": [[549.3045654243, 253.9496459776, 770.6950683435, 353.4136963072], [549.3045654243, 312.2458496, 606.6455078004, 353.4136963072], [752.7187499955, 253.9496459776, 770.6950683435, 335.7891845632], [661.9440917622, 232.502624512, 730.369018572, 453.8082275328], [554.6170654434001, 257.389770496, 594.5058593727, 319.4389648384], [573.0223388934, 344.9268798976, 599.3144531439, 361.5785522688], [602.0505370809001, 314.3555297792, 646.0451659913999, 361.1885376]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048051_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Specify the location of each mentioned object. For your reference, objects involved in this region include a flower, three people, a pumpkin, and a bakset.", "boxes_value": [[56.304565424299994, 24.9496459776, 277.6950683435, 124.41369630719998], [56.304565424299994, 83.24584959999999, 113.64550780039997, 124.41369630719998], [259.71874999550005, 24.9496459776, 277.6950683435, 106.78918456320002], [168.94409176219995, 3.5026245120000112, 237.36901857199996, 149], [61.617065443400065, 28.389770495999983, 101.5058593727, 90.43896483840001], [80.02233889340005, 115.92687989759997, 106.31445314389998, 132.57855226880002], [109.05053708090009, 85.35552977920003, 153.04516599139993, 132.18853760000002]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048053.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object.", "boxes_value": [[188.6954345972, 330.1821899264, 311.6124877779, 379.8851928576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048053_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object.", "boxes_value": [[31.695434597200006, 13.182189926399985, 154.61248777790001, 62.885192857599975]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048053.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two slippers, and three knives.", "boxes_value": [[188.6954345972, 330.1821899264, 311.6124877779, 379.8851928576], [287.93353270660003, 366.6005249024, 311.6124877779, 379.8851928576], [270.5260009747, 330.1821899264, 293.20159913059996, 352.17059328], [188.12164308709998, 366.7161865216, 231.7266235439, 389.9530639872], [188.6954345972, 357.823120128, 223.98101805870002, 373.601196288], [177.2203979729, 348.0693359616, 223.4072265486, 370.1586914304]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048053_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two slippers, and three knives.", "boxes_value": [[31.695434597200006, 13.182189926399985, 154.61248777790001, 62.885192857599975], [130.93353270660003, 49.600524902400025, 154.61248777790001, 62.885192857599975], [113.52600097470003, 13.182189926399985, 136.20159913059996, 35.17059327999999], [31.121643087099983, 49.71618652159998, 74.72662354389999, 72.95306398719998], [31.695434597200006, 40.82312012800003, 66.98101805870002, 56.60119628799998], [20.220397972900003, 31.069335961600018, 66.4072265486, 53.15869143039998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048054.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations.", "boxes_value": [[504.1007079795, 185.56445312, 614.8294677724999, 269.7924804608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048054_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations.", "boxes_value": [[28.100707979499987, 21.564453119999996, 138.82946777249992, 105.7924804608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048054.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[504.1007079795, 185.56445312, 614.8294677724999, 269.7924804608], [497.3388672132, 188.6181640704, 516.7518310581, 264.306945792], [504.1007079795, 185.56445312, 530.2755127273, 264.306945792], [528.9667968935, 219.1554565632, 544.6716308667001, 264.52508544], [558.5803222996, 195.6178588672, 588.0441894287, 269.7924804608], [579.3903808806, 201.799072256, 594.2253418192, 269.5864258048], [591.5468750177, 200.3567504896, 614.8294677724999, 269.7924804608]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048054_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Please mention the objects and their locations. For your reference, objects involved in this region include six people.", "boxes_value": [[28.100707979499987, 21.564453119999996, 138.82946777249992, 105.7924804608], [21.33886721319999, 24.618164070400013, 40.75183105810004, 100.30694579200002], [28.100707979499987, 21.564453119999996, 54.2755127273, 100.30694579200002], [52.96679689350003, 55.155456563200005, 68.67163086670007, 100.52508544], [82.58032229959997, 31.6178588672, 112.04418942869995, 105.7924804608], [103.39038088059999, 37.79907225599999, 118.22534181920003, 105.5864258048], [115.54687501770002, 36.35675048959999, 138.82946777249992, 105.7924804608]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048055.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[113.5159911936, 278.7255249132, 218.1906127872, 370.6851806584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048055_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[26.5159911936, 23.72552491319999, 131.1906127872, 115.68518065839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048055.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two cars, a bicycle, and a motorcycle.", "boxes_value": [[113.5159911936, 278.7255249132, 218.1906127872, 370.6851806584], [187.7626342912, 278.7255249132, 218.1906127872, 362.8371582304], [162.1828002816, 309.1953735218, 174.3494873088, 342.4197997882], [113.5159911936, 304.9838256597, 170.6058959872, 348.6591797045], [184.6386108416, 309.68518068820003, 217.7907715072, 370.6851806584], [179.6533202944, 314.9204711716, 194.4631958016, 340.69360348280003]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048055_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two cars, a bicycle, and a motorcycle.", "boxes_value": [[26.5159911936, 23.72552491319999, 131.1906127872, 115.68518065839999], [100.76263429119999, 23.72552491319999, 131.1906127872, 107.83715823040001], [75.18280028160001, 54.19537352179998, 87.34948730880001, 87.41979978820001], [26.5159911936, 49.98382565970002, 83.6058959872, 93.65917970449999], [97.63861084160001, 54.685180688200035, 130.7907715072, 115.68518065839999], [92.6533202944, 59.92047117160001, 107.4631958016, 85.69360348280003]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048056.jpg", "text": "I would like a description of the content within the bbox in . Specify the location of each mentioned object.", "boxes_value": [[277.745971672, 223.9307861504, 351.67248532, 434.8649902592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048056_crop.jpg", "text": "I would like a description of the content within the bbox in . Specify the location of each mentioned object.", "boxes_value": [[18.745971671999996, 52.930786150399996, 92.67248532000002, 263.8649902592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048056.jpg", "text": "I would like a description of the content within the bbox in . Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, three people, and a hat.", "boxes_value": [[277.745971672, 223.9307861504, 351.67248532, 434.8649902592], [277.745971672, 339.8779907072, 351.67248532, 434.8649902592], [279.46520995640003, 306.783020032, 440.2124023336, 421.970825216], [272.4298095844, 223.7921142784, 335.38348386399997, 396.7294921728], [253.88757324320002, 194.7487793152, 301.8170165816, 362.0256347648], [325.6230468452, 226.4901123072, 383.3923340156, 318.5399780352], [295.6165160936, 223.9307861504, 317.0198364476, 254.0900878848]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048056_crop.jpg", "text": "I would like a description of the content within the bbox in . Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, three people, and a hat.", "boxes_value": [[18.745971671999996, 52.930786150399996, 92.67248532000002, 263.8649902592], [18.745971671999996, 168.8779907072, 92.67248532000002, 263.8649902592], [20.46520995640003, 135.78302003200002, 111, 250.97082521599998], [13.429809584400004, 52.79211427839999, 76.38348386399997, 225.7294921728], [0, 23.74877931520001, 42.8170165816, 191.02563476479997], [66.62304684520001, 55.49011230720001, 111, 147.53997803520002], [36.61651609360001, 52.930786150399996, 58.01983644760003, 83.0900878848]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048057.jpg", "text": "What can you share about the area in the presented image ? Include the coordinates for each mentioned object.", "boxes_value": [[170.50524899799998, 54.6708374016, 374.1749267472, 199.219299328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048057_crop.jpg", "text": "What can you share about the area in the presented image ? Include the coordinates for each mentioned object.", "boxes_value": [[51.505248997999985, 36.6708374016, 255.1749267472, 181.219299328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048057.jpg", "text": "What can you share about the area in the presented image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bowl, a bottle, three chairs, and a desk.", "boxes_value": [[170.50524899799998, 54.6708374016, 374.1749267472, 199.219299328], [170.67297364080002, 85.8583984128, 216.6801147736, 108.3270263808], [170.50524899799998, 54.6708374016, 182.184020994, 82.9694824448], [219.90985104, 157.4260253696, 261.0180663792, 199.219299328], [317.199218724, 119.7435302912, 383.6574706832, 188.942260736], [204.15173338600002, 119.7435302912, 268.5545654532, 195.7936401408], [205.97070311080003, 146.058044416, 374.1749267472, 189.2881469952]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048057_crop.jpg", "text": "What can you share about the area in the presented image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bowl, a bottle, three chairs, and a desk.", "boxes_value": [[51.505248997999985, 36.6708374016, 255.1749267472, 181.219299328], [51.67297364080002, 67.8583984128, 97.6801147736, 90.3270263808], [51.505248997999985, 36.6708374016, 63.18402099400001, 64.9694824448], [100.90985104, 139.4260253696, 142.01806637919998, 181.219299328], [198.199218724, 101.7435302912, 264.6574706832, 170.942260736], [85.15173338600002, 101.7435302912, 149.55456545319998, 177.7936401408], [86.97070311080003, 128.058044416, 255.1749267472, 171.2881469952]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048059.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[429.6026611059, 191.877807616, 567.2210693605999, 220.454345728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048059_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.60266110589998, 7.877807616000013, 172.22106936059993, 36.45434572799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048059.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five chairs.", "boxes_value": [[429.6026611059, 191.877807616, 567.2210693605999, 220.454345728], [429.6026611059, 191.877807616, 457.8031005603, 218.5742797824], [473.5953368824, 192.6298217984, 503.6759032891, 219.7023315456], [511.9479980266, 193.7578735104, 537.8924560385001, 219.7023315456], [533.0043945591, 194.133850112, 555.9407959235, 219.3262939648], [553.684814481, 194.8858642432, 567.2210693605999, 220.454345728]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048059_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five chairs.", "boxes_value": [[34.60266110589998, 7.877807616000013, 172.22106936059993, 36.45434572799999], [34.60266110589998, 7.877807616000013, 62.80310056029998, 34.5742797824], [78.59533688239998, 8.629821798400002, 108.6759032891, 35.7023315456], [116.94799802659998, 9.757873510400003, 142.8924560385001, 35.7023315456], [138.00439455909998, 10.133850112000005, 160.9407959235, 35.32629396479999], [158.684814481, 10.88586424319999, 172.22106936059993, 36.45434572799999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048061.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify.", "boxes_value": [[385.96643066710004, 93.6010131968, 516.8016357596, 279.1705322496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048061_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify.", "boxes_value": [[32.96643066710004, 46.6010131968, 163.8016357596, 232.1705322496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048061.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, a flower, and two lamps.", "boxes_value": [[385.96643066710004, 93.6010131968, 516.8016357596, 279.1705322496], [390.40734864340004, 242.1271972864, 431.6951904034, 279.1705322496], [426.9317626957, 248.160827648, 457.7352294843, 285.9506225664], [328.8053588934, 248.160827648, 461.8635253836, 286.2681884672], [385.96643066710004, 217.674987776, 413.2766113127, 241.8096313344], [386.43579099910005, 93.6010131968, 415.48999022040005, 116.690429696], [481.8698730717, 144.000732416, 516.8016357596, 159.5612182528]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048061_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, a flower, and two lamps.", "boxes_value": [[32.96643066710004, 46.6010131968, 163.8016357596, 232.1705322496], [37.40734864340004, 195.1271972864, 78.69519040339998, 232.1705322496], [73.9317626957, 201.160827648, 104.73522948430002, 238.9506225664], [0, 201.160827648, 108.8635253836, 239.2681884672], [32.96643066710004, 170.674987776, 60.27661131270003, 194.8096313344], [33.43579099910005, 46.6010131968, 62.489990220400045, 69.690429696], [128.8698730717, 97.000732416, 163.8016357596, 112.56121825279999]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048065.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object.", "boxes_value": [[447.1762695469, 242.9980468736, 671.7271728338, 309.6441650176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048065_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object.", "boxes_value": [[56.176269546900016, 16.998046873600003, 280.72717283379995, 83.64416501760002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048065.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a glasses, a trash bin can, and a chair.", "boxes_value": [[447.1762695469, 242.9980468736, 671.7271728338, 309.6441650176], [504.365356478, 250.678039552, 683.1829834179, 510.36230466559994], [508.9212646758, 203.4109496832, 621.1093749964, 387.9234008576], [658.1744384619, 254.8173828096, 671.7271728338, 309.6441650176], [511.74829101219996, 242.9980468736, 542.9436035066001, 273.0206298624], [447.1762695469, 255.330139136, 491.60131833680003, 274.1849975808]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048065_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a glasses, a trash bin can, and a chair.", "boxes_value": [[56.176269546900016, 16.998046873600003, 280.72717283379995, 83.64416501760002], [113.36535647800002, 24.678039552, 292, 100], [117.92126467579999, 0, 230.1093749964, 100], [267.17443846189997, 28.81738280959999, 280.72717283379995, 83.64416501760002], [120.74829101219996, 16.998046873600003, 151.94360350660008, 47.02062986240003], [56.176269546900016, 29.330139136000014, 100.60131833680003, 48.18499758079997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048067.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[354.85913084159995, 229.9233398272, 398.839843752, 258.6702270464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048067_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[11.85913084159995, 7.92333982720001, 55.83984375199998, 36.67022704639999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048067.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a cup, a plate, a desk, and two stools.", "boxes_value": [[354.85913084159995, 229.9233398272, 398.839843752, 258.6702270464], [314.596618644, 174.6987304448, 577.3209228695999, 377.7607421952], [368.7738036864, 229.9233398272, 398.839843752, 252.245117184], [354.85913084159995, 247.960205056, 398.70318602519995, 258.6702270464], [41.119995126, 225.0239868416, 473.9306640516, 486.4853515776], [381.0880126788, 198.4000244224, 559.264038102, 489.2160034304], [310.7733154008, 174.5066528256, 471.8826904512, 446.8906860544]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048067_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a cup, a plate, a desk, and two stools.", "boxes_value": [[11.85913084159995, 7.92333982720001, 55.83984375199998, 36.67022704639999], [0, 0, 66, 43], [25.773803686400015, 7.92333982720001, 55.83984375199998, 30.24511718400001], [11.85913084159995, 25.960205056000007, 55.703186025199955, 36.67022704639999], [0, 3.023986841599992, 66, 43], [38.0880126788, 0, 66, 43], [0, 0, 66, 43]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048070.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each mentioned object.", "boxes_value": [[304.16326901040003, 365.5650634752, 371.93127442800005, 389.2305908224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048070_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each mentioned object.", "boxes_value": [[17.16326901040003, 6.56506347520002, 84.93127442800005, 30.230590822400018]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048070.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a desk, and three remotes.", "boxes_value": [[304.16326901040003, 365.5650634752, 371.93127442800005, 389.2305908224], [9.2499389646, 232.8742675968, 681.3652343967, 495.6184692224], [240.21679690230002, 351.8348388864, 412.6403808315, 511.4583740416], [333.005737329, 373.7130126848, 371.93127442800005, 389.2305908224], [325.6414794648, 372.1349487104, 355.36157223239996, 386.863525376], [304.16326901040003, 365.5650634752, 341.65020750179997, 385.6611938304]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048070_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a desk, and three remotes.", "boxes_value": [[17.16326901040003, 6.56506347520002, 84.93127442800005, 30.230590822400018], [0, 0, 101, 36], [0, 0, 101, 36], [46.005737329, 14.713012684799992, 84.93127442800005, 30.230590822400018], [38.6414794648, 13.13494871040001, 68.36157223239996, 27.863525375999984], [17.16326901040003, 6.56506347520002, 54.65020750179997, 26.66119383040001]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048071.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please point out the objects and their coordinates.", "boxes_value": [[232.727172864, 52.5930786304, 663.16760256, 512.4425048576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048071_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please point out the objects and their coordinates.", "boxes_value": [[107.72717286400001, 52.5930786304, 538.16760256, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048071.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, four people, a glasses, a plate, a bottle, and an extractor.", "boxes_value": [[232.727172864, 52.5930786304, 663.16760256, 512.4425048576], [215.8333129728, 407.5495605248, 389.35754396159996, 511.7805175808], [183.7122192384, 196.964416512, 306.6951904512, 340.444580096], [232.727172864, 52.5930786304, 663.16760256, 512.4425048576], [461.7607421952, 254.0, 622.1733398783999, 512.4425048576], [570.484741248, 319.0562134016, 754.9592285184001, 511.55133056], [355.3140869376, 145.176879872, 428.7869872896, 180.5270995968], [318.0189209088, 405.676879872, 382.027832064, 419.9847412224], [272.0830078464, 276.9060058624, 299.9458007808, 352.9636230656], [195.49560545280002, 38.8292846592, 515.4295654656, 102.994323712]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8], [9]]}, {"image_path": "objects365_v1_00048071_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, four people, a glasses, a plate, a bottle, and an extractor.", "boxes_value": [[107.72717286400001, 52.5930786304, 538.16760256, 512], [90.8333129728, 407.5495605248, 264.35754396159996, 511.7805175808], [58.712219238399996, 196.964416512, 181.6951904512, 340.444580096], [107.72717286400001, 52.5930786304, 538.16760256, 512], [336.7607421952, 254.0, 497.17333987839993, 512], [445.484741248, 319.0562134016, 629.9592285184001, 511.55133056], [230.3140869376, 145.176879872, 303.7869872896, 180.5270995968], [193.0189209088, 405.676879872, 257.027832064, 419.9847412224], [147.0830078464, 276.9060058624, 174.94580078080003, 352.9636230656], [70.49560545280002, 38.8292846592, 390.42956546560004, 102.994323712]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8], [9]]}, {"image_path": "objects365_v1_00048072.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[174.21697999100002, 141.7708740096, 412.21862791620003, 261.5479126016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048072_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[60.21697999100002, 30.770874009599993, 298.21862791620003, 150.5479126016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048072.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[174.21697999100002, 141.7708740096, 412.21862791620003, 261.5479126016], [375.3000488136, 141.7708740096, 412.21862791620003, 187.5291137536], [174.21697999100002, 194.6744384512, 224.9691772243, 261.5479126016], [213.9530639731, 197.2476806656, 254.66082767150002, 261.4406738432], [262.0977783014, 180.0251464704, 310.6340332092, 257.5264892416], [310.8303222279, 174.7543945216, 339.3530273101, 204.3636474368]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048072_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five potted plants.", "boxes_value": [[60.21697999100002, 30.770874009599993, 298.21862791620003, 150.5479126016], [261.3000488136, 30.770874009599993, 298.21862791620003, 76.5291137536], [60.21697999100002, 83.67443845119999, 110.96917722430001, 150.5479126016], [99.9530639731, 86.2476806656, 140.66082767150002, 150.4406738432], [148.0977783014, 69.02514647039999, 196.63403320920003, 146.52648924160002], [196.8303222279, 63.754394521600005, 225.3530273101, 93.3636474368]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048073.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[304.873718272, 0.43655395199999997, 629.182006848, 385.778076192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048073_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[81.87371827200002, 0.43655395199999997, 406.182006848, 385.778076192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048073.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, a mirror, a flag, and two people.", "boxes_value": [[304.873718272, 0.43655395199999997, 629.182006848, 385.778076192], [548.775878912, 0.43655395199999997, 629.182006848, 76.665710448], [506.72949216, 116.0162964, 522.51171872, 151.526367168], [571.373779264, 124.649780304, 586.4549560319999, 149.245544448], [479.11059571199996, 222.202392576, 588.928710912, 318.55529784], [304.873718272, 174.795043968, 385.873779328, 307.475585952], [443.358520512, 235.74499512, 512.60449216, 385.778076192], [534.34875488, 237.03228758400002, 571.156738304, 313.226989728]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048073_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, a mirror, a flag, and two people.", "boxes_value": [[81.87371827200002, 0.43655395199999997, 406.182006848, 385.778076192], [325.77587891200005, 0.43655395199999997, 406.182006848, 76.665710448], [283.72949216, 116.0162964, 299.51171872, 151.526367168], [348.37377926399995, 124.649780304, 363.4549560319999, 149.245544448], [256.11059571199996, 222.202392576, 365.92871091200004, 318.55529784], [81.87371827200002, 174.795043968, 162.873779328, 307.475585952], [220.35852051199998, 235.74499512, 289.60449215999995, 385.778076192], [311.34875488, 237.03228758400002, 348.156738304, 313.226989728]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048076.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations.", "boxes_value": [[496.5239257513, 372.461792, 565.8463135055, 437.1773681664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048076_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations.", "boxes_value": [[17.5239257513, 16.461792000000003, 86.8463135055, 81.17736816640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048076.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include a bed, a desk, two cups, and a bottle.", "boxes_value": [[496.5239257513, 372.461792, 565.8463135055, 437.1773681664], [193.1600952244, 316.766418432, 616.6636963052, 479.258667008], [450.6640625258, 388.3643188224, 680.9285888576, 463.538940416], [496.5239257513, 404.9292602368, 521.3132324200001, 437.1773681664], [498.93701174169996, 352.937438976, 523.5069580027, 412.6074218496], [529.8688964634999, 372.461792, 565.8463135055, 420.2855224832]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048076_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include a bed, a desk, two cups, and a bottle.", "boxes_value": [[17.5239257513, 16.461792000000003, 86.8463135055, 81.17736816640002], [0, 0, 104, 97], [0, 32.36431882239998, 104, 97], [17.5239257513, 48.92926023680002, 42.31323242000008, 81.17736816640002], [19.937011741699962, 0, 44.5069580027, 56.60742184959997], [50.86889646349994, 16.461792000000003, 86.8463135055, 64.2855224832]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048077.jpg", "text": "Help me understand the details within the area in photograph . Please mention the objects and their locations.", "boxes_value": [[524.2937011949, 226.0442504704, 682.8472900107, 387.6089477632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048077_crop.jpg", "text": "Help me understand the details within the area in photograph . Please mention the objects and their locations.", "boxes_value": [[40.293701194899995, 41.04425047039999, 198.84729001070002, 202.60894776319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048077.jpg", "text": "Help me understand the details within the area in photograph . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a person, a handbag, a bicycle, and a street lights.", "boxes_value": [[524.2937011949, 226.0442504704, 682.8472900107, 387.6089477632], [523.4805907877, 223.43920896, 566.0664062258, 255.3786010624], [657.8636474396, 277.104553216, 682.8472900107, 387.6089477632], [658.9891357513001, 300.5991211008, 675.7480469029, 326.6685791232], [597.4194335969, 297.3107910144, 669.9252929569, 356.8027954176], [524.2937011949, 226.0442504704, 567.0535888815999, 340.0706787328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048077_crop.jpg", "text": "Help me understand the details within the area in photograph . Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a person, a handbag, a bicycle, and a street lights.", "boxes_value": [[40.293701194899995, 41.04425047039999, 198.84729001070002, 202.60894776319998], [39.48059078769995, 38.43920896, 82.06640622580005, 70.37860106240001], [173.86364743959996, 92.104553216, 198.84729001070002, 202.60894776319998], [174.98913575130007, 115.5991211008, 191.7480469029, 141.6685791232], [113.41943359690003, 112.31079101440002, 185.92529295689997, 171.8027954176], [40.293701194899995, 41.04425047039999, 83.05358888159992, 155.0706787328]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048081.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object.", "boxes_value": [[45.1777344, 218.7866820994, 172.9537964032, 621.5976562725]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048081_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object.", "boxes_value": [[32.1777344, 100.7866820994, 159.9537964032, 503.5976562725]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048081.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a boots, a leather shoes, and a gloves.", "boxes_value": [[45.1777344, 218.7866820994, 172.9537964032, 621.5976562725], [40.3838500864, 161.1228637926, 260.1749267456, 617.4191894470999], [51.2481079296, 218.7866820994, 142.3402099712, 621.5976562725], [139.3978271232, 531.7359619287, 172.9537964032, 607.3790283186], [76.835815424, 582.9229736053001, 142.81030272, 614.7727050768], [45.1777344, 289.2798461878, 67.4539794944, 330.2060546787]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048081_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a boots, a leather shoes, and a gloves.", "boxes_value": [[32.1777344, 100.7866820994, 159.9537964032, 503.5976562725], [27.383850086400003, 43.122863792599986, 191, 499.41918944709994], [38.2481079296, 100.7866820994, 129.3402099712, 503.5976562725], [126.39782712319999, 413.7359619287, 159.9537964032, 489.37902831860004], [63.835815424, 464.92297360530006, 129.81030272, 496.7727050768], [32.1777344, 171.2798461878, 54.4539794944, 212.2060546787]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048082.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object.", "boxes_value": [[0.3908081152, 151.4075927535, 146.5164361728, 488.3824462725]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048082_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object.", "boxes_value": [[0.3908081152, 84.40759275350001, 146.5164361728, 421.3824462725]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048082.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three sneakers, and a hat.", "boxes_value": [[0.3908081152, 151.4075927535, 146.5164361728, 488.3824462725], [0.3908081152, 151.4075927535, 103.5548706304, 488.3824462725], [8.8883695616, 457.0805290905, 39.1838559232, 486.07763744249996], [32.6919660032, 402.1158609495, 60.823489024, 460.97566306650003], [123.1456324096, 396.489556377, 146.5164361728, 439.76882259], [19.7965240832, 153.42346414349998, 61.76857984, 190.3079980485]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048082_crop.jpg", "text": "Regarding the image , what's going on in the section ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three sneakers, and a hat.", "boxes_value": [[0.3908081152, 84.40759275350001, 146.5164361728, 421.3824462725], [0.3908081152, 84.40759275350001, 103.5548706304, 421.3824462725], [8.8883695616, 390.0805290905, 39.1838559232, 419.07763744249996], [32.6919660032, 335.1158609495, 60.823489024, 393.97566306650003], [123.1456324096, 329.489556377, 146.5164361728, 372.76882259], [19.7965240832, 86.42346414349998, 61.76857984, 123.3079980485]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048085.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object.", "boxes_value": [[316.1434325989, 254.5344848384, 636.2558594065, 503.9127197184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048085_crop.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object.", "boxes_value": [[80.1434325989, 62.534484838400004, 400.2558594065, 311.9127197184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048085.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object. For your reference, objects involved in this region include four stuffed toys, and a person.", "boxes_value": [[316.1434325989, 254.5344848384, 636.2558594065, 503.9127197184], [316.1434325989, 254.5344848384, 440.7294921728, 494.5670166016], [418.1212158243, 109.9514160128, 531.9184570016, 445.7771606528], [437.91210938660004, 296.7274169856, 559.1308593672, 503.9127197184], [545.1198730436, 306.8492431872, 582.6759033412001, 452.5665893376], [579.671386751, 312.8582153216, 636.2558594065, 492.1256103424]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2]]}, {"image_path": "objects365_v1_00048085_crop.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object. For your reference, objects involved in this region include four stuffed toys, and a person.", "boxes_value": [[80.1434325989, 62.534484838400004, 400.2558594065, 311.9127197184], [80.1434325989, 62.534484838400004, 204.7294921728, 302.5670166016], [182.1212158243, 0, 295.9184570016, 253.77716065279998], [201.91210938660004, 104.72741698559997, 323.13085936719995, 311.9127197184], [309.1198730436, 114.84924318719999, 346.67590334120007, 260.5665893376], [343.671386751, 120.85821532160003, 400.2558594065, 300.1256103424]], "boxes_seq": [[0], [0], [1, 3, 4, 5], [2]]}, {"image_path": "objects365_v1_00048086.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.7115478748, 246.0489089536, 165.3879362646, 450.4381103616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048086_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for each element you describe.", "boxes_value": [[0.7115478748, 52.048908953600005, 165.3879362646, 256.4381103616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048086.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two sneakers, a hat, a cup, and a chair.", "boxes_value": [[0.7115478748, 246.0489089536, 165.3879362646, 450.4381103616], [126.8004940258, 368.0711440384, 165.3879362646, 399.5704994304], [101.120361307, 395.2484463104, 136.01232909179998, 418.7171664384], [85.67617816799999, 246.0489089536, 112.426918982, 264.633228288], [95.5672607302, 421.2994384896, 114.0886230246, 445.265380864], [0.7115478748, 290.9169922048, 119.1333617922, 450.4381103616]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048086_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two sneakers, a hat, a cup, and a chair.", "boxes_value": [[0.7115478748, 52.048908953600005, 165.3879362646, 256.4381103616], [126.8004940258, 174.0711440384, 165.3879362646, 205.5704994304], [101.120361307, 201.24844631040003, 136.01232909179998, 224.7171664384], [85.67617816799999, 52.048908953600005, 112.426918982, 70.633228288], [95.5672607302, 227.2994384896, 114.0886230246, 251.265380864], [0.7115478748, 96.91699220480001, 119.1333617922, 256.4381103616]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048088.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[462.1959228377, 192.5504760832, 559.5966797102, 310.9616089088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048088_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[25.19592283769998, 30.55047608320001, 122.59667971019996, 148.96160890879997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048088.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, an umbrella, a barrel, and a boat.", "boxes_value": [[462.1959228377, 192.5504760832, 559.5966797102, 310.9616089088], [486.30664059360004, 207.7053222912, 506.18188479250006, 248.4495849472], [501.461547828, 211.680358912, 553.8825683886, 265.8404540928], [508.91479488999994, 192.5504760832, 559.5966797102, 225.5930786304], [531.6636962941, 287.3912964096, 557.1982421573999, 310.9616089088], [462.1959228377, 208.438903808, 491.5009765529, 231.7186279424]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048088_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, an umbrella, a barrel, and a boat.", "boxes_value": [[25.19592283769998, 30.55047608320001, 122.59667971019996, 148.96160890879997], [49.30664059360004, 45.70532229119999, 69.18188479250006, 86.44958494720001], [64.461547828, 49.680358912, 116.88256838860002, 103.84045409279997], [71.91479488999994, 30.55047608320001, 122.59667971019996, 63.59307863039999], [94.66369629409996, 125.39129640959999, 120.19824215739993, 148.96160890879997], [25.19592283769998, 46.43890380799999, 54.5009765529, 69.71862794239999]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048091.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for each element you describe.", "boxes_value": [[299.8714599591, 318.9015502848, 542.1348876927, 511.9267578368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048091_crop.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for each element you describe.", "boxes_value": [[60.87145995909998, 48.90155028480001, 303.1348876927, 241.9267578368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048091.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a bracelet, and two bottles.", "boxes_value": [[299.8714599591, 318.9015502848, 542.1348876927, 511.9267578368], [213.5000000274, 126.3273925632, 462.9526367463, 512.0468749824], [382.4207763609, 116.506408704, 686.8708496334, 512.057739264], [299.8714599591, 466.9122925056, 323.1963501315, 511.9267578368], [479.64074706810004, 318.9015502848, 542.1348876927, 449.9132690432], [340.3466796867, 321.1603393536, 385.5230712606, 510.1486205952]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048091_crop.jpg", "text": "In the image , elaborate on the details found within the section . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a bracelet, and two bottles.", "boxes_value": [[60.87145995909998, 48.90155028480001, 303.1348876927, 241.9267578368], [0, 0, 223.95263674630002, 242], [143.4207763609, 0, 363, 242], [60.87145995909998, 196.91229250560002, 84.19635013150003, 241.9267578368], [240.64074706810004, 48.90155028480001, 303.1348876927, 179.9132690432], [101.34667968669999, 51.16033935360002, 146.52307126059998, 240.1486205952]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048092.jpg", "text": "Please provide insights on the specified area within the graphic . Specify the location of each mentioned object.", "boxes_value": [[134.1776733184, 10.3893432832, 360.9766845952, 270.2836303872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048092_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Specify the location of each mentioned object.", "boxes_value": [[57.1776733184, 10.3893432832, 283.9766845952, 270.2836303872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048092.jpg", "text": "Please provide insights on the specified area within the graphic . Specify the location of each mentioned object. For your reference, objects involved in this region include a baseball glove, two people, a hat, and a helmet.", "boxes_value": [[134.1776733184, 10.3893432832, 360.9766845952, 270.2836303872], [183.5621948416, 215.8698730496, 235.4700927488, 270.2836303872], [229.6032714752, 10.5062866432, 397.5855102464, 467.554382336], [66.737243648, 30.971130368, 244.9519043072, 496.5462646272], [134.1776733184, 31.8689575424, 182.8225708032, 67.8787842048], [287.6934203904, 10.3893432832, 360.9766845952, 74.1963501056]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048092_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Specify the location of each mentioned object. For your reference, objects involved in this region include a baseball glove, two people, a hat, and a helmet.", "boxes_value": [[57.1776733184, 10.3893432832, 283.9766845952, 270.2836303872], [106.56219484159999, 215.8698730496, 158.4700927488, 270.2836303872], [152.6032714752, 10.5062866432, 320.5855102464, 335], [0, 30.971130368, 167.9519043072, 335], [57.1776733184, 31.8689575424, 105.8225708032, 67.8787842048], [210.69342039039998, 10.3893432832, 283.9766845952, 74.1963501056]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048094.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please mention the objects and their locations.", "boxes_value": [[288.852355944, 5.831481935999999, 503.396118144, 341.830993632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048094_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please mention the objects and their locations.", "boxes_value": [[53.85235594400001, 5.831481935999999, 268.396118144, 341.830993632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048094.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, a flower, a vase, and a chair.", "boxes_value": [[288.852355944, 5.831481935999999, 503.396118144, 341.830993632], [398.231323272, 5.831481935999999, 416.46118161600003, 26.575805663999997], [288.852355944, 17.775177024, 306.453613248, 35.376403824], [411.48999021599997, 194.268493632, 503.396118144, 299.106079104], [441.50964357600003, 285.71276856, 469.219970736, 318.04150391999997], [321.355834992, 234.18463132800002, 415.260131832, 341.830993632]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048094_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Please mention the objects and their locations. For your reference, objects involved in this region include two lamps, a flower, a vase, and a chair.", "boxes_value": [[53.85235594400001, 5.831481935999999, 268.396118144, 341.830993632], [163.231323272, 5.831481935999999, 181.46118161600003, 26.575805663999997], [53.85235594400001, 17.775177024, 71.45361324800001, 35.376403824], [176.48999021599997, 194.268493632, 268.396118144, 299.106079104], [206.50964357600003, 285.71276856, 234.219970736, 318.04150391999997], [86.35583499199998, 234.18463132800002, 180.260131832, 341.830993632]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048096.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[404.4717407232, 28.3007202464, 512.5332031488, 310.4200439517]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048096_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[27.471740723200014, 28.3007202464, 135, 310.4200439517]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048096.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a chair, a person, a handbag, a cup, and a bowl.", "boxes_value": [[404.4717407232, 28.3007202464, 512.5332031488, 310.4200439517], [378.699462912, 173.41870119920003, 478.5877074944, 259.678588896], [404.4717407232, 28.3007202464, 512.5332031488, 310.4200439517], [386.8731689472, 11.3997192091, 445.1801757696, 251.2550048951], [457.7937622016, 218.2103882139, 510.952453632, 295.8682861628], [405.4717407232, 144.5495605547, 422.6223144448, 162.8248291021], [495.0433959936, 144.7612915109, 511.9772338688, 169.5975952047]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048096_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a chair, a person, a handbag, a cup, and a bowl.", "boxes_value": [[27.471740723200014, 28.3007202464, 135, 310.4200439517], [1.6994629120000013, 173.41870119920003, 101.58770749439998, 259.678588896], [27.471740723200014, 28.3007202464, 135, 310.4200439517], [9.873168947199986, 11.3997192091, 68.1801757696, 251.2550048951], [80.79376220159998, 218.2103882139, 133.95245363200002, 295.8682861628], [28.471740723200014, 144.5495605547, 45.6223144448, 162.8248291021], [118.04339599359997, 144.7612915109, 134.9772338688, 169.5975952047]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048097.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[598.3109130926999, 143.0808715776, 769.3557128694999, 511.4028320256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048097_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify.", "boxes_value": [[43.310913092699934, 92.08087157759999, 214, 460.4028320256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048097.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a bed, two pillows, and a power outlet.", "boxes_value": [[598.3109130926999, 143.0808715776, 769.3557128694999, 511.4028320256], [620.7915038916, 143.0808715776, 768.474731474, 256.6832275456], [272.8813476444, 361.8704834048, 769.8016357447999, 512.5361328128], [559.1403808557, 408.2536621056, 653.5849609022, 512.7084960768], [598.3109130926999, 387.797912576, 769.3557128694999, 511.4028320256], [724.4394530988, 311.3142089728, 749.516235345, 332.5073852416]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048097_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a bed, two pillows, and a power outlet.", "boxes_value": [[43.310913092699934, 92.08087157759999, 214, 460.4028320256], [65.79150389159997, 92.08087157759999, 213.474731474, 205.6832275456], [0, 310.8704834048, 214, 461], [4.140380855699959, 357.2536621056, 98.58496090220001, 461], [43.310913092699934, 336.797912576, 214, 460.4028320256], [169.43945309879996, 260.3142089728, 194.51623534500004, 281.5073852416]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048098.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[316.0190429696, 445.6534423653, 512.3891601408, 645.0294189593]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048098_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[50.01904296959998, 50.65344236530001, 246, 250.02941895929996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048098.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two potted plants, a person, two chairs, and two desks.", "boxes_value": [[316.0190429696, 445.6534423653, 512.3891601408, 645.0294189593], [416.1939086848, 445.6534423653, 462.199768064, 525.1181640866], [299.5693359616, 502.4991455141, 376.6419677696, 601.4821777515999], [316.0190429696, 534.3709716940999, 375.510925312, 608.5993652500999], [487.1265259008, 543.4893798593, 512.3891601408, 591.0072021579], [374.9470214656, 548.5146484574, 440.3280029184, 645.0294189593], [313.5150756864, 495.1113281446, 419.5620727296, 544.9759521715], [461.7148437504, 526.5369873065999, 510.0508422656, 566.5070800797]], "boxes_seq": [[0], [0], [1, 5], [2], [3, 6], [4, 7]]}, {"image_path": "objects365_v1_00048098_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two potted plants, a person, two chairs, and two desks.", "boxes_value": [[50.01904296959998, 50.65344236530001, 246, 250.02941895929996], [150.19390868480002, 50.65344236530001, 196.199768064, 130.11816408660002], [33.56933596160002, 107.49914551410001, 110.6419677696, 206.48217775159992], [50.01904296959998, 139.37097169409992, 109.51092531199998, 213.59936525009994], [221.1265259008, 148.48937985930002, 246, 196.0072021579], [108.94702146560002, 153.51464845739997, 174.3280029184, 250.02941895929996], [47.515075686399996, 100.11132814460001, 153.5620727296, 149.9759521715], [195.7148437504, 131.53698730659994, 244.0508422656, 171.50708007970002]], "boxes_seq": [[0], [0], [1, 5], [2], [3, 6], [4, 7]]}, {"image_path": "objects365_v1_00048100.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[22.1013183744, 427.4360351744, 650.0373535488, 490.9789428736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048100_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[22.1013183744, 16.43603517439999, 650.0373535488, 79.97894287359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048100.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[22.1013183744, 427.4360351744, 650.0373535488, 490.9789428736], [21.1595459328, 98.6069946368, 325.22180175360006, 492.3190918144], [22.1013183744, 428.1024780288, 75.0578613504, 461.7714233344], [276.6405029376, 450.3059692544, 326.10083005440003, 490.9789428736], [484.5935058432, 433.9472656384, 556.2167968512, 466.207336448], [579.3020019456001, 427.4360351744, 650.0373535488, 454.9606933504]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048100_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[22.1013183744, 16.43603517439999, 650.0373535488, 79.97894287359998], [21.1595459328, 0, 325.22180175360006, 81.3190918144], [22.1013183744, 17.10247802880002, 75.0578613504, 50.771423334400026], [276.6405029376, 39.3059692544, 326.10083005440003, 79.97894287359998], [484.5935058432, 22.947265638399983, 556.2167968512, 55.20733644799998], [579.3020019456001, 16.43603517439999, 650.0373535488, 43.96069335039999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048101.jpg", "text": "Please provide details for the area within the bounding box in . Specify the location of each mentioned object.", "boxes_value": [[226.1174316453, 283.7169189376, 599.0175780927, 419.4754028544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048101_crop.jpg", "text": "Please provide details for the area within the bounding box in . Specify the location of each mentioned object.", "boxes_value": [[94.1174316453, 34.716918937599985, 467.0175780927, 170.4754028544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048101.jpg", "text": "Please provide details for the area within the bounding box in . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a hat, four leather shoes, and a tie.", "boxes_value": [[226.1174316453, 283.7169189376, 599.0175780927, 419.4754028544], [564.8150634711, 283.7169189376, 599.0175780927, 381.4384155136], [281.1704101674, 369.6429443584, 313.4428711245, 386.2537841664], [226.1174316453, 392.898071296, 248.42340086069999, 419.4754028544], [304.9001464899, 389.5759277568, 331.00286867700004, 407.1359252992], [383.6828613345, 391.9489135616, 404.0904540972, 409.5089111552], [402.1921386405, 387.6775512576, 419.34326170620005, 406.6613159424], [276.5600586021, 299.7758178816, 289.17071534129997, 328.6582641664]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048101_crop.jpg", "text": "Please provide details for the area within the bounding box in . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a hat, four leather shoes, and a tie.", "boxes_value": [[94.1174316453, 34.716918937599985, 467.0175780927, 170.4754028544], [432.81506347109996, 34.716918937599985, 467.0175780927, 132.43841551359998], [149.1704101674, 120.64294435839997, 181.44287112450002, 137.2537841664], [94.1174316453, 143.898071296, 116.42340086069999, 170.4754028544], [172.9001464899, 140.5759277568, 199.00286867700004, 158.13592529919998], [251.6828613345, 142.94891356160002, 272.0904540972, 160.5089111552], [270.1921386405, 138.6775512576, 287.34326170620005, 157.6613159424], [144.5600586021, 50.775817881600005, 157.17071534129997, 79.65826416639999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048103.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference.", "boxes_value": [[253.40332032, 489.15319826999996, 501.308410624, 573.6337890394]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048103_crop.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference.", "boxes_value": [[62.403320320000006, 21.15319826999996, 310.308410624, 105.63378903939997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048103.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three sneakers, and two helmets.", "boxes_value": [[253.40332032, 489.15319826999996, 501.308410624, 573.6337890394], [346.0991211008, 148.30554201299998, 467.1794433536, 575.7932128665], [429.6595459072, 536.0113525179, 464.5513915904, 573.6337890394], [350.4702758912, 539.3487549071, 404.7801513472, 570.5997314177], [474.0185546752, 490.9033203385, 501.308410624, 525.4266357373999], [253.40332032, 518.7680664119, 281.6984863232, 541.97644043], [339.9347534336, 489.15319826999996, 372.94866944, 537.0766601746001]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4, 6]]}, {"image_path": "objects365_v1_00048103_crop.jpg", "text": "What does the area look like in the context of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three sneakers, and two helmets.", "boxes_value": [[62.403320320000006, 21.15319826999996, 310.308410624, 105.63378903939997], [155.0991211008, 0, 276.1794433536, 107.79321286649997], [238.65954590720003, 68.01135251790004, 273.5513915904, 105.63378903939997], [159.4702758912, 71.34875490709999, 213.7801513472, 102.59973141770001], [283.0185546752, 22.90332033850001, 310.308410624, 57.42663573739992], [62.403320320000006, 50.76806641190001, 90.69848632319997, 73.97644043000003], [148.9347534336, 21.15319826999996, 181.94866944, 69.07666017460008]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4, 6]]}, {"image_path": "objects365_v1_00048104.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object.", "boxes_value": [[113.74121095000001, 191.7480469, 194.545105, 258.8774414]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048104_crop.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object.", "boxes_value": [[20.74121095000001, 17.74804689999999, 101.545105, 84.87744140000001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048104.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pillows, a bed, and a stuffed toy.", "boxes_value": [[113.74121095000001, 191.7480469, 194.545105, 258.8774414], [113.74121095000001, 197.9637451, 174.03338625, 238.3656616], [180.87060545, 198.58532714999998, 208.2196045, 259.49902345], [144.1980591, 191.7480469, 194.545105, 258.8774414], [0, 183.45678709999999, 210.25543215000002, 322.35675050000003], [114.39544679999999, 204.60058595, 161.1413574, 236.66345215]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048104_crop.jpg", "text": "Please share details about the rectangular region within the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three pillows, a bed, and a stuffed toy.", "boxes_value": [[20.74121095000001, 17.74804689999999, 101.545105, 84.87744140000001], [20.74121095000001, 23.96374510000001, 81.03338625, 64.36566160000001], [87.87060545, 24.585327149999983, 115.2196045, 85.49902344999998], [51.198059099999995, 17.74804689999999, 101.545105, 84.87744140000001], [0, 9.456787099999985, 117.25543215000002, 101], [21.395446799999988, 30.60058595000001, 68.1413574, 62.66345215000001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048105.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for each element you describe.", "boxes_value": [[61.217040987, 195.0810546688, 398.11206050899995, 416.2315063296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048105_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for each element you describe.", "boxes_value": [[61.217040987, 56.08105466879999, 398.11206050899995, 277.2315063296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048105.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a camera.", "boxes_value": [[61.217040987, 195.0810546688, 398.11206050899995, 416.2315063296], [175.089172402, 277.9791259648, 352.58703616, 512.1831054848], [61.217040987, 257.5801391616, 201.83984374899998, 416.2315063296], [220.18115232600002, 248.946044928, 297.44653321, 292.2145996288], [310.250488309, 195.0810546688, 398.11206050899995, 395.970825216], [186.882141117, 336.7891235328, 261.920165981, 404.03106688]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048105_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and a camera.", "boxes_value": [[61.217040987, 56.08105466879999, 398.11206050899995, 277.2315063296], [175.089172402, 138.9791259648, 352.58703616, 332], [61.217040987, 118.58013916160002, 201.83984374899998, 277.2315063296], [220.18115232600002, 109.94604492799999, 297.44653321, 153.2145996288], [310.250488309, 56.08105466879999, 398.11206050899995, 256.970825216], [186.882141117, 197.78912353279998, 261.920165981, 265.03106688]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048106.jpg", "text": "In the displayed image , help me understand the region defined by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[471.2120361122, 407.245117184, 562.8487549095, 511.5051879936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048106_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[23.212036112199996, 26.24511718399998, 114.84875490950003, 130.50518799359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048106.jpg", "text": "In the displayed image , help me understand the region defined by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a tie, two wine glasses, and two cups.", "boxes_value": [[471.2120361122, 407.245117184, 562.8487549095, 511.5051879936], [482.99084470440005, 362.0468749824, 510.739624045, 460.4689941504], [471.2120361122, 435.764648448, 495.290039056, 511.2714233344], [498.3291015813, 455.401123072, 527.7836913848, 511.5051879936], [528.4849853349, 407.245117184, 555.6019286836, 461.4790649344], [542.978515598, 478.31030272, 562.8487549095, 504.0246582272]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048106_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a tie, two wine glasses, and two cups.", "boxes_value": [[23.212036112199996, 26.24511718399998, 114.84875490950003, 130.50518799359998], [34.99084470440005, 0, 62.73962404500003, 79.46899415040002], [23.212036112199996, 54.764648448, 47.29003905600001, 130.27142333440003], [50.329101581299994, 74.40112307200002, 79.78369138480002, 130.50518799359998], [80.48498533489999, 26.24511718399998, 107.60192868360002, 80.47906493440001], [94.97851559799994, 97.31030271999998, 114.84875490950003, 123.0246582272]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048107.jpg", "text": "In the image , elaborate on the details found within the section . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[271.850708032, 211.082397456, 538.584106432, 283.38781737600004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048107_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[66.850708032, 18.082397455999995, 333.584106432, 90.38781737600004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048107.jpg", "text": "In the image , elaborate on the details found within the section . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two storage boxes, a person, two books, and a moniter.", "boxes_value": [[271.850708032, 211.082397456, 538.584106432, 283.38781737600004], [440.241577152, 238.869018576, 481.439086912, 277.788330096], [480.48986816, 235.072021488, 538.584106432, 281.09698488000004], [334.732604992, 216.322265616, 411.41638182400004, 276.978881856], [271.850708032, 269.48901364799997, 329.6798096, 283.38781737600004], [273.58807372800004, 276.438415536, 327.19787596800006, 287.607116688], [470.633178688, 211.082397456, 529.663940416, 242.437011696]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048107_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two storage boxes, a person, two books, and a moniter.", "boxes_value": [[66.850708032, 18.082397455999995, 333.584106432, 90.38781737600004], [235.241577152, 45.869018576, 276.439086912, 84.78833009599998], [275.48986816, 42.07202148799999, 333.584106432, 88.09698488000004], [129.732604992, 23.32226561600001, 206.41638182400004, 83.97888185599999], [66.850708032, 76.48901364799997, 124.6798096, 90.38781737600004], [68.58807372800004, 83.43841553599998, 122.19787596800006, 94.60711668800002], [265.633178688, 18.082397455999995, 324.66394041599995, 49.43701169600001]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048108.jpg", "text": "What's going on in the section of contained within the bounding box ? Please mention the objects and their locations.", "boxes_value": [[255.222412085, 249.6146850816, 528.0164795042, 497.1242065408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048108_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Please mention the objects and their locations.", "boxes_value": [[68.222412085, 62.61468508159999, 341.0164795042, 310.1242065408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048108.jpg", "text": "What's going on in the section of contained within the bounding box ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a barrel, a slippers, and a bicycle.", "boxes_value": [[255.222412085, 249.6146850816, 528.0164795042, 497.1242065408], [444.95080563720006, 205.6802368, 553.5280761577, 403.5070190592], [299.6666259915, 249.6146850816, 428.3027343893, 490.0765380608], [255.222412085, 367.9976196096, 303.5379638357, 403.27093504], [373.5783691365, 475.862060544, 409.4451904473, 487.361328128], [284.1718750321, 325.8829345792, 528.0164795042, 497.1242065408]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048108_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a barrel, a slippers, and a bicycle.", "boxes_value": [[68.222412085, 62.61468508159999, 341.0164795042, 310.1242065408], [257.95080563720006, 18.68023679999999, 366.52807615769996, 216.5070190592], [112.66662599149998, 62.61468508159999, 241.30273438929999, 303.0765380608], [68.222412085, 180.9976196096, 116.5379638357, 216.27093503999998], [186.5783691365, 288.862060544, 222.44519044729998, 300.361328128], [97.17187503209999, 138.88293457920003, 341.0164795042, 310.1242065408]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048109.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[252.29779054079998, 158.84368896, 531.9992676096, 399.542724608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048109_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[70.29779054079998, 60.84368896000001, 349.9992676096, 301.542724608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048109.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a watch, a bracelet, a glasses, and a calculator.", "boxes_value": [[252.29779054079998, 158.84368896, 531.9992676096, 399.542724608], [325.6999511808, 88.9193115136, 572.429809536, 409.2635497984], [495.33361820159996, 289.5987548672, 531.9992676096, 321.0264892416], [374.7550048512, 370.4055785984, 393.4467773184, 399.542724608], [435.907348608, 158.84368896, 496.5804443136, 182.8510131712], [252.29779054079998, 323.1018676736, 329.703247104, 377.0820312576]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048109_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a watch, a bracelet, a glasses, and a calculator.", "boxes_value": [[70.29779054079998, 60.84368896000001, 349.9992676096, 301.542724608], [143.69995118079999, 0, 390.429809536, 311.2635497984], [313.33361820159996, 191.5987548672, 349.9992676096, 223.02648924160002], [192.7550048512, 272.4055785984, 211.4467773184, 301.542724608], [253.907348608, 60.84368896000001, 314.5804443136, 84.85101317120001], [70.29779054079998, 225.1018676736, 147.703247104, 279.0820312576]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048112.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[0.1400146292, 245.9376220672, 771.4432373056, 511.99688719359995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048112_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[0.1400146292, 66.93762206720001, 771.4432373056, 332.99688719359995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048112.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two chairs, two people, and a bottle.", "boxes_value": [[0.1400146292, 245.9376220672, 771.4432373056, 511.99688719359995], [0.1400146292, 245.9376220672, 771.4432373056, 511.99688719359995], [110.3261719052, 271.0206298624, 147.9507446556, 403.602355968], [379.0382079884, 211.8963622912, 562.7165527124, 339.9989623808], [32.5621338056, 41.2137451008, 522.5488281488, 404.107604992], [0, 0.3248290816, 109.8439330728, 424.7655029248], [182.252990702, 294.9680175616, 399.2702636604, 345.6054076928]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048112_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, two chairs, two people, and a bottle.", "boxes_value": [[0.1400146292, 66.93762206720001, 771.4432373056, 332.99688719359995], [0.1400146292, 66.93762206720001, 771.4432373056, 332.99688719359995], [110.3261719052, 92.02062986240003, 147.9507446556, 224.60235596799998], [379.0382079884, 32.896362291200006, 562.7165527124, 160.99896238079998], [32.5621338056, 0, 522.5488281488, 225.107604992], [0, 0, 109.8439330728, 245.7655029248], [182.252990702, 115.96801756159999, 399.2702636604, 166.6054076928]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048113.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[266.4252319561, 145.5392456192, 406.2972411934, 340.3406982656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048113_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates.", "boxes_value": [[35.42523195609999, 49.53924561919999, 175.2972411934, 244.3406982656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048113.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a hat, a handbag, a leather shoes, and a cow.", "boxes_value": [[266.4252319561, 145.5392456192, 406.2972411934, 340.3406982656], [266.4252319561, 145.5392456192, 406.2972411934, 340.3406982656], [266.7787475653, 145.998107904, 316.0996094065, 183.798767104], [364.87524416360003, 121.0626220544, 403.7219238522, 185.0776977408], [379.6829833859, 322.0847778304, 403.105346695, 344.4583740416], [288.6901855142, 187.9279174656, 471.80517578290005, 336.4659423744]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048113_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a hat, a handbag, a leather shoes, and a cow.", "boxes_value": [[35.42523195609999, 49.53924561919999, 175.2972411934, 244.3406982656], [35.42523195609999, 49.53924561919999, 175.2972411934, 244.3406982656], [35.77874756530002, 49.998107903999994, 85.09960940650001, 87.798767104], [133.87524416360003, 25.062622054399995, 172.7219238522, 89.0776977408], [148.6829833859, 226.08477783040001, 172.10534669499998, 248.4583740416], [57.69018551419998, 91.92791746559999, 210, 240.46594237440002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048114.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object.", "boxes_value": [[109.684448256, 267.811950714, 239.0897827328, 341.53814699400004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048114_crop.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object.", "boxes_value": [[32.684448255999996, 18.811950713999977, 162.0897827328, 92.53814699400004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048114.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, and three plates.", "boxes_value": [[109.684448256, 267.811950714, 239.0897827328, 341.53814699400004], [155.7196655104, 267.811950714, 177.3928833024, 289.99713138000004], [165.2763672064, 268.66522214400004, 180.9766235136, 289.826477028], [109.684448256, 310.474975602, 147.2190551552, 341.53814699400004], [200.1538696192, 309.438232422, 239.0897827328, 340.45495604999996], [150.989074688, 310.758117648, 195.2044067328, 340.125000006]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048114_crop.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, and three plates.", "boxes_value": [[32.684448255999996, 18.811950713999977, 162.0897827328, 92.53814699400004], [78.7196655104, 18.811950713999977, 100.39288330240001, 40.99713138000004], [88.27636720640001, 19.66522214400004, 103.9766235136, 40.826477028], [32.684448255999996, 61.47497560199997, 70.21905515520001, 92.53814699400004], [123.15386961920001, 60.438232422, 162.0897827328, 91.45495604999996], [73.98907468799999, 61.758117647999995, 118.20440673280001, 91.125000006]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048116.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[530.2160644608, 270.2119140864, 673.7692871424, 319.491394048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048116_crop.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[36.2160644608, 13.211914086399986, 179.7692871424, 62.49139404800002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048116.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[530.2160644608, 270.2119140864, 673.7692871424, 319.491394048], [391.2269286912, 298.5454711808, 704.0062255872, 434.4178466816], [519.6876220416, 277.2886352384, 568.8884277504001, 306.6437378048], [578.2712402688, 279.70050048, 636.1210937856, 317.3488159232], [530.2160644608, 270.2119140864, 585.6171875328, 304.1871948288], [585.6171875328, 270.5180053504, 657.8530273535999, 279.0883789312], [585.9233398272, 278.782287616, 673.7692871424, 319.491394048]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048116_crop.jpg", "text": "Tell me about the region of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bed, and five pillows.", "boxes_value": [[36.2160644608, 13.211914086399986, 179.7692871424, 62.49139404800002], [0, 41.54547118080001, 210.0062255872, 74], [25.68762204159998, 20.28863523839999, 74.88842775040007, 49.643737804800026], [84.27124026880006, 22.700500480000017, 142.1210937856, 60.34881592319999], [36.2160644608, 13.211914086399986, 91.61718753280002, 47.18719482879999], [91.61718753280002, 13.518005350400017, 163.85302735359994, 22.088378931199998], [91.92333982720004, 21.78228761600002, 179.7692871424, 62.49139404800002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048119.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 126.309814449, 220.5870361088, 573.755126959]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048119_crop.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 112.309814449, 220.5870361088, 559.755126959]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048119.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four leather shoes, and a handbag.", "boxes_value": [[0, 126.309814449, 220.5870361088, 573.755126959], [4.5167236096, 379.5056152405, 220.5870361088, 476.7744140565], [0.8041381888, 467.1218261395, 199.7968139776, 573.755126959], [0, 339.410095199, 31.9895629824, 397.32592773650003], [0, 290.4044799565, 68.372558592, 355.74536134899995], [0, 126.309814449, 83.2227783168, 244.368896492]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048119_crop.jpg", "text": "What can you tell me about the area within the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four leather shoes, and a handbag.", "boxes_value": [[0, 112.309814449, 220.5870361088, 559.755126959], [4.5167236096, 365.5056152405, 220.5870361088, 462.7744140565], [0.8041381888, 453.1218261395, 199.7968139776, 559.755126959], [0, 325.410095199, 31.9895629824, 383.32592773650003], [0, 276.4044799565, 68.372558592, 341.74536134899995], [0, 112.309814449, 83.2227783168, 230.368896492]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048120.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[338.6940918208, 0, 423.45922852920006, 318.0890503168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048120_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[21.694091820799997, 0, 106.45922852920006, 318.0890503168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048120.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a hat, a belt, two trash bin cans, and a bench.", "boxes_value": [[338.6940918208, 0, 423.45922852920006, 318.0890503168], [194.5641479146, 141.463256832, 526.1723632464, 465.5690307584], [350.7780761798, 141.62774656, 399.07446289100005, 167.2718505984], [354.812622079, 302.192321792, 407.49877929959996, 318.0890503168], [338.6940918208, 27.4852295168, 378.65698245379997, 88.7850341888], [391.7204589792, 0, 423.45922852920006, 42.8831787008], [386.7517089784, 116.5215454208, 415.12670894779995, 147.9641113088]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048120_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a hat, a belt, two trash bin cans, and a bench.", "boxes_value": [[21.694091820799997, 0, 106.45922852920006, 318.0890503168], [0, 141.463256832, 127, 397], [33.778076179799996, 141.62774656, 82.07446289100005, 167.2718505984], [37.81262207899999, 302.192321792, 90.49877929959996, 318.0890503168], [21.694091820799997, 27.4852295168, 61.65698245379997, 88.7850341888], [74.72045897919998, 0, 106.45922852920006, 42.8831787008], [69.75170897840002, 116.5215454208, 98.12670894779995, 147.9641113088]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048121.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[171.0106200962, 97.1642456064, 553.3331298582, 279.040832512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048121_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference.", "boxes_value": [[96.0106200962, 46.1642456064, 478.3331298582, 228.040832512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048121.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two chairs, a desk, a moniter, and a keyboard.", "boxes_value": [[171.0106200962, 97.1642456064, 553.3331298582, 279.040832512], [443.1767578054, 97.1642456064, 481.5015869045, 178.9239501824], [273.65350339419996, 191.8972168192, 350.6406249941, 237.6588134912], [149.0998535363, 193.1081543168, 206.78247073510002, 254.9439697408], [171.0106200962, 215.3970947072, 315.7291870202, 253.7562255872], [504.5447998386, 188.2352294912, 553.3331298582, 255.1273803776], [435.90246582730003, 256.8468017664, 496.1907959163, 279.040832512]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048121_crop.jpg", "text": "In , what elements can be found within the coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two chairs, a desk, a moniter, and a keyboard.", "boxes_value": [[96.0106200962, 46.1642456064, 478.3331298582, 228.040832512], [368.1767578054, 46.1642456064, 406.5015869045, 127.9239501824], [198.65350339419996, 140.8972168192, 275.6406249941, 186.6588134912], [74.0998535363, 142.1081543168, 131.78247073510002, 203.9439697408], [96.0106200962, 164.3970947072, 240.72918702020002, 202.7562255872], [429.5447998386, 137.2352294912, 478.3331298582, 204.1273803776], [360.90246582730003, 205.8468017664, 421.1907959163, 228.040832512]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048122.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[198.3577881087, 276.4086303744, 289.22058105599996, 348.1610107392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048122_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[23.357788108699992, 18.40863037439999, 114.22058105599996, 90.16101073919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048122.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and an umbrella.", "boxes_value": [[198.3577881087, 276.4086303744, 289.22058105599996, 348.1610107392], [272.671997037, 284.0680542208, 289.22058105599996, 348.1610107392], [244.5656127783, 280.9159545856, 273.9853515861, 345.7969360384], [222.763549767, 276.7131347456, 240.88818362400002, 344.2208862208], [198.3577881087, 276.4086303744, 222.6162109413, 341.0977172992], [219.26293945380002, 286.8682861568, 256.6845093087, 312.3721923584]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048122_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four people, and an umbrella.", "boxes_value": [[23.357788108699992, 18.40863037439999, 114.22058105599996, 90.16101073919998], [97.67199703699998, 26.06805422079998, 114.22058105599996, 90.16101073919998], [69.5656127783, 22.91595458559999, 98.98535158610002, 87.79693603840002], [47.763549767, 18.713134745599973, 65.88818362400002, 86.22088622080003], [23.357788108699992, 18.40863037439999, 47.61621094130001, 83.09771729919999], [44.262939453800016, 28.868286156800025, 81.6845093087, 54.372192358400014]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048126.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[283.42541501520003, 180.8114624, 505.14770504600006, 288.2898559488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048126_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[56.42541501520003, 27.81146240000001, 278.14770504600006, 135.28985594879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048126.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a car, a van, a bus, a truck, and a street lights.", "boxes_value": [[283.42541501520003, 180.8114624, 505.14770504600006, 288.2898559488], [273.91577148600004, 251.972290048, 293.1463622784, 272.0482177536], [283.42541501520003, 247.1118163968, 317.6601562236, 272.6821899264], [312.3154296888, 203.7510375936, 394.7581787292, 288.2898559488], [394.4088134488, 213.9981689344, 505.14770504600006, 284.5636596736], [386.0247802736, 180.8114624, 399.6488037472, 228.3208617984]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048126_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a car, a van, a bus, a truck, and a street lights.", "boxes_value": [[56.42541501520003, 27.81146240000001, 278.14770504600006, 135.28985594879998], [46.91577148600004, 98.97229004799999, 66.14636227839998, 119.0482177536], [56.42541501520003, 94.11181639680001, 90.66015622359998, 119.68218992639999], [85.31542968880001, 50.75103759359999, 167.75817872919998, 135.28985594879998], [167.4088134488, 60.99816893440001, 278.14770504600006, 131.5636596736], [159.02478027360002, 27.81146240000001, 172.64880374720002, 75.32086179839999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048127.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[29.5305175475, 1.1539306496, 314.29058835480004, 99.861328128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048127_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object.", "boxes_value": [[29.5305175475, 1.1539306496, 314.29058835480004, 99.861328128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048127.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[29.5305175475, 1.1539306496, 314.29058835480004, 99.861328128], [29.5305175475, 20.3273315328, 57.225402860699994, 87.079040512], [74.268371595, 33.8197021696, 105.5138549472, 99.861328128], [121.8467407508, 1.1539306496, 148.1213378773, 24.588073728], [290.8564453447, 4.704589824, 314.29058835480004, 22.457702656], [263.8717040907, 53.7031860224, 275.94384765949997, 78.5575561728]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048127_crop.jpg", "text": "Please describe the region in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[29.5305175475, 1.1539306496, 314.29058835480004, 99.861328128], [29.5305175475, 20.3273315328, 57.225402860699994, 87.079040512], [74.268371595, 33.8197021696, 105.5138549472, 99.861328128], [121.8467407508, 1.1539306496, 148.1213378773, 24.588073728], [290.8564453447, 4.704589824, 314.29058835480004, 22.457702656], [263.8717040907, 53.7031860224, 275.94384765949997, 78.5575561728]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048131.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe.", "boxes_value": [[0, 309.9399414272, 682.5006103436, 515.231689472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048131_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe.", "boxes_value": [[0, 51.93994142719998, 682.5006103436, 254]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048131.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, a watch, a pen, a bottle, a spoon, and a cup.", "boxes_value": [[0, 309.9399414272, 682.5006103436, 515.231689472], [0, 352.3541259776, 682.5006103436, 515.231689472], [598.0039062707999, 309.9399414272, 645.267944347, 355.2549438464], [267.3026123218, 437.6458740224, 399.1026611208, 464.0017090048], [51.549560575499996, 246.461059584, 117.88580321010001, 408.7835082752], [0.067871076, 363.4891357184, 72.85491941869999, 405.4144287232], [0, 372.2644653568, 90.9962158086, 476.7722167808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048131_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, a watch, a pen, a bottle, a spoon, and a cup.", "boxes_value": [[0, 51.93994142719998, 682.5006103436, 254], [0, 94.3541259776, 682.5006103436, 254], [598.0039062707999, 51.93994142719998, 645.267944347, 97.25494384640001], [267.3026123218, 179.6458740224, 399.1026611208, 206.00170900479998], [51.549560575499996, 0, 117.88580321010001, 150.7835082752], [0.067871076, 105.48913571840001, 72.85491941869999, 147.4144287232], [0, 114.26446535679997, 90.9962158086, 218.77221678080002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048133.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 107.9366455296, 577.6192627101, 457.9066265088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048133_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 87.9366455296, 577.6192627101, 437.9066265088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048133.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a leather shoes, a train, a traffic light, and two street lights.", "boxes_value": [[0, 107.9366455296, 577.6192627101, 457.9066265088], [0, 280.4178881536, 24.2657190057, 333.4403171328], [19.0316547441, 443.4830951424, 49.9640472978, 457.9066265088], [217.3319702529, 81.5057373184, 628.1127929472, 456.8599853568], [186.12274166819998, 148.9851684352, 216.48846433350002, 170.0725097472], [101.95318600229999, 107.9366455296, 152.5115966604, 242.0493774336], [564.0604247907, 143.63006592, 577.6192627101, 188.9674682368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048133_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, a leather shoes, a train, a traffic light, and two street lights.", "boxes_value": [[0, 87.9366455296, 577.6192627101, 437.9066265088], [0, 260.4178881536, 24.2657190057, 313.4403171328], [19.0316547441, 423.4830951424, 49.9640472978, 437.9066265088], [217.3319702529, 61.505737318399994, 628.1127929472, 436.8599853568], [186.12274166819998, 128.9851684352, 216.48846433350002, 150.0725097472], [101.95318600229999, 87.9366455296, 152.5115966604, 222.0493774336], [564.0604247907, 123.63006591999999, 577.6192627101, 168.9674682368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048134.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[200.77276609740002, 279.3497314304, 605.3614502117, 389.806213376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048134_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[101.77276609740002, 28.349731430400027, 506.3614502117, 138.80621337600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048134.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include eight people.", "boxes_value": [[200.77276609740002, 279.3497314304, 605.3614502117, 389.806213376], [566.0322265835999, 262.613891584, 602.851074218, 351.31378176], [579.0024413884, 368.4680175616, 605.3614502117, 389.806213376], [512.0592040866, 364.2840576, 536.744506862, 381.4382934528], [398.2554931918, 353.824157696, 432.14562985960004, 386.4590454272], [200.77276609740002, 342.5274658304, 256.4193725902, 369.7232055808], [274.8287964074, 279.3497314304, 308.7188720833, 336.2515869184], [350.9768066097, 245.0413208064, 382.77490232499997, 341.6906738176], [353.9056396284, 252.9907836928, 391.56115720860004, 334.9963989504]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048134_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include eight people.", "boxes_value": [[101.77276609740002, 28.349731430400027, 506.3614502117, 138.80621337600002], [467.03222658359994, 11.613891583999987, 503.851074218, 100.31378175999998], [480.00244138840003, 117.46801756159999, 506.3614502117, 138.80621337600002], [413.05920408659995, 113.28405759999998, 437.744506862, 130.4382934528], [299.2554931918, 102.82415769599999, 333.14562985960004, 135.4590454272], [101.77276609740002, 91.52746583039999, 157.4193725902, 118.72320558080003], [175.82879640739998, 28.349731430400027, 209.71887208330003, 85.2515869184], [251.97680660970002, 0, 283.77490232499997, 90.69067381759999], [254.90563962840002, 1.9907836927999938, 292.56115720860004, 83.99639895040002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048135.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[0.5827026432, 71.906005864, 308.758667008, 464.216186511]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048135_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[0.5827026432, 71.906005864, 308.758667008, 464.216186511]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048135.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three potted plants, and three lamps.", "boxes_value": [[0.5827026432, 71.906005864, 308.758667008, 464.216186511], [149.4648437248, 111.53857422400002, 271.595214848, 305.9240723], [242.5193481216, 309.036010745, 308.758667008, 405.34130860299996], [287.1111450112, 393.409179702, 334.0590820352, 469.69958497], [149.269042944, 441.118652359, 192.384521472, 464.216186511], [0.585449216, 287.07482911799997, 39.6378784256, 335.31604005900004], [0.5827026432, 71.906005864, 159.2303467008, 206.08142093000004]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048135_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three potted plants, and three lamps.", "boxes_value": [[0.5827026432, 71.906005864, 308.758667008, 464.216186511], [149.4648437248, 111.53857422400002, 271.595214848, 305.9240723], [242.5193481216, 309.036010745, 308.758667008, 405.34130860299996], [287.1111450112, 393.409179702, 334.0590820352, 469.69958497], [149.269042944, 441.118652359, 192.384521472, 464.216186511], [0.585449216, 287.07482911799997, 39.6378784256, 335.31604005900004], [0.5827026432, 71.906005864, 159.2303467008, 206.08142093000004]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048138.jpg", "text": "Describe what's happening within the coordinates of the given image . Please point out the objects and their coordinates.", "boxes_value": [[0.0612182528, 219.23675539930002, 90.0285033984, 537.2280273549]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048138_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please point out the objects and their coordinates.", "boxes_value": [[0.0612182528, 80.23675539930002, 90.0285033984, 398.22802735489995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048138.jpg", "text": "Describe what's happening within the coordinates of the given image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a mirror, a power outlet, a faucet, and a sink.", "boxes_value": [[0.0612182528, 219.23675539930002, 90.0285033984, 537.2280273549], [26.7311401472, 219.23675539930002, 90.0285033984, 306.3923340088], [26.5999755776, 60.041320816799995, 510.9075927552, 575.394287076], [64.3156738048, 452.73681637789997, 87.893554688, 470.9173584259], [24.24700928, 471.07958984500004, 72.6358032384, 511.4892577982], [0.0612182528, 498.1052246312, 60.2811889664, 537.2280273549]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048138_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a picture, a mirror, a power outlet, a faucet, and a sink.", "boxes_value": [[0.0612182528, 80.23675539930002, 90.0285033984, 398.22802735489995], [26.7311401472, 80.23675539930002, 90.0285033984, 167.3923340088], [26.5999755776, 0, 112, 436.39428707599996], [64.3156738048, 313.73681637789997, 87.893554688, 331.9173584259], [24.24700928, 332.07958984500004, 72.6358032384, 372.4892577982], [0.0612182528, 359.1052246312, 60.2811889664, 398.22802735489995]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048141.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[219.50067141120002, 0.4120483328, 447.85314938880003, 300.72790528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048141_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[57.50067141120002, 0.4120483328, 285.85314938880003, 300.72790528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048141.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a lamp, two pillows, a bench, a stool, and a desk.", "boxes_value": [[219.50067141120002, 0.4120483328, 447.85314938880003, 300.72790528], [295.72607424, 0.4120483328, 317.8532714496, 25.8919067136], [362.8480224768, 188.9649658368, 419.29455567360003, 232.5502319104], [401.6577148416, 194.1633911296, 447.85314938880003, 227.7600708096], [349.1628417792, 218.8359374848, 478.82507327999997, 276.0553588736], [278.8198242048, 218.8359374848, 319.7657470464, 299.6779785216], [219.50067141120002, 191.538635264, 332.3645019648, 300.72790528]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048141_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a lamp, two pillows, a bench, a stool, and a desk.", "boxes_value": [[57.50067141120002, 0.4120483328, 285.85314938880003, 300.72790528], [133.72607424, 0.4120483328, 155.85327144960002, 25.8919067136], [200.8480224768, 188.9649658368, 257.29455567360003, 232.5502319104], [239.6577148416, 194.1633911296, 285.85314938880003, 227.7600708096], [187.1628417792, 218.8359374848, 316.82507327999997, 276.0553588736], [116.8198242048, 218.8359374848, 157.7657470464, 299.6779785216], [57.50067141120002, 191.538635264, 170.3645019648, 300.72790528]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048142.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[515.7269287008, 295.2457275392, 745.992065472, 404.3626708992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048142_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[57.726928700800045, 28.245727539200004, 287.99206547200004, 137.3626708992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048142.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a bench, two desks, two chairs, and a flower.", "boxes_value": [[515.7269287008, 295.2457275392, 745.992065472, 404.3626708992], [667.3389892848, 295.2457275392, 745.992065472, 361.29718016], [651.8109131232, 310.3542480384, 712.664550792, 385.0573730304], [606.4853515248001, 309.9345702912, 700.074096672, 386.736083968], [501.9848632704, 311.6132812288, 543.5333251584, 406.461059584], [572.4912109008, 310.5466308608, 624.5316161856, 404.3626708992], [515.7269287008, 310.6683349504, 576.7196045088, 397.5594482176], [527.9106445008, 283.2109374976, 550.6868896368001, 304.3483886592]], "boxes_seq": [[0], [0], [1], [2], [3, 6], [4, 5], [7]]}, {"image_path": "objects365_v1_00048142_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a bench, two desks, two chairs, and a flower.", "boxes_value": [[57.726928700800045, 28.245727539200004, 287.99206547200004, 137.3626708992], [209.33898928480005, 28.245727539200004, 287.99206547200004, 94.29718015999998], [193.81091312319995, 43.35424803839999, 254.664550792, 118.05737303040002], [148.48535152480008, 42.934570291199975, 242.074096672, 119.736083968], [43.98486327040001, 44.61328122880002, 85.53332515839998, 139.461059584], [114.49121090079996, 43.54663086080001, 166.5316161856, 137.3626708992], [57.726928700800045, 43.66833495039998, 118.71960450879999, 130.5594482176], [69.9106445008, 16.210937497600014, 92.68688963680006, 37.3483886592]], "boxes_seq": [[0], [0], [1], [2], [3, 6], [4, 5], [7]]}, {"image_path": "objects365_v1_00048143.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[209.5573730256, 244.5865478656, 752.1541747775999, 512.1787109376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048143_crop.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[136.5573730256, 67.58654786560001, 679.1541747775999, 335]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048143.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a glasses, two gloves, a boots, and a skiboard.", "boxes_value": [[209.5573730256, 244.5865478656, 752.1541747775999, 512.1787109376], [579.7058105232, 244.5865478656, 752.1541747775999, 512.1787109376], [672.2343750048, 265.5197143552, 720.5404053167999, 282.3754272256], [601.0133056512, 433.284912128, 641.98339848, 468.429870592], [725.0887451232, 456.1970825216, 739.0690918032, 492.7012939264], [209.5573730256, 378.5444946432, 222.7938232848, 409.6643676672], [181.58593749119999, 399.0991821312, 242.7499999872, 433.3096313344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048143_crop.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a glasses, two gloves, a boots, and a skiboard.", "boxes_value": [[136.5573730256, 67.58654786560001, 679.1541747775999, 335], [506.70581052319994, 67.58654786560001, 679.1541747775999, 335], [599.2343750048, 88.5197143552, 647.5404053167999, 105.37542722559999], [528.0133056512, 256.284912128, 568.98339848, 291.429870592], [652.0887451232, 279.1970825216, 666.0690918032, 315.7012939264], [136.5573730256, 201.54449464319998, 149.7938232848, 232.6643676672], [108.58593749119999, 222.0991821312, 169.7499999872, 256.3096313344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048145.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please mention the objects and their locations.", "boxes_value": [[474.0988769249, 418.2537841664, 623.7204589986, 473.7700195328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048145_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please mention the objects and their locations.", "boxes_value": [[38.098876924900026, 14.25378416640001, 187.72045899859995, 69.7700195328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048145.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please mention the objects and their locations. For your reference, objects involved in this region include three chairs, a towel, and a person.", "boxes_value": [[474.0988769249, 418.2537841664, 623.7204589986, 473.7700195328], [590.7336425584, 444.4890136576, 621.3995361403, 473.7700195328], [514.5634765874, 426.2872924672, 554.3302001684, 468.2303467008], [474.0988769249, 424.9924926976, 512.4726562242, 464.8198242304], [534.8773193447, 424.5191650304, 552.7145995942, 448.5994872832], [580.861083961, 418.2537841664, 623.7204589986, 471.7401122816]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048145_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Please mention the objects and their locations. For your reference, objects involved in this region include three chairs, a towel, and a person.", "boxes_value": [[38.098876924900026, 14.25378416640001, 187.72045899859995, 69.7700195328], [154.7336425584, 40.489013657600026, 185.39953614030003, 69.7700195328], [78.56347658740003, 22.287292467200018, 118.33020016839998, 64.2303467008], [38.098876924900026, 20.992492697600028, 76.47265622420002, 60.8198242304], [98.87731934470003, 20.519165030400018, 116.71459959419997, 44.599487283200006], [144.86108396099996, 14.25378416640001, 187.72045899859995, 67.74011228159998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048147.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[0.7056274432, 67.147033728, 115.961059584, 457.69848629759997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048147_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[0.7056274432, 67.147033728, 115.961059584, 457.69848629759997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048147.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a storage box, and three handbags.", "boxes_value": [[0.7056274432, 67.147033728, 115.961059584, 457.69848629759997], [0.7827758592, 14.5725708288, 511.72906496, 766.9948730112001], [38.5964355584, 114.582458496, 115.961059584, 158.02569576960002], [0.7056274432, 406.4312744448, 21.1549072384, 457.69848629759997], [18.7430419968, 67.147033728, 108.186645504, 120.60760496639999], [41.0305175552, 156.1500854784, 115.9340210176, 220.91955563519997]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048147_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a storage box, and three handbags.", "boxes_value": [[0.7056274432, 67.147033728, 115.961059584, 457.69848629759997], [0.7827758592, 14.5725708288, 144, 555], [38.5964355584, 114.582458496, 115.961059584, 158.02569576960002], [0.7056274432, 406.4312744448, 21.1549072384, 457.69848629759997], [18.7430419968, 67.147033728, 108.186645504, 120.60760496639999], [41.0305175552, 156.1500854784, 115.9340210176, 220.91955563519997]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048148.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[0, 224.2681274368, 149.6527709955, 370.3262329344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048148_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[0, 37.2681274368, 149.6527709955, 183.32623293440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048148.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, two handbags, and two strollers.", "boxes_value": [[0, 224.2681274368, 149.6527709955, 370.3262329344], [64.66711425759999, 333.3689575424, 111.57257077700001, 359.0203857408], [127.4576416202, 224.2681274368, 149.6527709955, 243.5993652224], [112.06426999, 340.2554321408, 134.97534182130002, 370.3262329344], [91.3298950488, 214.188842752, 148.6866455029, 276.2191161856], [0, 262.1134033408, 20.551757816200002, 316.7006225408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048148_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a hat, two handbags, and two strollers.", "boxes_value": [[0, 37.2681274368, 149.6527709955, 183.32623293440002], [64.66711425759999, 146.36895754239998, 111.57257077700001, 172.0203857408], [127.4576416202, 37.2681274368, 149.6527709955, 56.599365222399996], [112.06426999, 153.2554321408, 134.97534182130002, 183.32623293440002], [91.3298950488, 27.188842752, 148.6866455029, 89.2191161856], [0, 75.1134033408, 20.551757816200002, 129.7006225408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048149.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each mentioned object.", "boxes_value": [[113.996643076, 68.9903564288, 297.492797845, 166.6449585152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048149_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each mentioned object.", "boxes_value": [[45.996643076, 24.9903564288, 229.49279784499998, 122.64495851519999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048149.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two street lights, two suvs, and a pickup truck.", "boxes_value": [[113.996643076, 68.9903564288, 297.492797845, 166.6449585152], [283.7907714455, 68.9903564288, 297.492797845, 136.4116821504], [246.65820312749997, 124.6603393536, 281.400146497, 138.5117187584], [238.78833006750003, 85.4154662912, 266.747924776, 126.9770507776], [193.2290038805, 124.5303954944, 234.3917846495, 140.947937024], [113.996643076, 128.8132324352, 236.77117923100002, 166.6449585152]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00048149_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two street lights, two suvs, and a pickup truck.", "boxes_value": [[45.996643076, 24.9903564288, 229.49279784499998, 122.64495851519999], [215.7907714455, 24.9903564288, 229.49279784499998, 92.4116821504], [178.65820312749997, 80.6603393536, 213.40014649699998, 94.51171875840001], [170.78833006750003, 41.415466291200005, 198.747924776, 82.9770507776], [125.22900388049999, 80.5303954944, 166.3917846495, 96.947937024], [45.996643076, 84.8132324352, 168.77117923100002, 122.64495851519999]], "boxes_seq": [[0], [0], [1, 3], [2, 4], [5]]}, {"image_path": "objects365_v1_00048150.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[53.663818373199994, 192.8296508928, 207.3481444968, 401.2321777152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048150_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[38.663818373199994, 52.829650892800004, 192.3481444968, 261.2321777152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048150.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a hat, a leather shoes, a bottle, and a moniter.", "boxes_value": [[53.663818373199994, 192.8296508928, 207.3481444968, 401.2321777152], [50.03350829079999, 196.706176768, 133.1749878148, 409.8580322304], [79.7416381528, 195.5310058496, 110.9646606812, 207.9649658368], [94.0473022688, 390.0960082944, 126.06384275319999, 401.2321777152], [53.663818373199994, 305.4743652352, 70.9828491576, 333.6467284992], [111.76940915280001, 192.8296508928, 207.3481444968, 257.598266624]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048150_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, a hat, a leather shoes, a bottle, and a moniter.", "boxes_value": [[38.663818373199994, 52.829650892800004, 192.3481444968, 261.2321777152], [35.03350829079999, 56.706176768000006, 118.1749878148, 269.8580322304], [64.7416381528, 55.53100584960001, 95.9646606812, 67.96496583679999], [79.0473022688, 250.0960082944, 111.06384275319999, 261.2321777152], [38.663818373199994, 165.47436523520003, 55.9828491576, 193.64672849919998], [96.76940915280001, 52.829650892800004, 192.3481444968, 117.59826662400002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048152.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[32.3104858368, 250.8880615424, 135.5884399104, 343.7689819136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048152_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[26.310485836799998, 23.88806154240001, 129.5884399104, 116.76898191359999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048152.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, two chairs, and two napkins.", "boxes_value": [[32.3104858368, 250.8880615424, 135.5884399104, 343.7689819136], [32.3104858368, 259.8989257728, 135.5884399104, 343.7689819136], [1.8122558976, 254.3538208256, 112.02166748159999, 448.43322752], [60.036071807999996, 281.3862915072, 142.5198364416, 456.057739264], [95.3862915072, 250.8880615424, 116.8736572416, 284.85198976], [35.083007846399994, 252.9675293184, 66.27435302399999, 277.2274169856]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048152_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, two chairs, and two napkins.", "boxes_value": [[26.310485836799998, 23.88806154240001, 129.5884399104, 116.76898191359999], [26.310485836799998, 32.8989257728, 129.5884399104, 116.76898191359999], [0, 27.353820825599996, 106.02166748159999, 139], [54.036071807999996, 54.386291507199985, 136.5198364416, 139], [89.3862915072, 23.88806154240001, 110.8736572416, 57.85198975999998], [29.083007846399994, 25.96752931840001, 60.27435302399999, 50.22741698559997]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048153.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[310.5601806848, 234.1925659481, 427.0764770304, 312.81756594300003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048153_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[29.560180684800002, 20.192565948099997, 146.07647703039999, 98.81756594300003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048153.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, a desk, and a cabinet.", "boxes_value": [[310.5601806848, 234.1925659481, 427.0764770304, 312.81756594300003], [212.4268798976, 247.39562986419998, 456.9163208192, 630.7617187531], [347.1122436608, 234.1925659481, 395.2915039232, 304.8903808781], [326.5343627776, 238.38201903339998, 449.2313232384, 463.1651611028], [344.7138061312, 244.33850096440003, 511.0220947456, 537.9028320379], [121.3397216768, 277.9497680492, 510.4392700416, 577.3029785226], [310.5601806848, 278.2468261954, 427.0764770304, 312.81756594300003]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048153_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, a desk, and a cabinet.", "boxes_value": [[29.560180684800002, 20.192565948099997, 146.07647703039999, 98.81756594300003], [0, 33.395629864199975, 175, 118], [66.1122436608, 20.192565948099997, 114.29150392320003, 90.89038087810002], [45.534362777599995, 24.382019033399985, 168.23132323840002, 118], [63.71380613119999, 30.338500964400026, 175, 118], [0, 63.949768049199974, 175, 118], [29.560180684800002, 64.24682619539999, 146.07647703039999, 98.81756594300003]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048155.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[153.0184326144, 0, 338.9496459776, 464.4116210768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048155_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[47.01843261440001, 0, 232.94964597760003, 464.4116210768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048155.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, a stool, a clock, a picture, and two people.", "boxes_value": [[153.0184326144, 0, 338.9496459776, 464.4116210768], [165.816406272, 0, 330.4266967552, 202.95910646939998], [197.8558960128, 204.9401245425, 332.4484253184, 315.637573225], [243.0291137536, 427.9676513792, 286.1864013824, 464.4116210768], [315.137512192, 339.2593994443, 338.9496459776, 402.36083981300004], [153.0184326144, 364.533691436, 206.8767089664, 414.7813720515], [328.4492797952, 424.7637939309, 353.9683227648, 463.17126466400003], [288.75299072, 385.84082034200003, 341.5954589696, 467.03771974119996]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048155_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, a stool, a clock, a picture, and two people.", "boxes_value": [[47.01843261440001, 0, 232.94964597760003, 464.4116210768], [59.816406271999995, 0, 224.4266967552, 202.95910646939998], [91.8558960128, 204.9401245425, 226.44842531839998, 315.637573225], [137.0291137536, 427.9676513792, 180.1864013824, 464.4116210768], [209.13751219199997, 339.2593994443, 232.94964597760003, 402.36083981300004], [47.01843261440001, 364.533691436, 100.8767089664, 414.7813720515], [222.44927979520003, 424.7637939309, 247.9683227648, 463.17126466400003], [182.75299072, 385.84082034200003, 235.59545896959997, 467.03771974119996]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048158.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[450.5184326511, 188.0788574208, 603.5823974925, 343.2056884736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048158_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[38.5184326511, 39.078857420800006, 191.58239749250004, 194.2056884736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048158.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, two vases, a fan, and a picture.", "boxes_value": [[450.5184326511, 188.0788574208, 603.5823974925, 343.2056884736], [494.2391357103, 238.9274292224, 535.6837157936, 274.2644042752], [504.0339355773, 267.0225829888, 530.6878661910999, 291.2086792192], [521.1295166212, 311.234191872, 557.0273437415, 338.9989013504], [552.5400390689, 208.5887451136, 603.5823974925, 343.2056884736], [450.5184326511, 188.0788574208, 471.5073241956, 236.9785156096]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048158_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, two vases, a fan, and a picture.", "boxes_value": [[38.5184326511, 39.078857420800006, 191.58239749250004, 194.2056884736], [82.23913571029999, 89.92742922240001, 123.68371579359996, 125.26440427519998], [92.03393557729999, 118.02258298880002, 118.68786619109994, 142.2086792192], [109.12951662119997, 162.234191872, 145.02734374149998, 189.9989013504], [140.5400390689, 59.5887451136, 191.58239749250004, 194.2056884736], [38.5184326511, 39.078857420800006, 59.507324195600006, 87.97851560960001]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048159.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates.", "boxes_value": [[271.6667480576, 253.3448486088, 355.834350592, 471.12451171879997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048159_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates.", "boxes_value": [[21.66674805759999, 55.34484860879999, 105.83435059200002, 273.12451171879997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048159.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, two cups, and a wine glass.", "boxes_value": [[271.6667480576, 253.3448486088, 355.834350592, 471.12451171879997], [35.137512192, 0, 377.552062976, 345.0447997741], [338.1788940288, 253.3448486088, 355.834350592, 277.93652340209997], [304.7595214848, 321.4447021712, 325.567810048, 340.9918213249], [266.8671875072, 438.06103512819993, 309.3520507904, 471.8355713061], [303.4859619328, 418.32958987300003, 341.5267944448, 471.12451171879997], [271.6667480576, 416.90747069840006, 290.5093993984, 440.01635741210004]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00048159_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, two cups, and a wine glass.", "boxes_value": [[21.66674805759999, 55.34484860879999, 105.83435059200002, 273.12451171879997], [0, 0, 126, 147.0447997741], [88.17889402880002, 55.34484860879999, 105.83435059200002, 79.93652340209997], [54.75952148480002, 123.44470217119999, 75.56781004800001, 142.9918213249], [16.867187507200015, 240.06103512819993, 59.35205079040003, 273.8355713061], [53.48596193280002, 220.32958987300003, 91.52679444479998, 273.12451171879997], [21.66674805759999, 218.90747069840006, 40.50939939839998, 242.01635741210004]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00048160.jpg", "text": "What's the story in the section of the included visual ? Specify the location of each mentioned object.", "boxes_value": [[434.6710205155, 164.466308608, 672.8400878668, 488.1140136959999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048160_crop.jpg", "text": "What's the story in the section of the included visual ? Specify the location of each mentioned object.", "boxes_value": [[59.671020515500004, 81.46630860799999, 297.8400878668, 405.1140136959999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048160.jpg", "text": "What's the story in the section of the included visual ? Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, a person, a hat, and a shovel.", "boxes_value": [[434.6710205155, 164.466308608, 672.8400878668, 488.1140136959999], [605.0019530934, 222.217407232, 622.2171630898999, 247.6140747264], [647.613891574, 218.9788818432, 672.8400878668, 248.2958984192], [434.6710205155, 164.466308608, 524.8679199215001, 488.1140136959999], [456.95373535199997, 164.5652465664, 497.56408694130005, 186.5347900416], [599.2093505922, 297.7115478528, 620.2099609141, 425.2421264896]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048160_crop.jpg", "text": "What's the story in the section of the included visual ? Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, a person, a hat, and a shovel.", "boxes_value": [[59.671020515500004, 81.46630860799999, 297.8400878668, 405.1140136959999], [230.00195309339995, 139.217407232, 247.21716308989994, 164.6140747264], [272.61389157400004, 135.9788818432, 297.8400878668, 165.2958984192], [59.671020515500004, 81.46630860799999, 149.86791992150006, 405.1140136959999], [81.95373535199997, 81.56524656639999, 122.56408694130005, 103.53479004159999], [224.20935059220005, 214.71154785279998, 245.20996091409995, 342.2421264896]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048165.jpg", "text": "Can you give me a description of the region in image ? Provide the coordinates for each element you describe.", "boxes_value": [[438.3183593438, 297.3055419904, 749.0739746026, 512.1610107392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048165_crop.jpg", "text": "Can you give me a description of the region in image ? Provide the coordinates for each element you describe.", "boxes_value": [[78.31835934380001, 54.3055419904, 389.07397460259995, 269]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048165.jpg", "text": "Can you give me a description of the region in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[438.3183593438, 297.3055419904, 749.0739746026, 512.1610107392], [448.1530761632, 313.3611450368, 457.830078102, 365.1417846784], [464.2208252174, 297.4510497792, 476.44433594020006, 339.6516723712], [451.99719239419994, 298.6152343552, 466.69458008739997, 340.9613037056], [438.3183593438, 297.3055419904, 450.97851565100007, 341.2523803648], [703.7673339962, 478.1809081856, 749.0739746026, 512.1610107392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048165_crop.jpg", "text": "Can you give me a description of the region in image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[78.31835934380001, 54.3055419904, 389.07397460259995, 269], [88.15307616320001, 70.3611450368, 97.83007810200002, 122.14178467839997], [104.22082521739998, 54.45104977919999, 116.44433594020006, 96.6516723712], [91.99719239419994, 55.615234355200016, 106.69458008739997, 97.96130370560002], [78.31835934380001, 54.3055419904, 90.97851565100007, 98.25238036479999], [343.7673339962, 235.1809081856, 389.07397460259995, 269]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048166.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[76.3297119232, 477.36755371140003, 484.2697143808, 547.0383300853999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048166_crop.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[76.3297119232, 18.367553711400035, 484.2697143808, 88.03833008539993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048166.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two benches, and three potted plants.", "boxes_value": [[76.3297119232, 477.36755371140003, 484.2697143808, 547.0383300853999], [76.3297119232, 502.9235839724, 130.128173824, 547.0383300853999], [289.9921874944, 496.57727051079996, 340.9557495296, 517.2239990106], [262.4772338688, 502.0616454964, 395.5426025472, 612.6613769844], [167.4945068544, 477.36755371140003, 244.9425048576, 530.3749999795999], [420.4702758912, 478.00231936000006, 484.2697143808, 519.2656250146]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048166_crop.jpg", "text": "What can you tell me about the area within the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two benches, and three potted plants.", "boxes_value": [[76.3297119232, 18.367553711400035, 484.2697143808, 88.03833008539993], [76.3297119232, 43.923583972400024, 130.128173824, 88.03833008539993], [289.9921874944, 37.57727051079996, 340.9557495296, 58.22399901059998], [262.4772338688, 43.06164549639999, 395.5426025472, 105], [167.4945068544, 18.367553711400035, 244.9425048576, 71.37499997959992], [420.4702758912, 19.002319360000058, 484.2697143808, 60.265625014600005]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048167.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[157.501464832, 517.3331298816, 275.6406249984, 690.1718750207999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048167_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[30.50146483200001, 43.33312988160003, 148.64062499840003, 216.17187502079992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048167.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a luggage.", "boxes_value": [[157.501464832, 517.3331298816, 275.6406249984, 690.1718750207999], [237.0181884928, 523.2399902208, 266.5529785344, 643.1966552832], [200.213317888, 522.3312988416, 231.5656128, 637.7441405952001], [157.501464832, 517.3331298816, 221.1148681728, 683.1822509568], [266.0986328064, 525.0576172032, 275.6406249984, 558.6817626624], [198.0682983424, 585.1014404352, 248.9834594816, 690.1718750207999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048167_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a luggage.", "boxes_value": [[30.50146483200001, 43.33312988160003, 148.64062499840003, 216.17187502079992], [110.0181884928, 49.239990220799996, 139.55297853439998, 169.19665528320002], [73.213317888, 48.33129884159996, 104.5656128, 163.74414059520007], [30.50146483200001, 43.33312988160003, 94.11486817279999, 209.18225095679998], [139.09863280640002, 51.05761720320004, 148.64062499840003, 84.68176266240005], [71.0682983424, 111.10144043519995, 121.9834594816, 216.17187502079992]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048168.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference.", "boxes_value": [[80.83032230399999, 373.2202148352, 273.5234374656, 491.0541381632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048168_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference.", "boxes_value": [[48.83032230399999, 30.22021483520001, 241.5234374656, 148.05413816319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048168.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball, three sneakers, and a hockey stick.", "boxes_value": [[80.83032230399999, 373.2202148352, 273.5234374656, 491.0541381632], [80.83032230399999, 394.7075805696, 100.2382812672, 418.2743530496], [217.31048586240001, 388.771057152, 276.539794944, 415.7056884736], [148.9298706432, 399.535583488, 196.72003176959998, 433.2457275392], [53.857971225600004, 393.5053100544, 113.3414306304, 437.228210432], [134.895324672, 373.2202148352, 273.5234374656, 491.0541381632]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048168_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball, three sneakers, and a hockey stick.", "boxes_value": [[48.83032230399999, 30.22021483520001, 241.5234374656, 148.05413816319998], [48.83032230399999, 51.7075805696, 68.2382812672, 75.2743530496], [185.31048586240001, 45.771057152000026, 244.539794944, 72.70568847359999], [116.92987064319999, 56.535583487999986, 164.72003176959998, 90.2457275392], [21.857971225600004, 50.50531005440001, 81.3414306304, 94.22821043200003], [102.89532467199999, 30.22021483520001, 241.5234374656, 148.05413816319998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048170.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify.", "boxes_value": [[243.1886596535, 454.7498168832, 466.307861338, 484.906249984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048170_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify.", "boxes_value": [[56.1886596535, 7.749816883200026, 279.307861338, 37.906249984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048170.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five cars.", "boxes_value": [[243.1886596535, 454.7498168832, 466.307861338, 484.906249984], [243.1886596535, 460.3518066176, 279.20324709199997, 484.4979248128], [281.4541625995, 455.236083968, 349.18615721000003, 491.4553222656], [345.70355227100004, 461.5786132992, 400.602417017, 484.906249984], [399.8910522695, 456.3796386816, 439.32287597000004, 480.3479614464], [431.846435526, 454.7498168832, 466.307861338, 477.724121088]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048170_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five cars.", "boxes_value": [[56.1886596535, 7.749816883200026, 279.307861338, 37.906249984], [56.1886596535, 13.351806617600005, 92.20324709199997, 37.49792481280002], [94.45416259950002, 8.236083968000003, 162.18615721000003, 44.45532226559999], [158.70355227100004, 14.578613299200015, 213.602417017, 37.906249984], [212.8910522695, 9.3796386816, 252.32287597000004, 33.34796144640001], [244.846435526, 7.749816883200026, 279.307861338, 30.724121088000004]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048172.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[352.2389526528, 256.4216919189, 475.1586303488, 536.1365966451]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048172_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[31.23895265279998, 70.42169191890002, 154.1586303488, 350.1365966451]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048172.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, a chair, two desks, two vases, a person, a book, and a plate.", "boxes_value": [[352.2389526528, 256.4216919189, 475.1586303488, 536.1365966451], [13.7804565504, 0.6757812315, 511.0770873856, 720.732421842], [413.4583130112, 319.2484130679, 452.6778564608, 401.8157959323], [415.5225219584, 458.5809325824, 479.5122680832, 607.5462646575], [447.2962036224, 214.4172363642, 478.25897216, 311.7288818616], [441.1036377088, 359.5000000209, 483.5668945408, 440.0031738546], [345.5639648256, 210.9117431733, 382.820068352, 303.0716552796], [352.2389526528, 256.4216919189, 475.1586303488, 536.1365966451], [411.2903442432, 423.8380126971, 425.8884277248, 445.0052489931], [440.4022827008, 459.868896495, 469.0791625728, 470.4031982346]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4, 6], [7], [8], [9]]}, {"image_path": "objects365_v1_00048172_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a picture, a chair, two desks, two vases, a person, a book, and a plate.", "boxes_value": [[31.23895265279998, 70.42169191890002, 154.1586303488, 350.1365966451], [0, 0, 184, 420], [92.4583130112, 133.2484130679, 131.6778564608, 215.81579593229998], [94.52252195839998, 272.5809325824, 158.5122680832, 420], [126.29620362240001, 28.417236364199994, 157.25897215999998, 125.72888186159997], [120.10363770880002, 173.50000002090002, 162.56689454079998, 254.0031738546], [24.56396482560001, 24.91174317330001, 61.82006835200002, 117.07165527960001], [31.23895265279998, 70.42169191890002, 154.1586303488, 350.1365966451], [90.29034424320002, 237.8380126971, 104.88842772480001, 259.0052489931], [119.40228270080001, 273.868896495, 148.0791625728, 284.4031982346]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4, 6], [7], [8], [9]]}, {"image_path": "objects365_v1_00048177.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[211.9919433728, 289.86865236479997, 355.2012939264, 462.1744384512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048177_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[35.991943372799994, 43.86865236479997, 179.2012939264, 216.1744384512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048177.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a flower, a vase, a desk, a book, and a pen.", "boxes_value": [[211.9919433728, 289.86865236479997, 355.2012939264, 462.1744384512], [211.9919433728, 289.86865236479997, 355.2012939264, 430.2138671616], [256.8641967616, 392.9793701376, 300.7817382912, 437.851684608], [202.938598656, 416.86242677760004, 355.564880384, 494.14147952639996], [224.8927612416, 434.65161131520006, 339.103088384, 479.39709473280004], [287.7144164864, 443.98266600960005, 313.0635986432, 462.1744384512]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048177_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a flower, a vase, a desk, a book, and a pen.", "boxes_value": [[35.991943372799994, 43.86865236479997, 179.2012939264, 216.1744384512], [35.991943372799994, 43.86865236479997, 179.2012939264, 184.21386716159998], [80.8641967616, 146.97937013759997, 124.78173829119999, 191.85168460800003], [26.93859865600001, 170.86242677760004, 179.564880384, 248.14147952639996], [48.8927612416, 188.65161131520006, 163.103088384, 233.39709473280004], [111.71441648640001, 197.98266600960005, 137.0635986432, 216.1744384512]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048179.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify.", "boxes_value": [[305.512695296, 125.85321046860001, 435.0391235584, 551.3405761953001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048179_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify.", "boxes_value": [[32.512695296000004, 106.85321046860001, 162.03912355839998, 532.3405761953001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048179.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a hat, two leather shoes, and a moniter.", "boxes_value": [[305.512695296, 125.85321046860001, 435.0391235584, 551.3405761953001], [305.512695296, 125.85321046860001, 435.0391235584, 551.3405761953001], [355.163757312, 126.25152584429999, 443.068969728, 179.2200927802], [358.2875976704, 505.7614746249, 404.2117919744, 544.1336670001], [373.2903442432, 504.45227048789997, 412.191467264, 534.9543457369], [334.6872558592, 128.9942016823, 381.8955688448, 170.1448364053]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048179_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a hat, two leather shoes, and a moniter.", "boxes_value": [[32.512695296000004, 106.85321046860001, 162.03912355839998, 532.3405761953001], [32.512695296000004, 106.85321046860001, 162.03912355839998, 532.3405761953001], [82.16375731199997, 107.25152584429999, 170.068969728, 160.2200927802], [85.28759767039998, 486.7614746249, 131.2117919744, 525.1336670001], [100.29034424320002, 485.45227048789997, 139.19146726399998, 515.9543457369], [61.68725585919998, 109.9942016823, 108.89556884479998, 151.1448364053]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048180.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[113.17840573, 123.0619506688, 532.6818847608, 298.088806144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048180_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[105.17840573, 44.061950668799994, 524.6818847608, 219.088806144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048180.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a glasses, a van, and a microphone.", "boxes_value": [[113.17840573, 123.0619506688, 532.6818847608, 298.088806144], [113.17840573, 190.2705078272, 159.823608414, 257.721496576], [148.4140014748, 148.9945678848, 182.97839355399998, 200.0022582784], [218.95361331720002, 120.1075439616, 342.1035155908, 505.0896606208], [319.3596191132, 123.990661632, 500.2014160488, 482.9005127168], [388.6175536992, 123.0619506688, 429.82727051759997, 163.561096192], [465.6231689628, 207.9946289152, 532.6818847608, 298.088806144], [274.1063232424, 193.102355968, 299.555053732, 278.2237549056]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048180_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, a glasses, a van, and a microphone.", "boxes_value": [[105.17840573, 44.061950668799994, 524.6818847608, 219.088806144], [105.17840573, 111.27050782719999, 151.823608414, 178.721496576], [140.4140014748, 69.9945678848, 174.97839355399998, 121.0022582784], [210.95361331720002, 41.1075439616, 334.1035155908, 262], [311.3596191132, 44.990661632, 492.2014160488, 262], [380.6175536992, 44.061950668799994, 421.82727051759997, 84.56109619200001], [457.6231689628, 128.9946289152, 524.6818847608, 219.088806144], [266.1063232424, 114.10235596800001, 291.555053732, 199.2237549056]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048181.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[365.06933595839996, 312.6141357568, 609.4388427858, 448.0617065472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048181_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[62.06933595839996, 34.61413575680001, 306.43884278580003, 170.0617065472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048181.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four sneakers, and a camera.", "boxes_value": [[365.06933595839996, 312.6141357568, 609.4388427858, 448.0617065472], [365.06933595839996, 430.0532836864, 398.1500244468, 448.0617065472], [404.79699710679995, 312.6141357568, 429.33447265819996, 332.9876708864], [436.91882326419994, 318.7113037312, 454.3181152506, 337.1516113408], [470.0129394262, 355.600769024, 490.485595686, 373.309570304], [582.1054687664, 317.4989013504, 609.4388427858, 346.4168090624]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048181_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four sneakers, and a camera.", "boxes_value": [[62.06933595839996, 34.61413575680001, 306.43884278580003, 170.0617065472], [62.06933595839996, 152.0532836864, 95.15002444679999, 170.0617065472], [101.79699710679995, 34.61413575680001, 126.33447265819996, 54.9876708864], [133.91882326419994, 40.71130373120002, 151.3181152506, 59.1516113408], [167.01293942619998, 77.60076902399999, 187.485595686, 95.30957030399998], [279.1054687664, 39.49890135039999, 306.43884278580003, 68.41680906239998]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048183.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[4.9659423985, 171.7476196352, 204.23767092670002, 232.124389632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048183_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[4.9659423985, 15.74761963520001, 204.23767092670002, 76.124389632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048183.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and a trash bin can.", "boxes_value": [[4.9659423985, 171.7476196352, 204.23767092670002, 232.124389632], [192.1099853198, 171.7476196352, 204.23767092670002, 200.1373901312], [128.872863791, 171.1963501056, 165.1967773195, 269.7139892736], [125.797363284, 178.5117797888, 139.16253663540002, 209.0390624768], [48.188110370900006, 177.4486084096, 74.3109741298, 232.124389632], [4.9659423985, 205.3529052672, 28.1173706039, 232.0908203008]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048183_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and a trash bin can.", "boxes_value": [[4.9659423985, 15.74761963520001, 204.23767092670002, 76.124389632], [192.1099853198, 15.74761963520001, 204.23767092670002, 44.13739013119999], [128.872863791, 15.196350105600004, 165.1967773195, 91], [125.797363284, 22.5117797888, 139.16253663540002, 53.03906247680001], [48.188110370900006, 21.448608409600013, 74.3109741298, 76.124389632], [4.9659423985, 49.352905267199986, 28.1173706039, 76.0908203008]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048184.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[547.4555113866, 106.612521472, 701.2997172942, 362.6153370112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048184_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe.", "boxes_value": [[39.45551138660005, 64.612521472, 193.29971729420004, 320.6153370112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048184.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a necklace, a hat, a glasses, and a suv.", "boxes_value": [[547.4555113866, 106.612521472, 701.2997172942, 362.6153370112], [502.7151045695, 148.3323359232, 707.5790726344001, 508.6103487488], [581.2070463947, 106.731606784, 701.2997172942, 362.6153370112], [547.4555113866, 235.458391296, 589.0562405336, 284.1233952256], [583.3321891672, 106.612521472, 676.7059436372999, 165.488583424], [528.1358810749, 182.0474758144, 593.9114815352, 200.4462451712], [497.97729493279996, 55.9591064576, 721.348632785, 180.3101806592]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048184_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, a necklace, a hat, a glasses, and a suv.", "boxes_value": [[39.45551138660005, 64.612521472, 193.29971729420004, 320.6153370112], [0, 106.33233592319999, 199.57907263440006, 384], [73.2070463947, 64.731606784, 193.29971729420004, 320.6153370112], [39.45551138660005, 193.458391296, 81.05624053359998, 242.1233952256], [75.3321891672, 64.612521472, 168.70594363729992, 123.48858342400001], [20.1358810749, 140.0474758144, 85.91148153519998, 158.4462451712], [0, 13.9591064576, 213.34863278499995, 138.3101806592]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048185.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[25.285705536400002, 166.8468627968, 228.11492917419997, 373.8184814592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048185_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[25.285705536400002, 51.846862796799996, 228.11492917419997, 258.8184814592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048185.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, two potted plants, a lamp, and a person.", "boxes_value": [[25.285705536400002, 166.8468627968, 228.11492917419997, 373.8184814592], [161.2838744858, 297.997070336, 228.11492917419997, 350.3480835072], [33.986694358499996, 270.7077636608, 111.3197631763, 373.8184814592], [150.4636230632, 166.8468627968, 210.2023925697, 226.5856323072], [123.4584350431, 153.753417984, 153.7369995299, 203.6721191424], [25.285705536400002, 325.4834594816, 64.2863159402, 373.5015258624]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048185_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a pillow, two potted plants, a lamp, and a person.", "boxes_value": [[25.285705536400002, 51.846862796799996, 228.11492917419997, 258.8184814592], [161.2838744858, 182.99707033599998, 228.11492917419997, 235.3480835072], [33.986694358499996, 155.70776366080003, 111.3197631763, 258.8184814592], [150.4636230632, 51.846862796799996, 210.2023925697, 111.5856323072], [123.4584350431, 38.75341798400001, 153.7369995299, 88.67211914239999], [25.285705536400002, 210.4834594816, 64.2863159402, 258.5015258624]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048186.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[208.99652096350002, 292.63488768, 393.2517089734, 479.8431396352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048186_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[46.99652096350002, 47.63488768000002, 231.2517089734, 234.84313963519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048186.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a sneakers, a cup, a wine glass, a desk, and two chairs.", "boxes_value": [[208.99652096350002, 292.63488768, 393.2517089734, 479.8431396352], [208.99652096350002, 347.1440429568, 239.33081053240002, 367.7813110272], [306.03100583580004, 354.9053344768, 393.2517089734, 467.5850829824], [280.5719604317, 393.0938720768, 314.9887695324, 479.8431396352], [167.8164672583, 388.350097664, 510.158569308, 512.3721923584], [330.0587158014, 281.6857910272, 397.8137207324, 390.5379638784], [232.0375976785, 292.63488768, 330.5802001663, 395.425048832]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048186_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a sneakers, a cup, a wine glass, a desk, and two chairs.", "boxes_value": [[46.99652096350002, 47.63488768000002, 231.2517089734, 234.84313963519998], [46.99652096350002, 102.14404295679998, 77.33081053240002, 122.78131102719999], [144.03100583580004, 109.90533447680002, 231.2517089734, 222.58508298240002], [118.5719604317, 148.09387207679998, 152.9887695324, 234.84313963519998], [5.816467258300008, 143.35009766399997, 277, 267], [168.0587158014, 36.685791027200025, 235.8137207324, 145.53796387839998], [70.03759767849999, 47.63488768000002, 168.58020016630002, 150.42504883200002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048188.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[191.0233154446, 143.07769776, 322.62951661249997, 470.51269527999995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048188_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe.", "boxes_value": [[33.0233154446, 82.07769776, 164.62951661249997, 409.51269527999995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048188.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five apples.", "boxes_value": [[191.0233154446, 143.07769776, 322.62951661249997, 470.51269527999995], [191.0233154446, 261.21240231999997, 244.9093017461, 316.1347656], [206.567382809, 296.44555664, 259.4171142735, 350.33166504], [261.48962404729997, 279.8652344, 322.62951661249997, 330.64245608], [228.3290405225, 143.07769776, 286.3601073993, 191.78240968], [255.449584984, 425.78222655999997, 314.7860107611, 470.51269527999995]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048188_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five apples.", "boxes_value": [[33.0233154446, 82.07769776, 164.62951661249997, 409.51269527999995], [33.0233154446, 200.21240231999997, 86.9093017461, 255.13476559999998], [48.56738280900001, 235.44555664, 101.41711427349998, 289.33166504], [103.48962404729997, 218.86523440000002, 164.62951661249997, 269.64245608], [70.32904052250001, 82.07769776, 128.36010739929998, 130.78240968], [97.44958498400001, 364.78222655999997, 156.7860107611, 409.51269527999995]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048190.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[354.4310302752, 100.1613769728, 610.390502903, 344.8278198272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048190_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations.", "boxes_value": [[64.43103027519999, 62.1613769728, 320.39050290299997, 306.8278198272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048190.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, two benches, and a desk.", "boxes_value": [[354.4310302752, 100.1613769728, 610.390502903, 344.8278198272], [354.4310302752, 100.1613769728, 367.47839354990003, 198.669006336], [337.6260375917, 76.2984008704, 450.48291016260004, 187.1112060416], [204.0823974335, 191.1694336, 554.8605957026, 480.756347648], [421.27258301509994, 166.7909545984, 610.390502903, 344.8278198272], [530.81457519, 217.2604980224, 577.8675536989999, 252.749633792]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048190_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two cabinets, two benches, and a desk.", "boxes_value": [[64.43103027519999, 62.1613769728, 320.39050290299997, 306.8278198272], [64.43103027519999, 62.1613769728, 77.47839354990003, 160.669006336], [47.626037591700026, 38.2984008704, 160.48291016260004, 149.1112060416], [0, 153.1694336, 264.8605957026, 367], [131.27258301509994, 128.7909545984, 320.39050290299997, 306.8278198272], [240.81457519000003, 179.2604980224, 287.8675536989999, 214.749633792]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048191.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[297.6092965632, 293.1744753664, 508.4450683392, 334.0058593792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048191_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[53.60929656320002, 11.174475366399975, 264.4450683392, 52.00585937919999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048191.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two bowls, a cell phone, and a tablet.", "boxes_value": [[297.6092965632, 293.1744753664, 508.4450683392, 334.0058593792], [1.3904418816, 291.184448256, 767.5522460928, 509.507873536], [297.6092965632, 293.1744753664, 332.3010757632, 311.1050578944], [408.4548340224, 299.701965312, 450.57226560000004, 315.6774902272], [453.5142822144, 320.6770629632, 508.4450683392, 334.0058593792], [407.87316894720004, 320.4750976512, 540.3533935872, 344.9112548864]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048191_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two bowls, a cell phone, and a tablet.", "boxes_value": [[53.60929656320002, 11.174475366399975, 264.4450683392, 52.00585937919999], [0, 9.184448255999996, 317, 62], [53.60929656320002, 11.174475366399975, 88.3010757632, 29.105057894399977], [164.4548340224, 17.701965312000027, 206.57226560000004, 33.677490227199996], [209.5142822144, 38.6770629632, 264.4450683392, 52.00585937919999], [163.87316894720004, 38.475097651199974, 296.3533935872, 62]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048192.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations.", "boxes_value": [[625.2419433215999, 9.2903442432, 767.885376, 318.7216796672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048192_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations.", "boxes_value": [[36.24194332159993, 9.2903442432, 178.88537599999995, 318.7216796672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048192.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[625.2419433215999, 9.2903442432, 767.885376, 318.7216796672], [743.0051269631999, 161.2848510976, 767.885376, 318.7216796672], [696.8033447424, 9.2903442432, 738.9412841472, 75.995056128], [697.4471435519999, 94.999694848, 718.1809082112, 127.6361083904], [728.9316406272, 99.9911499264, 753.3769531392, 140.8186645504], [625.2419433215999, 42.480896, 644.2294921728001, 74.563415552]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048192_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[36.24194332159993, 9.2903442432, 178.88537599999995, 318.7216796672], [154.00512696319993, 161.2848510976, 178.88537599999995, 318.7216796672], [107.80334474239999, 9.2903442432, 149.9412841472, 75.995056128], [108.44714355199994, 94.999694848, 129.18090821119995, 127.6361083904], [139.93164062719995, 99.9911499264, 164.37695313920005, 140.8186645504], [36.24194332159993, 42.480896, 55.229492172800065, 74.563415552]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048194.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object.", "boxes_value": [[0.3908691456, 127.6938476448, 389.9539794944, 690.6600342]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048194_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object.", "boxes_value": [[0.3908691456, 127.6938476448, 389.9539794944, 690.6600342]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048194.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, two people, a watch, and a sneakers.", "boxes_value": [[0.3908691456, 127.6938476448, 389.9539794944, 690.6600342], [0.3908691456, 127.6938476448, 389.9539794944, 690.6600342], [0.33654784, 50.4539794804, 86.24218752, 197.2628174024], [230.3419799552, 0, 511.7857055744, 710.571411108], [340.0268554752, 438.8566894396, 360.8698730496, 478.2266845556], [355.5045776384, 160.61572264240002, 401.088806144, 217.8032226896], [309.5758980608, 651.1154349948, 380.9040158208, 710.5555331524]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00048194_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, two people, a watch, and a sneakers.", "boxes_value": [[0.3908691456, 127.6938476448, 389.9539794944, 690.6600342], [0.3908691456, 127.6938476448, 389.9539794944, 690.6600342], [0.33654784, 50.4539794804, 86.24218752, 197.2628174024], [230.3419799552, 0, 487, 710.571411108], [340.0268554752, 438.8566894396, 360.8698730496, 478.2266845556], [355.5045776384, 160.61572264240002, 401.088806144, 217.8032226896], [309.5758980608, 651.1154349948, 380.9040158208, 710.5555331524]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00048195.jpg", "text": "Can you share some insights about the rectangular region in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[517.6152344064001, 156.9534301696, 768.1901855232, 487.5919799808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048195_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[63.61523440640008, 82.95343016960001, 314, 413.5919799808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048195.jpg", "text": "Can you share some insights about the rectangular region in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, three glasses, and a cup.", "boxes_value": [[517.6152344064001, 156.9534301696, 768.1901855232, 487.5919799808], [427.114257792, 179.7280273408, 748.3067626751999, 512.5191650304], [706.2458496, 194.3856201216, 768.1901855232, 414.6318969856], [475.6019287296, 218.0650024448, 594.043945344, 253.7447510016], [517.6152344064001, 156.9534301696, 592.6530761472001, 175.2399902208], [719.3977050624001, 226.3511963136, 767.951538048, 258.5102539264], [580.9412841984, 427.1268310528, 686.8992920064, 487.5919799808]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048195_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, three glasses, and a cup.", "boxes_value": [[63.61523440640008, 82.95343016960001, 314, 413.5919799808], [0, 105.7280273408, 294.30676267519993, 438], [252.24584960000004, 120.3856201216, 314, 340.6318969856], [21.60192872959999, 144.0650024448, 140.043945344, 179.7447510016], [63.61523440640008, 82.95343016960001, 138.65307614720007, 101.2399902208], [265.39770506240006, 152.3511963136, 313.951538048, 184.51025392640003], [126.94128419840001, 353.1268310528, 232.89929200639995, 413.5919799808]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048196.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[44.4741210753, 124.6878662144, 123.966308568, 332.01544192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048196_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[20.474121075299998, 52.6878662144, 99.966308568, 260.01544192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048196.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include two flowers, a vase, a lamp, and two people.", "boxes_value": [[44.4741210753, 124.6878662144, 123.966308568, 332.01544192], [43.403076190200004, 287.5948486144, 66.96508791629999, 313.5665283072], [44.4741210753, 312.2277832192, 65.8941040368, 331.2380370944], [65.1105346452, 292.6473388544, 84.33233641620001, 321.6486205952], [88.4983520253, 124.6878662144, 112.3764648174, 158.7994384896], [93.2690429817, 216.0480346624, 123.966308568, 332.01544192], [66.24609377670001, 211.8217773568, 83.0714721945, 275.5432129024]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 6]]}, {"image_path": "objects365_v1_00048196_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include two flowers, a vase, a lamp, and two people.", "boxes_value": [[20.474121075299998, 52.6878662144, 99.966308568, 260.01544192], [19.403076190200004, 215.5948486144, 42.96508791629999, 241.56652830719997], [20.474121075299998, 240.22778321919998, 41.8941040368, 259.2380370944], [41.110534645200005, 220.6473388544, 60.33233641620001, 249.6486205952], [64.4983520253, 52.6878662144, 88.3764648174, 86.79943848959999], [69.2690429817, 144.0480346624, 99.966308568, 260.01544192], [42.24609377670001, 139.8217773568, 59.071472194500004, 203.5432129024]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 6]]}, {"image_path": "objects365_v1_00048197.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[57.984436012799996, 141.733154304, 353.1464843568, 328.8781738496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048197_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[57.984436012799996, 47.73315430400001, 353.1464843568, 234.8781738496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048197.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[57.984436012799996, 141.733154304, 353.1464843568, 328.8781738496], [195.17779539, 117.1978149376, 353.9313964644, 330.1736450048], [172.7423706132, 141.733154304, 217.8006591612, 227.273559552], [57.984436012799996, 141.733154304, 85.4418945072, 229.385681152], [247.46868899400002, 154.524353024, 264.0839233572, 201.1062622208], [195.3479614536, 255.0485229568, 213.6241455408, 288.1525878784], [323.694824208, 310.4471435776, 353.1464843568, 328.8781738496]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048197_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and two sneakers.", "boxes_value": [[57.984436012799996, 47.73315430400001, 353.1464843568, 234.8781738496], [195.17779539, 23.1978149376, 353.9313964644, 236.1736450048], [172.7423706132, 47.73315430400001, 217.8006591612, 133.273559552], [57.984436012799996, 47.73315430400001, 85.4418945072, 135.385681152], [247.46868899400002, 60.52435302399999, 264.0839233572, 107.1062622208], [195.3479614536, 161.0485229568, 213.6241455408, 194.15258787840003], [323.694824208, 216.4471435776, 353.1464843568, 234.8781738496]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048198.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[0.058288585999999996, 189.6683959808, 335.656066907, 511.9855346688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048198_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for each element you describe.", "boxes_value": [[0.058288585999999996, 80.6683959808, 335.656066907, 402.9855346688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048198.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cymbal, a piano, a person, a tripod, and a speaker.", "boxes_value": [[0.058288585999999996, 189.6683959808, 335.656066907, 511.9855346688], [140.8678588899, 189.6683959808, 181.3238525475, 208.9015502848], [209.1449585146, 245.0657348608, 404.7919921779, 288.1080932864], [279.37280271550003, 190.507568384, 335.656066907, 420.9806518784], [61.6464233395, 320.3964233216, 110.6611328212, 427.0034179584], [0.058288585999999996, 441.7106323456, 225.2471923496, 511.9855346688]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048198_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cymbal, a piano, a person, a tripod, and a speaker.", "boxes_value": [[0.058288585999999996, 80.6683959808, 335.656066907, 402.9855346688], [140.8678588899, 80.6683959808, 181.3238525475, 99.90155028480001], [209.1449585146, 136.0657348608, 404.7919921779, 179.1080932864], [279.37280271550003, 81.507568384, 335.656066907, 311.9806518784], [61.6464233395, 211.39642332160003, 110.6611328212, 318.0034179584], [0.058288585999999996, 332.7106323456, 225.2471923496, 402.9855346688]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048200.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[316.559814465, 0.4106445312, 456.7728271613, 312.3180541952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048200_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[35.55981446499999, 0.4106445312, 175.77282716129997, 312.3180541952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048200.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[316.559814465, 0.4106445312, 456.7728271613, 312.3180541952], [386.4095459045, 0.4106445312, 449.3969726734, 204.9346924032], [338.8814086667, 36.1894531072, 390.6022949429, 221.2955322368], [316.559814465, 106.9653320192, 353.03662109510003, 240.89501952], [436.1209716654, 147.6359863296, 456.7728271613, 171.7783203328], [360.53869630519995, 247.341491712, 394.1628417648, 312.3180541952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048200_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[35.55981446499999, 0.4106445312, 175.77282716129997, 312.3180541952], [105.40954590450002, 0.4106445312, 168.3969726734, 204.9346924032], [57.8814086667, 36.1894531072, 109.60229494290002, 221.2955322368], [35.55981446499999, 106.9653320192, 72.03662109510003, 240.89501952], [155.12097166540002, 147.6359863296, 175.77282716129997, 171.7783203328], [79.53869630519995, 247.341491712, 113.16284176480002, 312.3180541952]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048202.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify.", "boxes_value": [[0.5126953236, 34.2067871232, 138.2772826896, 273.4923705856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048202_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify.", "boxes_value": [[0.5126953236, 34.2067871232, 138.2772826896, 273.4923705856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048202.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a towel, and two people.", "boxes_value": [[0.5126953236, 34.2067871232, 138.2772826896, 273.4923705856], [0.5126953236, 139.8303222784, 84.0515136888, 273.4923705856], [29.4727783332, 138.7164917248, 111.8977660878, 266.809265152], [104.8040771298, 34.2067871232, 137.2512817572, 68.85913088], [38.9611816602, 33.1047973888, 264.8240356284, 437.8033447424], [64.585815453, 96.167541504, 138.2772826896, 200.4840088064]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048202_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a towel, and two people.", "boxes_value": [[0.5126953236, 34.2067871232, 138.2772826896, 273.4923705856], [0.5126953236, 139.8303222784, 84.0515136888, 273.4923705856], [29.4727783332, 138.7164917248, 111.8977660878, 266.809265152], [104.8040771298, 34.2067871232, 137.2512817572, 68.85913088], [38.9611816602, 33.1047973888, 172, 333], [64.585815453, 96.167541504, 138.2772826896, 200.4840088064]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048204.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention.", "boxes_value": [[315.88470456000005, 125.09844969599999, 528.4749756, 256.00714113600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048204_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention.", "boxes_value": [[53.884704560000046, 33.09844969599999, 266.4749756, 164.00714113600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048204.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two beds, a lamp, and two pillows.", "boxes_value": [[315.88470456000005, 125.09844969599999, 528.4749756, 256.00714113600003], [29.212341300000002, 89.216613792, 477.6352539, 479.235656736], [315.88470456000005, 125.09844969599999, 367.17950442, 256.00714113600003], [360.15173339999996, 95.58776856, 599.6080322400001, 471.79901121599994], [393.60284424, 133.219055184, 528.4749756, 220.354919424], [401.47235106000005, 133.153869648, 490.00256346, 206.395446768]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048204_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two beds, a lamp, and two pillows.", "boxes_value": [[53.884704560000046, 33.09844969599999, 266.4749756, 164.00714113600003], [0, 0, 215.6352539, 196], [53.884704560000046, 33.09844969599999, 105.17950442, 164.00714113600003], [98.15173339999996, 3.5877685600000007, 319, 196], [131.60284424000002, 41.21905518400001, 266.4749756, 128.354919424], [139.47235106000005, 41.15386964800001, 228.00256345999998, 114.395446768]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048205.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object.", "boxes_value": [[26.187927226800003, 203.3019409408, 231.93316650039998, 282.0677490176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048205_crop.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object.", "boxes_value": [[26.187927226800003, 20.301940940799994, 231.93316650039998, 99.06774901760002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048205.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, two blackboards, and a pot.", "boxes_value": [[26.187927226800003, 203.3019409408, 231.93316650039998, 282.0677490176], [158.32006838380002, 213.9095458816, 187.93566894880001, 281.2849731584], [181.4311523256, 203.3019409408, 200.6430663781, 256.2344360448], [142.5897216969, 211.6549682688, 160.5486449977, 232.1198119936], [26.187927226800003, 205.2677612544, 91.787902828, 282.0677490176], [203.5119628684, 212.981323264, 231.93316650039998, 243.8457641472], [22.968811012099998, 223.7865600512, 50.2474975715, 237.7071533056]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048205_crop.jpg", "text": "What can you tell me about the area within the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, two blackboards, and a pot.", "boxes_value": [[26.187927226800003, 20.301940940799994, 231.93316650039998, 99.06774901760002], [158.32006838380002, 30.909545881599996, 187.93566894880001, 98.28497315840002], [181.4311523256, 20.301940940799994, 200.6430663781, 73.23443604480002], [142.5897216969, 28.65496826879999, 160.5486449977, 49.119811993599996], [26.187927226800003, 22.2677612544, 91.787902828, 99.06774901760002], [203.5119628684, 29.981323263999997, 231.93316650039998, 60.845764147199986], [22.968811012099998, 40.78656005120001, 50.2474975715, 54.7071533056]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048206.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[35.2957763486, 0, 266.1106567723, 239.726013184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048206_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[35.2957763486, 0, 266.1106567723, 239.726013184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048206.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cabinets, a lamp, a gas stove, a microwave, a kettle, and an extractor.", "boxes_value": [[35.2957763486, 0, 266.1106567723, 239.726013184], [35.2957763486, 43.2280883712, 149.2487792958, 120.2326660096], [137.5081786798, 48.4077148672, 217.5465697908, 173.756042496], [217.5465697908, 27.6890258944, 255.9502563624, 174.1013793792], [160.7044067676, 0, 191.731567394, 10.92724608], [45.8171386386, 219.6964721664, 166.9379272269, 263.6032104448], [152.63903809890002, 193.159667968, 230.96508790099998, 235.1068115456], [238.0061645432, 202.8420410368, 266.1106567723, 239.726013184], [39.047729481, 119.6594238464, 154.866332995, 141.9322509824]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048206_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cabinets, a lamp, a gas stove, a microwave, a kettle, and an extractor.", "boxes_value": [[35.2957763486, 0, 266.1106567723, 239.726013184], [35.2957763486, 43.2280883712, 149.2487792958, 120.2326660096], [137.5081786798, 48.4077148672, 217.5465697908, 173.756042496], [217.5465697908, 27.6890258944, 255.9502563624, 174.1013793792], [160.7044067676, 0, 191.731567394, 10.92724608], [45.8171386386, 219.6964721664, 166.9379272269, 263.6032104448], [152.63903809890002, 193.159667968, 230.96508790099998, 235.1068115456], [238.0061645432, 202.8420410368, 266.1106567723, 239.726013184], [39.047729481, 119.6594238464, 154.866332995, 141.9322509824]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048208.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[253.6112060416, 219.94342039309998, 503.652526848, 389.2451171856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048208_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference.", "boxes_value": [[62.61120604160001, 42.943420393099984, 312.652526848, 212.2451171856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048208.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, a book, a helmet, a cup, and a bread.", "boxes_value": [[253.6112060416, 219.94342039309998, 503.652526848, 389.2451171856], [350.2367553536, 267.7684936567, 511.832031232, 526.0117187802], [127.0446777344, 93.8226318162, 458.7718505984, 683.2139892273999], [335.1609497088, 351.8613281502, 384.0843505664, 373.2252197519], [439.967346176, 219.94342039309998, 503.652526848, 288.58190918270003], [465.907104512, 268.8758544654, 502.7043457024, 335.2651977546], [253.6112060416, 356.82189943670005, 289.2406616064, 389.2451171856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048208_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, a book, a helmet, a cup, and a bread.", "boxes_value": [[62.61120604160001, 42.943420393099984, 312.652526848, 212.2451171856], [159.23675535360002, 90.7684936567, 320.832031232, 254], [0, 0, 267.7718505984, 254], [144.1609497088, 174.8613281502, 193.08435056640002, 196.2252197519], [248.96734617599998, 42.943420393099984, 312.652526848, 111.58190918270003], [274.907104512, 91.87585446539998, 311.7043457024, 158.26519775460002], [62.61120604160001, 179.82189943670005, 98.24066160640001, 212.2451171856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048209.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations.", "boxes_value": [[372.0219726336, 269.9232787968, 749.4564208895999, 340.1234741248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048209_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations.", "boxes_value": [[95.0219726336, 17.92327879679999, 472.45642088959994, 88.1234741248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048209.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, and two people.", "boxes_value": [[372.0219726336, 269.9232787968, 749.4564208895999, 340.1234741248], [420.9095459328, 300.1798706176, 451.027832064, 323.7507324416], [372.0219726336, 298.8704223744, 411.30664058879995, 323.7507324416], [617.1013183488, 300.8387451392, 670.9774169856, 340.1234741248], [574.804687488, 269.9232787968, 589.6560058368, 293.586608896], [718.9829101824, 270.9975585792, 749.4564208895999, 303.375610368]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048209_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Please mention the objects and their locations. For your reference, objects involved in this region include three cabinets, and two people.", "boxes_value": [[95.0219726336, 17.92327879679999, 472.45642088959994, 88.1234741248], [143.90954593279997, 48.17987061759999, 174.027832064, 71.75073244160001], [95.0219726336, 46.87042237439999, 134.30664058879995, 71.75073244160001], [340.10131834879996, 48.8387451392, 393.97741698560003, 88.1234741248], [297.804687488, 17.92327879679999, 312.6560058368, 41.58660889599997], [441.9829101824, 18.997558579200017, 472.45642088959994, 51.375610368000025]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048210.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[123.4320678864, 272.0530395648, 240.77929687559998, 334.7576904192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048210_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[29.432067886400006, 16.053039564799974, 146.77929687559998, 78.7576904192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048210.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[123.4320678864, 272.0530395648, 240.77929687559998, 334.7576904192], [184.34515381920002, 281.906677248, 228.238342308, 333.4140014592], [123.4320678864, 283.2503051776, 175.83520509719997, 334.7576904192], [220.1763305484, 272.0530395648, 240.77929687559998, 319.5294189568], [144.48291016320002, 274.2925414912, 176.73101807759997, 322.2167968768], [160.606933614, 281.458740224, 213.01007082479998, 328.935119616]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048210_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[29.432067886400006, 16.053039564799974, 146.77929687559998, 78.7576904192], [90.34515381920002, 25.906677247999994, 134.238342308, 77.4140014592], [29.432067886400006, 27.250305177600012, 81.83520509719997, 78.7576904192], [126.17633054839999, 16.053039564799974, 146.77929687559998, 63.52941895679999], [50.48291016320002, 18.292541491199984, 82.73101807759997, 66.2167968768], [66.60693361400001, 25.458740223999996, 119.01007082479998, 72.93511961600001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048211.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[0, 298.7254028288, 157.6382446106, 413.5147704832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048211_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify.", "boxes_value": [[0, 28.725402828799986, 157.6382446106, 143.51477048319998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048211.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, a couch, a cabinet, a vase, and two handbags.", "boxes_value": [[0, 298.7254028288, 157.6382446106, 413.5147704832], [122.8680420136, 369.654846208, 180.9119873262, 400.842651392], [57.893371603599995, 373.986450176, 126.33331297859999, 445.8917236224], [6.780029276800001, 358.3925781504, 290.9357299628, 506.5346679808], [0, 298.7254028288, 69.4062499936, 349.7439575552], [133.43774415919998, 315.5444946432, 157.6382446106, 348.0900268544], [0, 349.18255616, 23.301147441, 413.5147704832], [2.624238981, 348.2944946176, 41.8284302046, 408.6870866432]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048211_crop.jpg", "text": "I need details about the area located within image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, a couch, a cabinet, a vase, and two handbags.", "boxes_value": [[0, 28.725402828799986, 157.6382446106, 143.51477048319998], [122.8680420136, 99.65484620799998, 180.9119873262, 130.842651392], [57.893371603599995, 103.986450176, 126.33331297859999, 172], [6.780029276800001, 88.39257815040003, 197, 172], [0, 28.725402828799986, 69.4062499936, 79.74395755519998], [133.43774415919998, 45.54449464319998, 157.6382446106, 78.09002685439998], [0, 79.18255615999999, 23.301147441, 143.51477048319998], [2.624238981, 78.29449461759998, 41.8284302046, 138.68708664320002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048212.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[312.7596435456, 199.7302245888, 474.598144512, 487.3469848576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048212_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[40.7596435456, 72.73022458880001, 202.59814451199998, 360.3469848576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048212.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a gloves, two sneakers, and two helmets.", "boxes_value": [[312.7596435456, 199.7302245888, 474.598144512, 487.3469848576], [331.724975616, 199.7302245888, 352.2041015808, 221.7260742144], [436.18627929599995, 232.2288818176, 474.598144512, 279.250366208], [312.7596435456, 368.872131328, 333.500366208, 390.7044677632], [444.4628906496, 345.7119751168, 462.60925294079993, 364.4566650368], [371.70239255039996, 467.6000976384, 387.5410156032, 487.3469848576]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00048212_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a gloves, two sneakers, and two helmets.", "boxes_value": [[40.7596435456, 72.73022458880001, 202.59814451199998, 360.3469848576], [59.724975615999995, 72.73022458880001, 80.2041015808, 94.7260742144], [164.18627929599995, 105.22888181760001, 202.59814451199998, 152.250366208], [40.7596435456, 241.87213132800002, 61.500366208, 263.7044677632], [172.4628906496, 218.71197511679998, 190.60925294079993, 237.45666503680002], [99.70239255039996, 340.6000976384, 115.54101560319998, 360.3469848576]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00048213.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[621.4249267336, 349.993286144, 670.7265624662999, 437.850097664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048213_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[12.424926733600046, 21.993286144000024, 61.726562466299924, 109.85009766399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048213.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, and five bottles.", "boxes_value": [[621.4249267336, 349.993286144, 670.7265624662999, 437.850097664], [319.4506836117, 398.5957641728, 681.1564941417, 512.9702148608], [645.9410400273999, 368.8817138688, 682.0416259427, 459.6721191424], [621.4249267336, 402.288269056, 638.9364013669, 437.850097664], [635.4342041229, 365.648864768, 655.9091796938001, 429.22906496], [631.1236572549001, 349.993286144, 670.7265624662999, 389.326782208], [608.5137939559, 363.6099243008, 635.5316161782999, 423.8802490368]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048213_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, and five bottles.", "boxes_value": [[12.424926733600046, 21.993286144000024, 61.726562466299924, 109.85009766399997], [0, 70.59576417279999, 72.15649414170002, 131], [36.941040027399936, 40.88171386879998, 73.04162594269997, 131], [12.424926733600046, 74.28826905599999, 29.936401366899986, 109.85009766399997], [26.43420412290004, 37.64886476800001, 46.90917969380007, 101.22906496000002], [22.12365725490008, 21.993286144000024, 61.726562466299924, 61.326782208], [0, 35.6099243008, 26.53161617829994, 95.88024903680002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048218.jpg", "text": "What details can you provide about the region in the snapshot ? Provide the coordinates for each element you describe.", "boxes_value": [[584.8544921708001, 241.141296384, 766.3286133106, 511.9787597824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048218_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Provide the coordinates for each element you describe.", "boxes_value": [[45.854492170800086, 68.14129638399999, 227, 338.9787597824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048218.jpg", "text": "What details can you provide about the region in the snapshot ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two power outlets, a kettle, a gas stove, an oven, and a toaster.", "boxes_value": [[584.8544921708001, 241.141296384, 766.3286133106, 511.9787597824], [733.4560546821999, 241.141296384, 764.9471435708, 278.7274169856], [685.8587646118, 224.8963622912, 725.0411377064, 261.0934448128], [600.0156249964, 239.2155761664, 653.5444336156, 323.5305175552], [600.2082519356001, 335.574951168, 766.3286133106, 507.481628416], [584.8544921708001, 396.9719238144, 726.3458252148, 511.9787597824], [591.8294677926, 225.4348754944, 648.7911376604, 264.0874633728]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048218_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two power outlets, a kettle, a gas stove, an oven, and a toaster.", "boxes_value": [[45.854492170800086, 68.14129638399999, 227, 338.9787597824], [194.45605468219992, 68.14129638399999, 225.9471435708, 105.72741698559997], [146.85876461179998, 51.896362291200006, 186.04113770640004, 88.09344481279999], [61.01562499639999, 66.21557616640001, 114.54443361560004, 150.5305175552], [61.20825193560006, 162.57495116799998, 227, 334.481628416], [45.854492170800086, 223.9719238144, 187.34582521480002, 338.9787597824], [52.829467792599985, 52.43487549439999, 109.79113766039995, 91.08746337280002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048219.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object.", "boxes_value": [[47.093688949999994, 250.75360104499998, 174.06939695, 506.68164063620003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048219_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object.", "boxes_value": [[32.093688949999994, 64.75360104499998, 159.06939695, 320.68164063620003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048219.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, and two moniters.", "boxes_value": [[47.093688949999994, 250.75360104499998, 174.06939695, 506.68164063620003], [0, 341.2313232304, 224.753479, 670.6330566468], [0.13861085, 346.151733409, 157.92504885, 745.3505859628], [47.093688949999994, 282.3117675808, 141.65960694999998, 344.3470458678], [119.3017578, 250.75360104499998, 174.06939695, 306.7382812422], [86.3463135, 390.28051759420003, 138.94238280000002, 506.68164063620003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048219_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, and two moniters.", "boxes_value": [[32.093688949999994, 64.75360104499998, 159.06939695, 320.68164063620003], [0, 155.2313232304, 190, 384], [0, 160.15173340899997, 142.92504885, 384], [32.093688949999994, 96.3117675808, 126.65960694999998, 158.34704586779998], [104.3017578, 64.75360104499998, 159.06939695, 120.7382812422], [71.3463135, 204.28051759420003, 123.94238280000002, 320.68164063620003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048220.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[245.1882324008, 258.1770629632, 334.4769287236, 452.984985344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048220_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[23.18823240079999, 49.1770629632, 112.4769287236, 243.984985344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048220.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a bench, two people, and a boots.", "boxes_value": [[245.1882324008, 258.1770629632, 334.4769287236, 452.984985344], [27.1116332864, 20.4028320256, 654.968139618, 490.0273437696], [121.6636352568, 303.9578247168, 334.66827390360004, 449.3536376832], [260.3739013356, 258.1770629632, 334.4769287236, 398.1494140416], [111.8098754916, 151.1393432576, 314.7877197348, 454.4554443264], [245.1882324008, 410.8023681536, 310.759277348, 452.984985344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048220_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, a bench, two people, and a boots.", "boxes_value": [[23.18823240079999, 49.1770629632, 112.4769287236, 243.984985344], [0, 0, 134, 281.0273437696], [0, 94.95782471680002, 112.66827390360004, 240.35363768320002], [38.37390133560001, 49.1770629632, 112.4769287236, 189.14941404159998], [0, 0, 92.78771973480002, 245.45544432640003], [23.18823240079999, 201.8023681536, 88.75927734800001, 243.984985344]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048221.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Please mention the objects and their locations.", "boxes_value": [[7.9470214506, 137.6432495104, 154.62652588150002, 457.5346679808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048221_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Please mention the objects and their locations.", "boxes_value": [[7.9470214506, 80.64324951040001, 154.62652588150002, 400.5346679808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048221.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a person, a laptop, a tripod, and a cell phone.", "boxes_value": [[7.9470214506, 137.6432495104, 154.62652588150002, 457.5346679808], [0, 237.3114013696, 62.63067626830001, 397.4503173632], [58.5996704214, 137.6432495104, 154.62652588150002, 396.9157714944], [7.9470214506, 221.0430908416, 55.839111332, 244.9891357184], [124.57464600509999, 230.1993408, 146.7891845506, 284.6333618176], [83.472656225, 447.3767699968, 119.8189086745, 457.5346679808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048221_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Please mention the objects and their locations. For your reference, objects involved in this region include a desk, a person, a laptop, a tripod, and a cell phone.", "boxes_value": [[7.9470214506, 80.64324951040001, 154.62652588150002, 400.5346679808], [0, 180.3114013696, 62.63067626830001, 340.4503173632], [58.5996704214, 80.64324951040001, 154.62652588150002, 339.9157714944], [7.9470214506, 164.0430908416, 55.839111332, 187.9891357184], [124.57464600509999, 173.1993408, 146.7891845506, 227.63336181760002], [83.472656225, 390.3767699968, 119.8189086745, 400.5346679808]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048222.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify.", "boxes_value": [[0, 145.006408704, 162.2004394494, 484.3435668992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048222_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify.", "boxes_value": [[0, 85.006408704, 162.2004394494, 424.3435668992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048222.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a desk, a picture, a person, and a telephone.", "boxes_value": [[0, 145.006408704, 162.2004394494, 484.3435668992], [63.752685513100005, 171.013854976, 154.47686768079998, 333.5285033984], [0, 317.3208617984, 162.2004394494, 484.3435668992], [34.369628895, 271.428100608, 85.870727547, 327.1876831232], [49.6987304984, 281.3436279296, 72.2279052702, 319.3094482432], [20.2376098489, 145.006408704, 53.6022338861, 187.3885497856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048222_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a desk, a picture, a person, and a telephone.", "boxes_value": [[0, 85.006408704, 162.2004394494, 424.3435668992], [63.752685513100005, 111.013854976, 154.47686768079998, 273.5285033984], [0, 257.3208617984, 162.2004394494, 424.3435668992], [34.369628895, 211.42810060800002, 85.870727547, 267.1876831232], [49.6987304984, 221.34362792960002, 72.2279052702, 259.3094482432], [20.2376098489, 85.006408704, 53.6022338861, 127.38854978559999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048225.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[157.74572753680002, 148.3266601472, 244.61151122700002, 325.4564819456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048225_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations.", "boxes_value": [[21.74572753680002, 44.32666014719999, 108.61151122700002, 221.4564819456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048225.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a person, a hat, a street lights, and a machinery vehicle.", "boxes_value": [[157.74572753680002, 148.3266601472, 244.61151122700002, 325.4564819456], [157.74572753680002, 251.792663552, 202.8251953038, 293.8668212736], [179.30145266550002, 214.3786620928, 244.61151122700002, 325.4564819456], [185.2518920955, 214.9367675904, 219.4539184594, 233.1005859328], [175.6311034852, 148.3266601472, 192.3562011509, 236.6134033408], [25.088684078, 212.9266967552, 464.5964355494, 388.729797376]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048225_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, a person, a hat, a street lights, and a machinery vehicle.", "boxes_value": [[21.74572753680002, 44.32666014719999, 108.61151122700002, 221.4564819456], [21.74572753680002, 147.792663552, 66.8251953038, 189.8668212736], [43.30145266550002, 110.3786620928, 108.61151122700002, 221.4564819456], [49.25189209550001, 110.9367675904, 83.4539184594, 129.1005859328], [39.631103485199986, 44.32666014719999, 56.356201150900006, 132.6134033408], [0, 108.9266967552, 130, 265]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048226.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify.", "boxes_value": [[280.80908204950003, 187.705017088, 487.6513671926, 271.1642456064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048226_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify.", "boxes_value": [[51.809082049500034, 21.705017088000005, 258.6513671926, 105.16424560640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048226.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a desk, a person, a suv, and a car.", "boxes_value": [[280.80908204950003, 187.705017088, 487.6513671926, 271.1642456064], [280.80908204950003, 218.4359130624, 302.0266113273, 246.3677978624], [309.6408691508, 221.2001342976, 348.1185302913, 247.6411743232], [432.08020016710003, 187.705017088, 471.39575197930003, 271.1642456064], [383.6207275443, 204.9848632832, 439.1483154133, 248.4703369216], [459.2185058434, 219.3685302784, 487.6513671926, 241.7802734592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048226_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a desk, a person, a suv, and a car.", "boxes_value": [[51.809082049500034, 21.705017088000005, 258.6513671926, 105.16424560640002], [51.809082049500034, 52.435913062400004, 73.02661132729997, 80.36779786240001], [80.64086915079997, 55.20013429759999, 119.11853029129998, 81.6411743232], [203.08020016710003, 21.705017088000005, 242.39575197930003, 105.16424560640002], [154.6207275443, 38.98486328320001, 210.1483154133, 82.4703369216], [230.2185058434, 53.36853027839999, 258.6513671926, 75.7802734592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048228.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[0, 168.988159154, 99.2501831168, 611.1640624877]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048228_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[0, 110.98815915399999, 99.2501831168, 553.1640624877]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048228.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[0, 168.988159154, 99.2501831168, 611.1640624877], [0.091430656, 239.2958373808, 251.3675536896, 682.9377441402], [0.1201782272, 315.758178731, 69.4050903552, 611.1640624877], [0.0196533248, 169.3708495863, 99.2501831168, 407.02001952319995], [51.6527710208, 202.2894287259, 160.2695312384, 318.82611084670003], [0, 168.988159154, 49.652954112, 210.0434570354]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048228_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[0, 110.98815915399999, 99.2501831168, 553.1640624877], [0.091430656, 181.2958373808, 124, 624.9377441402], [0.1201782272, 257.758178731, 69.4050903552, 553.1640624877], [0.0196533248, 111.37084958630001, 99.2501831168, 349.02001952319995], [51.6527710208, 144.2894287259, 124, 260.82611084670003], [0, 110.98815915399999, 49.652954112, 152.0434570354]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048230.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention.", "boxes_value": [[335.0624999936, 255.11578368, 456.6651611654, 344.1579589632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048230_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention.", "boxes_value": [[31.0624999936, 23.115783679999993, 152.66516116539998, 112.15795896319997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048230.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a desk, two pillows, and a bed.", "boxes_value": [[335.0624999936, 255.11578368, 456.6651611654, 344.1579589632], [351.93640138309996, 255.11578368, 375.3001709354, 311.7081909248], [335.0624999936, 309.8909912064, 384.1265869297, 344.1579589632], [400.2915039357, 281.2415161344, 459.2255859715, 306.7987060736], [406.6407470411, 271.3250732544, 456.6651611654, 285.0572509696], [359.0968017126, 271.0888671744, 555.5275879192, 390.938476544]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048230_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a desk, two pillows, and a bed.", "boxes_value": [[31.0624999936, 23.115783679999993, 152.66516116539998, 112.15795896319997], [47.93640138309996, 23.115783679999993, 71.30017093539999, 79.70819092480002], [31.0624999936, 77.89099120639997, 80.12658692970001, 112.15795896319997], [96.2915039357, 49.2415161344, 155.22558597149998, 74.79870607359999], [102.64074704109998, 39.325073254400024, 152.66516116539998, 53.05725096959998], [55.096801712599984, 39.08886717439998, 183, 134]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048231.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[112.48059081980001, 399.8638305792, 223.61804195219997, 465.7933960192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048231_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[28.480590819800014, 16.8638305792, 139.61804195219997, 82.79339601919997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048231.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[112.48059081980001, 399.8638305792, 223.61804195219997, 465.7933960192], [69.12182615100001, 161.509033216, 198.78247067139995, 465.9584961024], [152.5369872768, 210.8375244288, 312.8547363578, 446.6895141376], [191.90600588040002, 414.8796386816, 223.61804195219997, 446.8825683456], [112.48059081980001, 442.5185546752, 151.46594240820002, 465.7933960192], [114.75164796479999, 399.8638305792, 145.8521118348, 427.7579956224]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048231_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[28.480590819800014, 16.8638305792, 139.61804195219997, 82.79339601919997], [0, 0, 114.78247067139995, 82.95849610239998], [68.5369872768, 0, 167, 63.689514137599986], [107.90600588040002, 31.8796386816, 139.61804195219997, 63.88256834560002], [28.480590819800014, 59.518554675199994, 67.46594240820002, 82.79339601919997], [30.751647964799986, 16.8638305792, 61.85211183480001, 44.75799562240002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048232.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each object you identify.", "boxes_value": [[275.3072509515, 105.572021504, 599.9631347649, 312.7190551552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048232_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each object you identify.", "boxes_value": [[81.30725095150001, 52.572021504000006, 405.9631347649, 259.7190551552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048232.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a necklace, a book, a hat, and a tablet.", "boxes_value": [[275.3072509515, 105.572021504, 599.9631347649, 312.7190551552], [478.8070068081, 106.2276000768, 630.3048095547, 510.339904768], [215.37493894829998, 140.7826537984, 348.8453368869, 367.0037231616], [275.3072509515, 233.6983642624, 312.6019287204, 292.9085083136], [288.629272443, 282.044921856, 348.2247314622, 312.7190551552], [539.2930907949, 105.572021504, 599.9631347649, 148.1474609152], [437.81140136159996, 219.4075927552, 509.6400146862, 260.8986816512]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048232_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, a necklace, a book, a hat, and a tablet.", "boxes_value": [[81.30725095150001, 52.572021504000006, 405.9631347649, 259.7190551552], [284.8070068081, 53.2276000768, 436.3048095547, 311], [21.374938948299985, 87.78265379839999, 154.8453368869, 311], [81.30725095150001, 180.6983642624, 118.6019287204, 239.9085083136], [94.62927244299999, 229.04492185599997, 154.2247314622, 259.7190551552], [345.29309079489997, 52.572021504000006, 405.9631347649, 95.14746091520001], [243.81140136159996, 166.4075927552, 315.6400146862, 207.89868165119998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048242.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[131.169590912, 222.023824464, 413.40222169599997, 433.96801756800005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048242_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[71.16959091199999, 53.023824464, 353.40222169599997, 264.96801756800005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048242.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four cabinets, a gas stove, and an oven.", "boxes_value": [[131.169590912, 222.023824464, 413.40222169599997, 433.96801756800005], [254.490966784, 226.696594224, 336.80230713599997, 326.47802736], [335.458435072, 226.36059571200002, 413.40222169599997, 325.80609129600003], [144.003295872, 282.35198976, 212.25213625599997, 477.16345214399996], [204.47692870400002, 259.89038087999995, 229.098327616, 433.96801756800005], [131.169590912, 222.023824464, 255.704602432, 290.697452208], [231.169616704, 241.81672185600002, 261.63997580800003, 391.704884832]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048242_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include four cabinets, a gas stove, and an oven.", "boxes_value": [[71.16959091199999, 53.023824464, 353.40222169599997, 264.96801756800005], [194.490966784, 57.696594223999995, 276.80230713599997, 157.47802736], [275.458435072, 57.36059571200002, 353.40222169599997, 156.80609129600003], [84.003295872, 113.35198975999998, 152.25213625599997, 308.16345214399996], [144.47692870400002, 90.89038087999995, 169.098327616, 264.96801756800005], [71.16959091199999, 53.023824464, 195.704602432, 121.69745220800002], [171.169616704, 72.81672185600002, 201.63997580800003, 222.704884832]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048243.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[251.592895488, 351.2460937408, 448.4161987072, 518.2014160056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048243_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[49.59289548800001, 42.246093740800006, 246.4161987072, 209.20141600559998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048243.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include six plates.", "boxes_value": [[251.592895488, 351.2460937408, 448.4161987072, 518.2014160056], [251.592895488, 437.0651245158, 448.4161987072, 518.2014160056], [360.085449216, 390.7994995095, 511.6328124928, 451.014343283], [255.778930688, 399.63250733440003, 341.1666870272, 431.6528930631], [306.2999878144, 371.1699218859, 361.8020630016, 400.3440551627], [380.3027343872, 360.4963989248, 504.1149902336, 397.49780271080004], [260.048278784, 351.2460937408, 369.6292724736, 374.0161743378]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048243_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include six plates.", "boxes_value": [[49.59289548800001, 42.246093740800006, 246.4161987072, 209.20141600559998], [49.59289548800001, 128.0651245158, 246.4161987072, 209.20141600559998], [158.08544921599997, 81.79949950949998, 295, 142.01434328300002], [53.778930688, 90.63250733440003, 139.1666870272, 122.65289306310001], [104.29998781440003, 62.16992188590001, 159.8020630016, 91.34405516269999], [178.30273438720002, 51.49639892480002, 295, 88.49780271080004], [58.04827878399999, 42.246093740800006, 167.6292724736, 65.01617433780001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048244.jpg", "text": "What's going on in the section of contained within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.8530883584, 107.778503424, 256.04443361280005, 175.5682983424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048244_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.8530883584, 17.778503423999993, 256.04443361280005, 85.5682983424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048244.jpg", "text": "What's going on in the section of contained within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two desks, and three chairs.", "boxes_value": [[44.8530883584, 107.778503424, 256.04443361280005, 175.5682983424], [44.8530883584, 129.9405517824, 134.1531372288, 168.7241821184], [33.7720947456, 124.7259521536, 54.956359833600004, 166.4427490304], [160.87799070719998, 123.4223022592, 186.299194368, 171.3314819584], [149.4710693376, 112.0153808384, 204.87622072320002, 141.6734008832], [198.6838378752, 107.778503424, 256.04443361280005, 175.5682983424]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00048244_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two desks, and three chairs.", "boxes_value": [[44.8530883584, 17.778503423999993, 256.04443361280005, 85.5682983424], [44.8530883584, 39.94055178240001, 134.1531372288, 78.7241821184], [33.7720947456, 34.725952153600005, 54.956359833600004, 76.4427490304], [160.87799070719998, 33.422302259199995, 186.299194368, 81.33148195839999], [149.4710693376, 22.015380838400006, 204.87622072320002, 51.6734008832], [198.6838378752, 17.778503423999993, 256.04443361280005, 85.5682983424]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00048246.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.4505615284, 210.4911499264, 771.3666992184, 352.3571167232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048246_crop.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.4505615284, 35.4911499264, 771.3666992184, 177.35711672320002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048246.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four boats, three street lights, and a sailboat.", "boxes_value": [[34.4505615284, 210.4911499264, 771.3666992184, 352.3571167232], [10.7668456836, 248.7307739136, 77.081298824, 281.888000512], [34.4505615284, 220.5471191552, 57.423767096, 300.1244506624], [81.4676513576, 199.5477295104, 115.24450685960001, 317.8899536384], [106.7795409872, 114.6516723712, 167.0396118424, 363.0280761856], [434.57238766319995, 294.965881344, 531.304565428, 325.5128784384], [589.488769508, 210.4911499264, 692.512084974, 352.3571167232], [625.855590826, 297.9099121152, 681.9526367044, 328.9282836992], [735.4099121132, 317.7088623104, 771.3666992184, 338.2886962688]], "boxes_seq": [[0], [0], [1, 5, 7, 8], [2, 3, 4], [6]]}, {"image_path": "objects365_v1_00048246_crop.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four boats, three street lights, and a sailboat.", "boxes_value": [[34.4505615284, 35.4911499264, 771.3666992184, 177.35711672320002], [10.7668456836, 73.73077391359999, 77.081298824, 106.88800051200002], [34.4505615284, 45.547119155199994, 57.423767096, 125.12445066240002], [81.4676513576, 24.54772951039999, 115.24450685960001, 142.88995363840002], [106.7795409872, 0, 167.0396118424, 188.0280761856], [434.57238766319995, 119.96588134400002, 531.304565428, 150.51287843839998], [589.488769508, 35.4911499264, 692.512084974, 177.35711672320002], [625.855590826, 122.90991211519997, 681.9526367044, 153.9282836992], [735.4099121132, 142.70886231039998, 771.3666992184, 163.28869626879998]], "boxes_seq": [[0], [0], [1, 5, 7, 8], [2, 3, 4], [6]]}, {"image_path": "objects365_v1_00048247.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for each element you describe.", "boxes_value": [[268.5192260837, 268.7468261888, 312.4022216905, 342.2520752128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048247_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for each element you describe.", "boxes_value": [[11.519226083700005, 18.746826188800014, 55.40222169050003, 92.25207521279998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048247.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two boots, a gloves, and a hockey stick.", "boxes_value": [[268.5192260837, 268.7468261888, 312.4022216905, 342.2520752128], [253.7000121882, 204.3344726528, 354.35363770109996, 341.3236084224], [268.5192260837, 314.1745605632, 304.13989255530004, 342.2520752128], [288.2153320181, 315.012695296, 306.65429688300003, 335.1279297024], [290.2564697319, 268.7468261888, 312.4022216905, 285.4791869952], [232.7856445495, 301.0798340096, 370.8865966754, 334.7490844672]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048247_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, two boots, a gloves, and a hockey stick.", "boxes_value": [[11.519226083700005, 18.746826188800014, 55.40222169050003, 92.25207521279998], [0, 0, 66, 91.32360842240001], [11.519226083700005, 64.17456056319998, 47.13989255530004, 92.25207521279998], [31.215332018100014, 65.012695296, 49.65429688300003, 85.12792970240002], [33.25646973189998, 18.746826188800014, 55.40222169050003, 35.479186995199996], [0, 51.079834009600006, 66, 84.74908446720002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048248.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations.", "boxes_value": [[426.60302737920006, 389.8386840576, 508.300967808, 512.0012207104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048248_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations.", "boxes_value": [[20.603027379200057, 30.838684057600005, 102.300967808, 153]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048248.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations. For your reference, objects involved in this region include four people, a backpack, and a van.", "boxes_value": [[426.60302737920006, 389.8386840576, 508.300967808, 512.0012207104], [426.60302737920006, 389.8386840576, 449.8135986432, 435.8496704], [449.24975585280004, 431.320495616, 479.90026851839997, 512.0012207104], [431.4588622848, 431.2695312384, 456.81042478079996, 504.0739746304], [483.35827637759996, 450.6165771264, 531.7805175552, 511.8329467904], [463.8312896256, 472.3343235072, 508.300967808, 511.989474304], [490.6778564352, 427.1511230464, 523.5455322624, 452.7448730624]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048248_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Please mention the objects and their locations. For your reference, objects involved in this region include four people, a backpack, and a van.", "boxes_value": [[20.603027379200057, 30.838684057600005, 102.300967808, 153], [20.603027379200057, 30.838684057600005, 43.81359864320001, 76.84967039999998], [43.249755852800035, 72.32049561600002, 73.90026851839997, 153], [25.458862284799977, 72.26953123840002, 50.81042478079996, 145.07397463040002], [77.35827637759996, 91.61657712639999, 122, 152.8329467904], [57.83128962559999, 113.3343235072, 102.300967808, 152.989474304], [84.6778564352, 68.15112304640002, 117.5455322624, 93.74487306240002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048249.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object.", "boxes_value": [[362.55578614, 189.2324218512, 898.6737060621, 492.05828856840003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048249_crop.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object.", "boxes_value": [[134.55578614, 76.2324218512, 670.6737060621, 379]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048249.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two guitars, a person, and two speakers.", "boxes_value": [[362.55578614, 189.2324218512, 898.6737060621, 492.05828856840003], [695.0230712754, 223.7313842628, 898.6737060621, 334.09686278640004], [362.55578614, 266.8740234348, 387.6612549258, 339.74975587560004], [683.0551757587, 85.1497802772, 883.0866699257999, 459.25897218600005], [165.67431641250002, 335.82312011159996, 652.4455566383, 491.9645385588], [456.8308105445, 189.2324218512, 702.0866698923, 492.05828856840003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048249_crop.jpg", "text": "Please enlighten me about the region in the given photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two guitars, a person, and two speakers.", "boxes_value": [[134.55578614, 76.2324218512, 670.6737060621, 379], [467.02307127539996, 110.73138426279999, 670.6737060621, 221.09686278640004], [134.55578614, 153.8740234348, 159.6612549258, 226.74975587560004], [455.0551757587, 0, 655.0866699257999, 346.25897218600005], [0, 222.82312011159996, 424.4455566383, 378.9645385588], [228.83081054450003, 76.2324218512, 474.08666989230005, 379]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048250.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[96.957275392, 98.6991577377, 338.8549804544, 391.65527346010003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048250_crop.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[60.957275392, 73.6991577377, 302.8549804544, 366.65527346010003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048250.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two bottles, two cups, and two speakers.", "boxes_value": [[96.957275392, 98.6991577377, 338.8549804544, 391.65527346010003], [218.6737060352, 310.2658691314, 241.0390624768, 389.3898925566], [231.4760131584, 371.7301025139, 253.7651977728, 391.65527346010003], [112.646118144, 367.6926269393, 138.534423808, 380.7624511353], [323.7743530496, 309.8836669721, 338.8549804544, 362.16308593919996], [96.957275392, 126.6456909237, 126.517639168, 170.5006103801], [253.5073852416, 98.6991577377, 281.6810302976, 132.2869262988]], "boxes_seq": [[0], [0], [1, 4], [2, 3], [5, 6]]}, {"image_path": "objects365_v1_00048250_crop.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two bottles, two cups, and two speakers.", "boxes_value": [[60.957275392, 73.6991577377, 302.8549804544, 366.65527346010003], [182.6737060352, 285.2658691314, 205.0390624768, 364.3898925566], [195.4760131584, 346.7301025139, 217.7651977728, 366.65527346010003], [76.646118144, 342.6926269393, 102.53442380800001, 355.7624511353], [287.7743530496, 284.8836669721, 302.8549804544, 337.16308593919996], [60.957275392, 101.6456909237, 90.517639168, 145.5006103801], [217.5073852416, 73.6991577377, 245.6810302976, 107.28692629880001]], "boxes_seq": [[0], [0], [1, 4], [2, 3], [5, 6]]}, {"image_path": "objects365_v1_00048252.jpg", "text": "Please give me some details about the rectangle in the image . Give coordinates for the items you reference.", "boxes_value": [[74.0477295104, 576.6666259883, 450.9725341696, 628.4217528986001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048252_crop.jpg", "text": "Please give me some details about the rectangle in the image . Give coordinates for the items you reference.", "boxes_value": [[74.0477295104, 13.666625988299984, 450.9725341696, 65.42175289860006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048252.jpg", "text": "Please give me some details about the rectangle in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five cars.", "boxes_value": [[74.0477295104, 576.6666259883, 450.9725341696, 628.4217528986001], [74.0477295104, 576.6666259883, 165.258667008, 613.9442138511], [183.0414428672, 582.2951660157, 246.2147216896, 624.2102051048], [204.0992431616, 586.1055908464, 327.6380615168, 628.4217528986001], [301.3660278272, 583.0974120784999, 391.4130249216, 623.2073974580001], [380.6654052864, 583.124877899, 450.9725341696, 619.5677489911]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048252_crop.jpg", "text": "Please give me some details about the rectangle in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five cars.", "boxes_value": [[74.0477295104, 13.666625988299984, 450.9725341696, 65.42175289860006], [74.0477295104, 13.666625988299984, 165.258667008, 50.94421385110002], [183.0414428672, 19.295166015700033, 246.2147216896, 61.210205104800025], [204.0992431616, 23.10559084639999, 327.6380615168, 65.42175289860006], [301.3660278272, 20.09741207849993, 391.4130249216, 60.20739745800006], [380.6654052864, 20.124877899000012, 450.9725341696, 56.56774899109996]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048253.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 291.7370518016, 368.5285644267, 437.709838848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048253_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 36.73705180159999, 368.5285644267, 182.709838848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048253.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a boots, and three hockey sticks.", "boxes_value": [[0, 291.7370518016, 368.5285644267, 437.709838848], [71.050964361, 99.256774912, 331.3320312348, 420.293090816], [313.4907226638, 371.4705200128, 368.5285644267, 432.046813952], [0, 346.8127490048, 257.2463479572, 415.487383808], [15.1852589553, 291.7370518016, 308.6554565679, 412.5077514752], [246.31347657420002, 417.8134765568, 315.9508056765, 437.709838848]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048253_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a boots, and three hockey sticks.", "boxes_value": [[0, 36.73705180159999, 368.5285644267, 182.709838848], [71.050964361, 0, 331.3320312348, 165.29309081600002], [313.4907226638, 116.47052001280002, 368.5285644267, 177.04681395199998], [0, 91.81274900480003, 257.2463479572, 160.487383808], [15.1852589553, 36.73705180159999, 308.6554565679, 157.5077514752], [246.31347657420002, 162.81347655680003, 315.9508056765, 182.709838848]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048255.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give coordinates for the items you reference.", "boxes_value": [[29.263183573400003, 457.2179565568, 586.0247802806, 488.527709952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048255_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give coordinates for the items you reference.", "boxes_value": [[29.263183573400003, 8.217956556800004, 586.0247802806, 39.52770995200001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048255.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[29.263183573400003, 457.2179565568, 586.0247802806, 488.527709952], [565.7156982693999, 457.2179565568, 586.0247802806, 488.527709952], [535.8162841506, 481.475952128, 557.535644526, 496.4257202176], [241.5183105422, 459.351135232, 269.8582763476, 471.9752807424], [59.447021464799995, 467.4165039104, 90.716613792, 488.045776384], [29.263183573400003, 469.153686528, 52.4982299648, 486.5256957952]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048255_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[29.263183573400003, 8.217956556800004, 586.0247802806, 39.52770995200001], [565.7156982693999, 8.217956556800004, 586.0247802806, 39.52770995200001], [535.8162841506, 32.47595212800002, 557.535644526, 47], [241.5183105422, 10.35113523199999, 269.8582763476, 22.975280742400003], [59.447021464799995, 18.416503910400024, 90.716613792, 39.04577638400002], [29.263183573400003, 20.15368652799998, 52.4982299648, 37.52569579520002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048256.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[274.9658813612, 220.9108276224, 415.1389159908, 403.6530761728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048256_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[35.96588136119999, 45.91082762240001, 176.1389159908, 228.6530761728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048256.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, a desk, and a trash bin can.", "boxes_value": [[274.9658813612, 220.9108276224, 415.1389159908, 403.6530761728], [239.646545418, 239.6755371008, 301.27343752, 307.7144775168], [274.9658813612, 304.3468017664, 353.8387450936, 403.6530761728], [344.43939211, 267.1580810752, 415.1389159908, 383.2196655104], [283.1392822012, 294.5388183552, 424.1296386784, 409.3743896576], [291.3108520532, 220.9108276224, 323.1790771516, 290.7699585024]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048256_crop.jpg", "text": "Can you give a brief explanation of the specified area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, a desk, and a trash bin can.", "boxes_value": [[35.96588136119999, 45.91082762240001, 176.1389159908, 228.6530761728], [0.6465454179999881, 64.6755371008, 62.273437520000016, 132.7144775168], [35.96588136119999, 129.3468017664, 114.83874509359998, 228.6530761728], [105.43939210999997, 92.15808107520002, 176.1389159908, 208.21966551039998], [44.13928220119999, 119.53881835520002, 185.1296386784, 234.37438965759998], [52.310852053199994, 45.91082762240001, 84.17907715159998, 115.76995850240002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048259.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[499.15991212060004, 145.0619506688, 620.7288818574, 262.0093993984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048259_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[31.15991212060004, 30.061950668799994, 152.72888185739998, 147.00939939839998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048259.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, two people, a glasses, and a hat.", "boxes_value": [[499.15991212060004, 145.0619506688, 620.7288818574, 262.0093993984], [485.6704101522, 201.5082397696, 515.6848144568, 267.8192138752], [569.7094726782, 145.0619506688, 605.09436033, 192.241760256], [560.4786377027999, 193.2673950208, 590.7352294632, 213.78039552], [457.9981689286, 186.8967285248, 621.7879638698, 479.12072755199995], [452.22583010359995, 171.7443237376, 580.6601562514, 376.6619262464], [499.15991212060004, 195.8770752, 547.4880370828, 208.0485839872], [555.6340332124, 184.487304704, 620.7288818574, 262.0093993984]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048259_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, two people, a glasses, and a hat.", "boxes_value": [[31.15991212060004, 30.061950668799994, 152.72888185739998, 147.00939939839998], [17.67041015220002, 86.50823976960001, 47.68481445680004, 152.81921387519998], [101.70947267819997, 30.061950668799994, 137.09436032999997, 77.24176025599999], [92.47863770279992, 78.2673950208, 122.73522946319997, 98.78039552000001], [0, 71.89672852480001, 153.78796386980002, 176], [0, 56.74432373760001, 112.66015625140005, 176], [31.15991212060004, 80.87707520000001, 79.48803708280002, 93.0485839872], [87.63403321240003, 69.487304704, 152.72888185739998, 147.00939939839998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048262.jpg", "text": "Please describe the area in the image for me. Provide the coordinates for all objects that you mention.", "boxes_value": [[478.3895263449, 414.841491712, 605.281616202, 489.97375488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048262_crop.jpg", "text": "Please describe the area in the image for me. Provide the coordinates for all objects that you mention.", "boxes_value": [[32.38952634489999, 18.841491711999993, 159.281616202, 93.97375488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048262.jpg", "text": "Please describe the area in the image for me. Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, a flower, and a napkin.", "boxes_value": [[478.3895263449, 414.841491712, 605.281616202, 489.97375488], [491.7940673496, 438.5908203008, 539.3724364866, 474.097106944], [508.8188476437, 445.8533935616, 565.7648926091999, 489.97375488], [559.27624512, 429.9477538816, 605.281616202, 464.9667358208], [505.0311279564, 414.841491712, 534.5570067981, 438.8741454848], [478.3895263449, 450.5337524224, 505.9388427558, 473.9247436288]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00048262_crop.jpg", "text": "Please describe the area in the image for me. Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three chairs, a flower, and a napkin.", "boxes_value": [[32.38952634489999, 18.841491711999993, 159.281616202, 93.97375488], [45.79406734960003, 42.590820300799976, 93.37243648660001, 78.09710694400002], [62.818847643699996, 49.8533935616, 119.76489260919993, 93.97375488], [113.27624512, 33.94775388160002, 159.281616202, 68.96673582080001], [59.031127956399985, 18.841491711999993, 88.55700679810002, 42.87414548480001], [32.38952634489999, 54.53375242240003, 59.93884275580001, 77.9247436288]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00048263.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.09167480320001, 145.6578369024, 408.54443358719993, 511.6537475584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048263_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.09167480320001, 91.65783690239999, 408.54443358719993, 457.6537475584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048263.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a stool, three guitars, six drums, two people, a bracelet, a tripod, and a speaker.", "boxes_value": [[50.09167480320001, 145.6578369024, 408.54443358719993, 511.6537475584], [362.51428224, 194.7784423936, 413.17407229439993, 268.1477661184], [72.72308352, 361.9082641408, 494.63537594879995, 464.850097664], [0, 265.2874756096, 117.87298583039998, 414.282165504], [47.4391479552, 220.1375732224, 131.4179687424, 271.6084594688], [146.7689819136, 156.927673344, 192.82189939199998, 202.9805907968], [92.58905026560001, 151.5097046016, 149.4779662848, 208.3985595904], [117.87298583039998, 203.8836059648, 206.36682132480001, 280.6384277504], [44.730163583999996, 191.2416381952, 117.87298583039998, 228.2645263872], [279.5096435712, 130.7407226368, 408.63842772480007, 184.9206542848], [9.5132446464, 155.1217040896, 85.365051264, 179.502624512], [50.09167480320001, 145.6578369024, 408.54443358719993, 511.6537475584], [274.5263672064, 78.9915161088, 374.7194823936, 281.422729472], [342.354125952, 461.9807739392, 369.6828613632, 490.7867431424], [355.50329587199997, 100.3939209216, 399.7716064512, 251.755187968], [304.732665984, 212.9361572352, 367.6441650432, 284.1254272512]], "boxes_seq": [[0], [0], [1], [2, 9, 10], [3, 4, 5, 6, 7, 8], [11, 12], [13], [14], [15]]}, {"image_path": "objects365_v1_00048263_crop.jpg", "text": "Can you break down the region in the image for me? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a stool, three guitars, six drums, two people, a bracelet, a tripod, and a speaker.", "boxes_value": [[50.09167480320001, 91.65783690239999, 408.54443358719993, 457.6537475584], [362.51428224, 140.7784423936, 413.17407229439993, 214.1477661184], [72.72308352, 307.9082641408, 494.63537594879995, 410.850097664], [0, 211.2874756096, 117.87298583039998, 360.282165504], [47.4391479552, 166.1375732224, 131.4179687424, 217.6084594688], [146.7689819136, 102.927673344, 192.82189939199998, 148.9805907968], [92.58905026560001, 97.50970460159999, 149.4779662848, 154.3985595904], [117.87298583039998, 149.8836059648, 206.36682132480001, 226.6384277504], [44.730163583999996, 137.2416381952, 117.87298583039998, 174.2645263872], [279.5096435712, 76.7407226368, 408.63842772480007, 130.9206542848], [9.5132446464, 101.1217040896, 85.365051264, 125.50262451200001], [50.09167480320001, 91.65783690239999, 408.54443358719993, 457.6537475584], [274.5263672064, 24.9915161088, 374.7194823936, 227.42272947200001], [342.354125952, 407.9807739392, 369.6828613632, 436.7867431424], [355.50329587199997, 46.3939209216, 399.7716064512, 197.755187968], [304.732665984, 158.9361572352, 367.6441650432, 230.1254272512]], "boxes_seq": [[0], [0], [1], [2, 9, 10], [3, 4, 5, 6, 7, 8], [11, 12], [13], [14], [15]]}, {"image_path": "objects365_v1_00048265.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[101.92523194259999, 36.2667236352, 245.30694579540003, 360.4869995008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048265_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[35.92523194259999, 36.2667236352, 179.30694579540003, 360.4869995008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048265.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and four hats.", "boxes_value": [[101.92523194259999, 36.2667236352, 245.30694579540003, 360.4869995008], [101.92523194259999, 68.0706176512, 207.98608397579997, 360.4869995008], [172.69970706269999, 90.8921508864, 269.5680541737, 406.709533696], [197.1323242119, 88.406677248, 245.30694579540003, 124.614318848], [137.9113159005, 66.0070190592, 187.6201172154, 99.7599487488], [124.8322753755, 53.8143921152, 146.957641569, 70.0269165056], [135.8949585078, 36.2667236352, 165.45898438019998, 52.0977783296]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048265_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and four hats.", "boxes_value": [[35.92523194259999, 36.2667236352, 179.30694579540003, 360.4869995008], [35.92523194259999, 68.0706176512, 141.98608397579997, 360.4869995008], [106.69970706269999, 90.8921508864, 203.56805417369998, 406.709533696], [131.1323242119, 88.406677248, 179.30694579540003, 124.614318848], [71.9113159005, 66.0070190592, 121.6201172154, 99.7599487488], [58.832275375500004, 53.8143921152, 80.957641569, 70.0269165056], [69.8949585078, 36.2667236352, 99.45898438019998, 52.0977783296]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048266.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference.", "boxes_value": [[150.7706298557, 134.2322998272, 308.42376711220004, 238.585021952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048266_crop.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference.", "boxes_value": [[39.77062985570001, 26.232299827199995, 197.42376711220004, 130.585021952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048266.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, a cabinet, and a plate.", "boxes_value": [[150.7706298557, 134.2322998272, 308.42376711220004, 238.585021952], [256.7858276344, 198.1368408064, 289.1238403137, 226.8076171776], [215.1132202149, 195.1364135936, 248.4512939248, 226.14086912], [150.7706298557, 180.8010253824, 182.10845945230002, 221.4735107584], [190.7655029616, 175.18371584, 308.42376711220004, 238.585021952], [237.469543464, 134.2322998272, 284.4979858127, 176.3837280256]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048266_crop.jpg", "text": "Tell me about the region of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, a cabinet, and a plate.", "boxes_value": [[39.77062985570001, 26.232299827199995, 197.42376711220004, 130.585021952], [145.7858276344, 90.13684080639999, 178.1238403137, 118.80761717760001], [104.1132202149, 87.1364135936, 137.4512939248, 118.14086911999999], [39.77062985570001, 72.80102538240001, 71.10845945230002, 113.47351075840001], [79.76550296159999, 67.18371583999999, 197.42376711220004, 130.585021952], [126.469543464, 26.232299827199995, 173.4979858127, 68.38372802559999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048267.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[165.7757055179, 313.0081058304, 324.0818474235, 462.37672448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048267_crop.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[39.77570551790001, 38.008105830399984, 198.0818474235, 187.37672448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048267.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four sneakers, and a gloves.", "boxes_value": [[165.7757055179, 313.0081058304, 324.0818474235, 462.37672448], [165.7757055179, 429.8402912256, 187.6881287413, 462.37672448], [230.2481237574, 406.3818603008, 251.4067085079, 438.11968128], [298.783539578, 371.8841677824, 324.0818474235, 402.2421371904], [203.5699081623, 353.0254291968, 234.3878468524, 386.1432140288], [200.81009274, 313.0081058304, 229.3281852833, 337.8464444928]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048267_crop.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four sneakers, and a gloves.", "boxes_value": [[39.77570551790001, 38.008105830399984, 198.0818474235, 187.37672448], [39.77570551790001, 154.84029122560003, 61.688128741300005, 187.37672448], [104.24812375740001, 131.38186030079999, 125.40670850789999, 163.11968128], [172.783539578, 96.88416778240003, 198.0818474235, 127.2421371904], [77.56990816230001, 78.02542919680002, 108.3878468524, 111.1432140288], [74.81009273999999, 38.008105830399984, 103.3281852833, 62.8464444928]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048269.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[113.2940063232, 307.0591430656, 299.381591808, 469.9326171648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048269_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[47.294006323199994, 41.05914306559998, 233.381591808, 203.93261716479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048269.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two potted plants, a vase, a person, and a bicycle.", "boxes_value": [[113.2940063232, 307.0591430656, 299.381591808, 469.9326171648], [159.22985840639998, 223.847473152, 250.0877685504, 456.7183227392], [249.4842529536, 326.5523681792, 273.999633792, 355.7058105344], [113.2940063232, 415.2989501952, 167.9276733696, 469.9326171648], [270.81079104, 276.5872802816, 284.3642578176, 348.87225344], [254.46154782719998, 307.0591430656, 299.381591808, 357.4693603328]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048269_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two potted plants, a vase, a person, and a bicycle.", "boxes_value": [[47.294006323199994, 41.05914306559998, 233.381591808, 203.93261716479998], [93.22985840639998, 0, 184.0877685504, 190.7183227392], [183.4842529536, 60.552368179200016, 207.999633792, 89.70581053439997], [47.294006323199994, 149.2989501952, 101.9276733696, 203.93261716479998], [204.81079104000003, 10.587280281599988, 218.3642578176, 82.87225344000001], [188.46154782719998, 41.05914306559998, 233.381591808, 91.46936033280002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048270.jpg", "text": "Kindly share your observations about the rectangular region within . Provide the coordinates for all objects that you mention.", "boxes_value": [[547.30297853, 169.6302490112, 647.75451664, 452.0289916928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048270_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Provide the coordinates for all objects that you mention.", "boxes_value": [[25.30297853000002, 70.6302490112, 125.75451664000002, 353.0289916928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048270.jpg", "text": "Kindly share your observations about the rectangular region within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two backpacks, and three boots.", "boxes_value": [[547.30297853, 169.6302490112, 647.75451664, 452.0289916928], [506.36450195099997, 200.0338745344, 577.06933592, 316.7681884672], [552.326049814, 169.6302490112, 597.067382835, 256.2430419968], [575.030151388, 366.9310913024, 619.283813456, 398.7539673088], [547.30297853, 390.1022339072, 575.707641586, 429.4750366208], [590.937622044, 415.1959228416, 647.75451664, 452.0289916928]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048270_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two backpacks, and three boots.", "boxes_value": [[25.30297853000002, 70.6302490112, 125.75451664000002, 353.0289916928], [0, 101.03387453440001, 55.06933591999996, 217.7681884672], [30.326049814000044, 70.6302490112, 75.06738283499999, 157.2430419968], [53.030151388000036, 267.9310913024, 97.28381345599996, 299.7539673088], [25.30297853000002, 291.1022339072, 53.70764158600002, 330.4750366208], [68.93762204400002, 316.1959228416, 125.75451664000002, 353.0289916928]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048271.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[22.565917952, 0, 474.2598266368, 485.05432129919996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048271_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object.", "boxes_value": [[22.565917952, 0, 474.2598266368, 485.05432129919996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048271.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, a desk, a bed, two pillows, and a lamp.", "boxes_value": [[22.565917952, 0, 474.2598266368, 485.05432129919996], [395.9196167168, 357.5432128681, 474.2598266368, 485.05432129919996], [211.736938496, 357.12646484569996, 279.2427978752, 418.7984618908], [272.1588745216, 382.1286621363, 398.83654784, 472.5532226539], [0.1673584128, 330.3156738565, 305.5510864384, 576.3353271397], [22.565917952, 366.3154297112, 77.7465209856, 427.32360840150005], [8.3610229248, 361.5804443091, 51.5220947456, 409.8406982625], [168.2068481536, 0, 294.2305908224, 118.0216064223]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048271_crop.jpg", "text": "Tell me about the region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, a desk, a bed, two pillows, and a lamp.", "boxes_value": [[22.565917952, 0, 474.2598266368, 485.05432129919996], [395.9196167168, 357.5432128681, 474.2598266368, 485.05432129919996], [211.736938496, 357.12646484569996, 279.2427978752, 418.7984618908], [272.1588745216, 382.1286621363, 398.83654784, 472.5532226539], [0.1673584128, 330.3156738565, 305.5510864384, 576.3353271397], [22.565917952, 366.3154297112, 77.7465209856, 427.32360840150005], [8.3610229248, 361.5804443091, 51.5220947456, 409.8406982625], [168.2068481536, 0, 294.2305908224, 118.0216064223]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048273.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[12.6074218496, 221.97955320480003, 165.5279541248, 305.955688485]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048273_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[12.6074218496, 21.979553204800027, 165.5279541248, 105.955688485]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048273.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a cleaning products, two bottles, and a cup.", "boxes_value": [[12.6074218496, 221.97955320480003, 165.5279541248, 305.955688485], [29.5144653312, 226.43841550439998, 141.5968627712, 295.53033444839997], [12.6074218496, 221.97955320480003, 47.4335327232, 304.6061401356], [154.4276123136, 237.9250488306, 165.5279541248, 286.7183837682], [20.6393432576, 241.9025878806, 47.4251708928, 305.955688485], [88.832153344, 274.9895629656, 105.0534057472, 299.4461669682]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048273_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a cleaning products, two bottles, and a cup.", "boxes_value": [[12.6074218496, 21.979553204800027, 165.5279541248, 105.955688485], [29.5144653312, 26.438415504399984, 141.5968627712, 95.53033444839997], [12.6074218496, 21.979553204800027, 47.4335327232, 104.60614013560001], [154.4276123136, 37.92504883059999, 165.5279541248, 86.7183837682], [20.6393432576, 41.90258788060001, 47.4251708928, 105.955688485], [88.832153344, 74.98956296559999, 105.0534057472, 99.44616696819998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048274.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 669.556152366, 176.6951293952, 722.5430907892]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048274_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 13.556152365999992, 176.6951293952, 66.54309078920005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048274.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cars, and a van.", "boxes_value": [[0, 669.556152366, 176.6951293952, 722.5430907892], [23.1553955328, 683.5535888428, 39.4766235136, 712.3422851556], [0, 683.7802734416, 32.9027709952, 722.5430907892], [101.7508545024, 669.556152366, 128.89733888, 697.2022705368], [115.0742797824, 678.0498046776, 137.2244873216, 699.0343017296], [135.0594482176, 672.2208251624, 176.6951293952, 705.6959228484]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048274_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four cars, and a van.", "boxes_value": [[0, 13.556152365999992, 176.6951293952, 66.54309078920005], [23.1553955328, 27.553588842799968, 39.4766235136, 56.34228515560005], [0, 27.780273441600002, 32.9027709952, 66.54309078920005], [101.7508545024, 13.556152365999992, 128.89733888, 41.20227053680003], [115.0742797824, 22.049804677600036, 137.2244873216, 43.034301729599974], [135.0594482176, 16.220825162400047, 176.6951293952, 49.69592284839996]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048275.jpg", "text": "Help me grasp the context of the region within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[283.9980468877, 192.6899414016, 498.2309570084, 494.3648681472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048275_crop.jpg", "text": "Help me grasp the context of the region within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.99804688770001, 75.68994140160001, 268.2309570084, 377.3648681472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048275.jpg", "text": "Help me grasp the context of the region within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[283.9980468877, 192.6899414016, 498.2309570084, 494.3648681472], [283.9980468877, 192.6899414016, 357.44934080170003, 426.5973510656], [327.7190551796, 211.052795392, 416.4726562655, 421.7880248832], [395.48657226079996, 228.5411987456, 498.2309570084, 494.3648681472], [397.0641967373, 469.856392192, 439.0025468896, 494.0921221632], [459.65560378249995, 429.3932603392, 491.2674255225, 447.5173714432]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048275_crop.jpg", "text": "Help me grasp the context of the region within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[53.99804688770001, 75.68994140160001, 268.2309570084, 377.3648681472], [53.99804688770001, 75.68994140160001, 127.44934080170003, 309.5973510656], [97.71905517959999, 94.05279539200001, 186.4726562655, 304.7880248832], [165.48657226079996, 111.54119874560001, 268.2309570084, 377.3648681472], [167.0641967373, 352.856392192, 209.0025468896, 377.0921221632], [229.65560378249995, 312.3932603392, 261.2674255225, 330.5173714432]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048279.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object.", "boxes_value": [[530.0664062208, 324.4160766464, 684.33837888, 410.6735839744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048279_crop.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object.", "boxes_value": [[39.06640622079999, 22.416076646399972, 193.33837888000005, 108.6735839744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048279.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include a leather shoes, a high heels, a sneakers, a handbag, and a tablet.", "boxes_value": [[530.0664062208, 324.4160766464, 684.33837888, 410.6735839744], [560.0321044991999, 393.7075195392, 589.8736571904, 410.6735839744], [564.7795410432, 364.3096313344, 592.6063232256, 392.1364135936], [530.0664062208, 329.1525878784, 557.451293952, 345.6375732224], [648.7468261632, 350.7618408448, 684.33837888, 371.5235595776], [645.5068359168, 324.4160766464, 672.1975097856, 352.8349609472]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048279_crop.jpg", "text": "Help me understand the details within the area in photograph . Specify the location of each mentioned object. For your reference, objects involved in this region include a leather shoes, a high heels, a sneakers, a handbag, and a tablet.", "boxes_value": [[39.06640622079999, 22.416076646399972, 193.33837888000005, 108.6735839744], [69.03210449919993, 91.7075195392, 98.87365719039997, 108.6735839744], [73.77954104319997, 62.309631334400024, 101.60632322560002, 90.13641359360003], [39.06640622079999, 27.152587878400027, 66.45129395200001, 43.63757322240002], [157.7468261632, 48.76184084480002, 193.33837888000005, 69.5235595776], [154.50683591680001, 22.416076646399972, 181.1975097856, 50.83496094719999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048283.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[55.7209472416, 161.064880384, 415.3537597828, 337.0744018432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048283_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[55.7209472416, 44.06488038399999, 415.3537597828, 220.0744018432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048283.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an american football, two helmets, and two gloves.", "boxes_value": [[55.7209472416, 161.064880384, 415.3537597828, 337.0744018432], [357.42395016079996, 178.153686528, 415.3537597828, 224.1392211968], [55.7209472416, 161.064880384, 148.9909667916, 231.7695312384], [226.46520998719998, 198.6737670656, 303.9395751588, 242.3000488448], [250.53491209039998, 256.591430656, 327.2570800656, 337.0744018432], [340.4760742024, 175.8206787072, 373.313476574, 232.4127807488]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048283_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an american football, two helmets, and two gloves.", "boxes_value": [[55.7209472416, 44.06488038399999, 415.3537597828, 220.0744018432], [357.42395016079996, 61.15368652800001, 415.3537597828, 107.13922119680001], [55.7209472416, 44.06488038399999, 148.9909667916, 114.76953123839999], [226.46520998719998, 81.6737670656, 303.9395751588, 125.30004884479999], [250.53491209039998, 139.591430656, 327.2570800656, 220.0744018432], [340.4760742024, 58.82067870719999, 373.313476574, 115.4127807488]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048284.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[5.7348022272, 24.0274048012, 313.7030639616, 274.7802124318]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048284_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference.", "boxes_value": [[5.7348022272, 24.0274048012, 313.7030639616, 274.7802124318]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048284.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two helmets, and a gloves.", "boxes_value": [[5.7348022272, 24.0274048012, 313.7030639616, 274.7802124318], [119.9328613376, 25.0754394786, 408.1553955328, 474.00378420090004], [0.7135620096, 120.7753295788, 114.5053100544, 433.7026367517], [5.7348022272, 120.5897216651, 70.9106445312, 165.8221435256], [226.1385498112, 24.0274048012, 306.7377319424, 86.71557614459999], [267.93072512, 236.4707641512, 313.7030639616, 274.7802124318]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048284_crop.jpg", "text": "Please share details about the rectangular region within the image . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two helmets, and a gloves.", "boxes_value": [[5.7348022272, 24.0274048012, 313.7030639616, 274.7802124318], [119.9328613376, 25.0754394786, 390, 337], [0.7135620096, 120.7753295788, 114.5053100544, 337], [5.7348022272, 120.5897216651, 70.9106445312, 165.8221435256], [226.1385498112, 24.0274048012, 306.7377319424, 86.71557614459999], [267.93072512, 236.4707641512, 313.7030639616, 274.7802124318]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048286.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[3.9508056575999997, 275.578308096, 134.97619630079998, 458.2984619008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048286_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[3.9508056575999997, 46.578308096, 134.97619630079998, 229.29846190080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048286.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a speaker, and three chairs.", "boxes_value": [[3.9508056575999997, 275.578308096, 134.97619630079998, 458.2984619008], [8.1303710976, 275.578308096, 33.8836670208, 324.4659423744], [111.4151611392, 293.3887939584, 134.97619630079998, 325.9030151168], [14.666015616, 432.3900146688, 40.5744628992, 458.2984619008], [55.6386718464, 400.826477056, 84.7718505984, 416.3328247296], [3.9508056575999997, 388.6093139456, 25.5657348864, 404.5855712768]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048286_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a speaker, and three chairs.", "boxes_value": [[3.9508056575999997, 46.578308096, 134.97619630079998, 229.29846190080002], [8.1303710976, 46.578308096, 33.8836670208, 95.46594237440002], [111.4151611392, 64.38879395840002, 134.97619630079998, 96.90301511680002], [14.666015616, 203.3900146688, 40.5744628992, 229.29846190080002], [55.6386718464, 171.826477056, 84.7718505984, 187.33282472960002], [3.9508056575999997, 159.60931394559998, 25.5657348864, 175.5855712768]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048288.jpg", "text": "Can you provide some context for the area within the picture ? Specify the location of each mentioned object.", "boxes_value": [[269.45861813759996, 241.177612288, 635.9925537024001, 382.7921142784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048288_crop.jpg", "text": "Can you provide some context for the area within the picture ? Specify the location of each mentioned object.", "boxes_value": [[92.45861813759996, 36.177612288000006, 458.9925537024001, 177.7921142784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048288.jpg", "text": "Can you provide some context for the area within the picture ? Specify the location of each mentioned object. For your reference, objects involved in this region include three golf clubs, and two people.", "boxes_value": [[269.45861813759996, 241.177612288, 635.9925537024001, 382.7921142784], [269.45861813759996, 364.6500854272, 326.0051269632, 382.7921142784], [274.1708984064, 364.8857421824, 324.591430656, 390.802856448], [596.6027831808, 266.7398071296, 620.7308349696, 316.7832641536], [581.104858368, 241.177612288, 635.9925537024001, 344.3663940608], [260.0122070016, 319.218872064, 315.6616210944, 431.7273559552]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048288_crop.jpg", "text": "Can you provide some context for the area within the picture ? Specify the location of each mentioned object. For your reference, objects involved in this region include three golf clubs, and two people.", "boxes_value": [[92.45861813759996, 36.177612288000006, 458.9925537024001, 177.7921142784], [92.45861813759996, 159.6500854272, 149.00512696319998, 177.7921142784], [97.1708984064, 159.8857421824, 147.591430656, 185.802856448], [419.6027831808, 61.739807129600024, 443.7308349696, 111.78326415359999], [404.104858368, 36.177612288000006, 458.9925537024001, 139.36639406080002], [83.01220700160002, 114.21887206399998, 138.6616210944, 213]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048289.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[419.99377439700004, 25.667968768, 765.2656249830001, 447.3082885632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048289_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe.", "boxes_value": [[86.99377439700004, 25.667968768, 432, 447.3082885632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048289.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, and five people.", "boxes_value": [[419.99377439700004, 25.667968768, 765.2656249830001, 447.3082885632], [697.508300799, 25.667968768, 765.2656249830001, 359.1472167936], [504.43273922400004, 68.2734374912, 568.643066397, 134.5550537216], [540.3455810685, 201.5579833856, 568.6843261365, 286.5743408128], [496.033203087, 198.3016357376, 519.79553226, 290.1826782208], [419.99377439700004, 209.9188232192, 454.8450927555, 279.6216430592], [533.661987285, 362.8045043712, 567.715820301, 447.3082885632]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048289_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, and five people.", "boxes_value": [[86.99377439700004, 25.667968768, 432, 447.3082885632], [364.50830079900004, 25.667968768, 432, 359.1472167936], [171.43273922400004, 68.2734374912, 235.64306639699998, 134.5550537216], [207.3455810685, 201.5579833856, 235.68432613649998, 286.5743408128], [163.033203087, 198.3016357376, 186.79553225999996, 290.1826782208], [86.99377439700004, 209.9188232192, 121.84509275549999, 279.6216430592], [200.661987285, 362.8045043712, 234.715820301, 447.3082885632]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048291.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe.", "boxes_value": [[167.3853759744, 210.67059328, 378.5930175744, 491.28735349759995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048291_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe.", "boxes_value": [[53.385375974400006, 70.67059327999999, 264.5930175744, 351.28735349759995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048291.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, three people, and two handbags.", "boxes_value": [[167.3853759744, 210.67059328, 378.5930175744, 491.28735349759995], [217.06549071359998, 234.7595825152, 257.472656256, 315.2617797632], [336.701538048, 210.67059328, 349.9438476288, 244.8602905088], [297.9448242432, 215.7891235328, 378.5930175744, 487.97686768639994], [167.3853759744, 227.0256347648, 237.81500244480003, 491.28735349759995], [160.7557680384, 318.9984152576, 187.136164608, 357.7427080192], [268.0827750912, 256.0964124672, 286.031780352, 275.8701408256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048291_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, three people, and two handbags.", "boxes_value": [[53.385375974400006, 70.67059327999999, 264.5930175744, 351.28735349759995], [103.06549071359998, 94.75958251520001, 143.472656256, 175.26177976320002], [222.70153804799997, 70.67059327999999, 235.94384762879997, 104.8602905088], [183.9448242432, 75.7891235328, 264.5930175744, 347.97686768639994], [53.385375974400006, 87.0256347648, 123.81500244480003, 351.28735349759995], [46.75576803839999, 178.99841525760002, 73.136164608, 217.7427080192], [154.0827750912, 116.09641246720003, 172.031780352, 135.8701408256]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048296.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each mentioned object.", "boxes_value": [[201.24932863, 373.6726074346, 436.87524412, 440.4810791178]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048296_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each mentioned object.", "boxes_value": [[59.24932863000001, 17.672607434600025, 294.87524412, 84.4810791178]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048296.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a bench, a flower, three storage boxes, a lamp, and a pillow.", "boxes_value": [[201.24932863, 373.6726074346, 436.87524412, 440.4810791178], [172.72924802, 374.30102540070004, 240.01385499, 436.9130859296], [132.33465574, 380.7789306653, 253.28680419, 435.6359252711], [240.01617430000002, 404.8298339668, 266.60650632, 424.0700073461], [385.74560548999995, 411.496643078, 406.10278317, 436.824707038], [403.49902340999995, 401.55474854429997, 436.87524412, 435.1677246242], [379.14807126, 373.6726074346, 396.01733395, 393.8461303473], [201.24932863, 389.07318115370003, 235.24749759, 415.3109130658], [266.65893555, 409.43920898619996, 296.59210203000004, 440.4810791178]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 8], [6], [7]]}, {"image_path": "objects365_v1_00048296_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a bench, a flower, three storage boxes, a lamp, and a pillow.", "boxes_value": [[59.24932863000001, 17.672607434600025, 294.87524412, 84.4810791178], [30.72924802, 18.30102540070004, 98.01385499, 80.91308592960002], [0, 24.77893066529998, 111.28680419, 79.6359252711], [98.01617430000002, 48.82983396679998, 124.60650632, 68.0700073461], [243.74560548999995, 55.49664307799998, 264.10278317, 80.82470703799999], [261.49902340999995, 45.55474854429997, 294.87524412, 79.16772462419999], [237.14807126, 17.672607434600025, 254.01733395000002, 37.846130347300004], [59.24932863000001, 33.073181153700034, 93.24749759, 59.31091306579998], [124.65893555000002, 53.43920898619996, 154.59210203000004, 84.4810791178]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 8], [6], [7]]}, {"image_path": "objects365_v1_00048298.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates.", "boxes_value": [[8.1758422795, 318.9915771392, 428.4891357441, 398.2858276352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048298_crop.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates.", "boxes_value": [[8.1758422795, 19.99157713919999, 428.4891357441, 99.28582763520001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048298.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include four potted plants, a street lights, and two cars.", "boxes_value": [[8.1758422795, 318.9915771392, 428.4891357441, 398.2858276352], [317.7457275135, 340.709838848, 333.76483157440003, 398.2858276352], [329.9264526465, 338.5773925888, 361.48669436309996, 414.9188843008], [357.6481933831, 364.5932006912, 387.92895508090004, 414.065917952], [408.34582517810003, 346.7266845696, 428.4891357441, 396.9504394752], [8.1758422795, 318.9915771392, 20.229125964399998, 385.1375732224], [33.1643066257, 344.2740478464, 191.3266601328, 398.0651245056], [275.4036254628, 322.7678833152, 298.1997680645, 344.9032592896]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048298_crop.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include four potted plants, a street lights, and two cars.", "boxes_value": [[8.1758422795, 19.99157713919999, 428.4891357441, 99.28582763520001], [317.7457275135, 41.709838848000004, 333.76483157440003, 99.28582763520001], [329.9264526465, 39.57739258880002, 361.48669436309996, 115.91888430080002], [357.6481933831, 65.59320069120002, 387.92895508090004, 115.065917952], [408.34582517810003, 47.726684569600025, 428.4891357441, 97.95043947520003], [8.1758422795, 19.99157713919999, 20.229125964399998, 86.13757322240002], [33.1643066257, 45.27404784639998, 191.3266601328, 99.06512450560001], [275.4036254628, 23.76788331519998, 298.1997680645, 45.90325928959999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048300.jpg", "text": "Can you generate a description for the selected region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0.3724365312, 305.3023681611, 166.8056030208, 393.3134155514]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048300_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0.3724365312, 22.30236816109999, 166.8056030208, 110.3134155514]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048300.jpg", "text": "Can you generate a description for the selected region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a tong, a bowl, and a desk.", "boxes_value": [[0.3724365312, 305.3023681611, 166.8056030208, 393.3134155514], [0.3724365312, 306.6774902469, 56.4695434752, 391.3910522586], [0.309997568, 51.0958251731, 136.4575195136, 396.15838622369995], [50.1520385536, 361.4691161875, 153.4049682432, 393.3134155514], [145.1893920768, 305.3023681611, 166.8056030208, 324.3142089623], [1.9292602368, 250.8132324239, 389.1250610176, 520.0941161995]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048300_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a tong, a bowl, and a desk.", "boxes_value": [[0.3724365312, 22.30236816109999, 166.8056030208, 110.3134155514], [0.3724365312, 23.677490246899993, 56.4695434752, 108.39105225859998], [0.309997568, 0, 136.4575195136, 113.15838622369995], [50.1520385536, 78.46911618749999, 153.4049682432, 110.3134155514], [145.1893920768, 22.30236816109999, 166.8056030208, 41.31420896229997], [1.9292602368, 0, 208, 132]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048302.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe.", "boxes_value": [[0, 57.91760256, 156.432495105, 199.1740539392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048302_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe.", "boxes_value": [[0, 35.91760256, 156.432495105, 177.1740539392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048302.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a hat.", "boxes_value": [[0, 57.91760256, 156.432495105, 199.1740539392], [0, 57.91760256, 24.689453092, 81.9860229632], [46.6466064746, 84.0972900352, 106.18432614620001, 110.6992797696], [112.940368627, 109.432495104, 156.432495105, 127.1671752704], [0, 128.7862548992, 71.55206298739999, 150.4597778432], [74.5866948538, 184.750654464, 96.3976891552, 199.1740539392]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048302_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a hat.", "boxes_value": [[0, 35.91760256, 156.432495105, 177.1740539392], [0, 35.91760256, 24.689453092, 59.9860229632], [46.6466064746, 62.097290035200004, 106.18432614620001, 88.6992797696], [112.940368627, 87.432495104, 156.432495105, 105.1671752704], [0, 106.7862548992, 71.55206298739999, 128.4597778432], [74.5866948538, 162.750654464, 96.3976891552, 177.1740539392]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048304.jpg", "text": "Please describe the region in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.5357666166000001, 50.5234985472, 146.4774780415, 116.2849731584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048304_crop.jpg", "text": "Please describe the region in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[1.5357666166000001, 16.5234985472, 146.4774780415, 82.2849731584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048304.jpg", "text": "Please describe the region in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, two wine glasses, and two canneds.", "boxes_value": [[1.5357666166000001, 50.5234985472, 146.4774780415, 116.2849731584], [0.0805664068, 72.8860473856, 271.4201050052, 262.4146728448], [85.54595947610001, 50.5234985472, 117.4978637862, 114.7988281344], [115.640197767, 54.6103515648, 146.4774780415, 116.2849731584], [1.5357666166000001, 62.542419456, 16.334411625599998, 108.048278784], [14.4845581019, 62.172485376, 44.0819091801, 108.4182739456]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048304_crop.jpg", "text": "Please describe the region in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, two wine glasses, and two canneds.", "boxes_value": [[1.5357666166000001, 16.5234985472, 146.4774780415, 82.2849731584], [0.0805664068, 38.886047385599994, 182, 98], [85.54595947610001, 16.5234985472, 117.4978637862, 80.7988281344], [115.640197767, 20.6103515648, 146.4774780415, 82.2849731584], [1.5357666166000001, 28.542419455999998, 16.334411625599998, 74.048278784], [14.4845581019, 28.172485375999997, 44.0819091801, 74.4182739456]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048305.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[137.0122070031, 236.7609252864, 269.2225341853, 488.774719232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048305_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[34.0122070031, 63.76092528640001, 166.2225341853, 315.774719232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048305.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a chair, a cabinet, two flowers, three vases, and a carpet.", "boxes_value": [[137.0122070031, 236.7609252864, 269.2225341853, 488.774719232], [0, 281.1420288, 301.6768798789, 510.7891235328], [137.0122070031, 282.9913330176, 269.2225341853, 488.774719232], [133.1635131941, 254.2770996224, 309.47082520370003, 315.6013793792], [175.79162600089998, 208.5068969472, 223.7962035912, 261.3387451392], [213.8734741001, 213.0659789824, 261.07348631630003, 260.8023681536], [256.8259887716, 231.8351440384, 281.4549560441, 258.0403442176], [220.5721435296, 236.7609252864, 247.5654907271, 261.1928711168], [182.9391479509, 228.4855956992, 214.4641723461, 263.5572509696], [73.31463621270001, 385.1352539136, 568.4221191618, 511.162597632]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7, 8], [9]]}, {"image_path": "objects365_v1_00048305_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a chair, a cabinet, two flowers, three vases, and a carpet.", "boxes_value": [[34.0122070031, 63.76092528640001, 166.2225341853, 315.774719232], [0, 108.14202879999999, 198.67687987890002, 337.7891235328], [34.0122070031, 109.99133301760003, 166.2225341853, 315.774719232], [30.163513194100005, 81.27709962239999, 199, 142.6013793792], [72.79162600089998, 35.506896947200005, 120.7962035912, 88.3387451392], [110.8734741001, 40.0659789824, 158.07348631630003, 87.80236815360001], [153.8259887716, 58.83514403839999, 178.45495604410002, 85.04034421760002], [117.5721435296, 63.76092528640001, 144.5654907271, 88.1928711168], [79.93914795090001, 55.48559569919999, 111.46417234610001, 90.55725096959998], [0, 212.13525391360002, 199, 338.162597632]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7, 8], [9]]}, {"image_path": "objects365_v1_00048306.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify.", "boxes_value": [[0.9416198499999999, 43.43664551729999, 318.74749755, 440.6939697216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048306_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify.", "boxes_value": [[0.9416198499999999, 43.43664551729999, 318.74749755, 440.6939697216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048306.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two handbags, and a briefcase.", "boxes_value": [[0.9416198499999999, 43.43664551729999, 318.74749755, 440.6939697216], [0.9416198499999999, 43.43664551729999, 198.20043944999998, 440.6939697216], [194.54748535, 61.7013549978, 318.74749755, 421.51605223769997], [138.45654295, 73.8343505949, 168.8126831, 107.536804221], [41.651611349999996, 72.8782348518, 79.1784668, 110.4050903508], [38.440612800000004, 400.00268553, 94.89642334999999, 423.76263429150004]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048306_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two handbags, and a briefcase.", "boxes_value": [[0.9416198499999999, 43.43664551729999, 318.74749755, 440.6939697216], [0.9416198499999999, 43.43664551729999, 198.20043944999998, 440.6939697216], [194.54748535, 61.7013549978, 318.74749755, 421.51605223769997], [138.45654295, 73.8343505949, 168.8126831, 107.536804221], [41.651611349999996, 72.8782348518, 79.1784668, 110.4050903508], [38.440612800000004, 400.00268553, 94.89642334999999, 423.76263429150004]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048307.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[23.919128386, 174.274291968, 429.769653353, 331.6706543104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048307_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[23.919128386, 40.274291968, 429.769653353, 197.67065431039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048307.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include five lamps, and a handbag.", "boxes_value": [[23.919128386, 174.274291968, 429.769653353, 331.6706543104], [402.675048821, 174.274291968, 429.769653353, 210.23620608], [299.22296142749997, 182.6489868288, 321.391235362, 215.65515136], [203.65289304750002, 195.4573364224, 219.41705320999998, 223.0445556736], [122.861755358, 205.3099365376, 139.1185302565, 224.0298461696], [23.919128386, 204.9105224704, 38.7387084835, 229.0237426688], [208.009399408, 308.265380864, 229.13122555750002, 331.6706543104]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048307_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include five lamps, and a handbag.", "boxes_value": [[23.919128386, 40.274291968, 429.769653353, 197.67065431039998], [402.675048821, 40.274291968, 429.769653353, 76.23620607999999], [299.22296142749997, 48.64898682879999, 321.391235362, 81.65515135999999], [203.65289304750002, 61.457336422400004, 219.41705320999998, 89.0445556736], [122.861755358, 71.30993653760001, 139.1185302565, 90.0298461696], [23.919128386, 70.9105224704, 38.7387084835, 95.0237426688], [208.009399408, 174.265380864, 229.13122555750002, 197.67065431039998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048309.jpg", "text": "What does the area within the given visual contain? Please mention the objects and their locations.", "boxes_value": [[74.1685180318, 342.5808715776, 210.143005382, 405.7756347904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048309_crop.jpg", "text": "What does the area within the given visual contain? Please mention the objects and their locations.", "boxes_value": [[34.168518031800005, 16.58087157760002, 170.143005382, 79.77563479039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048309.jpg", "text": "What does the area within the given visual contain? Please mention the objects and their locations. For your reference, objects involved in this region include three pillows, a bed, and a nightstand.", "boxes_value": [[74.1685180318, 342.5808715776, 210.143005382, 405.7756347904], [137.1518554406, 342.5808715776, 187.7283935198, 390.8584594944], [103.81726077729999, 347.1787109376, 161.2906493911, 393.7321777152], [52.0912475387, 301.2000121856, 392.90832519540004, 477.068542464], [175.08428958049998, 346.0292358144, 210.143005382, 371.8922729472], [74.1685180318, 368.8137206784, 131.1954345794, 405.7756347904]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048309_crop.jpg", "text": "What does the area within the given visual contain? Please mention the objects and their locations. For your reference, objects involved in this region include three pillows, a bed, and a nightstand.", "boxes_value": [[34.168518031800005, 16.58087157760002, 170.143005382, 79.77563479039998], [97.1518554406, 16.58087157760002, 147.7283935198, 64.8584594944], [63.81726077729999, 21.178710937599988, 121.2906493911, 67.73217771520001], [12.091247538700003, 0, 204, 95], [135.08428958049998, 20.02923581440001, 170.143005382, 45.89227294720001], [34.168518031800005, 42.81372067839999, 91.1954345794, 79.77563479039998]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048310.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates.", "boxes_value": [[398.47216793760003, 128.6091918848, 569.0694580146001, 325.552062976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048310_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates.", "boxes_value": [[43.472167937600034, 49.6091918848, 214.06945801460006, 246.552062976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048310.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, a flower, and a vase.", "boxes_value": [[398.47216793760003, 128.6091918848, 569.0694580146001, 325.552062976], [550.9069824528, 128.6091918848, 569.0694580146001, 210.3402710016], [489.43408200479996, 139.7860717568, 503.4052734174, 214.5316162048], [493.6254882912, 176.1110229504, 529.950317355, 229.0471801856], [372.7253417634, 230.3488769536, 453.55822750799996, 310.5830077952], [398.47216793760003, 308.1879882752, 426.6140136588, 325.552062976]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048310_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include three lamps, a flower, and a vase.", "boxes_value": [[43.472167937600034, 49.6091918848, 214.06945801460006, 246.552062976], [195.90698245279998, 49.6091918848, 214.06945801460006, 131.3402710016], [134.43408200479996, 60.7860717568, 148.4052734174, 135.5316162048], [138.62548829119999, 97.11102295040001, 174.95031735500004, 150.0471801856], [17.725341763400024, 151.3488769536, 98.55822750799996, 231.5830077952], [43.472167937600034, 229.18798827519998, 71.61401365879999, 246.552062976]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048314.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations.", "boxes_value": [[23.6568603628, 317.1787109376, 297.34619140990003, 395.6229248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048314_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations.", "boxes_value": [[23.6568603628, 20.178710937599988, 297.34619140990003, 98.62292480000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048314.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include two desks, a person, and two moniters.", "boxes_value": [[23.6568603628, 317.1787109376, 297.34619140990003, 395.6229248], [23.6568603628, 363.2949218816, 69.7243042267, 395.6229248], [66.087402376, 352.38421632, 98.01135252059998, 379.0548095488], [285.98480222909996, 317.1787109376, 297.34619140990003, 346.3937377792], [196.3095093084, 327.6840820224, 217.3690185451, 345.196716288], [164.8311157164, 329.0141601792, 185.89062502139998, 346.0834350592]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048314_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include two desks, a person, and two moniters.", "boxes_value": [[23.6568603628, 20.178710937599988, 297.34619140990003, 98.62292480000002], [23.6568603628, 66.29492188159998, 69.7243042267, 98.62292480000002], [66.087402376, 55.38421632000001, 98.01135252059998, 82.0548095488], [285.98480222909996, 20.178710937599988, 297.34619140990003, 49.39373777920002], [196.3095093084, 30.68408202239999, 217.3690185451, 48.196716288000005], [164.8311157164, 32.01416017920002, 185.89062502139998, 49.083435059199985]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048315.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[129.25549313599998, 324.068969728, 282.210327168, 374.7783203328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048315_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[38.255493135999984, 13.068969728000013, 191.210327168, 63.77832033279998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048315.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a sneakers, and five traffic cones.", "boxes_value": [[129.25549313599998, 324.068969728, 282.210327168, 374.7783203328], [144.226379374, 341.6569824256, 171.234619171, 392.7797241344], [129.25549313599998, 328.2029419008, 153.783386218, 367.337280256], [178.3112793, 331.785644544, 213.587341275, 371.4711913984], [227.64263914699998, 328.2029419008, 254.65087894400003, 367.0617065472], [251.619323725, 324.068969728, 268.155029296, 361.2742309376], [252.721740733, 333.71478272, 282.210327168, 374.7783203328]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048315_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a sneakers, and five traffic cones.", "boxes_value": [[38.255493135999984, 13.068969728000013, 191.210327168, 63.77832033279998], [53.226379374000004, 30.656982425600006, 80.23461917099999, 76], [38.255493135999984, 17.2029419008, 62.783386218000004, 56.337280255999985], [87.3112793, 20.78564454399998, 122.587341275, 60.47119139839998], [136.64263914699998, 17.2029419008, 163.65087894400003, 56.061706547200004], [160.619323725, 13.068969728000013, 177.155029296, 50.27423093760001], [161.721740733, 22.714782720000017, 191.210327168, 63.77832033279998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048316.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 248.6626587136, 265.3776855552, 453.0655517696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048316_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 51.662658713599996, 265.3776855552, 256.0655517696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048316.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four stools, and a faucet.", "boxes_value": [[0, 248.6626587136, 265.3776855552, 453.0655517696], [0, 315.9802856448, 48.975585945599995, 453.0655517696], [102.1591796736, 311.3955078144, 166.3462524672, 441.145080576], [170.9310302976, 310.0200195072, 228.24090577919998, 423.2644042752], [215.86199953919999, 306.4541015552, 265.3776855552, 406.4025268736], [64.9952392704, 248.6626587136, 89.4724121088, 269.1470947328]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048316_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four stools, and a faucet.", "boxes_value": [[0, 51.662658713599996, 265.3776855552, 256.0655517696], [0, 118.9802856448, 48.975585945599995, 256.0655517696], [102.1591796736, 114.39550781439999, 166.3462524672, 244.145080576], [170.9310302976, 113.0200195072, 228.24090577919998, 226.26440427519998], [215.86199953919999, 109.4541015552, 265.3776855552, 209.40252687359998], [64.9952392704, 51.662658713599996, 89.4724121088, 72.14709473279999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048317.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[325.35021973050004, 185.8685913088, 696.9190673851999, 251.9623413248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048317_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object.", "boxes_value": [[93.35021973050004, 16.868591308800006, 464.9190673851999, 82.96234132480001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048317.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a faucet, a gas stove, and two pots.", "boxes_value": [[325.35021973050004, 185.8685913088, 696.9190673851999, 251.9623413248], [325.35021973050004, 221.930603008, 358.616821262, 251.1586303488], [438.92199709380003, 185.8685913088, 457.7176514001, 212.9962768384], [526.3055419932999, 235.2915039232, 646.198730459, 251.9623413248], [538.4090576052, 217.0220337152, 566.2698974283, 239.630493184], [657.756591801, 212.9952392704, 696.9190673851999, 237.4434814464]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048317_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a faucet, a gas stove, and two pots.", "boxes_value": [[93.35021973050004, 16.868591308800006, 464.9190673851999, 82.96234132480001], [93.35021973050004, 52.93060300799999, 126.61682126199997, 82.15863034879999], [206.92199709380003, 16.868591308800006, 225.7176514001, 43.99627683840001], [294.3055419932999, 66.2915039232, 414.198730459, 82.96234132480001], [306.4090576052, 48.022033715199996, 334.26989742830006, 70.63049318399999], [425.756591801, 43.99523927039999, 464.9190673851999, 68.4434814464]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048318.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object.", "boxes_value": [[123.01568600280001, 320.2017822208, 280.3876953528, 410.5803222528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048318_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object.", "boxes_value": [[40.01568600280001, 23.2017822208, 197.3876953528, 113.58032225279999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048318.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a glasses.", "boxes_value": [[123.01568600280001, 320.2017822208, 280.3876953528, 410.5803222528], [208.6340332296, 362.8490600448, 264.813232398, 410.5803222528], [263.0025634368, 329.5817260544, 280.3876953528, 374.3042602496], [201.5208739848, 338.4447631872, 212.6192626908, 370.1822509568], [123.01568600280001, 320.2017822208, 142.7567138352, 368.7238769664], [224.01977538719999, 383.3228149248, 262.71875003639997, 394.6100463616]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048318_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a glasses.", "boxes_value": [[40.01568600280001, 23.2017822208, 197.3876953528, 113.58032225279999], [125.63403322959999, 65.84906004480001, 181.81323239800003, 113.58032225279999], [180.00256343680002, 32.58172605440001, 197.3876953528, 77.30426024960002], [118.5208739848, 41.44476318720001, 129.6192626908, 73.18225095679998], [40.01568600280001, 23.2017822208, 59.7567138352, 71.72387696639998], [141.01977538719999, 86.32281492480001, 179.71875003639997, 97.61004636159998]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048319.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[90.7659301888, 11.816345204700001, 334.0534057472, 668.2154540788]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048319_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention.", "boxes_value": [[61.7659301888, 11.816345204700001, 305.0534057472, 668.2154540788]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048319.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two bracelets, a hat, and four leather shoes.", "boxes_value": [[90.7659301888, 11.816345204700001, 334.0534057472, 668.2154540788], [204.3001098752, 145.3859252753, 462.8526611456, 672.0317382828], [90.7659301888, 11.816345204700001, 334.0534057472, 668.2154540788], [215.5444946432, 53.1138916114, 242.5310058496, 87.4603271812], [276.0596923904, 327.8856200929, 312.0416869888, 353.23657224789997], [263.2786865152, 145.3350219838, 349.1775512576, 187.7628173856], [158.1963501056, 606.6025390620999, 218.4180908032, 645.2275390787], [167.3334350336, 643.5662841858, 214.2648925696, 668.9008789376], [266.59552, 590.8203124895, 315.6035766784, 673.0540771336999], [298.5753174016, 619.0622558489, 383.3011474432, 658.1025390391]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7, 8, 9]]}, {"image_path": "objects365_v1_00048319_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two bracelets, a hat, and four leather shoes.", "boxes_value": [[61.7659301888, 11.816345204700001, 305.0534057472, 668.2154540788], [175.3001098752, 145.3859252753, 365, 672.0317382828], [61.7659301888, 11.816345204700001, 305.0534057472, 668.2154540788], [186.5444946432, 53.1138916114, 213.5310058496, 87.4603271812], [247.05969239040002, 327.8856200929, 283.0416869888, 353.23657224789997], [234.27868651519998, 145.3350219838, 320.1775512576, 187.7628173856], [129.1963501056, 606.6025390620999, 189.4180908032, 645.2275390787], [138.3334350336, 643.5662841858, 185.2648925696, 668.9008789376], [237.59552000000002, 590.8203124895, 286.6035766784, 673.0540771336999], [269.5753174016, 619.0622558489, 354.3011474432, 658.1025390391]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6, 7, 8, 9]]}, {"image_path": "objects365_v1_00048321.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[437.9870605772, 249.736511232, 681.8597411986, 511.5159301632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048321_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[60.9870605772, 65.736511232, 304.8597411986, 327.5159301632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048321.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a watch, and a bracelet.", "boxes_value": [[437.9870605772, 249.736511232, 681.8597411986, 511.5159301632], [380.8560791198, 68.9637451264, 632.4030761452, 511.5159301632], [132.7198486555, 74.0799560704, 643.4881591786, 511.5159301632], [437.9870605772, 408.3389892608, 479.7694091583, 457.795715328], [605.1165771586001, 276.1702270464, 623.875976596, 306.8674926592], [603.4111327836999, 249.736511232, 681.8597411986, 511.5159301632]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048321_crop.jpg", "text": "Describe the bbox in the provided photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a watch, and a bracelet.", "boxes_value": [[60.9870605772, 65.736511232, 304.8597411986, 327.5159301632], [3.856079119799972, 0, 255.40307614519998, 327.5159301632], [0, 0, 266.4881591786, 327.5159301632], [60.9870605772, 224.33898926080002, 102.7694091583, 273.795715328], [228.11657715860008, 92.17022704639999, 246.875976596, 122.86749265920002], [226.41113278369994, 65.736511232, 304.8597411986, 327.5159301632]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048323.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[361.9085693184, 94.405517568, 599.847168, 290.2282714624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048323_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[59.9085693184, 49.40551756799999, 297.847168, 245.22827146240002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048323.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, and four cabinets.", "boxes_value": [[361.9085693184, 94.405517568, 599.847168, 290.2282714624], [466.4793701376, 94.405517568, 552.2681884416, 116.29949952], [568.2247314432, 171.9379883008, 599.847168, 193.222961408], [422.41845703679996, 233.9664306688, 463.4261474304, 271.0686034944], [364.1304931584, 173.7351074304, 422.2183838208, 290.2282714624], [361.9085693184, 128.0266113536, 421.26611328, 180.0834960896]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048323_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, and four cabinets.", "boxes_value": [[59.9085693184, 49.40551756799999, 297.847168, 245.22827146240002], [164.47937013759997, 49.40551756799999, 250.26818844160005, 71.29949952], [266.22473144319997, 126.93798830079999, 297.847168, 148.222961408], [120.41845703679996, 188.9664306688, 161.42614743040002, 226.0686034944], [62.130493158399986, 128.7351074304, 120.2183838208, 245.22827146240002], [59.9085693184, 83.0266113536, 119.26611328000001, 135.0834960896]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048325.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates.", "boxes_value": [[440.20092771839995, 169.2290039296, 541.4200439808, 303.3530883584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048325_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates.", "boxes_value": [[26.200927718399953, 34.2290039296, 127.42004398079996, 168.3530883584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048325.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a car, two suvs, a bicycle, and a dog.", "boxes_value": [[440.20092771839995, 169.2290039296, 541.4200439808, 303.3530883584], [440.20092771839995, 202.9686889472, 455.383911168, 246.8303222784], [441.88793948160003, 138.8632812544, 483.2191162368, 209.7166137856], [527.9241943296, 169.2290039296, 541.4200439808, 224.0560302592], [500.08898926079996, 216.4645996032, 538.0461425664, 282.2570190336], [499.48730465279993, 254.3530883584, 536.2238769408, 303.3530883584]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048325_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a car, two suvs, a bicycle, and a dog.", "boxes_value": [[26.200927718399953, 34.2290039296, 127.42004398079996, 168.3530883584], [26.200927718399953, 67.96868894720001, 41.383911168, 111.83032227839999], [27.88793948160003, 3.8632812543999933, 69.2191162368, 74.7166137856], [113.92419432960003, 34.2290039296, 127.42004398079996, 89.05603025920001], [86.08898926079996, 81.46459960320001, 124.04614256640002, 147.2570190336], [85.48730465279993, 119.35308835839999, 122.22387694079998, 168.3530883584]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048326.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[182.2437133695, 138.1534424064, 431.2585449, 345.374023424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048326_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[63.2437133695, 52.15344240639999, 312.2585449, 259.374023424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048326.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two potted plants, a vase, a picture, and a barrel.", "boxes_value": [[182.2437133695, 138.1534424064, 431.2585449, 345.374023424], [178.2541503945, 232.7511596544, 209.688354513, 289.8369140736], [378.136596672, 313.2244873216, 390.384033237, 345.374023424], [339.8446045215, 210.751098624, 362.8218994155, 237.2634277376], [371.495239227, 138.1534424064, 431.2585449, 264.8518676992], [182.2437133695, 263.5437011968, 207.20605466700002, 289.1868285952]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00048326_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two potted plants, a vase, a picture, and a barrel.", "boxes_value": [[63.2437133695, 52.15344240639999, 312.2585449, 259.374023424], [59.25415039449999, 146.7511596544, 90.68835451300001, 203.83691407359998], [259.136596672, 227.2244873216, 271.384033237, 259.374023424], [220.8446045215, 124.75109862400001, 243.8218994155, 151.2634277376], [252.495239227, 52.15344240639999, 312.2585449, 178.8518676992], [63.2437133695, 177.5437011968, 88.20605466700002, 203.18682859519998]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00048328.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[735.0404052447, 182.8359374848, 908.9105224941001, 313.8104247808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048328_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.04040524469997, 32.83593748480001, 217.91052249410006, 163.81042478080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048328.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a flower, and two ties.", "boxes_value": [[735.0404052447, 182.8359374848, 908.9105224941001, 313.8104247808], [857.7113037462, 182.8359374848, 908.9105224941001, 290.1891479552], [763.9176025089, 197.6760864256, 833.0714111637, 298.5413818368], [757.1468505609, 244.0252075008, 774.1483154262, 263.4946899456], [873.1314697572001, 266.1480712704, 909.0269775747, 329.0106201088], [735.0404052447, 260.7318115328, 797.9180908473, 313.8104247808]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048328_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a flower, and two ties.", "boxes_value": [[44.04040524469997, 32.83593748480001, 217.91052249410006, 163.81042478080002], [166.71130374619997, 32.83593748480001, 217.91052249410006, 140.18914795519999], [72.91760250890002, 47.676086425600005, 142.07141116369996, 148.54138183679999], [66.14685056090002, 94.02520750080001, 83.14831542620004, 113.49468994559999], [182.13146975720008, 116.14807127040001, 218, 179.01062010880003], [44.04040524469997, 110.73181153280001, 106.91809084730005, 163.81042478080002]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048330.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference.", "boxes_value": [[488.9381187561, 42.2342885888, 689.2174071978, 112.0527764992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048330_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference.", "boxes_value": [[50.93811875609998, 18.2342885888, 251, 88.0527764992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048330.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a person, a hat, a cup, and a bowl.", "boxes_value": [[488.9381187561, 42.2342885888, 689.2174071978, 112.0527764992], [573.6938476282, 20.0592651264, 689.0634765364999, 384.5886230528], [461.39257811010003, 42.3777465856, 627.6435546706, 396.0750121984], [488.9381187561, 42.2342885888, 569.2293798710999, 112.0527764992], [662.1009521190999, 64.61810304, 683.3107909914, 84.7540283392], [637.937866189, 80.7268066304, 689.2174071978, 104.8899536384]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048330_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, a person, a hat, a cup, and a bowl.", "boxes_value": [[50.93811875609998, 18.2342885888, 251, 88.0527764992], [135.69384762820005, 0, 251, 105], [23.392578110100033, 18.3777465856, 189.6435546706, 105], [50.93811875609998, 18.2342885888, 131.22937987109992, 88.0527764992], [224.10095211909993, 40.618103039999994, 245.31079099140004, 60.754028339200005], [199.93786618900003, 56.726806630400006, 251, 80.8899536384]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048331.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[386.6076660367, 158.3309326336, 466.9190674103, 325.7100219904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048331_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[20.60766603669998, 42.330932633600014, 100.91906741029999, 209.71002199039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048331.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a picture, and three stuffed toys.", "boxes_value": [[386.6076660367, 158.3309326336, 466.9190674103, 325.7100219904], [364.2044677463, 132.5980224512, 467.0341796839, 386.5563354624], [411.23864743160004, 254.5856323072, 435.5092773435, 277.537170432], [422.4881591634, 158.3309326336, 447.0845947598, 191.5766601728], [386.6076660367, 284.9513549824, 412.89575193509995, 325.7100219904], [447.3839111089, 272.4102782976, 466.9190674103, 292.186645504]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048331_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a picture, and three stuffed toys.", "boxes_value": [[20.60766603669998, 42.330932633600014, 100.91906741029999, 209.71002199039998], [0, 16.598022451199995, 101.03417968389999, 251], [45.23864743160004, 138.5856323072, 69.5092773435, 161.53717043199998], [56.488159163399985, 42.330932633600014, 81.08459475979998, 75.57666017279999], [20.60766603669998, 168.9513549824, 46.89575193509995, 209.71002199039998], [81.38391110890001, 156.41027829759997, 100.91906741029999, 176.186645504]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048332.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object.", "boxes_value": [[425.205078144, 68.452453632, 480.6402587904, 246.583251968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048332_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object.", "boxes_value": [[14.205078144000026, 45.452453632, 69.64025879040003, 223.583251968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048332.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a baseball bat, a person, a helmet, and two gloves.", "boxes_value": [[425.205078144, 68.452453632, 480.6402587904, 246.583251968], [394.13000486399994, 51.6029052928, 462.05773923839996, 249.03485107200004], [332.7370605312, 69.4324340736, 481.32287600639995, 451.802429184], [425.205078144, 68.452453632, 480.6402587904, 129.998535168], [412.7115478272, 201.045043968, 460.81518551039994, 229.9072876032], [442.856445312, 215.796875008, 469.0594482432, 246.583251968]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048332_crop.jpg", "text": "What can you tell me about the area within the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a baseball bat, a person, a helmet, and two gloves.", "boxes_value": [[14.205078144000026, 45.452453632, 69.64025879040003, 223.583251968], [0, 28.602905292800003, 51.05773923839996, 226.03485107200004], [0, 46.43243407360001, 70.32287600639995, 268], [14.205078144000026, 45.452453632, 69.64025879040003, 106.99853516799999], [1.711547827199979, 178.045043968, 49.81518551039994, 206.9072876032], [31.856445312000005, 192.796875008, 58.059448243199995, 223.583251968]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048334.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[639.8017577967, 174.834655744, 770.269653288, 388.5308837888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048334_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[32.80175779670003, 53.834655744, 163.26965328799997, 267.5308837888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048334.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, and three people.", "boxes_value": [[639.8017577967, 174.834655744, 770.269653288, 388.5308837888], [682.6708984089, 175.9649658368, 717.1452636519, 246.0439453184], [744.2725830045, 174.834655744, 770.269653288, 218.3514404352], [639.8017577967, 274.1984252928, 705.2330322369, 388.5308837888], [725.2067871468, 200.502197248, 758.2667236689, 256.9796752896], [631.4383544946, 212.5061035008, 771.3530273304001, 512.1579589632]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048334_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, and three people.", "boxes_value": [[32.80175779670003, 53.834655744, 163.26965328799997, 267.5308837888], [75.67089840890003, 54.96496583679999, 110.1452636519, 125.0439453184], [137.2725830045, 53.834655744, 163.26965328799997, 97.3514404352], [32.80175779670003, 153.19842529279998, 98.23303223690004, 267.5308837888], [118.20678714680002, 79.50219724799999, 151.2667236689, 135.97967528959998], [24.43835449460005, 91.50610350080001, 164, 320]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048336.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[553.8547363584, 123.2893676544, 768.0, 235.2380981248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048336_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations.", "boxes_value": [[53.85473635840003, 28.289367654399996, 268, 140.2380981248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048336.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include four people, and a train.", "boxes_value": [[553.8547363584, 123.2893676544, 768.0, 235.2380981248], [553.8547363584, 125.5595092992, 590.3959961088, 234.5462646272], [660.2694091776, 123.2893676544, 708.4365234432, 228.5129394688], [706.5760498176, 130.9382324224, 744.2001953280001, 222.1044311552], [717.643066368, 154.5378418176, 768.0, 235.2380981248], [25.5114135552, 0, 767.8652343552, 295.4555663872]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048336_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please mention the objects and their locations. For your reference, objects involved in this region include four people, and a train.", "boxes_value": [[53.85473635840003, 28.289367654399996, 268, 140.2380981248], [53.85473635840003, 30.559509299200002, 90.39599610879998, 139.5462646272], [160.26940917759998, 28.289367654399996, 208.43652344320003, 133.5129394688], [206.57604981760005, 35.938232422400006, 244.20019532800006, 127.10443115519999], [217.64306636799995, 59.5378418176, 268, 140.2380981248], [0, 0, 267.86523435519996, 168]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048338.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[92.16351314959999, 187.9144287232, 560.3673095955, 280.8997802496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048338_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[92.16351314959999, 23.91442872319999, 560.3673095955, 116.89978024959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048338.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include six chairs, a desk, and a potted plant.", "boxes_value": [[92.16351314959999, 187.9144287232, 560.3673095955, 280.8997802496], [217.4882202479, 230.8476562432, 280.6066284211, 329.3275757056], [182.5069580338, 222.102355968, 238.7811889316, 315.2590331904], [320.1506958256, 224.0034789888, 380.6075439601, 279.1370239488], [273.382263192, 220.5814209024, 322.05187988660003, 276.095214848], [219.00915526449998, 228.5662841856, 354.7517089953, 321.3427124224], [92.16351314959999, 198.6353759744, 170.14550782819998, 280.8997802496], [542.7808838068, 187.9144287232, 560.3673095955, 246.2498779136], [253.35711669480003, 188.7488403456, 308.5569458032, 238.5934448128]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6, 7], [5], [8]]}, {"image_path": "objects365_v1_00048338_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include six chairs, a desk, and a potted plant.", "boxes_value": [[92.16351314959999, 23.91442872319999, 560.3673095955, 116.89978024959998], [217.4882202479, 66.84765624319999, 280.6066284211, 140], [182.5069580338, 58.10235596800001, 238.7811889316, 140], [320.1506958256, 60.0034789888, 380.6075439601, 115.13702394879999], [273.382263192, 56.5814209024, 322.05187988660003, 112.09521484800001], [219.00915526449998, 64.56628418560001, 354.7517089953, 140], [92.16351314959999, 34.635375974400006, 170.14550782819998, 116.89978024959998], [542.7808838068, 23.91442872319999, 560.3673095955, 82.24987791359999], [253.35711669480003, 24.7488403456, 308.5569458032, 74.59344481279999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 6, 7], [5], [8]]}, {"image_path": "objects365_v1_00048339.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference.", "boxes_value": [[39.970430208, 192.4573022044, 105.9868668416, 547.69875941]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048339_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference.", "boxes_value": [[16.970430208000003, 89.45730220440001, 82.9868668416, 444.69875941]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048339.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two sandals, a hat, and a donut.", "boxes_value": [[39.970430208, 192.4573022044, 105.9868668416, 547.69875941], [0.5470581248, 193.01794430599998, 137.13006592, 548.556030278], [56.0311487488, 482.2335324968, 93.7037713408, 505.02224213439996], [39.970430208, 522.4829877144, 78.7289443328, 547.69875941], [62.2395207168, 192.4573022044, 105.9868668416, 216.95469082120002], [60.542297344, 277.333740208, 406.9959716864, 531.589233388]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048339_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, two sandals, a hat, and a donut.", "boxes_value": [[16.970430208000003, 89.45730220440001, 82.9868668416, 444.69875941], [0, 90.01794430599998, 99, 445.556030278], [33.0311487488, 379.2335324968, 70.7037713408, 402.02224213439996], [16.970430208000003, 419.4829877144, 55.7289443328, 444.69875941], [39.2395207168, 89.45730220440001, 82.9868668416, 113.95469082120002], [37.542297344, 174.333740208, 99, 428.589233388]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048340.jpg", "text": "Kindly describe what I should be seeing in the area of image . Provide the coordinates for all objects that you mention.", "boxes_value": [[272.0919189504, 581.1776123376001, 393.4225463808, 731.9624023572001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048340_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Provide the coordinates for all objects that you mention.", "boxes_value": [[31.091918950399986, 38.17761233760007, 152.4225463808, 188.96240235720006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048340.jpg", "text": "Kindly describe what I should be seeing in the area of image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a handbag, and a hat.", "boxes_value": [[272.0919189504, 581.1776123376001, 393.4225463808, 731.9624023572001], [346.419799808, 590.9542236624001, 393.4225463808, 731.9624023572001], [288.5124511744, 581.1776123376001, 349.427978496, 731.210327184], [258.43072512, 592.08227541, 298.6650390528, 703.7607421548], [272.0919189504, 670.7319335724, 297.1283569152, 728.6866454808], [364.3558349824, 591.6043701048, 387.9945068544, 608.1407470596]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048340_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a handbag, and a hat.", "boxes_value": [[31.091918950399986, 38.17761233760007, 152.4225463808, 188.96240235720006], [105.419799808, 47.95422366240007, 152.4225463808, 188.96240235720006], [47.512451174399985, 38.17761233760007, 108.42797849599998, 188.210327184], [17.430725119999977, 49.082275409999966, 57.66503905280001, 160.76074215480003], [31.091918950399986, 127.73193357239995, 56.128356915200015, 185.68664548080005], [123.35583498239998, 48.604370104799955, 146.99450685440002, 65.14074705960002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048341.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[146.327209472, 13.2250366464, 481.6613159424, 201.06433105920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048341_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe.", "boxes_value": [[84.32720947199999, 13.2250366464, 419.6613159424, 201.06433105920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048341.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a helmet, two cars, and two street lights.", "boxes_value": [[146.327209472, 13.2250366464, 481.6613159424, 201.06433105920001], [212.8392944128, 90.44488527360001, 286.5855712768, 201.06433105920001], [448.7326660096, 140.8293457152, 481.6613159424, 154.47540280319998], [146.327209472, 135.6403808256, 173.9362182656, 153.20977781759998], [335.740600576, 13.2250366464, 390.5980224512, 160.416748032], [368.7573242368, 45.6574706688, 404.8707885568, 153.0372924672]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048341_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a helmet, two cars, and two street lights.", "boxes_value": [[84.32720947199999, 13.2250366464, 419.6613159424, 201.06433105920001], [150.8392944128, 90.44488527360001, 224.5855712768, 201.06433105920001], [386.7326660096, 140.8293457152, 419.6613159424, 154.47540280319998], [84.32720947199999, 135.6403808256, 111.93621826559999, 153.20977781759998], [273.740600576, 13.2250366464, 328.5980224512, 160.416748032], [306.7573242368, 45.6574706688, 342.8707885568, 153.0372924672]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048342.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[387.1065673955, 453.86163328, 574.7578125277, 512.4119873024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048342_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[47.10656739550001, 14.861633279999978, 234.75781252770003, 73]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048342.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two sneakers, and two gloves.", "boxes_value": [[387.1065673955, 453.86163328, 574.7578125277, 512.4119873024], [478.861450225, 218.393981952, 776.6574706686, 504.1220703232], [533.6806640562, 441.0250244096, 594.1552734457, 488.092590336], [531.3985596008, 453.86163328, 574.7578125277, 503.2113037312], [404.89074705039997, 464.7182006784, 454.20129397970004, 484.9274292224], [387.1065673955, 483.5801391616, 421.05810548700003, 512.4119873024]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048342_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two sneakers, and two gloves.", "boxes_value": [[47.10656739550001, 14.861633279999978, 234.75781252770003, 73], [138.861450225, 0, 281, 65.12207032319998], [193.68066405620004, 2.0250244096000074, 254.1552734457, 49.092590336], [191.3985596008, 14.861633279999978, 234.75781252770003, 64.21130373120002], [64.89074705039997, 25.718200678400024, 114.20129397970004, 45.92742922240001], [47.10656739550001, 44.580139161600016, 81.05810548700003, 73]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048346.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations.", "boxes_value": [[0.24755857920000002, 119.0968627712, 385.7314453248, 511.9676513792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048346_crop.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations.", "boxes_value": [[0.24755857920000002, 99.0968627712, 385.7314453248, 491.9676513792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048346.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a radiator, three people, and two boots.", "boxes_value": [[0.24755857920000002, 119.0968627712, 385.7314453248, 511.9676513792], [38.2808227584, 119.0968627712, 80.765014656, 145.55847168], [0.24755857920000002, 345.98803712, 153.47930910719998, 511.9676513792], [234.24487303680002, 178.7738037248, 297.6070556928, 390.345153792], [293.2373046528, 184.9643554816, 385.7314453248, 412.5582885888], [180.054052608, 258.6229746688, 196.54280186879998, 283.7040717312], [161.539314048, 260.59540224, 173.320818816, 287.5424608768]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048346_crop.jpg", "text": "What can you tell me about the area within the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a radiator, three people, and two boots.", "boxes_value": [[0.24755857920000002, 99.0968627712, 385.7314453248, 491.9676513792], [38.2808227584, 99.0968627712, 80.765014656, 125.55847168], [0.24755857920000002, 325.98803712, 153.47930910719998, 491.9676513792], [234.24487303680002, 158.7738037248, 297.6070556928, 370.345153792], [293.2373046528, 164.9643554816, 385.7314453248, 392.5582885888], [180.054052608, 238.6229746688, 196.54280186879998, 263.7040717312], [161.539314048, 240.59540224, 173.320818816, 267.5424608768]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048349.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[420.7553710848, 94.8651733504, 621.5843506176, 291.1568603648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048349_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[50.755371084800004, 49.8651733504, 251.58435061759997, 246.15686036480002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048349.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, a towel, a bowl, two wine glasses, and a bottle.", "boxes_value": [[420.7553710848, 94.8651733504, 621.5843506176, 291.1568603648], [476.52832028160003, 94.8651733504, 621.5843506176, 241.899230976], [427.78356933119994, 276.3377685504, 455.9095458816, 291.1568603648], [467.11474606080003, 264.6149292032, 500.1306152448, 280.3551025152], [490.1490478848, 240.4288940544, 512.2236327936, 259.240295424], [469.80200194560007, 237.5495605248, 486.118041984, 264.2310180864], [420.7553710848, 195.3616943104, 470.81237790719996, 277.540832512]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048349_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, a towel, a bowl, two wine glasses, and a bottle.", "boxes_value": [[50.755371084800004, 49.8651733504, 251.58435061759997, 246.15686036480002], [106.52832028160003, 49.8651733504, 251.58435061759997, 196.899230976], [57.78356933119994, 231.33776855040003, 85.90954588160002, 246.15686036480002], [97.11474606080003, 219.6149292032, 130.13061524480003, 235.35510251519997], [120.14904788479998, 195.4288940544, 142.22363279360002, 214.240295424], [99.80200194560007, 192.5495605248, 116.118041984, 219.2310180864], [50.755371084800004, 150.3616943104, 100.81237790719996, 232.540832512]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048351.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object.", "boxes_value": [[0.1323852288, 150.6890258944, 317.5510253568, 230.4998779392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048351_crop.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object.", "boxes_value": [[0.1323852288, 20.689025894400004, 317.5510253568, 100.49987793919999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048351.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two trucks, a bus, and a street lights.", "boxes_value": [[0.1323852288, 150.6890258944, 317.5510253568, 230.4998779392], [111.3113403648, 165.5609131008, 134.00915527680002, 209.210571264], [0.1323852288, 150.6890258944, 18.4224243456, 202.4057617408], [20.9929198848, 202.2715453952, 90.197937024, 230.4998779392], [178.6126708992, 213.2905273344, 202.9877929728, 229.3084716544], [205.7735596032, 197.2725830144, 301.1848144896, 240.7996215808], [311.63134763520003, 154.4420166144, 317.5510253568, 219.906677248]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048351_crop.jpg", "text": "Please enlighten me about the region in the given photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two trucks, a bus, and a street lights.", "boxes_value": [[0.1323852288, 20.689025894400004, 317.5510253568, 100.49987793919999], [111.3113403648, 35.56091310080001, 134.00915527680002, 79.21057126400001], [0.1323852288, 20.689025894400004, 18.4224243456, 72.40576174079999], [20.9929198848, 72.27154539520001, 90.197937024, 100.49987793919999], [178.6126708992, 83.2905273344, 202.9877929728, 99.3084716544], [205.7735596032, 67.2725830144, 301.1848144896, 110.7996215808], [311.63134763520003, 24.44201661439999, 317.5510253568, 89.906677248]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048352.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[98.6528320512, 277.97924807519996, 378.6321411072, 639.4715575920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048352_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[70.6528320512, 90.97924807519996, 350.6321411072, 452.47155759200007]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048352.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, a gun, a person, a glasses, and two gloves.", "boxes_value": [[98.6528320512, 277.97924807519996, 378.6321411072, 639.4715575920001], [0.0420532224, 466.8963623184, 339.3140258816, 869.6577147984], [240.5471191552, 326.4145507392, 363.7929687552, 413.2073974704], [98.6528320512, 277.97924807519996, 378.6321411072, 639.4715575920001], [150.3093261824, 325.1744384928, 240.9221191168, 383.0356445616], [174.2435913216, 447.4193115648, 338.6700439552, 597.0073242192], [185.4727172608, 394.4820556944, 321.8263549952, 505.1690674032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048352_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, a gun, a person, a glasses, and two gloves.", "boxes_value": [[70.6528320512, 90.97924807519996, 350.6321411072, 452.47155759200007], [0, 279.8963623184, 311.3140258816, 542], [212.5471191552, 139.41455073920002, 335.7929687552, 226.2073974704], [70.6528320512, 90.97924807519996, 350.6321411072, 452.47155759200007], [122.30932618240001, 138.1744384928, 212.9221191168, 196.0356445616], [146.2435913216, 260.4193115648, 310.6700439552, 410.0073242192], [157.4727172608, 207.4820556944, 293.8263549952, 318.1690674032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048355.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[70.87188723599999, 186.223388672, 530.2938232590001, 282.2221679616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048355_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify.", "boxes_value": [[70.87188723599999, 24.223388672, 530.2938232590001, 120.22216796160001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048355.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include four pillows, and a couch.", "boxes_value": [[70.87188723599999, 186.223388672, 530.2938232590001, 282.2221679616], [70.87188723599999, 206.9653320192, 111.75946043740001, 282.2221679616], [129.2522582736, 189.7703857664, 194.5562744304, 256.1939086848], [249.6586914036, 170.4347534336, 540.2025146134, 312.3139648512], [256.426940937, 186.223388672, 312.3657226702, 237.5005493248], [473.18969723059996, 187.388732928, 530.2938232590001, 237.88903808]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048355_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each object you identify. For your reference, objects involved in this region include four pillows, and a couch.", "boxes_value": [[70.87188723599999, 24.223388672, 530.2938232590001, 120.22216796160001], [70.87188723599999, 44.96533201919999, 111.75946043740001, 120.22216796160001], [129.2522582736, 27.77038576640001, 194.5562744304, 94.19390868480002], [249.6586914036, 8.434753433600008, 540.2025146134, 144], [256.426940937, 24.223388672, 312.3657226702, 75.5005493248], [473.18969723059996, 25.388732927999996, 530.2938232590001, 75.88903808]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048356.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[32.4000244224, 106.05883785879999, 225.919372544, 458.58886719910004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048356_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[32.4000244224, 89.05883785879999, 225.919372544, 441.58886719910004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048356.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three sneakers, a helmet, and a motorcycle.", "boxes_value": [[32.4000244224, 106.05883785879999, 225.919372544, 458.58886719910004], [32.4000244224, 106.05883785879999, 225.919372544, 458.58886719910004], [65.8446044672, 409.7716064606, 83.3944701952, 443.5714111193], [182.8436279296, 372.7219238136, 224.4432983552, 399.37170410960005], [208.7351074304, 361.8342285315, 231.8395385856, 376.2271728382], [95.4080810496, 107.66003416790001, 169.3963012608, 184.8420410071], [43.3379516416, 190.0376586842, 212.7820434432, 539.3656005706999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048356_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three sneakers, a helmet, and a motorcycle.", "boxes_value": [[32.4000244224, 89.05883785879999, 225.919372544, 441.58886719910004], [32.4000244224, 89.05883785879999, 225.919372544, 441.58886719910004], [65.8446044672, 392.7716064606, 83.3944701952, 426.5714111193], [182.8436279296, 355.7219238136, 224.4432983552, 382.37170410960005], [208.7351074304, 344.8342285315, 231.8395385856, 359.2271728382], [95.4080810496, 90.66003416790001, 169.3963012608, 167.8420410071], [43.3379516416, 173.0376586842, 212.7820434432, 522.3656005706999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048358.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[71.2704442966, 478.625549312, 385.75510143350004, 512.221566208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048358_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[71.2704442966, 8.625549311999976, 385.75510143350004, 42]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048358.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[71.2704442966, 478.625549312, 385.75510143350004, 512.221566208], [176.2840576047, 495.1823730688, 198.4409179365, 511.8851928576], [265.7027587904, 496.010192896, 291.85278319810004, 511.9338989056], [256.2070312866, 478.625549312, 283.0874023707, 511.641723648], [71.2704442966, 499.528803584, 109.5208175872, 511.99240832], [344.5373040988, 486.5749367296, 385.75510143350004, 512.221566208]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048358_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[71.2704442966, 8.625549311999976, 385.75510143350004, 42], [176.2840576047, 25.18237306880002, 198.4409179365, 41.885192857599975], [265.7027587904, 26.010192895999978, 291.85278319810004, 41.93389890560002], [256.2070312866, 8.625549311999976, 283.0874023707, 41.64172364799998], [71.2704442966, 29.528803584000002, 109.5208175872, 41.99240831999998], [344.5373040988, 16.574936729599983, 385.75510143350004, 42]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048360.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object.", "boxes_value": [[205.7932129191, 0.0046007296, 771.8303223107999, 340.4542846464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048360_crop.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object.", "boxes_value": [[141.7932129191, 0.0046007296, 707.8303223107999, 340.4542846464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048360.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a bottle, a plate, a bowl, and two baksets.", "boxes_value": [[205.7932129191, 0.0046007296, 771.8303223107999, 340.4542846464], [85.8085327507, 49.037902848, 840.1370850018, 512.9739990016], [732.5605468719, 10.5477905408, 771.8303223107999, 56.2855835136], [550.9207763429, 326.1022339072, 602.6549072575, 340.4542846464], [205.7932129191, 0.2746582016, 341.986938478, 54.1712036352], [508.8400879023, 0.0046007296, 639.9577535284, 51.129735936], [428.41784666, 0.9218749952, 516.8027344183, 51.0863036928]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048360_crop.jpg", "text": "Help me grasp the context of the region within image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a bottle, a plate, a bowl, and two baksets.", "boxes_value": [[141.7932129191, 0.0046007296, 707.8303223107999, 340.4542846464], [21.808532750699996, 49.037902848, 776.1370850018, 425], [668.5605468719, 10.5477905408, 707.8303223107999, 56.2855835136], [486.9207763429, 326.1022339072, 538.6549072575, 340.4542846464], [141.7932129191, 0.2746582016, 277.986938478, 54.1712036352], [444.8400879023, 0.0046007296, 575.9577535284, 51.129735936], [364.41784666, 0.9218749952, 452.8027344183, 51.0863036928]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048361.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[185.92980955099998, 86.5481567232, 625.8007812420001, 182.9442138624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048361_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[110.92980955099998, 24.548156723199995, 550.8007812420001, 120.9442138624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048361.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two glasses, four desks, two lamps, and three flowers.", "boxes_value": [[185.92980955099998, 86.5481567232, 625.8007812420001, 182.9442138624], [543.4482421546, 86.5481567232, 563.0192871008, 114.227172864], [605.9102783449999, 107.7454223872, 625.8007812420001, 132.6685791232], [602.8281250082, 86.4606933504, 628.5878905996, 252.3673705984], [534.8356933633, 57.6409301504, 566.3203125003, 257.7930297856], [435.1479492388, 25.4229736448, 473.67565918500003, 257.7932739072], [299.0932616979, 2.1468506112, 336.2449951314, 229.0377197056], [543.4482421546, 86.5481567232, 563.0192871008, 114.227172864], [605.9102783449999, 107.7454223872, 625.8007812420001, 132.6685791232], [185.92980955099998, 120.0322875904, 259.90838622670003, 167.0679931392], [356.7198486502, 135.5586547712, 422.9351806827, 174.8311767552], [444.33813476190005, 154.3836669952, 531.4129638523, 182.9442138624]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6], [7, 8], [9, 10, 11]]}, {"image_path": "objects365_v1_00048361_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two glasses, four desks, two lamps, and three flowers.", "boxes_value": [[110.92980955099998, 24.548156723199995, 550.8007812420001, 120.9442138624], [468.44824215460005, 24.548156723199995, 488.01928710080006, 52.227172863999996], [530.9102783449999, 45.745422387199994, 550.8007812420001, 70.6685791232], [527.8281250082, 24.460693350400007, 553.5878905996, 145], [459.83569336330004, 0, 491.3203125003, 145], [360.1479492388, 0, 398.67565918500003, 145], [224.0932616979, 0, 261.2449951314, 145], [468.44824215460005, 24.548156723199995, 488.01928710080006, 52.227172863999996], [530.9102783449999, 45.745422387199994, 550.8007812420001, 70.6685791232], [110.92980955099998, 58.0322875904, 184.90838622670003, 105.06799313920001], [281.7198486502, 73.5586547712, 347.9351806827, 112.8311767552], [369.33813476190005, 92.3836669952, 456.4129638523, 120.9442138624]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6], [7, 8], [9, 10, 11]]}, {"image_path": "objects365_v1_00048363.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[379.6972656384, 25.7252197376, 714.7279052544001, 451.2217406976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048363_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[84.69726563839998, 25.7252197376, 419.72790525440007, 451.2217406976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048363.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a bed, a lamp, two pillows, and two towels.", "boxes_value": [[379.6972656384, 25.7252197376, 714.7279052544001, 451.2217406976], [620.6667480576, 312.221191424, 765.5056152576, 479.6451416064], [379.6972656384, 25.7252197376, 714.7279052544001, 451.2217406976], [493.76281735680004, 183.5896606208, 529.9428711168, 240.7480468992], [442.6850585856, 226.154418944, 527.8146972672, 263.550598144], [380.0540771328, 227.0665283072, 464.879638656, 285.4410400256], [547.8200683776, 257.6213989376, 593.778442368, 287.8772582912], [554.7138671616, 262.9832153088, 608.3317870848, 298.6008300544]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00048363_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a bed, a lamp, two pillows, and two towels.", "boxes_value": [[84.69726563839998, 25.7252197376, 419.72790525440007, 451.2217406976], [325.6667480576, 312.221191424, 470.50561525759997, 479.6451416064], [84.69726563839998, 25.7252197376, 419.72790525440007, 451.2217406976], [198.76281735680004, 183.5896606208, 234.9428711168, 240.7480468992], [147.68505858560002, 226.154418944, 232.81469726720002, 263.550598144], [85.05407713279999, 227.0665283072, 169.879638656, 285.4410400256], [252.82006837760002, 257.6213989376, 298.778442368, 287.8772582912], [259.7138671616, 262.9832153088, 313.3317870848, 298.6008300544]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00048364.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[341.43786619229996, 239.9589843968, 598.5490722947001, 292.739440896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048364_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[64.43786619229996, 13.958984396799991, 321.54907229470007, 66.73944089600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048364.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pickup truck, two cars, a van, and a suv.", "boxes_value": [[341.43786619229996, 239.9589843968, 598.5490722947001, 292.739440896], [341.43786619229996, 248.2224731648, 404.92651367229996, 290.1291504128], [403.04077150380004, 246.6858520576, 470.720092795, 289.8497314304], [501.03723141430004, 241.014587392, 542.0740966966, 263.7102050816], [543.9213867164, 239.9589843968, 596.4378662256, 267.4048461824], [510.009887721, 248.7997436416, 598.5490722947001, 292.739440896]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048364_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a pickup truck, two cars, a van, and a suv.", "boxes_value": [[64.43786619229996, 13.958984396799991, 321.54907229470007, 66.73944089600002], [64.43786619229996, 22.222473164799993, 127.92651367229996, 64.12915041280002], [126.04077150380004, 20.685852057599988, 193.72009279500003, 63.84973143040003], [224.03723141430004, 15.01458739200001, 265.0740966966, 37.71020508160001], [266.9213867164, 13.958984396799991, 319.43786622560003, 41.40484618239998], [233.00988772099998, 22.799743641600003, 321.54907229470007, 66.73944089600002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048365.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 96.0353393664, 326.902893056, 511.9773941248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048365_crop.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 96.0353393664, 326.902893056, 511.9773941248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048365.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a handbag, two boots, and a calculator.", "boxes_value": [[0, 96.0353393664, 326.902893056, 511.9773941248], [0, 171.3416748032, 152.7456665088, 512.3339843584], [263.222656256, 107.5679321088, 353.4780883968, 369.8101196288], [162.437438976, 96.0353393664, 326.902893056, 389.3654785023999], [109.848428544, 192.42238848, 167.2320938496, 340.036850176], [0, 474.9630222848, 30.9817734144, 511.9773941248], [75.1283625472, 473.5146018304, 128.2421840384, 511.9655991296], [0, 125.6935425024, 24.655883776, 151.3690796032]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048365_crop.jpg", "text": "Please provide details for the area within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a handbag, two boots, and a calculator.", "boxes_value": [[0, 96.0353393664, 326.902893056, 511.9773941248], [0, 171.3416748032, 152.7456665088, 512], [263.222656256, 107.5679321088, 353.4780883968, 369.8101196288], [162.437438976, 96.0353393664, 326.902893056, 389.3654785023999], [109.848428544, 192.42238848, 167.2320938496, 340.036850176], [0, 474.9630222848, 30.9817734144, 511.9773941248], [75.1283625472, 473.5146018304, 128.2421840384, 511.9655991296], [0, 125.6935425024, 24.655883776, 151.3690796032]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048367.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates.", "boxes_value": [[246.30676271480002, 89.9500732416, 508.00366209860005, 147.8337402368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048367_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates.", "boxes_value": [[66.30676271480002, 14.950073241599995, 328.00366209860005, 72.8337402368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048367.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[246.30676271480002, 89.9500732416, 508.00366209860005, 147.8337402368], [328.328979507, 89.9500732416, 386.4754638737, 147.8337402368], [448.2999267356, 101.0845337088, 508.00366209860005, 142.2013549568], [254.54406741300002, 134.8791503872, 295.0975952464, 164.167846656], [246.30676271480002, 98.6427001856, 315.5761718665, 137.216247552], [381.56567384209995, 105.0250244096, 401.64318849100005, 131.606445312]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048367_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[66.30676271480002, 14.950073241599995, 328.00366209860005, 72.8337402368], [148.32897950699999, 14.950073241599995, 206.4754638737, 72.8337402368], [268.2999267356, 26.084533708799995, 328.00366209860005, 67.2013549568], [74.54406741300002, 59.87915038720001, 115.09759524639998, 87], [66.30676271480002, 23.642700185600006, 135.57617186649998, 62.216247552], [201.56567384209995, 30.025024409599993, 221.64318849100005, 56.606445312000005]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048370.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[494.327514661, 315.0739135488, 596.4102783009, 411.4239501824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048370_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[26.32751466100001, 25.07391354880002, 128.41027830090002, 121.4239501824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048370.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a person, two books, and a cup.", "boxes_value": [[494.327514661, 315.0739135488, 596.4102783009, 411.4239501824], [152.11730959730002, 243.6206054912, 771.4200439652, 512.3310546944], [545.6805419824, 185.191589376, 772.0948486157, 425.1296996864], [494.327514661, 351.2786254848, 596.4102783009, 400.2633667072], [495.4738769849, 381.061828608, 595.5689697596, 411.4239501824], [508.6207275127, 315.0739135488, 528.5345458849, 345.1399536128]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048370_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a person, two books, and a cup.", "boxes_value": [[26.32751466100001, 25.07391354880002, 128.41027830090002, 121.4239501824], [0, 0, 153, 145], [77.68054198239997, 0, 153, 135.1296996864], [26.32751466100001, 61.27862548479999, 128.41027830090002, 110.26336670720002], [27.473876984900016, 91.06182860799998, 127.56896975960001, 121.4239501824], [40.6207275127, 25.07391354880002, 60.53454588490001, 55.139953612800014]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048371.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[214.84484861509998, 116.1154174976, 350.1219482373, 331.5134887936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048371_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[33.84484861509998, 54.115417497600006, 169.1219482373, 269.5134887936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048371.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, two hats, and a sneakers.", "boxes_value": [[214.84484861509998, 116.1154174976, 350.1219482373, 331.5134887936], [312.4820556481, 144.4376220672, 356.0833739955, 325.550903296], [267.3900756626, 129.5311889408, 319.1899413951, 327.4142455808], [214.84484861509998, 116.1154174976, 298.3209228391, 331.5134887936], [174.97009280679998, 124.3139038208, 246.8937377641, 339.3394165248], [242.0369873269, 118.1707153408, 277.59698488920003, 150.5653075968], [321.0062865973, 145.0447387648, 350.1219482373, 166.1005249024], [328.4613037221, 310.05249024, 353.1950683527, 323.9423827968]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048371_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, two hats, and a sneakers.", "boxes_value": [[33.84484861509998, 54.115417497600006, 169.1219482373, 269.5134887936], [131.48205564810002, 82.43762206720001, 175.08337399549998, 263.550903296], [86.3900756626, 67.5311889408, 138.1899413951, 265.4142455808], [33.84484861509998, 54.115417497600006, 117.32092283909998, 269.5134887936], [0, 62.31390382079999, 65.8937377641, 277.3394165248], [61.036987326900004, 56.1707153408, 96.59698488920003, 88.56530759680001], [140.0062865973, 83.0447387648, 169.1219482373, 104.1005249024], [147.4613037221, 248.05249024, 172.1950683527, 261.9423827968]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00048373.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[6.7810058496, 64.263488768, 174.6705932544, 291.0304565248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048373_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[6.7810058496, 57.263488768, 174.6705932544, 284.0304565248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048373.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a spoon, a knife, two plates, and a wine glass.", "boxes_value": [[6.7810058496, 64.263488768, 174.6705932544, 291.0304565248], [0.1383666432, 274.049133312, 40.4237060352, 307.053955072], [31.6592407296, 270.6516113408, 106.71942136319998, 291.0304565248], [0.9097900031999999, 200.2736205824, 72.25854489599999, 266.283325184], [74.6853637632, 64.263488768, 174.6705932544, 271.9998169088], [6.7810058496, 78.8579101696, 95.90002444800001, 104.7134399488]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048373_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include a spoon, a knife, two plates, and a wine glass.", "boxes_value": [[6.7810058496, 57.263488768, 174.6705932544, 284.0304565248], [0.1383666432, 267.049133312, 40.4237060352, 300.053955072], [31.6592407296, 263.6516113408, 106.71942136319998, 284.0304565248], [0.9097900031999999, 193.2736205824, 72.25854489599999, 259.283325184], [74.6853637632, 57.263488768, 174.6705932544, 264.9998169088], [6.7810058496, 71.8579101696, 95.90002444800001, 97.7134399488]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048376.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[232.5905151488, 351.1070556672, 302.557006848, 448.473022464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048376_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe.", "boxes_value": [[17.590515148799994, 25.107055667199973, 87.55700684800001, 122.473022464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048376.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, three wine glasses, and a cup.", "boxes_value": [[232.5905151488, 351.1070556672, 302.557006848, 448.473022464], [229.5544433664, 341.131958016, 284.23156736, 381.3726196224], [274.1790160896, 351.1070556672, 300.7449340928, 404.386413568], [275.6468506112, 412.266540544, 289.8358764544, 437.7089233408], [287.389465344, 415.6915283456, 302.557006848, 437.2196655104], [232.5905151488, 373.1244506624, 257.054382336, 448.473022464]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00048376_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, three wine glasses, and a cup.", "boxes_value": [[17.590515148799994, 25.107055667199973, 87.55700684800001, 122.473022464], [14.554443366399994, 15.131958015999999, 69.23156735999999, 55.37261962240001], [59.1790160896, 25.107055667199973, 85.74493409280001, 78.38641356800002], [60.64685061120002, 86.26654054400001, 74.83587645440002, 111.70892334080003], [72.38946534399997, 89.69152834559998, 87.55700684800001, 111.21966551039998], [17.590515148799994, 47.12445066240002, 42.054382336, 122.473022464]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00048379.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please mention the objects and their locations.", "boxes_value": [[287.9053954971, 241.6962280448, 481.87683106590003, 315.7985229312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048379_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please mention the objects and their locations.", "boxes_value": [[48.90539549710002, 18.696228044799994, 242.87683106590003, 92.79852293120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048379.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, and three potted plants.", "boxes_value": [[287.9053954971, 241.6962280448, 481.87683106590003, 315.7985229312], [440.5324707144, 241.6962280448, 481.87683106590003, 280.5264282112], [453.10339354250004, 274.939331072, 472.9374999684, 292.538635264], [379.4421386668, 272.082519552, 421.7215576412, 315.7985229312], [340.03619385359997, 254.2266845696, 361.3811035193, 286.0388183552], [287.9053954971, 268.5934448128, 324.0275268523, 311.6937255936]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048379_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, a vase, and three potted plants.", "boxes_value": [[48.90539549710002, 18.696228044799994, 242.87683106590003, 92.79852293120001], [201.5324707144, 18.696228044799994, 242.87683106590003, 57.52642821120003], [214.10339354250004, 51.939331072000016, 233.9374999684, 69.53863526399999], [140.44213866680002, 49.08251955200001, 182.72155764119998, 92.79852293120001], [101.03619385359997, 31.226684569599996, 122.38110351929998, 63.03881835520002], [48.90539549710002, 45.59344481279999, 85.02752685230001, 88.69372559359999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048381.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[312.8542480218, 67.1369628672, 715.2335204766, 317.0658569216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048381_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[100.8542480218, 63.1369628672, 503.2335204766, 313.0658569216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048381.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, a street lights, and a truck.", "boxes_value": [[312.8542480218, 67.1369628672, 715.2335204766, 317.0658569216], [506.1033935728, 144.9714355712, 559.6439209022, 317.0658569216], [312.8542480218, 140.8592529408, 363.2912597622, 296.5814209024], [520.1741943735, 145.3787231232, 547.3200683213, 162.2593383936], [598.29235843, 67.1369628672, 636.9073486103999, 219.1054687744], [663.8372802834, 191.8093261824, 715.2335204766, 217.1722412032]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048381_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, a street lights, and a truck.", "boxes_value": [[100.8542480218, 63.1369628672, 503.2335204766, 313.0658569216], [294.1033935728, 140.9714355712, 347.64392090219997, 313.0658569216], [100.8542480218, 136.8592529408, 151.2912597622, 292.5814209024], [308.1741943735, 141.3787231232, 335.3200683213, 158.2593383936], [386.29235843000004, 63.1369628672, 424.90734861039994, 215.1054687744], [451.8372802834, 187.8093261824, 503.2335204766, 213.1722412032]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048382.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object.", "boxes_value": [[607.7998046835, 252.0387573248, 757.6682129082, 504.7599487488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048382_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object.", "boxes_value": [[37.79980468350004, 64.0387573248, 187.66821290819996, 316.7599487488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048382.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object. For your reference, objects involved in this region include four chairs, and two desks.", "boxes_value": [[607.7998046835, 252.0387573248, 757.6682129082, 504.7599487488], [518.5683593540999, 343.0321655296, 685.1641845972, 511.183288576], [651.6354980387999, 328.3633422848, 756.9366455211, 495.483093248], [607.7998046835, 303.9511108608, 757.6682129082, 504.7599487488], [565.8767089767, 263.7550659072, 664.5936279279, 332.484252928], [669.3140869503001, 252.0387573248, 750.21130371, 317.4450683392], [648.1408691769, 249.2006225408, 756.982666044, 319.44152832]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3, 6]]}, {"image_path": "objects365_v1_00048382_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object. For your reference, objects involved in this region include four chairs, and two desks.", "boxes_value": [[37.79980468350004, 64.0387573248, 187.66821290819996, 316.7599487488], [0, 155.03216552959998, 115.1641845972, 323.183288576], [81.63549803879994, 140.3633422848, 186.9366455211, 307.483093248], [37.79980468350004, 115.95111086079999, 187.66821290819996, 316.7599487488], [0, 75.75506590719999, 94.5936279279, 144.484252928], [99.31408695030007, 64.0387573248, 180.21130371000004, 129.44506833920002], [78.14086917689997, 61.2006225408, 186.98266604399998, 131.44152831999997]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3, 6]]}, {"image_path": "objects365_v1_00048384.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[187.50933840099998, 184.5551757824, 268.740722635, 354.5916137472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048384_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[20.50933840099998, 42.5551757824, 101.740722635, 212.59161374719997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048384.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, two people, and a glasses.", "boxes_value": [[187.50933840099998, 184.5551757824, 268.740722635, 354.5916137472], [224.830017104, 180.9323730432, 275.03112791, 269.9487915008], [189.637451157, 184.5551757824, 224.830017104, 203.1864624128], [187.50933840099998, 199.5698242048, 268.740722635, 354.5916137472], [15.118469212, 141.8867187712, 297.723510777, 511.92730711039997], [191.54241945299998, 234.8889770496, 219.340270996, 248.0929565184]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048384_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pictures, two people, and a glasses.", "boxes_value": [[20.50933840099998, 42.5551757824, 101.740722635, 212.59161374719997], [57.83001710400001, 38.93237304319999, 108.03112791000001, 127.94879150079998], [22.637451156999987, 42.5551757824, 57.83001710400001, 61.18646241280001], [20.50933840099998, 57.5698242048, 101.740722635, 212.59161374719997], [0, 0, 122, 255], [24.54241945299998, 92.88897704959999, 52.34027099599999, 106.0929565184]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048388.jpg", "text": "What objects or scenery can be found in the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0.09429934079999999, 379.5910034432, 420.09631349759997, 423.9960327168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048388_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[0.09429934079999999, 11.591003443200009, 420.09631349759997, 55.996032716800016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048388.jpg", "text": "What objects or scenery can be found in the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[0.09429934079999999, 379.5910034432, 420.09631349759997, 423.9960327168], [0.09429934079999999, 389.9127197184, 14.2108764672, 423.9960327168], [254.642395008, 373.5810546688, 278.3968505856, 425.071899392], [308.9792480256, 380.4396972544, 327.05114749439997, 411.8277587968], [405.83581539840003, 381.1365356544, 420.09631349759997, 409.8967285248], [384.55273436159996, 379.5910034432, 399.75341798399995, 401.6912231424]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048388_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[0.09429934079999999, 11.591003443200009, 420.09631349759997, 55.996032716800016], [0.09429934079999999, 21.912719718400012, 14.2108764672, 55.996032716800016], [254.642395008, 5.581054668799993, 278.3968505856, 57.07189939199998], [308.9792480256, 12.439697254400016, 327.05114749439997, 43.827758796800026], [405.83581539840003, 13.136535654400006, 420.09631349759997, 41.89672852479998], [384.55273436159996, 11.591003443200009, 399.75341798399995, 33.69122314240002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048393.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates.", "boxes_value": [[113.75152587000001, 122.8911132672, 400.4493408266, 512.1328124928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048393_crop.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates.", "boxes_value": [[71.75152587000001, 97.8911132672, 358.4493408266, 487]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048393.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a tie, a slippers, and a wine glass.", "boxes_value": [[113.75152587000001, 122.8911132672, 400.4493408266, 512.1328124928], [113.75152587000001, 122.8911132672, 337.1767578012, 512.1328124928], [126.1619262816, 259.4136963072, 148.3931884742, 290.1954345472], [229.79345699720002, 275.885986304, 253.59948729200002, 348.139404288], [371.3818359457, 463.9660034048, 400.4493408266, 483.7170409984], [293.5523681919, 388.846313472, 332.9821167162, 469.7022705152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048393_crop.jpg", "text": "Tell me about the region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a tie, a slippers, and a wine glass.", "boxes_value": [[71.75152587000001, 97.8911132672, 358.4493408266, 487], [71.75152587000001, 97.8911132672, 295.1767578012, 487], [84.1619262816, 234.41369630719998, 106.3931884742, 265.1954345472], [187.79345699720002, 250.88598630400003, 211.59948729200002, 323.139404288], [329.3818359457, 438.9660034048, 358.4493408266, 458.7170409984], [251.55236819189997, 363.846313472, 290.9821167162, 444.7022705152]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048394.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for all objects that you mention.", "boxes_value": [[49.074951168, 107.00000002360001, 415.0848388608, 622.2636718404]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048394_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for all objects that you mention.", "boxes_value": [[49.074951168, 107.00000002360001, 415.0848388608, 622.2636718404]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048394.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a flower, a tie, two high heels, and a leather shoes.", "boxes_value": [[49.074951168, 107.00000002360001, 415.0848388608, 622.2636718404], [371.5, 107.00000002360001, 415.0848388608, 222.3216552364], [82.0898437632, 165.2431640596, 128.750366208, 206.51977537279998], [143.040832512, 207.53643799879998, 166.970825216, 324.037841764], [49.074951168, 600.6726074404, 85.2399291904, 622.2636718404], [174.3027954176, 578.0020752036, 219.104125952, 616.3261718992], [229.794982912, 585.0081787028, 263.1068115456, 605.2331542856]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5]]}, {"image_path": "objects365_v1_00048394_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a flower, a tie, two high heels, and a leather shoes.", "boxes_value": [[49.074951168, 107.00000002360001, 415.0848388608, 622.2636718404], [371.5, 107.00000002360001, 415.0848388608, 222.3216552364], [82.0898437632, 165.2431640596, 128.750366208, 206.51977537279998], [143.040832512, 207.53643799879998, 166.970825216, 324.037841764], [49.074951168, 600.6726074404, 85.2399291904, 622.2636718404], [174.3027954176, 578.0020752036, 219.104125952, 616.3261718992], [229.794982912, 585.0081787028, 263.1068115456, 605.2331542856]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5]]}, {"image_path": "objects365_v1_00048395.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[453.7308349784, 336.5773925888, 704.836425758, 366.995056128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048395_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention.", "boxes_value": [[63.730834978400026, 8.577392588800024, 314.836425758, 38.99505612799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048395.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[453.7308349784, 336.5773925888, 704.836425758, 366.995056128], [445.65173338840003, 124.8941040128, 572.8234863112, 368.1160888832], [453.7308349784, 345.8536987136, 490.62036136160003, 364.8377685504], [544.120971678, 336.5773925888, 570.6555175428, 366.995056128], [651.3791504264, 336.919067392, 673.875732458, 358.0792236544], [687.9083251972, 337.8100585984, 704.836425758, 358.9702148608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048395_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[63.730834978400026, 8.577392588800024, 314.836425758, 38.99505612799999], [55.65173338840003, 0, 182.82348631119999, 40.11608888320001], [63.730834978400026, 17.85369871360001, 100.62036136160003, 36.83776855040003], [154.12097167800005, 8.577392588800024, 180.6555175428, 38.99505612799999], [261.3791504264, 8.919067391999988, 283.875732458, 30.079223654399982], [297.90832519720004, 9.810058598400019, 314.836425758, 30.970214860800013]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048396.jpg", "text": "Can you give me a visual rundown of the area in ? Please point out the objects and their coordinates.", "boxes_value": [[54.28643796, 129.5324096512, 425.51232912, 235.0349121024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048396_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please point out the objects and their coordinates.", "boxes_value": [[54.28643796, 26.5324096512, 425.51232912, 132.0349121024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048396.jpg", "text": "Can you give me a visual rundown of the area in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a potted plant, a mirror, a flower, and a cup.", "boxes_value": [[54.28643796, 129.5324096512, 425.51232912, 235.0349121024], [54.28643796, 129.5324096512, 98.91455076, 191.209533696], [281.50128174, 208.8255615488, 302.973999, 235.0349121024], [290.67858888, 141.7354736128, 310.0731201, 215.0401611264], [399.49145508, 201.2122192384, 425.51232912, 224.2161865216], [281.46411132, 213.762023936, 310.05487062000003, 235.2050781184]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048396_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a potted plant, a mirror, a flower, and a cup.", "boxes_value": [[54.28643796, 26.5324096512, 425.51232912, 132.0349121024], [54.28643796, 26.5324096512, 98.91455076, 88.209533696], [281.50128174, 105.82556154880001, 302.973999, 132.0349121024], [290.67858888, 38.73547361280001, 310.0731201, 112.0401611264], [399.49145508, 98.2122192384, 425.51232912, 121.21618652160001], [281.46411132, 110.76202393599999, 310.05487062000003, 132.2050781184]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048397.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[314.6503295773, 222.5402221568, 458.5898437525, 371.2058715648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048397_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object.", "boxes_value": [[36.6503295773, 37.54022215680001, 180.58984375249997, 186.20587156480002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048397.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a handbag, a hat, and a chair.", "boxes_value": [[314.6503295773, 222.5402221568, 458.5898437525, 371.2058715648], [409.660034176, 222.5402221568, 457.05017092229997, 360.3627319296], [296.80584716109996, 233.4716186624, 359.8521728448, 413.4669189632], [314.6503295773, 317.6114502144, 344.62292481689997, 358.4831542784], [329.1879272736, 234.0235595776, 350.34777829420005, 252.428833024], [424.4279785399, 296.310729984, 458.5898437525, 371.2058715648]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048397_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a handbag, a hat, and a chair.", "boxes_value": [[36.6503295773, 37.54022215680001, 180.58984375249997, 186.20587156480002], [131.660034176, 37.54022215680001, 179.05017092229997, 175.3627319296], [18.805847161099962, 48.471618662400004, 81.85217284480001, 223], [36.6503295773, 132.6114502144, 66.62292481689997, 173.48315427839998], [51.187927273599996, 49.0235595776, 72.34777829420005, 67.428833024], [146.42797853989998, 111.31072998399998, 180.58984375249997, 186.20587156480002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048398.jpg", "text": "Please tell me more about the rectangular section in the photo . Please point out the objects and their coordinates.", "boxes_value": [[106.93652341580001, 325.5669555712, 225.2342529085, 380.05847168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048398_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please point out the objects and their coordinates.", "boxes_value": [[29.93652341580001, 14.566955571200026, 148.2342529085, 69.05847168000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048398.jpg", "text": "Please tell me more about the rectangular section in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a person, and three cups.", "boxes_value": [[106.93652341580001, 325.5669555712, 225.2342529085, 380.05847168], [73.1677856157, 293.8940429824, 396.0025635031, 511.1630859264], [159.0249633483, 162.4196777472, 349.868530251, 511.9821167104], [194.9611816322, 333.018798848, 225.2342529085, 380.05847168], [124.16882325139998, 348.8539428864, 162.8251953212, 401.4824828928], [106.93652341580001, 325.5669555712, 140.9354858201, 363.2918701056]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048398_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a person, and three cups.", "boxes_value": [[29.93652341580001, 14.566955571200026, 148.2342529085, 69.05847168000003], [0, 0, 177, 82], [82.02496334829999, 0, 177, 82], [117.96118163220001, 22.018798848000017, 148.2342529085, 69.05847168000003], [47.168823251399985, 37.85394288639998, 85.8251953212, 82], [29.93652341580001, 14.566955571200026, 63.93548582010001, 52.291870105600026]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048399.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[264.7326049641, 193.6629028352, 433.4948730693, 500.7513427968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048399_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations.", "boxes_value": [[42.732604964100005, 77.66290283519999, 211.4948730693, 384.7513427968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048399.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a sneakers, a slippers, a backpack, and a laptop.", "boxes_value": [[264.7326049641, 193.6629028352, 433.4948730693, 500.7513427968], [266.7797241187, 256.3405151232, 374.8687744233, 499.8031005696], [267.4989013993, 474.571899392, 296.0277709657, 500.7513427968], [383.1756592001, 343.7292480512, 403.7899169803, 358.7481079296], [264.7326049641, 193.6629028352, 310.1965332262, 228.4124145664], [400.908081055, 196.257934592, 433.4948730693, 225.9998779392]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048399_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a sneakers, a slippers, a backpack, and a laptop.", "boxes_value": [[42.732604964100005, 77.66290283519999, 211.4948730693, 384.7513427968], [44.77972411870002, 140.34051512320002, 152.8687744233, 383.8031005696], [45.498901399299996, 358.571899392, 74.02777096569997, 384.7513427968], [161.17565920009997, 227.7292480512, 181.7899169803, 242.7481079296], [42.732604964100005, 77.66290283519999, 88.19653322620002, 112.4124145664], [178.90808105500003, 80.257934592, 211.4948730693, 109.99987793919999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048404.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[235.2382202339, 209.0976562688, 668.2030029334, 402.481201152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048404_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[109.2382202339, 49.097656268799994, 542.2030029334, 242.48120115199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048404.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two lamps, a person, and a moniter.", "boxes_value": [[235.2382202339, 209.0976562688, 668.2030029334, 402.481201152], [585.7103271354, 322.0338134528, 668.2030029334, 402.481201152], [476.3660888681, 212.4096679936, 503.3349609586, 243.6367187456], [515.6364746416, 209.0976562688, 534.0888672182, 252.6263427584], [235.2382202339, 272.101623552, 302.8317871186, 297.0875854336], [348.64477537109997, 262.5633544704, 419.90551760339997, 296.998107904]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048404_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, two lamps, a person, and a moniter.", "boxes_value": [[109.2382202339, 49.097656268799994, 542.2030029334, 242.48120115199998], [459.7103271354, 162.03381345280002, 542.2030029334, 242.48120115199998], [350.3660888681, 52.40966799360001, 377.3349609586, 83.6367187456], [389.6364746416, 49.097656268799994, 408.0888672182, 92.6263427584], [109.2382202339, 112.10162355199998, 176.8317871186, 137.0875854336], [222.64477537109997, 102.56335447039999, 293.90551760339997, 136.998107904]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048406.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[219.66076658240002, 416.0195922944, 394.0295409908, 465.9705810432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048406_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[43.660766582400015, 13.019592294400013, 218.0295409908, 62.97058104320001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048406.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[219.66076658240002, 416.0195922944, 394.0295409908, 465.9705810432], [171.93133546159999, 60.0229492224, 440.4971923924, 466.66900633600005], [0, 164.0762329088, 363.71478268839996, 462.3293456896], [219.66076658240002, 416.0195922944, 279.11163331480003, 456.1643066368], [287.38568115400005, 427.051757824, 362.771850584, 462.9061279232], [354.19134522200005, 439.0031738368, 394.0295409908, 465.9705810432]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048406_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[43.660766582400015, 13.019592294400013, 218.0295409908, 62.97058104320001], [0, 0, 261, 63.66900633600005], [0, 0, 187.71478268839996, 59.32934568960002], [43.660766582400015, 13.019592294400013, 103.11163331480003, 53.164306636800006], [111.38568115400005, 24.051757823999992, 186.771850584, 59.90612792320002], [178.19134522200005, 36.00317383679999, 218.0295409908, 62.97058104320001]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048408.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[408.6385498368, 318.7886352384, 528.9971923968, 420.4256591872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048408_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[30.638549836799996, 25.78863523839999, 150.99719239679996, 127.42565918719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048408.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a moniter, three cabinets, and a potted plant.", "boxes_value": [[408.6385498368, 318.7886352384, 528.9971923968, 420.4256591872], [510.5247802368, 356.2013549568, 528.9971923968, 406.499694848], [453.7593993984, 338.5636596736, 549.1575927552001, 429.77209472], [433.454956032, 364.3469848576, 456.33764651519994, 420.4256591872], [408.6385498368, 352.7444457984, 456.33764651519994, 387.2296753152], [464.37622072320005, 318.7886352384, 482.15795896320003, 347.3064574976]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048408_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a moniter, three cabinets, and a potted plant.", "boxes_value": [[30.638549836799996, 25.78863523839999, 150.99719239679996, 127.42565918719998], [132.52478023679998, 63.2013549568, 150.99719239679996, 113.49969484799999], [75.75939939839998, 45.5636596736, 171.15759275520008, 136.77209471999998], [55.454956031999984, 71.34698485759998, 78.33764651519994, 127.42565918719998], [30.638549836799996, 59.74444579840002, 78.33764651519994, 94.22967531519998], [86.37622072320005, 25.78863523839999, 104.15795896320003, 54.30645749759998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048409.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[246.6934203844, 182.4978637824, 364.85375978080003, 426.6968384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048409_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[29.693420384400014, 61.4978637824, 147.85375978080003, 305.6968384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048409.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a gun, a person, a barrel, a helmet, and two sneakers.", "boxes_value": [[246.6934203844, 182.4978637824, 364.85375978080003, 426.6968384], [277.8956298456, 222.755615232, 364.85375978080003, 292.6781616128], [207.0222777992, 182.5702514688, 302.1171875036, 393.9750366208], [293.8376464652, 369.751525888, 342.9669189672, 426.6968384], [246.6934203844, 182.4978637824, 275.7863769648, 203.7748412928], [249.80828857080002, 343.6479492096, 272.45886229, 358.0039062528], [270.86364748, 372.6788940288, 301.8087158568, 394.6913452032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048409_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a gun, a person, a barrel, a helmet, and two sneakers.", "boxes_value": [[29.693420384400014, 61.4978637824, 147.85375978080003, 305.6968384], [60.89562984560001, 101.755615232, 147.85375978080003, 171.6781616128], [0, 61.570251468799995, 85.11718750360001, 272.9750366208], [76.83764646520001, 248.751525888, 125.9669189672, 305.6968384], [29.693420384400014, 61.4978637824, 58.78637696480001, 82.7748412928], [32.808288570800016, 222.64794920959997, 55.45886229000001, 237.0039062528], [53.86364748, 251.67889402880002, 84.8087158568, 273.6913452032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048410.jpg", "text": "What's going on in the section of contained within the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[1.099060052, 0, 651.0614013312, 93.0840454144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048410_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[1.099060052, 0, 651.0614013312, 93.0840454144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048410.jpg", "text": "What's going on in the section of contained within the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[1.099060052, 0, 651.0614013312, 93.0840454144], [428.44787596400005, 0, 457.2802734504, 23.9175414784], [382.181762684, 1.1198119936, 411.6846923808, 22.5765380608], [142.1345825168, 0.4492797952, 171.6375732072, 25.2586059776], [616.864746084, 0.4492797952, 651.0614013312, 83.5941161984], [1.099060052, 52.1958618112, 71.0523681576, 93.0840454144]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048410_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[1.099060052, 0, 651.0614013312, 93.0840454144], [428.44787596400005, 0, 457.2802734504, 23.9175414784], [382.181762684, 1.1198119936, 411.6846923808, 22.5765380608], [142.1345825168, 0.4492797952, 171.6375732072, 25.2586059776], [616.864746084, 0.4492797952, 651.0614013312, 83.5941161984], [1.099060052, 52.1958618112, 71.0523681576, 93.0840454144]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048411.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[393.3916015872, 165.8776855552, 768.0716552448, 239.3258667008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048411_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[94.3916015872, 18.877685555200003, 469, 92.32586670079999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048411.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two glasses, a tie, a tea pot, a chair, and a desk.", "boxes_value": [[393.3916015872, 165.8776855552, 768.0716552448, 239.3258667008], [691.7314453248, 200.9776001024, 730.6541747711999, 211.0557861376], [703.894775424, 165.8776855552, 743.1650390784, 181.8638305792], [393.3916015872, 180.4161376768, 411.1671142656, 215.0148925952], [756.6138915839999, 179.281433088, 768.0716552448, 216.6625366016], [572.9490966528, 205.8071288832, 615.1579590143999, 239.3258667008], [749.1441650688, 207.0707397632, 767.4997558272, 228.6185302528]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048411_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two glasses, a tie, a tea pot, a chair, and a desk.", "boxes_value": [[94.3916015872, 18.877685555200003, 469, 92.32586670079999], [392.7314453248, 53.977600102400004, 431.6541747711999, 64.0557861376], [404.89477542400004, 18.877685555200003, 444.1650390784, 34.8638305792], [94.3916015872, 33.41613767679999, 112.16711426559999, 68.0148925952], [457.61389158399993, 32.281433088, 469, 69.66253660160001], [273.9490966528, 58.807128883199994, 316.1579590143999, 92.32586670079999], [450.1441650688, 60.07073976320001, 468.49975582720003, 81.61853025280001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048412.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object.", "boxes_value": [[305.7150878574, 150.9275812352, 477.48436049339995, 229.9835815424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048412_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object.", "boxes_value": [[43.71508785740002, 19.927581235199995, 215.48436049339995, 98.9835815424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048412.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a helmet, and a hat.", "boxes_value": [[305.7150878574, 150.9275812352, 477.48436049339995, 229.9835815424], [227.47698977640002, 166.7779540992, 426.8415527038, 294.1498413056], [305.7150878574, 155.8757324288, 377.20727536180004, 229.9835815424], [438.6732177737, 151.0805053952, 478.7786864926, 194.2374267392], [335.8444018761, 157.1691390976, 375.27356911199996, 186.1375069184], [437.33578438029997, 150.9275812352, 477.48436049339995, 188.0416718848]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048412_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a helmet, and a hat.", "boxes_value": [[43.71508785740002, 19.927581235199995, 215.48436049339995, 98.9835815424], [0, 35.7779540992, 164.8415527038, 118], [43.71508785740002, 24.875732428800006, 115.20727536180004, 98.9835815424], [176.6732177737, 20.080505395199992, 216.7786864926, 63.237426739200004], [73.8444018761, 26.16913909760001, 113.27356911199996, 55.13750691839999], [175.33578438029997, 19.927581235199995, 215.48436049339995, 57.041671884799996]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048413.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[284.708496072, 250.9101562368, 420.032836916, 447.2101440512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048413_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[34.708496072, 49.91015623679999, 170.032836916, 246.21014405120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048413.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two sneakers, and a helmet.", "boxes_value": [[284.708496072, 250.9101562368, 420.032836916, 447.2101440512], [284.708496072, 259.3270263808, 371.181884766, 447.2101440512], [383.843627937, 250.9101562368, 420.032836916, 312.013549824], [297.678100566, 431.9030761472, 324.604980469, 444.8132324352], [357.064697297, 377.3115844608, 371.450317392, 403.5007934464], [309.45532225299996, 259.6448364032, 336.961792013, 282.7249755648]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048413_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two sneakers, and a helmet.", "boxes_value": [[34.708496072, 49.91015623679999, 170.032836916, 246.21014405120002], [34.708496072, 58.32702638080002, 121.181884766, 246.21014405120002], [133.843627937, 49.91015623679999, 170.032836916, 111.013549824], [47.67810056600001, 230.9030761472, 74.604980469, 243.8132324352], [107.06469729700001, 176.31158446080002, 121.45031739199999, 202.5007934464], [59.45532225299996, 58.644836403199974, 86.96179201299998, 81.72497556479999]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048414.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe.", "boxes_value": [[422.72216794039997, 33.1432495104, 681.8691406446, 190.1887206912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048414_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe.", "boxes_value": [[65.72216794039997, 33.1432495104, 324.86914064459995, 190.1887206912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048414.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a microwave, three cabinets, and a recorder.", "boxes_value": [[422.72216794039997, 33.1432495104, 681.8691406446, 190.1887206912], [414.3941650085, 106.5721435648, 548.4176025583, 184.8779907072], [422.72216794039997, 36.2231445504, 549.133544946, 109.92041016320002], [547.634765648, 33.1432495104, 608.6104736304, 190.1887206912], [592.3795166295, 58.5863647232, 681.8691406446, 127.01959229440001], [591.9182128972, 123.5543213056, 683.0439453127999, 170.0924682752]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048414_crop.jpg", "text": "What does the selected region in the image encompass? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a microwave, three cabinets, and a recorder.", "boxes_value": [[65.72216794039997, 33.1432495104, 324.86914064459995, 190.1887206912], [57.39416500850001, 106.5721435648, 191.41760255830002, 184.8779907072], [65.72216794039997, 36.2231445504, 192.13354494600003, 109.92041016320002], [190.63476564799998, 33.1432495104, 251.61047363039995, 190.1887206912], [235.37951662950002, 58.5863647232, 324.86914064459995, 127.01959229440001], [234.91821289719996, 123.5543213056, 326, 170.0924682752]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048416.jpg", "text": "Please describe the region in the picture . Include the coordinates for each object you identify.", "boxes_value": [[428.87756344469994, 216.6118163968, 682.6513671578, 438.7678833152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048416_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each object you identify.", "boxes_value": [[63.877563444699945, 55.61181639680001, 317.6513671578, 277.7678833152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048416.jpg", "text": "Please describe the region in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a carpet, and two barrels.", "boxes_value": [[428.87756344469994, 216.6118163968, 682.6513671578, 438.7678833152], [499.5771484375, 264.3712158208, 668.2218017571, 437.52508544], [622.2279052456, 342.8315429888, 682.6513671578, 423.9974365184], [454.4339599806, 216.6118163968, 532.9029541014, 282.1564941312], [556.3497314504, 376.7153930752, 607.3386230631, 438.7678833152], [428.87756344469994, 290.6115112448, 494.77819824130006, 400.2857055744]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048416_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a carpet, and two barrels.", "boxes_value": [[63.877563444699945, 55.61181639680001, 317.6513671578, 277.7678833152], [134.5771484375, 103.37121582079999, 303.2218017571, 276.52508544], [257.2279052456, 181.83154298879998, 317.6513671578, 262.9974365184], [89.43395998059998, 55.61181639680001, 167.90295410140004, 121.15649413120002], [191.3497314504, 215.71539307519998, 242.33862306310004, 277.7678833152], [63.877563444699945, 129.6115112448, 129.77819824130006, 239.28570557440003]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048417.jpg", "text": "What information can you give me about the coordinates in image ? Specify the location of each mentioned object.", "boxes_value": [[316.5545043968, 442.06372070400005, 489.402587904, 601.2420654336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048417_crop.jpg", "text": "What information can you give me about the coordinates in image ? Specify the location of each mentioned object.", "boxes_value": [[43.554504396799985, 40.06372070400005, 216.40258790399997, 199.24206543360003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048417.jpg", "text": "What information can you give me about the coordinates in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a suv.", "boxes_value": [[316.5545043968, 442.06372070400005, 489.402587904, 601.2420654336], [375.1708374016, 470.129638656, 410.3143310336, 601.2420654336], [451.919372544, 458.93481446399994, 489.402587904, 587.5268554751999], [348.4649048064, 442.06372070400005, 382.0256957952, 581.2913818368], [316.5545043968, 474.1179199488, 339.2855835136, 535.5905761536001], [308.5324096512, 459.6557617152, 348.1497192448, 486.4241943552]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048417_crop.jpg", "text": "What information can you give me about the coordinates in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a suv.", "boxes_value": [[43.554504396799985, 40.06372070400005, 216.40258790399997, 199.24206543360003], [102.17083740160001, 68.129638656, 137.3143310336, 199.24206543360003], [178.919372544, 56.93481446399994, 216.40258790399997, 185.5268554751999], [75.4649048064, 40.06372070400005, 109.02569579520002, 179.29138183680004], [43.554504396799985, 72.11791994880002, 66.28558351359999, 133.59057615360007], [35.5324096512, 57.655761715200015, 75.1497192448, 84.42419435519997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048418.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[665.4818115452, 220.0960082944, 764.2911376980001, 508.8509521408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048418_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates.", "boxes_value": [[25.481811545200003, 73.09600829440001, 124.29113769800006, 361.8509521408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048418.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two handbags, a desk, and two chairs.", "boxes_value": [[665.4818115452, 220.0960082944, 764.2911376980001, 508.8509521408], [688.8392333697, 424.9824828928, 730.2426757634, 508.8509521408], [698.9246825962, 385.7023315456, 762.0914306543, 445.6842040832], [522.148315421, 229.6857910272, 769.0860595590001, 311.1992187392], [720.3377685888, 220.0960082944, 764.2911376980001, 232.083251968], [665.4818115452, 220.652526848, 715.4858398350001, 230.9273681408]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048418_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two handbags, a desk, and two chairs.", "boxes_value": [[25.481811545200003, 73.09600829440001, 124.29113769800006, 361.8509521408], [48.83923336969997, 277.9824828928, 90.24267576340003, 361.8509521408], [58.9246825962, 238.70233154559998, 122.09143065429998, 298.6842040832], [0, 82.6857910272, 129, 164.19921873919998], [80.33776858880003, 73.09600829440001, 124.29113769800006, 85.08325196800001], [25.481811545200003, 73.65252684800001, 75.48583983500009, 83.92736814080001]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048420.jpg", "text": "Please elucidate the area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[563.3553467136, 363.0531005952, 768.3270263808001, 486.9273071104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048420_crop.jpg", "text": "Please elucidate the area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[51.355346713600056, 31.05310059520002, 256, 154.92730711040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048420.jpg", "text": "Please elucidate the area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a person, a backpack, and two benches.", "boxes_value": [[563.3553467136, 363.0531005952, 768.3270263808001, 486.9273071104], [358.218994176, 289.2774658048, 685.3814697216001, 499.2991332864], [460.8695068416, 84.6755981312, 757.6328124672, 512.4425048576], [564.8063964672, 262.2280883712, 681.9975585792, 504.9810180608], [741.591552768, 363.0531005952, 768.3270263808001, 418.3063354368], [563.3553467136, 430.7828979712, 766.5445556736, 486.9273071104]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048420_crop.jpg", "text": "Please elucidate the area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a person, a backpack, and two benches.", "boxes_value": [[51.355346713600056, 31.05310059520002, 256, 154.92730711040002], [0, 0, 173.38146972160007, 167.29913328639998], [0, 0, 245.63281246719998, 180], [52.80639646719999, 0, 169.99755857920002, 172.9810180608], [229.59155276800004, 31.05310059520002, 256, 86.3063354368], [51.355346713600056, 98.78289797119999, 254.54455567360003, 154.92730711040002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048422.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[220.5816650423, 124.800537088, 683.8250732205, 344.6955566592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048422_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[116.58166504229999, 55.800537088, 579, 275.6955566592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048422.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bowl, two plates, a fork, and two sausages.", "boxes_value": [[220.5816650423, 124.800537088, 683.8250732205, 344.6955566592], [600.7808838015, 87.3048706048, 680.5562744247001, 166.1632690176], [548.3791504096, 97.0900878848, 682.7790527101, 214.984680192], [650.8930664375, 297.4453735424, 683.8250732205, 344.6955566592], [189.2259521152, 199.1222534144, 605.272949189, 415.4298095616], [220.5816650423, 142.6211547648, 520.1051025426, 342.7603759616], [290.49328611699997, 124.800537088, 580.4210205042, 315.3440551936]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6]]}, {"image_path": "objects365_v1_00048422_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include a bowl, two plates, a fork, and two sausages.", "boxes_value": [[116.58166504229999, 55.800537088, 579, 275.6955566592], [496.78088380149995, 18.3048706048, 576.5562744247001, 97.16326901759999], [444.3791504096, 28.0900878848, 578.7790527101, 145.984680192], [546.8930664375, 228.44537354239998, 579, 275.6955566592], [85.22595211519999, 130.1222534144, 501.272949189, 330], [116.58166504229999, 73.6211547648, 416.1051025426, 273.7603759616], [186.49328611699997, 55.800537088, 476.4210205042, 246.3440551936]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5, 6]]}, {"image_path": "objects365_v1_00048424.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[47.6205444058, 229.4948120064, 183.0713501203, 512.0366211072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048424_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.6205444058, 71.4948120064, 170.0713501203, 354]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048424.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a vase, and four people.", "boxes_value": [[47.6205444058, 229.4948120064, 183.0713501203, 512.0366211072], [53.282104478, 391.5583496192, 93.1656494035, 418.9531250176], [57.2866820988, 415.8333740032, 87.0781249752, 441.928100608], [112.61279295889999, 239.2584838656, 124.70611574300001, 273.310729984], [47.6205444058, 229.4948120064, 59.444763209200005, 256.3948974592], [123.2875976874, 391.7095947264, 165.2198486497, 459.5223388672], [126.83154296490001, 427.0229492224, 183.0713501203, 512.0366211072]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048424_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a flower, a vase, and four people.", "boxes_value": [[34.6205444058, 71.4948120064, 170.0713501203, 354], [40.282104478, 233.5583496192, 80.1656494035, 260.9531250176], [44.2866820988, 257.8333740032, 74.0781249752, 283.928100608], [99.61279295889999, 81.2584838656, 111.70611574300001, 115.31072998399998], [34.6205444058, 71.4948120064, 46.444763209200005, 98.39489745920002], [110.2875976874, 233.70959472639998, 152.2198486497, 301.5223388672], [113.83154296490001, 269.0229492224, 170.0713501203, 354]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048426.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[231.13055419300002, 187.2843628032, 445.260009733, 414.878967296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048426_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[54.13055419300002, 57.2843628032, 268.260009733, 284.878967296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048426.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, two desks, a stool, and a cabinet.", "boxes_value": [[231.13055419300002, 187.2843628032, 445.260009733, 414.878967296], [432.35253908, 263.1624145408, 454.94079592500003, 307.6934814208], [402.019775388, 303.175842304, 445.260009733, 322.5372314624], [327.146972691, 313.1726074368, 379.067016636, 375.049865728], [324.302001947, 294.6805420032, 364.13110349, 363.670166016], [231.13055419300002, 187.2843628032, 329.280639611, 414.878967296]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048426_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, two desks, a stool, and a cabinet.", "boxes_value": [[54.13055419300002, 57.2843628032, 268.260009733, 284.878967296], [255.35253907999999, 133.1624145408, 277.94079592500003, 177.69348142080003], [225.01977538800003, 173.175842304, 268.260009733, 192.53723146239997], [150.14697269099997, 183.17260743679998, 202.067016636, 245.049865728], [147.302001947, 164.6805420032, 187.13110349, 233.670166016], [54.13055419300002, 57.2843628032, 152.28063961100003, 284.878967296]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048427.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[296.3350219996, 389.6711425536, 423.1210937692, 512.1668701184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048427_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[32.335021999599974, 30.67114255360002, 159.1210937692, 153]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048427.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three cups, and a plate.", "boxes_value": [[296.3350219996, 389.6711425536, 423.1210937692, 512.1668701184], [345.9439697579, 50.5619506688, 719.6480712971, 512.1441650176], [352.432373081, 435.1755981312, 423.1210937692, 512.1668701184], [335.4283447616, 416.5021362176, 391.2327881131, 481.0758666752], [296.3350219996, 389.6711425536, 346.1887207372, 440.9201659904], [253.0826415806, 428.6204223488, 337.3055419571, 457.3198242304]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048427_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three cups, and a plate.", "boxes_value": [[32.335021999599974, 30.67114255360002, 159.1210937692, 153], [81.94396975789999, 0, 190, 153], [88.43237308099998, 76.17559813119999, 159.1210937692, 153], [71.42834476159999, 57.502136217600025, 127.23278811310001, 122.07586667520002], [32.335021999599974, 30.67114255360002, 82.18872073720001, 81.9201659904], [0, 69.62042234879999, 73.30554195709999, 98.3198242304]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048428.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[83.46130369240001, 47.0349731328, 327.99377444839996, 257.4522704896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048428_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[61.46130369240001, 47.0349731328, 305.99377444839996, 257.4522704896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048428.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include a radiator, three people, and a glasses.", "boxes_value": [[83.46130369240001, 47.0349731328, 327.99377444839996, 257.4522704896], [83.46130369240001, 139.2003173888, 163.2672119488, 214.5725707776], [15.298889136, 79.8143921152, 367.8572997972, 510.2538451968], [234.4750366248, 47.0349731328, 327.99377444839996, 257.4522704896], [263.391967758, 59.34002688, 382.1363525316, 293.1370849792], [257.9901075152, 143.4684534784, 286.030934942, 160.1945610752]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048428_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include a radiator, three people, and a glasses.", "boxes_value": [[61.46130369240001, 47.0349731328, 305.99377444839996, 257.4522704896], [61.46130369240001, 139.2003173888, 141.2672119488, 214.5725707776], [0, 79.8143921152, 345.8572997972, 310], [212.4750366248, 47.0349731328, 305.99377444839996, 257.4522704896], [241.39196775800002, 59.34002688, 360.1363525316, 293.1370849792], [235.9901075152, 143.4684534784, 264.030934942, 160.1945610752]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048430.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[247.96533204839997, 221.0172729344, 486.7331542866, 308.5037231616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048430_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[59.96533204839997, 22.01727293440001, 298.7331542866, 109.50372316160002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048430.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five hurdles.", "boxes_value": [[247.96533204839997, 221.0172729344, 486.7331542866, 308.5037231616], [431.0233154489, 240.0725097472, 486.7331542866, 303.0776977408], [247.96533204839997, 240.6180420096, 303.2351074293, 305.8002929664], [306.8396606242, 239.4165649408, 371.12072757280004, 303.3972778496], [371.42114259569996, 262.5457153536, 451.9227294856, 308.5037231616], [300.20471190079996, 221.0172729344, 353.2828369262, 259.2829590016]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048430_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include five hurdles.", "boxes_value": [[59.96533204839997, 22.01727293440001, 298.7331542866, 109.50372316160002], [243.0233154489, 41.0725097472, 298.7331542866, 104.07769774079998], [59.96533204839997, 41.6180420096, 115.2351074293, 106.80029296639998], [118.83966062420001, 40.41656494079999, 183.12072757280004, 104.39727784960002], [183.42114259569996, 63.545715353599974, 263.9227294856, 109.50372316160002], [112.20471190079996, 22.01727293440001, 165.2828369262, 60.28295900159998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048439.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates.", "boxes_value": [[239.2187500032, 100.6278686267, 472.9439697408, 207.8713378845]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048439_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates.", "boxes_value": [[59.2187500032, 27.627868626700007, 292.9439697408, 134.8713378845]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048439.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[239.2187500032, 100.6278686267, 472.9439697408, 207.8713378845], [444.2911376896, 141.1511840917, 472.9439697408, 178.39984130390002], [369.793823232, 100.6278686267, 393.5347290112, 148.1097412203], [294.0684814336, 112.49835207429999, 325.995910656, 161.6174926658], [239.2187500032, 152.6123046593, 279.742065408, 207.8713378845], [229.379272448, 128.7628173967, 449.0992431616, 667.2724609212]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048439_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Please point out the objects and their coordinates. For your reference, objects involved in this region include five people.", "boxes_value": [[59.2187500032, 27.627868626700007, 292.9439697408, 134.8713378845], [264.2911376896, 68.1511840917, 292.9439697408, 105.39984130390002], [189.79382323200002, 27.627868626700007, 213.5347290112, 75.10974122030001], [114.06848143360003, 39.49835207429999, 145.99591065599998, 88.6174926658], [59.2187500032, 79.61230465930001, 99.74206540799997, 134.8713378845], [49.379272447999995, 55.7628173967, 269.0992431616, 161]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048440.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[2.098327626, 0.5579223552, 484.19580074910004, 302.8650512896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048440_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[2.098327626, 0.5579223552, 484.19580074910004, 302.8650512896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048440.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, a power outlet, a handbag, a bottle, an extractor, and a gas stove.", "boxes_value": [[2.098327626, 0.5579223552, 484.19580074910004, 302.8650512896], [2.098327626, 1.5260009984, 349.6343994104, 158.8417358336], [350.60253907, 0.5579223552, 484.19580074910004, 83.8117065216], [202.43780514850002, 178.3628539904, 221.5813598483, 211.5824585216], [71.23059078749999, 236.876342784, 191.4431152527, 302.8650512896], [227.34240720309998, 220.114746112, 255.2446289139, 284.5999145472], [347.6319580178, 68.8227538944, 497.0638427836, 117.1865844736], [327.1703490984, 230.4301147648, 531.7866210632, 285.6145019392]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048440_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cabinets, a power outlet, a handbag, a bottle, an extractor, and a gas stove.", "boxes_value": [[2.098327626, 0.5579223552, 484.19580074910004, 302.8650512896], [2.098327626, 1.5260009984, 349.6343994104, 158.8417358336], [350.60253907, 0.5579223552, 484.19580074910004, 83.8117065216], [202.43780514850002, 178.3628539904, 221.5813598483, 211.5824585216], [71.23059078749999, 236.876342784, 191.4431152527, 302.8650512896], [227.34240720309998, 220.114746112, 255.2446289139, 284.5999145472], [347.6319580178, 68.8227538944, 497.0638427836, 117.1865844736], [327.1703490984, 230.4301147648, 531.7866210632, 285.6145019392]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048442.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[46.3123779072, 251.50671389559997, 227.5430908416, 441.7297363606]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048442_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[45.3123779072, 48.50671389559997, 226.5430908416, 238.7297363606]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048442.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[46.3123779072, 251.50671389559997, 227.5430908416, 441.7297363606], [98.3687133696, 249.3192748886, 250.77947996160003, 572.5035400364001], [46.3123779072, 273.1932373334, 89.3798828032, 392.58825685560004], [121.3015136768, 251.50671389559997, 160.4254150144, 291.979736323], [210.3421020672, 317.9498291194, 227.5430908416, 337.5118408332], [112.5323486208, 415.4222412462, 130.0706787328, 441.7297363606]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048442_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, and two gloves.", "boxes_value": [[45.3123779072, 48.50671389559997, 226.5430908416, 238.7297363606], [97.3687133696, 46.319274888600006, 249.77947996160003, 286], [45.3123779072, 70.19323733340002, 88.3798828032, 189.58825685560004], [120.3015136768, 48.50671389559997, 159.4254150144, 88.979736323], [209.3421020672, 114.94982911940002, 226.5430908416, 134.51184083319998], [111.5323486208, 212.42224124619997, 129.0706787328, 238.7297363606]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048443.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 248.902587888, 259.29119872, 479.20574952000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048443_crop.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 57.902587888, 259.29119872, 288.20574952000004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048443.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a potted plant, a couch, two pictures, a desk, and a chair.", "boxes_value": [[0, 248.902587888, 259.29119872, 479.20574952000004], [155.402099584, 240.927734352, 282.425231936, 299.233459488], [0, 248.902587888, 102.342285184, 372.058532736], [43.609313983999996, 314.10797116799995, 86.227600128, 380.985900864], [0.047851584, 351.912841776, 133.241760256, 479.20574952000004], [107.108764672, 366.24383544, 431.66345216, 480.048706032], [201.326416, 268.71270753600004, 259.29119872, 316.44836424]], "boxes_seq": [[0], [0], [1], [2], [3, 6], [4], [5]]}, {"image_path": "objects365_v1_00048443_crop.jpg", "text": "In the image , please describe the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a potted plant, a couch, two pictures, a desk, and a chair.", "boxes_value": [[0, 57.902587888, 259.29119872, 288.20574952000004], [155.402099584, 49.92773435199999, 282.425231936, 108.233459488], [0, 57.902587888, 102.342285184, 181.05853273600002], [43.609313983999996, 123.10797116799995, 86.227600128, 189.98590086399997], [0.047851584, 160.912841776, 133.241760256, 288.20574952000004], [107.108764672, 175.24383544, 324, 289], [201.326416, 77.71270753600004, 259.29119872, 125.44836423999999]], "boxes_seq": [[0], [0], [1], [2], [3, 6], [4], [5]]}, {"image_path": "objects365_v1_00048444.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[208.0542602496, 0.6280517632, 481.0201416192, 384.656983296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048444_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[69.05426024959999, 0.6280517632, 342.0201416192, 384.656983296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048444.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two gloves, a helmet, a sneakers, and a hockey stick.", "boxes_value": [[208.0542602496, 0.6280517632, 481.0201416192, 384.656983296], [304.6690673664, 86.5079345664, 495.51330570240003, 511.1362914816], [208.0542602496, 0.6280517632, 407.247802752, 346.5332031488], [209.5069579776, 72.9912109568, 258.224853504, 149.0386962944], [388.3372803072, 86.6560058368, 481.0201416192, 166.2681884672], [234.242919936, 285.1009521664, 271.7637939456, 344.9901123072], [444.7292480256, 315.6895141376, 496.7346191616, 416.754455552], [215.99272335359998, 96.3104551936, 287.386225152, 384.656983296]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4], [5], [7]]}, {"image_path": "objects365_v1_00048444_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two gloves, a helmet, a sneakers, and a hockey stick.", "boxes_value": [[69.05426024959999, 0.6280517632, 342.0201416192, 384.656983296], [165.6690673664, 86.5079345664, 356.51330570240003, 480], [69.05426024959999, 0.6280517632, 268.247802752, 346.5332031488], [70.5069579776, 72.9912109568, 119.22485350400001, 149.0386962944], [249.3372803072, 86.6560058368, 342.0201416192, 166.2681884672], [95.24291993599999, 285.1009521664, 132.7637939456, 344.9901123072], [305.7292480256, 315.6895141376, 357.7346191616, 416.754455552], [76.99272335359998, 96.3104551936, 148.386225152, 384.656983296]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4], [5], [7]]}, {"image_path": "objects365_v1_00048446.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates.", "boxes_value": [[350.46728517120005, 155.5451660288, 702.0601806336, 324.3303222784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048446_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates.", "boxes_value": [[88.46728517120005, 42.5451660288, 440.0601806336, 211.3303222784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048446.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a sneakers, and a hat.", "boxes_value": [[350.46728517120005, 155.5451660288, 702.0601806336, 324.3303222784], [434.3302001664, 124.000427264, 487.80761717760004, 302.2615356416], [395.0533446912, 157.916748032, 442.9595947008, 272.22790528], [350.46728517120005, 155.5451660288, 379.8751220736, 248.0375366144], [667.773193344, 309.2460937728, 688.2340088064, 324.3303222784], [673.900390656, 179.655883776, 702.0601806336, 192.307373056]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048446_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a sneakers, and a hat.", "boxes_value": [[88.46728517120005, 42.5451660288, 440.0601806336, 211.3303222784], [172.3302001664, 11.000427263999995, 225.80761717760004, 189.2615356416], [133.05334469119998, 44.91674803199999, 180.95959470079998, 159.22790528000002], [88.46728517120005, 42.5451660288, 117.87512207359998, 135.0375366144], [405.773193344, 196.2460937728, 426.23400880639997, 211.3303222784], [411.900390656, 66.655883776, 440.0601806336, 79.30737305599999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048447.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[410.122436484, 439.3697509888, 661.937988252, 490.1307373056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048447_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[63.12243648399999, 13.369750988799979, 314.93798825199997, 64.13073730560001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048447.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[410.122436484, 439.3697509888, 661.937988252, 490.1307373056], [410.122436484, 452.3112793088, 419.6065673288, 490.1307373056], [503.86328128559995, 439.6762695168, 519.0313721160001, 478.639831552], [570.7030029712, 439.3697509888, 585.0374756223999, 468.9022216704], [647.1796875632, 446.1237182464, 661.937988252, 487.99285888], [512.1545409512, 439.8473510912, 524.01110844, 477.7883300864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048447_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[63.12243648399999, 13.369750988799979, 314.93798825199997, 64.13073730560001], [63.12243648399999, 26.31127930880001, 72.6065673288, 64.13073730560001], [156.86328128559995, 13.676269516800005, 172.03137211600006, 52.639831551999976], [223.70300297120002, 13.369750988799979, 238.03747562239994, 42.902221670400024], [300.1796875632, 20.123718246400017, 314.93798825199997, 61.99285888000003], [165.15454095120003, 13.847351091199982, 177.01110844000004, 51.78833008639998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048450.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[106.62939454430001, 104.8447927296, 447.62871797930006, 375.0793033728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048450_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[85.62939454430001, 67.8447927296, 426.62871797930006, 338.0793033728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048450.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include three desks, a handbag, a glasses, and a bottle.", "boxes_value": [[106.62939454430001, 104.8447927296, 447.62871797930006, 375.0793033728], [45.758300784, 264.9478149632, 358.34704588139994, 476.9545898496], [237.4158325489, 244.262023936, 296.4481811658, 272.0001220608], [60.8308105569, 237.6596069376, 198.8884277671, 278.18621824], [281.5513641273, 251.5393677824, 365.0426392676, 375.0793033728], [422.26023099879995, 104.8447927296, 447.62871797930006, 118.1217578496], [106.62939454430001, 266.2492675584, 122.3438110075, 290.4252319232]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048450_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include three desks, a handbag, a glasses, and a bottle.", "boxes_value": [[85.62939454430001, 67.8447927296, 426.62871797930006, 338.0793033728], [24.758300784, 227.94781496320002, 337.34704588139994, 405], [216.4158325489, 207.262023936, 275.4481811658, 235.00012206079998], [39.8308105569, 200.6596069376, 177.8884277671, 241.18621824000002], [260.5513641273, 214.5393677824, 344.0426392676, 338.0793033728], [401.26023099879995, 67.8447927296, 426.62871797930006, 81.1217578496], [85.62939454430001, 229.2492675584, 101.3438110075, 253.4252319232]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048453.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates.", "boxes_value": [[0, 241.7192382976, 201.12896726259999, 513.9711913984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048453_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates.", "boxes_value": [[0, 68.71923829759999, 201.12896726259999, 339]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048453.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, two people, a bottle, and a cup.", "boxes_value": [[0, 241.7192382976, 201.12896726259999, 513.9711913984], [0, 369.6536254976, 406.8160400706, 510.4255371264], [50.0955200314, 28.4213867008, 370.4320068331, 438.6984252928], [40.2528076461, 241.7192382976, 201.12896726259999, 513.9711913984], [0, 452.7223510528, 43.969360342099996, 512.0954589696], [50.0955200314, 28.4213867008, 370.4320068331, 438.6984252928]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048453_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, two people, a bottle, and a cup.", "boxes_value": [[0, 68.71923829759999, 201.12896726259999, 339], [0, 196.6536254976, 251, 337.4255371264], [50.0955200314, 0, 251, 265.6984252928], [40.2528076461, 68.71923829759999, 201.12896726259999, 339], [0, 279.7223510528, 43.969360342099996, 339], [50.0955200314, 0, 251, 265.6984252928]], "boxes_seq": [[0], [0], [1], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048454.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[393.3139648332, 52.6941528576, 471.40502930490004, 439.2373657088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048454_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[20.313964833199975, 52.6941528576, 98.40502930490004, 439.2373657088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048454.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a person, two cups, and a bottle.", "boxes_value": [[393.3139648332, 52.6941528576, 471.40502930490004, 439.2373657088], [415.5856933415, 52.6941528576, 462.52233885100003, 91.5844726784], [446.5050048529, 293.943420416, 503.44311523799996, 342.68621824], [326.44030761, 209.8007812608, 439.0372314737, 394.268005376], [450.63488770270004, 353.8676758016, 471.40502930490004, 396.6590576128], [430.61560057959997, 366.6300048896, 452.6368408186, 402.4146118144], [393.3139648332, 375.2225341952, 444.4224853302, 439.2373657088]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048454_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a person, two cups, and a bottle.", "boxes_value": [[20.313964833199975, 52.6941528576, 98.40502930490004, 439.2373657088], [42.58569334150002, 52.6941528576, 89.52233885100003, 91.5844726784], [73.50500485290002, 293.943420416, 117, 342.68621824], [0, 209.8007812608, 66.03723147369999, 394.268005376], [77.63488770270004, 353.8676758016, 98.40502930490004, 396.6590576128], [57.61560057959997, 366.6300048896, 79.63684081859998, 402.4146118144], [20.313964833199975, 375.2225341952, 71.4224853302, 439.2373657088]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048457.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations.", "boxes_value": [[294.56994628079997, 246.0414428672, 415.93786619040003, 448.9844360192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048457_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations.", "boxes_value": [[30.569946280799968, 51.041442867200004, 151.93786619040003, 253.98443601920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048457.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations. For your reference, objects involved in this region include two drums, two people, a microphone, and a tripod.", "boxes_value": [[294.56994628079997, 246.0414428672, 415.93786619040003, 448.9844360192], [315.27166750199996, 246.0414428672, 369.27612306360004, 293.7927246336], [246.4870605264, 269.9171142656, 337.4418945204, 360.8719482368], [294.56994628079997, 277.21246336, 415.93786619040003, 448.9844360192], [325.5717163176, 154.2880248832, 429.84497067480004, 336.5225829888], [310.1248168884, 306.8597412352, 332.443054188, 323.0912475648], [366.4520263944, 250.65997312, 408.14135744400005, 392.8943481344]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048457_crop.jpg", "text": "What information can you give me about the coordinates in image ? Please mention the objects and their locations. For your reference, objects involved in this region include two drums, two people, a microphone, and a tripod.", "boxes_value": [[30.569946280799968, 51.041442867200004, 151.93786619040003, 253.98443601920002], [51.27166750199996, 51.041442867200004, 105.27612306360004, 98.79272463360002], [0, 74.91711426559999, 73.44189452040001, 165.8719482368], [30.569946280799968, 82.21246336000002, 151.93786619040003, 253.98443601920002], [61.57171631760002, 0, 165.84497067480004, 141.52258298880002], [46.124816888400005, 111.85974123519998, 68.44305418800002, 128.09124756480003], [102.45202639439998, 55.65997311999999, 144.14135744400005, 197.8943481344]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048459.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[241.99688723580002, 437.1149292032, 458.7827148248, 511.6782226432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048459_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[54.99688723580002, 19.114929203200006, 271.7827148248, 93.6782226432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048459.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[241.99688723580002, 437.1149292032, 458.7827148248, 511.6782226432], [431.1666259922, 444.0189209088, 458.7827148248, 511.6782226432], [430.0759277198, 445.3997192192, 462.23474121099997, 511.6782226432], [351.7706299165, 446.0901489152, 378.69616702300004, 511.405212416], [322.0833740457, 437.1149292032, 385.6002197271, 511.6782226432], [241.99688723580002, 451.6133423104, 305.5137329172, 511.6782226432]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048459_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[54.99688723580002, 19.114929203200006, 271.7827148248, 93.6782226432], [244.1666259922, 26.0189209088, 271.7827148248, 93.6782226432], [243.0759277198, 27.399719219199994, 275.23474121099997, 93.6782226432], [164.7706299165, 28.090148915200018, 191.69616702300004, 93.40521241599998], [135.0833740457, 19.114929203200006, 198.6002197271, 93.6782226432], [54.99688723580002, 33.613342310400014, 118.5137329172, 93.6782226432]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048461.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[48.5773315126, 275.6305542144, 682.7235107403001, 398.7462158336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048461_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[48.5773315126, 31.63055421439998, 682.7235107403001, 154.7462158336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048461.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three desks, a trash bin can, a van, and two cars.", "boxes_value": [[48.5773315126, 275.6305542144, 682.7235107403001, 398.7462158336], [225.1988525465, 283.9344482304, 311.6684570325, 355.3447876096], [288.35083006589997, 283.9344482304, 340.3297119425, 327.6550903296], [48.5773315126, 309.2655639552, 190.36968991369997, 398.7462158336], [247.8839111403, 315.977539072, 272.7029419007, 356.2557983232], [376.409427017, 245.0092421632, 599.7587800387, 344.3807763456], [579.3823229832, 262.6914066944, 627.3587838115, 313.4491116032], [671.1904297025, 275.6305542144, 682.7235107403001, 305.503356928]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048461_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three desks, a trash bin can, a van, and two cars.", "boxes_value": [[48.5773315126, 31.63055421439998, 682.7235107403001, 154.7462158336], [225.1988525465, 39.934448230399994, 311.6684570325, 111.34478760960002], [288.35083006589997, 39.934448230399994, 340.3297119425, 83.6550903296], [48.5773315126, 65.26556395519998, 190.36968991369997, 154.7462158336], [247.8839111403, 71.97753907200001, 272.7029419007, 112.2557983232], [376.409427017, 1.009242163200014, 599.7587800387, 100.3807763456], [579.3823229832, 18.691406694399973, 627.3587838115, 69.44911160319998], [671.1904297025, 31.63055421439998, 682.7235107403001, 61.50335692800002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048462.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[103.7039794944, 174.9819336192, 455.12634278400003, 457.4368286208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048462_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations.", "boxes_value": [[88.7039794944, 70.98193361919999, 440.12634278400003, 353.4368286208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048462.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two gloves, two sneakers, and two hockey sticks.", "boxes_value": [[103.7039794944, 174.9819336192, 455.12634278400003, 457.4368286208], [324.73791505919996, 110.67425536, 396.9573974784, 247.19030763520004], [103.32666017279999, 123.3784790016, 249.0474853632, 460.1554565632], [216.685913088, 293.8555908096, 249.2635497984, 334.7509155328], [103.7039794944, 424.8591919104, 126.5776366848, 453.971130368], [198.66424558079999, 426.938598656, 246.4909668096, 457.4368286208], [374.72204590079997, 178.1010742272, 397.5957030912, 196.8158569472], [374.0289306624, 174.9819336192, 455.12634278400003, 230.43322752], [193.1191406592, 279.6461791744, 418.38989260799997, 446.6931152384]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5], [7, 8]]}, {"image_path": "objects365_v1_00048462_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two gloves, two sneakers, and two hockey sticks.", "boxes_value": [[88.7039794944, 70.98193361919999, 440.12634278400003, 353.4368286208], [309.73791505919996, 6.674255360000004, 381.9573974784, 143.19030763520004], [88.32666017279999, 19.3784790016, 234.0474853632, 356.1554565632], [201.685913088, 189.85559080960002, 234.2635497984, 230.75091553279998], [88.7039794944, 320.8591919104, 111.5776366848, 349.971130368], [183.66424558079999, 322.938598656, 231.4909668096, 353.4368286208], [359.72204590079997, 74.1010742272, 382.5957030912, 92.81585694719999], [359.0289306624, 70.98193361919999, 440.12634278400003, 126.43322752], [178.1191406592, 175.6461791744, 403.38989260799997, 342.6931152384]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5], [7, 8]]}, {"image_path": "objects365_v1_00048463.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[16.8525390848, 206.1138915786, 229.7103271424, 358.356811536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048463_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[16.8525390848, 38.113891578600004, 229.7103271424, 190.356811536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048463.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a baseball bat, a person, two bracelets, and two gloves.", "boxes_value": [[16.8525390848, 206.1138915786, 229.7103271424, 358.356811536], [16.8525390848, 261.7911987429, 171.0983276544, 358.356811536], [102.7156982272, 92.3873291211, 490.850402816, 611.9584961166], [187.7314452992, 206.1138915786, 224.1130981376, 236.5485229152], [195.0777587712, 258.937255839, 229.7103271424, 298.8171386427], [170.6122436608, 215.1925659213, 213.1932372992, 255.9993286101], [146.6604614144, 249.78961179119997, 198.1124877824, 286.160888685]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048463_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a baseball bat, a person, two bracelets, and two gloves.", "boxes_value": [[16.8525390848, 38.113891578600004, 229.7103271424, 190.356811536], [16.8525390848, 93.79119874290001, 171.0983276544, 190.356811536], [102.7156982272, 0, 282, 228], [187.7314452992, 38.113891578600004, 224.1130981376, 68.54852291520001], [195.0777587712, 90.93725583899999, 229.7103271424, 130.8171386427], [170.6122436608, 47.1925659213, 213.1932372992, 87.99932861010001], [146.6604614144, 81.78961179119997, 198.1124877824, 118.16088868499997]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048464.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[211.6943359256, 222.799804672, 472.1691894436, 385.0078125056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048464_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[65.6943359256, 40.79980467199999, 326.1691894436, 203.0078125056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048464.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, two desks, a stool, and a person.", "boxes_value": [[211.6943359256, 222.799804672, 472.1691894436, 385.0078125056], [211.6943359256, 222.799804672, 319.66717528280003, 385.0078125056], [223.6360473942, 255.1419067392, 352.0092773563, 368.5879516672], [316.6817016888, 299.9232787968, 366.9362793066, 357.6414184448], [404.54016113060004, 237.6591186432, 472.1691894436, 341.113159168], [430.12951658279997, 293.2244873216, 495.93066402849996, 368.5302734336], [323.2512817116, 68.9637451264, 442.62951662259997, 413.8341675008]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3], [6]]}, {"image_path": "objects365_v1_00048464_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, two desks, a stool, and a person.", "boxes_value": [[65.6943359256, 40.79980467199999, 326.1691894436, 203.0078125056], [65.6943359256, 40.79980467199999, 173.66717528280003, 203.0078125056], [77.6360473942, 73.14190673920001, 206.0092773563, 186.5879516672], [170.6817016888, 117.92327879679999, 220.9362793066, 175.64141844480002], [258.54016113060004, 55.6591186432, 326.1691894436, 159.11315916799998], [284.12951658279997, 111.22448732160001, 349.93066402849996, 186.53027343359997], [177.25128171159997, 0, 296.62951662259997, 231.8341675008]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3], [6]]}, {"image_path": "objects365_v1_00048465.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give coordinates for the items you reference.", "boxes_value": [[107.90698245120001, 186.2546997248, 322.5285644544, 313.7452392448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048465_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give coordinates for the items you reference.", "boxes_value": [[53.90698245120001, 32.25469972479999, 268.5285644544, 159.74523924480002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048465.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and four helmets.", "boxes_value": [[107.90698245120001, 186.2546997248, 322.5285644544, 313.7452392448], [81.7582397184, 252.8421020672, 228.73779294719998, 353.1292114432], [216.9631958016, 186.2546997248, 322.5285644544, 313.7452392448], [96.5200195584, 278.943420416, 119.4891967488, 307.480896], [145.242492672, 287.2958373888, 174.12799073280001, 313.0491943424], [107.90698245120001, 252.6832275456, 140.4212036352, 280.9564209152], [96.5977172736, 278.6002807808, 117.80261230079999, 305.2242431488], [147.2539062528, 286.8466797056, 173.4066162432, 311.1145019392], [235.13647457279998, 186.7123412992, 268.59313966080003, 211.9226684416]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048465_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include four people, and four helmets.", "boxes_value": [[53.90698245120001, 32.25469972479999, 268.5285644544, 159.74523924480002], [27.758239718400006, 98.84210206719999, 174.73779294719998, 191], [162.9631958016, 32.25469972479999, 268.5285644544, 159.74523924480002], [42.520019558399994, 124.94342041599998, 65.4891967488, 153.48089599999997], [91.242492672, 133.2958373888, 120.12799073280001, 159.04919434240003], [53.90698245120001, 98.6832275456, 86.42120363519999, 126.9564209152], [42.5977172736, 124.6002807808, 63.80261230079999, 151.22424314879999], [93.2539062528, 132.84667970560002, 119.4066162432, 157.1145019392], [181.13647457279998, 32.712341299200006, 214.59313966080003, 57.922668441599996]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048466.jpg", "text": "Describe the selected rectangular area in the photo . Specify the location of each mentioned object.", "boxes_value": [[168.5036010708, 100.36413575, 329.7154540752, 310.33758545]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048466_crop.jpg", "text": "Describe the selected rectangular area in the photo . Specify the location of each mentioned object.", "boxes_value": [[40.50360107079999, 53.36413575, 201.7154540752, 263.33758545]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048466.jpg", "text": "Describe the selected rectangular area in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two benches, a desk, a vase, and a plate.", "boxes_value": [[168.5036010708, 100.36413575, 329.7154540752, 310.33758545], [24.2090454312, 102.35443115, 261.051147434, 369.05059815000004], [168.5036010708, 100.36413575, 329.7154540752, 310.33758545], [114.93603518040001, 139.17736815, 475.09448244199996, 405.06616210000004], [266.2197875776, 110.29248045, 309.8350219832, 170.6828003], [282.9771728436, 156.4992676, 372.4968872288, 174.57818605]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048466_crop.jpg", "text": "Describe the selected rectangular area in the photo . Specify the location of each mentioned object. For your reference, objects involved in this region include two benches, a desk, a vase, and a plate.", "boxes_value": [[40.50360107079999, 53.36413575, 201.7154540752, 263.33758545], [0, 55.354431149999996, 133.05114743399997, 315], [40.50360107079999, 53.36413575, 201.7154540752, 263.33758545], [0, 92.17736815, 242, 315], [138.21978757760002, 63.29248045, 181.83502198320002, 123.6828003], [154.9771728436, 109.4992676, 242, 127.57818605]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048467.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations.", "boxes_value": [[1.0451049984, 223.9598073856, 158.1065063424, 511.9508056576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048467_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations.", "boxes_value": [[1.0451049984, 72.95980738559999, 158.1065063424, 360.9508056576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048467.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include a bed, two stools, a desk, a person, a handbag, and a trash bin can.", "boxes_value": [[1.0451049984, 223.9598073856, 158.1065063424, 511.9508056576], [48.7100830208, 278.8803100672, 411.5152587776, 504.9364623872], [1.0451049984, 396.9276123136, 118.1714477568, 511.3716430848], [102.9281616384, 497.8746948096, 158.1065063424, 511.9508056576], [0.378967296, 226.6944580096, 178.1596069376, 310.4761352704], [59.9704589824, 117.5470581248, 152.0822753792, 317.2029419008], [47.688842752, 223.9598073856, 76.1591988224, 276.1778564608], [25.921691904, 299.5953979392, 56.002197248, 340.8486938624]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048467_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include a bed, two stools, a desk, a person, a handbag, and a trash bin can.", "boxes_value": [[1.0451049984, 72.95980738559999, 158.1065063424, 360.9508056576], [48.7100830208, 127.88031006720001, 197, 353.9364623872], [1.0451049984, 245.92761231359998, 118.1714477568, 360.3716430848], [102.9281616384, 346.8746948096, 158.1065063424, 360.9508056576], [0.378967296, 75.6944580096, 178.1596069376, 159.4761352704], [59.9704589824, 0, 152.0822753792, 166.2029419008], [47.688842752, 72.95980738559999, 76.1591988224, 125.1778564608], [25.921691904, 148.59539793919998, 56.002197248, 189.8486938624]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048468.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[120.4386596864, 246.5438232724, 499.6810302976, 383.1339111666]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048468_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[95.4386596864, 34.543823272400004, 474.6810302976, 171.1339111666]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048468.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[120.4386596864, 246.5438232724, 499.6810302976, 383.1339111666], [416.9207153152, 246.5438232724, 453.2988891648, 323.8474731629], [467.8501586944, 248.3627319102, 499.6810302976, 322.02856445680004], [349.6210937344, 312.0245361456, 369.6290893312, 353.85937498420003], [311.4240112128, 344.7648925822, 331.8227538944, 383.1339111666], [120.4386596864, 348.40270999439997, 138.62774656, 382.9619140769]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048468_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[95.4386596864, 34.543823272400004, 474.6810302976, 171.1339111666], [391.9207153152, 34.543823272400004, 428.2988891648, 111.84747316289997], [442.8501586944, 36.36273191020001, 474.6810302976, 110.02856445680004], [324.6210937344, 100.0245361456, 344.6290893312, 141.85937498420003], [286.4240112128, 132.7648925822, 306.8227538944, 171.1339111666], [95.4386596864, 136.40270999439997, 113.62774655999999, 170.96191407689997]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048469.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[157.5649413982, 15.7520141824, 360.4471435702, 386.3685913088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048469_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[51.564941398200006, 15.7520141824, 254.4471435702, 386.3685913088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048469.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, two pictures, a cabinet, and three chairs.", "boxes_value": [[157.5649413982, 15.7520141824, 360.4471435702, 386.3685913088], [208.8598022581, 18.4117431808, 277.25292971790003, 150.0684814336], [157.5649413982, 15.7520141824, 193.4713134632, 84.715087872], [178.7680664109, 116.9409179648, 208.0794067313, 150.0208740352], [159.976318386, 143.2694091776, 321.9492187759, 312.164184576], [175.6809692469, 217.3388061696, 355.25939938050004, 501.8844604416], [240.6524048093, 202.1383056896, 360.4471435702, 386.3685913088], [155.8264770468, 271.5057373184, 328.1369628677, 511.9688110592]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00048469_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, two pictures, a cabinet, and three chairs.", "boxes_value": [[51.564941398200006, 15.7520141824, 254.4471435702, 386.3685913088], [102.85980225809999, 18.4117431808, 171.25292971790003, 150.0684814336], [51.564941398200006, 15.7520141824, 87.4713134632, 84.715087872], [72.76806641089999, 116.9409179648, 102.0794067313, 150.0208740352], [53.976318386, 143.2694091776, 215.94921877590002, 312.164184576], [69.68096924689999, 217.3388061696, 249.25939938050004, 479], [134.6524048093, 202.1383056896, 254.4471435702, 386.3685913088], [49.826477046799994, 271.5057373184, 222.1369628677, 479]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00048471.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 296.4655151616, 136.4652710146, 413.5602417152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048471_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 29.465515161600024, 136.4652710146, 146.5602417152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048471.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pillows, a couch, a bottle, and two wine glasses.", "boxes_value": [[0, 296.4655151616, 136.4652710146, 413.5602417152], [38.7993164306, 296.4655151616, 136.4652710146, 358.7329101312], [0, 303.2893676544, 14.915893579399999, 366.409729024], [0.005249013, 287.4523926016, 200.6987304388, 443.2841186304], [101.6489257674, 336.3406982656, 125.22125240899999, 413.5602417152], [115.0607910386, 361.538635264, 138.2266235152, 418.0308837888], [128.0661620766, 356.6616210944, 148.7935180798, 408.4320678912]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048471_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two pillows, a couch, a bottle, and two wine glasses.", "boxes_value": [[0, 29.465515161600024, 136.4652710146, 146.5602417152], [38.7993164306, 29.465515161600024, 136.4652710146, 91.73291013120001], [0, 36.289367654399996, 14.915893579399999, 99.409729024], [0.005249013, 20.452392601600025, 170, 175], [101.6489257674, 69.3406982656, 125.22125240899999, 146.5602417152], [115.0607910386, 94.53863526399999, 138.2266235152, 151.0308837888], [128.0661620766, 89.6616210944, 148.7935180798, 141.4320678912]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048472.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[377.68701171080005, 181.0488281088, 524.7540283055, 263.6099243008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048472_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[37.68701171080005, 21.048828108799995, 184.75402830550001, 103.6099243008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048472.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include two pictures, two people, and a hat.", "boxes_value": [[377.68701171080005, 181.0488281088, 524.7540283055, 263.6099243008], [481.36877441269996, 210.7550659072, 524.7540283055, 263.6099243008], [377.68701171080005, 181.0488281088, 440.39172360670005, 238.0054931456], [376.55871579100005, 188.5720825344, 452.0837402471, 445.3571167232], [426.2590331716, 208.5496216064, 521.761596649, 433.1756591616], [398.048095734, 188.8666381824, 434.59838865629996, 207.4758300672]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048472_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include two pictures, two people, and a hat.", "boxes_value": [[37.68701171080005, 21.048828108799995, 184.75402830550001, 103.6099243008], [141.36877441269996, 50.75506590719999, 184.75402830550001, 103.6099243008], [37.68701171080005, 21.048828108799995, 100.39172360670005, 78.00549314560001], [36.558715791000054, 28.57208253440001, 112.08374024710002, 124], [86.25903317159998, 48.549621606399995, 181.761596649, 124], [58.048095734000015, 28.86663818240001, 94.59838865629996, 47.47583006720001]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048475.jpg", "text": "Please detail the contents of the chosen region in the visual input . Specify the location of each mentioned object.", "boxes_value": [[297.2388916318, 83.2163696128, 393.5881347615, 154.404357888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048475_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Specify the location of each mentioned object.", "boxes_value": [[24.238891631800016, 18.216369612799994, 120.58813476149999, 89.40435788799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048475.jpg", "text": "Please detail the contents of the chosen region in the visual input . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a glasses, and two helmets.", "boxes_value": [[297.2388916318, 83.2163696128, 393.5881347615, 154.404357888], [247.5977172702, 81.6594848768, 419.2750243895, 464.2383422976], [272.0418091019, 118.0414428672, 428.9389648715, 471.62841794559995], [300.9210815733, 106.2296753152, 332.832885724, 116.9692993024], [297.2388916318, 83.2163696128, 336.82189941400003, 110.8323974656], [354.00524903139996, 120.6514281984, 393.5881347615, 154.404357888]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048475_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a glasses, and two helmets.", "boxes_value": [[24.238891631800016, 18.216369612799994, 120.58813476149999, 89.40435788799999], [0, 16.659484876799993, 144, 107], [0, 53.041442867200004, 144, 107], [27.9210815733, 41.2296753152, 59.83288572399999, 51.9692993024], [24.238891631800016, 18.216369612799994, 63.82189941400003, 45.832397465599996], [81.00524903139996, 55.6514281984, 120.58813476149999, 89.40435788799999]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048476.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Provide the coordinates for each element you describe.", "boxes_value": [[60.936401356800005, 105.9881591808, 757.1883545088, 512.5415038976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048476_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Provide the coordinates for each element you describe.", "boxes_value": [[60.936401356800005, 101.9881591808, 757.1883545088, 508]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048476.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two pictures, a person, and three stuffed toys.", "boxes_value": [[60.936401356800005, 105.9881591808, 757.1883545088, 512.5415038976], [711.3000488448, 105.9881591808, 757.1883545088, 432.0366211072], [686.5445556479999, 137.3854370304, 718.5456542976001, 399.431762688], [0, 222.5447387648, 106.9527588096, 511.9917602304], [489.6490478592, 204.44750976, 657.1910400768, 511.5230102528], [416.317504896, 208.5214843904, 497.7969970944, 487.07922365440004], [60.936401356800005, 109.363525376, 293.7326660352, 512.5415038976]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048476_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two pictures, a person, and three stuffed toys.", "boxes_value": [[60.936401356800005, 101.9881591808, 757.1883545088, 508], [711.3000488448, 101.9881591808, 757.1883545088, 428.0366211072], [686.5445556479999, 133.3854370304, 718.5456542976001, 395.431762688], [0, 218.5447387648, 106.9527588096, 507.9917602304], [489.6490478592, 200.44750976, 657.1910400768, 507.5230102528], [416.317504896, 204.5214843904, 497.7969970944, 483.07922365440004], [60.936401356800005, 105.363525376, 293.7326660352, 508]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048479.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[372.9712524288, 233.33160401450002, 449.5003662336, 497.2271728765]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048479_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates.", "boxes_value": [[19.971252428800028, 66.33160401450002, 96.5003662336, 330.2271728765]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048479.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a bracelet, and a glasses.", "boxes_value": [[372.9712524288, 233.33160401450002, 449.5003662336, 497.2271728765], [393.8323364352, 86.5382080112, 512.4763183616, 682.5441894318], [294.9031372288, 198.25787356130002, 479.6519164928, 497.65478517500003], [372.9712524288, 473.2740478769, 387.0864258048, 497.2271728765], [377.2018432512, 233.33160401450002, 442.2405395456, 423.8729247933], [412.8385009664, 254.16204836970002, 449.5003662336, 278.228393528]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048479_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a bracelet, and a glasses.", "boxes_value": [[19.971252428800028, 66.33160401450002, 96.5003662336, 330.2271728765], [40.83233643519998, 0, 115, 396], [0, 31.257873561300016, 115, 330.65478517500003], [19.971252428800028, 306.2740478769, 34.0864258048, 330.2271728765], [24.20184325119999, 66.33160401450002, 89.24053954559997, 256.8729247933], [59.83850096639998, 87.16204836970002, 96.5003662336, 111.22839352800003]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048481.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[78.138244608, 580.8278808722, 464.0874023424, 625.0198974385]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048481_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[78.138244608, 11.827880872200012, 464.0874023424, 56.01989743850004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048481.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six people.", "boxes_value": [[78.138244608, 580.8278808722, 464.0874023424, 625.0198974385], [78.138244608, 585.6353759448, 113.55364992, 614.1599120811001], [88.2340698112, 580.8278808722, 100.413085952, 608.070434546], [179.0044555776, 583.6665039339, 200.4322509824, 682.770019557], [241.2499999744, 575.8293457137, 270.0541991936, 672.1678466877], [371.3816528384, 591.2845459189, 384.6632690176, 614.7929687305999], [448.94635008, 597.5268554954, 464.0874023424, 625.0198974385]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048481_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six people.", "boxes_value": [[78.138244608, 11.827880872200012, 464.0874023424, 56.01989743850004], [78.138244608, 16.63537594479999, 113.55364992, 45.15991208110006], [88.2340698112, 11.827880872200012, 100.413085952, 39.070434546], [179.0044555776, 14.666503933900003, 200.4322509824, 67], [241.2499999744, 6.829345713700036, 270.0541991936, 67], [371.3816528384, 22.284545918899994, 384.6632690176, 45.79296873059991], [448.94635008, 28.526855495400014, 464.0874023424, 56.01989743850004]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048482.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[532.6069335688001, 220.3843994112, 663.6126709209, 267.5993652224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048482_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object.", "boxes_value": [[33.60693356880006, 12.384399411200008, 164.61267092089997, 59.599365222400024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048482.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a faucet, a sink, two bottles, and a cup.", "boxes_value": [[532.6069335688001, 220.3843994112, 663.6126709209, 267.5993652224], [469.8529052481, 209.6542968832, 681.4689941666, 247.6976318464], [618.13183595, 180.4935302656, 678.6499023778, 267.3704223744], [532.6069335688001, 259.6385498112, 650.8339843421, 267.5993652224], [607.3725586011001, 230.5146484224, 630.9515381131, 272.2580566528], [638.8111571981, 220.3843994112, 663.6126709209, 258.9840088064], [576.8073730277, 230.4952392704, 593.5745849515, 257.7420043776]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048482_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a faucet, a sink, two bottles, and a cup.", "boxes_value": [[33.60693356880006, 12.384399411200008, 164.61267092089997, 59.599365222400024], [0, 1.6542968832000042, 182.46899416660005, 39.697631846399986], [119.13183594999998, 0, 179.6499023778, 59.37042237439999], [33.60693356880006, 51.638549811199994, 151.83398434210005, 59.599365222400024], [108.37255860110008, 22.5146484224, 131.9515381131, 64.25805665280001], [139.81115719809998, 12.384399411200008, 164.61267092089997, 50.98400880640003], [77.80737302770001, 22.495239270399992, 94.57458495150001, 49.74200437759998]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048484.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[301.2850341812, 175.2943115264, 484.7076415686, 332.9534301696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048484_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference.", "boxes_value": [[46.28503418119999, 40.29431152640001, 229.70764156860002, 197.95343016959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048484.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball, a baseball glove, two people, and a helmet.", "boxes_value": [[301.2850341812, 175.2943115264, 484.7076415686, 332.9534301696], [403.240356468, 222.2415771648, 435.91931148820004, 256.761596672], [417.14184572619996, 175.2943115264, 484.7076415686, 324.4208984576], [76.9487304866, 185.7333373952, 605.5242919686, 511.916748032], [41.971984871, 6.1455077888, 623.961425771, 511.0860595712], [301.2850341812, 235.16461184, 403.8439941634, 332.9534301696]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048484_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball, a baseball glove, two people, and a helmet.", "boxes_value": [[46.28503418119999, 40.29431152640001, 229.70764156860002, 197.95343016959998], [148.24035646800002, 87.24157716479999, 180.91931148820004, 121.761596672], [162.14184572619996, 40.29431152640001, 229.70764156860002, 189.4208984576], [0, 50.73333739520001, 275, 237], [0, 0, 275, 237], [46.28503418119999, 100.16461183999999, 148.8439941634, 197.95343016959998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048490.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each mentioned object.", "boxes_value": [[153.9680786467, 182.9380493312, 242.4565429574, 268.260864256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048490_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each mentioned object.", "boxes_value": [[22.968078646700008, 21.938049331200006, 111.45654295739999, 107.26086425599999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048490.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, a cabinet, two storage boxes, and a chair.", "boxes_value": [[153.9680786467, 182.9380493312, 242.4565429574, 268.260864256], [126.8215942503, 193.936584448, 212.6686401383, 285.2738647552], [0, 0.8597412352, 248.929443374, 246.7340698112], [153.9680786467, 195.4789428736, 232.3488769775, 223.3041381888], [195.1179809754, 182.9380493312, 234.7002563767, 199.3980102656], [166.6304321577, 234.1738891776, 242.4565429574, 268.260864256]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048490_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a flower, a cabinet, two storage boxes, and a chair.", "boxes_value": [[22.968078646700008, 21.938049331200006, 111.45654295739999, 107.26086425599999], [0, 32.93658444799999, 81.66864013829999, 124.27386475520001], [0, 0, 117.92944337399999, 85.73406981119999], [22.968078646700008, 34.478942873600005, 101.3488769775, 62.30413818880001], [64.1179809754, 21.938049331200006, 103.7002563767, 38.39801026559999], [35.6304321577, 73.17388917759999, 111.45654295739999, 107.26086425599999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048492.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[182.58416747459998, 133.9853515776, 639.5718994407, 282.4215088128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048492_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe.", "boxes_value": [[114.58416747459998, 37.9853515776, 571, 186.42150881280003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048492.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a picture, two cabinets, and two bottles.", "boxes_value": [[182.58416747459998, 133.9853515776, 639.5718994407, 282.4215088128], [182.58416747459998, 170.2313842688, 266.35284425969996, 219.3648681472], [392.81115723839997, 137.207214336, 435.5009765694, 182.3134155264], [141.6004028055, 99.8103637504, 291.2185669257, 301.6658324992], [602.9807129154, 133.9853515776, 639.5718994407, 282.4215088128], [620.2283935599, 217.6699829248, 634.9819336218001, 265.9879150592], [608.0566405986, 212.5062255616, 618.753051765, 266.9471435776]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048492_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a picture, two cabinets, and two bottles.", "boxes_value": [[114.58416747459998, 37.9853515776, 571, 186.42150881280003], [114.58416747459998, 74.23138426880001, 198.35284425969996, 123.36486814720001], [324.81115723839997, 41.20721433599999, 367.5009765694, 86.31341552640001], [73.60040280550001, 3.8103637504000005, 223.21856692569997, 205.6658324992], [534.9807129154, 37.9853515776, 571, 186.42150881280003], [552.2283935599, 121.6699829248, 566.9819336218001, 169.98791505920002], [540.0566405986, 116.50622556159999, 550.753051765, 170.9471435776]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048494.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[399.4034423808, 183.9213867008, 751.3668212736, 308.7861938688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048494_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[88.40344238080002, 31.921386700800014, 440.36682127359995, 156.7861938688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048494.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, and four street lights.", "boxes_value": [[399.4034423808, 183.9213867008, 751.3668212736, 308.7861938688], [730.7324218368, 257.7631225344, 751.3668212736, 308.7861938688], [629.1358642944, 52.3839721472, 670.031005824, 274.9706420736], [525.4912109567999, 183.9213867008, 553.083984384, 302.018371584], [475.13476561920004, 206.9440307712, 494.055542016, 295.060668928], [399.4034423808, 234.1631469568, 411.9143066112, 275.8660278272]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048494_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, and four street lights.", "boxes_value": [[88.40344238080002, 31.921386700800014, 440.36682127359995, 156.7861938688], [419.7324218368, 105.7631225344, 440.36682127359995, 156.7861938688], [318.1358642944, 0, 359.031005824, 122.9706420736], [214.49121095679993, 31.921386700800014, 242.08398438400002, 150.01837158400002], [164.13476561920004, 54.944030771200005, 183.055542016, 143.06066892799998], [88.40344238080002, 82.1631469568, 100.9143066112, 123.86602782720001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048495.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[326.1015625088, 0, 638.66796878, 166.918334976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048495_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[79.10156250879999, 0, 391.66796878, 166.918334976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048495.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a glasses, a street lights, and a car.", "boxes_value": [[326.1015625088, 0, 638.66796878, 166.918334976], [572.5838622976, 11.8623046656, 638.66796878, 166.918334976], [563.2122802376, 28.4178466816, 595.5405273792, 166.0203247104], [326.1015625088, 33.013427712, 366.9863281608, 76.0276489216], [421.26953124360006, 0, 446.5296630596, 84.808837888], [428.10852052039996, 44.6300048896, 612.760376004, 130.6569213952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048495_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a glasses, a street lights, and a car.", "boxes_value": [[79.10156250879999, 0, 391.66796878, 166.918334976], [325.58386229760004, 11.8623046656, 391.66796878, 166.918334976], [316.2122802376, 28.4178466816, 348.54052737919994, 166.0203247104], [79.10156250879999, 33.013427712, 119.98632816079999, 76.0276489216], [174.26953124360006, 0, 199.5296630596, 84.808837888], [181.10852052039996, 44.6300048896, 365.760376004, 130.6569213952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048496.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[355.77929687349996, 232.1422729728, 550.9998778933, 355.6182251008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048496_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe.", "boxes_value": [[49.77929687349996, 31.142272972799987, 244.99987789329998, 154.61822510079998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048496.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a backpack, a cup, and three chairs.", "boxes_value": [[355.77929687349996, 232.1422729728, 550.9998778933, 355.6182251008], [474.94384762640004, 292.2103881728, 516.8012695546, 355.6182251008], [533.7667236218, 240.912353536, 550.9998778933, 258.3882446336], [339.1223144678, 318.2036132864, 393.95166018950005, 370.2568969728], [475.8488769592, 234.2243652096, 501.52844239309997, 308.4869995008], [355.77929687349996, 232.1422729728, 427.26574704120003, 307.0989380096]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048496_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a backpack, a cup, and three chairs.", "boxes_value": [[49.77929687349996, 31.142272972799987, 244.99987789329998, 154.61822510079998], [168.94384762640004, 91.21038817279998, 210.80126955460003, 154.61822510079998], [227.7667236218, 39.91235353600001, 244.99987789329998, 57.38824463359998], [33.12231446779998, 117.20361328640001, 87.95166018950005, 169.2568969728], [169.84887695920003, 33.224365209599995, 195.52844239309997, 107.48699950079998], [49.77929687349996, 31.142272972799987, 121.26574704120003, 106.09893800959998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048497.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe.", "boxes_value": [[0.010986319999999999, 340.8437499904, 764.1920165784001, 438.794250496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048497_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe.", "boxes_value": [[0.010986319999999999, 24.8437499904, 764, 122.79425049600002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048497.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a carpet, a piano, two drums, a bottle, and a laptop.", "boxes_value": [[0.010986319999999999, 340.8437499904, 764.1920165784001, 438.794250496], [465.2292480132, 353.9971313664, 764.1920165784001, 433.5530395648], [181.07037351879998, 375.3574218752, 409.9946288816, 438.794250496], [0.1248779392, 370.8322143744, 60.40954593319999, 507.9020996096], [0.010986319999999999, 340.8437499904, 42.953308129999996, 376.1016845824], [464.07824709839997, 360.5578002944, 478.0280761888, 404.1508789248], [27.6383666664, 294.1223754752, 163.06793209880001, 393.5354003968]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048497_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a carpet, a piano, two drums, a bottle, and a laptop.", "boxes_value": [[0.010986319999999999, 24.8437499904, 764, 122.79425049600002], [465.2292480132, 37.997131366400026, 764, 117.55303956479997], [181.07037351879998, 59.357421875199975, 409.9946288816, 122.79425049600002], [0.1248779392, 54.832214374399996, 60.40954593319999, 147], [0.010986319999999999, 24.8437499904, 42.953308129999996, 60.101684582400026], [464.07824709839997, 44.55780029440001, 478.0280761888, 88.1508789248], [27.6383666664, 0, 163.06793209880001, 77.53540039680001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048498.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each object you identify.", "boxes_value": [[199.211890944, 95.3751220736, 287.269829376, 144.2051372032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048498_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each object you identify.", "boxes_value": [[22.211890944000004, 12.375122073599996, 110.26982937600002, 61.205137203199996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048498.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, a person, two gloves, and a helmet.", "boxes_value": [[199.211890944, 95.3751220736, 287.269829376, 144.2051372032], [217.85552977919997, 95.3751220736, 255.5531616, 132.1303100416], [191.039733888, 77.2490844672, 345.61010741760003, 415.0471191552], [257.6990694144, 107.5514395136, 287.269829376, 144.2051372032], [243.24488271360002, 77.0026700288, 294.1940510976, 127.7212992], [199.211890944, 95.4385637376, 227.79875466240003, 126.1075246592]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048498_crop.jpg", "text": "Kindly share your observations about the rectangular region within . Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, a person, two gloves, and a helmet.", "boxes_value": [[22.211890944000004, 12.375122073599996, 110.26982937600002, 61.205137203199996], [40.85552977919997, 12.375122073599996, 78.55316160000001, 49.13031004160001], [14.039733888, 0, 132, 73], [80.69906941440001, 24.551439513600002, 110.26982937600002, 61.205137203199996], [66.24488271360002, 0, 117.19405109759998, 44.721299200000004], [22.211890944000004, 12.438563737600006, 50.79875466240003, 43.107524659199996]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048502.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[223.3066849189, 318.7072753664, 530.4880371246, 511.250793472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048502_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention.", "boxes_value": [[77.3066849189, 48.70727536639998, 384.48803712460005, 241.250793472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048502.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, two people, a barrel, and a sandals.", "boxes_value": [[223.3066849189, 318.7072753664, 530.4880371246, 511.250793472], [404.7332763754, 318.7072753664, 530.4880371246, 511.250793472], [148.8264159889, 183.699462912, 390.2357177783, 512.2739257856], [284.33154294729997, 219.188110336, 426.8544921737, 512.2739257856], [420.61486815319995, 362.3021850624, 464.44384768049997, 426.9876708864], [223.3066849189, 482.7019254784, 269.87616048809997, 507.736383488]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048502_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, two people, a barrel, and a sandals.", "boxes_value": [[77.3066849189, 48.70727536639998, 384.48803712460005, 241.250793472], [258.7332763754, 48.70727536639998, 384.48803712460005, 241.250793472], [2.82641598890001, 0, 244.2357177783, 242], [138.33154294729997, 0, 280.8544921737, 242], [274.61486815319995, 92.30218506239999, 318.44384768049997, 156.9876708864], [77.3066849189, 212.70192547840003, 123.87616048809997, 237.736383488]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048503.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[387.4302978672, 322.3198242304, 470.4455566279, 402.9609985536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048503_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[21.43029786720001, 20.319824230400002, 104.44555662789998, 100.9609985536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048503.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a bench, and three people.", "boxes_value": [[387.4302978672, 322.3198242304, 470.4455566279, 402.9609985536], [387.4302978672, 322.3198242304, 402.5548095828, 344.1664428544], [412.616821295, 392.1729736192, 452.2767333791, 402.0879516672], [435.7576904492, 359.9480590848, 448.7077636853, 402.9609985536], [447.1661377295, 361.9522094592, 458.4204101995, 401.4193115136], [456.8787841506, 360.5646972416, 470.4455566279, 401.2651367424]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048503_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a lamp, a bench, and three people.", "boxes_value": [[21.43029786720001, 20.319824230400002, 104.44555662789998, 100.9609985536], [21.43029786720001, 20.319824230400002, 36.554809582799976, 42.166442854399975], [46.61682129500002, 90.17297361919998, 86.2767333791, 100.0879516672], [69.75769044920003, 57.94805908479998, 82.7077636853, 100.9609985536], [81.16613772950001, 59.95220945919999, 92.4204101995, 99.41931151360001], [90.87878415059998, 58.564697241600015, 104.44555662789998, 99.26513674239999]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048505.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[268.730590848, 279.9684448256, 479.32739258879997, 379.2798461952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048505_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations.", "boxes_value": [[52.73059084800002, 24.96844482559999, 263.32739258879997, 124.27984619519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048505.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include a soccer, a person, and four sneakers.", "boxes_value": [[268.730590848, 279.9684448256, 479.32739258879997, 379.2798461952], [335.0186767872, 339.834777856, 379.27416990719996, 379.2798461952], [268.90490726400003, 58.7747802624, 496.5379638528, 358.2116088832], [268.730590848, 331.5908813312, 309.43640133120005, 358.1810302976], [342.46740725760003, 279.9684448256, 375.77978519040005, 296.2702636544], [448.86450193919995, 330.9645996032, 479.32739258879997, 352.83538816], [468.28039549439995, 280.9743042048, 495.84204103679997, 334.8701171712]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048505_crop.jpg", "text": "Can you discuss the entities within the region of image ? Please mention the objects and their locations. For your reference, objects involved in this region include a soccer, a person, and four sneakers.", "boxes_value": [[52.73059084800002, 24.96844482559999, 263.32739258879997, 124.27984619519998], [119.01867678719998, 84.83477785600002, 163.27416990719996, 124.27984619519998], [52.90490726400003, 0, 280.5379638528, 103.21160888319997], [52.73059084800002, 76.59088133120002, 93.43640133120005, 103.18103029759999], [126.46740725760003, 24.96844482559999, 159.77978519040005, 41.270263654400026], [232.86450193919995, 75.96459960319999, 263.32739258879997, 97.83538815999998], [252.28039549439995, 25.974304204799978, 279.84204103679997, 79.87011717119998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048508.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[28.5916748314, 56.5522461184, 214.1510619961, 350.7387695104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048508_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[28.5916748314, 56.5522461184, 214.1510619961, 350.7387695104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048508.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, three people, and a sports car.", "boxes_value": [[28.5916748314, 56.5522461184, 214.1510619961, 350.7387695104], [174.0112304728, 164.8527832064, 216.3571777452, 199.0552978432], [160.5292969054, 111.2806396416, 214.1510619961, 185.1917724672], [96.4730224684, 95.62890624, 142.26892089979998, 189.8293456896], [28.5916748314, 56.5522461184, 137.2144164867, 350.7387695104], [77.9299316581, 134.4133300736, 648.7403564391001, 409.3393554432]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048508_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, three people, and a sports car.", "boxes_value": [[28.5916748314, 56.5522461184, 214.1510619961, 350.7387695104], [174.0112304728, 164.8527832064, 216.3571777452, 199.0552978432], [160.5292969054, 111.2806396416, 214.1510619961, 185.1917724672], [96.4730224684, 95.62890624, 142.26892089979998, 189.8293456896], [28.5916748314, 56.5522461184, 137.2144164867, 350.7387695104], [77.9299316581, 134.4133300736, 260, 409.3393554432]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048509.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations.", "boxes_value": [[389.53332518919996, 155.557006848, 715.9783935756, 253.4027709952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048509_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations.", "boxes_value": [[82.53332518919996, 24.557006847999986, 408.97839357559997, 122.4027709952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048509.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include a person, and four helmets.", "boxes_value": [[389.53332518919996, 155.557006848, 715.9783935756, 253.4027709952], [523.3032226887999, 199.3270263808, 634.6562500132, 304.6011352576], [389.53332518919996, 196.3626708992, 442.18579098400005, 232.7805786112], [435.6042480356, 208.6481933824, 490.01171872320003, 253.4027709952], [583.0310058744, 199.8728027136, 633.489623994, 236.7294921728], [665.9586181612, 155.557006848, 715.9783935756, 200.750366208]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048509_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include a person, and four helmets.", "boxes_value": [[82.53332518919996, 24.557006847999986, 408.97839357559997, 122.4027709952], [216.3032226887999, 68.32702638079999, 327.65625001319995, 146], [82.53332518919996, 65.3626708992, 135.18579098400005, 101.78057861120001], [128.6042480356, 77.6481933824, 183.01171872320003, 122.4027709952], [276.03100587439997, 68.87280271360001, 326.489623994, 105.72949217280001], [358.9586181612, 24.557006847999986, 408.97839357559997, 69.750366208]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048513.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[29.7539673233, 311.6326294016, 216.07598878470003, 370.385681152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048513_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[29.7539673233, 15.632629401600013, 216.07598878470003, 74.38568115200002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048513.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[29.7539673233, 311.6326294016, 216.07598878470003, 370.385681152], [101.11785888050001, 127.6210937344, 243.36560057920002, 365.8571777536], [31.0718383602, 116.8123168768, 183.6990356168, 371.0350341632], [29.7539673233, 338.5191650304, 74.0796509079, 370.385681152], [64.9749145749, 341.154724096, 103.3106689098, 356.2493896704], [100.4354858254, 329.4144287232, 123.9161376585, 366.0729370112], [166.08544923739998, 311.6326294016, 216.07598878470003, 338.7587280384]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048513_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[29.7539673233, 15.632629401600013, 216.07598878470003, 74.38568115200002], [101.11785888050001, 0, 243.36560057920002, 69.85717775360001], [31.0718383602, 0, 183.6990356168, 75.03503416320001], [29.7539673233, 42.51916503040002, 74.0796509079, 74.38568115200002], [64.9749145749, 45.154724095999995, 103.3106689098, 60.24938967039998], [100.4354858254, 33.41442872319999, 123.9161376585, 70.0729370112], [166.08544923739998, 15.632629401600013, 216.07598878470003, 42.75872803840002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048517.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[220.05999756800003, 260.358703632, 640.045898432, 480.50201414400004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048517_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[105.05999756800003, 55.358703632000015, 525, 275]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048517.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and three chairs.", "boxes_value": [[220.05999756800003, 260.358703632, 640.045898432, 480.50201414400004], [344.928344704, 246.99945067199997, 439.67333984, 366.9215088], [225.0062256, 247.661987328, 460.21252441599995, 461.666564928], [453.30297849600004, 260.358703632, 639.308837888, 433.147094736], [535.446044928, 343.596923808, 640.045898432, 480.50201414400004], [220.05999756800003, 405.29333496, 447.85656736, 476.636230464], [447.230712896, 408.42242433599995, 634.349365248, 476.636230464], [351.481079104, 330.821411136, 431.585449216, 387.144714336]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00048517_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and three chairs.", "boxes_value": [[105.05999756800003, 55.358703632000015, 525, 275], [229.92834470399998, 41.999450671999966, 324.67333984, 161.92150880000003], [110.0062256, 42.66198732800001, 345.21252441599995, 256.666564928], [338.30297849600004, 55.358703632000015, 524.308837888, 228.14709473599999], [420.446044928, 138.59692380799999, 525, 275], [105.05999756800003, 200.29333495999998, 332.85656736, 271.636230464], [332.230712896, 203.42242433599995, 519.349365248, 271.636230464], [236.481079104, 125.821411136, 316.585449216, 182.144714336]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7]]}, {"image_path": "objects365_v1_00048518.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[272.4882812628, 112.601068544, 715.9721484044001, 327.4466552832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048518_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations.", "boxes_value": [[111.4882812628, 54.601068544, 554.9721484044001, 269.4466552832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048518.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two hats, a handbag, and a pen.", "boxes_value": [[272.4882812628, 112.601068544, 715.9721484044001, 327.4466552832], [290.2371826456, 129.2673950208, 602.751342736, 510.1199341056], [437.4318561316, 127.9896332288, 513.092299146, 195.528333824], [290.813031496, 112.601068544, 359.2066523292, 198.5205547008], [666.1975338648, 172.497188352, 715.9721484044001, 223.9881689088], [272.4882812628, 283.9232177664, 294.13720706280003, 327.4466552832]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048518_crop.jpg", "text": "Regarding the image , what's going on in the section ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, two hats, a handbag, and a pen.", "boxes_value": [[111.4882812628, 54.601068544, 554.9721484044001, 269.4466552832], [129.2371826456, 71.2673950208, 441.75134273599997, 323], [276.4318561316, 69.9896332288, 352.09229914599996, 137.528333824], [129.813031496, 54.601068544, 198.20665232919998, 140.5205547008], [505.1975338648, 114.497188352, 554.9721484044001, 165.9881689088], [111.4882812628, 225.9232177664, 133.13720706280003, 269.4466552832]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048519.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[65.8170166092, 246.5731811328, 558.8353271796001, 476.8557128704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048519_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[65.8170166092, 57.57318113279999, 558.8353271796001, 287.8557128704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048519.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a stool, a desk, a person, a car, and a motorcycle.", "boxes_value": [[65.8170166092, 246.5731811328, 558.8353271796001, 476.8557128704], [386.36206053440003, 357.502624512, 471.5184326442, 476.8557128704], [403.1789551076, 320.1234741248, 558.8353271796001, 473.6858520576], [65.8170166092, 246.5731811328, 75.0595092775, 281.276184064], [89.9392700302, 250.8555908096, 152.10437012650002, 302.5508422656], [220.9807129148, 294.1770019328, 471.3094482359, 435.2077026304]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048519_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a stool, a desk, a person, a car, and a motorcycle.", "boxes_value": [[65.8170166092, 57.57318113279999, 558.8353271796001, 287.8557128704], [386.36206053440003, 168.502624512, 471.5184326442, 287.8557128704], [403.1789551076, 131.1234741248, 558.8353271796001, 284.6858520576], [65.8170166092, 57.57318113279999, 75.0595092775, 92.276184064], [89.9392700302, 61.85559080959999, 152.10437012650002, 113.55084226560001], [220.9807129148, 105.17700193280001, 471.3094482359, 246.20770263039998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048520.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[12.673400896, 102.421874976, 172.36676025600002, 236.760131856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048520_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[12.673400896, 34.421874976, 172.36676025600002, 168.760131856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048520.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a picture, a person, a glasses, and a wine glass.", "boxes_value": [[12.673400896, 102.421874976, 172.36676025600002, 236.760131856], [12.673400896, 180.296142576, 49.70166016, 223.697326656], [43.46960448, 136.335571296, 70.08355712, 185.761535664], [44.574523904, 57.163940448, 247.45166015999996, 236.883911136], [106.264465344, 102.421874976, 162.942016576, 123.703308096], [141.770751936, 188.287780752, 172.36676025600002, 236.760131856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048520_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a picture, a person, a glasses, and a wine glass.", "boxes_value": [[12.673400896, 34.421874976, 172.36676025600002, 168.760131856], [12.673400896, 112.296142576, 49.70166016, 155.697326656], [43.46960448, 68.33557129600001, 70.08355712, 117.76153566400001], [44.574523904, 0, 212, 168.883911136], [106.264465344, 34.421874976, 162.942016576, 55.703308096], [141.770751936, 120.287780752, 172.36676025600002, 168.760131856]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048521.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[210.2045287936, 383.2955322569, 372.9667968512, 504.8769531202]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048521_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[41.20452879359999, 31.295532256900003, 203.9667968512, 152.87695312020003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048521.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, and four baksets.", "boxes_value": [[210.2045287936, 383.2955322569, 372.9667968512, 504.8769531202], [164.130371072, 450.02844237240004, 385.2247314432, 637.7969970668], [276.87823488, 383.2955322569, 343.5519409152, 423.82263181509995], [176.8676758016, 391.7930908267, 238.9657592832, 440.8178710751], [303.024780288, 408.1346435588, 372.9667968512, 504.2232665923], [210.2045287936, 425.13000487090005, 320.0200195072, 504.8769531202]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048521_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, and four baksets.", "boxes_value": [[41.20452879359999, 31.295532256900003, 203.9667968512, 152.87695312020003], [0, 98.02844237240004, 216.22473144320003, 183], [107.87823487999998, 31.295532256900003, 174.55194091520002, 71.82263181509995], [7.867675801600001, 39.793090826699995, 69.96575928319999, 88.81787107510002], [134.024780288, 56.13464355880001, 203.9667968512, 152.22326659229998], [41.20452879359999, 73.13000487090005, 151.0200195072, 152.87695312020003]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048523.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[138.2221069056, 295.6915283456, 364.3194580224, 412.5190429696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048523_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[57.222106905599986, 29.691528345599977, 283.3194580224, 146.51904296959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048523.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include two helmets, and three hats.", "boxes_value": [[138.2221069056, 295.6915283456, 364.3194580224, 412.5190429696], [138.2221069056, 307.0732421632, 163.2360840192, 327.8195190272], [158.823547392, 360.7184448, 211.07849118719997, 412.5190429696], [241.518066432, 366.5606078976, 271.56347658239997, 392.600036608], [173.0573730816, 295.6915283456, 197.09368896, 312.574218752], [337.3072509696, 354.6307373056, 364.3194580224, 375.9028320256]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048523_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include two helmets, and three hats.", "boxes_value": [[57.222106905599986, 29.691528345599977, 283.3194580224, 146.51904296959998], [57.222106905599986, 41.07324216320001, 82.2360840192, 61.81951902719999], [77.823547392, 94.71844479999999, 130.07849118719997, 146.51904296959998], [160.518066432, 100.5606078976, 190.56347658239997, 126.60003660799998], [92.05737308159999, 29.691528345599977, 116.09368896000001, 46.57421875199998], [256.3072509696, 88.63073730560001, 283.3194580224, 109.90283202559999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048526.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[42.4621582198, 255.6820678656, 180.99304197560002, 511.9410400256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048526_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[35.4621582198, 64.6820678656, 173.99304197560002, 320.9410400256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048526.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a person, a bracelet, and two boots.", "boxes_value": [[42.4621582198, 255.6820678656, 180.99304197560002, 511.9410400256], [0, 183.1159057408, 682.192993154, 510.158203136], [30.7745971982, 81.5647583232, 287.4378051596, 511.9896240128], [159.1632079788, 255.6820678656, 180.99304197560002, 272.5147094528], [103.1127929669, 388.6833496064, 177.9477539053, 441.9971923968], [42.4621582198, 436.1278076416, 135.3945312272, 511.9410400256]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048526_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a person, a bracelet, and two boots.", "boxes_value": [[35.4621582198, 64.6820678656, 173.99304197560002, 320.9410400256], [0, 0, 208, 319.158203136], [23.7745971982, 0, 208, 320.9896240128], [152.1632079788, 64.6820678656, 173.99304197560002, 81.51470945279999], [96.1127929669, 197.6833496064, 170.9477539053, 250.99719239680002], [35.4621582198, 245.1278076416, 128.3945312272, 320.9410400256]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048527.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[113.4539184384, 413.277343744, 668.5911865344, 487.5924682752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048527_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify.", "boxes_value": [[113.4539184384, 19.277343744000007, 668.5911865344, 93.59246827520002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048527.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include six leather shoes.", "boxes_value": [[113.4539184384, 413.277343744, 668.5911865344, 487.5924682752], [113.4539184384, 425.6827392512, 148.30377200639998, 447.412658688], [222.92340088319997, 446.5926513664, 265.1531982336, 461.7625732608], [199.96350097919998, 468.7325439488, 238.50329587199997, 487.5924682752], [641.1213378816, 424.8627319296, 668.5911865344, 449.4626464768], [567.3795165696, 413.277343744, 608.577148416, 432.4143066624], [539.471557632, 409.556274432, 574.0242919679999, 433.743225088]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048527_crop.jpg", "text": "Tell me about the region of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include six leather shoes.", "boxes_value": [[113.4539184384, 19.277343744000007, 668.5911865344, 93.59246827520002], [113.4539184384, 31.68273925120002, 148.30377200639998, 53.41265868800002], [222.92340088319997, 52.59265136639999, 265.1531982336, 67.76257326080002], [199.96350097919998, 74.73254394880001, 238.50329587199997, 93.59246827520002], [641.1213378816, 30.86273192959999, 668.5911865344, 55.46264647679999], [567.3795165696, 19.277343744000007, 608.577148416, 38.41430666240001], [539.471557632, 15.55627443200001, 574.0242919679999, 39.743225087999974]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048528.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 303.651855504, 242.1376342528, 531.533935512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048528_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 57.651855504000025, 242.1376342528, 285.53393551199997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048528.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a stool, three leather shoes, and a trash bin can.", "boxes_value": [[0, 303.651855504, 242.1376342528, 531.533935512], [0, 303.651855504, 85.3115234304, 322.8051758076], [2.3139648512, 345.150634736, 52.325317376, 408.99487304959996], [82.8911132672, 513.0560302698001, 152.4723510784, 531.533935512], [86.6444702208, 523.4498291178, 158.5354003968, 541.0617676066], [48.6789550592, 395.53491209000003, 80.2124023296, 408.5042724934], [173.2774047744, 321.9982910172, 242.1376342528, 409.044067408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048528_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a stool, three leather shoes, and a trash bin can.", "boxes_value": [[0, 57.651855504000025, 242.1376342528, 285.53393551199997], [0, 57.651855504000025, 85.3115234304, 76.8051758076], [2.3139648512, 99.15063473599997, 52.325317376, 162.99487304959996], [82.8911132672, 267.0560302698001, 152.4723510784, 285.53393551199997], [86.6444702208, 277.44982911780005, 158.5354003968, 295.06176760660003], [48.6789550592, 149.53491209000003, 80.2124023296, 162.50427249339998], [173.2774047744, 75.99829101720002, 242.1376342528, 163.044067408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048530.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[541.4533691184, 173.1821289085, 664.6425781404, 342.7308960168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048530_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe.", "boxes_value": [[31.45336911840002, 43.182128908500005, 154.64257814040002, 212.7308960168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048530.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a car, two vans, a suv, and a street lights.", "boxes_value": [[541.4533691184, 173.1821289085, 664.6425781404, 342.7308960168], [611.2156982544, 276.03283691490003, 664.6425781404, 342.7308960168], [540.9003906168, 290.9097289921, 663.9542236548, 364.2393798719], [541.9971923688, 249.2451782122, 572.882690448, 291.5254516828], [591.7991943768, 260.8659057858, 633.9089355144, 287.1137085155], [563.474243166, 252.9348754866, 588.5311279644001, 291.23016359540003], [541.4533691184, 173.1821289085, 565.6853027016, 199.0852661358]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00048530_crop.jpg", "text": "Can you provide some context for the area within the picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a potted plant, a car, two vans, a suv, and a street lights.", "boxes_value": [[31.45336911840002, 43.182128908500005, 154.64257814040002, 212.7308960168], [101.21569825439997, 146.03283691490003, 154.64257814040002, 212.7308960168], [30.900390616799996, 160.90972899209999, 153.9542236548, 234.23937987189998], [31.99719236880003, 119.24517821219999, 62.88269044799995, 161.5254516828], [81.79919437679996, 130.8659057858, 123.90893551440001, 157.11370851549998], [53.47424316599995, 122.93487548659999, 78.53112796440007, 161.23016359540003], [31.45336911840002, 43.182128908500005, 55.685302701599994, 69.0852661358]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00048531.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[186.7603149312, 404.0059204096, 272.8126220544, 486.3167724544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048531_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[21.760314931200014, 21.00592040959998, 107.81262205439998, 103.3167724544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048531.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[186.7603149312, 404.0059204096, 272.8126220544, 486.3167724544], [236.9574584832, 64.26013184, 556.8472900608, 488.5350341632], [179.7140503296, 52.4747314688, 418.78955074559997, 491.90234373119995], [0.40734865919999996, 124.8708495872, 267.26281735680004, 512.1058349568], [186.7603149312, 404.0059204096, 230.90887449599998, 486.3167724544], [239.1399535872, 417.1008300544, 272.8126220544, 486.3167724544]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048531_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[21.760314931200014, 21.00592040959998, 107.81262205439998, 103.3167724544], [71.95745848320001, 0, 129, 105.53503416320001], [14.714050329600013, 0, 129, 108.90234373119995], [0, 0, 102.26281735680004, 123], [21.760314931200014, 21.00592040959998, 65.90887449599998, 103.3167724544], [74.13995358720001, 34.10083005439998, 107.81262205439998, 103.3167724544]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048533.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[208.3128051456, 142.1685791232, 550.3569335807999, 489.6692505088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048533_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[86.31280514560001, 87.1685791232, 428.35693358079993, 434.6692505088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048533.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, two sneakers, and a speaker.", "boxes_value": [[208.3128051456, 142.1685791232, 550.3569335807999, 489.6692505088], [414.07592770560007, 177.4769897472, 525.4718017536001, 485.4121093632], [323.25634767360003, 146.9672851456, 438.11608888320006, 488.9597778432], [208.3128051456, 157.610229504, 343.1230468608, 489.6692505088], [245.0868336384, 460.6655246848, 278.8660739328, 489.6191591424], [310.415011968, 460.9522591232, 340.0290534912, 488.0637056], [519.8912353536, 142.1685791232, 550.3569335807999, 166.8618164224]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048533_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, two sneakers, and a speaker.", "boxes_value": [[86.31280514560001, 87.1685791232, 428.35693358079993, 434.6692505088], [292.07592770560007, 122.47698974720001, 403.47180175360006, 430.4121093632], [201.25634767360003, 91.96728514559999, 316.11608888320006, 433.9597778432], [86.31280514560001, 102.61022950399999, 221.1230468608, 434.6692505088], [123.08683363840001, 405.6655246848, 156.86607393280002, 434.6191591424], [188.415011968, 405.9522591232, 218.0290534912, 433.0637056], [397.8912353536, 87.1685791232, 428.35693358079993, 111.86181642240001]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048534.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[319.6909179984, 235.9946289152, 665.2946777028, 379.7797241344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048534_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[86.69091799839998, 35.994628915199996, 432.29467770279996, 179.7797241344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048534.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, two pillows, a nightstand, a lamp, a flower, a person, and a bottle.", "boxes_value": [[319.6909179984, 235.9946289152, 665.2946777028, 379.7797241344], [90.03417968429999, 269.3157348864, 507.2438964897, 490.0136718848], [319.6909179984, 287.7598266368, 378.07592771310004, 313.144653312], [352.691162091, 290.932922368, 487.2305908206, 323.9331664896], [498.6538085607, 317.5869750784, 564.0196532907, 379.7797241344], [533.5578613512, 271.259704576, 555.1348877037, 320.7600708096], [637.6270751778, 257.4146118144, 659.3078613387, 282.592163072], [610.5825195479999, 235.9946289152, 665.2946777028, 309.5010376192], [619.3510742073, 335.2832031232, 648.8034668022, 378.4578247168]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048534_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bed, two pillows, a nightstand, a lamp, a flower, a person, and a bottle.", "boxes_value": [[86.69091799839998, 35.994628915199996, 432.29467770279996, 179.7797241344], [0, 69.31573488639998, 274.2438964897, 215], [86.69091799839998, 87.75982663680003, 145.07592771310004, 113.144653312], [119.69116209100002, 90.93292236799999, 254.2305908206, 123.93316648960001], [265.6538085607, 117.58697507839997, 331.01965329070003, 179.7797241344], [300.5578613512, 71.25970457599999, 322.1348877037, 120.7600708096], [404.62707517779995, 57.41461181440002, 426.30786133870004, 82.592163072], [377.58251954799994, 35.994628915199996, 432.29467770279996, 109.50103761920002], [386.35107420730003, 135.2832031232, 415.8034668022, 178.45782471680002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048535.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for all objects that you mention.", "boxes_value": [[178.0757517222, 212.9846065664, 608.4138401847999, 408.5432980992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048535_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for all objects that you mention.", "boxes_value": [[108.07575172220001, 48.984606566400004, 538.4138401847999, 244.5432980992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048535.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, two handbags, a tie, and a leather shoes.", "boxes_value": [[178.0757517222, 212.9846065664, 608.4138401847999, 408.5432980992], [452.1225585586, 346.1578979328, 623.1889648518, 436.6027221504], [585.211023194, 279.26941696, 608.4138401847999, 313.1812263936], [451.7554944992, 212.9846065664, 466.5387232164, 269.1146780672], [178.0757517222, 242.5220261888, 198.6015621774, 277.315778048], [311.87341332119996, 382.996132352, 331.9291322574, 408.5432980992]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048535_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, two handbags, a tie, and a leather shoes.", "boxes_value": [[108.07575172220001, 48.984606566400004, 538.4138401847999, 244.5432980992], [382.1225585586, 182.15789793279998, 553.1889648518, 272.6027221504], [515.211023194, 115.26941696, 538.4138401847999, 149.1812263936], [381.7554944992, 48.984606566400004, 396.5387232164, 105.11467806719997], [108.07575172220001, 78.5220261888, 128.6015621774, 113.31577804800003], [241.87341332119996, 218.99613235200002, 261.9291322574, 244.5432980992]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048539.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference.", "boxes_value": [[171.8245849779, 58.5735473664, 458.8127441547, 160.3119506944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048539_crop.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference.", "boxes_value": [[71.8245849779, 25.5735473664, 358.8127441547, 127.3119506944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048539.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two bottles, a bowl, a plate, and a cup.", "boxes_value": [[171.8245849779, 58.5735473664, 458.8127441547, 160.3119506944], [213.23596193970002, 100.06927488, 285.5570068275, 217.5909423616], [275.69500733160004, 99.247436544, 340.6195678689, 208.5507812352], [299.1151123038, 65.5495605248, 315.367309554, 102.0504150528], [240.48236082000003, 110.8550414848, 268.08459473069996, 123.4416503808], [445.2175292859, 78.3143921152, 458.8127441547, 106.0958862336], [171.8245849779, 137.7565918208, 186.66363524640002, 160.3119506944], [214.3766479719, 58.5735473664, 229.8240356766, 86.0698852352]], "boxes_seq": [[0], [0], [1, 2], [3, 7], [4], [5], [6]]}, {"image_path": "objects365_v1_00048539_crop.jpg", "text": "Detail the chosen region in the depicted scene . Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two bottles, a bowl, a plate, and a cup.", "boxes_value": [[71.8245849779, 25.5735473664, 358.8127441547, 127.3119506944], [113.23596193970002, 67.06927488, 185.5570068275, 152], [175.69500733160004, 66.247436544, 240.6195678689, 152], [199.1151123038, 32.5495605248, 215.36730955399997, 69.0504150528], [140.48236082000003, 77.8550414848, 168.08459473069996, 90.4416503808], [345.2175292859, 45.31439211519999, 358.8127441547, 73.0958862336], [71.8245849779, 104.7565918208, 86.66363524640002, 127.3119506944], [114.37664797190001, 25.5735473664, 129.8240356766, 53.069885235200005]], "boxes_seq": [[0], [0], [1, 2], [3, 7], [4], [5], [6]]}, {"image_path": "objects365_v1_00048544.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[316.9826349406, 151.7485961728, 415.2272214956, 510.2987596288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048544_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[24.982634940600008, 89.7485961728, 123.22722149560002, 448.2987596288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048544.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a luggage, a backpack, and a bottle.", "boxes_value": [[316.9826349406, 151.7485961728, 415.2272214956, 510.2987596288], [387.89416506599997, 151.7485961728, 406.7176513792, 185.6309203968], [389.89025876799997, 192.0755615232, 404.2729492486, 215.3092041216], [316.9826349406, 251.5646078976, 415.2272214956, 432.5187106816], [317.2650108614, 434.53286016, 403.2371768892, 510.2987596288], [355.1466064698, 495.5528564224, 415.5043945044, 511.7876587008]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048544_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a luggage, a backpack, and a bottle.", "boxes_value": [[24.982634940600008, 89.7485961728, 123.22722149560002, 448.2987596288], [95.89416506599997, 89.7485961728, 114.71765137919999, 123.63092039680001], [97.89025876799997, 130.0755615232, 112.27294924860001, 153.3092041216], [24.982634940600008, 189.5646078976, 123.22722149560002, 370.5187106816], [25.265010861400015, 372.53286016, 111.23717688919999, 448.2987596288], [63.14660646980002, 433.5528564224, 123.50439450440001, 449.7876587008]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048545.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[271.3679198976, 324.282592768, 493.285156224, 430.411193856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048545_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[56.36791989760002, 27.282592767999972, 278.285156224, 133.411193856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048545.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six sneakers.", "boxes_value": [[271.3679198976, 324.282592768, 493.285156224, 430.411193856], [271.3679198976, 396.5794067456, 296.0185546752, 422.0253296128], [297.138305664, 393.9504394752, 325.1807860992, 409.7243041792], [329.5623779328, 352.763183616, 351.4704589824, 371.604125952], [424.64367674880003, 324.282592768, 456.1914062592, 351.010498048], [430.37365724160003, 384.0553588736, 466.3822021632, 421.3056030208], [468.0377197056, 411.3721923584, 493.285156224, 430.411193856]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048545_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include six sneakers.", "boxes_value": [[56.36791989760002, 27.282592767999972, 278.285156224, 133.411193856], [56.36791989760002, 99.57940674560001, 81.0185546752, 125.02532961280002], [82.13830566399997, 96.95043947520003, 110.18078609920002, 112.72430417919998], [114.56237793280002, 55.76318361599999, 136.47045898239998, 74.604125952], [209.64367674880003, 27.282592767999972, 241.1914062592, 54.01049804799999], [215.37365724160003, 87.05535887360003, 251.38220216320002, 124.30560302079999], [253.0377197056, 114.37219235840001, 278.285156224, 133.411193856]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048547.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[255.8066406558, 374.5280151552, 428.77014160709996, 510.5440673792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048547_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[43.80664065580001, 34.52801515520002, 216.77014160709996, 170.5440673792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048547.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a plate, a knife, a spoon, a chair, a stool, and a napkin.", "boxes_value": [[255.8066406558, 374.5280151552, 428.77014160709996, 510.5440673792], [375.2945556447, 451.9523926016, 407.2050781581, 466.0863036928], [388.84558103369994, 498.0130004992, 446.9838866847, 505.1528320512], [391.75976565659994, 499.7615356416, 428.77014160709996, 510.5440673792], [255.8066406558, 374.5280151552, 375.9560546823, 508.3306884608], [298.1320190145, 318.5493164032, 349.3320312459, 416.1706543104], [383.4653320557, 472.1493530112, 440.12670898079995, 491.9466552832]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048547_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a plate, a knife, a spoon, a chair, a stool, and a napkin.", "boxes_value": [[43.80664065580001, 34.52801515520002, 216.77014160709996, 170.5440673792], [163.29455564469998, 111.95239260160002, 195.20507815809998, 126.08630369280002], [176.84558103369994, 158.01300049920002, 234.9838866847, 165.1528320512], [179.75976565659994, 159.7615356416, 216.77014160709996, 170.5440673792], [43.80664065580001, 34.52801515520002, 163.95605468230002, 168.3306884608], [86.13201901449997, 0, 137.3320312459, 76.17065431039998], [171.4653320557, 132.1493530112, 228.12670898079995, 151.94665528320002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048549.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations.", "boxes_value": [[22.1107788288, 24.2506103623, 366.51794432, 281.6400146587]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048549_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations.", "boxes_value": [[22.1107788288, 24.2506103623, 366.51794432, 281.6400146587]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048549.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations. For your reference, objects involved in this region include five lamps.", "boxes_value": [[22.1107788288, 24.2506103623, 366.51794432, 281.6400146587], [191.5664672768, 24.2506103623, 210.8019409408, 84.70501709210001], [102.7167358464, 90.20092774, 119.204284672, 151.5713500965], [22.1107788288, 134.1677856384, 37.6823730688, 186.3784179525], [352.7783203328, 227.59741208309998, 366.51794432, 258.74060061290004], [209.885986304, 253.2447509569, 220.8776855552, 281.6400146587]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048549_crop.jpg", "text": "Please enlighten me about the area in the photograph . Please mention the objects and their locations. For your reference, objects involved in this region include five lamps.", "boxes_value": [[22.1107788288, 24.2506103623, 366.51794432, 281.6400146587], [191.5664672768, 24.2506103623, 210.8019409408, 84.70501709210001], [102.7167358464, 90.20092774, 119.204284672, 151.5713500965], [22.1107788288, 134.1677856384, 37.6823730688, 186.3784179525], [352.7783203328, 227.59741208309998, 366.51794432, 258.74060061290004], [209.885986304, 253.2447509569, 220.8776855552, 281.6400146587]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048551.jpg", "text": "Please interpret and describe the area inside the given picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.014038095, 226.4652099584, 330.30334472190003, 423.9683227648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048551_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.014038095, 49.46520995840001, 330.30334472190003, 246.9683227648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048551.jpg", "text": "Please interpret and describe the area inside the given picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five people.", "boxes_value": [[0.014038095, 226.4652099584, 330.30334472190003, 423.9683227648], [0.014038095, 371.7890625024, 8.863891573, 423.9683227648], [12.635620099499999, 247.017211904, 57.3941039864, 313.089233408], [243.5324707189, 249.8590698496, 284.7386474639, 323.7460326912], [299.2410888874, 226.4652099584, 330.30334472190003, 275.2772827136], [30.7176514007, 228.4984741376, 56.7990112601, 275.0723876864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048551_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five people.", "boxes_value": [[0.014038095, 49.46520995840001, 330.30334472190003, 246.9683227648], [0.014038095, 194.7890625024, 8.863891573, 246.9683227648], [12.635620099499999, 70.01721190399999, 57.3941039864, 136.08923340799998], [243.5324707189, 72.85906984959999, 284.7386474639, 146.7460326912], [299.2410888874, 49.46520995840001, 330.30334472190003, 98.27728271360002], [30.7176514007, 51.4984741376, 56.7990112601, 98.07238768640002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048553.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[150.9604492157, 316.401855488, 349.6331786845, 510.7959594496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048553_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[49.96044921570001, 49.401855488000024, 248.63317868450002, 243.7959594496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048553.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two vases, three chairs, and a desk.", "boxes_value": [[150.9604492157, 316.401855488, 349.6331786845, 510.7959594496], [227.32659909979998, 329.5386962944, 266.1625366327, 385.811157248], [150.9604492157, 330.9754028544, 196.47656250260002, 364.2633667072], [213.3569335932, 316.401855488, 349.6331786845, 510.7959594496], [274.4808960144, 335.7583618048, 433.687377917, 511.798034688], [91.6919555344, 347.6331787264, 269.4707031387, 510.7959594496], [83.3795776618, 357.2530517504, 403.2768554397, 511.2211303936]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048553_crop.jpg", "text": "I'd like some information about the bounding box in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two vases, three chairs, and a desk.", "boxes_value": [[49.96044921570001, 49.401855488000024, 248.63317868450002, 243.7959594496], [126.32659909979998, 62.53869629439998, 165.16253663269998, 118.81115724799997], [49.96044921570001, 63.97540285439999, 95.47656250260002, 97.26336670720002], [112.35693359320001, 49.401855488000024, 248.63317868450002, 243.7959594496], [173.4808960144, 68.75836180480002, 298, 244.79803468799997], [0, 80.63317872639999, 168.4707031387, 243.7959594496], [0, 90.2530517504, 298, 244.2211303936]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048558.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[0, 197.109497088, 241.2539062353, 498.1762084864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048558_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[0, 76.10949708800001, 241.2539062353, 377.1762084864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048558.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a lamp, a nightstand, a bed, and two pillows.", "boxes_value": [[0, 197.109497088, 241.2539062353, 498.1762084864], [11.148498563399999, 197.109497088, 71.9487305001, 234.5250244096], [14.960510229599999, 244.4152221696, 67.4394531228, 286.9874267648], [0.20953372409999999, 270.9293212672, 55.5985717632, 379.0518188544], [0, 367.670471168, 71.1530761491, 498.1762084864], [78.0554809728, 248.9353027584, 608.0017089933, 511.6470337024], [129.9776001309, 298.549865728, 266.566833468, 358.9741211136], [134.8666381899, 301.188110336, 241.2539062353, 356.7089233408]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048558_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two pictures, a lamp, a nightstand, a bed, and two pillows.", "boxes_value": [[0, 76.10949708800001, 241.2539062353, 377.1762084864], [11.148498563399999, 76.10949708800001, 71.9487305001, 113.52502440960001], [14.960510229599999, 123.41522216960001, 67.4394531228, 165.98742676479998], [0.20953372409999999, 149.9293212672, 55.5985717632, 258.0518188544], [0, 246.670471168, 71.1530761491, 377.1762084864], [78.0554809728, 127.93530275840001, 301, 390.6470337024], [129.9776001309, 177.549865728, 266.566833468, 237.9741211136], [134.8666381899, 180.18811033600002, 241.2539062353, 235.70892334080003]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048559.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference.", "boxes_value": [[605.5605468576, 4.332031232, 770.1199951314, 511.8564453376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048559_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference.", "boxes_value": [[41.5605468576, 4.332031232, 205, 511.8564453376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048559.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a lamp, a person, a pen, and an extractor.", "boxes_value": [[605.5605468576, 4.332031232, 770.1199951314, 511.8564453376], [633.1163330255999, 344.778808576, 770.1199951314, 511.8564453376], [646.0284423842, 4.332031232, 768.774536121, 28.332031232], [457.1383056612, 168.0752563712, 768.3172607294, 511.092163072], [605.5605468576, 335.2142944256, 643.4931640421, 358.4822387712], [714.5789795132, 174.653686528, 761.0415038807, 228.1560058368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048559_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a chair, a lamp, a person, a pen, and an extractor.", "boxes_value": [[41.5605468576, 4.332031232, 205, 511.8564453376], [69.11633302559994, 344.778808576, 205, 511.8564453376], [82.02844238420005, 4.332031232, 204.77453612099998, 28.332031232], [0, 168.0752563712, 204.3172607294, 511.092163072], [41.5605468576, 335.2142944256, 79.49316404210003, 358.4822387712], [150.5789795132, 174.653686528, 197.04150388070002, 228.1560058368]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048560.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[310.8096924, 304.82873532779996, 441.16210935000004, 373.2739868036]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048560_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[32.80969240000002, 17.82873532779996, 163.16210935000004, 86.2739868036]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048560.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, two helmets, a sneakers, and a bicycle.", "boxes_value": [[310.8096924, 304.82873532779996, 441.16210935000004, 373.2739868036], [333.78149415, 252.57464601959998, 381.94958497500005, 403.8992309352], [377.68701172500005, 240.2128906061, 482.12219235000003, 381.3071288878], [339.452514675, 298.89001467270003, 411.74023439999996, 492.3359374915], [354.6687012, 304.82873532779996, 371.207275425, 328.1224975773], [310.8096924, 320.32489012220003, 341.02355955, 368.53125000299997], [420.48181155000003, 358.45312500160003, 441.16210935000004, 373.2739868036], [355.498413075, 305.8779907122, 507.122924775, 408.0891723454]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048560_crop.jpg", "text": "Tell me about the region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, two helmets, a sneakers, and a bicycle.", "boxes_value": [[32.80969240000002, 17.82873532779996, 163.16210935000004, 86.2739868036], [55.781494150000015, 0, 103.94958497500005, 103], [99.68701172500005, 0, 195, 94.30712888779999], [61.452514674999975, 11.890014672700033, 133.74023439999996, 103], [76.66870119999999, 17.82873532779996, 93.20727542499998, 41.12249757730001], [32.80969240000002, 33.32489012220003, 63.023559550000016, 81.53125000299997], [142.48181155000003, 71.45312500160003, 163.16210935000004, 86.2739868036], [77.49841307499997, 18.877990712200017, 195, 103]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048561.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations.", "boxes_value": [[3.1934204057000004, 370.934082048, 540.9407958817, 441.9968871936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048561_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations.", "boxes_value": [[3.1934204057000004, 17.934082047999993, 540.9407958817, 88.9968871936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048561.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, a couch, two cabinets, a picture, and a carpet.", "boxes_value": [[3.1934204057000004, 370.934082048, 540.9407958817, 441.9968871936], [133.7168579029, 386.787841792, 193.6706542921, 433.1975097856], [29.3519287226, 388.604248064, 77.1477661321, 421.544616704], [3.1934204057000004, 398.938476544, 110.4110107555, 441.8900756992], [306.81311034559997, 394.5512695296, 358.66918943310003, 441.9968871936], [329.71276853399996, 344.5736083968, 353.5104370373, 395.6685791232], [405.5520629957, 370.934082048, 426.4699096701, 395.8014526464], [507.2946777153, 425.3027343872, 540.9407958817, 441.8357543936]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 6], [5], [7]]}, {"image_path": "objects365_v1_00048561_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, a couch, two cabinets, a picture, and a carpet.", "boxes_value": [[3.1934204057000004, 17.934082047999993, 540.9407958817, 88.9968871936], [133.7168579029, 33.787841791999995, 193.6706542921, 80.1975097856], [29.3519287226, 35.60424806399999, 77.1477661321, 68.54461670400002], [3.1934204057000004, 45.938476544000025, 110.4110107555, 88.8900756992], [306.81311034559997, 41.551269529600006, 358.66918943310003, 88.9968871936], [329.71276853399996, 0, 353.5104370373, 42.668579123200004], [405.5520629957, 17.934082047999993, 426.4699096701, 42.80145264639998], [507.2946777153, 72.30273438720002, 540.9407958817, 88.83575439359998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 6], [5], [7]]}, {"image_path": "objects365_v1_00048563.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object.", "boxes_value": [[353.5261230288, 459.5739135488, 682.2166748129999, 511.5206298624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048563_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object.", "boxes_value": [[82.52612302879999, 13.573913548800022, 411.21667481299994, 65.52062986240003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048563.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include six cars.", "boxes_value": [[353.5261230288, 459.5739135488, 682.2166748129999, 511.5206298624], [322.143371566, 483.1896362496, 631.5834960888, 512.1373291008], [535.1765136417, 479.2525024256, 682.2166748129999, 511.5206298624], [598.1414794775001, 461.4597168128, 675.7287597342, 478.9707641856], [547.4942627128, 459.5739135488, 603.5294189583, 478.9707641856], [424.64770510389997, 461.0107421696, 571.740234344, 483.909729024], [353.5261230288, 461.818908672, 458.32275393879996, 491.4529418752]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048563_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include six cars.", "boxes_value": [[82.52612302879999, 13.573913548800022, 411.21667481299994, 65.52062986240003], [51.143371565999985, 37.18963624960003, 360.58349608879996, 66], [264.1765136417, 33.25250242560003, 411.21667481299994, 65.52062986240003], [327.14147947750007, 15.459716812800025, 404.7287597342, 32.97076418559999], [276.49426271280004, 13.573913548800022, 332.52941895829997, 32.97076418559999], [153.64770510389997, 15.010742169600007, 300.740234344, 37.909729024], [82.52612302879999, 15.81890867200002, 187.32275393879996, 45.4529418752]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048564.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for each element you describe.", "boxes_value": [[306.4705200472, 231.9391479296, 514.1668701026, 295.2757568512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048564_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for each element you describe.", "boxes_value": [[52.47052004720001, 15.939147929600011, 260.1668701026, 79.27575685120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048564.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three flowers, and three vases.", "boxes_value": [[306.4705200472, 231.9391479296, 514.1668701026, 295.2757568512], [403.28826905140005, 234.5931396608, 470.9445800538, 295.2757568512], [461.05664061119995, 256.763488768, 503.695190422, 282.8967895552], [417.04260253580003, 259.5143432704, 460.36889650959995, 289.0862426624], [275.228454568, 247.6943969792, 335.94421388899997, 287.1890869248], [306.4705200472, 268.3259277312, 327.6915893366, 286.5996093952], [447.43762204319995, 231.9391479296, 514.1668701026, 280.848632832]], "boxes_seq": [[0], [0], [1, 4, 6], [2, 3, 5]]}, {"image_path": "objects365_v1_00048564_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three flowers, and three vases.", "boxes_value": [[52.47052004720001, 15.939147929600011, 260.1668701026, 79.27575685120001], [149.28826905140005, 18.593139660800006, 216.9445800538, 79.27575685120001], [207.05664061119995, 40.763488768, 249.695190422, 66.89678955519997], [163.04260253580003, 43.51434327039999, 206.36889650959995, 73.08624266240002], [21.228454568000018, 31.694396979200008, 81.94421388899997, 71.1890869248], [52.47052004720001, 52.32592773120001, 73.69158933659998, 70.59960939519999], [193.43762204319995, 15.939147929600011, 260.1668701026, 64.84863283200002]], "boxes_seq": [[0], [0], [1, 4, 6], [2, 3, 5]]}, {"image_path": "objects365_v1_00048565.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[73.4492797952, 276.41430664, 259.154113792, 525.1478271368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048565_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[46.4492797952, 62.41430664000001, 232.15411379199998, 311.14782713679995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048565.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two gloves, and a sneakers.", "boxes_value": [[73.4492797952, 276.41430664, 259.154113792, 525.1478271368], [73.4492797952, 276.41430664, 259.154113792, 525.1478271368], [0.0803832832, 338.6092529068, 179.7282714624, 703.6068114868], [194.827514624, 485.7423095988, 234.9581298688, 529.1204833984], [217.7924194304, 450.4830322136, 257.2271118336, 496.64477537880003], [214.7635497984, 365.533569374, 245.1771240448, 395.03466798159997]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048565_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two gloves, and a sneakers.", "boxes_value": [[46.4492797952, 62.41430664000001, 232.15411379199998, 311.14782713679995], [46.4492797952, 62.41430664000001, 232.15411379199998, 311.14782713679995], [0, 124.60925290680001, 152.7282714624, 373], [167.827514624, 271.7423095988, 207.9581298688, 315.1204833984], [190.7924194304, 236.48303221359998, 230.22711183360002, 282.64477537880003], [187.7635497984, 151.53356937400002, 218.1771240448, 181.03466798159997]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048566.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give coordinates for the items you reference.", "boxes_value": [[318.6088256662, 103.4508056576, 652.8679198892, 240.1994018304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048566_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give coordinates for the items you reference.", "boxes_value": [[83.6088256662, 34.4508056576, 417.86791988920004, 171.1994018304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048566.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, a picture, two mirrors, and a necklace.", "boxes_value": [[318.6088256662, 103.4508056576, 652.8679198892, 240.1994018304], [585.8834228306, 152.5285033984, 652.8679198892, 226.8082885632], [480.4326171843, 103.4508056576, 504.97155763579997, 159.1606445568], [380.00329590629997, 178.9622802944, 410.7005615223, 233.5351562752], [318.6088256662, 207.1013793792, 369.77087403430005, 236.9459838976], [483.826782201, 207.4938354688, 510.2207031157, 240.1994018304]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048566_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, a picture, two mirrors, and a necklace.", "boxes_value": [[83.6088256662, 34.4508056576, 417.86791988920004, 171.1994018304], [350.8834228306, 83.5285033984, 417.86791988920004, 157.8082885632], [245.4326171843, 34.4508056576, 269.97155763579997, 90.16064455680001], [145.00329590629997, 109.96228029439999, 175.7005615223, 164.5351562752], [83.6088256662, 138.1013793792, 134.77087403430005, 167.9459838976], [248.82678220100001, 138.4938354688, 275.2207031157, 171.1994018304]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048567.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[212.7136230647, 269.1083984384, 501.09570311259995, 512.8713378816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048567_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[72.71362306469999, 61.1083984384, 361.09570311259995, 304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048567.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, three desks, two people, and two boots.", "boxes_value": [[212.7136230647, 269.1083984384, 501.09570311259995, 512.8713378816], [229.5292968437, 207.7888794112, 354.82482908779997, 421.8700561408], [212.7136230647, 269.1083984384, 501.09570311259995, 512.8713378816], [328.8817138539, 221.2145385984, 483.7724609571, 390.3715209728], [409.27087402620003, 258.7218628096, 596.3830566367, 480.2716064256], [233.5366210807, 162.0087890432, 387.6992187421, 456.8307494912], [271.6588135052, 283.336975104, 313.8118286451, 325.4899902464], [348.37280272210006, 374.0606078976, 388.96606442200004, 440.2741088768], [293.4721679414, 396.686340352, 335.7290649225, 456.5779419136]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048567_crop.jpg", "text": "What does the area look like in the context of the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, three desks, two people, and two boots.", "boxes_value": [[72.71362306469999, 61.1083984384, 361.09570311259995, 304], [89.5292968437, 0, 214.82482908779997, 213.8700561408], [72.71362306469999, 61.1083984384, 361.09570311259995, 304], [188.8817138539, 13.214538598399997, 343.7724609571, 182.3715209728], [269.27087402620003, 50.7218628096, 433, 272.2716064256], [93.5366210807, 0, 247.69921874210002, 248.83074949119998], [131.6588135052, 75.33697510399998, 173.81182864509998, 117.4899902464], [208.37280272210006, 166.0606078976, 248.96606442200004, 232.27410887680003], [153.47216794140002, 188.686340352, 195.72906492250002, 248.5779419136]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048569.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[453.8665771278, 155.7944946176, 672.984619136, 210.930603008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048569_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[54.866577127799985, 13.794494617600009, 273.984619136, 68.93060300799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048569.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three pickup trucks, a truck, and a car.", "boxes_value": [[453.8665771278, 155.7944946176, 672.984619136, 210.930603008], [453.8665771278, 166.1658935296, 511.53930667220004, 191.6380615168], [515.6245116917, 160.8792114176, 553.5924072447, 173.3750000128], [610.6126709188, 160.5455322112, 672.984619136, 182.22558592], [511.6994628641, 155.7944946176, 651.7452392539, 210.930603008], [614.8894043009, 171.7070312448, 681.7279052602, 220.3168334848]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048569_crop.jpg", "text": "Please interpret and describe the area inside the given picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three pickup trucks, a truck, and a car.", "boxes_value": [[54.866577127799985, 13.794494617600009, 273.984619136, 68.93060300799999], [54.866577127799985, 24.165893529599998, 112.53930667220004, 49.63806151680001], [116.62451169170004, 18.879211417600004, 154.59240724469998, 31.3750000128], [211.61267091879995, 18.545532211199998, 273.984619136, 40.225585919999986], [112.69946286409998, 13.794494617600009, 252.74523925389997, 68.93060300799999], [215.88940430089997, 29.707031244799992, 282.72790526020003, 78.31683348479999]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048571.jpg", "text": "Could you give me a description of the rectangular region found in ? Please point out the objects and their coordinates.", "boxes_value": [[284.4421387008, 0.3104858624, 389.9710693632, 92.6956176896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048571_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please point out the objects and their coordinates.", "boxes_value": [[26.4421387008, 0.3104858624, 131.9710693632, 92.6956176896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048571.jpg", "text": "Could you give me a description of the rectangular region found in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a cup, and two plates.", "boxes_value": [[284.4421387008, 0.3104858624, 389.9710693632, 92.6956176896], [287.3862304512, 0.3104858624, 389.9710693632, 73.090270976], [0.12890626560000001, 1.7957763584, 427.895874048, 510.6601562624], [357.7528076544, 49.286682112, 382.83459471360004, 71.9300537344], [351.133911168, 67.4013671936, 388.4083252224, 78.8972168192], [284.4421387008, 74.5753174016, 342.4702148352, 92.6956176896]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048571_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a cup, and two plates.", "boxes_value": [[26.4421387008, 0.3104858624, 131.9710693632, 92.6956176896], [29.386230451199992, 0.3104858624, 131.9710693632, 73.090270976], [0, 1.7957763584, 158, 115], [99.75280765439999, 49.286682112, 124.83459471360004, 71.9300537344], [93.133911168, 67.4013671936, 130.40832522239998, 78.8972168192], [26.4421387008, 74.5753174016, 84.47021483520001, 92.6956176896]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048573.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[199.8500366592, 192.5249633792, 367.4648437248, 376.2277832192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048573_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[42.850036659199986, 46.52496337919999, 210.4648437248, 230.22778321919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048573.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a soccer, three people, and two sneakers.", "boxes_value": [[199.8500366592, 192.5249633792, 367.4648437248, 376.2277832192], [282.6727295232, 353.1696777216, 307.116577152, 375.4309692416], [315.83349611520003, 197.3887939584, 380.1855469056, 342.5551757824], [326.3094482688, 216.4699096576, 367.4648437248, 328.7119751168], [199.8500366592, 192.5249633792, 289.269531264, 376.2277832192], [235.15344238080002, 359.5703124992, 262.1610107136, 375.8543090688], [264.7426758144, 352.2227172864, 285.3955077888, 370.0953369088]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048573_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a soccer, three people, and two sneakers.", "boxes_value": [[42.850036659199986, 46.52496337919999, 210.4648437248, 230.22778321919998], [125.67272952320002, 207.1696777216, 150.116577152, 229.4309692416], [158.83349611520003, 51.38879395839999, 223.1855469056, 196.5551757824], [169.3094482688, 70.4699096576, 210.4648437248, 182.71197511679998], [42.850036659199986, 46.52496337919999, 132.26953126400002, 230.22778321919998], [78.15344238080002, 213.57031249919999, 105.16101071359998, 229.85430906879998], [107.7426758144, 206.22271728639998, 128.3955077888, 224.0953369088]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048576.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[282.0985107577, 166.7063598592, 466.5601806889, 414.1068115456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048576_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify.", "boxes_value": [[47.09851075770001, 62.706359859200006, 231.5601806889, 310.1068115456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048576.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[282.0985107577, 166.7063598592, 466.5601806889, 414.1068115456], [282.0985107577, 265.4564209152, 352.0209961098, 350.1929321472], [305.2084961315, 318.19451904, 385.2044677621, 388.11694336], [319.5794677631, 348.8039550976, 384.1359863515, 414.1068115456], [403.9617919998, 283.730957056, 443.4755859026, 338.8424072192], [427.98901367509995, 166.7063598592, 466.5601806889, 227.9305419776]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048576_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[47.09851075770001, 62.706359859200006, 231.5601806889, 310.1068115456], [47.09851075770001, 161.4564209152, 117.02099610980002, 246.1929321472], [70.2084961315, 214.19451904, 150.20446776210002, 284.11694336], [84.57946776310001, 244.8039550976, 149.13598635149998, 310.1068115456], [168.96179199980003, 179.73095705600002, 208.47558590260002, 234.84240721920003], [192.98901367509995, 62.706359859200006, 231.5601806889, 123.9305419776]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048577.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[175.4827270481, 120.067749, 231.88305665320001, 216.06066895]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048577_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[14.482727048100003, 24.067749000000006, 70.88305665320001, 120.06066895000001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048577.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, three people, a gloves, and a sneakers.", "boxes_value": [[175.4827270481, 120.067749, 231.88305665320001, 216.06066895], [91.0909423765, 83.53765870000001, 664.9371338157, 272.7684326], [193.8704833983, 119.7616577, 499.5699462764, 413.8031006], [127.8082885415, 99.03625489999999, 278.0673828093, 395.66839600000003], [207.55236818859998, 155.31347655, 227.88378908540002, 211.2984619], [175.4827270481, 194.34368895, 213.95275878459998, 216.06066895], [192.75872799840002, 120.067749, 231.88305665320001, 152.91741945]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048577_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a mirror, three people, a gloves, and a sneakers.", "boxes_value": [[14.482727048100003, 24.067749000000006, 70.88305665320001, 120.06066895000001], [0, 0, 84, 144], [32.870483398299996, 23.7616577, 84, 144], [0, 3.0362548999999888, 84, 144], [46.552368188599985, 59.31347654999999, 66.88378908540002, 115.2984619], [14.482727048100003, 98.34368895, 52.95275878459998, 120.06066895000001], [31.75872799840002, 24.067749000000006, 70.88305665320001, 56.91741945000001]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048581.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations.", "boxes_value": [[643.457763699, 249.6934814208, 770.7548828319001, 311.9926757888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048581_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.457763699, 15.693481420799998, 159.7548828319001, 77.9926757888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048581.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, three people, a car, a sports car, and a suv.", "boxes_value": [[643.457763699, 249.6934814208, 770.7548828319001, 311.9926757888], [657.1143799155, 282.8187866112, 703.4215087554, 324.6922607616], [697.4494628772, 234.788574208, 737.048584011, 353.103088384], [643.2335205078, 273.2321777152, 692.2839355812, 322.2827148288], [643.457763699, 250.8536376832, 654.3917236374, 293.8356323328], [649.3814697276, 251.4956664832, 704.4793700994001, 294.83654784], [727.2236327856, 266.3760986112, 770.4942627042, 311.9926757888], [744.1668701151, 249.6934814208, 770.7548828319001, 281.494750976]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048581_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, three people, a car, a sports car, and a suv.", "boxes_value": [[32.457763699, 15.693481420799998, 159.7548828319001, 77.9926757888], [46.11437991549997, 48.81878661119998, 92.42150875540005, 90.69226076159998], [86.44946287719995, 0.788574208, 126.048584011, 93], [32.233520507799994, 39.23217771520001, 81.28393558120001, 88.28271482880001], [32.457763699, 16.85363768319999, 43.39172363739999, 59.8356323328], [38.381469727600006, 17.49566648320001, 93.47937009940006, 60.83654783999998], [116.22363278559999, 32.37609861120001, 159.49426270419997, 77.9926757888], [133.16687011509998, 15.693481420799998, 159.7548828319001, 47.49475097599998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048583.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations.", "boxes_value": [[371.9484863493, 195.624816896, 682.8303222417001, 490.8717041151999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048583_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations.", "boxes_value": [[77.94848634930003, 74.624816896, 388.83032224170006, 369.8717041151999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048583.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a storage box, a desk, a cabinet, two people, an umbrella, and a leather shoes.", "boxes_value": [[371.9484863493, 195.624816896, 682.8303222417001, 490.8717041151999], [638.2519531337, 383.7315063296, 682.2658691174, 429.31744384], [642.5747070558, 417.5279541248, 681.8729248061001, 487.0858154496], [316.7270507715, 170.3833007616, 476.4042968443, 280.2191772672], [371.9484863493, 200.1571655168, 482.49438477650006, 490.8717041151999], [373.0277099485, 351.7311401472, 460.40563961780003, 500.9208984576], [643.8577880555, 195.624816896, 682.8303222417001, 299.9887085056], [416.10266116400004, 471.2581787136, 434.7495116951, 490.4455566336]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5], [7]]}, {"image_path": "objects365_v1_00048583_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a storage box, a desk, a cabinet, two people, an umbrella, and a leather shoes.", "boxes_value": [[77.94848634930003, 74.624816896, 388.83032224170006, 369.8717041151999], [344.2519531337, 262.7315063296, 388.26586911740003, 308.31744384], [348.5747070558, 296.5279541248, 387.87292480610006, 366.0858154496], [22.72705077149999, 49.3833007616, 182.4042968443, 159.2191772672], [77.94848634930003, 79.1571655168, 188.49438477650006, 369.8717041151999], [79.02770994849999, 230.7311401472, 166.40563961780003, 379.9208984576], [349.8577880555, 74.624816896, 388.83032224170006, 178.98870850560002], [122.10266116400004, 350.2581787136, 140.7495116951, 369.4455566336]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6], [5], [7]]}, {"image_path": "objects365_v1_00048584.jpg", "text": "What does the area within the given visual contain? Please point out the objects and their coordinates.", "boxes_value": [[59.1193237248, 368.5310058496, 737.7344971008, 511.8049926656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048584_crop.jpg", "text": "What does the area within the given visual contain? Please point out the objects and their coordinates.", "boxes_value": [[59.1193237248, 36.53100584959998, 737.7344971008, 179.80499266560003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048584.jpg", "text": "What does the area within the given visual contain? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, a chair, a storage box, a stool, a desk, and a person.", "boxes_value": [[59.1193237248, 368.5310058496, 737.7344971008, 511.8049926656], [650.6083984127999, 368.5310058496, 737.7344971008, 511.8049926656], [479.26049802240004, 435.3276977664, 576.0672607488, 459.5293579264], [33.9495849984, 241.7142333952, 231.43530270719998, 480.8268432384], [59.1193237248, 477.9226074112, 152.05377200639998, 511.8049926656], [156.4705810176, 438.041259776, 679.2269286912, 510.9511718912], [556.9099120896001, 229.5420532224, 680.0020751616, 477.6904907264]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048584_crop.jpg", "text": "What does the area within the given visual contain? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, a chair, a storage box, a stool, a desk, and a person.", "boxes_value": [[59.1193237248, 36.53100584959998, 737.7344971008, 179.80499266560003], [650.6083984127999, 36.53100584959998, 737.7344971008, 179.80499266560003], [479.26049802240004, 103.32769776639998, 576.0672607488, 127.5293579264], [33.9495849984, 0, 231.43530270719998, 148.8268432384], [59.1193237248, 145.92260741119998, 152.05377200639998, 179.80499266560003], [156.4705810176, 106.041259776, 679.2269286912, 178.95117189119998], [556.9099120896001, 0, 680.0020751616, 145.69049072640001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048585.jpg", "text": "Analyze and describe the region in the included photo . Please point out the objects and their coordinates.", "boxes_value": [[0, 0.0302734336, 194.1874389632, 349.8378295808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048585_crop.jpg", "text": "Analyze and describe the region in the included photo . Please point out the objects and their coordinates.", "boxes_value": [[0, 0.0302734336, 194.1874389632, 349.8378295808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048585.jpg", "text": "Analyze and describe the region in the included photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a street lights.", "boxes_value": [[0, 0.0302734336, 194.1874389632, 349.8378295808], [111.2890625312, 256.9953613312, 137.59008786459998, 313.80548096], [0, 265.674682624, 19.2356567717, 349.8378295808], [12.5649413873, 159.8255615488, 55.85357666579999, 213.378540032], [0, 0.0302734336, 42.458251937899995, 77.0307006976], [149.2908935539, 73.838134784, 194.1874389632, 252.8963012608]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048585_crop.jpg", "text": "Analyze and describe the region in the included photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a street lights.", "boxes_value": [[0, 0.0302734336, 194.1874389632, 349.8378295808], [111.2890625312, 256.9953613312, 137.59008786459998, 313.80548096], [0, 265.674682624, 19.2356567717, 349.8378295808], [12.5649413873, 159.8255615488, 55.85357666579999, 213.378540032], [0, 0.0302734336, 42.458251937899995, 77.0307006976], [149.2908935539, 73.838134784, 194.1874389632, 252.8963012608]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048586.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[184.7299804672, 471.33447267839995, 394.3611450368, 611.8271484672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048586_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[52.729980467199994, 35.33447267839995, 262.3611450368, 175.8271484672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048586.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a machinery vehicle, and four street lights.", "boxes_value": [[184.7299804672, 471.33447267839995, 394.3611450368, 611.8271484672], [338.435974144, 556.2686767872001, 380.9795532288, 611.8271484672], [184.7299804672, 566.2221679872, 209.5166015488, 592.0003662336], [347.0821533184, 471.33447267839995, 394.3611450368, 545.3450927616], [320.4432983552, 542.6623534848001, 354.500427264, 556.2340088064], [306.8716430848, 551.1126709248, 330.6860351488, 578.51196288]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048586_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a machinery vehicle, and four street lights.", "boxes_value": [[52.729980467199994, 35.33447267839995, 262.3611450368, 175.8271484672], [206.435974144, 120.26867678720009, 248.9795532288, 175.8271484672], [52.729980467199994, 130.2221679872, 77.5166015488, 156.00036623359995], [215.0821533184, 35.33447267839995, 262.3611450368, 109.34509276159997], [188.4432983552, 106.66235348480006, 222.500427264, 120.23400880639997], [174.87164308479998, 115.1126709248, 198.6860351488, 142.51196288000006]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048589.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates.", "boxes_value": [[0, 343.0565185536, 287.9316405917, 463.3405761536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048589_crop.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates.", "boxes_value": [[0, 31.05651855359997, 287.9316405917, 151.3405761536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048589.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a person, and two umbrellas.", "boxes_value": [[0, 343.0565185536, 287.9316405917, 463.3405761536], [15.115600584400001, 403.0105590784, 72.34045410819999, 463.3405761536], [259.54101562100004, 366.191467264, 287.9316405917, 404.7849731584], [158.74078368, 281.624816896, 218.18627927010002, 480.4261474816], [205.1695556486, 315.3259277312, 245.4520873864, 367.1770629632], [0, 343.0565185536, 16.3366089049, 396.0410156032]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048589_crop.jpg", "text": "I'd like some information about the specific region in the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a person, and two umbrellas.", "boxes_value": [[0, 31.05651855359997, 287.9316405917, 151.3405761536], [15.115600584400001, 91.01055907839998, 72.34045410819999, 151.3405761536], [259.54101562100004, 54.19146726399998, 287.9316405917, 92.78497315840002], [158.74078368, 0, 218.18627927010002, 168.42614748160003], [205.1695556486, 3.325927731200011, 245.4520873864, 55.1770629632], [0, 31.05651855359997, 16.3366089049, 84.04101560319998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048590.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.7677612562, 0.0960693248, 629.1204834131, 178.315979008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048590_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.7677612562, 0.0960693248, 629.1204834131, 178.315979008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048590.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five cars.", "boxes_value": [[0.7677612562, 0.0960693248, 629.1204834131, 178.315979008], [0.7677612562, 0.3245239296, 169.0273437519, 75.866577152], [152.9555053976, 0.0960693248, 264.1751708942, 48.1072997888], [196.0609741459, 11.2966308352, 404.1715088071, 94.193054208], [105.5318603298, 80.9233398272, 644.7615966964, 394.708374016], [393.6279296882, 86.653259264, 629.1204834131, 178.315979008]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048590_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five cars.", "boxes_value": [[0.7677612562, 0.0960693248, 629.1204834131, 178.315979008], [0.7677612562, 0.3245239296, 169.0273437519, 75.866577152], [152.9555053976, 0.0960693248, 264.1751708942, 48.1072997888], [196.0609741459, 11.2966308352, 404.1715088071, 94.193054208], [105.5318603298, 80.9233398272, 644.7615966964, 222], [393.6279296882, 86.653259264, 629.1204834131, 178.315979008]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048592.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations.", "boxes_value": [[341.461059584, 566.4060058466, 512.0656738304, 683.0296630997999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048592_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations.", "boxes_value": [[43.461059584, 29.40600584660001, 214, 146]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048592.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[341.461059584, 566.4060058466, 512.0656738304, 683.0296630997999], [355.3676147712, 566.4060058466, 443.8991699456, 682.7504882841], [341.461059584, 570.8759765939, 381.4177856512, 662.5413818693], [394.1770629632, 584.3067626818, 461.3312377856, 682.8339843511], [441.8565673984, 573.5621337705, 481.141723648, 669.2567138872], [446.893127424, 580.949096707, 512.0656738304, 683.0296630997999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048592_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[43.461059584, 29.40600584660001, 214, 146], [57.36761477120001, 29.40600584660001, 145.89916994560002, 145.75048828410002], [43.461059584, 33.87597659389996, 83.4177856512, 125.54138186930004], [96.1770629632, 47.30676268180002, 163.33123778560002, 145.83398435109996], [143.8565673984, 36.562133770500054, 183.14172364799998, 132.25671388720002], [148.893127424, 43.94909670699997, 214, 146]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048595.jpg", "text": "Can you analyze the content of the area within the photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[156.7967529472, 78.4105834827, 334.1043701248, 539.2155762057]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048595_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[44.79675294719999, 78.4105834827, 222.10437012480003, 539.2155762057]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048595.jpg", "text": "Can you analyze the content of the area within the photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five people.", "boxes_value": [[156.7967529472, 78.4105834827, 334.1043701248, 539.2155762057], [156.7967529472, 425.5397948991, 205.5148925952, 539.2155762057], [194.2467651584, 428.1911621047, 234.6795654144, 530.5987549079], [236.3366088704, 399.3580322254, 285.7175903232, 537.8898926029], [290.3574218752, 390.7412109276, 334.1043701248, 511.0451660074], [223.573059072, 78.4105834827, 274.6684570112, 190.5712280511]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048595_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five people.", "boxes_value": [[44.79675294719999, 78.4105834827, 222.10437012480003, 539.2155762057], [44.79675294719999, 425.5397948991, 93.5148925952, 539.2155762057], [82.2467651584, 428.1911621047, 122.6795654144, 530.5987549079], [124.3366088704, 399.3580322254, 173.7175903232, 537.8898926029], [178.35742187519998, 390.7412109276, 222.10437012480003, 511.0451660074], [111.573059072, 78.4105834827, 162.66845701120002, 190.5712280511]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048596.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[226.02990720780002, 149.1270751744, 339.3298339559, 491.2960205312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048596_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention.", "boxes_value": [[29.029907207800022, 86.1270751744, 142.32983395590003, 428.2960205312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048596.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, and five baksets.", "boxes_value": [[226.02990720780002, 149.1270751744, 339.3298339559, 491.2960205312], [226.02990720780002, 149.1270751744, 326.0599365091, 398.4405517824], [245.7281494296, 302.3593750016, 321.68951417290003, 336.493286144], [246.7424926732, 353.3563232256, 323.691528349, 393.7004394496], [206.4874878161, 432.6149902336, 297.5690918066, 506.4162597888], [272.72863772669996, 418.6943969792, 339.3298339559, 491.2960205312], [296.48907469799997, 411.734619136, 377.4904785291, 478.6958007808]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048596_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, and five baksets.", "boxes_value": [[29.029907207800022, 86.1270751744, 142.32983395590003, 428.2960205312], [29.029907207800022, 86.1270751744, 129.0599365091, 335.4405517824], [48.728149429599995, 239.35937500159997, 124.68951417290003, 273.493286144], [49.74249267319999, 290.3563232256, 126.69152834900001, 330.7004394496], [9.487487816100014, 369.6149902336, 100.5690918066, 443.4162597888], [75.72863772669996, 355.6943969792, 142.32983395590003, 428.2960205312], [99.48907469799997, 348.734619136, 170, 415.6958007808]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048597.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[274.9035644896, 336.4721069568, 400.73327640179997, 511.0260620288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048597_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference.", "boxes_value": [[31.9035644896, 44.47210695680002, 157.73327640179997, 219.02606202880003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048597.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two handbags, and three boots.", "boxes_value": [[274.9035644896, 336.4721069568, 400.73327640179997, 511.0260620288], [274.9035644896, 369.3502197248, 313.5836181451, 435.8800658944], [346.8486327998, 336.4721069568, 365.8018798886, 404.935974144], [301.5283202813, 476.779968256, 332.24108883959997, 511.0260620288], [332.24108883959997, 471.8876953088, 359.6923828262, 503.1440429568], [379.805175793, 369.6929931776, 400.73327640179997, 406.657043456]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048597_crop.jpg", "text": "Can you discuss the entities within the region of image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two handbags, and three boots.", "boxes_value": [[31.9035644896, 44.47210695680002, 157.73327640179997, 219.02606202880003], [31.9035644896, 77.35021972480001, 70.58361814509999, 143.8800658944], [103.84863279979999, 44.47210695680002, 122.80187988860001, 112.935974144], [58.52832028130001, 184.77996825600002, 89.24108883959997, 219.02606202880003], [89.24108883959997, 179.8876953088, 116.6923828262, 211.14404295679998], [136.805175793, 77.69299317759999, 157.73327640179997, 114.657043456]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048598.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[676.8082275355999, 153.6999511552, 894.5731201157, 468.49121095679993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048598_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates.", "boxes_value": [[54.80822753559994, 78.69995115520001, 272.5731201157, 393.49121095679993]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048598.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a hat, and two backpacks.", "boxes_value": [[676.8082275355999, 153.6999511552, 894.5731201157, 468.49121095679993], [646.8474121281, 169.1413574144, 713.3841552948, 314.4645996032], [739.7834472674, 153.6999511552, 894.5731201157, 468.49121095679993], [667.1392822102, 366.373291008, 910.7552490091, 511.800537088], [676.8082275355999, 169.2025146368, 705.2897949412001, 187.6710815232], [704.6221923682999, 171.6501465088, 753.1300048488, 265.7730102784], [756.9422607184, 158.5762329088, 864.0191649978, 360.499267584]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048598_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a hat, and two backpacks.", "boxes_value": [[54.80822753559994, 78.69995115520001, 272.5731201157, 393.49121095679993], [24.847412128100018, 94.1413574144, 91.38415529480005, 239.46459960319999], [117.78344726739999, 78.69995115520001, 272.5731201157, 393.49121095679993], [45.13928221020001, 291.373291008, 288.7552490091, 436.800537088], [54.80822753559994, 94.2025146368, 83.28979494120006, 112.6710815232], [82.62219236829992, 96.65014650879999, 131.1300048488, 190.77301027840002], [134.9422607184, 83.5762329088, 242.01916499779998, 285.499267584]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048600.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.2816162304, 44.6607666176, 227.1923828224, 484.2899780096001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048600_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.2816162304, 44.6607666176, 227.1923828224, 484.2899780096001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048600.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a picture, a couch, a person, a cup, and a speaker.", "boxes_value": [[0.2816162304, 44.6607666176, 227.1923828224, 484.2899780096001], [0.2816162304, 86.5276489216, 87.896606464, 234.37786864640003], [177.094787584, 97.3025512448, 216.655578624, 154.2969970688], [0.7472534016, 224.7019042816, 227.1923828224, 484.2899780096001], [46.3013305856, 168.6025390592, 291.5123291136, 438.59552], [191.1945800704, 217.90814208, 211.9219360256, 260.175720192], [14.8657837056, 44.6607666176, 63.654113792, 91.6080322048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048600_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a picture, a couch, a person, a cup, and a speaker.", "boxes_value": [[0.2816162304, 44.6607666176, 227.1923828224, 484.2899780096001], [0.2816162304, 86.5276489216, 87.896606464, 234.37786864640003], [177.094787584, 97.3025512448, 216.655578624, 154.2969970688], [0.7472534016, 224.7019042816, 227.1923828224, 484.2899780096001], [46.3013305856, 168.6025390592, 283, 438.59552], [191.1945800704, 217.90814208, 211.9219360256, 260.175720192], [14.8657837056, 44.6607666176, 63.654113792, 91.6080322048]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048606.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[323.6771240448, 275.1033325056, 387.6635741952, 342.4791259648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048606_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify.", "boxes_value": [[16.677124044799996, 17.103332505600008, 80.6635741952, 84.4791259648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048606.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a cup, three bottles, and a plate.", "boxes_value": [[323.6771240448, 275.1033325056, 387.6635741952, 342.4791259648], [180.616088832, 112.7547607552, 371.3010253824, 344.6137084928], [350.989990272, 319.7894897664, 371.58544919039997, 341.154846208], [327.1617431808, 265.203552256, 350.57580564479997, 317.9013671936], [356.81188961280003, 275.1033325056, 368.6274414336, 310.5499877888], [359.4374999808, 322.3655395328, 387.6635741952, 335.8222045696], [323.6771240448, 312.4063110144, 336.3883056384, 342.4791259648]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00048606_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a cup, three bottles, and a plate.", "boxes_value": [[16.677124044799996, 17.103332505600008, 80.6635741952, 84.4791259648], [0, 0, 64.30102538239998, 86.61370849280001], [43.989990272, 61.78948976639998, 64.58544919039997, 83.15484620799998], [20.161743180799988, 7.203552256000023, 43.57580564479997, 59.90136719359998], [49.81188961280003, 17.103332505600008, 61.627441433599984, 52.549987788800024], [52.4374999808, 64.36553953280003, 80.6635741952, 77.82220456959999], [16.677124044799996, 54.40631101439999, 29.388305638400027, 84.4791259648]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00048607.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[296.839111361, 100.91644288, 474.8250732407, 360.8024902144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048607_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference.", "boxes_value": [[44.83911136099999, 65.91644288, 222.82507324070002, 325.8024902144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048607.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[296.839111361, 100.91644288, 474.8250732407, 360.8024902144], [387.3487549178, 100.91644288, 406.6749267828, 154.8262939648], [343.610595718, 119.7340087808, 474.8250732407, 360.8024902144], [371.1949462884, 340.2125243904, 393.07812503570005, 361.8604126208], [390.0191650511, 302.5639648256, 408.137573233, 340.4478149632], [296.839111361, 339.5065917952, 334.4876709161, 356.6837768704]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048607_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[44.83911136099999, 65.91644288, 222.82507324070002, 325.8024902144], [135.34875491780002, 65.91644288, 154.67492678280001, 119.82629396479999], [91.61059571800001, 84.7340087808, 222.82507324070002, 325.8024902144], [119.19494628839999, 305.2125243904, 141.07812503570005, 326.8604126208], [138.0191650511, 267.5639648256, 156.137573233, 305.4478149632], [44.83911136099999, 304.5065917952, 82.4876709161, 321.6837768704]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048609.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[373.08666992639996, 351.8535156224, 700.7445068544, 511.1065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048609_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[82.08666992639996, 39.85351562239998, 409.7445068544, 199.1065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048609.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four benches, a hat, and a handbag.", "boxes_value": [[373.08666992639996, 351.8535156224, 700.7445068544, 511.1065673728], [510.95104980479994, 460.425781248, 700.7445068544, 511.1065673728], [445.86645504, 381.3934936576, 581.0289306624, 427.1408080896], [373.08666992639996, 368.223815936, 506.86279296, 463.8772582912], [342.65722659840003, 348.0216674816, 409.8231201024, 384.166076672], [462.020141568, 351.8535156224, 481.5163574016, 368.4118652416], [486.7786864896, 374.2645874176, 504.7561035264, 390.9548339712]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048609_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include four benches, a hat, and a handbag.", "boxes_value": [[82.08666992639996, 39.85351562239998, 409.7445068544, 199.1065673728], [219.95104980479994, 148.42578124800002, 409.7445068544, 199.1065673728], [154.86645504, 69.3934936576, 290.02893066239994, 115.1408080896], [82.08666992639996, 56.223815935999994, 215.86279295999998, 151.8772582912], [51.65722659840003, 36.02166748159999, 118.82312010240003, 72.16607667199997], [171.02014156799999, 39.85351562239998, 190.51635740159998, 56.411865241600026], [195.77868648959998, 62.26458741760001, 213.7561035264, 78.9548339712]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048610.jpg", "text": "What's inside the area of the provided graphic ? Provide the coordinates for each element you describe.", "boxes_value": [[2.5358276352, 289.11926272, 80.5234374912, 496.314086912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048610_crop.jpg", "text": "What's inside the area of the provided graphic ? Provide the coordinates for each element you describe.", "boxes_value": [[2.5358276352, 52.119262719999995, 80.5234374912, 259.314086912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048610.jpg", "text": "What's inside the area of the provided graphic ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a helmet, two boots, and a gloves.", "boxes_value": [[2.5358276352, 289.11926272, 80.5234374912, 496.314086912], [2.3270263296, 288.4495239168, 91.4792480256, 503.3128662016], [37.8787842048, 289.11926272, 69.0881347584, 322.58355712], [48.158569344, 462.7794189312, 80.5234374912, 490.854919424], [2.5358276352, 469.2133789184, 35.680542028800005, 496.314086912], [48.4559326464, 392.1419067392, 76.2398681856, 408.7445068288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048610_crop.jpg", "text": "What's inside the area of the provided graphic ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a helmet, two boots, and a gloves.", "boxes_value": [[2.5358276352, 52.119262719999995, 80.5234374912, 259.314086912], [2.3270263296, 51.44952391679999, 91.4792480256, 266.3128662016], [37.8787842048, 52.119262719999995, 69.0881347584, 85.58355712000002], [48.158569344, 225.77941893119998, 80.5234374912, 253.854919424], [2.5358276352, 232.2133789184, 35.680542028800005, 259.314086912], [48.4559326464, 155.1419067392, 76.2398681856, 171.7445068288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048611.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe.", "boxes_value": [[37.5793308625, 127.6901244928, 100.5881958185, 379.3040013824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048611_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe.", "boxes_value": [[16.5793308625, 63.690124492799995, 79.5881958185, 315.3040013824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048611.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a gloves, a sneakers, a bottle, and two hockey sticks.", "boxes_value": [[37.5793308625, 127.6901244928, 100.5881958185, 379.3040013824], [37.5793308625, 273.492493824, 80.281832154, 337.3572965888], [61.3869200645, 339.6246860288, 80.6597303795, 379.3040013824], [40.4719848965, 127.6901244928, 55.902832015499996, 158.11096192], [2.161376929, 255.3544922112, 223.3959960725, 442.275146496], [76.2072143635, 140.6737060352, 100.5881958185, 186.7266235392]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048611_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a gloves, a sneakers, a bottle, and two hockey sticks.", "boxes_value": [[16.5793308625, 63.690124492799995, 79.5881958185, 315.3040013824], [16.5793308625, 209.492493824, 59.281832154, 273.3572965888], [40.3869200645, 275.6246860288, 59.659730379500004, 315.3040013824], [19.471984896499997, 63.690124492799995, 34.902832015499996, 94.11096192], [0, 191.3544922112, 95, 378], [55.2072143635, 76.67370603520001, 79.5881958185, 122.7266235392]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048612.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[352.4236449984, 6.4418945536, 456.06677247360005, 313.2880248832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048612_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates.", "boxes_value": [[26.423644998399993, 6.4418945536, 130.06677247360005, 313.2880248832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048612.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, a boots, and a horse.", "boxes_value": [[352.4236449984, 6.4418945536, 456.06677247360005, 313.2880248832], [282.79223631360003, 6.1886596608, 456.7434081984, 252.2539673088], [357.4930419648, 278.5975341568, 378.99139403519996, 313.2880248832], [398.3628540096, 6.4418945536, 456.06677247360005, 45.9509277184], [352.4236449984, 134.3453369344, 415.606567392, 252.5268554752], [60.8527832256, 70.6486205952, 537.6756591936, 457.1499633664]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048612_crop.jpg", "text": "What does the selected region in the image encompass? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a helmet, a boots, and a horse.", "boxes_value": [[26.423644998399993, 6.4418945536, 130.06677247360005, 313.2880248832], [0, 6.1886596608, 130.7434081984, 252.2539673088], [31.4930419648, 278.5975341568, 52.99139403519996, 313.2880248832], [72.36285400960003, 6.4418945536, 130.06677247360005, 45.9509277184], [26.423644998399993, 134.3453369344, 89.60656739199999, 252.5268554752], [0, 70.6486205952, 155, 389]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048613.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object.", "boxes_value": [[29.0210571264, 599.3032226304, 270.6613159424, 691.817016576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048613_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object.", "boxes_value": [[29.0210571264, 23.3032226304, 270.6613159424, 115.81701657600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048613.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[29.0210571264, 599.3032226304, 270.6613159424, 691.817016576], [81.1505126912, 410.4278564352, 208.1842651136, 767.3651122944], [195.6377563648, 658.6777344, 220.4921874944, 691.817016576], [255.0122070528, 599.3032226304, 270.6613159424, 624.1577148672], [29.0210571264, 638.8862304768, 46.0509643776, 666.5023193088], [79.1901855232, 643.9492187136, 103.5843505664, 661.8996582144]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048613_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[29.0210571264, 23.3032226304, 270.6613159424, 115.81701657600001], [81.1505126912, 0, 208.1842651136, 138], [195.6377563648, 82.67773439999996, 220.4921874944, 115.81701657600001], [255.0122070528, 23.3032226304, 270.6613159424, 48.15771486719996], [29.0210571264, 62.88623047680005, 46.0509643776, 90.5023193088], [79.1901855232, 67.94921871359998, 103.5843505664, 85.8996582144]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048614.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[249.68005368060003, 55.4665527296, 381.2349853223, 278.1851196416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048614_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[33.680053680600025, 55.4665527296, 165.23498532230002, 278.1851196416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048614.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a clock, a cabinet, a cup, a bowl, and a wine glass.", "boxes_value": [[249.68005368060003, 55.4665527296, 381.2349853223, 278.1851196416], [250.2856445096, 55.4665527296, 296.6451416323, 122.9605712896], [278.0570678956, 118.85913088, 304.8262329034, 189.0910644736], [351.61132815760004, 215.0148315648, 381.2349853223, 272.974182144], [249.68005368060003, 255.8320922624, 276.0972290378, 278.1851196416], [278.94219969659997, 242.8267211776, 350.06555176620003, 276.5594482176], [365.1086426118, 196.9548950016, 398.2362060231, 223.7954712064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048614_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a clock, a cabinet, a cup, a bowl, and a wine glass.", "boxes_value": [[33.680053680600025, 55.4665527296, 165.23498532230002, 278.1851196416], [34.28564450959999, 55.4665527296, 80.64514163230001, 122.9605712896], [62.05706789560003, 118.85913088, 88.82623290340001, 189.0910644736], [135.61132815760004, 215.0148315648, 165.23498532230002, 272.974182144], [33.680053680600025, 255.8320922624, 60.09722903779999, 278.1851196416], [62.94219969659997, 242.8267211776, 134.06555176620003, 276.5594482176], [149.1086426118, 196.9548950016, 182.2362060231, 223.7954712064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048615.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations.", "boxes_value": [[14.1628418055, 87.283020032, 238.72753908899998, 243.021728512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048615_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations.", "boxes_value": [[14.1628418055, 39.283020031999996, 238.72753908899998, 195.021728512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048615.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations. For your reference, objects involved in this region include five lamps, a storage box, and a converter.", "boxes_value": [[14.1628418055, 87.283020032, 238.72753908899998, 243.021728512], [108.6286010445, 147.265686016, 186.21667477350002, 242.3239135744], [71.4204712215, 191.505554176, 172.416870144, 233.9476318208], [14.1628418055, 209.3447876096, 93.27880857150001, 240.6527099392], [205.5887451405, 227.5181884928, 238.72753908899998, 243.021728512], [12.8807373015, 77.4219360256, 36.934692354, 107.617370624], [144.0078124725, 87.283020032, 164.186828631, 109.6553955328], [165.502868688, 76.7548217856, 187.875244116, 101.3205566464]], "boxes_seq": [[0], [0], [1, 2, 5, 6, 7], [3], [4]]}, {"image_path": "objects365_v1_00048615_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Please mention the objects and their locations. For your reference, objects involved in this region include five lamps, a storage box, and a converter.", "boxes_value": [[14.1628418055, 39.283020031999996, 238.72753908899998, 195.021728512], [108.6286010445, 99.26568601599999, 186.21667477350002, 194.3239135744], [71.4204712215, 143.505554176, 172.416870144, 185.9476318208], [14.1628418055, 161.3447876096, 93.27880857150001, 192.6527099392], [205.5887451405, 179.5181884928, 238.72753908899998, 195.021728512], [12.8807373015, 29.421936025600004, 36.934692354, 59.617370624], [144.0078124725, 39.283020031999996, 164.186828631, 61.6553955328], [165.502868688, 28.7548217856, 187.875244116, 53.32055664639999]], "boxes_seq": [[0], [0], [1, 2, 5, 6, 7], [3], [4]]}, {"image_path": "objects365_v1_00048618.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[261.83612058, 179.220153792, 336.87420654, 246.812194848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048618_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[18.83612058, 17.22015379199999, 93.87420653999999, 84.81219484799999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048618.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include a pillow, a bed, two lamps, and a mirror.", "boxes_value": [[261.83612058, 179.220153792, 336.87420654, 246.812194848], [272.16412356, 208.77093504, 322.20037842000005, 246.812194848], [0, 147.62701416, 547.7036132999999, 476.48663328000004], [288.3460083, 179.220153792, 336.87420654, 228.181701648], [257.97039792000004, 78.917968752, 320.92089846000005, 222.08856201599997], [261.83612058, 184.998291024, 290.95385741999996, 221.483215344]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048618_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include a pillow, a bed, two lamps, and a mirror.", "boxes_value": [[18.83612058, 17.22015379199999, 93.87420653999999, 84.81219484799999], [29.164123560000007, 46.77093504000001, 79.20037842000005, 84.81219484799999], [0, 0, 112, 101], [45.346008299999994, 17.22015379199999, 93.87420653999999, 66.181701648], [14.970397920000039, 0, 77.92089846000005, 60.08856201599997], [18.83612058, 22.998291023999997, 47.95385741999996, 59.483215344]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048619.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[354.1107788288, 120.6782226878, 466.3184204288, 624.6169433524]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048619_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[28.110778828799994, 120.6782226878, 140.31842042879998, 624.6169433524]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048619.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bottle, a car, and two street lights.", "boxes_value": [[354.1107788288, 120.6782226878, 466.3184204288, 624.6169433524], [241.171630848, 232.4494628656, 439.347778304, 681.8847656356], [379.0178832896, 540.8825683636001, 429.5230102528, 624.6169433524], [445.314086912, 253.3446655243, 466.3184204288, 269.0978393689], [354.1107788288, 120.6782226878, 399.250061056, 236.0562744], [386.3873291264, 182.38171388740002, 394.1614990336, 235.6607055929]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048619_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a bottle, a car, and two street lights.", "boxes_value": [[28.110778828799994, 120.6782226878, 140.31842042879998, 624.6169433524], [0, 232.4494628656, 113.34777830399997, 681.8847656356], [53.01788328959998, 540.8825683636001, 103.52301025280002, 624.6169433524], [119.314086912, 253.3446655243, 140.31842042879998, 269.0978393689], [28.110778828799994, 120.6782226878, 73.25006105599999, 236.0562744], [60.387329126400004, 182.38171388740002, 68.16149903360002, 235.6607055929]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048621.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[392.13391110509997, 204.1511230464, 535.0100097844, 303.9682617344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048621_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference.", "boxes_value": [[36.13391110509997, 25.151123046399988, 179.01000978440004, 124.96826173440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048621.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two people, and two moniters.", "boxes_value": [[392.13391110509997, 204.1511230464, 535.0100097844, 303.9682617344], [392.13391110509997, 238.1722412032, 433.6699218713, 286.307373056], [444.5390625022, 239.3367920128, 464.7248534826, 273.4972534272], [442.2292480242, 230.6009521664, 505.1801757512, 303.9682617344], [497.9075927757, 211.1387939328, 515.8635254244, 242.7755737088], [475.8369140598, 204.1511230464, 535.0100097844, 246.827514624], [382.848144513, 258.9083862528, 409.8935546763, 285.4560546816]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048621_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two people, and two moniters.", "boxes_value": [[36.13391110509997, 25.151123046399988, 179.01000978440004, 124.96826173440002], [36.13391110509997, 59.1722412032, 77.6699218713, 107.30737305600002], [88.53906250220001, 60.336792012800004, 108.7248534826, 94.49725342720001], [86.22924802419999, 51.60095216639999, 149.1801757512, 124.96826173440002], [141.90759277569998, 32.13879393280001, 159.8635254244, 63.77557370880001], [119.8369140598, 25.151123046399988, 179.01000978440004, 67.827514624], [26.848144513000022, 79.90838625280003, 53.89355467630003, 106.4560546816]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048624.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[354.03808596389996, 206.86511232, 507.0540771585, 410.609069824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048624_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[39.03808596389996, 51.86511232000001, 192.05407715849998, 255.60906982400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048624.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a lamp, two air conditioners, a mirror, a chair, and a towel.", "boxes_value": [[354.03808596389996, 206.86511232, 507.0540771585, 410.609069824], [322.8812866101, 309.5392456192, 410.2724609471, 402.2498779136], [354.03808596389996, 249.505249024, 375.69592287100005, 300.0401611264], [340.7395019707, 195.8884277248, 404.9530029394, 225.1455077888], [350.23852540929994, 225.1065063424, 399.506225604, 312.5366821376], [442.5692138696, 206.86511232, 461.5672607468, 274.4982909952], [468.0266113313, 294.7207031296, 505.6428222615, 410.609069824], [474.7934570533, 297.2484130816, 507.0540771585, 328.4221801984]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00048624_crop.jpg", "text": "Please describe what can be seen in the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, a lamp, two air conditioners, a mirror, a chair, and a towel.", "boxes_value": [[39.03808596389996, 51.86511232000001, 192.05407715849998, 255.60906982400002], [7.881286610100005, 154.53924561920002, 95.2724609471, 247.24987791360002], [39.03808596389996, 94.505249024, 60.69592287100005, 145.0401611264], [25.73950197070002, 40.88842772480001, 89.9530029394, 70.14550778879999], [35.23852540929994, 70.1065063424, 84.50622560400001, 157.5366821376], [127.56921386959999, 51.86511232000001, 146.5672607468, 119.49829099520002], [153.0266113313, 139.7207031296, 190.6428222615, 255.60906982400002], [159.79345705330002, 142.2484130816, 192.05407715849998, 173.42218019839999]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6], [7]]}, {"image_path": "objects365_v1_00048625.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Specify the location of each mentioned object.", "boxes_value": [[227.8052368128, 314.0958252032, 360.7569580032, 449.6694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048625_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Specify the location of each mentioned object.", "boxes_value": [[33.80523681279999, 34.09582520319998, 166.7569580032, 169.6694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048625.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a watch, and two bottles.", "boxes_value": [[227.8052368128, 314.0958252032, 360.7569580032, 449.6694336], [292.6678467072, 219.4729004032, 579.2139892224, 471.5867919872], [341.7791748096, 401.4286498816, 360.7569580032, 432.904052736], [197.767211904, 260.7991943168, 289.72399902719997, 394.4193115136], [296.41467287039995, 342.5596923904, 336.889404288, 449.6694336], [227.8052368128, 314.0958252032, 260.87597652479997, 420.2183837696]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048625_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a watch, and two bottles.", "boxes_value": [[33.80523681279999, 34.09582520319998, 166.7569580032, 169.6694336], [98.6678467072, 0, 199, 191.5867919872], [147.77917480960002, 121.4286498816, 166.7569580032, 152.90405273599998], [3.7672119039999927, 0, 95.72399902719997, 114.41931151360001], [102.41467287039995, 62.559692390400016, 142.88940428799998, 169.6694336], [33.80523681279999, 34.09582520319998, 66.87597652479997, 140.2183837696]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00048628.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe.", "boxes_value": [[481.32006832499997, 111.70703125, 751.151245125, 497.236084]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048628_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe.", "boxes_value": [[68.32006832499997, 96.70703125, 337, 482.236084]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048628.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, a desk, two people, and a blackboard.", "boxes_value": [[481.32006832499997, 111.70703125, 751.151245125, 497.236084], [579.264282225, 321.91137695, 738.5462646750001, 497.236084], [669.791503875, 365.4560547, 751.151245125, 494.94421385], [544.909179675, 113.9838867, 558.9746094, 147.30810545], [574.589843775, 111.70703125, 590.7700195499999, 148.2885132], [481.32006832499997, 115.51953125, 509.58374025, 143.5443115]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048628_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a chair, a desk, two people, and a blackboard.", "boxes_value": [[68.32006832499997, 96.70703125, 337, 482.236084], [166.264282225, 306.91137695, 325.5462646750001, 482.236084], [256.791503875, 350.4560547, 337, 479.94421385], [131.90917967500002, 98.9838867, 145.97460939999996, 132.30810545], [161.58984377499996, 96.70703125, 177.77001954999992, 133.2885132], [68.32006832499997, 100.51953125, 96.58374025, 128.5443115]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048629.jpg", "text": "Describe the visual elements within the selected area of the image . Specify the location of each mentioned object.", "boxes_value": [[347.470248384, 169.236238464, 640.08288576, 481.037536608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048629_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Specify the location of each mentioned object.", "boxes_value": [[73.470248384, 78.236238464, 366, 389]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048629.jpg", "text": "Describe the visual elements within the selected area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a person, a hat, a moniter, and two remotes.", "boxes_value": [[347.470248384, 169.236238464, 640.08288576, 481.037536608], [398.59692384, 332.430725088, 640.08288576, 481.037536608], [333.233154304, 170.22796632, 414.68395993599995, 436.963745136], [347.470248384, 169.236238464, 396.58722374399997, 200.34365616], [384.60461427200005, 182.68133544, 482.09973145600003, 248.46118166399998], [485.47814944000004, 387.99005126400004, 505.450439424, 406.8527832], [510.628417984, 414.249938976, 528.01171872, 436.811218272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048629_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a person, a hat, a moniter, and two remotes.", "boxes_value": [[73.470248384, 78.236238464, 366, 389], [124.59692383999999, 241.43072508799997, 366, 389], [59.23315430399998, 79.22796632000001, 140.68395993599995, 345.963745136], [73.470248384, 78.236238464, 122.58722374399997, 109.34365616], [110.60461427200005, 91.68133544, 208.09973145600003, 157.46118166399998], [211.47814944000004, 296.99005126400004, 231.45043942400002, 315.8527832], [236.628417984, 323.249938976, 254.01171871999998, 345.811218272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048630.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object.", "boxes_value": [[433.96716308939995, 419.7498168832, 822.015258786, 480.495910656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048630_crop.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object.", "boxes_value": [[97.96716308939995, 15.749816883200026, 486, 76.49591065599998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048630.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, and four cars.", "boxes_value": [[433.96716308939995, 419.7498168832, 822.015258786, 480.495910656], [433.96716308939995, 454.8135986176, 459.09118652819996, 480.495910656], [449.41564942739996, 433.2982788096, 639.5611572276, 490.2952270336], [578.8267822056, 428.1591796736, 716.1800537484, 480.0170898432], [652.1752929534, 428.6264037888, 774.1113281322, 475.8123779072], [719.917602507, 419.7498168832, 822.015258786, 466.4686279168]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048630_crop.jpg", "text": "Please elucidate the area of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, and four cars.", "boxes_value": [[97.96716308939995, 15.749816883200026, 486, 76.49591065599998], [97.96716308939995, 50.81359861760001, 123.09118652819996, 76.49591065599998], [113.41564942739996, 29.298278809599992, 303.5611572276, 86.29522703359999], [242.8267822056, 24.159179673600022, 380.18005374840004, 76.01708984319998], [316.1752929534, 24.62640378880002, 438.1113281322, 71.81237790720002], [383.91760250699997, 15.749816883200026, 486, 62.46862791680002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048633.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe.", "boxes_value": [[199.51818847500002, 285.638244608, 695.5585937475, 382.4927978496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048633_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe.", "boxes_value": [[124.51818847500002, 24.63824460799998, 620.5585937475, 121.49279784959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048633.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two glasses, and two handbags.", "boxes_value": [[199.51818847500002, 285.638244608, 695.5585937475, 382.4927978496], [289.185058626, 311.5230102528, 335.726806662, 355.7832641536], [199.51818847500002, 358.4003906048, 225.61828613850003, 382.4927978496], [386.90356442399997, 320.9233398272, 422.3729248185, 343.6772461056], [661.511718705, 285.638244608, 695.5585937475, 324.0085449216], [593.648681661, 308.6702270464, 610.3247069954999, 339.9803466752]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048633_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a desk, two glasses, and two handbags.", "boxes_value": [[124.51818847500002, 24.63824460799998, 620.5585937475, 121.49279784959998], [214.185058626, 50.52301025280002, 260.726806662, 94.78326415359999], [124.51818847500002, 97.40039060480001, 150.61828613850003, 121.49279784959998], [311.90356442399997, 59.92333982719998, 347.3729248185, 82.67724610559998], [586.511718705, 24.63824460799998, 620.5585937475, 63.00854492159999], [518.648681661, 47.67022704639999, 535.3247069954999, 78.9803466752]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048634.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[336.354614288, 351.9279174656, 761.76757811, 402.1108398592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048634_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[106.354614288, 12.92791746559999, 531.76757811, 63.110839859199984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048634.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, four people, and a van.", "boxes_value": [[336.354614288, 351.9279174656, 761.76757811, 402.1108398592], [336.354614288, 351.9279174656, 371.816040057, 399.9371337728], [373.566162123, 342.6778564608, 392.90551754300003, 396.9222412288], [686.768798793, 364.8472900608, 712.240112276, 402.1108398592], [740.541503933, 362.4888305664, 761.76757811, 401.1674804736], [715.07019045, 363.9039306752, 741.484863273, 401.1674804736], [438.020751938, 359.2613525504, 455.88769533999994, 378.256774912]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048634_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, four people, and a van.", "boxes_value": [[106.354614288, 12.92791746559999, 531.76757811, 63.110839859199984], [106.354614288, 12.92791746559999, 141.816040057, 60.937133772799996], [143.56616212300003, 3.677856460800001, 162.90551754300003, 57.922241228799976], [456.768798793, 25.847290060799992, 482.240112276, 63.110839859199984], [510.54150393299994, 23.488830566399997, 531.76757811, 62.167480473599994], [485.07019045000004, 24.9039306752, 511.484863273, 62.167480473599994], [208.020751938, 20.261352550399977, 225.88769533999994, 39.256774912000026]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048635.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[295.754028288, 323.818176256, 666.2923584, 512.7379150336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048635_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[92.75402828799997, 47.818176256000015, 463.2923584, 236]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048635.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a flower, a desk, and a vase.", "boxes_value": [[295.754028288, 323.818176256, 666.2923584, 512.7379150336], [295.754028288, 323.818176256, 395.23352048640004, 511.825317376], [440.8663329792, 345.72192384, 603.3190917888, 511.825317376], [588.7165527552, 452.5026855424, 666.2923584, 512.7379150336], [401.88684080639996, 388.396606464, 482.2912598016, 510.3893432832], [595.6008301056, 500.2004394496, 624.7127685888, 510.8945923072]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048635_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a flower, a desk, and a vase.", "boxes_value": [[92.75402828799997, 47.818176256000015, 463.2923584, 236], [92.75402828799997, 47.818176256000015, 192.23352048640004, 235.825317376], [237.86633297920002, 69.72192383999999, 400.31909178880005, 235.825317376], [385.71655275520004, 176.5026855424, 463.2923584, 236], [198.88684080639996, 112.396606464, 279.2912598016, 234.3893432832], [392.60083010560004, 224.20043944960003, 421.71276858880003, 234.8945923072]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048637.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[212.8528442128, 57.293090816, 400.2553711186, 235.6718139904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048637_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference.", "boxes_value": [[46.852844212799994, 45.293090816, 234.2553711186, 223.6718139904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048637.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[212.8528442128, 57.293090816, 400.2553711186, 235.6718139904], [277.51245120560003, 57.293090816, 333.72180173360005, 98.5897827328], [343.4724121254, 172.5796508672, 400.2553711186, 235.6718139904], [199.80938722259998, 162.7381591552, 563.514404326, 504.1614990336001], [283.7910156126, 130.5180663808, 354.21313477580003, 200.6823119872], [212.8528442128, 124.8430175744, 295.91491695499997, 203.777771008]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048637_crop.jpg", "text": "I need details about the area located within image . Give coordinates for the items you reference. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[46.852844212799994, 45.293090816, 234.2553711186, 223.6718139904], [111.51245120560003, 45.293090816, 167.72180173360005, 86.5897827328], [177.4724121254, 160.5796508672, 234.2553711186, 223.6718139904], [33.80938722259998, 150.7381591552, 281, 268], [117.7910156126, 118.51806638080001, 188.21313477580003, 188.6823119872], [46.852844212799994, 112.8430175744, 129.91491695499997, 191.777771008]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048638.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give coordinates for the items you reference.", "boxes_value": [[227.62103271840002, 0, 521.6879882538001, 133.6505126912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048638_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give coordinates for the items you reference.", "boxes_value": [[73.62103271840002, 0, 367.6879882538001, 133.6505126912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048638.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a microwave, a bottle, and a pot.", "boxes_value": [[227.62103271840002, 0, 521.6879882538001, 133.6505126912], [322.529907231, 0.4125976576, 496.14367678019994, 68.7006835712], [227.62103271840002, 0, 323.1086425884, 67.5432739328], [329.70043944, 93.1503906304, 398.1210937668, 133.6505126912], [247.5843505506, 100.2677001728, 265.7756347794, 126.505249024], [501.62121580139996, 0, 521.6879882538001, 54.249206528]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048638_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, a microwave, a bottle, and a pot.", "boxes_value": [[73.62103271840002, 0, 367.6879882538001, 133.6505126912], [168.52990723099998, 0.4125976576, 342.14367678019994, 68.7006835712], [73.62103271840002, 0, 169.10864258840002, 67.5432739328], [175.70043944000003, 93.1503906304, 244.12109376680002, 133.6505126912], [93.58435055059999, 100.2677001728, 111.77563477939998, 126.505249024], [347.62121580139996, 0, 367.6879882538001, 54.249206528]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048639.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for each element you describe.", "boxes_value": [[287.00463870510004, 243.0058593792, 427.9543457196, 317.6469116416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048639_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for each element you describe.", "boxes_value": [[36.004638705100035, 19.00585937919999, 176.95434571959998, 93.64691164160001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048639.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, and two desks.", "boxes_value": [[287.00463870510004, 243.0058593792, 427.9543457196, 317.6469116416], [283.3864746179, 264.4045410304, 347.7209472489, 373.8471679488], [325.5366821497, 267.3624267776, 427.9543457196, 317.6469116416], [287.00463870510004, 243.0058593792, 339.6131591577, 304.1712036352], [317.7457275135, 245.541259776, 392.2216796786, 317.4818115072], [408.8729248275, 237.9570312704, 421.91186520959997, 304.3159179776]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00048639_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three chairs, and two desks.", "boxes_value": [[36.004638705100035, 19.00585937919999, 176.95434571959998, 93.64691164160001], [32.38647461789998, 40.404541030400026, 96.72094724890002, 112], [74.53668214970003, 43.36242677759998, 176.95434571959998, 93.64691164160001], [36.004638705100035, 19.00585937919999, 88.61315915770001, 80.17120363520002], [66.74572751350001, 21.541259776000004, 141.22167967860003, 93.4818115072], [157.87292482750001, 13.957031270399995, 170.91186520959997, 80.31591797760001]], "boxes_seq": [[0], [0], [1, 3, 5], [2, 4]]}, {"image_path": "objects365_v1_00048640.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object.", "boxes_value": [[218.0705566677, 81.6125488128, 374.16540524600003, 152.250366208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048640_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object.", "boxes_value": [[39.070556667700004, 18.6125488128, 195.16540524600003, 89.250366208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048640.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two hats, and a scale.", "boxes_value": [[218.0705566677, 81.6125488128, 374.16540524600003, 152.250366208], [218.0705566677, 95.3306274304, 269.97845455920003, 152.250366208], [274.5891723459, 81.6125488128, 317.4451904385, 136.766418432], [339.90832518900004, 115.81030272, 374.16540524600003, 133.4843749888], [225.5008545199, 95.7406616064, 252.5355224592, 116.3385619968], [254.1599731425, 116.3375854592, 295.9599609499, 147.5375976448]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048640_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two hats, and a scale.", "boxes_value": [[39.070556667700004, 18.6125488128, 195.16540524600003, 89.250366208], [39.070556667700004, 32.3306274304, 90.97845455920003, 89.250366208], [95.58917234590001, 18.6125488128, 138.4451904385, 73.766418432], [160.90832518900004, 52.810302719999996, 195.16540524600003, 70.4843749888], [46.5008545199, 32.740661606399996, 73.5355224592, 53.338561996799996], [75.15997314250001, 53.3375854592, 116.9599609499, 84.5375976448]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048641.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations.", "boxes_value": [[238.3479614433, 451.7330932736, 335.2230224363, 487.8731079168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048641_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations.", "boxes_value": [[24.347961443299994, 9.73309327359999, 121.22302243630003, 45.873107916799995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048641.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, four plates, and a cookies.", "boxes_value": [[238.3479614433, 451.7330932736, 335.2230224363, 487.8731079168], [220.30908206450002, 381.9817504768, 495.53833004599994, 511.7701415936], [247.46691894539998, 443.3435058688, 277.8493042132, 464.3218383872], [299.6145019586, 451.7330932736, 335.2230224363, 466.614318848], [275.1668090777, 475.1177978368, 317.68450926730003, 487.8731079168], [238.3479614433, 473.556579584, 265.3142089697, 484.74468992], [239.5422973844, 435.535278336, 273.2811889613, 482.8704223744]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048641_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, four plates, and a cookies.", "boxes_value": [[24.347961443299994, 9.73309327359999, 121.22302243630003, 45.873107916799995], [6.309082064500018, 0, 145, 54], [33.46691894539998, 1.34350586879998, 63.849304213200014, 22.32183838719999], [85.61450195859999, 9.73309327359999, 121.22302243630003, 24.614318847999982], [61.16680907770001, 33.11779783679998, 103.68450926730003, 45.873107916799995], [24.347961443299994, 31.55657958400002, 51.31420896970002, 42.744689919999985], [25.5422973844, 0, 59.28118896130002, 40.87042237439999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048642.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[179.2199096637, 188.2734374912, 316.1970214593, 287.9674072064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048642_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[35.21990966370001, 25.273437491200013, 172.1970214593, 124.96740720640003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048642.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a gloves, a belt, and a boots.", "boxes_value": [[179.2199096637, 188.2734374912, 316.1970214593, 287.9674072064], [1.058227569, 0.3712768512, 360.9328613199, 512.0063476736], [213.77917480530002, 188.2734374912, 316.1970214593, 287.9674072064], [170.1971435379, 159.9451293696, 229.0328979384, 240.0271606272], [179.2199096637, 213.8012695552, 200.6080322313, 231.7326660096], [231.7333984521, 228.0556030464, 268.3986816276, 247.5786743296], [279.112548822, 258.5306396672, 302.92126467090003, 286.3867187712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048642_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a gloves, a belt, and a boots.", "boxes_value": [[35.21990966370001, 25.273437491200013, 172.1970214593, 124.96740720640003], [0, 0, 206, 149], [69.77917480530002, 25.273437491200013, 172.1970214593, 124.96740720640003], [26.19714353789999, 0, 85.0328979384, 77.0271606272], [35.21990966370001, 50.80126955520001, 56.6080322313, 68.7326660096], [87.7333984521, 65.0556030464, 124.39868162760001, 84.5786743296], [135.112548822, 95.53063966719998, 158.92126467090003, 123.38671877119998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048645.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object.", "boxes_value": [[0.1208496128, 609.8630370815999, 443.4180297728, 768.3408202752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048645_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object.", "boxes_value": [[0.1208496128, 39.86303708159994, 443.4180297728, 198]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048645.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a leather shoes, two sneakers, and two boots.", "boxes_value": [[0.1208496128, 609.8630370815999, 443.4180297728, 768.3408202752], [33.8008422912, 591.9167480832, 70.120727552, 626.9548339968001], [135.0691528192, 609.8630370815999, 173.5254516736, 634.6459961088], [310.444457984, 661.962036096, 363.6339111424, 766.8354491904], [383.2036132864, 658.4494629120001, 443.4180297728, 768.3408202752], [0.1208496128, 662.5106201088, 18.6271972864, 688.4792480256]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00048645_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a leather shoes, two sneakers, and two boots.", "boxes_value": [[0.1208496128, 39.86303708159994, 443.4180297728, 198], [33.8008422912, 21.91674808319999, 70.120727552, 56.95483399680006], [135.0691528192, 39.86303708159994, 173.5254516736, 64.64599610879998], [310.444457984, 91.96203609600002, 363.6339111424, 196.83544919040003], [383.2036132864, 88.44946291200006, 443.4180297728, 198], [0.1208496128, 92.51062010880003, 18.6271972864, 118.47924802559999]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00048648.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object.", "boxes_value": [[347.9532470836, 205.1867675648, 477.8715820028, 440.2412719616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048648_crop.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object.", "boxes_value": [[32.95324708359999, 59.18676756479999, 162.87158200279998, 294.2412719616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048648.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a helmet, two gloves, and two boots.", "boxes_value": [[347.9532470836, 205.1867675648, 477.8715820028, 440.2412719616], [347.9532470836, 205.1867675648, 477.8715820028, 440.2412719616], [378.70935057919996, 205.2434081792, 423.87182620839997, 260.1243286016], [416.5217285416, 300.7133789184, 451.8022461172, 337.9540405248], [347.43041988799996, 334.0339965952, 387.611206074, 375.6846923776], [404.71289060320004, 394.8493042176, 442.3592528992, 438.667236352], [366.4493407868, 373.8660888576, 391.7526855344, 423.5469970944]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048648_crop.jpg", "text": "Please describe the content within the area displayed in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a helmet, two gloves, and two boots.", "boxes_value": [[32.95324708359999, 59.18676756479999, 162.87158200279998, 294.2412719616], [32.95324708359999, 59.18676756479999, 162.87158200279998, 294.2412719616], [63.70935057919996, 59.2434081792, 108.87182620839997, 114.12432860159998], [101.5217285416, 154.7133789184, 136.8022461172, 191.9540405248], [32.43041988799996, 188.0339965952, 72.611206074, 229.68469237760002], [89.71289060320004, 248.84930421759998, 127.35925289919999, 292.667236352], [51.44934078680001, 227.8660888576, 76.75268553439997, 277.5469970944]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048649.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for each element you describe.", "boxes_value": [[176.4122314487, 19.7603759616, 541.1768799078, 162.8716430848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048649_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for each element you describe.", "boxes_value": [[91.4122314487, 19.7603759616, 456.17687990779996, 162.8716430848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048649.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a baseball bat, a person, two helmets, and a hat.", "boxes_value": [[176.4122314487, 19.7603759616, 541.1768799078, 162.8716430848], [220.44085689730002, 19.7603759616, 277.35241699799997, 109.9860229632], [299.7806396659, 32.978271488, 363.7103271581, 161.5897827328], [176.4122314487, 25.0087890432, 206.7570190498, 59.47448729599999], [234.42590335510002, 141.6784057856, 261.39904782959997, 162.8716430848], [488.50964358939996, 101.7703857664, 541.1768799078, 159.0173339648]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048649_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a baseball bat, a person, two helmets, and a hat.", "boxes_value": [[91.4122314487, 19.7603759616, 456.17687990779996, 162.8716430848], [135.44085689730002, 19.7603759616, 192.35241699799997, 109.9860229632], [214.78063966590003, 32.978271488, 278.7103271581, 161.5897827328], [91.4122314487, 25.0087890432, 121.7570190498, 59.47448729599999], [149.42590335510002, 141.6784057856, 176.39904782959997, 162.8716430848], [403.50964358939996, 101.7703857664, 456.17687990779996, 159.0173339648]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048650.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates.", "boxes_value": [[131.97290039, 417.2413940224, 217.2479248307, 511.5774536192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048650_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates.", "boxes_value": [[21.972900390000007, 24.241394022400016, 107.24792483069999, 118.57745361920001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048650.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a carpet, a person, a leather shoes, and a converter.", "boxes_value": [[131.97290039, 417.2413940224, 217.2479248307, 511.5774536192], [90.9584350489, 289.6892089856, 403.6723632549, 510.2430419968], [158.5447998352, 299.8577270272, 622.9616699036001, 509.9304199168], [0.3615722772, 88.8601074176, 237.41265871619999, 468.5018310656001], [163.9080810353, 417.2413940224, 217.2479248307, 463.0635376128], [131.97290039, 481.4239501824, 163.0687256018, 511.5774536192]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048650_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, a carpet, a person, a leather shoes, and a converter.", "boxes_value": [[21.972900390000007, 24.241394022400016, 107.24792483069999, 118.57745361920001], [0, 0, 128, 117.2430419968], [48.544799835199996, 0, 128, 116.93041991680002], [0, 0, 127.41265871619999, 75.50183106560007], [53.9080810353, 24.241394022400016, 107.24792483069999, 70.06353761280002], [21.972900390000007, 88.4239501824, 53.0687256018, 118.57745361920001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048651.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[284.19964601699996, 0.8891601408, 570.5803222859, 256.0556640768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048651_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[72.19964601699996, 0.8891601408, 358.5803222859, 256.0556640768]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048651.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a helmet, a boots, a glasses, and two horses.", "boxes_value": [[284.19964601699996, 0.8891601408, 570.5803222859, 256.0556640768], [390.609130885, 0.8891601408, 570.5803222859, 253.158203136], [190.7423706395, 0.0546264576, 355.9752197161, 260.4938964992], [427.00793454390003, 1.9731445248, 482.5043945513, 45.6159668224], [284.19964601699996, 148.5024414208, 350.0296630531, 256.0556640768], [439.4052734684, 48.8217773568, 469.7893066103, 62.2910156288], [122.43206785769999, 24.2135620096, 441.9403076062, 511.9122925056], [375.61950685029996, 98.7783813632, 645.3769531277001, 495.4882202112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048651_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a helmet, a boots, a glasses, and two horses.", "boxes_value": [[72.19964601699996, 0.8891601408, 358.5803222859, 256.0556640768], [178.609130885, 0.8891601408, 358.5803222859, 253.158203136], [0, 0.0546264576, 143.9752197161, 260.4938964992], [215.00793454390003, 1.9731445248, 270.5043945513, 45.6159668224], [72.19964601699996, 148.5024414208, 138.02966305310002, 256.0556640768], [227.40527346840003, 48.8217773568, 257.7893066103, 62.2910156288], [0, 24.2135620096, 229.9403076062, 319], [163.61950685029996, 98.7783813632, 430, 319]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048652.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations.", "boxes_value": [[251.13409424, 62.15756226, 399.8451538, 300.51965330999997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048652_crop.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations.", "boxes_value": [[38.134094239999996, 60.15756226, 186.8451538, 298]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048652.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a carpet, a chair, a person, and a sneakers.", "boxes_value": [[251.13409424, 62.15756226, 399.8451538, 300.51965330999997], [371.88787840000003, 62.15756226, 399.8451538, 103.03973388], [220.26916504, 93.00631713, 382.33422852, 154.02679443000002], [255.27563476, 110.45288085, 315.60168455999997, 172.43173217999998], [251.13409424, 124.87426758000001, 399.79949952, 300.51965330999997], [270.87469484, 215.43054198, 314.16101076, 239.07769776]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048652_crop.jpg", "text": "What does the selected region in the image encompass? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a carpet, a chair, a person, and a sneakers.", "boxes_value": [[38.134094239999996, 60.15756226, 186.8451538, 298], [158.88787840000003, 60.15756226, 186.8451538, 101.03973388], [7.26916503999999, 91.00631713, 169.33422852, 152.02679443000002], [42.27563476, 108.45288085, 102.60168455999997, 170.43173217999998], [38.134094239999996, 122.87426758000001, 186.79949951999998, 298], [57.87469484000002, 213.43054198, 101.16101076000001, 237.07769776]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048653.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[208.5678100402, 201.5804443135, 529.6563720786, 278.3156127857]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048653_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[80.56781004019999, 19.58044431350001, 401.65637207860004, 96.31561278570001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048653.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four potted plants, and an umbrella.", "boxes_value": [[208.5678100402, 201.5804443135, 529.6563720786, 278.3156127857], [311.6022949169, 219.86773683069998, 336.723754874, 272.72320558629997], [384.15295407969995, 220.4706420714, 405.25500491709994, 266.49310303979996], [471.37463377160003, 224.8920287936, 493.68249512660003, 263.8804931553], [515.7893066066, 229.9163208076, 529.6563720786, 263.0765991462], [208.5678100402, 201.5804443135, 302.1308593511, 278.3156127857]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048653_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four potted plants, and an umbrella.", "boxes_value": [[80.56781004019999, 19.58044431350001, 401.65637207860004, 96.31561278570001], [183.6022949169, 37.86773683069998, 208.723754874, 90.72320558629997], [256.15295407969995, 38.47064207139999, 277.25500491709994, 84.49310303979996], [343.37463377160003, 42.89202879359999, 365.68249512660003, 81.88049315529997], [387.7893066066, 47.91632080759999, 401.65637207860004, 81.07659914620001], [80.56781004019999, 19.58044431350001, 174.1308593511, 96.31561278570001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048654.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[303.9777831883, 148.3961181696, 509.9127197015, 428.0355224576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048654_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference.", "boxes_value": [[51.97778318830001, 70.39611816959999, 257.9127197015, 350.0355224576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048654.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include two bottles, and three vases.", "boxes_value": [[303.9777831883, 148.3961181696, 509.9127197015, 428.0355224576], [303.9777831883, 375.4198608384, 331.0828247156, 428.0355224576], [465.8713378973, 326.7493896704, 481.96997068310003, 367.2163085824], [461.0502929926, 252.192321792, 509.9127197015, 285.0716552704], [384.3055420021, 230.9829711872, 396.9147949268, 258.1717529088], [433.281860327, 148.3961181696, 449.65930177219997, 181.1508789248]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048654_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include two bottles, and three vases.", "boxes_value": [[51.97778318830001, 70.39611816959999, 257.9127197015, 350.0355224576], [51.97778318830001, 297.4198608384, 79.0828247156, 350.0355224576], [213.8713378973, 248.74938967039998, 229.96997068310003, 289.2163085824], [209.05029299260002, 174.192321792, 257.9127197015, 207.07165527040002], [132.30554200210003, 152.9829711872, 144.91479492680003, 180.1717529088], [181.281860327, 70.39611816959999, 197.65930177219997, 103.1508789248]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048656.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[316.8481445376, 161.703674286, 511.9924926976, 322.68981929800003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048656_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Provide the coordinates for all objects that you mention.", "boxes_value": [[48.84814453759998, 40.703674285999995, 243.99249269760003, 201.68981929800003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048656.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, two breads, and two canneds.", "boxes_value": [[316.8481445376, 161.703674286, 511.9924926976, 322.68981929800003], [0.6581421056, 0, 509.4617309696, 768.469604518], [397.5058593792, 161.703674286, 477.0371093504, 208.41094966999998], [316.8481445376, 176.84014894700002, 389.110534656, 205.320007354], [447.1335449088, 275.71923825600004, 493.3335571456, 339.919189456], [492.9335327232, 278.668701157, 511.9924926976, 322.68981929800003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048656_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, two breads, and two canneds.", "boxes_value": [[48.84814453759998, 40.703674285999995, 243.99249269760003, 201.68981929800003], [0, 0, 241.4617309696, 241], [129.5058593792, 40.703674285999995, 209.0371093504, 87.41094966999998], [48.84814453759998, 55.84014894700002, 121.11053465600003, 84.32000735400001], [179.1335449088, 154.71923825600004, 225.33355714560003, 218.91918945600003], [224.93353272320002, 157.668701157, 243.99249269760003, 201.68981929800003]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048657.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object.", "boxes_value": [[247.878601047, 168.2459716608, 750.907226531, 493.9832153088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048657_crop.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object.", "boxes_value": [[125.87860104699999, 82.2459716608, 628.907226531, 407.9832153088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048657.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a drum, a hat, three speakers, a tripod, and a microphone.", "boxes_value": [[247.878601047, 168.2459716608, 750.907226531, 493.9832153088], [724.38891602, 381.1134033408, 750.907226531, 416.659240704], [442.47863767800004, 219.1928100352, 462.815673859, 234.0557861376], [247.878601047, 403.8307494912, 402.363281243, 493.9832153088], [488.577026399, 400.76727296, 578.729614232, 474.7273559552], [612.789062501, 391.0411377152, 740.5959472449999, 460.008605952], [571.215576162, 208.5647583232, 676.6485595869999, 472.146972672], [317.55529782900004, 168.2459716608, 353.425781247, 179.8630981632]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048657_crop.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a drum, a hat, three speakers, a tripod, and a microphone.", "boxes_value": [[125.87860104699999, 82.2459716608, 628.907226531, 407.9832153088], [602.38891602, 295.1134033408, 628.907226531, 330.659240704], [320.47863767800004, 133.1928100352, 340.815673859, 148.0557861376], [125.87860104699999, 317.8307494912, 280.363281243, 407.9832153088], [366.577026399, 314.76727296, 456.729614232, 388.7273559552], [490.789062501, 305.0411377152, 618.5959472449999, 374.008605952], [449.21557616200005, 122.56475832320001, 554.6485595869999, 386.146972672], [195.55529782900004, 82.2459716608, 231.42578124699997, 93.8630981632]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048658.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Specify the location of each mentioned object.", "boxes_value": [[446.2866210669, 247.93670656, 645.9886474496, 394.271362304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048658_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Specify the location of each mentioned object.", "boxes_value": [[50.28662106690001, 36.936706560000005, 249.98864744959997, 183.27136230399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048658.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include a potted plant, a suv, a motorcycle, a car, and two street lights.", "boxes_value": [[446.2866210669, 247.93670656, 645.9886474496, 394.271362304], [600.4534912348, 322.9817504768, 645.9886474496, 374.3299560448], [472.49365235010004, 284.9828491264, 599.3322753613, 394.271362304], [446.2866210669, 326.8543090688, 482.33227540949997, 353.587341312], [463.8691406089, 305.9163818496, 489.6568603658, 341.5315551744], [482.62036132620005, 231.3281249792, 499.7825927885, 296.6552123904], [503.3811034824, 247.93670656, 516.11437985, 288.62774656]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048658_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include a potted plant, a suv, a motorcycle, a car, and two street lights.", "boxes_value": [[50.28662106690001, 36.936706560000005, 249.98864744959997, 183.27136230399998], [204.45349123480003, 111.98175047680002, 249.98864744959997, 163.32995604479999], [76.49365235010004, 73.98284912640003, 203.33227536130005, 183.27136230399998], [50.28662106690001, 115.85430906879998, 86.33227540949997, 142.58734131199998], [67.86914060890001, 94.91638184959999, 93.65686036580001, 130.5315551744], [86.62036132620005, 20.3281249792, 103.78259278849998, 85.65521239039998], [107.38110348240002, 36.936706560000005, 120.11437984999998, 77.62774655999999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048659.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[110.4666748416, 374.1419677696, 372.069702144, 412.9503783936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048659_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[65.4666748416, 10.1419677696, 327.069702144, 48.950378393599976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048659.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a paddle, three people, and two boats.", "boxes_value": [[110.4666748416, 374.1419677696, 372.069702144, 412.9503783936], [307.90466311679995, 388.2854003712, 372.069702144, 402.0350951936], [161.9551391232, 369.6481933824, 190.674194304, 397.5824585216], [207.9486084096, 369.9154052608, 235.0138550016, 395.7750244352], [340.3041992448, 374.1419677696, 366.4709472768, 405.9717407232], [110.4666748416, 389.0473632768, 274.0698242304, 406.5762329088], [116.30969241599999, 406.045043968, 267.69567874560005, 412.9503783936]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048659_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a paddle, three people, and two boats.", "boxes_value": [[65.4666748416, 10.1419677696, 327.069702144, 48.950378393599976], [262.90466311679995, 24.285400371200012, 327.069702144, 38.0350951936], [116.95513912320001, 5.648193382399995, 145.674194304, 33.58245852160002], [162.9486084096, 5.915405260800014, 190.0138550016, 31.77502443520001], [295.3041992448, 10.1419677696, 321.4709472768, 41.971740723200014], [65.4666748416, 25.047363276800013, 229.0698242304, 42.57623290880002], [71.30969241599999, 42.045043968000016, 222.69567874560005, 48.950378393599976]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048660.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each mentioned object.", "boxes_value": [[641.2681885112, 98.4770507776, 787.9334716816, 251.404785152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048660_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each mentioned object.", "boxes_value": [[37.26818851120004, 38.4770507776, 183.93347168160005, 191.404785152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048660.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two lamps, a mirror, a cabinet, a kettle, and a moniter.", "boxes_value": [[641.2681885112, 98.4770507776, 787.9334716816, 251.404785152], [641.2681885112, 126.8475341824, 687.3702392252, 174.4694213632], [701.3021240624, 98.4770507776, 787.9334716816, 208.6659546112], [733.0601806748, 96.5871582208, 787.743286148, 426.0704955904], [705.7414550768, 130.772521984, 720.4094237948, 159.8927002112], [663.6348876792, 220.4223022592, 685.0275878788, 251.404785152], [715.1811523812, 183.2599487488, 739.3867187356, 251.0051269632]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5], [6]]}, {"image_path": "objects365_v1_00048660_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two lamps, a mirror, a cabinet, a kettle, and a moniter.", "boxes_value": [[37.26818851120004, 38.4770507776, 183.93347168160005, 191.404785152], [37.26818851120004, 66.8475341824, 83.37023922519995, 114.46942136320001], [97.30212406240003, 38.4770507776, 183.93347168160005, 148.6659546112], [129.0601806748, 36.587158220800006, 183.74328614800004, 229], [101.74145507679998, 70.77252198400001, 116.40942379479998, 99.89270021120001], [59.634887679200006, 160.4223022592, 81.02758787879998, 191.404785152], [111.18115238120004, 123.25994874880001, 135.38671873559997, 191.0051269632]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5], [6]]}, {"image_path": "objects365_v1_00048666.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[296.8961181696, 263.4996337664, 520.608032256, 385.9916992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048666_crop.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[56.896118169600015, 31.499633766399995, 280.608032256, 153.99169920000003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048666.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, two pillows, and two cats.", "boxes_value": [[296.8961181696, 263.4996337664, 520.608032256, 385.9916992], [296.8961181696, 263.4996337664, 380.5217285376, 376.798767104], [447.38757327359997, 286.8389282304, 480.52954099199997, 316.8978271232], [507.5054931456, 277.5900268544, 520.608032256, 315.356384256], [398.17150878719997, 345.547241216, 464.53454592, 385.9916992], [365.7102051072, 337.8923339776, 438.2078857728, 380.5361938432]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048666_crop.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a chair, two pillows, and two cats.", "boxes_value": [[56.896118169600015, 31.499633766399995, 280.608032256, 153.99169920000003], [56.896118169600015, 31.499633766399995, 140.52172853759998, 144.79876710399998], [207.38757327359997, 54.83892823039997, 240.52954099199997, 84.89782712319999], [267.5054931456, 45.59002685439998, 280.608032256, 83.35638425600001], [158.17150878719997, 113.54724121599997, 224.53454592000003, 153.99169920000003], [125.71020510720001, 105.8923339776, 198.2078857728, 148.5361938432]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048668.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[177.4794311186, 336.5677910016, 302.10575921139997, 414.083984384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048668_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[31.47943111859999, 19.56779100160003, 156.10575921139997, 97.08398438400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048668.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bench, a desk, a carpet, and two strollers.", "boxes_value": [[177.4794311186, 336.5677910016, 302.10575921139997, 414.083984384], [177.4794311186, 378.4860229632, 234.9877319646, 414.083984384], [140.5835571347, 362.6239624192, 204.6483154562, 405.0559081984], [29.5025024584, 380.5889282048, 319.3809204208, 472.6225586176], [253.575887593, 336.5677910016, 280.1396068155, 378.4567328256], [268.90111023450004, 340.1436762624, 302.10575921139997, 378.4567328256]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048668_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a bench, a desk, a carpet, and two strollers.", "boxes_value": [[31.47943111859999, 19.56779100160003, 156.10575921139997, 97.08398438400002], [31.47943111859999, 61.486022963200014, 88.98773196459999, 97.08398438400002], [0, 45.623962419199984, 58.64831545620001, 88.0559081984], [0, 63.58892820480003, 173.3809204208, 116], [107.575887593, 19.56779100160003, 134.13960681549997, 61.4567328256], [122.90111023450004, 23.143676262399993, 156.10575921139997, 61.4567328256]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048671.jpg", "text": "What objects or scenery can be found in the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[28.5974121216, 329.1519775232, 327.8559570432, 464.1173095936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048671_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please point out the objects and their coordinates.", "boxes_value": [[28.5974121216, 34.1519775232, 327.8559570432, 169.1173095936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048671.jpg", "text": "What objects or scenery can be found in the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a trolley, a motorcycle, and a tricycle.", "boxes_value": [[28.5974121216, 329.1519775232, 327.8559570432, 464.1173095936], [28.5974121216, 329.1519775232, 75.0518798592, 464.1173095936], [144.38012697599999, 340.7361449984, 192.6802368, 450.7660522496], [56.0469970944, 402.0919189504, 151.03540039680001, 455.633422848], [113.46417239040001, 365.601928704, 228.8807373312, 445.8799438336], [189.512756352, 359.254882816, 327.8559570432, 435.0521850368]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048671_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a trolley, a motorcycle, and a tricycle.", "boxes_value": [[28.5974121216, 34.1519775232, 327.8559570432, 169.1173095936], [28.5974121216, 34.1519775232, 75.0518798592, 169.1173095936], [144.38012697599999, 45.73614499839999, 192.6802368, 155.76605224960002], [56.0469970944, 107.09191895039999, 151.03540039680001, 160.633422848], [113.46417239040001, 70.60192870399999, 228.8807373312, 150.8799438336], [189.512756352, 64.25488281600002, 327.8559570432, 140.05218503679998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048674.jpg", "text": "What is taking place within the specified area in this capture ? Give coordinates for the items you reference.", "boxes_value": [[73.28979494410001, 105.6138305536, 449.85559084970004, 347.445312512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048674_crop.jpg", "text": "What is taking place within the specified area in this capture ? Give coordinates for the items you reference.", "boxes_value": [[73.28979494410001, 60.613830553599996, 449.85559084970004, 302.445312512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048674.jpg", "text": "What is taking place within the specified area in this capture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a van, a chair, four lamps, and a desk.", "boxes_value": [[73.28979494410001, 105.6138305536, 449.85559084970004, 347.445312512], [353.36083982910003, 245.7095947264, 494.18493651849997, 342.9114379776], [250.01293947969998, 318.4866333184, 277.0726928479, 347.445312512], [109.6032714813, 105.6138305536, 146.4142456283, 128.9937744384], [293.1604614259, 129.4912109568, 323.0071411077, 151.3787842048], [73.28979494410001, 166.7995605504, 98.1620483429, 180.7280273408], [424.4859618932, 153.8660278272, 449.85559084970004, 167.7944946176], [284.3666992112, 335.5627441152, 338.5950317526, 348.133850112]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048674_crop.jpg", "text": "What is taking place within the specified area in this capture ? Give coordinates for the items you reference. For your reference, objects involved in this region include a van, a chair, four lamps, and a desk.", "boxes_value": [[73.28979494410001, 60.613830553599996, 449.85559084970004, 302.445312512], [353.36083982910003, 200.7095947264, 494.18493651849997, 297.9114379776], [250.01293947969998, 273.4866333184, 277.0726928479, 302.445312512], [109.6032714813, 60.613830553599996, 146.4142456283, 83.99377443840001], [293.1604614259, 84.49121095679999, 323.0071411077, 106.37878420480001], [73.28979494410001, 121.7995605504, 98.1620483429, 135.7280273408], [424.4859618932, 108.86602782720001, 449.85559084970004, 122.79449461760001], [284.3666992112, 290.5627441152, 338.5950317526, 303.133850112]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048676.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[0.4810791094, 241.2931518464, 218.3870849583, 503.4828491264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048676_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[0.4810791094, 66.29315184640001, 218.3870849583, 328.4828491264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048676.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a watch, a glasses, and two machinery vehicles.", "boxes_value": [[0.4810791094, 241.2931518464, 218.3870849583, 503.4828491264], [128.2668456772, 181.0466308608, 306.6709594876, 511.7999878144], [194.4711303896, 352.9830322176, 212.6388549864, 383.6802368], [183.304504396, 241.2931518464, 218.3870849583, 254.896606464], [25.7050781229, 366.0609741312, 153.8102417179, 503.4828491264], [0.4810791094, 367.7258911232, 120.8961181971, 450.4938964992]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048676_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a watch, a glasses, and two machinery vehicles.", "boxes_value": [[0.4810791094, 66.29315184640001, 218.3870849583, 328.4828491264], [128.2668456772, 6.046630860800008, 272, 336.7999878144], [194.4711303896, 177.9830322176, 212.6388549864, 208.6802368], [183.304504396, 66.29315184640001, 218.3870849583, 79.896606464], [25.7050781229, 191.0609741312, 153.8102417179, 328.4828491264], [0.4810791094, 192.72589112319997, 120.8961181971, 275.4938964992]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048677.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Specify the location of each mentioned object.", "boxes_value": [[204.827819812, 97.5321044992, 370.152343781, 294.9093627904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048677_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Specify the location of each mentioned object.", "boxes_value": [[41.827819812, 49.5321044992, 207.152343781, 246.9093627904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048677.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include five street lights.", "boxes_value": [[204.827819812, 97.5321044992, 370.152343781, 294.9093627904], [235.193603495, 97.5321044992, 275.6812133578, 294.9093627904], [224.2281493783, 163.3245239296, 251.2199096585, 229.9604492288], [204.827819812, 181.8814087168, 229.2891235113, 215.6210937344], [349.0650634845, 187.7858276352, 370.152343781, 250.204284672], [359.4530029245, 223.1179809792, 375.92822263380003, 245.6207885824]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048677_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include five street lights.", "boxes_value": [[41.827819812, 49.5321044992, 207.152343781, 246.9093627904], [72.19360349499999, 49.5321044992, 112.6812133578, 246.9093627904], [61.2281493783, 115.32452392959999, 88.2199096585, 181.9604492288], [41.827819812, 133.8814087168, 66.2891235113, 167.6210937344], [186.0650634845, 139.7858276352, 207.152343781, 202.204284672], [196.45300292450003, 175.1179809792, 212.92822263380003, 197.6207885824]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048679.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[440.4117431808, 197.237243648, 588.8267822592, 326.6039428608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048679_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify.", "boxes_value": [[37.41174318079999, 33.237243648, 185.82678225919994, 162.60394286079998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048679.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[440.4117431808, 197.237243648, 588.8267822592, 326.6039428608], [435.04387430400004, 155.2336326144, 492.1908572928, 258.1650405888], [488.43249515520006, 231.8841552896, 588.8267822592, 326.6039428608], [423.5847168, 198.448242176, 493.67871091200004, 316.7834472448], [440.4117431808, 197.237243648, 469.96887206400004, 219.6856078848], [515.2397461247999, 232.4063720448, 553.4020995840001, 261.5893554688], [74.707580544, 208.0216674816, 626.4476318207999, 438.1444091904]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048679_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Include the coordinates for each object you identify. For your reference, objects involved in this region include a paddle, two people, two helmets, and a boat.", "boxes_value": [[37.41174318079999, 33.237243648, 185.82678225919994, 162.60394286079998], [32.04387430400004, 0, 89.19085729279999, 94.16504058880003], [85.43249515520006, 67.88415528959999, 185.82678225919994, 162.60394286079998], [20.584716800000024, 34.44824217600001, 90.67871091200004, 152.78344724480002], [37.41174318079999, 33.237243648, 66.96887206400004, 55.68560788479999], [112.23974612479992, 68.40637204480001, 150.4020995840001, 97.58935546880002], [0, 44.02166748159999, 222, 194]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048682.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe.", "boxes_value": [[141.9982909952, 262.540161145, 331.0761718784, 503.1110839604]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048682_crop.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe.", "boxes_value": [[47.998290995199994, 60.54016114500001, 237.07617187839998, 301.1110839604]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048682.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[141.9982909952, 262.540161145, 331.0761718784, 503.1110839604], [248.8726806528, 262.540161145, 304.0117797888, 374.8604736352], [141.9982909952, 308.8297729176, 246.8305053696, 365.3303222742], [311.8379516416, 465.8985595967, 331.0761718784, 503.1110839604], [212.2952270336, 391.87145994440004, 247.9208374272, 454.4234619152], [276.9185180672, 382.3436279425, 299.0809936384, 415.27661133839996]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048682_crop.jpg", "text": "I'd like a thorough description of the area in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two mirrors, and three people.", "boxes_value": [[47.998290995199994, 60.54016114500001, 237.07617187839998, 301.1110839604], [154.8726806528, 60.54016114500001, 210.01177978880003, 172.86047363519998], [47.998290995199994, 106.8297729176, 152.8305053696, 163.3303222742], [217.8379516416, 263.8985595967, 237.07617187839998, 301.1110839604], [118.29522703360001, 189.87145994440004, 153.9208374272, 252.4234619152], [182.9185180672, 180.3436279425, 205.0809936384, 213.27661133839996]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048683.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Give coordinates for the items you reference.", "boxes_value": [[446.41027828600005, 182.4302978352, 522.7463378954, 342.06622313580004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048683_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Give coordinates for the items you reference.", "boxes_value": [[19.41027828600005, 40.43029783520001, 95.74633789539996, 200.06622313580004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048683.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a gloves, a scissors, and a bottle.", "boxes_value": [[446.41027828600005, 182.4302978352, 522.7463378954, 342.06622313580004], [276.34948734119996, 73.9569091572, 522.4129638778, 486.05639646059996], [431.3895263354, 11.9409485094, 803.4854736056001, 486.06317139780003], [476.8457030848, 258.1155395352, 522.7463378954, 318.0198364038], [446.41027828600005, 331.05462647400003, 486.60253902719995, 342.06622313580004], [485.32897945459996, 182.4302978352, 506.4437255708, 214.73577882179998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048683_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a gloves, a scissors, and a bottle.", "boxes_value": [[19.41027828600005, 40.43029783520001, 95.74633789539996, 200.06622313580004], [0, 0, 95.41296387780005, 239], [4.389526335399978, 0, 114, 239], [49.84570308479999, 116.11553953520001, 95.74633789539996, 176.0198364038], [19.41027828600005, 189.05462647400003, 59.60253902719995, 200.06622313580004], [58.32897945459996, 40.43029783520001, 79.44372557079998, 72.73577882179998]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048684.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 116.6909179904, 422.45227051859996, 284.0822143488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048684_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object.", "boxes_value": [[0, 42.690917990399996, 422.45227051859996, 210.0822143488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048684.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a desk, a chair, a cup, and a refrigerator.", "boxes_value": [[0, 116.6909179904, 422.45227051859996, 284.0822143488], [352.7882079847, 116.6909179904, 422.45227051859996, 210.1041870336], [0, 120.2202758656, 186.515747055, 284.0822143488], [0, 112.6301269504, 124.9907837228, 241.02532961279996], [0, 121.7084960768, 16.9648437793, 146.756347648], [303.9116211149, 86.4533691392, 390.41235353080003, 204.335815424]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048684_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, a desk, a chair, a cup, and a refrigerator.", "boxes_value": [[0, 42.690917990399996, 422.45227051859996, 210.0822143488], [352.7882079847, 42.690917990399996, 422.45227051859996, 136.1041870336], [0, 46.2202758656, 186.515747055, 210.0822143488], [0, 38.6301269504, 124.9907837228, 167.02532961279996], [0, 47.7084960768, 16.9648437793, 72.756347648], [303.9116211149, 12.453369139200007, 390.41235353080003, 130.335815424]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048686.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[169.1403808512, 195.3457641472, 295.3417968384, 384.6065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048686_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.14038085120001, 47.345764147199986, 158.3417968384, 236.6065673728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048686.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and three helmets.", "boxes_value": [[169.1403808512, 195.3457641472, 295.3417968384, 384.6065673728], [277.0968017664, 230.4796752896, 295.3417968384, 279.5385132032], [189.342895488, 196.4968261632, 263.96118167040004, 384.6065673728], [169.1403808512, 214.1141357568, 213.1837158144, 342.7973022208], [180.36364746240002, 214.6963501056, 202.7938842624, 230.4805907968], [190.3316040192, 195.3457641472, 222.90600583679998, 219.1279297024], [284.4112548864, 228.278808576, 308.43688965120003, 245.0836791808]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048686_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, and three helmets.", "boxes_value": [[32.14038085120001, 47.345764147199986, 158.3417968384, 236.6065673728], [140.0968017664, 82.47967528960001, 158.3417968384, 131.5385132032], [52.34289548800001, 48.49682616320001, 126.96118167040004, 236.6065673728], [32.14038085120001, 66.11413575680001, 76.18371581439999, 194.79730222080002], [43.363647462400024, 66.6963501056, 65.79388426240001, 82.48059079679999], [53.3316040192, 47.345764147199986, 85.90600583679998, 71.1279297024], [147.4112548864, 80.27880857599999, 171.43688965120003, 97.0836791808]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048688.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[178.765747072, 190.617553728, 400.66467283199995, 247.08374025599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048688_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[55.76574707200001, 14.61755372799999, 277.66467283199995, 71.08374025599997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048688.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a faucet, a sink, a coffee machine, a microwave, and an induction cooker.", "boxes_value": [[178.765747072, 190.617553728, 400.66467283199995, 247.08374025599997], [270.655883776, 213.31103515200002, 292.07891846399997, 231.205566384], [238.395446784, 228.433166496, 313.249816896, 247.08374025599997], [308.811767552, 195.937744128, 345.73846438399994, 236.94683836800002], [178.765747072, 190.617553728, 247.895080576, 228.77947996799998], [304.914978048, 234.163391136, 400.66467283199995, 246.967163088]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048688_crop.jpg", "text": "What can I find in the bbox of the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a faucet, a sink, a coffee machine, a microwave, and an induction cooker.", "boxes_value": [[55.76574707200001, 14.61755372799999, 277.66467283199995, 71.08374025599997], [147.655883776, 37.31103515200002, 169.07891846399997, 55.20556638400001], [115.395446784, 52.43316649600001, 190.24981689600003, 71.08374025599997], [185.811767552, 19.93774412799999, 222.73846438399994, 60.946838368000016], [55.76574707200001, 14.61755372799999, 124.895080576, 52.779479967999976], [181.91497804800002, 58.163391136, 277.66467283199995, 70.967163088]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048691.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[224.5596313332, 236.01544192, 333.3271484672, 325.4948120064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048691_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify.", "boxes_value": [[27.559631333200002, 23.01544192, 136.3271484672, 112.49481200640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048691.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a potted plant, a moniter, two speakers, and a keyboard.", "boxes_value": [[224.5596313332, 236.01544192, 333.3271484672, 325.4948120064], [224.5596313332, 236.01544192, 269.7361450504, 322.4055785984], [268.1007690508, 252.9041137664, 322.8067626794, 321.8126831104], [254.9502563698, 287.3583984128, 270.4678344842, 325.4948120064], [320.176757783, 279.4681396736, 333.3271484672, 310.7662963712], [282.8293457104, 307.3471679488, 354.368041969, 329.7030029312]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048691_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Include the coordinates for each object you identify. For your reference, objects involved in this region include a potted plant, a moniter, two speakers, and a keyboard.", "boxes_value": [[27.559631333200002, 23.01544192, 136.3271484672, 112.49481200640002], [27.559631333200002, 23.01544192, 72.73614505040001, 109.40557859839998], [71.10076905080001, 39.9041137664, 125.80676267939998, 108.81268311039997], [57.9502563698, 74.3583984128, 73.46783448420001, 112.49481200640002], [123.17675778300003, 66.46813967359998, 136.3271484672, 97.76629637119999], [85.82934571039999, 94.3471679488, 157.368041969, 116.70300293119999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048692.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[532.1427002112, 119.3202514432, 766.6628418048, 511.3148803584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048692_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[59.14270021120001, 98.3202514432, 293.6628418048, 490.3148803584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048692.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a desk, and three people.", "boxes_value": [[532.1427002112, 119.3202514432, 766.6628418048, 511.3148803584], [728.1762695424, 119.3202514432, 765.2160644352, 133.7514038272], [739.3968506112, 303.7086792192, 765.9913330176, 361.2269897216], [631.1306152704, 322.0935058432, 766.6628418048, 511.3148803584], [380.48815918080004, 28.8844604416, 687.2020263936, 511.5452880896], [532.1427002112, 445.511657728, 621.0632324352, 510.841064448]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048692_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, a desk, and three people.", "boxes_value": [[59.14270021120001, 98.3202514432, 293.6628418048, 490.3148803584], [255.17626954239995, 98.3202514432, 292.2160644352, 112.7514038272], [266.39685061119997, 282.7086792192, 292.9913330176, 340.2269897216], [158.13061527039997, 301.0935058432, 293.6628418048, 490.3148803584], [0, 7.884460441600002, 214.20202639360002, 490.5452880896], [59.14270021120001, 424.511657728, 148.0632324352, 489.841064448]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048694.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[300.8329467864, 89.5039062528, 420.5914306748, 159.8246459904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048694_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[30.832946786399987, 18.503906252799993, 150.5914306748, 88.8246459904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048694.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[300.8329467864, 89.5039062528, 420.5914306748, 159.8246459904], [395.85644529480004, 89.5039062528, 420.5914306748, 149.91412352], [377.8424072346, 94.6417846784, 397.0286865538, 147.9953613312], [341.6839599698, 100.3198852608, 359.8784179508, 159.6911010816], [331.4207763958, 99.3199462912, 351.35449215999995, 159.8246459904], [300.8329467864, 102.0822143488, 315.9688720524, 145.5369872896]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048694_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[30.832946786399987, 18.503906252799993, 150.5914306748, 88.8246459904], [125.85644529480004, 18.503906252799993, 150.5914306748, 78.91412352], [107.84240723459999, 23.6417846784, 127.02868655380001, 76.9953613312], [71.68395996980001, 29.319885260800007, 89.87841795079999, 88.69110108160001], [61.420776395799976, 28.319946291199997, 81.35449215999995, 88.8246459904], [30.832946786399987, 31.082214348799994, 45.9688720524, 74.5369872896]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048695.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[338.997985856, 117.066040032, 474.95996096, 182.919250512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048695_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[33.997985856000014, 17.066040032000004, 169.95996096, 82.91925051199999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048695.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a flower, a person, two boots, and a moniter.", "boxes_value": [[338.997985856, 117.066040032, 474.95996096, 182.919250512], [409.49084473600004, 103.007812512, 532.207153344, 258.95989992], [372.275512704, 120.23120116800001, 407.465087872, 180.950500512], [338.997985856, 130.082458512, 368.163879424, 182.919250512], [458.01989747199997, 118.929443376, 474.95996096, 145.35583497599998], [431.42407225600004, 117.066040032, 449.041748032, 145.18640136], [362.82635500799995, 142.296997056, 388.61071776000006, 175.267150896]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048695_crop.jpg", "text": "I need details about the area located within image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a flower, a person, two boots, and a moniter.", "boxes_value": [[33.997985856000014, 17.066040032000004, 169.95996096, 82.91925051199999], [104.49084473600004, 3.007812512000001, 203, 99], [67.275512704, 20.231201168000013, 102.46508787200003, 80.95050051199999], [33.997985856000014, 30.082458511999988, 63.163879424000015, 82.91925051199999], [153.01989747199997, 18.929443375999995, 169.95996096, 45.35583497599998], [126.42407225600004, 17.066040032000004, 144.041748032, 45.18640135999999], [57.82635500799995, 42.29699705600001, 83.61071776000006, 75.267150896]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048699.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for each element you describe.", "boxes_value": [[595.1536865410001, 177.5769653248, 684.0888671835, 387.6572875776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048699_crop.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for each element you describe.", "boxes_value": [[23.15368654100007, 52.5769653248, 112.08886718350004, 262.6572875776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048699.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a storage box, and three canneds.", "boxes_value": [[595.1536865410001, 177.5769653248, 684.0888671835, 387.6572875776], [334.748840304, 71.73272704, 667.192504908, 510.8369140736], [651.451293958, 284.8488159232, 684.0888671835, 387.6572875776], [625.9725342135, 177.5769653248, 643.793212884, 204.993286144], [600.6054687195, 235.6226806784, 624.5095214885, 263.300964352], [595.1536865410001, 261.6234741248, 621.993286123, 288.4630737408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048699_crop.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a cabinet, a storage box, and three canneds.", "boxes_value": [[23.15368654100007, 52.5769653248, 112.08886718350004, 262.6572875776], [0, 0, 95.19250490800005, 315], [79.45129395799995, 159.8488159232, 112.08886718350004, 262.6572875776], [53.97253421350001, 52.5769653248, 71.79321288400001, 79.993286144], [28.60546871949998, 110.6226806784, 52.50952148850001, 138.300964352], [23.15368654100007, 136.6234741248, 49.99328612299996, 163.46307374079998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048700.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.17028808709999999, 379.5100097536, 400.2045898256, 512.2778320384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048700_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.17028808709999999, 33.5100097536, 400.2045898256, 166]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048700.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a person, a handbag, a plate, and a spoon.", "boxes_value": [[0.17028808709999999, 379.5100097536, 400.2045898256, 512.2778320384], [45.3340454305, 448.8081665024, 242.8945923155, 511.6683349504], [103.7041625748, 409.0394897408, 282.6632690716, 511.6683349504], [0.17028808709999999, 413.1596069376, 166.4988403235, 512.2778320384], [343.1480712821, 425.2274169856, 400.2045898256, 512.071289088], [301.215393035, 364.666076672, 355.11022951679996, 400.0216674816], [307.381347651, 379.5100097536, 358.9925536892, 392.0702514688]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048700_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two chairs, a person, a handbag, a plate, and a spoon.", "boxes_value": [[0.17028808709999999, 33.5100097536, 400.2045898256, 166], [45.3340454305, 102.80816650240001, 242.8945923155, 165.66833495039998], [103.7041625748, 63.03948974079998, 282.6632690716, 165.66833495039998], [0.17028808709999999, 67.15960693760002, 166.4988403235, 166], [343.1480712821, 79.22741698559997, 400.2045898256, 166], [301.215393035, 18.666076671999974, 355.11022951679996, 54.02166748159999], [307.381347651, 33.5100097536, 358.9925536892, 46.070251468799995]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048701.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[301.1002197504, 141.4974975488, 586.721557632, 439.4893798912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048701_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[72.10021975040002, 75.4974975488, 357.721557632, 373.4893798912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048701.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a hat, a handbag, and four lamps.", "boxes_value": [[301.1002197504, 141.4974975488, 586.721557632, 439.4893798912], [530.4532470528, 421.1243286016, 550.3923340032, 439.4893798912], [432.85620119039993, 426.8961792, 454.8942871296, 441.0635376128], [301.1002197504, 141.4974975488, 323.4764404224, 160.321960448], [334.8421631232, 153.2183837696, 363.61157230080005, 167.0703124992], [397.3535155968, 179.1463623168, 438.198974592, 212.888305664], [522.1370849279999, 226.631530752, 586.721557632, 287.1060790784]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048701_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a hat, a handbag, and four lamps.", "boxes_value": [[72.10021975040002, 75.4974975488, 357.721557632, 373.4893798912], [301.45324705279995, 355.1243286016, 321.39233400319995, 373.4893798912], [203.85620119039993, 360.8961792, 225.8942871296, 375.0635376128], [72.10021975040002, 75.4974975488, 94.4764404224, 94.321960448], [105.84216312320001, 87.2183837696, 134.61157230080005, 101.07031249920001], [168.35351559679998, 113.14636231680001, 209.198974592, 146.888305664], [293.1370849279999, 160.631530752, 357.721557632, 221.1060790784]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048703.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[37.3292236288, 570.0318603264, 285.4132080128, 702.6655273727999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048703_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[37.3292236288, 34.031860326399965, 285.4132080128, 166.66552737279994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048703.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a desk, a person, and three handbags.", "boxes_value": [[37.3292236288, 570.0318603264, 285.4132080128, 702.6655273727999], [0.132690432, 352.92077637119996, 165.21362304, 762.5660400384], [0.6850586112, 375.3930663936, 250.9268188672, 702.9345702912], [80.9972534272, 4.5430298112, 389.2228393472, 721.729492224], [37.3292236288, 617.286010752, 160.8342895616, 702.6655273727999], [162.9821777408, 570.0318603264, 285.4132080128, 653.8005371136001], [122.1679077376, 621.5019531263999, 208.3975219712, 674.2542724607999]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048703_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a chair, a desk, a person, and three handbags.", "boxes_value": [[37.3292236288, 34.031860326399965, 285.4132080128, 166.66552737279994], [0.132690432, 0, 165.21362304, 199], [0.6850586112, 0, 250.9268188672, 166.93457029119998], [80.9972534272, 0, 347, 185.72949222399996], [37.3292236288, 81.28601075200004, 160.8342895616, 166.66552737279994], [162.9821777408, 34.031860326399965, 285.4132080128, 117.80053711360006], [122.1679077376, 85.50195312639994, 208.3975219712, 138.25427246079994]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048705.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[259.4047241216, 534.9128417649, 409.5689086976, 646.8343506168001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048705_crop.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[38.4047241216, 28.91284176490001, 188.56890869760002, 140.83435061680007]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048705.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three boots.", "boxes_value": [[259.4047241216, 534.9128417649, 409.5689086976, 646.8343506168001], [268.1365966848, 276.8067016397, 511.9352416768, 646.7421874847], [209.8091430912, 245.314025903, 404.8518676992, 639.4720458927], [259.4047241216, 534.9128417649, 321.1544799744, 631.747680683], [303.2610473472, 552.4554443281, 329.5748901376, 638.7647705018], [374.834655744, 587.8913574556, 409.5689086976, 646.8343506168001]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048705_crop.jpg", "text": "Please describe the area in the image for me. Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three boots.", "boxes_value": [[38.4047241216, 28.91284176490001, 188.56890869760002, 140.83435061680007], [47.1365966848, 0, 226, 140.74218748470003], [0, 0, 183.8518676992, 133.47204589269995], [38.4047241216, 28.91284176490001, 100.15447997439998, 125.747680683], [82.26104734720002, 46.45544432810004, 108.5748901376, 132.76477050179994], [153.83465574399997, 81.89135745559997, 188.56890869760002, 140.83435061680007]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048706.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[98.9022217134, 102.5438232576, 233.2988280868, 341.8114013696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048706_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[33.902221713399996, 60.543823257599996, 168.2988280868, 299.8114013696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048706.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two flags, and two hats.", "boxes_value": [[98.9022217134, 102.5438232576, 233.2988280868, 341.8114013696], [141.29443358380001, 184.0197143552, 203.95703121780002, 378.6295776256], [82.17919925599999, 184.0148925952, 143.9257201858, 385.9772948992], [134.0634155672, 105.8516845568, 167.6931762632, 341.8114013696], [207.9387206876, 102.5438232576, 233.2988280868, 334.0931396608], [98.9022217134, 183.8434448384, 126.62432864619998, 199.1167602688], [164.46343994080001, 183.7086792192, 185.3723144532, 198.3013305856]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048706_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, two flags, and two hats.", "boxes_value": [[33.902221713399996, 60.543823257599996, 168.2988280868, 299.8114013696], [76.29443358380001, 142.0197143552, 138.95703121780002, 336.6295776256], [17.17919925599999, 142.0148925952, 78.9257201858, 343.9772948992], [69.0634155672, 63.851684556799995, 102.6931762632, 299.8114013696], [142.9387206876, 60.543823257599996, 168.2988280868, 292.0931396608], [33.902221713399996, 141.8434448384, 61.624328646199984, 157.1167602688], [99.46343994080001, 141.7086792192, 120.3723144532, 156.3013305856]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048707.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[320.2283935464, 357.0762939392, 564.7460937208, 466.9183960064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048707_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[61.228393546400014, 28.076293939200013, 305.7460937208, 137.91839600639997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048707.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two sneakers, and a boots.", "boxes_value": [[320.2283935464, 357.0762939392, 564.7460937208, 466.9183960064], [119.9864501824, 7.7463378944, 437.433471692, 462.7536621055999], [344.6412353768, 80.1893310464, 694.0655517584, 477.7539062272], [320.2283935464, 395.6667480576, 357.06469724560003, 427.6792602624], [359.25732424319995, 357.0762939392, 405.7412109256, 419.7857666048], [508.7916259736, 425.7720336896, 564.7460937208, 466.9183960064]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048707_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two sneakers, and a boots.", "boxes_value": [[61.228393546400014, 28.076293939200013, 305.7460937208, 137.91839600639997], [0, 0, 178.433471692, 133.75366210559991], [85.64123537680001, 0, 366, 148.7539062272], [61.228393546400014, 66.66674805759999, 98.06469724560003, 98.67926026240002], [100.25732424319995, 28.076293939200013, 146.7412109256, 90.78576660480002], [249.7916259736, 96.7720336896, 305.7460937208, 137.91839600639997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048709.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Please point out the objects and their coordinates.", "boxes_value": [[148.1035156315, 274.0058593792, 332.220825196, 335.4551391744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048709_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Please point out the objects and their coordinates.", "boxes_value": [[46.10351563149999, 16.00585937919999, 230.22082519600002, 77.45513917440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048709.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, and two side tables.", "boxes_value": [[148.1035156315, 274.0058593792, 332.220825196, 335.4551391744], [171.0162353341, 277.1979980288, 261.59411619639997, 335.4551391744], [207.7261962802, 276.0009155072, 293.1168213207, 328.6717529088], [280.3481445648, 274.0058593792, 332.220825196, 319.8933105664], [148.1035156315, 302.9220580864, 190.2030029549, 334.5836791808], [243.06347657799998, 294.9507446272, 279.9688720387, 318.659667968]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048709_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three chairs, and two side tables.", "boxes_value": [[46.10351563149999, 16.00585937919999, 230.22082519600002, 77.45513917440002], [69.0162353341, 19.197998028799987, 159.59411619639997, 77.45513917440002], [105.72619628020001, 18.000915507199977, 191.1168213207, 70.67175290879999], [178.3481445648, 16.00585937919999, 230.22082519600002, 61.893310566399975], [46.10351563149999, 44.9220580864, 88.2030029549, 76.5836791808], [141.06347657799998, 36.95074462719998, 177.9688720387, 60.65966796800001]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048710.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[432.1778564832, 293.7949218816, 566.3225097504001, 355.3233642496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048710_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[34.1778564832, 15.794921881599976, 168.32250975040006, 77.32336424959999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048710.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a cell phone, a keyboard, and a mouse.", "boxes_value": [[432.1778564832, 293.7949218816, 566.3225097504001, 355.3233642496], [273.8568115392, 190.8599243264, 515.8496093904, 511.0476073984], [332.2972412112, 260.8237915136, 699.8302002384, 513.0156250112], [446.3286132816, 293.7949218816, 474.25952145119993, 307.783752448], [432.1778564832, 304.3707275264, 522.0294189744, 339.8051147264], [539.171386752, 338.4245605376, 566.3225097504001, 355.3233642496]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048710_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a cell phone, a keyboard, and a mouse.", "boxes_value": [[34.1778564832, 15.794921881599976, 168.32250975040006, 77.32336424959999], [0, 0, 117.84960939040002, 92], [0, 0, 201, 92], [48.328613281599985, 15.794921881599976, 76.25952145119993, 29.783752447999973], [34.1778564832, 26.370727526400003, 124.02941897439996, 61.80511472640001], [141.171386752, 60.42456053759997, 168.32250975040006, 77.32336424959999]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048711.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe.", "boxes_value": [[275.0936279436, 185.6473388544, 404.60388180999996, 431.3586425856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048711_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe.", "boxes_value": [[33.093627943599984, 61.647338854400004, 162.60388180999996, 307.3586425856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048711.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a backpack, two sneakers, a fire truck, and a bicycle.", "boxes_value": [[275.0936279436, 185.6473388544, 404.60388180999996, 431.3586425856], [275.0936279436, 185.6473388544, 404.60388180999996, 431.3586425856], [283.36047360320003, 214.7703857664, 350.0893554892, 274.4072876032], [284.65234373000004, 393.8631591936, 307.2797851656, 429.7437133824], [335.24084475079997, 389.0144043008, 362.8785400156, 402.9140624896], [90.5900268768, 102.1764526592, 613.7852783404, 348.3447265792], [256.9725341668, 289.607910144, 432.3865966572, 476.7765502976]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048711_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a backpack, two sneakers, a fire truck, and a bicycle.", "boxes_value": [[33.093627943599984, 61.647338854400004, 162.60388180999996, 307.3586425856], [33.093627943599984, 61.647338854400004, 162.60388180999996, 307.3586425856], [41.360473603200035, 90.77038576640001, 108.08935548919999, 150.40728760320002], [42.65234373000004, 269.8631591936, 65.2797851656, 305.7437133824], [93.24084475079997, 265.0144043008, 120.87854001559998, 278.9140624896], [0, 0, 194, 224.34472657920003], [14.972534166800017, 165.60791014400002, 190.38659665720002, 352.7765502976]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048712.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object.", "boxes_value": [[359.97729490729995, 300.35668944400004, 440.2094726803, 358.47363282500004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048712_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object.", "boxes_value": [[20.97729490729995, 15.35668944400004, 101.2094726803, 73.47363282500004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048712.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include an umbrella, and four people.", "boxes_value": [[359.97729490729995, 300.35668944400004, 440.2094726803, 358.47363282500004], [394.69311525930004, 300.35668944400004, 422.98010255049996, 318.614685039], [422.98010255049996, 332.243835462, 440.2094726803, 358.47363282500004], [406.7794189274, 313.471557623, 425.29455565629996, 351.273315428], [385.69274900029995, 305.242614733, 399.3218994252, 347.158813456], [359.97729490729995, 306.01409910399997, 381.7761230429, 329.158020037]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048712_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include an umbrella, and four people.", "boxes_value": [[20.97729490729995, 15.35668944400004, 101.2094726803, 73.47363282500004], [55.69311525930004, 15.35668944400004, 83.98010255049996, 33.614685038999994], [83.98010255049996, 47.24383546199999, 101.2094726803, 73.47363282500004], [67.7794189274, 28.471557623000024, 86.29455565629996, 66.27331542799999], [46.692749000299955, 20.242614732999982, 60.3218994252, 62.15881345600002], [20.97729490729995, 21.014099103999968, 42.7761230429, 44.15802003699997]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048714.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object.", "boxes_value": [[306.004577624, 466.6126708736, 412.96081542400003, 512.1025390592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048714_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object.", "boxes_value": [[27.004577623999978, 11.612670873599996, 133.96081542400003, 57]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048714.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[306.004577624, 466.6126708736, 412.96081542400003, 512.1025390592], [289.92388913999997, 221.624389632, 372.27050778800003, 512.5174560768], [368.854736344, 223.1955566592, 466.584716804, 511.0159301632], [306.004577624, 485.3960571392, 328.929077148, 511.606750464], [334.96905515599997, 482.899963392, 368.326293928, 511.8812866048], [351.78759763200003, 466.6126708736, 369.29284667599995, 496.6216430592], [386.095581056, 495.811889664, 412.96081542400003, 512.1025390592]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048714_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[27.004577623999978, 11.612670873599996, 133.96081542400003, 57], [10.923889139999972, 0, 93.27050778800003, 57], [89.854736344, 0, 160, 56.01593016319998], [27.004577623999978, 30.396057139200025, 49.929077147999976, 56.606750464000015], [55.96905515599997, 27.899963392000018, 89.32629392799998, 56.88128660479998], [72.78759763200003, 11.612670873599996, 90.29284667599995, 41.62164305919998], [107.09558105600001, 40.81188966399998, 133.96081542400003, 57]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048718.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations.", "boxes_value": [[339.8504028623, 142.0408935424, 433.47692871559997, 448.4552612352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048718_crop.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations.", "boxes_value": [[23.850402862299973, 77.0408935424, 117.47692871559997, 383.4552612352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048718.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a helmet, two leather shoes, and a fire truck.", "boxes_value": [[339.8504028623, 142.0408935424, 433.47692871559997, 448.4552612352], [339.8504028623, 142.0408935424, 433.47692871559997, 448.4552612352], [361.3519286872, 142.3074951168, 400.916137666, 181.0021362176], [371.60595702669997, 431.2576293888, 400.7083740099, 448.8723144704], [342.2481689275, 425.3860473856, 379.5197754117, 439.9373169152], [152.9580077894, 35.1136474624, 550.243041986, 396.1284790272]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048718_crop.jpg", "text": "Can you provide a description of the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a helmet, two leather shoes, and a fire truck.", "boxes_value": [[23.850402862299973, 77.0408935424, 117.47692871559997, 383.4552612352], [23.850402862299973, 77.0408935424, 117.47692871559997, 383.4552612352], [45.35192868719997, 77.3074951168, 84.916137666, 116.0021362176], [55.60595702669997, 366.2576293888, 84.70837400990001, 383.8723144704], [26.24816892749999, 360.3860473856, 63.51977541169998, 374.9373169152], [0, 0, 140, 331.1284790272]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048720.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[116.7631225748, 319.9163818496, 351.67321779279996, 511.3907470848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048720_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[58.76312257479999, 47.916381849599986, 293.67321779279996, 239.3907470848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048720.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a stool, three people, a chair, and a desk.", "boxes_value": [[116.7631225748, 319.9163818496, 351.67321779279996, 511.3907470848], [271.7105102533, 367.0514526208, 325.6742553564, 396.0264892416], [175.4085693131, 350.6181640704, 328.1520385901, 512.0047607296], [165.865783674, 319.9163818496, 210.35815427129998, 391.1527710208], [122.16973875890001, 220.324279808, 193.33148191700002, 387.1557006848], [125.31726072800001, 471.910034176, 289.8201293764, 511.3907470848], [116.7631225748, 390.9746093568, 351.67321779279996, 511.1856689664]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048720_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a stool, three people, a chair, and a desk.", "boxes_value": [[58.76312257479999, 47.916381849599986, 293.67321779279996, 239.3907470848], [213.71051025330001, 95.05145262079998, 267.6742553564, 124.02648924160002], [117.4085693131, 78.61816407039998, 270.1520385901, 240], [107.865783674, 47.916381849599986, 152.35815427129998, 119.1527710208], [64.16973875890001, 0, 135.33148191700002, 115.15570068480002], [67.31726072800001, 199.910034176, 231.82012937640002, 239.3907470848], [58.76312257479999, 118.97460935679999, 293.67321779279996, 239.1856689664]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048723.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[363.67315673599995, 124.70495606400002, 639.312988288, 337.775512704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048723_crop.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object.", "boxes_value": [[69.67315673599995, 53.704956064000015, 345.31298828800004, 266.775512704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048723.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a lamp, a cabinet, a desk, and an air conditioner.", "boxes_value": [[363.67315673599995, 124.70495606400002, 639.312988288, 337.775512704], [487.232421888, 177.180236832, 600.677856448, 337.775512704], [477.397460928, 124.70495606400002, 513.648559552, 179.081604], [363.67315673599995, 202.297058112, 471.955200192, 322.379028336], [452.557983424, 165.812316912, 639.312988288, 328.905944832], [334.625427264, 126.296386704, 456.974609344, 210.33996580800002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048723_crop.jpg", "text": "Explain the content within the rectangular region of the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, a lamp, a cabinet, a desk, and an air conditioner.", "boxes_value": [[69.67315673599995, 53.704956064000015, 345.31298828800004, 266.775512704], [193.23242188799998, 106.18023683199999, 306.67785644799994, 266.775512704], [183.397460928, 53.704956064000015, 219.64855955200005, 108.081604], [69.67315673599995, 131.297058112, 177.955200192, 251.37902833599998], [158.55798342399999, 94.812316912, 345.31298828800004, 257.905944832], [40.625427263999995, 55.296386704, 162.974609344, 139.33996580800002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048724.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[239.9753418294, 267.6494751232, 335.5141601679, 439.7135009792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048724_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates.", "boxes_value": [[23.97534182940001, 43.64947512319998, 119.5141601679, 215.71350097919998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048724.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three leather shoes, and two tripods.", "boxes_value": [[239.9753418294, 267.6494751232, 335.5141601679, 439.7135009792], [303.95324702880004, 420.7844238336, 330.3298339629, 439.7135009792], [294.4769287056, 353.2839965696, 335.5141601679, 370.2850952192], [297.1149902679, 308.1431274496, 327.0135497727, 327.1960449024], [179.902832061, 271.9837646336, 287.4605713308, 400.9693603328], [239.9753418294, 267.6494751232, 321.67651365570003, 360.899108864]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048724_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three leather shoes, and two tripods.", "boxes_value": [[23.97534182940001, 43.64947512319998, 119.5141601679, 215.71350097919998], [87.95324702880004, 196.7844238336, 114.32983396290001, 215.71350097919998], [78.47692870560002, 129.2839965696, 119.5141601679, 146.2850952192], [81.1149902679, 84.1431274496, 111.0135497727, 103.19604490239999], [0, 47.9837646336, 71.46057133080001, 176.96936033280002], [23.97534182940001, 43.64947512319998, 105.67651365570003, 136.89910886400003]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048725.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[9.466613772, 26.057739264, 85.43554688900001, 217.388732928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048725_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[9.466613772, 26.057739264, 85.43554688900001, 217.388732928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048725.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a gun, a person, a backpack, a boots, and an airplane.", "boxes_value": [[9.466613772, 26.057739264, 85.43554688900001, 217.388732928], [51.300109861, 53.4746703872, 85.43554688900001, 154.1304321536], [20.369689956, 2.9684448256, 100.30670164899999, 219.6993408], [9.466613772, 26.057739264, 59.502685549999995, 102.50183106559999], [49.361144986999996, 195.5070190592, 69.92993161, 217.388732928], [0, 0.5793457152, 769.276977546, 397.8896484352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048725_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a gun, a person, a backpack, a boots, and an airplane.", "boxes_value": [[9.466613772, 26.057739264, 85.43554688900001, 217.388732928], [51.300109861, 53.4746703872, 85.43554688900001, 154.1304321536], [20.369689956, 2.9684448256, 100.30670164899999, 219.6993408], [9.466613772, 26.057739264, 59.502685549999995, 102.50183106559999], [49.361144986999996, 195.5070190592, 69.92993161, 217.388732928], [0, 0.5793457152, 104, 265]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048726.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[107.2426846957, 121.8240731136, 173.81077461720002, 374.9707813376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048726_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[17.242684695700007, 63.824073113599994, 83.81077461720002, 316.9707813376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048726.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a person, a hat, two leather shoes, and a bottle.", "boxes_value": [[107.2426846957, 121.8240731136, 173.81077461720002, 374.9707813376], [0.7026977200000001, 273.0716552704, 643.2215576416, 462.8142700032], [77.0382080192, 124.3645019648, 173.6203002602, 381.4362182656], [107.2426846957, 121.8240731136, 173.81077461720002, 170.7873788928], [108.0993068473, 364.2708163584, 126.143257023, 381.1654979072], [129.4992368074, 361.5814701056, 158.2201954794, 374.9707813376], [156.51340334059998, 192.3770338816, 172.7443977699, 216.1824923648]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048726_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, a person, a hat, two leather shoes, and a bottle.", "boxes_value": [[17.242684695700007, 63.824073113599994, 83.81077461720002, 316.9707813376], [0, 215.07165527040002, 100, 380], [0, 66.3645019648, 83.6203002602, 323.4362182656], [17.242684695700007, 63.824073113599994, 83.81077461720002, 112.7873788928], [18.0993068473, 306.2708163584, 36.143257023000004, 323.1654979072], [39.499236807399996, 303.5814701056, 68.22019547939999, 316.9707813376], [66.51340334059998, 134.3770338816, 82.74439776989999, 158.1824923648]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048728.jpg", "text": "Help me understand the details within the area in photograph . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[138.1280517888, 1.4795532288, 449.9676513792, 357.2459716608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048728_crop.jpg", "text": "Help me understand the details within the area in photograph . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[78.12805178880001, 1.4795532288, 389.9676513792, 357.2459716608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048728.jpg", "text": "Help me understand the details within the area in photograph . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a book, a glasses, and two gloves.", "boxes_value": [[138.1280517888, 1.4795532288, 449.9676513792, 357.2459716608], [245.73278807039998, 1.4795532288, 385.999145472, 332.3706054656], [138.1280517888, 187.1242675712, 257.011596672, 247.4948120064], [89.2659301632, 39.9166259712, 193.5486450432, 65.4761352704], [285.1246338048, 95.3685302784, 346.9055175936, 177.7431030272], [361.842895488, 292.8471679488, 449.9676513792, 357.2459716608]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048728_crop.jpg", "text": "Help me understand the details within the area in photograph . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a person, a book, a glasses, and two gloves.", "boxes_value": [[78.12805178880001, 1.4795532288, 389.9676513792, 357.2459716608], [185.73278807039998, 1.4795532288, 325.999145472, 332.3706054656], [78.12805178880001, 187.1242675712, 197.011596672, 247.4948120064], [29.265930163199997, 39.9166259712, 133.5486450432, 65.4761352704], [225.1246338048, 95.3685302784, 286.9055175936, 177.7431030272], [301.842895488, 292.8471679488, 389.9676513792, 357.2459716608]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048730.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[392.1423340032, 361.4615478272, 527.9985351936, 512.0996093952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048730_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object.", "boxes_value": [[34.142334003200006, 38.46154782719998, 169.9985351936, 189]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048730.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cars, two suvs, a stop sign, and a street lights.", "boxes_value": [[392.1423340032, 361.4615478272, 527.9985351936, 512.0996093952], [363.2635497984, 498.4407348736, 415.1673583872, 511.7093506048], [392.1423340032, 491.4161376768, 429.99707028480003, 512.0996093952], [494.6961669888, 427.4719238144, 520.974487296, 463.1235351552], [485.41308595199996, 361.4615478272, 500.63293455359997, 511.3190918144], [433.89953610239996, 486.7330932736, 494.3889159936, 512.0996093952], [498.75341798399995, 489.6440429568, 527.9985351936, 512.0080566272]], "boxes_seq": [[0], [0], [1, 6], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048730_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two cars, two suvs, a stop sign, and a street lights.", "boxes_value": [[34.142334003200006, 38.46154782719998, 169.9985351936, 189], [5.263549798399993, 175.44073487359998, 57.16735838720001, 188.70935060480002], [34.142334003200006, 168.41613767680002, 71.99707028480003, 189], [136.69616698879997, 104.47192381439999, 162.974487296, 140.1235351552], [127.41308595199996, 38.46154782719998, 142.63293455359997, 188.3190918144], [75.89953610239996, 163.7330932736, 136.3889159936, 189], [140.75341798399995, 166.64404295679998, 169.9985351936, 189]], "boxes_seq": [[0], [0], [1, 6], [2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048731.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[664.338378914, 256.9486084096, 776.6101074431999, 390.409606912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048731_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates.", "boxes_value": [[28.33837891400003, 33.94860840960001, 130, 167.40960691200002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048731.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a faucet, a sink, a pot, a bowl, and a plate.", "boxes_value": [[664.338378914, 256.9486084096, 776.6101074431999, 390.409606912], [678.9313965028, 222.0679931392, 740.6636962929999, 281.2414550528], [664.338378914, 276.8420410368, 706.5371094129999, 316.4721069568], [719.0064697508, 344.459899904, 776.6101074431999, 390.409606912], [702.5810546912, 283.7411499008, 751.7888183975999, 337.1097412096], [664.6693115194, 256.9486084096, 707.911743139, 270.72790528]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048731_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a faucet, a sink, a pot, a bowl, and a plate.", "boxes_value": [[28.33837891400003, 33.94860840960001, 130, 167.40960691200002], [42.9313965028, 0, 104.66369629299993, 58.241455052800006], [28.33837891400003, 53.842041036800026, 70.53710941299994, 93.47210695680002], [83.00646975079997, 121.459899904, 130, 167.40960691200002], [66.58105469120005, 60.741149900799996, 115.78881839759993, 114.10974120959997], [28.669311519400026, 33.94860840960001, 71.91174313900001, 47.727905280000016]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048732.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for each element you describe.", "boxes_value": [[180.40443441969998, 180.1834848768, 639.5389147345, 266.0047315456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048732_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for each element you describe.", "boxes_value": [[115.40443441969998, 22.18348487680001, 574.5389147345, 108.00473154560001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048732.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for each element you describe. For your reference, objects involved in this region include an umbrella, three street lights, and a boat.", "boxes_value": [[180.40443441969998, 180.1834848768, 639.5389147345, 266.0047315456], [449.2792968959, 219.8750000128, 489.6555175878, 235.4427490304], [338.9891850422, 180.1834848768, 355.3360891476, 266.0047315456], [630.3053223758, 194.9681336832, 639.5389147345, 242.7419375616], [608.6264534399, 199.3841995776, 620.2688090137, 233.5083452416], [180.40443441969998, 247.9873051136, 250.55775369030002, 262.9952291328]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048732_crop.jpg", "text": "Can you break down the region in the image for me? Provide the coordinates for each element you describe. For your reference, objects involved in this region include an umbrella, three street lights, and a boat.", "boxes_value": [[115.40443441969998, 22.18348487680001, 574.5389147345, 108.00473154560001], [384.2792968959, 61.8750000128, 424.6555175878, 77.4427490304], [273.9891850422, 22.18348487680001, 290.3360891476, 108.00473154560001], [565.3053223758, 36.968133683199994, 574.5389147345, 84.7419375616], [543.6264534399, 41.384199577599986, 555.2688090137, 75.50834524160001], [115.40443441969998, 89.9873051136, 185.55775369030002, 104.99522913279998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048735.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[5.4752807424, 418.84167480540003, 208.0337524224, 544.826538058]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048735_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations.", "boxes_value": [[5.4752807424, 31.841674805400032, 208.0337524224, 157.82653805799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048735.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a carriage, and two horses.", "boxes_value": [[5.4752807424, 418.84167480540003, 208.0337524224, 544.826538058], [188.3817748992, 437.96704099939996, 208.0337524224, 487.21533201000005], [87.659362816, 435.92626950619996, 99.932495104, 466.45947266380006], [5.4752807424, 418.84167480540003, 182.6846923776, 544.826538058], [119.4471435776, 461.70544431, 180.9428100608, 538.1105957128], [106.1988525568, 455.6549072548, 153.2598877184, 535.4777831876]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048735_crop.jpg", "text": "What is taking place within the specified area in this capture ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a carriage, and two horses.", "boxes_value": [[5.4752807424, 31.841674805400032, 208.0337524224, 157.82653805799998], [188.3817748992, 50.967040999399956, 208.0337524224, 100.21533201000005], [87.659362816, 48.92626950619996, 99.932495104, 79.45947266380006], [5.4752807424, 31.841674805400032, 182.6846923776, 157.82653805799998], [119.4471435776, 74.70544431000002, 180.9428100608, 151.11059571279998], [106.1988525568, 68.65490725479998, 153.2598877184, 148.47778318760004]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048736.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each mentioned object.", "boxes_value": [[325.3175659145, 412.1770019328, 439.96679686420003, 448.1760253952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048736_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each mentioned object.", "boxes_value": [[29.317565914499994, 9.17700193280001, 143.96679686420003, 45.176025395199986]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048736.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[325.3175659145, 412.1770019328, 439.96679686420003, 448.1760253952], [366.36010740489996, 118.7046508544, 502.318603548, 464.9015502848], [240.3497314469, 108.0932617216, 475.1269531129, 441.6891479552], [325.3175659145, 412.1770019328, 353.0994872837, 436.0458984448], [386.3594970988, 419.2202758656, 409.8371582052, 442.6979369984], [406.31542968829996, 415.3073120256, 439.96679686420003, 448.1760253952]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048736_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[29.317565914499994, 9.17700193280001, 143.96679686420003, 45.176025395199986], [70.36010740489996, 0, 172, 54], [0, 0, 172, 38.689147955199985], [29.317565914499994, 9.17700193280001, 57.0994872837, 33.0458984448], [90.35949709879998, 16.2202758656, 113.83715820520001, 39.697936998399996], [110.31542968829996, 12.307312025600027, 143.96679686420003, 45.176025395199986]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048737.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference.", "boxes_value": [[142.15216064999998, 192.14575196340002, 604.8143310749999, 270.1082763558]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048737_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference.", "boxes_value": [[116.15216064999998, 20.145751963400016, 578.8143310749999, 98.1082763558]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048737.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference. For your reference, objects involved in this region include a truck, a suv, and three cars.", "boxes_value": [[142.15216064999998, 192.14575196340002, 604.8143310749999, 270.1082763558], [142.15216064999998, 198.3723144384, 227.538207975, 270.1082763558], [235.37976074999997, 198.98547365160002, 292.303833, 256.02398681759996], [304.03918454999996, 192.41259764400002, 356.750488275, 249.53009032320003], [437.267456025, 192.14575196340002, 489.87207029999996, 247.4481811542], [542.29748535, 193.46612550839998, 604.8143310749999, 253.26489256739998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048737_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Give coordinates for the items you reference. For your reference, objects involved in this region include a truck, a suv, and three cars.", "boxes_value": [[116.15216064999998, 20.145751963400016, 578.8143310749999, 98.1082763558], [116.15216064999998, 26.372314438399997, 201.538207975, 98.1082763558], [209.37976074999997, 26.985473651600017, 266.303833, 84.02398681759996], [278.03918454999996, 20.412597644000016, 330.750488275, 77.53009032320003], [411.267456025, 20.145751963400016, 463.87207029999996, 75.4481811542], [516.29748535, 21.466125508399983, 578.8143310749999, 81.26489256739998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048738.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[412.53454588880004, 186.3306274304, 596.316284152, 292.516906752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048738_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference.", "boxes_value": [[46.53454588880004, 27.3306274304, 230.31628415199998, 133.516906752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048738.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a book, a laptop, and a chair.", "boxes_value": [[412.53454588880004, 186.3306274304, 596.316284152, 292.516906752], [368.84533689280005, 171.5721435648, 663.8394775352, 505.6224365056], [359.0001220416, 129.3570556416, 493.1582031008, 426.1815795712], [428.6271972336, 247.8837280256, 498.3682861392, 267.8348388864], [490.4042968984, 186.3306274304, 596.316284152, 292.516906752], [412.53454588880004, 202.823852544, 426.2342529184, 227.4833374208]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048738_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a book, a laptop, and a chair.", "boxes_value": [[46.53454588880004, 27.3306274304, 230.31628415199998, 133.516906752], [2.8453368928000486, 12.572143564800001, 276, 160], [0, 0, 127.1582031008, 160], [62.627197233599986, 88.88372802559999, 132.3682861392, 108.83483888640001], [124.40429689839999, 27.3306274304, 230.31628415199998, 133.516906752], [46.53454588880004, 43.823852544000005, 60.23425291839999, 68.48333742080001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048739.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[75.6390991161, 199.6272583168, 304.2450561826, 402.9215088128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048739_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[57.6390991161, 51.62725831680001, 286.2450561826, 254.92150881280003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048739.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, and three lamps.", "boxes_value": [[75.6390991161, 199.6272583168, 304.2450561826, 402.9215088128], [105.8449706835, 354.0740356608, 128.1185913067, 383.2321777152], [288.2251586806, 323.2435913216, 304.2450561826, 402.9215088128], [75.6390991161, 227.0305175552, 135.3872070084, 237.3629150208], [106.1870116878, 199.6272583168, 194.6861572306, 218.9442748928], [202.46954347550002, 230.5975341568, 256.0679931318, 240.2177734144]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048739_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, and three lamps.", "boxes_value": [[57.6390991161, 51.62725831680001, 286.2450561826, 254.92150881280003], [87.8449706835, 206.0740356608, 110.11859130670001, 235.2321777152], [270.2251586806, 175.24359132159998, 286.2450561826, 254.92150881280003], [57.6390991161, 79.03051755519999, 117.3872070084, 89.36291502079999], [88.1870116878, 51.62725831680001, 176.6861572306, 70.9442748928], [184.46954347550002, 82.59753415680001, 238.0679931318, 92.2177734144]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048741.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[445.4822998272, 149.8851318272, 543.5601806592, 313.3779907072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048741_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[25.482299827199995, 40.88513182720001, 123.5601806592, 204.3779907072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048741.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a flower, a vase, a barrel, a trash bin can, a bottle, and a tripod.", "boxes_value": [[445.4822998272, 149.8851318272, 543.5601806592, 313.3779907072], [490.6072998144, 227.2963256832, 556.0299072, 315.082824704], [497.87536619519994, 128.0270385664, 550.8543700992, 230.892089856], [513.231689472, 220.164489728, 543.5601806592, 234.7528686592], [445.4822998272, 149.8851318272, 459.09899903999997, 200.77691648], [463.8911132928, 254.6380004864, 491.951538048, 313.3779907072], [502.11767577599994, 198.5787963904, 516.5399169791999, 232.7854614016], [441.3548583936, 6.8963012608, 491.07690432, 336.9949950976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048741_crop.jpg", "text": "What can you tell me about the area within the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a flower, a vase, a barrel, a trash bin can, a bottle, and a tripod.", "boxes_value": [[25.482299827199995, 40.88513182720001, 123.5601806592, 204.3779907072], [70.6072998144, 118.2963256832, 136.02990720000003, 206.08282470400002], [77.87536619519994, 19.027038566399995, 130.85437009919997, 121.89208985600001], [93.23168947199997, 111.164489728, 123.5601806592, 125.7528686592], [25.482299827199995, 40.88513182720001, 39.09899903999997, 91.77691648000001], [43.891113292800014, 145.6380004864, 71.95153804799997, 204.3779907072], [82.11767577599994, 89.57879639039999, 96.53991697919992, 123.7854614016], [21.35485839360001, 0, 71.07690431999998, 227.9949950976]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048743.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe.", "boxes_value": [[118.18579100800001, 206.5226440192, 456.235351543, 279.4280395264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048743_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe.", "boxes_value": [[85.18579100800001, 18.522644019199987, 423.235351543, 91.42803952640003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048743.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three cabinets, a wine glass, and a bottle.", "boxes_value": [[118.18579100800001, 206.5226440192, 456.235351543, 279.4280395264], [311.148437489, 238.8619384832, 407.916076643, 265.3820190208], [220.6793213155, 238.0957641728, 314.0114135585, 265.9545898496], [118.18579100800001, 238.0957641728, 223.542297385, 263.6642456064], [384.2932739485, 218.9087524352, 401.191528307, 279.4280395264], [433.3990478315, 206.5226440192, 456.235351543, 260.4535522304]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048743_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three cabinets, a wine glass, and a bottle.", "boxes_value": [[85.18579100800001, 18.522644019199987, 423.235351543, 91.42803952640003], [278.148437489, 50.86193848319999, 374.916076643, 77.38201902079999], [187.6793213155, 50.09576417279999, 281.0114135585, 77.95458984959998], [85.18579100800001, 50.09576417279999, 190.542297385, 75.66424560640002], [351.2932739485, 30.9087524352, 368.191528307, 91.42803952640003], [400.3990478315, 18.522644019199987, 423.235351543, 72.45355223040002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048746.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[166.0788106227, 125.2798461952, 299.4315576766, 399.4600829952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048746_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates.", "boxes_value": [[34.07881062269999, 69.2798461952, 167.4315576766, 343.4600829952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048746.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two pictures, a hat, a handbag, and a sneakers.", "boxes_value": [[166.0788106227, 125.2798461952, 299.4315576766, 399.4600829952], [220.37451168869998, 125.2798461952, 256.8922729795, 187.0444946432], [251.93304440269998, 133.3948974592, 269.5566406575, 174.8718872064], [166.0788106227, 230.9513629184, 240.5810495454, 294.1843556352], [266.2498881664, 244.0988168192, 299.4315576766, 287.2975940608], [265.3013915868, 384.3551025152, 290.15161133730004, 399.4600829952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048746_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Please point out the objects and their coordinates. For your reference, objects involved in this region include two pictures, a hat, a handbag, and a sneakers.", "boxes_value": [[34.07881062269999, 69.2798461952, 167.4315576766, 343.4600829952], [88.37451168869998, 69.2798461952, 124.89227297949998, 131.0444946432], [119.93304440269998, 77.3948974592, 137.5566406575, 118.8718872064], [34.07881062269999, 174.9513629184, 108.58104954539999, 238.1843556352], [134.2498881664, 188.0988168192, 167.4315576766, 231.2975940608], [133.30139158679998, 328.3551025152, 158.15161133730004, 343.4600829952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048747.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[326.0125732608, 169.1427001856, 521.2459716864, 428.3731689472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048747_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[49.012573260800025, 65.1427001856, 244.24597168640003, 324.3731689472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048747.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a flower, a vase, a lantern, and two handbags.", "boxes_value": [[326.0125732608, 169.1427001856, 521.2459716864, 428.3731689472], [329.978637696, 314.6916504064, 476.91625973760006, 404.1318969856], [349.20031741440005, 388.8123168768, 463.8596191488, 428.3731689472], [402.4985351424, 169.1427001856, 521.2459716864, 286.962524416], [326.0125732608, 306.53106688, 347.1782226432, 329.94152832], [431.8408203264, 299.4758300672, 456.5340576, 318.3966674944]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048747_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a flower, a vase, a lantern, and two handbags.", "boxes_value": [[49.012573260800025, 65.1427001856, 244.24597168640003, 324.3731689472], [52.97863769600002, 210.69165040640002, 199.91625973760006, 300.1318969856], [72.20031741440005, 284.8123168768, 186.8596191488, 324.3731689472], [125.49853514239999, 65.1427001856, 244.24597168640003, 182.962524416], [49.012573260800025, 202.53106688000003, 70.1782226432, 225.94152831999997], [154.84082032639998, 195.47583006719998, 179.53405759999998, 214.3966674944]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048748.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each object you identify.", "boxes_value": [[0.1375122148, 0, 168.7902831943, 209.6846923776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048748_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each object you identify.", "boxes_value": [[0.1375122148, 0, 168.7902831943, 209.6846923776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048748.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a chair, a desk, and two books.", "boxes_value": [[0.1375122148, 0, 168.7902831943, 209.6846923776], [0.1375122148, 0, 94.9246826414, 209.6846923776], [79.48870847639999, 13.96545408, 168.7902831943, 154.4979248128], [76.66864013419999, 0, 222.371215813, 116.42724608], [11.709167453400001, 95.8330688512, 61.3973388401, 115.7083129856], [0.1823730208, 22.0823364096, 46.8827514821, 36.0726928896]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048748_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a chair, a desk, and two books.", "boxes_value": [[0.1375122148, 0, 168.7902831943, 209.6846923776], [0.1375122148, 0, 94.9246826414, 209.6846923776], [79.48870847639999, 13.96545408, 168.7902831943, 154.4979248128], [76.66864013419999, 0, 210, 116.42724608], [11.709167453400001, 95.8330688512, 61.3973388401, 115.7083129856], [0.1823730208, 22.0823364096, 46.8827514821, 36.0726928896]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048752.jpg", "text": "Help me understand the details within the area in photograph . Please point out the objects and their coordinates.", "boxes_value": [[188.6168212992, 327.718872064, 760.7499999744, 451.7512817152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048752_crop.jpg", "text": "Help me understand the details within the area in photograph . Please point out the objects and their coordinates.", "boxes_value": [[143.6168212992, 31.71887206399998, 715.7499999744, 155.75128171519998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048752.jpg", "text": "Help me understand the details within the area in photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, two umbrellas, two cars, a stroller, and a bicycle.", "boxes_value": [[188.6168212992, 327.718872064, 760.7499999744, 451.7512817152], [505.10070804480006, 403.034851072, 539.2641601536, 450.8637085184], [368.3682861312, 329.6265259008, 392.15026851839997, 356.8422851584], [335.9383544832, 327.718872064, 366.0791015424, 355.4432983552], [188.6168212992, 345.6807861248, 227.8299560448, 366.6977539072], [285.9102782976, 352.8377685504, 313.7219238144, 372.6212768768], [616.1055908352, 402.3544922112, 681.1508788992, 483.5172729344], [724.6508788992, 395.9616699392, 760.7499999744, 451.7512817152]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048752_crop.jpg", "text": "Help me understand the details within the area in photograph . Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, two umbrellas, two cars, a stroller, and a bicycle.", "boxes_value": [[143.6168212992, 31.71887206399998, 715.7499999744, 155.75128171519998], [460.10070804480006, 107.03485107199998, 494.26416015359996, 154.86370851840002], [323.3682861312, 33.626525900800004, 347.15026851839997, 60.84228515839999], [290.9383544832, 31.71887206399998, 321.0791015424, 59.4432983552], [143.6168212992, 49.68078612480002, 182.8299560448, 70.69775390720002], [240.91027829759997, 56.83776855040003, 268.7219238144, 76.62127687679998], [571.1055908352, 106.35449221120001, 636.1508788992, 186], [679.6508788992, 99.96166993920002, 715.7499999744, 155.75128171519998]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00048753.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference.", "boxes_value": [[306.61633299199997, 258.856689456, 381.394653312, 364.510375968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048753_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference.", "boxes_value": [[19.61633299199997, 26.856689456000026, 94.394653312, 132.510375968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048753.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, two people, and a boat.", "boxes_value": [[306.61633299199997, 258.856689456, 381.394653312, 364.510375968], [298.344909696, 312.56805417600003, 347.766235328, 356.019165024], [285.468078592, 304.72998048, 328.85742188800003, 347.55950928], [306.61633299199997, 337.40368651200004, 354.95117190400003, 364.510375968], [320.16436768, 258.856689456, 381.394653312, 360.30328368], [311.831298816, 229.50958252799998, 344.620300288, 284.580627456], [229.33825683199998, 257.60198976, 525.9686279040001, 455.132324208]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048753_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give coordinates for the items you reference. For your reference, objects involved in this region include three chairs, two people, and a boat.", "boxes_value": [[19.61633299199997, 26.856689456000026, 94.394653312, 132.510375968], [11.344909696000002, 80.56805417600003, 60.76623532799999, 124.01916502400002], [0, 72.72998048, 41.85742188800003, 115.55950927999999], [19.61633299199997, 105.40368651200004, 67.95117190400003, 132.510375968], [33.16436768, 26.856689456000026, 94.394653312, 128.30328368], [24.831298816000015, 0, 57.62030028800001, 52.580627456], [0, 25.60198975999998, 113, 158]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048755.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[574.319946304, 338.960021952, 638.3199463039999, 475.280029296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048755_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe.", "boxes_value": [[16.31994630400004, 34.96002195199998, 80.31994630399993, 171.280029296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048755.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a flower, a vase, and a napkin.", "boxes_value": [[574.319946304, 338.960021952, 638.3199463039999, 475.280029296], [595.439941376, 338.960021952, 638.3199463039999, 434.320007328], [510.319946304, 324.88000488, 638.95996096, 415.119995136], [541.6800536960001, 355.5999756, 628.719970688, 430.479980448], [574.319946304, 421.520019552, 592.239990208, 475.280029296], [553.839965824, 425.359985328, 615.280029312, 475.919982912]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048755_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a flower, a vase, and a napkin.", "boxes_value": [[16.31994630400004, 34.96002195199998, 80.31994630399993, 171.280029296], [37.43994137599998, 34.96002195199998, 80.31994630399993, 130.32000732799997], [0, 20.88000488, 80.95996095999999, 111.119995136], [0, 51.59997559999999, 70.71997068799999, 126.47998044799999], [16.31994630400004, 117.52001955200001, 34.239990207999995, 171.280029296], [0, 121.359985328, 57.28002931200001, 171.91998291200002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048756.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[240.61315917119998, 41.18737792, 652.7363281596, 240.6093139456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048756_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference.", "boxes_value": [[103.61315917119998, 41.18737792, 515.7363281596, 240.6093139456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048756.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include four glasses, a hat, and two cameras.", "boxes_value": [[240.61315917119998, 41.18737792, 652.7363281596, 240.6093139456], [346.0416259644, 224.2494507008, 392.6423339652, 240.6093139456], [443.7049560912, 191.5297241088, 471.9628906236, 212.3513794048], [508.22155763280006, 90.5162353664, 571.5970459116, 123.9210815488], [628.2225341724, 41.18737792, 652.7363281596, 54.5585327104], [240.61315917119998, 156.147705088, 294.4899902136, 169.9622192128], [552.1840820016, 208.3772582912, 572.2313232528, 233.078308096], [274.1848144884, 55.1852417024, 303.71008302, 72.4606933504]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6, 7]]}, {"image_path": "objects365_v1_00048756_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include four glasses, a hat, and two cameras.", "boxes_value": [[103.61315917119998, 41.18737792, 515.7363281596, 240.6093139456], [209.0416259644, 224.2494507008, 255.6423339652, 240.6093139456], [306.7049560912, 191.5297241088, 334.9628906236, 212.3513794048], [371.22155763280006, 90.5162353664, 434.59704591160005, 123.9210815488], [491.22253417239995, 41.18737792, 515.7363281596, 54.5585327104], [103.61315917119998, 156.147705088, 157.48999021359998, 169.9622192128], [415.1840820016, 208.3772582912, 435.23132325280005, 233.078308096], [137.18481448839998, 55.1852417024, 166.71008302, 72.4606933504]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3], [6, 7]]}, {"image_path": "objects365_v1_00048757.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[27.789342787800003, 11.4002837504, 531.5427407037, 224.9326782464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048757_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe.", "boxes_value": [[27.789342787800003, 11.4002837504, 531.5427407037, 224.9326782464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048757.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and four hats.", "boxes_value": [[27.789342787800003, 11.4002837504, 531.5427407037, 224.9326782464], [449.8686523232, 11.9404296704, 557.8533935205, 263.4215088128], [298.40026859060004, 10.886108416, 418.70532227260003, 243.1290893312], [163.815063445, 22.7521972736, 291.6041259494, 224.9326782464], [27.789342787800003, 50.25153152, 88.62576754460001, 81.1711476224], [204.6948965743, 29.3470532608, 258.8072346101, 75.6318203392], [300.7415884583, 11.4002837504, 351.8881825844, 60.5110606848], [460.2610096667, 12.6552506368, 531.5427407037, 55.1565644288]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7]]}, {"image_path": "objects365_v1_00048757_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, and four hats.", "boxes_value": [[27.789342787800003, 11.4002837504, 531.5427407037, 224.9326782464], [449.8686523232, 11.9404296704, 557.8533935205, 263.4215088128], [298.40026859060004, 10.886108416, 418.70532227260003, 243.1290893312], [163.815063445, 22.7521972736, 291.6041259494, 224.9326782464], [27.789342787800003, 50.25153152, 88.62576754460001, 81.1711476224], [204.6948965743, 29.3470532608, 258.8072346101, 75.6318203392], [300.7415884583, 11.4002837504, 351.8881825844, 60.5110606848], [460.2610096667, 12.6552506368, 531.5427407037, 55.1565644288]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7]]}, {"image_path": "objects365_v1_00048758.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[409.3989257472, 368.4087524352, 628.3148193024, 512.0845947392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048758_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations.", "boxes_value": [[55.398925747199996, 36.40875243519997, 274.3148193024, 180]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048758.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, three people, and a backpack.", "boxes_value": [[409.3989257472, 368.4087524352, 628.3148193024, 512.0845947392], [493.7670898176, 439.4025268736, 520.5760498176, 512.0845947392], [458.5687256064, 368.4087524352, 509.1141357312, 511.9360962048], [508.303222656, 387.8700561408, 591.8248290816, 511.6657714688], [553.442749056, 395.1680908288, 628.3148193024, 511.9360962048], [409.3989257472, 389.2974853632, 440.26379397119996, 461.3942260736]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048758_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a desk, three people, and a backpack.", "boxes_value": [[55.398925747199996, 36.40875243519997, 274.3148193024, 180], [139.76708981759998, 107.40252687359998, 166.57604981760005, 180], [104.5687256064, 36.40875243519997, 155.1141357312, 179.93609620479998], [154.303222656, 55.87005614079999, 237.8248290816, 179.66577146880002], [199.44274905600003, 63.16809082880002, 274.3148193024, 179.93609620479998], [55.398925747199996, 57.297485363199996, 86.26379397119996, 129.3942260736]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048759.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[54.2127685604, 336.279968256, 190.9293823038, 501.647705088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048759_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations.", "boxes_value": [[34.2127685604, 42.27996825600002, 170.9293823038, 207.647705088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048759.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a flag, a suv, a street lights, and a tricycle.", "boxes_value": [[54.2127685604, 336.279968256, 190.9293823038, 501.647705088], [75.4266357674, 412.6322021376, 87.4523315324, 439.890502912], [140.3182983022, 354.6612548608, 160.7849731422, 384.9821777408], [118.15917968939999, 413.36511232, 190.9293823038, 466.3184204288], [111.2116088806, 336.279968256, 126.8647460614, 431.6895752192], [54.2127685604, 431.2211303936, 120.95422365640002, 501.647705088]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048759_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a flag, a suv, a street lights, and a tricycle.", "boxes_value": [[34.2127685604, 42.27996825600002, 170.9293823038, 207.647705088], [55.4266357674, 118.63220213760002, 67.4523315324, 145.890502912], [120.31829830219999, 60.6612548608, 140.7849731422, 90.98217774080001], [98.15917968939999, 119.36511231999998, 170.9293823038, 172.31842042879998], [91.2116088806, 42.27996825600002, 106.8647460614, 137.68957521919998], [34.2127685604, 137.2211303936, 100.95422365640002, 207.647705088]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048762.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[318.94720456109997, 139.7657470464, 532.0380859542, 451.442260736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048762_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[53.947204561099966, 78.76574704640001, 267.0380859542, 390.442260736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048762.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a boots, four gloves, and a hat.", "boxes_value": [[318.94720456109997, 139.7657470464, 532.0380859542, 451.442260736], [326.81030271149996, 168.369506816, 483.2866210645, 478.1769409024], [318.94720456109997, 139.7657470464, 532.0380859542, 451.442260736], [318.9983520406, 395.3471069184, 353.24865721569995, 448.9713134592], [325.2257080151, 296.0557861376, 348.4051513779, 325.4626464768], [458.42138672010003, 313.0079955968, 482.9847412115, 345.5285033984], [397.5318603607, 141.1071166976, 421.056274388, 168.0496216064], [493.05786132459997, 311.0064697344, 531.9536132655, 334.0263671808], [394.23913573339996, 169.0187378176, 425.43457027989996, 195.0148925952]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6, 7], [8]]}, {"image_path": "objects365_v1_00048762_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a boots, four gloves, and a hat.", "boxes_value": [[53.947204561099966, 78.76574704640001, 267.0380859542, 390.442260736], [61.81030271149996, 107.36950681600001, 218.28662106450003, 417.1769409024], [53.947204561099966, 78.76574704640001, 267.0380859542, 390.442260736], [53.998352040600025, 334.3471069184, 88.24865721569995, 387.9713134592], [60.2257080151, 235.05578613760002, 83.40515137789998, 264.4626464768], [193.42138672010003, 252.00799559680001, 217.98474121150002, 284.5285033984], [132.53186036070002, 80.10711669759999, 156.05627438800002, 107.0496216064], [228.05786132459997, 250.0064697344, 266.95361326550005, 273.0263671808], [129.23913573339996, 108.0187378176, 160.43457027989996, 134.0148925952]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6, 7], [8]]}, {"image_path": "objects365_v1_00048763.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give coordinates for the items you reference.", "boxes_value": [[340.771972682, 259.0779419136, 399.3162841886, 460.9668579328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048763_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give coordinates for the items you reference.", "boxes_value": [[14.771972682000012, 51.0779419136, 73.31628418859998, 252.9668579328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048763.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a backpack, a handbag, a sandals, and a train.", "boxes_value": [[340.771972682, 259.0779419136, 399.3162841886, 460.9668579328], [340.771972682, 259.0779419136, 399.3162841886, 460.9668579328], [381.220825205, 261.5616455168, 419.89550782379996, 381.8434448384], [379.3760986578, 281.0754394624, 407.2893066416, 325.0814819328], [373.5881347902, 293.9672241152, 405.09729001860006, 361.1265869312], [360.51062008919996, 436.3904418816, 375.6485595838, 460.5594482176], [31.8475952466, 209.2442016768, 708.7020263348, 409.7643432448]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048763_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a backpack, a handbag, a sandals, and a train.", "boxes_value": [[14.771972682000012, 51.0779419136, 73.31628418859998, 252.9668579328], [14.771972682000012, 51.0779419136, 73.31628418859998, 252.9668579328], [55.22082520499998, 53.56164551680001, 87, 173.8434448384], [53.376098657800014, 73.07543946240003, 81.28930664159998, 117.08148193279999], [47.5881347902, 85.9672241152, 79.09729001860006, 153.1265869312], [34.51062008919996, 228.3904418816, 49.64855958380002, 252.5594482176], [0, 1.244201676800003, 87, 201.7643432448]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048766.jpg", "text": "Tell me what you see within the designated area in the picture . Please mention the objects and their locations.", "boxes_value": [[417.6750488064, 289.0336303616, 457.6380615356, 405.3795776512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048766_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please mention the objects and their locations.", "boxes_value": [[10.675048806400014, 30.03363036159999, 50.6380615356, 146.3795776512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048766.jpg", "text": "Tell me what you see within the designated area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a leather shoes, and a boots.", "boxes_value": [[417.6750488064, 289.0336303616, 457.6380615356, 405.3795776512], [349.2283935804, 86.6788940288, 553.19213871, 456.8731079168], [332.4013672032, 77.5005493248, 458.8588867166, 426.7884521472], [418.8607177682, 289.0336303616, 428.94616699470004, 354.8544922112], [431.87048343649997, 381.3171997184, 457.6380615356, 405.3795776512], [417.6750488064, 320.8723144704, 428.5551757888, 354.2791748096]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048766_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a leather shoes, and a boots.", "boxes_value": [[10.675048806400014, 30.03363036159999, 50.6380615356, 146.3795776512], [0, 0, 60, 175], [0, 0, 51.85888671660001, 167.78845214720002], [11.860717768200004, 30.03363036159999, 21.946166994700036, 95.85449221120001], [24.870483436499967, 122.31719971839999, 50.6380615356, 146.3795776512], [10.675048806400014, 61.8723144704, 21.5551757888, 95.27917480960002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048767.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[170.9067382784, 35.1597289929, 419.2775268352, 259.1503906292]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048767_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[62.90673827840001, 35.1597289929, 311.2775268352, 259.1503906292]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048767.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a baseball bat, a person, a helmet, and two hats.", "boxes_value": [[170.9067382784, 35.1597289929, 419.2775268352, 259.1503906292], [170.9067382784, 35.1597289929, 236.9684448256, 259.1503906292], [80.241760256, 145.7477417101, 424.316711424, 675.9040527038001], [207.8941650432, 146.0114746434, 310.1425170944, 221.9136352389], [399.9211425792, 98.3733520612, 419.2775268352, 114.9645385962], [279.4163208192, 101.35015867429999, 302.7794189312, 119.1264648744]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048767_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a baseball bat, a person, a helmet, and two hats.", "boxes_value": [[62.90673827840001, 35.1597289929, 311.2775268352, 259.1503906292], [62.90673827840001, 35.1597289929, 128.9684448256, 259.1503906292], [0, 145.7477417101, 316.316711424, 315], [99.89416504319999, 146.0114746434, 202.14251709439998, 221.9136352389], [291.9211425792, 98.3733520612, 311.2775268352, 114.9645385962], [171.4163208192, 101.35015867429999, 194.77941893119998, 119.1264648744]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048768.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[223.7385253598, 252.6591186432, 671.4221191656001, 293.0563354624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048768_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[112.73852535980001, 10.659118643200003, 560.4221191656001, 51.0563354624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048768.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a flower, and two moniters.", "boxes_value": [[223.7385253598, 252.6591186432, 671.4221191656001, 293.0563354624], [551.1353760022, 266.86163328, 626.2059325939999, 325.1934814208], [580.0477294986, 252.6591186432, 631.2781982694, 293.0563354624], [638.8143310736, 254.9779052544, 671.4221191656001, 271.2817993216], [304.7455444044, 258.8454589952, 381.8073730196, 279.3602294784], [223.7385253598, 258.3194579968, 301.5894165174, 279.6232300032]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048768_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, a flower, and two moniters.", "boxes_value": [[112.73852535980001, 10.659118643200003, 560.4221191656001, 51.0563354624], [440.1353760022, 24.861633279999978, 515.2059325939999, 61], [469.0477294986, 10.659118643200003, 520.2781982694, 51.0563354624], [527.8143310736, 12.977905254400014, 560.4221191656001, 29.281799321599976], [193.7455444044, 16.845458995199976, 270.8073730196, 37.360229478400015], [112.73852535980001, 16.319457996799997, 190.58941651740002, 37.62323000319998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048769.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.7377929728, 30.4154052443, 250.7581787136, 351.5971679967]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048769_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[49.7377929728, 30.4154052443, 246.7581787136, 351.5971679967]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048769.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three lamps, and two potted plants.", "boxes_value": [[53.7377929728, 30.4154052443, 250.7581787136, 351.5971679967], [196.7872314368, 30.4154052443, 237.1170043904, 267.9130859178], [213.7445068288, 168.5850830158, 245.2746582016, 327.606811506], [226.7678222848, 252.208557109, 250.7581787136, 351.5971679967], [53.7377929728, 186.7607421602, 143.628173824, 259.0255737571], [115.42724608, 267.8383178395, 182.404357888, 320.7150268479]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048769_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three lamps, and two potted plants.", "boxes_value": [[49.7377929728, 30.4154052443, 246.7581787136, 351.5971679967], [192.7872314368, 30.4154052443, 233.1170043904, 267.9130859178], [209.7445068288, 168.5850830158, 241.2746582016, 327.606811506], [222.7678222848, 252.208557109, 246.7581787136, 351.5971679967], [49.7377929728, 186.7607421602, 139.628173824, 259.0255737571], [111.42724608, 267.8383178395, 178.404357888, 320.7150268479]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048773.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[220.28015134560002, 102.8102416896, 391.0673828367, 293.6979980288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048773_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[43.28015134560002, 47.810241689600005, 214.0673828367, 238.6979980288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048773.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include four chairs, a desk, a flower, and a tent.", "boxes_value": [[220.28015134560002, 102.8102416896, 391.0673828367, 293.6979980288], [109.0423584143, 84.545776384, 303.81604004589997, 327.8074340864], [300.5286865306, 88.6549682688, 484.6186523327, 323.698303232], [303.6036377007, 80.990234368, 458.3472900308, 282.312194816], [220.28015134560002, 146.717407232, 391.0673828367, 293.6979980288], [332.242065453, 102.8102416896, 375.9686279009, 156.8031005696], [123.80065915969999, 81.1400756736, 297.6744995381, 271.507873536], [214.63336183139998, 64.969665536, 358.8613281479, 183.3825073152]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4], [5], [7]]}, {"image_path": "objects365_v1_00048773_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include four chairs, a desk, a flower, and a tent.", "boxes_value": [[43.28015134560002, 47.810241689600005, 214.0673828367, 238.6979980288], [0, 29.545776384000007, 126.81604004589997, 272.8074340864], [123.5286865306, 33.654968268800005, 256, 268.698303232], [126.6036377007, 25.990234368000003, 256, 227.312194816], [43.28015134560002, 91.717407232, 214.0673828367, 238.6979980288], [155.242065453, 47.810241689600005, 198.9686279009, 101.80310056959999], [0, 26.140075673599995, 120.67449953810001, 216.50787353599998], [37.63336183139998, 9.969665535999994, 181.8613281479, 128.3825073152]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4], [5], [7]]}, {"image_path": "objects365_v1_00048775.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[437.3061523234, 137.9956054528, 579.2612304817001, 447.5834960896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048775_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object.", "boxes_value": [[36.3061523234, 77.99560545279999, 178.26123048170007, 387.5834960896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048775.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[437.3061523234, 137.9956054528, 579.2612304817001, 447.5834960896], [271.4034423798, 82.670471168, 512.3356933218, 489.5781250048], [445.4100341985, 137.9956054528, 509.65869143260005, 353.0499267584], [498.0582275392, 163.8735351808, 544.4599609187, 348.5881957888], [522.1514892378, 138.8879394304, 579.2612304817001, 370.0043945472], [437.3061523234, 401.3438110208, 478.82751463819994, 447.5834960896]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048775_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[36.3061523234, 77.99560545279999, 178.26123048170007, 387.5834960896], [0, 22.670471168000006, 111.33569332180002, 429.5781250048], [44.4100341985, 77.99560545279999, 108.65869143260005, 293.0499267584], [97.0582275392, 103.87353518079999, 143.45996091869995, 288.5881957888], [121.15148923779998, 78.8879394304, 178.26123048170007, 310.0043945472], [36.3061523234, 341.3438110208, 77.82751463819994, 387.5834960896]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048777.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please mention the objects and their locations.", "boxes_value": [[3.6931762697000003, 219.6297607168, 448.91198728210003, 511.6400146432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048777_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please mention the objects and their locations.", "boxes_value": [[3.6931762697000003, 73.6297607168, 448.91198728210003, 365.6400146432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048777.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a car, a van, a street lights, and a traffic light.", "boxes_value": [[3.6931762697000003, 219.6297607168, 448.91198728210003, 511.6400146432], [175.2068481317, 389.2089843712, 227.063598612, 511.6400146432], [407.6926269269, 375.8416748032, 448.91198728210003, 403.5906372096], [376.171997093, 370.4534912, 409.30908200240003, 400.627136256], [43.7785644287, 254.5884399616, 74.9353637825, 399.0425415168], [3.6931762697000003, 219.6297607168, 45.719177246300006, 297.1777344]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048777_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a car, a van, a street lights, and a traffic light.", "boxes_value": [[3.6931762697000003, 73.6297607168, 448.91198728210003, 365.6400146432], [175.2068481317, 243.20898437120002, 227.063598612, 365.6400146432], [407.6926269269, 229.84167480320002, 448.91198728210003, 257.5906372096], [376.171997093, 224.45349119999997, 409.30908200240003, 254.62713625599997], [43.7785644287, 108.58843996159999, 74.9353637825, 253.0425415168], [3.6931762697000003, 73.6297607168, 45.719177246300006, 151.17773440000002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048778.jpg", "text": "Fill me in about the selected portion within the presented image . Please point out the objects and their coordinates.", "boxes_value": [[600.1008301056, 153.496643072, 767.976318336, 324.2516479488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048778_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Please point out the objects and their coordinates.", "boxes_value": [[42.10083010560004, 43.49664307200001, 209.97631833599996, 214.25164794879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048778.jpg", "text": "Fill me in about the selected portion within the presented image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a glasses, two speakers, a moniter, and a keyboard.", "boxes_value": [[600.1008301056, 153.496643072, 767.976318336, 324.2516479488], [608.1103309824, 296.4651550208, 679.0289757696, 321.4790956032], [610.4215087872, 153.496643072, 674.6612548608, 242.5358886912], [600.1008301056, 244.22192384, 708.8177490432, 324.2516479488], [734.6356200959999, 121.6630859264, 768.0119629056001, 183.461059584], [698.745971712, 183.4316406272, 767.976318336, 197.935852032]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048778_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a glasses, two speakers, a moniter, and a keyboard.", "boxes_value": [[42.10083010560004, 43.49664307200001, 209.97631833599996, 214.25164794879998], [50.1103309824, 186.4651550208, 121.02897576960004, 211.4790956032], [52.42150878719997, 43.49664307200001, 116.6612548608, 132.5358886912], [42.10083010560004, 134.22192384, 150.81774904320002, 214.25164794879998], [176.6356200959999, 11.6630859264, 210, 73.461059584], [140.74597171200003, 73.43164062720001, 209.97631833599996, 87.93585203200001]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048779.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates.", "boxes_value": [[195.65631104, 564.2192382797, 357.2263794176, 594.3411865219]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048779_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates.", "boxes_value": [[40.65631103999999, 8.219238279699994, 202.22637941760001, 38.3411865219]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048779.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates. For your reference, objects involved in this region include two plates, two forks, and a knife.", "boxes_value": [[195.65631104, 564.2192382797, 357.2263794176, 594.3411865219], [205.6356811776, 580.1656493823, 273.031799296, 608.0415038856], [277.7952270336, 564.2192382797, 342.1654052864, 587.7390136912], [311.4244995072, 580.5180663947, 357.2263794176, 594.3411865219], [169.7909545984, 566.1060791345, 217.1483154432, 578.0260010078999], [195.65631104, 575.0828857459, 244.629333504, 593.4720458808]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048779_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates. For your reference, objects involved in this region include two plates, two forks, and a knife.", "boxes_value": [[40.65631103999999, 8.219238279699994, 202.22637941760001, 38.3411865219], [50.63568117759999, 24.16564938229999, 118.03179929599997, 45], [122.79522703359999, 8.219238279699994, 187.16540528640002, 31.739013691199943], [156.42449950719998, 24.518066394699986, 202.22637941760001, 38.3411865219], [14.790954598399992, 10.106079134499964, 62.148315443200005, 22.026001007899936], [40.65631103999999, 19.082885745899944, 89.62933350399999, 37.47204588080001]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048782.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[194.5443725672, 36.231994624, 359.6912842072, 430.0090942464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048782_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[41.544372567200014, 36.231994624, 206.6912842072, 430.0090942464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048782.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a helmet, two gloves, and four sneakers.", "boxes_value": [[194.5443725672, 36.231994624, 359.6912842072, 430.0090942464], [240.44360354, 36.231994624, 359.6912842072, 350.0065917952], [286.378418, 38.4142455808, 325.160400402, 65.4440918016], [295.839965852, 81.2531738112, 320.6325683692, 110.486145024], [242.55462650040002, 188.4406738432, 264.7568358996, 219.5238036992], [322.1470947216, 326.1473998848, 346.0721435564, 349.3687744], [265.7513427432, 332.1564941312, 285.47070310199996, 348.9955444224], [194.5443725672, 399.0112914944, 235.1870727556, 430.0090942464], [200.3818969644, 414.207946752, 232.7360229356, 444.601196288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048782_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a helmet, two gloves, and four sneakers.", "boxes_value": [[41.544372567200014, 36.231994624, 206.6912842072, 430.0090942464], [87.44360354, 36.231994624, 206.6912842072, 350.0065917952], [133.378418, 38.4142455808, 172.160400402, 65.4440918016], [142.83996585199998, 81.2531738112, 167.6325683692, 110.486145024], [89.55462650040002, 188.4406738432, 111.75683589959999, 219.5238036992], [169.1470947216, 326.1473998848, 193.07214355640002, 349.3687744], [112.75134274319998, 332.1564941312, 132.47070310199996, 348.9955444224], [41.544372567200014, 399.0112914944, 82.18707275560001, 430.0090942464], [47.3818969644, 414.207946752, 79.7360229356, 444.601196288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048786.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations.", "boxes_value": [[43.0515137008, 271.6568603648, 223.3745117472, 364.7854003712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048786_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations.", "boxes_value": [[43.0515137008, 23.656860364800025, 223.3745117472, 116.78540037120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048786.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include a bed, three pillows, and a telephone.", "boxes_value": [[43.0515137008, 271.6568603648, 223.3745117472, 364.7854003712], [67.6422729576, 243.1831054848, 473.4007568004, 510.7845458944], [159.3635253672, 291.7745971712, 223.3745117472, 341.1544799744], [120.95697019120001, 267.0846557696, 217.8878784148, 345.7266845696], [90.780334458, 271.6568603648, 131.0158081276, 342.9833374208], [43.0515137008, 343.0182495232, 89.5812988356, 364.7854003712]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048786_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include a bed, three pillows, and a telephone.", "boxes_value": [[43.0515137008, 23.656860364800025, 223.3745117472, 116.78540037120001], [67.6422729576, 0, 268, 140], [159.3635253672, 43.774597171200014, 223.3745117472, 93.15447997439998], [120.95697019120001, 19.084655769599976, 217.8878784148, 97.72668456960002], [90.780334458, 23.656860364800025, 131.0158081276, 94.98333742080001], [43.0515137008, 95.01824952319998, 89.5812988356, 116.78540037120001]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048787.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[500.19641118059997, 408.9421386752, 856.5272217229, 511.9578247168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048787_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.19641118059997, 25.9421386752, 445.5272217229, 128.95782471680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048787.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[500.19641118059997, 408.9421386752, 856.5272217229, 511.9578247168], [586.9630127249, 416.9226684416, 609.203979506, 470.79949952], [604.6024169773, 420.949096704, 618.4071044621, 471.7581787136], [613.9973144829, 421.3325195264, 633.2645263654, 473.292053248], [500.19641118059997, 408.9421386752, 551.1219481936, 511.9578247168], [812.1931152507, 438.4983520256, 856.5272217229, 508.83288576]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048787_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[89.19641118059997, 25.9421386752, 445.5272217229, 128.95782471680002], [175.96301272489995, 33.922668441600024, 198.203979506, 87.79949951999998], [193.60241697729998, 37.949096704, 207.40710446210005, 88.75817871359999], [202.99731448290004, 38.332519526400006, 222.2645263654, 90.292053248], [89.19641118059997, 25.9421386752, 140.12194819360002, 128.95782471680002], [401.1931152507, 55.49835202560001, 445.5272217229, 125.83288576000001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048792.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[0, 207.5211181568, 118.54772946770001, 366.5188598784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048792_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify.", "boxes_value": [[0, 40.521118156799986, 118.54772946770001, 199.5188598784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048792.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and an umbrella.", "boxes_value": [[0, 207.5211181568, 118.54772946770001, 366.5188598784], [0, 299.863525376, 36.4827880482, 369.0274048], [69.093749972, 300.938598656, 118.54772946770001, 366.5188598784], [40.4247436671, 292.6962890752, 80.91967772849999, 352.5427856384], [3.5134887866, 308.1058349568, 73.7524413964, 362.218566912], [0, 207.5211181568, 114.7657470388, 315.6586303488]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048792_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and an umbrella.", "boxes_value": [[0, 40.521118156799986, 118.54772946770001, 199.5188598784], [0, 132.86352537599998, 36.4827880482, 202.0274048], [69.093749972, 133.938598656, 118.54772946770001, 199.5188598784], [40.4247436671, 125.69628907520001, 80.91967772849999, 185.5427856384], [3.5134887866, 141.10583495679998, 73.7524413964, 195.21856691199997], [0, 40.521118156799986, 114.7657470388, 148.6586303488]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048793.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for each element you describe.", "boxes_value": [[151.80645753, 32.034484850000005, 281.084838867, 410.62432859999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048793_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for each element you describe.", "boxes_value": [[32.80645752999999, 32.034484850000005, 162.08483886699997, 410.62432859999996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048793.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a basketball, two people, and two sneakers.", "boxes_value": [[151.80645753, 32.034484850000005, 281.084838867, 410.62432859999996], [191.064208979, 32.034484850000005, 241.405944815, 83.2293091], [169.03173828799999, 112.2366333, 482.785766603, 445.71777345000004], [67.578308105, 27.69213865, 285.515258784, 410.02117919999995], [151.80645753, 352.60168455, 181.32672117, 381.61297605], [236.80444335800001, 368.37976075, 281.084838867, 410.62432859999996]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048793_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a basketball, two people, and two sneakers.", "boxes_value": [[32.80645752999999, 32.034484850000005, 162.08483886699997, 410.62432859999996], [72.064208979, 32.034484850000005, 122.405944815, 83.2293091], [50.031738287999985, 112.2366333, 194, 445.71777345000004], [0, 27.69213865, 166.51525878400003, 410.02117919999995], [32.80645752999999, 352.60168455, 62.32672117000001, 381.61297605], [117.80444335800001, 368.37976075, 162.08483886699997, 410.62432859999996]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048794.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[530.3405761784, 252.4003295748, 801.1234130832, 357.8879394552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048794_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[68.34057617840006, 26.400329574799997, 339.12341308320003, 131.8879394552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048794.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two pillows, a chair, a desk, a potted plant, and two vases.", "boxes_value": [[530.3405761784, 252.4003295748, 801.1234130832, 357.8879394552], [549.3408203072, 340.60076905889997, 616.9868164392, 379.7041626189], [669.8659668280001, 274.0547485554, 801.1234130832, 357.8879394552], [724.3266601336001, 278.7240600681, 773.4119872968, 312.35388185939996], [612.2440185344, 294.9409179735, 725.1279296816, 328.5659179803], [631.0921630576, 252.4003295748, 659.3505859176, 304.66766357520004], [530.3405761784, 264.4909668003, 542.1943359576, 314.7798462141], [544.708862268, 276.7039795032, 554.0482177712, 314.7798462141]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048794_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two pillows, a chair, a desk, a potted plant, and two vases.", "boxes_value": [[68.34057617840006, 26.400329574799997, 339.12341308320003, 131.8879394552], [87.34082030720003, 114.60076905889997, 154.98681643919997, 153.7041626189], [207.86596682800007, 48.05474855540001, 339.12341308320003, 131.8879394552], [262.32666013360006, 52.724060068100016, 311.4119872968, 86.35388185939996], [150.24401853439997, 68.9409179735, 263.1279296816, 102.56591798030001], [169.09216305760003, 26.400329574799997, 197.35058591760003, 78.66766357520004], [68.34057617840006, 38.49096680029999, 80.19433595759995, 88.77984621410002], [82.70886226799996, 50.7039795032, 92.04821777120003, 88.77984621410002]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048800.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object.", "boxes_value": [[364.4587402328, 1.3275756624, 473.9285888669, 48.4835815152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048800_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object.", "boxes_value": [[27.458740232799983, 1.3275756624, 136.9285888669, 48.4835815152]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048800.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, and four storage boxes.", "boxes_value": [[364.4587402328, 1.3275756624, 473.9285888669, 48.4835815152], [313.9504394522, 1.3800048912, 475.9269409364, 115.81988527200001], [402.6406250177, 1.68664548, 426.1003418198, 17.725463870400002], [364.4587402328, 1.4472656016, 401.80279539139997, 17.725463870400002], [437.8302001954, 1.3275756624, 472.42132569819995, 16.6481933904], [451.849060075, 26.404052750399998, 473.9285888669, 48.4835815152]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048800_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, and four storage boxes.", "boxes_value": [[27.458740232799983, 1.3275756624, 136.9285888669, 48.4835815152], [0, 1.3800048912, 138.9269409364, 60], [65.64062501770002, 1.68664548, 89.10034181980001, 17.725463870400002], [27.458740232799983, 1.4472656016, 64.80279539139997, 17.725463870400002], [100.83020019539998, 1.3275756624, 135.42132569819995, 16.6481933904], [114.84906007500001, 26.404052750399998, 136.9285888669, 48.4835815152]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048804.jpg", "text": "What's the story in the section of the included visual ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[73.0817870848, 239.869201664, 265.2523193344, 285.1931762688]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048804_crop.jpg", "text": "What's the story in the section of the included visual ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[48.0817870848, 11.869201664000002, 240.2523193344, 57.193176268800016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048804.jpg", "text": "What's the story in the section of the included visual ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a desk, a person, and a trash bin can.", "boxes_value": [[73.0817870848, 239.869201664, 265.2523193344, 285.1931762688], [73.0817870848, 248.6672973824, 98.2550048768, 285.1931762688], [122.339172352, 246.6566772224, 141.1964721664, 279.6569213952], [190.1528930816, 243.3929443328, 212.6365356544, 269.8656616448], [207.5595703296, 246.6566772224, 225.3289184768, 271.6788329984], [194.7981567488, 233.0479126016, 211.8521728512, 272.0285644288], [245.9049682432, 239.869201664, 265.2523193344, 267.1229248]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048804_crop.jpg", "text": "What's the story in the section of the included visual ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a desk, a person, and a trash bin can.", "boxes_value": [[48.0817870848, 11.869201664000002, 240.2523193344, 57.193176268800016], [48.0817870848, 20.667297382399994, 73.2550048768, 57.193176268800016], [97.339172352, 18.656677222399992, 116.19647216640001, 51.656921395200015], [165.1528930816, 15.392944332799999, 187.6365356544, 41.865661644800014], [182.5595703296, 18.656677222399992, 200.3289184768, 43.678832998400026], [169.7981567488, 5.04791260159999, 186.8521728512, 44.028564428799996], [220.9049682432, 11.869201664000002, 240.2523193344, 39.12292480000002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048806.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[421.0133056969, 316.462829568, 620.6907959193001, 512.3218994176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048806_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference.", "boxes_value": [[50.01330569689998, 49.46282956800002, 249.69079591930006, 245]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048806.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, two cups, and a bottle.", "boxes_value": [[421.0133056969, 316.462829568, 620.6907959193001, 512.3218994176], [1.0258789087, 392.4334716928, 679.8791504103, 510.3484496896], [425.5187987961, 10.1415405056, 682.6864013696, 512.051025408], [429.01110838569997, 340.0830688256, 492.4298095625, 459.1414794752], [473.9218750161, 331.3895263744, 620.6907959193001, 512.3218994176], [421.0133056969, 316.462829568, 441.01757815779996, 352.23986816]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048806_crop.jpg", "text": "I request a description of the area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a desk, a person, two cups, and a bottle.", "boxes_value": [[50.01330569689998, 49.46282956800002, 249.69079591930006, 245], [0, 125.43347169280003, 299, 243.3484496896], [54.518798796099986, 0, 299, 245], [58.01110838569997, 73.08306882559998, 121.4298095625, 192.14147947520001], [102.92187501609999, 64.38952637440002, 249.69079591930006, 245], [50.01330569689998, 49.46282956800002, 70.01757815779996, 85.23986816000001]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048807.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[218.227600129, 223.9896850432, 497.283081064, 467.022155776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048807_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[70.227600129, 60.98968504320001, 349.283081064, 304.022155776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048807.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a watch, and a slippers.", "boxes_value": [[218.227600129, 223.9896850432, 497.283081064, 467.022155776], [269.057251001, 223.9896850432, 497.283081064, 467.022155776], [348.195922844, 377.6719970816, 373.724609412, 408.8168945152], [298.962158187, 139.8911132672, 759.133911151, 471.39849856], [444.50268555, 267.2039794688, 565.423339866, 433.990966784], [218.227600129, 424.7903442432, 248.61187743699998, 455.4157714944]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00048807_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a watch, and a slippers.", "boxes_value": [[70.227600129, 60.98968504320001, 349.283081064, 304.022155776], [121.057251001, 60.98968504320001, 349.283081064, 304.022155776], [200.195922844, 214.6719970816, 225.724609412, 245.81689451519998], [150.962158187, 0, 419, 308.39849856], [296.50268555, 104.20397946880001, 417.423339866, 270.990966784], [70.227600129, 261.7903442432, 100.61187743699998, 292.4157714944]], "boxes_seq": [[0], [0], [1, 3, 4], [2], [5]]}, {"image_path": "objects365_v1_00048808.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please point out the objects and their coordinates.", "boxes_value": [[672.4819335657, 341.4334716928, 771.7607421534, 393.3032226816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048808_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please point out the objects and their coordinates.", "boxes_value": [[25.481933565700047, 13.433471692800026, 124, 65.3032226816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048808.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, and four pillows.", "boxes_value": [[672.4819335657, 341.4334716928, 771.7607421534, 393.3032226816], [597.7923584364, 311.4870605312, 771.5599365141001, 410.2470703104], [733.547119122, 364.8333129728, 771.7607421534, 393.3032226816], [730.9936523175, 353.7356567552, 771.5485840017001, 384.6552123904], [668.2706298708, 339.2599487488, 744.3442382796, 381.9155273216], [672.4819335657, 341.4334716928, 709.7036132859, 375.3948974592]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048808_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a couch, and four pillows.", "boxes_value": [[25.481933565700047, 13.433471692800026, 124, 65.3032226816], [0, 0, 124, 78], [86.54711912200003, 36.8333129728, 124, 65.3032226816], [83.99365231750005, 25.73565675520001, 124, 56.65521239039998], [21.270629870800008, 11.259948748800014, 97.34423827959995, 53.915527321599996], [25.481933565700047, 13.433471692800026, 62.70361328590002, 47.394897459200024]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048811.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[125.9352416768, 460.512329088, 420.6101684736, 757.1180419584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048811_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[73.9352416768, 74.512329088, 368.6101684736, 371.1180419584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048811.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a belt, two leather shoes, and two boots.", "boxes_value": [[125.9352416768, 460.512329088, 420.6101684736, 757.1180419584], [100.217834496, 250.074890112, 239.869506816, 758.7723388416], [311.0508422656, 279.89916994559997, 454.8515014656, 699.8881836288], [125.9352416768, 460.512329088, 205.4974975488, 475.5477295104], [149.340332032, 721.0198974719999, 197.2738647552, 757.1180419584], [187.2137451008, 613.9091797248, 215.618835456, 656.5167236351999], [319.7570190336, 561.9067382784, 386.2884521472, 627.3820800768001], [355.6628418048, 627.3820800768001, 420.6101684736, 695.4975585792]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00048811_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a belt, two leather shoes, and two boots.", "boxes_value": [[73.9352416768, 74.512329088, 368.6101684736, 371.1180419584], [48.217834495999995, 0, 187.869506816, 372.7723388416], [259.0508422656, 0, 402.8515014656, 313.8881836288], [73.9352416768, 74.512329088, 153.4974975488, 89.54772951040002], [97.34033203199999, 335.0198974719999, 145.2738647552, 371.1180419584], [135.2137451008, 227.90917972479997, 163.618835456, 270.5167236351999], [267.7570190336, 175.90673827839998, 334.2884521472, 241.3820800768001], [303.6628418048, 241.3820800768001, 368.6101684736, 309.4975585792]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6, 7]]}, {"image_path": "objects365_v1_00048812.jpg", "text": "What can you tell me about the selected region in the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 7.1494751232, 722.846557587, 252.630798336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048812_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 7.1494751232, 722.846557587, 252.630798336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048812.jpg", "text": "What can you tell me about the selected region in the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, five people, and two necklaces.", "boxes_value": [[0, 7.1494751232, 722.846557587, 252.630798336], [0, 7.1494751232, 722.846557587, 252.630798336], [458.89575192999996, 49.442260736, 590.470825175, 292.1253051904], [423.808959959, 75.7572632064, 498.855590849, 182.9666747904], [285.411498988, 50.4168700928, 483.26147461899996, 423.7004394496], [238.539672833, 78.323120128, 318.021850608, 197.904357888], [108.93371581400001, 41.0882568192, 294.391967793, 392.6713867264], [361.14550779999996, 156.0209350656, 401.09655763099994, 195.9719238144], [520.102050776, 144.1423950336, 547.256225593, 161.2924194304]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048812_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, five people, and two necklaces.", "boxes_value": [[0, 7.1494751232, 722.846557587, 252.630798336], [0, 7.1494751232, 722.846557587, 252.630798336], [458.89575192999996, 49.442260736, 590.470825175, 292.1253051904], [423.808959959, 75.7572632064, 498.855590849, 182.9666747904], [285.411498988, 50.4168700928, 483.26147461899996, 314], [238.539672833, 78.323120128, 318.021850608, 197.904357888], [108.93371581400001, 41.0882568192, 294.391967793, 314], [361.14550779999996, 156.0209350656, 401.09655763099994, 195.9719238144], [520.102050776, 144.1423950336, 547.256225593, 161.2924194304]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6], [7, 8]]}, {"image_path": "objects365_v1_00048814.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object.", "boxes_value": [[405.3370971648, 291.52502441039996, 511.8065185792, 565.7122802694]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048814_crop.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object.", "boxes_value": [[27.337097164800014, 69.52502441039996, 133.80651857919997, 343.7122802694]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048814.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, a couch, and two cups.", "boxes_value": [[405.3370971648, 291.52502441039996, 511.8065185792, 565.7122802694], [405.3980102656, 400.5955810494, 511.8065185792, 565.7122802694], [405.3370971648, 301.894775424, 511.6406860288, 403.16625978419995], [0, 259.4274902208, 496.1995239424, 489.10803220620005], [466.5260620288, 393.370239273, 486.1919555584, 420.5999755518], [450.0396118016, 291.52502441039996, 465.19451904, 313.4837646666]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048814_crop.jpg", "text": "Can you give me a description of the region in image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, a couch, and two cups.", "boxes_value": [[27.337097164800014, 69.52502441039996, 133.80651857919997, 343.7122802694], [27.39801026560002, 178.59558104939998, 133.80651857919997, 343.7122802694], [27.337097164800014, 79.89477542399999, 133.64068602880002, 181.16625978419995], [0, 37.427490220799996, 118.19952394239999, 267.10803220620005], [88.52606202880003, 171.37023927299998, 108.19195555840002, 198.59997555180001], [72.03961180160002, 69.52502441039996, 87.19451903999999, 91.4837646666]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048818.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.023803703000000002, 125.5110473728, 158.34783934200001, 319.6948242432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048818_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0.023803703000000002, 49.51104737279999, 158.34783934200001, 243.6948242432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048818.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three street lights.", "boxes_value": [[0.023803703000000002, 125.5110473728, 158.34783934200001, 319.6948242432], [145.279602083, 288.0781860352, 158.34783934200001, 319.6948242432], [123.499206523, 290.7479858176, 138.675231926, 328.4069824], [72.105773894, 133.6101684736, 88.911438, 198.4029540864], [0.023803703000000002, 125.5110473728, 20.474060068, 164.589233408], [142.803894079, 143.567077632, 157.449645969, 196.5708007936]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048818_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three street lights.", "boxes_value": [[0.023803703000000002, 49.51104737279999, 158.34783934200001, 243.6948242432], [145.279602083, 212.07818603520002, 158.34783934200001, 243.6948242432], [123.499206523, 214.7479858176, 138.675231926, 252.4069824], [72.105773894, 57.6101684736, 88.911438, 122.4029540864], [0.023803703000000002, 49.51104737279999, 20.474060068, 88.58923340800001], [142.803894079, 67.56707763200001, 157.449645969, 120.5708007936]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048819.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[3.8251953152, 167.8083496203, 345.4802246144, 472.78515625589995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048819_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[3.8251953152, 76.80834962029999, 345.4802246144, 381.78515625589995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048819.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five pictures, and a handbag.", "boxes_value": [[3.8251953152, 167.8083496203, 345.4802246144, 472.78515625589995], [277.6742554112, 167.8083496203, 345.4802246144, 225.3204956121], [127.6564331008, 191.51293946459998, 254.6481323008, 400.2681884697], [6.4714965504, 230.9735717856, 103.7046508544, 396.0612792678], [3.8251953152, 407.7248535216, 71.6141357568, 472.78515625589995], [278.7288207872, 229.45135501200002, 409.3507080192, 425.84741208450004], [229.9974817792, 380.3913868635, 358.3857997824, 493.0229567616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048819_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five pictures, and a handbag.", "boxes_value": [[3.8251953152, 76.80834962029999, 345.4802246144, 381.78515625589995], [277.6742554112, 76.80834962029999, 345.4802246144, 134.3204956121], [127.6564331008, 100.51293946459998, 254.6481323008, 309.2681884697], [6.4714965504, 139.9735717856, 103.7046508544, 305.0612792678], [3.8251953152, 316.7248535216, 71.6141357568, 381.78515625589995], [278.7288207872, 138.45135501200002, 409.3507080192, 334.84741208450004], [229.9974817792, 289.3913868635, 358.3857997824, 402.0229567616]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048821.jpg", "text": "Could you tell me more about the area in the snapshot ? Provide the coordinates for each element you describe.", "boxes_value": [[391.4461669871, 186.7138672128, 678.9714355558, 495.2702636544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048821_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Provide the coordinates for each element you describe.", "boxes_value": [[72.44616698710001, 77.71386721280001, 359.9714355558, 386.2702636544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048821.jpg", "text": "Could you tell me more about the area in the snapshot ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, a cup, two bottles, and a bowl.", "boxes_value": [[391.4461669871, 186.7138672128, 678.9714355558, 495.2702636544], [500.49267578750005, 378.8190918144, 588.4570312375, 483.3965454336], [391.4461669871, 186.7138672128, 421.46398925730006, 233.6054076928], [428.7463379034, 186.8914795008, 441.53491213660004, 234.1383056896], [663.4447021651999, 268.6107177984, 678.9714355558, 291.4517822464], [585.2513428028001, 469.4049682432, 624.485107414, 495.2702636544]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048821_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a picture, a cup, two bottles, and a bowl.", "boxes_value": [[72.44616698710001, 77.71386721280001, 359.9714355558, 386.2702636544], [181.49267578750005, 269.8190918144, 269.4570312375, 374.3965454336], [72.44616698710001, 77.71386721280001, 102.46398925730006, 124.60540769279999], [109.74633790339999, 77.89147950079999, 122.53491213660004, 125.1383056896], [344.44470216519994, 159.6107177984, 359.9714355558, 182.4517822464], [266.2513428028001, 360.4049682432, 305.485107414, 386.2702636544]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048823.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each object you identify.", "boxes_value": [[10.5772094976, 0, 511.4946289152, 386.65112307280003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048823_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each object you identify.", "boxes_value": [[10.5772094976, 0, 511.4946289152, 386.65112307280003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048823.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two flowers, three candles, and a vase.", "boxes_value": [[10.5772094976, 0, 511.4946289152, 386.65112307280003], [131.6444091904, 0, 511.4946289152, 358.3707275404], [341.0188598784, 195.74639889379998, 360.4027709952, 360.5838622756], [146.2806396416, 198.04644776950002, 168.5145263616, 352.91699221079995], [10.5772094976, 208.01336671, 209.148864768, 386.65112307280003], [77.084289536, 332.2578124961, 125.917175296, 354.6051025238], [276.4956054528, 309.6152953769, 289.9104004096, 359.7749023228]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 6], [5]]}, {"image_path": "objects365_v1_00048823_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two flowers, three candles, and a vase.", "boxes_value": [[10.5772094976, 0, 511.4946289152, 386.65112307280003], [131.6444091904, 0, 511.4946289152, 358.3707275404], [341.0188598784, 195.74639889379998, 360.4027709952, 360.5838622756], [146.2806396416, 198.04644776950002, 168.5145263616, 352.91699221079995], [10.5772094976, 208.01336671, 209.148864768, 386.65112307280003], [77.084289536, 332.2578124961, 125.917175296, 354.6051025238], [276.4956054528, 309.6152953769, 289.9104004096, 359.7749023228]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 6], [5]]}, {"image_path": "objects365_v1_00048824.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[632.3867187164, 240.2828368896, 837.0505371588, 343.268310528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048824_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[51.38671871639997, 26.28283688959999, 256.0505371588, 129.26831052799997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048824.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a book, a pen, and a cup.", "boxes_value": [[632.3867187164, 240.2828368896, 837.0505371588, 343.268310528], [693.4074707184, 27.6694336, 1017.5187988158, 399.1347656192], [459.90112302759997, 227.1351928832, 902.1409912202, 484.0180053504], [746.9107666524001, 240.2828368896, 837.0505371588, 338.792053248], [814.89318849, 274.6112060416, 825.9765624604, 311.3878784], [632.3867187164, 273.171630848, 682.3820801206, 343.268310528]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048824_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a book, a pen, and a cup.", "boxes_value": [[51.38671871639997, 26.28283688959999, 256.0505371588, 129.26831052799997], [112.40747071839996, 0, 307, 155], [0, 13.135192883200006, 307, 155], [165.91076665240007, 26.28283688959999, 256.0505371588, 124.792053248], [233.89318848999994, 60.611206041599985, 244.97656246040003, 97.38787839999998], [51.38671871639997, 59.17163084800001, 101.3820801206, 129.26831052799997]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048825.jpg", "text": "In the image , elaborate on the details found within the section . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 137.059448256, 164.654174784, 352.557434064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048825_crop.jpg", "text": "In the image , elaborate on the details found within the section . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 54.059448255999996, 164.654174784, 269.557434064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048825.jpg", "text": "In the image , elaborate on the details found within the section . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a potted plant, a chair, two towels, and an umbrella.", "boxes_value": [[0, 137.059448256, 164.654174784, 352.557434064], [33.952453632, 221.879821776, 56.900756864, 246.35803223999997], [0, 279.216491712, 107.417053248, 352.557434064], [113.248413056, 304.45538328000004, 164.654174784, 333.56463624], [50.694457984, 273.48803712, 94.04870604799999, 293.926452624], [0.146240256, 137.059448256, 142.65057376, 197.488464336]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048825_crop.jpg", "text": "In the image , elaborate on the details found within the section . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a potted plant, a chair, two towels, and an umbrella.", "boxes_value": [[0, 54.059448255999996, 164.654174784, 269.557434064], [33.952453632, 138.879821776, 56.900756864, 163.35803223999997], [0, 196.216491712, 107.417053248, 269.557434064], [113.248413056, 221.45538328000004, 164.654174784, 250.56463624000003], [50.694457984, 190.48803712, 94.04870604799999, 210.92645262399998], [0.146240256, 54.059448255999996, 142.65057376, 114.48846433599999]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048826.jpg", "text": "Please help me understand the content present within the rectangle in . Please point out the objects and their coordinates.", "boxes_value": [[249.37158203849998, 281.5448608256, 298.7939453049, 359.7651367424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048826_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Please point out the objects and their coordinates.", "boxes_value": [[12.37158203849998, 20.544860825599983, 61.79394530489998, 98.76513674239999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048826.jpg", "text": "Please help me understand the content present within the rectangle in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a plate, a desk, two chairs, and a napkin.", "boxes_value": [[249.37158203849998, 281.5448608256, 298.7939453049, 359.7651367424], [249.37158203849998, 344.5550537216, 291.42297365909997, 359.7651367424], [139.3770141606, 211.1640014848, 723.6229248459, 513.0385742336], [202.1262207453, 270.5213623296, 272.5070800392, 352.773681664], [241.98046876889998, 251.0182495232, 288.61840821479996, 321.399108864], [276.0782470569, 281.5448608256, 298.7939453049, 333.665344256]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048826_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a plate, a desk, two chairs, and a napkin.", "boxes_value": [[12.37158203849998, 20.544860825599983, 61.79394530489998, 98.76513674239999], [12.37158203849998, 83.55505372160002, 54.422973659099966, 98.76513674239999], [0, 0, 74, 118], [0, 9.521362329599981, 35.50708003919999, 91.77368166399998], [4.980468768899982, 0, 51.618408214799956, 60.39910886400003], [39.07824705690001, 20.544860825599983, 61.79394530489998, 72.66534425600003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048827.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[68.5563964925, 319.2401122816, 379.2529297039, 381.3794555904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048827_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[68.5563964925, 16.240112281599977, 379.2529297039, 78.37945559040003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048827.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three pillows, and two couches.", "boxes_value": [[68.5563964925, 319.2401122816, 379.2529297039, 381.3794555904], [311.1243286222, 329.7214355456, 379.2529297039, 352.1814575104], [171.12371827110002, 336.45947264, 237.0063476851, 355.1760864256], [156.1503906164, 304.2667846656, 398.7182616859, 396.3527832064], [68.5563964925, 319.2401122816, 124.7064209074, 381.3794555904], [0, 272.0741577216, 251.23101807170002, 511.6473998848]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048827_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three pillows, and two couches.", "boxes_value": [[68.5563964925, 16.240112281599977, 379.2529297039, 78.37945559040003], [311.1243286222, 26.721435545600002, 379.2529297039, 49.18145751039998], [171.12371827110002, 33.45947264, 237.0063476851, 52.176086425599976], [156.1503906164, 1.266784665600028, 398.7182616859, 93], [68.5563964925, 16.240112281599977, 124.7064209074, 78.37945559040003], [0, 0, 251.23101807170002, 93]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048828.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[280.0950317084, 215.0900268544, 454.35241702039997, 314.5130615296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048828_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[44.09503170839997, 25.09002685440001, 218.35241702039997, 124.51306152960001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048828.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[280.0950317084, 215.0900268544, 454.35241702039997, 314.5130615296], [280.0950317084, 219.1078491136, 294.67022705209996, 265.8147582976], [316.5330200335, 234.6768188416, 338.89270022759996, 258.3615112192], [385.1885986442, 215.0900268544, 399.4869385036, 262.9727172608], [360.9147948996, 220.4103393792, 378.2056884676, 250.0044555776], [434.40136719789996, 254.6597289984, 454.35241702039997, 314.5130615296]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048828_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[44.09503170839997, 25.09002685440001, 218.35241702039997, 124.51306152960001], [44.09503170839997, 29.107849113599997, 58.67022705209996, 75.81475829760001], [80.53302003350001, 44.67681884160001, 102.89270022759996, 68.3615112192], [149.1885986442, 25.09002685440001, 163.48693850360002, 72.97271726079998], [124.9147948996, 30.410339379199996, 142.2056884676, 60.0044555776], [198.40136719789996, 64.6597289984, 218.35241702039997, 124.51306152960001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048831.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations.", "boxes_value": [[318.9571533312, 269.0481567232, 477.9453125376, 315.1553344512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048831_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations.", "boxes_value": [[39.957153331200004, 12.048156723200009, 198.9453125376, 58.15533445120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048831.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include three suvs, and two cars.", "boxes_value": [[318.9571533312, 269.0481567232, 477.9453125376, 315.1553344512], [318.9571533312, 285.7591552512, 369.8521728768, 315.1553344512], [366.3421631232, 278.0809936384, 405.6102295296, 303.3090820096], [404.77258298879997, 275.6562500096, 439.90844728319996, 296.9311523328], [432.81677245440005, 270.8210449408, 458.60449221120007, 289.0336303616], [456.8316650496, 269.0481567232, 477.9453125376, 284.6819458048]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048831_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include three suvs, and two cars.", "boxes_value": [[39.957153331200004, 12.048156723200009, 198.9453125376, 58.15533445120002], [39.957153331200004, 28.759155251200013, 90.85217287680001, 58.15533445120002], [87.34216312320001, 21.080993638400003, 126.61022952960002, 46.30908200959999], [125.77258298879997, 18.6562500096, 160.90844728319996, 39.931152332800025], [153.81677245440005, 13.821044940799993, 179.60449221120007, 32.03363036159999], [177.83166504960002, 12.048156723200009, 198.9453125376, 27.681945804800023]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5]]}, {"image_path": "objects365_v1_00048834.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object.", "boxes_value": [[279.7118530048, 83.3975219697, 512.162597632, 443.26715087220003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048834_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object.", "boxes_value": [[58.71185300479999, 83.3975219697, 291, 443.26715087220003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048834.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, a chair, two people, a pen, a projector, and a laptop.", "boxes_value": [[279.7118530048, 83.3975219697, 512.162597632, 443.26715087220003], [0.1610717696, 271.3439331069, 512.7227783168, 498.8649291816], [301.1762084864, 248.0949706923, 364.6865234432, 301.9172973861], [447.3003539968, 107.359252926, 511.6483154432, 174.0294189597], [279.7118530048, 110.854675317, 512.162597632, 443.26715087220003], [346.08587648, 132.7122192492, 483.6425781248, 329.1384887514], [277.8397216768, 328.45471192739996, 315.0830688256, 388.2243042108], [458.0385742336, 97.1172485535, 490.7044677632, 112.79687501309999], [485.26013184, 83.3975219697, 511.8284301824, 114.53906248320001]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048834_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Specify the location of each mentioned object. For your reference, objects involved in this region include two desks, a chair, two people, a pen, a projector, and a laptop.", "boxes_value": [[58.71185300479999, 83.3975219697, 291, 443.26715087220003], [0, 271.3439331069, 291, 498.8649291816], [80.17620848640001, 248.0949706923, 143.68652344319997, 301.9172973861], [226.30035399680003, 107.359252926, 290.6483154432, 174.0294189597], [58.71185300479999, 110.854675317, 291, 443.26715087220003], [125.08587648000002, 132.7122192492, 262.6425781248, 329.1384887514], [56.839721676800025, 328.45471192739996, 94.08306882559998, 388.2243042108], [237.0385742336, 97.1172485535, 269.7044677632, 112.79687501309999], [264.26013184, 83.3975219697, 290.8284301824, 114.53906248320001]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00048835.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates.", "boxes_value": [[0.0982666272, 401.5446777344, 177.67504883520002, 488.4096679936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048835_crop.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates.", "boxes_value": [[0.0982666272, 22.54467773440001, 177.67504883520002, 109.40966799360001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048835.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a desk, a plate, three tomatoes, an orange, and a pear.", "boxes_value": [[0.0982666272, 401.5446777344, 177.67504883520002, 488.4096679936], [0.0982666272, 401.5446777344, 177.67504883520002, 469.6475830272], [0.0982666272, 451.6552123904, 226.1762084736, 511.890502912], [0.200439456, 467.4911499264, 197.27215576320003, 511.9815673856], [128.7971191584, 447.6208496128, 174.6215820384, 489.9203491328], [101.1010742112, 456.1814575104, 126.2792968416, 488.4096679936], [28.041564969600003, 458.893127424, 57.442504896, 500.0627441152], [98.3364868416, 427.8159179776, 134.15209960319999, 460.7724609536], [57.8004150528, 450.260559104, 97.9417724544, 500.3293456896]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 8], [6], [7]]}, {"image_path": "objects365_v1_00048835_crop.jpg", "text": "Describe the bbox in the provided photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a desk, a plate, three tomatoes, an orange, and a pear.", "boxes_value": [[0.0982666272, 22.54467773440001, 177.67504883520002, 109.40966799360001], [0.0982666272, 22.54467773440001, 177.67504883520002, 90.64758302720003], [0.0982666272, 72.65521239039998, 222, 131], [0.200439456, 88.4911499264, 197.27215576320003, 131], [128.7971191584, 68.62084961279999, 174.6215820384, 110.92034913280003], [101.1010742112, 77.18145751039998, 126.2792968416, 109.40966799360001], [28.041564969600003, 79.893127424, 57.442504896, 121.06274411520002], [98.3364868416, 48.81591797760001, 134.15209960319999, 81.77246095359999], [57.8004150528, 71.26055910399998, 97.9417724544, 121.32934568960002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 8], [6], [7]]}, {"image_path": "objects365_v1_00048838.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[42.1589965824, 611.1343994084, 309.0718383616, 771.9649657908001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048838_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[42.1589965824, 41.134399408399986, 309.0718383616, 201.96496579080008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048838.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a backpack, and a handbag.", "boxes_value": [[42.1589965824, 611.1343994084, 309.0718383616, 771.9649657908001], [42.1589965824, 611.1343994084, 125.43853757439999, 771.7969970416], [269.0360717824, 630.5018310608, 309.0718383616, 761.5279540964], [113.1394653184, 628.5523681688, 158.6722412032, 771.9649657908001], [54.5571289088, 655.1857910215999, 109.8023071232, 715.9555663847999], [288.0860595712, 687.0516357164, 310.3378295808, 705.8153076304]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048838_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a backpack, and a handbag.", "boxes_value": [[42.1589965824, 41.134399408399986, 309.0718383616, 201.96496579080008], [42.1589965824, 41.134399408399986, 125.43853757439999, 201.79699704159998], [269.0360717824, 60.50183106079999, 309.0718383616, 191.52795409639998], [113.1394653184, 58.552368168799944, 158.6722412032, 201.96496579080008], [54.5571289088, 85.18579102159993, 109.8023071232, 145.9555663847999], [288.0860595712, 117.05163571640003, 310.3378295808, 135.81530763039996]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048839.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 143.9882812416, 281.36181639020003, 271.199768064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048839_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[0, 31.988281241599992, 281.36181639020003, 159.199768064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048839.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a sports car, and a car.", "boxes_value": [[0, 143.9882812416, 281.36181639020003, 271.199768064], [0.4818115337, 111.9542846464, 53.2994384769, 287.94982912], [55.5678100852, 113.5415038976, 113.988220192, 257.2471313408], [124.45550535390001, 156.9002075136, 141.46655273800002, 179.4573364224], [232.0977172822, 135.49401856, 265.242797876, 170.8093261824], [59.6734008479, 115.2982788096, 672.3297119440999, 437.2401122816], [0, 143.9882812416, 281.36181639020003, 271.199768064]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048839_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, a sports car, and a car.", "boxes_value": [[0, 31.988281241599992, 281.36181639020003, 159.199768064], [0.4818115337, 0, 53.2994384769, 175.94982912], [55.5678100852, 1.5415038975999948, 113.988220192, 145.24713134080002], [124.45550535390001, 44.90020751360001, 141.46655273800002, 67.4573364224], [232.0977172822, 23.49401856, 265.242797876, 58.809326182400014], [59.6734008479, 3.2982788096000064, 351, 191], [0, 31.988281241599992, 281.36181639020003, 159.199768064]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048840.jpg", "text": "Can you break down the region in the image for me? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[11.4376220748, 56.172790528, 535.6123047, 342.8315429888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048840_crop.jpg", "text": "Can you break down the region in the image for me? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[11.4376220748, 56.172790528, 535.6123047, 342.8315429888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048840.jpg", "text": "Can you break down the region in the image for me? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two guns, two people, two hats, and a backpack.", "boxes_value": [[11.4376220748, 56.172790528, 535.6123047, 342.8315429888], [413.1428222844, 241.2290039296, 579.5524902132, 407.638671872], [11.4376220748, 142.9312133632, 135.2774047824, 272.96295168], [51.3604736028, 60.43670656, 349.8060303, 483.3767699968], [360.0384521724, 53.6151122944, 683.2124023176, 439.8889770496], [105.320434554, 62.369628928, 204.17736815280003, 144.496948224], [282.26965335119996, 116.5624999936, 402.108520536, 342.8315429888], [455.5715331888, 56.172790528, 535.6123047, 102.9658203136]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 7], [6]]}, {"image_path": "objects365_v1_00048840_crop.jpg", "text": "Can you break down the region in the image for me? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two guns, two people, two hats, and a backpack.", "boxes_value": [[11.4376220748, 56.172790528, 535.6123047, 342.8315429888], [413.1428222844, 241.2290039296, 579.5524902132, 407.638671872], [11.4376220748, 142.9312133632, 135.2774047824, 272.96295168], [51.3604736028, 60.43670656, 349.8060303, 414], [360.0384521724, 53.6151122944, 666, 414], [105.320434554, 62.369628928, 204.17736815280003, 144.496948224], [282.26965335119996, 116.5624999936, 402.108520536, 342.8315429888], [455.5715331888, 56.172790528, 535.6123047, 102.9658203136]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 7], [6]]}, {"image_path": "objects365_v1_00048841.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each object you identify.", "boxes_value": [[230.2174388736, 377.05151370240003, 511.7576293888, 488.4813731328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048841_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each object you identify.", "boxes_value": [[71.2174388736, 28.05151370240003, 352.7576293888, 139.4813731328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048841.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a desk, two people, and two sneakers.", "boxes_value": [[230.2174388736, 377.05151370240003, 511.7576293888, 488.4813731328], [432.1433105408, 377.05151370240003, 455.704284672, 401.0147705088], [486.2703857664, 473.43774412799996, 511.7576293888, 486.4218749952], [172.3884277248, 206.8171386624, 388.479003904, 490.18115235839997], [423.2021484544, 355.30529786880004, 453.9522094592, 400.7176513536], [230.2174388736, 464.5788374016, 262.4714630144, 488.4813731328], [363.2652884992, 431.7488484864, 388.607736064, 465.7307668224]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048841_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a desk, two people, and two sneakers.", "boxes_value": [[71.2174388736, 28.05151370240003, 352.7576293888, 139.4813731328], [273.1433105408, 28.05151370240003, 296.704284672, 52.014770508799984], [327.2703857664, 124.43774412799996, 352.7576293888, 137.42187499520003], [13.38842772480001, 0, 229.47900390400002, 141.18115235839997], [264.2021484544, 6.305297868800039, 294.9522094592, 51.71765135359999], [71.2174388736, 115.57883740160003, 103.4714630144, 139.4813731328], [204.26528849919998, 82.7488484864, 229.607736064, 116.73076682240003]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048842.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify.", "boxes_value": [[349.512695307, 230.16668703439998, 485.1798095586, 277.9096069042]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048842_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify.", "boxes_value": [[34.512695307, 12.166687034399985, 170.17980955860003, 59.90960690420002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048842.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a glasses, and a car.", "boxes_value": [[349.512695307, 230.16668703439998, 485.1798095586, 277.9096069042], [297.2164306662, 242.133972197, 441.3121948182, 605.9249267576], [390.86419677000004, 212.7785034342, 496.544006364, 572.2204589992], [469.4360351682, 230.16668703439998, 485.1798095586, 268.9716796904], [349.512695307, 266.6809081776, 383.1987914802, 277.9096069042], [371.1119995134, 234.1466674872, 429.1070556414, 270.185729955]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048842_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a glasses, and a car.", "boxes_value": [[34.512695307, 12.166687034399985, 170.17980955860003, 59.90960690420002], [0, 24.13397219699999, 126.3121948182, 71], [75.86419677000004, 0, 181.54400636399998, 71], [154.43603516820002, 12.166687034399985, 170.17980955860003, 50.97167969039998], [34.512695307, 48.680908177599974, 68.19879148019999, 59.90960690420002], [56.11199951340001, 16.146667487200006, 114.1070556414, 52.185729955]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048843.jpg", "text": "Help me understand what's happening in the selected bounding box within . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[358.510009728, 80.878967296, 537.7457275392001, 334.8889160192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048843_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[45.510009728, 63.878967296, 224.74572753920006, 317.8889160192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048843.jpg", "text": "Help me understand what's happening in the selected bounding box within . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a handbag, and two speakers.", "boxes_value": [[358.510009728, 80.878967296, 537.7457275392001, 334.8889160192], [446.9827881216, 80.878967296, 468.3262939392, 129.4181518336], [358.510009728, 143.0223998976, 374.94970705919997, 178.9367675904], [369.7806396672, 323.3122558464, 398.722290048, 334.8889160192], [408.901855488, 147.3569946112, 428.1605224704, 166.3656005632], [510.17712399360005, 88.6902466048, 537.7457275392001, 114.78753664]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048843_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a handbag, and two speakers.", "boxes_value": [[45.510009728, 63.878967296, 224.74572753920006, 317.8889160192], [133.98278812159998, 63.878967296, 155.3262939392, 112.4181518336], [45.510009728, 126.0223998976, 61.949707059199966, 161.9367675904], [56.78063966719998, 306.3122558464, 85.72229004799999, 317.8889160192], [95.90185548800002, 130.3569946112, 115.1605224704, 149.3656005632], [197.17712399360005, 71.6902466048, 224.74572753920006, 97.78753664]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048844.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object.", "boxes_value": [[0.1337280533, 0, 416.2025146712, 348.8065185792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048844_crop.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object.", "boxes_value": [[0.1337280533, 0, 416.2025146712, 348.8065185792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048844.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a hanger, a lamp, a blackboard, and a cup.", "boxes_value": [[0.1337280533, 0, 416.2025146712, 348.8065185792], [71.6354980487, 67.3618774528, 328.0917968973, 392.2673950208], [384.6022949156, 6.560363776, 416.2025146712, 32.89385984], [0.1337280533, 0, 84.4008178394, 21.3071288832], [140.0722045967, 282.4002075136, 205.643249479, 348.8065185792], [2.3152465819000003, 254.3040771584, 20.0822753638, 288.3778686464]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048844_crop.jpg", "text": "I'd like some information about the specific region in the image . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a hanger, a lamp, a blackboard, and a cup.", "boxes_value": [[0.1337280533, 0, 416.2025146712, 348.8065185792], [71.6354980487, 67.3618774528, 328.0917968973, 392.2673950208], [384.6022949156, 6.560363776, 416.2025146712, 32.89385984], [0.1337280533, 0, 84.4008178394, 21.3071288832], [140.0722045967, 282.4002075136, 205.643249479, 348.8065185792], [2.3152465819000003, 254.3040771584, 20.0822753638, 288.3778686464]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048846.jpg", "text": "Fill me in on the details of the rectangular box within the image . Specify the location of each mentioned object.", "boxes_value": [[0.46887205530000003, 62.0076294144, 189.46411134660002, 353.5363769344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048846_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Specify the location of each mentioned object.", "boxes_value": [[0.46887205530000003, 62.0076294144, 189.46411134660002, 353.5363769344]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048846.jpg", "text": "Fill me in on the details of the rectangular box within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a car.", "boxes_value": [[0.46887205530000003, 62.0076294144, 189.46411134660002, 353.5363769344], [0.46887205530000003, 97.6979369984, 18.3140869207, 353.5363769344], [0.8568115475000001, 62.0076294144, 59.0476074516, 271.494323712], [111.02062987119999, 146.906249984, 189.46411134660002, 306.9948730368], [30.4541626257, 131.1413574144, 116.54022217810001, 242.6831054848], [13.3723144579, 87.1540527104, 674.6302490346, 453.3110961664]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048846_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include four people, and a car.", "boxes_value": [[0.46887205530000003, 62.0076294144, 189.46411134660002, 353.5363769344], [0.46887205530000003, 97.6979369984, 18.3140869207, 353.5363769344], [0.8568115475000001, 62.0076294144, 59.0476074516, 271.494323712], [111.02062987119999, 146.906249984, 189.46411134660002, 306.9948730368], [30.4541626257, 131.1413574144, 116.54022217810001, 242.6831054848], [13.3723144579, 87.1540527104, 236, 426]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048848.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference.", "boxes_value": [[0.0762939392, 400.643310574, 262.2045287936, 606.6770019323]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048848_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference.", "boxes_value": [[0.0762939392, 51.643310574, 262.2045287936, 257.67700193229996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048848.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a desk, a bench, and a blackboard.", "boxes_value": [[0.0762939392, 400.643310574, 262.2045287936, 606.6770019323], [190.5955810304, 487.4217529311, 301.9872436736, 632.6287841822], [175.345581056, 448.9650879285, 262.2045287936, 566.3242187421], [17.064025856, 448.3848877105, 109.0942993408, 606.6770019323], [0.0762939392, 462.99987789249997, 184.4915161088, 570.6301269825], [38.0778198016, 400.643310574, 92.5245971456, 464.16455074860005]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048848_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include two chairs, a desk, a bench, and a blackboard.", "boxes_value": [[0.0762939392, 51.643310574, 262.2045287936, 257.67700193229996], [190.5955810304, 138.4217529311, 301.9872436736, 283.6287841822], [175.345581056, 99.9650879285, 262.2045287936, 217.32421874210002], [17.064025856, 99.38488771049998, 109.0942993408, 257.67700193229996], [0.0762939392, 113.99987789249997, 184.4915161088, 221.63012698249997], [38.0778198016, 51.643310574, 92.5245971456, 115.16455074860005]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048849.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 295.2083129614, 56.1000976384, 608.2659912426]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048849_crop.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 79.2083129614, 56.1000976384, 392.26599124259997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048849.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and three flags.", "boxes_value": [[0, 295.2083129614, 56.1000976384, 608.2659912426], [27.000427264, 570.0765380709, 53.675720192, 626.2954101835], [27.9019165184, 560.324340807, 45.3576049664, 608.2659912426], [0, 298.66339112820003, 21.5490112512, 333.2144775508], [16.3663940608, 295.2083129614, 41.1279907328, 338.3971557929], [46.3106079232, 299.8151245345, 56.1000976384, 341.8522949516]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048849_crop.jpg", "text": "I would like a description of the content within the bbox in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and three flags.", "boxes_value": [[0, 79.2083129614, 56.1000976384, 392.26599124259997], [27.000427264, 354.0765380709, 53.675720192, 410.29541018350005], [27.9019165184, 344.324340807, 45.3576049664, 392.26599124259997], [0, 82.66339112820003, 21.5490112512, 117.21447755079998], [16.3663940608, 79.2083129614, 41.1279907328, 122.39715579289998], [46.3106079232, 83.8151245345, 56.1000976384, 125.85229495160002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048850.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates.", "boxes_value": [[277.7515259, 351.5749512, 374.57360839999996, 448.77355957500004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048850_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates.", "boxes_value": [[24.75152589999999, 24.574951199999987, 121.57360839999996, 121.77355957500004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048850.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a potted plant, a chair, a desk, a towel, a wine glass, a cup, and two bottles.", "boxes_value": [[277.7515259, 351.5749512, 374.57360839999996, 448.77355957500004], [156.12823484999998, 192.6913452, 339.98809815, 446.37145995], [277.7515259, 390.24694822500004, 374.57360839999996, 438.33740235], [260.09582520000004, 382.7294922, 499.4805908, 574.58691405], [301.02832029999996, 423.73205564999995, 360.77648925, 448.77355957500004], [323.2295532, 404.74023435, 347.94714355, 453.39477539999996], [296.65960695, 370.078979475, 314.68170165, 390.794067375], [326.16448975000003, 355.9700928, 364.15960695, 395.0279541], [319.74645995000003, 351.5749512, 330.6781616, 387.298095675]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00048850_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a potted plant, a chair, a desk, a towel, a wine glass, a cup, and two bottles.", "boxes_value": [[24.75152589999999, 24.574951199999987, 121.57360839999996, 121.77355957500004], [0, 0, 86.98809814999998, 119.37145994999997], [24.75152589999999, 63.24694822500004, 121.57360839999996, 111.33740234999999], [7.095825200000036, 55.72949219999998, 145, 146], [48.02832029999996, 96.73205564999995, 107.77648925, 121.77355957500004], [70.2295532, 77.74023434999998, 94.94714355000002, 126.39477539999996], [43.65960695000001, 43.07897947499998, 61.68170164999998, 63.794067375], [73.16448975000003, 28.970092799999975, 111.15960695000001, 68.02795409999999], [66.74645995000003, 24.574951199999987, 77.67816160000001, 60.29809567500001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00048855.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[591.660522424, 296.65148928, 768.257202137, 365.6891479552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048855_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[44.660522423999964, 17.65148928000002, 221.25720213700004, 86.68914795519999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048855.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, a couch, a desk, and a vase.", "boxes_value": [[591.660522424, 296.65148928, 768.257202137, 365.6891479552], [628.9171142462001, 311.3297119232, 718.7672118972, 420.6248779264], [695.969360382, 327.4222412288, 769.0563964605999, 384.4166870016], [467.99169920450004, 293.22558592, 768.6595459361, 511.6654663168], [591.660522424, 303.5661621248, 768.257202137, 365.6891479552], [658.3708496278, 296.65148928, 673.576171893, 317.2853393408]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048855_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, a couch, a desk, and a vase.", "boxes_value": [[44.660522423999964, 17.65148928000002, 221.25720213700004, 86.68914795519999], [81.91711424620007, 32.32971192320002, 171.7672118972, 103], [148.969360382, 48.422241228799976, 222, 103], [0, 14.225585920000015, 221.65954593610002, 103], [44.660522423999964, 24.566162124799973, 221.25720213700004, 86.68914795519999], [111.3708496278, 17.65148928000002, 126.57617189300004, 38.28533934080002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048856.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[161.6902466048, 544.148071296, 416.6782836736, 585.1118164224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048856_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[64.6902466048, 11.148071296000012, 319.6782836736, 52.11181642240001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048856.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a high heels, and two leather shoes.", "boxes_value": [[161.6902466048, 544.148071296, 416.6782836736, 585.1118164224], [137.7077026304, 181.45269772799998, 363.5518798848, 609.0217284864], [148.6710204928, 160.2569580288, 417.63751219200003, 588.5568847872], [161.6902466048, 550.5917968896, 192.9884643328, 585.1118164224], [383.5390624768, 544.148071296, 416.6782836736, 585.1118164224], [223.6580200448, 549.1295165951999, 287.3264160256, 576.9844970496]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048856_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, a high heels, and two leather shoes.", "boxes_value": [[64.6902466048, 11.148071296000012, 319.6782836736, 52.11181642240001], [40.70770263040001, 0, 266.5518798848, 62], [51.67102049280001, 0, 320.63751219200003, 55.55688478720003], [64.6902466048, 17.591796889600005, 95.98846433279999, 52.11181642240001], [286.5390624768, 11.148071296000012, 319.6782836736, 52.11181642240001], [126.6580200448, 16.129516595199902, 190.3264160256, 43.98449704960001]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048857.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[135.1881713664, 199.760864256, 194.6492919552, 458.0541991936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048857_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[15.188171366400013, 64.76086425599999, 74.6492919552, 323.0541991936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048857.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a leather shoes, and a cell phone.", "boxes_value": [[135.1881713664, 199.760864256, 194.6492919552, 458.0541991936], [125.63549806079999, 230.542297344, 146.4114990336, 348.066040064], [135.1881713664, 216.6515503104, 194.6492919552, 457.9135131648], [151.29833986559998, 201.6642456064, 265.973388672, 493.84405519359996], [153.2920531968, 439.983947776, 170.2227172608, 458.0541991936], [141.621826176, 199.760864256, 194.1552123648, 248.7658691584]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048857_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a leather shoes, and a cell phone.", "boxes_value": [[15.188171366400013, 64.76086425599999, 74.6492919552, 323.0541991936], [5.635498060799989, 95.54229734399999, 26.411499033599995, 213.066040064], [15.188171366400013, 81.65155031040001, 74.6492919552, 322.9135131648], [31.298339865599985, 66.66424560639999, 89, 358.84405519359996], [33.2920531968, 304.983947776, 50.22271726080001, 323.0541991936], [21.621826176000013, 64.76086425599999, 74.15521236480001, 113.7658691584]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048858.jpg", "text": "Explain the content within the rectangular region of the image . Please point out the objects and their coordinates.", "boxes_value": [[440.0184326126, 314.1210937344, 495.6184081796, 395.7573852672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048858_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please point out the objects and their coordinates.", "boxes_value": [[14.018432612600009, 21.121093734400006, 69.61840817960001, 102.7573852672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048858.jpg", "text": "Explain the content within the rectangular region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a backpack, and a bicycle.", "boxes_value": [[440.0184326126, 314.1210937344, 495.6184081796, 395.7573852672], [440.0184326126, 316.3038940672, 459.88183591200004, 369.3456420864], [454.2065429844, 330.0554809344, 489.34948731279997, 395.7573852672], [474.94311519900003, 314.1210937344, 493.2785644338, 352.3198852608], [478.3461913802, 343.6450195456, 495.6184081796, 370.8328247296], [426.81750489399997, 353.0485839872, 509.97119139299997, 412.2575683584]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048858_crop.jpg", "text": "Explain the content within the rectangular region of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a backpack, and a bicycle.", "boxes_value": [[14.018432612600009, 21.121093734400006, 69.61840817960001, 102.7573852672], [14.018432612600009, 23.30389406720002, 33.88183591200004, 76.3456420864], [28.206542984400016, 37.05548093440001, 63.349487312799965, 102.7573852672], [48.94311519900003, 21.121093734400006, 67.2785644338, 59.31988526079999], [52.346191380200025, 50.64501954560001, 69.61840817960001, 77.83282472960002], [0.817504893999967, 60.048583987200004, 83, 119.25756835840002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048865.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give coordinates for the items you reference.", "boxes_value": [[107.9422607579, 192.0855102464, 375.37768551560004, 512.5025634816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048865_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give coordinates for the items you reference.", "boxes_value": [[66.9422607579, 81.08551024639999, 334.37768551560004, 401]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048865.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two cabinets, a cup, a gas stove, and a wine glass.", "boxes_value": [[107.9422607579, 192.0855102464, 375.37768551560004, 512.5025634816], [193.8190307649, 192.0855102464, 252.11865232219998, 253.1613769728], [247.49603267810002, 300.0889282048, 375.37768551560004, 512.5025634816], [46.6422729652, 344.1611328, 245.32855227119998, 513.2250976768], [335.024414053, 244.6776733184, 353.41931154310004, 271.287841792], [172.37506104020002, 272.4974975488, 366.9724120851, 319.9927368192], [107.9422607579, 268.8694457856, 189.54998778520002, 320.4375610368]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048865_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Give coordinates for the items you reference. For your reference, objects involved in this region include a picture, two cabinets, a cup, a gas stove, and a wine glass.", "boxes_value": [[66.9422607579, 81.08551024639999, 334.37768551560004, 401], [152.8190307649, 81.08551024639999, 211.11865232219998, 142.1613769728], [206.49603267810002, 189.08892820480003, 334.37768551560004, 401], [5.6422729652, 233.16113280000002, 204.32855227119998, 401], [294.024414053, 133.6776733184, 312.41931154310004, 160.287841792], [131.37506104020002, 161.49749754880003, 325.9724120851, 208.9927368192], [66.9422607579, 157.86944578560002, 148.54998778520002, 209.4375610368]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048866.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[41.8799438171, 208.885253888, 479.244262684, 511.2941894656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048866_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object.", "boxes_value": [[41.8799438171, 75.885253888, 479.244262684, 378.2941894656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048866.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cabinets, and three desks.", "boxes_value": [[41.8799438171, 208.885253888, 479.244262684, 511.2941894656], [41.8799438171, 208.885253888, 299.1184692472, 294.1300048896], [340.1818237283, 211.4918823424, 549.5767822355, 269.6571655168], [320.70263674980004, 303.2711792128, 547.6644286795, 418.8480224768], [229.6784057838, 310.457275392, 314.7142333795, 371.53936768], [42.2349853849, 300.330139136, 314.02447509449996, 511.2941894656], [284.33001705699996, 368.3005981696, 479.244262684, 510.8497924608]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4, 6]]}, {"image_path": "objects365_v1_00048866_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three cabinets, and three desks.", "boxes_value": [[41.8799438171, 75.885253888, 479.244262684, 378.2941894656], [41.8799438171, 75.885253888, 299.1184692472, 161.1300048896], [340.1818237283, 78.4918823424, 549.5767822355, 136.65716551679998], [320.70263674980004, 170.2711792128, 547.6644286795, 285.8480224768], [229.6784057838, 177.45727539199999, 314.7142333795, 238.53936768], [42.2349853849, 167.330139136, 314.02447509449996, 378.2941894656], [284.33001705699996, 235.3005981696, 479.244262684, 377.8497924608]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4, 6]]}, {"image_path": "objects365_v1_00048867.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[56.0044556004, 301.021728512, 615.6357421693, 511.8247070208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048867_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[56.0044556004, 53.02172851199998, 615.6357421693, 263.8247070208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048867.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, and three bowls.", "boxes_value": [[56.0044556004, 301.021728512, 615.6357421693, 511.8247070208], [95.6846313338, 130.4003295744, 680.9400634625, 510.8557128704], [56.0044556004, 350.0703735296, 165.3467406926, 511.8247070208], [277.25854493410003, 365.9968871936, 465.890136721, 511.0371093504], [156.20410153150002, 301.021728512, 311.67718508950003, 413.9871215616], [445.6052246143, 306.779968256, 615.6357421693, 414.5047607296]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048867_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, and three bowls.", "boxes_value": [[56.0044556004, 53.02172851199998, 615.6357421693, 263.8247070208], [95.6846313338, 0, 680.9400634625, 262.8557128704], [56.0044556004, 102.07037352959998, 165.3467406926, 263.8247070208], [277.25854493410003, 117.9968871936, 465.890136721, 263.0371093504], [156.20410153150002, 53.02172851199998, 311.67718508950003, 165.98712156160002], [445.6052246143, 58.77996825600002, 615.6357421693, 166.50476072959998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048868.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object.", "boxes_value": [[122.64721676580001, 242.5376586752, 217.6817626984, 272.7310180864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048868_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object.", "boxes_value": [[24.64721676580001, 8.537658675199992, 119.68176269840001, 38.73101808640001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048868.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five horses.", "boxes_value": [[122.64721676580001, 242.5376586752, 217.6817626984, 272.7310180864], [186.5621948159, 248.6273193472, 220.66931149500002, 279.3717651456], [195.29565430809998, 245.580932608, 217.6817626984, 267.967041024], [149.3701172176, 249.4786376704, 183.14965819510002, 272.7310180864], [128.31567379519998, 242.5376586752, 146.5936889837, 266.0213623296], [122.64721676580001, 248.3218383872, 141.5036010746, 268.9134521344]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048868_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include five horses.", "boxes_value": [[24.64721676580001, 8.537658675199992, 119.68176269840001, 38.73101808640001], [88.5621948159, 14.6273193472, 122.66931149500002, 45.37176514560002], [97.29565430809998, 11.580932608000012, 119.68176269840001, 33.967041024000025], [51.37011721760001, 15.47863767039999, 85.14965819510002, 38.73101808640001], [30.315673795199984, 8.537658675199992, 48.59368898369999, 32.02136232959998], [24.64721676580001, 14.321838387199989, 43.5036010746, 34.91345213440002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048869.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[331.2246093375, 71.637512192, 670.3674316425, 276.4672851456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048869_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[85.2246093375, 51.637512192, 424.36743164250004, 256.4672851456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048869.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a person, a suv, and a car.", "boxes_value": [[331.2246093375, 71.637512192, 670.3674316425, 276.4672851456], [614.9232177795001, 131.2125243904, 727.91369629, 192.5272216576], [603.6777343484999, 121.5735473664, 638.2175293065, 143.5289916928], [331.2246093375, 195.5850829824, 398.339599614, 276.4672851456], [616.187133798, 77.773986816, 670.3674316425, 109.264221184], [479.596801743, 71.637512192, 495.78881839099995, 90.5709838848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048869_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, a person, a suv, and a car.", "boxes_value": [[85.2246093375, 51.637512192, 424.36743164250004, 256.4672851456], [368.9232177795001, 111.2125243904, 481.91369628999996, 172.5272216576], [357.67773434849994, 101.5735473664, 392.21752930649996, 123.52899169279999], [85.2246093375, 175.5850829824, 152.339599614, 256.4672851456], [370.187133798, 57.773986816000004, 424.36743164250004, 89.264221184], [233.59680174300001, 51.637512192, 249.78881839099995, 70.5709838848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048872.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[7.3941650482, 226.8816528384, 269.9722900243, 348.3793945088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048872_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[7.3941650482, 30.881652838399987, 269.9722900243, 152.37939450879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048872.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two desks, a person, a trolley, and a machinery vehicle.", "boxes_value": [[7.3941650482, 226.8816528384, 269.9722900243, 348.3793945088], [7.3941650482, 276.93920896, 87.62786863950001, 328.9528198144], [183.3728027285, 255.1500854272, 211.33483884139997, 269.3428955136], [143.8228759739, 244.2157592576, 189.8564452792, 348.3793945088], [82.59191896349999, 248.391662592, 144.0543823623, 275.9748535296], [249.58129882940003, 226.8816528384, 269.9722900243, 268.7965698048]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048872_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two desks, a person, a trolley, and a machinery vehicle.", "boxes_value": [[7.3941650482, 30.881652838399987, 269.9722900243, 152.37939450879998], [7.3941650482, 80.93920895999997, 87.62786863950001, 132.95281981440002], [183.3728027285, 59.1500854272, 211.33483884139997, 73.34289551360001], [143.8228759739, 48.21575925760001, 189.8564452792, 152.37939450879998], [82.59191896349999, 52.39166259199999, 144.0543823623, 79.97485352960001], [249.58129882940003, 30.881652838399987, 269.9722900243, 72.79656980480001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048874.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[109.97436525660001, 116.1755371008, 258.3715210046, 473.6315917824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048874_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[37.97436525660001, 90.1755371008, 186.37152100460003, 447.6315917824]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048874.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a necklace, and three sandals.", "boxes_value": [[109.97436525660001, 116.1755371008, 258.3715210046, 473.6315917824], [147.4854736197, 116.1755371008, 258.3715210046, 433.7982177792], [191.2793579433, 174.2550659072, 216.13378907179998, 190.0356445184], [109.97436525660001, 449.2686767616, 157.2384033328, 473.6315917824], [185.01208498999998, 416.1351318528, 206.45147705090002, 432.2146606592], [229.3526000993, 408.8262939648, 249.33020018079998, 424.9057617408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048874_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a necklace, and three sandals.", "boxes_value": [[37.97436525660001, 90.1755371008, 186.37152100460003, 447.6315917824], [75.4854736197, 90.1755371008, 186.37152100460003, 407.7982177792], [119.2793579433, 148.2550659072, 144.13378907179998, 164.0356445184], [37.97436525660001, 423.2686767616, 85.2384033328, 447.6315917824], [113.01208498999998, 390.1351318528, 134.45147705090002, 406.2146606592], [157.3526000993, 382.8262939648, 177.33020018079998, 398.9057617408]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048875.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[284.751831024, 191.288940432, 719.84191896, 268.064941392]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048875_crop.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe.", "boxes_value": [[109.75183102400001, 19.288940432000004, 544.84191896, 96.06494139199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048875.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a person, and three ties.", "boxes_value": [[284.751831024, 191.288940432, 719.84191896, 268.064941392], [358.447509792, 163.888977072, 473.24694823199997, 294.09765624], [284.751831024, 201.376708992, 300.13934328, 228.824646], [284.90905764, 131.15734862399998, 420.779907216, 300.995910624], [334.650512664, 204.510742176, 347.527099584, 234.824401872], [652.439819304, 191.288940432, 687.256835904, 256.01287843200004], [685.917724608, 208.25109864, 719.84191896, 268.064941392]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048875_crop.jpg", "text": "Describe the selected rectangular area in the photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, a person, and three ties.", "boxes_value": [[109.75183102400001, 19.288940432000004, 544.84191896, 96.06494139199998], [183.447509792, 0, 298.24694823199997, 115], [109.75183102400001, 29.376708992000005, 125.13934327999999, 56.824646], [109.90905764000001, 0, 245.77990721600003, 115], [159.65051266400002, 32.51074217600001, 172.52709958399998, 62.82440187200001], [477.439819304, 19.288940432000004, 512.256835904, 84.01287843200004], [510.91772460799996, 36.25109864000001, 544.84191896, 96.06494139199998]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048876.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[38.3143310476, 120.6984252928, 314.6351318028, 485.8715209728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048876_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[38.3143310476, 91.6984252928, 314.6351318028, 456.8715209728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048876.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, two hats, and two leather shoes.", "boxes_value": [[38.3143310476, 120.6984252928, 314.6351318028, 485.8715209728], [38.3143310476, 123.6401977344, 187.24151610520002, 485.8715209728], [188.982971177, 190.4701538304, 424.9558105702, 499.5858154496], [180.0837402389, 120.6984252928, 268.5061645283, 194.0855102464], [251.6243286076, 300.4144897536, 314.6351318028, 326.452453632], [112.9185790938, 343.6908569088, 128.48632813700002, 377.5444946432], [129.4747314504, 344.6792602624, 145.53668215029998, 377.2974243328]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048876_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, two hats, and two leather shoes.", "boxes_value": [[38.3143310476, 91.6984252928, 314.6351318028, 456.8715209728], [38.3143310476, 94.6401977344, 187.24151610520002, 456.8715209728], [188.982971177, 161.4701538304, 383, 470.5858154496], [180.0837402389, 91.6984252928, 268.5061645283, 165.0855102464], [251.6243286076, 271.4144897536, 314.6351318028, 297.452453632], [112.9185790938, 314.6908569088, 128.48632813700002, 348.5444946432], [129.4747314504, 315.6792602624, 145.53668215029998, 348.2974243328]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048877.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference.", "boxes_value": [[284.47937011199997, 180.6031494144, 477.679199232, 303.510070784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048877_crop.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference.", "boxes_value": [[48.47937011199997, 31.603149414400008, 241.67919923199997, 154.510070784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048877.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball glove, two people, two helmets, and a hat.", "boxes_value": [[284.47937011199997, 180.6031494144, 477.679199232, 303.510070784], [284.47937011199997, 277.5385132032, 307.395385728, 303.510070784], [286.43139648, 182.8488769536, 374.0718994176, 413.8130492928], [403.5109863168, 207.3400879104, 465.78063966720003, 430.2001342976], [430.15637207040004, 205.1411132928, 466.1867675904, 233.4063110144], [443.20190430720004, 231.5426635776, 477.679199232, 259.186645504], [323.3077392384, 180.6031494144, 362.1335449344, 207.625976576]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048877_crop.jpg", "text": "What does the selected region in the image encompass? Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball glove, two people, two helmets, and a hat.", "boxes_value": [[48.47937011199997, 31.603149414400008, 241.67919923199997, 154.510070784], [48.47937011199997, 128.5385132032, 71.39538572800001, 154.510070784], [50.43139647999999, 33.84887695360001, 138.07189941759998, 185], [167.51098631679997, 58.3400879104, 229.78063966720003, 185], [194.15637207040004, 56.141113292799986, 230.1867675904, 84.40631101439999], [207.20190430720004, 82.5426635776, 241.67919923199997, 110.18664550400001], [87.30773923840002, 31.603149414400008, 126.13354493439999, 58.625976576]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048878.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each object you identify.", "boxes_value": [[163.3140869376, 187.256042496, 288.7614746112, 332.422851584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048878_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each object you identify.", "boxes_value": [[32.314086937599996, 37.25604249599999, 157.76147461120001, 182.422851584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048878.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a backpack, a desk, and two chairs.", "boxes_value": [[163.3140869376, 187.256042496, 288.7614746112, 332.422851584], [253.66302489600002, 179.7261352448, 298.0195312128, 233.0115356672], [215.35510256639998, 147.7548217856, 248.190490752, 222.3544311296], [172.4799194112, 187.256042496, 288.7614746112, 332.422851584], [199.83569333760002, 170.0417480704, 233.5616454912, 215.6827392512], [163.3140869376, 257.1263427584, 234.197875968, 323.578308096], [187.5740356608, 296.6353759744, 247.8772582656, 370.8014526464], [199.35742187520003, 275.14801024, 277.682251008, 402.685913088]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048878_crop.jpg", "text": "I'm interested in the selected rectangle in . Can you tell me more about it? Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a backpack, a desk, and two chairs.", "boxes_value": [[32.314086937599996, 37.25604249599999, 157.76147461120001, 182.422851584], [122.66302489600002, 29.72613524479999, 167.01953121280002, 83.01153566720001], [84.35510256639998, 0, 117.19049075199999, 72.35443112959999], [41.4799194112, 37.25604249599999, 157.76147461120001, 182.422851584], [68.83569333760002, 20.04174807039999, 102.56164549120001, 65.68273925119999], [32.314086937599996, 107.12634275840003, 103.197875968, 173.578308096], [56.57403566080001, 146.6353759744, 116.8772582656, 218], [68.35742187520003, 125.14801024000002, 146.68225100799998, 218]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00048879.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates.", "boxes_value": [[232.1640625, 315.20391845, 482.08587645, 384.4421997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048879_crop.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates.", "boxes_value": [[63.1640625, 18.203918450000003, 313.08587645, 87.4421997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048879.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a suv, three cars, and a van.", "boxes_value": [[232.1640625, 315.20391845, 482.08587645, 384.4421997], [232.1640625, 315.20391845, 353.09393309999996, 384.4421997], [336.49572755, 324.45147705, 425.56329345, 378.9885254], [382.496521, 325.87420655, 451.49768065, 374.00903320000003], [413.32177735, 327.77111815, 482.08587645, 372.34924315], [429.2086792, 321.13183595, 492.7561035, 356.69946289999996]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048879_crop.jpg", "text": "Please elucidate the area of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a suv, three cars, and a van.", "boxes_value": [[63.1640625, 18.203918450000003, 313.08587645, 87.4421997], [63.1640625, 18.203918450000003, 184.09393309999996, 87.4421997], [167.49572755000003, 27.451477049999994, 256.56329345, 81.98852540000001], [213.49652099999997, 28.874206549999997, 282.49768065, 77.00903320000003], [244.32177735, 30.771118150000007, 313.08587645, 75.34924315], [260.2086792, 24.13183594999998, 323.7561035, 59.69946289999996]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048881.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.752746547200005, 33.9277954048, 446.9329833984, 158.4886474752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048881_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[53.752746547200005, 31.9277954048, 446.9329833984, 156.4886474752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048881.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three breads, and three plates.", "boxes_value": [[53.752746547200005, 33.9277954048, 446.9329833984, 158.4886474752], [270.7508544768, 65.119140608, 395.5162353408, 146.2166137856], [343.5306396672, 9.667846656, 487.70397949439996, 72.743652352], [78.0577392384, 33.9277954048, 199.35742187520003, 114.332153344], [329.6058349824, 17.8752441344, 532.0174560767999, 91.3165893632], [230.19122311680002, 71.612854016, 446.9329833984, 158.4886474752], [53.752746547200005, 44.7440185344, 231.0868530432, 131.6198119936]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048881_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three breads, and three plates.", "boxes_value": [[53.752746547200005, 31.9277954048, 446.9329833984, 156.4886474752], [270.7508544768, 63.119140607999995, 395.5162353408, 144.2166137856], [343.5306396672, 7.667846656, 487.70397949439996, 70.743652352], [78.0577392384, 31.9277954048, 199.35742187520003, 112.332153344], [329.6058349824, 15.875244134399999, 532.0174560767999, 89.3165893632], [230.19122311680002, 69.612854016, 446.9329833984, 156.4886474752], [53.752746547200005, 42.7440185344, 231.0868530432, 129.6198119936]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00048887.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[0, 31.284973132799998, 354.749206528, 374.9158935552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048887_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify.", "boxes_value": [[0, 31.284973132799998, 354.749206528, 374.9158935552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048887.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a golf club, a person, three sneakers, and a hat.", "boxes_value": [[0, 31.284973132799998, 354.749206528, 374.9158935552], [333.8591919104, 184.3562621952, 359.643920896, 362.33386229760004], [0, 31.821105945599996, 79.1248779264, 374.4571533312], [318.6511230464, 334.3129883136, 354.749206528, 358.5756835584], [0, 351.6478271232, 21.9315185664, 374.9158935552], [15.9573364224, 320.5189208832, 52.4315185664, 362.3385009408], [8.085144064, 31.284973132799998, 52.7647094784, 70.072692864]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048887_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Include the coordinates for each object you identify. For your reference, objects involved in this region include a golf club, a person, three sneakers, and a hat.", "boxes_value": [[0, 31.284973132799998, 354.749206528, 374.9158935552], [333.8591919104, 184.3562621952, 359.643920896, 362.33386229760004], [0, 31.821105945599996, 79.1248779264, 374.4571533312], [318.6511230464, 334.3129883136, 354.749206528, 358.5756835584], [0, 351.6478271232, 21.9315185664, 374.9158935552], [15.9573364224, 320.5189208832, 52.4315185664, 362.3385009408], [8.085144064, 31.284973132799998, 52.7647094784, 70.072692864]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048888.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[70.117980933, 200.4679565312, 283.272827145, 271.57757568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048888_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[54.117980933, 18.467956531200002, 267.272827145, 89.57757568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048888.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a handbag, three chairs, and two desks.", "boxes_value": [[70.117980933, 200.4679565312, 283.272827145, 271.57757568], [225.98608401599998, 241.574646016, 243.558227528, 266.9566650368], [172.990600552, 205.3598022656, 216.44085694, 262.8796386816], [68.946533194, 233.45465088, 119.490661598, 259.524780288], [117.36248778100001, 200.4679565312, 148.753051755, 252.6082153472], [208.265136695, 200.8424682496, 283.272827145, 271.57757568], [70.117980933, 219.3569946112, 144.651000956, 234.5484619264]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048888_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a handbag, three chairs, and two desks.", "boxes_value": [[54.117980933, 18.467956531200002, 267.272827145, 89.57757568], [209.98608401599998, 59.574646016, 227.558227528, 84.95666503680002], [156.990600552, 23.359802265599996, 200.44085694, 80.8796386816], [52.946533194, 51.45465088, 103.490661598, 77.52478028799999], [101.36248778100001, 18.467956531200002, 132.753051755, 70.6082153472], [192.265136695, 18.84246824959999, 267.272827145, 89.57757568], [54.117980933, 37.35699461120001, 128.651000956, 52.548461926399995]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00048892.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object.", "boxes_value": [[1.1272583168, 465.35607911920005, 274.983825664, 515.3424072475999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048892_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object.", "boxes_value": [[1.1272583168, 13.356079119200047, 274.983825664, 63.34240724759991]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048892.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three cars, a truck, and a pickup truck.", "boxes_value": [[1.1272583168, 465.35607911920005, 274.983825664, 515.3424072475999], [1.1272583168, 493.2801513352, 65.1616821248, 515.3424072475999], [45.7899169792, 481.71093748759995, 108.20996096, 502.1589355328], [123.930297856, 472.73608398880003, 170.2841186304, 492.3385009384], [232.3198852608, 479.19335940959996, 274.983825664, 497.4121094108], [10.956542976, 465.35607911920005, 41.706176768, 481.8697510116]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048892_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include three cars, a truck, and a pickup truck.", "boxes_value": [[1.1272583168, 13.356079119200047, 274.983825664, 63.34240724759991], [1.1272583168, 41.2801513352, 65.1616821248, 63.34240724759991], [45.7899169792, 29.71093748759995, 108.20996096, 50.1589355328], [123.930297856, 20.736083988800033, 170.2841186304, 40.33850093839999], [232.3198852608, 27.193359409599964, 274.983825664, 45.412109410799985], [10.956542976, 13.356079119200047, 41.706176768, 29.869751011599988]], "boxes_seq": [[0], [0], [1, 2, 5], [3], [4]]}, {"image_path": "objects365_v1_00048895.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for all objects that you mention.", "boxes_value": [[371.2600097648, 255.0057983488, 509.7718505636, 392.6374511616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048895_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for all objects that you mention.", "boxes_value": [[35.260009764799975, 35.0057983488, 173.77185056360003, 172.63745116159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048895.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a lamp, a person, and a street lights.", "boxes_value": [[371.2600097648, 255.0057983488, 509.7718505636, 392.6374511616], [371.2600097648, 255.0057983488, 468.48583984159995, 306.63604736], [390.7052001908, 295.2371215872, 445.01757815720003, 332.7864379904], [410.17968747640003, 216.6305542144, 472.69042968440004, 384.6788329984], [481.406860352, 274.3982543872, 509.7718505636, 311.8546753024], [461.94238281839995, 263.0357055488, 489.0766601256, 392.6374511616]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048895_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, a vase, a lamp, a person, and a street lights.", "boxes_value": [[35.260009764799975, 35.0057983488, 173.77185056360003, 172.63745116159998], [35.260009764799975, 35.0057983488, 132.48583984159995, 86.63604736000002], [54.705200190799985, 75.23712158720002, 109.01757815720003, 112.78643799039997], [74.17968747640003, 0, 136.69042968440004, 164.67883299840003], [145.40686035200002, 54.398254387199984, 173.77185056360003, 91.85467530239998], [125.94238281839995, 43.035705548800024, 153.07666012559997, 172.63745116159998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048897.jpg", "text": "Please elucidate the area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[427.551635706, 102.3588867072, 468.92797848000004, 292.0872192512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048897_crop.jpg", "text": "Please elucidate the area of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[10.551635706000013, 48.3588867072, 51.927978480000036, 238.0872192512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048897.jpg", "text": "Please elucidate the area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a person, two boots, a bottle, and a tea pot.", "boxes_value": [[427.551635706, 102.3588867072, 468.92797848000004, 292.0872192512], [397.31555175, 120.2034912256, 490.66918945199996, 222.8924560384], [427.551635706, 115.7918090752, 468.92797848000004, 291.6409301504], [426.767334006, 267.2139282432, 451.83496090200003, 285.6745605632], [443.67333987599994, 271.8776855552, 465.82617190200006, 292.0872192512], [454.306030308, 95.8707885568, 465.87182616, 123.5158691328], [430.08386233799996, 102.3588867072, 451.48510742400003, 119.6679077376]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048897_crop.jpg", "text": "Please elucidate the area of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a person, two boots, a bottle, and a tea pot.", "boxes_value": [[10.551635706000013, 48.3588867072, 51.927978480000036, 238.0872192512], [0, 66.2034912256, 62, 168.8924560384], [10.551635706000013, 61.79180907520001, 51.927978480000036, 237.64093015039998], [9.767334005999999, 213.21392824319997, 34.834960902000034, 231.67456056319998], [26.673339875999943, 217.8776855552, 48.826171902000056, 238.0872192512], [37.306030308000004, 41.870788556799994, 48.87182616000001, 69.5158691328], [13.08386233799996, 48.3588867072, 34.485107424000034, 65.6679077376]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048898.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[137.02288819380001, 28.756286596800003, 267.7119750914, 176.2347412224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048898_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify.", "boxes_value": [[33.022888193800014, 28.756286596800003, 163.7119750914, 176.2347412224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048898.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include three lamps, and two moniters.", "boxes_value": [[137.02288819380001, 28.756286596800003, 267.7119750914, 176.2347412224], [239.9900512522, 28.756286596800003, 267.7119750914, 107.3017578024], [179.26586913039998, 85.5202636512, 199.72729491939998, 142.28417967119998], [137.02288819380001, 121.1627197368, 156.824279802, 166.7058715728], [189.79534913900002, 113.32196043840001, 230.70159915339997, 171.0567016704], [167.7888183856, 129.63269042640002, 203.7759399252, 176.2347412224]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048898_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each object you identify. For your reference, objects involved in this region include three lamps, and two moniters.", "boxes_value": [[33.022888193800014, 28.756286596800003, 163.7119750914, 176.2347412224], [135.9900512522, 28.756286596800003, 163.7119750914, 107.3017578024], [75.26586913039998, 85.5202636512, 95.72729491939998, 142.28417967119998], [33.022888193800014, 121.1627197368, 52.82427980200001, 166.7058715728], [85.79534913900002, 113.32196043840001, 126.70159915339997, 171.0567016704], [63.788818385599996, 129.63269042640002, 99.77593992519999, 176.2347412224]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00048899.jpg", "text": "Detail the chosen region in the depicted scene . Remember to mention the objects and their corresponding locations.", "boxes_value": [[253.19580077679998, 345.6807250944, 618.8709716432, 512.0120849408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048899_crop.jpg", "text": "Detail the chosen region in the depicted scene . Remember to mention the objects and their corresponding locations.", "boxes_value": [[92.19580077679998, 41.680725094399975, 457.87097164320005, 208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048899.jpg", "text": "Detail the chosen region in the depicted scene . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a backpack, two handbags, a van, and a car.", "boxes_value": [[253.19580077679998, 345.6807250944, 618.8709716432, 512.0120849408], [253.19580077679998, 441.0888671744, 311.4541015624, 512.0120849408], [451.73974611279993, 407.798034688, 465.8830566752, 438.7613525504], [595.2445068024, 406.0332031488, 618.8709716432, 434.4989013504], [577.9134521564, 340.5351562752, 595.2325439376, 359.470703104], [540.5699462747999, 345.6807250944, 559.232055702, 368.0753174016]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048899_crop.jpg", "text": "Detail the chosen region in the depicted scene . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a backpack, two handbags, a van, and a car.", "boxes_value": [[92.19580077679998, 41.680725094399975, 457.87097164320005, 208], [92.19580077679998, 137.08886717439998, 150.4541015624, 208], [290.73974611279993, 103.79803468799997, 304.8830566752, 134.76135255039998], [434.2445068024, 102.0332031488, 457.87097164320005, 130.4989013504], [416.9134521564, 36.535156275199995, 434.2325439376, 55.470703103999995], [379.5699462747999, 41.680725094399975, 398.232055702, 64.07531740159999]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00048901.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[104.4323730432, 389.8160400274, 512.5677490176, 672.6151123111999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048901_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[102.4323730432, 70.81604002739999, 510, 353.61511231119994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048901.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, and three chairs.", "boxes_value": [[104.4323730432, 389.8160400274, 512.5677490176, 672.6151123111999], [96.7655029248, 463.25231934749996, 146.6000976384, 549.1210937194], [104.4323730432, 436.418334976, 143.5333252096, 496.98657226160003], [410.1428222464, 396.0236816167, 472.8392944128, 508.3806152626], [445.5260009984, 389.8160400274, 482.150634752, 483.55029294130003], [485.6091308544, 476.10131838579997, 512.5677490176, 672.6151123111999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048901_crop.jpg", "text": "In , what elements can be found within the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pillows, and three chairs.", "boxes_value": [[102.4323730432, 70.81604002739999, 510, 353.61511231119994], [94.7655029248, 144.25231934749996, 144.6000976384, 230.12109371940005], [102.4323730432, 117.41833497599998, 141.5333252096, 177.98657226160003], [408.1428222464, 77.0236816167, 470.8392944128, 189.38061526259997], [443.5260009984, 70.81604002739999, 480.150634752, 164.55029294130003], [483.6091308544, 157.10131838579997, 510, 353.61511231119994]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048902.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations.", "boxes_value": [[104.9401244973, 203.141723648, 177.584716798, 488.53137208320004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048902_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations.", "boxes_value": [[18.940124497300005, 72.14172364800001, 91.58471679799999, 357.53137208320004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048902.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[104.9401244973, 203.141723648, 177.584716798, 488.53137208320004], [104.9401244973, 203.141723648, 177.584716798, 488.53137208320004], [135.4249267762, 153.8471679488, 201.58343505279998, 447.0201416192], [125.16790768370001, 469.3597412352, 173.4832763904, 488.133850112], [153.4590454401, 447.7951049728, 176.8206787114, 466.5357665792], [154.9993285991, 416.9885254144, 189.913452129, 445.741332992], [168.4938354452, 404.4633178624, 190.1841430602, 419.5522461184]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048902_crop.jpg", "text": "In the image , elaborate on the details found within the section . Please mention the objects and their locations. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[18.940124497300005, 72.14172364800001, 91.58471679799999, 357.53137208320004], [18.940124497300005, 72.14172364800001, 91.58471679799999, 357.53137208320004], [49.42492677620001, 22.847167948800006, 109, 316.0201416192], [39.16790768370001, 338.3597412352, 87.4832763904, 357.133850112], [67.4590454401, 316.7951049728, 90.8206787114, 335.5357665792], [68.99932859910001, 285.9885254144, 103.913452129, 314.741332992], [82.4938354452, 273.4633178624, 104.1841430602, 288.5522461184]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048903.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object.", "boxes_value": [[0.18267819999999999, 440.6859131068, 248.03741455, 667.0455322214001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048903_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object.", "boxes_value": [[0.18267819999999999, 56.6859131068, 248.03741455, 283]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048903.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a handbag, a backpack, and two lions.", "boxes_value": [[0.18267819999999999, 440.6859131068, 248.03741455, 667.0455322214001], [168.1967163, 574.6331787362, 200.92614745, 667.0455322214001], [231.15740965, 630.8398437574, 248.03741455, 651.9398193395], [41.7145996, 592.2744140791, 95.77807615, 666.9168701225], [0.18267819999999999, 440.6859131068, 83.24780275, 516.1127929661], [17.3685913, 487.9470214516, 87.5442505, 525.6605224451]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048903_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a handbag, a backpack, and two lions.", "boxes_value": [[0.18267819999999999, 56.6859131068, 248.03741455, 283], [168.1967163, 190.63317873619997, 200.92614745, 283], [231.15740965, 246.8398437574, 248.03741455, 267.9398193395], [41.7145996, 208.27441407909998, 95.77807615, 282.9168701225], [0.18267819999999999, 56.6859131068, 83.24780275, 132.11279296609996], [17.3685913, 103.9470214516, 87.5442505, 141.6605224451]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048904.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[413.15930173440006, 68.2719116288, 703.448364288, 511.8867187712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048904_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[73.15930173440006, 68.2719116288, 363.448364288, 511.8867187712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048904.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two flags, three hats, and two leather shoes.", "boxes_value": [[413.15930173440006, 68.2719116288, 703.448364288, 511.8867187712], [673.9572754176, 432.6654663168, 703.448364288, 511.8867187712], [500.19091799040007, 362.944152832, 553.2254638848, 423.9089965568], [488.662719744, 68.2719116288, 530.7191162112, 85.1769409024], [586.3820800512, 50.5422363136, 642.8696289024, 86.4138793984], [556.6263427583999, 381.6755981312, 578.4093017856, 401.2051391488], [413.15930173440006, 344.118835456, 438.32238766079996, 364.3994750976], [423.9072265728, 58.7485961728, 459.580200192, 76.2989501952]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 7], [5, 6]]}, {"image_path": "objects365_v1_00048904_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include two flags, three hats, and two leather shoes.", "boxes_value": [[73.15930173440006, 68.2719116288, 363.448364288, 511.8867187712], [333.95727541760004, 432.6654663168, 363.448364288, 511.8867187712], [160.19091799040007, 362.944152832, 213.22546388479998, 423.9089965568], [148.66271974400001, 68.2719116288, 190.7191162112, 85.1769409024], [246.38208005119998, 50.5422363136, 302.8696289024, 86.4138793984], [216.62634275839991, 381.6755981312, 238.40930178559995, 401.2051391488], [73.15930173440006, 344.118835456, 98.32238766079996, 364.3994750976], [83.90722657280003, 58.7485961728, 119.580200192, 76.2989501952]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 7], [5, 6]]}, {"image_path": "objects365_v1_00048905.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give coordinates for the items you reference.", "boxes_value": [[0, 80.6384887808, 120.55029295950001, 261.4144897536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048905_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give coordinates for the items you reference.", "boxes_value": [[0, 45.6384887808, 120.55029295950001, 226.41448975359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048905.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[0, 80.6384887808, 120.55029295950001, 261.4144897536], [66.511840842, 80.6384887808, 120.55029295950001, 181.6668701184], [72.35437014, 162.1038818304, 121.8820800825, 323.8271484416], [7.004150356499999, 172.2224121344, 74.60241696450001, 336.9931030528], [0, 132.7901001216, 44.5587157845, 261.4144897536], [0, 88.529296896, 8.378906229, 159.3465575936]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048905_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five people.", "boxes_value": [[0, 45.6384887808, 120.55029295950001, 226.41448975359998], [66.511840842, 45.6384887808, 120.55029295950001, 146.6668701184], [72.35437014, 127.10388183040001, 121.8820800825, 271], [7.004150356499999, 137.2224121344, 74.60241696450001, 271], [0, 97.7901001216, 44.5587157845, 226.41448975359998], [0, 53.529296896000005, 8.378906229, 124.34655759360001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048907.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[290.9839477847, 322.9107055616, 458.80273437439996, 468.3438720512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048907_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[41.98394778469998, 36.910705561600025, 209.80273437439996, 182.34387205119998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048907.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, three sneakers, and a handbag.", "boxes_value": [[290.9839477847, 322.9107055616, 458.80273437439996, 468.3438720512], [0.2728882105, 234.0223388672, 494.07177735169995, 475.6276855296], [337.3555297728, 331.635620096, 372.69152833289996, 352.5754394624], [356.1141357691, 322.9107055616, 379.2352294857, 341.6693115392], [347.8560790721, 339.1807250944, 458.80273437439996, 403.6162719744], [290.9839477847, 429.807678208, 315.5070190727, 468.3438720512]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00048907_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, three sneakers, and a handbag.", "boxes_value": [[41.98394778469998, 36.910705561600025, 209.80273437439996, 182.34387205119998], [0, 0, 245.07177735169995, 189.6276855296], [88.35552977280003, 45.635620096000025, 123.69152833289996, 66.57543946240003], [107.11413576910002, 36.910705561600025, 130.23522948570002, 55.66931153920001], [98.85607907209999, 53.180725094399975, 209.80273437439996, 117.61627197439998], [41.98394778469998, 143.80767820800003, 66.50701907270002, 182.34387205119998]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00048908.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[335.43420412390003, 185.8649291776, 424.881835933, 378.0664062464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048908_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[22.43420412390003, 48.864929177600004, 111.88183593299999, 241.0664062464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048908.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five storage boxes.", "boxes_value": [[335.43420412390003, 185.8649291776, 424.881835933, 378.0664062464], [338.3911742838, 185.8649291776, 358.35046388660004, 264.963256832], [354.65429689649994, 199.1712036352, 458.14746096429997, 280.487182592], [337.65191653, 263.4847412224, 424.881835933, 340.3653564416], [335.43420412390003, 321.1452026368, 356.87207029449996, 378.0664062464], [356.87207029449996, 324.8413696512, 421.92480471289997, 394.3295898624]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048908_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five storage boxes.", "boxes_value": [[22.43420412390003, 48.864929177600004, 111.88183593299999, 241.0664062464], [25.391174283800012, 48.864929177600004, 45.35046388660004, 127.96325683200001], [41.65429689649994, 62.17120363519999, 134, 143.487182592], [24.651916529999994, 126.48474122239998, 111.88183593299999, 203.3653564416], [22.43420412390003, 184.14520263679998, 43.87207029449996, 241.0664062464], [43.87207029449996, 187.8413696512, 108.92480471289997, 257.3295898624]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048910.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[441.9259033356, 105.4028320256, 774.1877441490001, 509.9708862464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048910_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[83.92590333560003, 101.4028320256, 416, 505.9708862464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048910.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, three people, and a bottle.", "boxes_value": [[441.9259033356, 105.4028320256, 774.1877441490001, 509.9708862464], [379.1621093562, 359.0827636736, 547.1148681378, 509.9708862464], [538.1334228348, 406.6843872256, 705.1881103452, 509.9708862464], [388.842041052, 200.2111816192, 773.8364257812, 487.81665039360007], [661.1417236116, 118.6504516608, 774.1877441490001, 350.9245605376], [441.9259033356, 105.4028320256, 538.8225097715999, 250.7476806656], [567.6411133062, 219.8626708992, 586.4324951376001, 288.5688476672]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048910_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two chairs, three people, and a bottle.", "boxes_value": [[83.92590333560003, 101.4028320256, 416, 505.9708862464], [21.162109356200006, 355.0827636736, 189.11486813780004, 505.9708862464], [180.13342283479994, 402.6843872256, 347.1881103452, 505.9708862464], [30.842041052000013, 196.2111816192, 415.8364257812, 483.81665039360007], [303.1417236116, 114.6504516608, 416, 346.9245605376], [83.92590333560003, 101.4028320256, 180.82250977159993, 246.7476806656], [209.64111330620005, 215.8626708992, 228.43249513760009, 284.5688476672]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048911.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[439.8347168058, 181.0673217536, 546.8516845392, 261.6517334016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048911_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[26.834716805799985, 21.067321753599998, 133.85168453920005, 101.65173340159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048911.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two potted plants, a picture, two cups, and a bottle.", "boxes_value": [[439.8347168058, 181.0673217536, 546.8516845392, 261.6517334016], [465.8668212612, 225.6261596672, 487.625244111, 263.2371215872], [433.2292480404, 223.4503173632, 459.33935544660005, 261.682983424], [472.5166015302, 193.3955078144, 537.825195339, 254.9290161152], [439.8347168058, 239.2962646528, 459.34558104599995, 260.798034688], [467.1101074224, 240.2917480448, 486.0235595562, 261.6517334016], [532.9002685626, 181.0673217536, 546.8516845392, 254.5442504704]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048911_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include two potted plants, a picture, two cups, and a bottle.", "boxes_value": [[26.834716805799985, 21.067321753599998, 133.85168453920005, 101.65173340159998], [52.866821261200016, 65.6261596672, 74.62524411099997, 103.23712158720002], [20.229248040400023, 63.450317363200014, 46.33935544660005, 101.68298342399999], [59.51660153019998, 33.39550781439999, 124.82519533899995, 94.9290161152], [26.834716805799985, 79.2962646528, 46.34558104599995, 100.79803468799997], [54.110107422400006, 80.29174804479999, 73.02355955619998, 101.65173340159998], [119.90026856259999, 21.067321753599998, 133.85168453920005, 94.54425047039999]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048913.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify.", "boxes_value": [[52.875976559200005, 163.1263427584, 258.0137328889, 324.4526367232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048913_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify.", "boxes_value": [[51.875976559200005, 41.1263427584, 257.0137328889, 202.4526367232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048913.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, two pillows, a lamp, and a tea pot.", "boxes_value": [[52.875976559200005, 163.1263427584, 258.0137328889, 324.4526367232], [124.05792237920001, 233.9193115136, 241.1386108582, 342.8315429888], [151.2860107277, 246.1719360512, 200.2965087631, 293.1403198464], [98.1912841579, 163.1263427584, 173.7491454782, 324.4526367232], [52.875976559200005, 249.1864624128, 70.3797607491, 296.4465942528], [244.42773436800002, 181.130798336, 258.0137328889, 210.3929443328]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048913_crop.jpg", "text": "What does the area look like in the context of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, two pillows, a lamp, and a tea pot.", "boxes_value": [[51.875976559200005, 41.1263427584, 257.0137328889, 202.4526367232], [123.05792237920001, 111.91931151360001, 240.1386108582, 220.83154298879998], [150.2860107277, 124.17193605119999, 199.2965087631, 171.14031984640002], [97.1912841579, 41.1263427584, 172.7491454782, 202.4526367232], [51.875976559200005, 127.18646241280001, 69.3797607491, 174.44659425280003], [243.42773436800002, 59.130798336, 257.0137328889, 88.3929443328]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00048915.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object.", "boxes_value": [[577.1511230593001, 183.933959936, 774.5545654584, 294.3336792064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048915_crop.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object.", "boxes_value": [[50.15112305930006, 27.933959936000008, 246, 138.3336792064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048915.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two drums, a cymbal, and two people.", "boxes_value": [[577.1511230593001, 183.933959936, 774.5545654584, 294.3336792064], [662.6926269764, 217.5656738304, 737.2673340022, 290.678039552], [737.8380126926, 183.933959936, 774.5545654584, 294.3336792064], [577.1511230593001, 205.8619384832, 633.4477539209, 273.8621826048], [46.8764648156, 0.5943603712, 773.2558593668999, 511.165344256], [624.8002929754, 160.5285644288, 772.3044433915001, 331.2355346432]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048915_crop.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include two drums, a cymbal, and two people.", "boxes_value": [[50.15112305930006, 27.933959936000008, 246, 138.3336792064], [135.6926269764, 61.56567383039999, 210.26733400219996, 134.67803955199997], [210.83801269260005, 27.933959936000008, 246, 138.3336792064], [50.15112305930006, 49.86193848319999, 106.44775392090003, 117.86218260480001], [0, 0, 246, 165], [97.8002929754, 4.528564428799996, 245.30444339150006, 165]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048916.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify.", "boxes_value": [[397.67578123280003, 0.2339477504, 680.7819824295, 474.1633300992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048916_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify.", "boxes_value": [[71.67578123280003, 0.2339477504, 354.7819824295, 474.1633300992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048916.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, a cabinet, a picture, a book, and a bakset.", "boxes_value": [[397.67578123280003, 0.2339477504, 680.7819824295, 474.1633300992], [261.0808715635, 382.230834944, 668.7084960827, 511.2581176832], [307.8826294223, 142.6686401536, 537.3240967006, 389.6201171968], [499.90270994950004, 172.7777099776, 680.7819824295, 442.1558227456], [576.8044433675, 0.2339477504, 656.6789550487, 23.1350708224], [397.67578123280003, 384.8760375808, 553.1662597763, 474.1633300992], [369.5035400481, 361.7958984192, 644.460937501, 512.2612304896]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048916_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, a cabinet, a picture, a book, and a bakset.", "boxes_value": [[71.67578123280003, 0.2339477504, 354.7819824295, 474.1633300992], [0, 382.230834944, 342.7084960827, 511.2581176832], [0, 142.6686401536, 211.32409670059997, 389.6201171968], [173.90270994950004, 172.7777099776, 354.7819824295, 442.1558227456], [250.80444336749997, 0.2339477504, 330.6789550487, 23.1350708224], [71.67578123280003, 384.8760375808, 227.16625977629997, 474.1633300992], [43.50354004809998, 361.7958984192, 318.460937501, 512]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048917.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations.", "boxes_value": [[110.91229248959999, 201.5006713856, 408.7152099576, 512.4318847488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048917_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations.", "boxes_value": [[74.91229248959999, 78.50067138559999, 372.7152099576, 389]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048917.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a gloves, a sneakers, and two hockey sticks.", "boxes_value": [[110.91229248959999, 201.5006713856, 408.7152099576, 512.4318847488], [15.0220946952, 5.5617675776, 281.2447509552, 510.0755004928], [246.3303222576, 115.5422363136, 613.8048095712, 495.236816384], [170.2525635, 268.7761230336, 231.2666625744, 338.3837280256], [218.376342792, 447.8237304832, 261.34399411920003, 491.3485718016], [237.3576049944, 238.8124389888, 455.0093994168, 500.6855468544], [110.91229248959999, 201.5006713856, 408.7152099576, 512.4318847488]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048917_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, a gloves, a sneakers, and two hockey sticks.", "boxes_value": [[74.91229248959999, 78.50067138559999, 372.7152099576, 389], [0, 0, 245.2447509552, 387.0755004928], [210.3303222576, 0, 447, 372.236816384], [134.2525635, 145.77612303360002, 195.2666625744, 215.38372802560002], [182.376342792, 324.8237304832, 225.34399411920003, 368.3485718016], [201.3576049944, 115.81243898880001, 419.0093994168, 377.6855468544], [74.91229248959999, 78.50067138559999, 372.7152099576, 389]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048919.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations.", "boxes_value": [[520.0404052512, 239.1841430528, 735.3043213296, 345.7946166784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048919_crop.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations.", "boxes_value": [[54.04040525120001, 27.18414305280001, 269.30432132960004, 133.79461667840002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048919.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a glasses, a laptop, a chair, and a desk.", "boxes_value": [[520.0404052512, 239.1841430528, 735.3043213296, 345.7946166784], [636.3177490511999, 259.7617187328, 732.6209716367999, 367.5884399616], [651.9566650416, 239.1841430528, 730.9747314719999, 326.43322752], [509.55957028800003, 253.176879872, 640.4332275744, 409.56677248], [666.2247314880001, 284.4345702912, 690.5074463136, 297.1760253952], [595.8696289344, 270.3380737536, 646.2369384816001, 296.6234741248], [727.2694091712, 288.5863647232, 735.3043213296, 332.4133300736], [520.0404052512, 286.4024047616, 660.8220215088, 345.7946166784]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048919_crop.jpg", "text": "In the provided image , please explain the content within the region . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a glasses, a laptop, a chair, and a desk.", "boxes_value": [[54.04040525120001, 27.18414305280001, 269.30432132960004, 133.79461667840002], [170.31774905119994, 47.76171873279998, 266.62097163679994, 155.5884399616], [185.9566650416, 27.18414305280001, 264.9747314719999, 114.43322752], [43.55957028800003, 41.176879872, 174.4332275744, 160], [200.2247314880001, 72.43457029119998, 224.50744631359998, 85.17602539519999], [129.8696289344, 58.338073753599986, 180.23693848160008, 84.6234741248], [261.2694091712, 76.5863647232, 269.30432132960004, 120.41333007359998], [54.04040525120001, 74.4024047616, 194.82202150880005, 133.79461667840002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048921.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify.", "boxes_value": [[204.89947506160001, 1.832458496, 352.32629396560003, 369.1771850752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048921_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify.", "boxes_value": [[36.899475061600015, 1.832458496, 184.32629396560003, 369.1771850752]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048921.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, two pillows, two leather shoes, and two chairs.", "boxes_value": [[204.89947506160001, 1.832458496, 352.32629396560003, 369.1771850752], [204.89947506160001, 1.832458496, 352.32629396560003, 369.1771850752], [209.380615222, 95.1561889792, 566.2351074572, 450.9227905024], [251.34381106560002, 0, 295.9587402136, 19.1156005888], [223.7963256992, 170.626708992, 257.3323974892, 218.5353393664], [290.6788330376, 95.1845703168, 352.5107422128, 164.9172363264], [275.9078368892, 273.8101196288, 334.991699198, 338.0465698304], [222.32019044359998, 336.6725464064, 250.8316040424, 367.588500992], [327.7779541012, 335.2985229312, 352.1672363352, 365.1839599616], [204.89947506160001, 1.832458496, 352.32629396560003, 369.1771850752], [209.380615222, 95.1561889792, 566.2351074572, 450.9227905024]], "boxes_seq": [[0], [0], [1, 2, 9, 10], [3, 5], [4, 6], [7, 8]]}, {"image_path": "objects365_v1_00048921_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include four people, two pillows, two leather shoes, and two chairs.", "boxes_value": [[36.899475061600015, 1.832458496, 184.32629396560003, 369.1771850752], [36.899475061600015, 1.832458496, 184.32629396560003, 369.1771850752], [41.38061522199999, 95.1561889792, 221, 450.9227905024], [83.34381106560002, 0, 127.95874021359998, 19.1156005888], [55.7963256992, 170.626708992, 89.33239748919999, 218.5353393664], [122.67883303759999, 95.1845703168, 184.51074221279998, 164.9172363264], [107.90783688919998, 273.8101196288, 166.991699198, 338.0465698304], [54.320190443599984, 336.6725464064, 82.83160404239999, 367.588500992], [159.7779541012, 335.2985229312, 184.16723633520002, 365.1839599616], [36.899475061600015, 1.832458496, 184.32629396560003, 369.1771850752], [41.38061522199999, 95.1561889792, 221, 450.9227905024]], "boxes_seq": [[0], [0], [1, 2, 9, 10], [3, 5], [4, 6], [7, 8]]}, {"image_path": "objects365_v1_00048922.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[145.2730713088, 42.3779906955, 369.9039306752, 422.90539553080004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048922_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[56.27307130880001, 42.3779906955, 280.9039306752, 422.90539553080004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048922.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bench, two people, a handbag, a belt, and a cabinet.", "boxes_value": [[145.2730713088, 42.3779906955, 369.9039306752, 422.90539553080004], [139.301696768, 269.55035398399997, 179.9302368256, 327.5057983331], [146.13488768, 58.2647094811, 298.980163584, 656.5447997789], [134.0103759872, 173.49963376230002, 183.4971923968, 263.72760010630003], [145.2730713088, 250.0511474559, 182.3941040128, 273.1192627066], [181.1425170944, 329.7054443647, 272.0516967936, 367.8491211006], [309.860168448, 42.3779906955, 369.9039306752, 422.90539553080004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048922_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include a bench, two people, a handbag, a belt, and a cabinet.", "boxes_value": [[56.27307130880001, 42.3779906955, 280.9039306752, 422.90539553080004], [50.301696768, 269.55035398399997, 90.93023682559999, 327.5057983331], [57.13488767999999, 58.2647094811, 209.98016358400002, 518], [45.01037598720001, 173.49963376230002, 94.49719239679999, 263.72760010630003], [56.27307130880001, 250.0511474559, 93.3941040128, 273.1192627066], [92.1425170944, 329.7054443647, 183.0516967936, 367.8491211006], [220.86016844800002, 42.3779906955, 280.9039306752, 422.90539553080004]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00048923.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[145.8222045696, 0, 425.5757446144, 152.9758300672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048923_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[70.82220456959999, 0, 350.5757446144, 152.9758300672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048923.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a bowl, two wine glasses, and a bottle.", "boxes_value": [[145.8222045696, 0, 425.5757446144, 152.9758300672], [349.9289550848, 0, 425.5757446144, 132.3073120256], [145.8222045696, 109.4545288192, 250.6966552576, 143.5570678784], [247.1622314496, 81.62774656, 268.5352173056, 148.5755004928], [275.4500122112, 88.228210432, 299.651794432, 152.9758300672], [248.1051635712, 69.0553588736, 281.4218749952, 141.6607055872]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048923_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a lamp, a bowl, two wine glasses, and a bottle.", "boxes_value": [[70.82220456959999, 0, 350.5757446144, 152.9758300672], [274.9289550848, 0, 350.5757446144, 132.3073120256], [70.82220456959999, 109.4545288192, 175.6966552576, 143.5570678784], [172.1622314496, 81.62774656, 193.53521730559999, 148.5755004928], [200.45001221119998, 88.228210432, 224.65179443199997, 152.9758300672], [173.1051635712, 69.0553588736, 206.42187499520003, 141.6607055872]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048926.jpg", "text": "Regarding the coordinates in image , can you provide a description? Specify the location of each mentioned object.", "boxes_value": [[565.6214565649, 257.95697024, 681.4512939551, 454.678934272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048926_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Specify the location of each mentioned object.", "boxes_value": [[29.621456564899972, 49.956970239999976, 145.45129395510003, 246.678934272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048926.jpg", "text": "Regarding the coordinates in image , can you provide a description? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, two leather shoes, and a high heels.", "boxes_value": [[565.6214565649, 257.95697024, 681.4512939551, 454.678934272], [666.3256836153, 257.95697024, 681.4512939551, 314.5267944448], [528.4902343513, 185.557312, 612.2996825871, 455.3049316352], [600.3475341538, 172.7340087808, 675.5003662307, 427.8263549952], [599.3533004365, 389.3375420928, 622.9687557508, 408.3325822976], [604.4870950667, 409.3593412096, 633.3980713249, 422.647277824], [565.6214565649, 427.5222083584, 593.7480655885, 454.678934272]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048926_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Specify the location of each mentioned object. For your reference, objects involved in this region include a chair, two people, two leather shoes, and a high heels.", "boxes_value": [[29.621456564899972, 49.956970239999976, 145.45129395510003, 246.678934272], [130.32568361530002, 49.956970239999976, 145.45129395510003, 106.52679444479998], [0, 0, 76.29968258710005, 247.30493163519998], [64.34753415379998, 0, 139.50036623070002, 219.8263549952], [63.353300436500035, 181.3375420928, 86.96875575080003, 200.33258229760003], [68.48709506670002, 201.35934120960002, 97.3980713249, 214.647277824], [29.621456564899972, 219.5222083584, 57.748065588500026, 246.678934272]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048927.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify.", "boxes_value": [[137.4001464525, 68.0001220608, 490.826293922, 276.0869750784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048927_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify.", "boxes_value": [[88.4001464525, 53.000122060799995, 441.826293922, 261.0869750784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048927.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three cabinets, two storage boxes, and a flag.", "boxes_value": [[137.4001464525, 68.0001220608, 490.826293922, 276.0869750784], [323.61389160240003, 108.8746337792, 490.826293922, 276.0869750784], [257.74237063699996, 133.576416, 324.8806762904, 266.5862426624], [137.4001464525, 167.7789917184, 185.537048352, 247.5848388608], [317.31475827, 77.7686157312, 340.36840819540004, 110.5908203008], [339.5869751233, 68.0001220608, 374.3627929973, 101.9945068544], [126.7942504819, 89.0366821376, 151.9823608506, 139.0725097472]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048927_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three cabinets, two storage boxes, and a flag.", "boxes_value": [[88.4001464525, 53.000122060799995, 441.826293922, 261.0869750784], [274.61389160240003, 93.8746337792, 441.826293922, 261.0869750784], [208.74237063699996, 118.576416, 275.8806762904, 251.58624266240002], [88.4001464525, 152.7789917184, 136.537048352, 232.5848388608], [268.31475827, 62.7686157312, 291.36840819540004, 95.5908203008], [290.5869751233, 53.000122060799995, 325.3627929973, 86.9945068544], [77.7942504819, 74.0366821376, 102.9823608506, 124.0725097472]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048929.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[461.8000487979, 291.5628051968, 615.8222656445, 326.6766357504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048929_crop.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[38.80004879789999, 9.562805196800014, 192.82226564450002, 44.67663575040001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048929.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include an umbrella, three chairs, and a desk.", "boxes_value": [[461.8000487979, 291.5628051968, 615.8222656445, 326.6766357504], [465.5528564316, 231.0092163072, 559.735717747, 321.0695800832], [461.8000487979, 291.5628051968, 490.130493136, 325.4796142592], [492.52465818229996, 306.3265380864, 512.8533935497, 324.6815185408], [577.1171874966, 293.5578613248, 615.8222656445, 326.6766357504], [525.2443847405, 293.5578613248, 557.5651855466, 325.8786010624]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048929_crop.jpg", "text": "What information can you give me about the coordinates in image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include an umbrella, three chairs, and a desk.", "boxes_value": [[38.80004879789999, 9.562805196800014, 192.82226564450002, 44.67663575040001], [42.55285643159999, 0, 136.73571774699997, 39.06958008319998], [38.80004879789999, 9.562805196800014, 67.13049313599998, 43.47961425919999], [69.52465818229996, 24.326538086399978, 89.85339354969994, 42.68151854080003], [154.11718749659997, 11.557861324800001, 192.82226564450002, 44.67663575040001], [102.24438474049998, 11.557861324800001, 134.5651855466, 43.87860106239998]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00048931.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[0.6957397258000001, 236.3815307776, 228.4221802075, 323.2607421952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048931_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object.", "boxes_value": [[0.6957397258000001, 22.381530777600005, 228.4221802075, 109.26074219520001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048931.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, and four people.", "boxes_value": [[0.6957397258000001, 236.3815307776, 228.4221802075, 323.2607421952], [0.6957397258000001, 308.4003906048, 35.8913574028, 323.2607421952], [66.0364990007, 267.2002563584, 85.1241455061, 316.0173339648], [152.1815795883, 236.3815307776, 166.91717529640002, 269.0560912896], [191.9035644837, 225.8103637504, 212.7255859463, 265.8526611456], [215.60858153729998, 239.9052734464, 228.4221802075, 274.8221435392]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048931_crop.jpg", "text": "Can you divulge the contents of the area within the given image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, and four people.", "boxes_value": [[0.6957397258000001, 22.381530777600005, 228.4221802075, 109.26074219520001], [0.6957397258000001, 94.40039060480001, 35.8913574028, 109.26074219520001], [66.0364990007, 53.2002563584, 85.1241455061, 102.0173339648], [152.1815795883, 22.381530777600005, 166.91717529640002, 55.056091289599976], [191.9035644837, 11.8103637504, 212.7255859463, 51.852661145599996], [215.60858153729998, 25.905273446400003, 228.4221802075, 60.8221435392]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048932.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[238.59002688830003, 394.5264282112, 465.5634765352, 461.6054076928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048932_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[57.59002688830003, 17.526428211200027, 284.5634765352, 84.60540769279999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048932.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[238.59002688830003, 394.5264282112, 465.5634765352, 461.6054076928], [233.5213623143, 216.4811401216, 336.3328246968, 464.0082397696], [326.10040282479997, 187.7329101312, 486.547485381, 455.09838868480006], [238.59002688830003, 428.20281984, 256.9340820049, 461.6054076928], [296.6339111316, 422.7269897216, 335.7861327826, 447.9158325248], [408.34094241109995, 394.5264282112, 439.2794189708, 434.7738647552], [442.2911377277, 426.0125122048, 465.5634765352, 455.3082275328]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048932_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[57.59002688830003, 17.526428211200027, 284.5634765352, 84.60540769279999], [52.521362314300006, 0, 155.3328246968, 87.00823976959998], [145.10040282479997, 0, 305.547485381, 78.09838868480006], [57.59002688830003, 51.20281984000002, 75.9340820049, 84.60540769279999], [115.63391113159997, 45.72698972159998, 154.78613278260002, 70.91583252480001], [227.34094241109995, 17.526428211200027, 258.2794189708, 57.77386475520001], [261.2911377277, 49.012512204799975, 284.5634765352, 78.3082275328]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00048934.jpg", "text": "What can you tell me about the area within the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[202.34649657519998, 59.7904052736, 363.1907958732, 182.064880384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048934_crop.jpg", "text": "What can you tell me about the area within the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.34649657519998, 30.7904052736, 201.1907958732, 153.064880384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048934.jpg", "text": "What can you tell me about the area within the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cars, a van, a pickup truck, and a street lights.", "boxes_value": [[202.34649657519998, 59.7904052736, 363.1907958732, 182.064880384], [328.84826662259997, 141.2832031232, 382.9376220922, 192.5822753792], [303.0914306774, 119.604553216, 363.1907958732, 182.064880384], [284.097900425, 136.4049682432, 305.5443115112, 171.8862915072], [239.97271730679998, 142.096252416, 264.032470677, 156.0184936448], [202.34649657519998, 59.7904052736, 247.1760254258, 136.412231424]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00048934_crop.jpg", "text": "What can you tell me about the area within the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two cars, a van, a pickup truck, and a street lights.", "boxes_value": [[40.34649657519998, 30.7904052736, 201.1907958732, 153.064880384], [166.84826662259997, 112.2832031232, 220.9376220922, 163.5822753792], [141.0914306774, 90.604553216, 201.1907958732, 153.064880384], [122.09790042499998, 107.40496824319999, 143.54431151120002, 142.8862915072], [77.97271730679998, 113.096252416, 102.03247067699999, 127.0184936448], [40.34649657519998, 30.7904052736, 85.17602542579999, 107.412231424]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5]]}, {"image_path": "objects365_v1_00048935.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 59.5496826368, 145.6531372032, 460.324157696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048935_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 59.5496826368, 145.6531372032, 460.324157696]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048935.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a blackboard.", "boxes_value": [[0, 59.5496826368, 145.6531372032, 460.324157696], [9.0316772352, 242.2004394496, 87.6010742016, 460.324157696], [47.1939697152, 176.7258911232, 134.7427368192, 264.648864768], [0, 224.2416992256, 25.8679809792, 284.478271488], [0.052307097600000005, 172.9844970496, 51.683654784000005, 358.1838989312], [0, 59.5496826368, 145.6531372032, 185.40332032]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048935_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a blackboard.", "boxes_value": [[0, 59.5496826368, 145.6531372032, 460.324157696], [9.0316772352, 242.2004394496, 87.6010742016, 460.324157696], [47.1939697152, 176.7258911232, 134.7427368192, 264.648864768], [0, 224.2416992256, 25.8679809792, 284.478271488], [0.052307097600000005, 172.9844970496, 51.683654784000005, 358.1838989312], [0, 59.5496826368, 145.6531372032, 185.40332032]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048936.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe.", "boxes_value": [[62.96960448, 285.982360832, 231.8978881536, 346.5171508736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048936_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe.", "boxes_value": [[42.96960448, 15.982360831999983, 211.8978881536, 76.51715087359997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048936.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a trash bin can, a telephone, a moniter, and a keyboard.", "boxes_value": [[62.96960448, 285.982360832, 231.8978881536, 346.5171508736], [62.96960448, 295.183593728, 103.78906252799999, 346.5171508736], [208.29040527360002, 285.982360832, 231.8978881536, 330.2377929728], [134.5240478208, 314.4206542848, 155.201599104, 332.8462524416], [110.32965089279999, 288.2877807616, 143.4649658112, 313.0257568256], [102.37939453439999, 322.5949707264, 144.16644287999998, 338.6982422016]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048936_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a person, a trash bin can, a telephone, a moniter, and a keyboard.", "boxes_value": [[42.96960448, 15.982360831999983, 211.8978881536, 76.51715087359997], [42.96960448, 25.183593728000005, 83.78906252799999, 76.51715087359997], [188.29040527360002, 15.982360831999983, 211.8978881536, 60.23779297279998], [114.5240478208, 44.42065428479998, 135.201599104, 62.84625244159997], [90.32965089279999, 18.287780761600004, 123.46496581119999, 43.02575682560001], [82.37939453439999, 52.59497072639999, 124.16644287999998, 68.69824220160001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048937.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[349.3146972672, 16.2902897664, 656.8721127936, 196.1820560896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048937_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[77.31469726720002, 16.2902897664, 384.87211279359997, 196.1820560896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048937.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five hats.", "boxes_value": [[349.3146972672, 16.2902897664, 656.8721127936, 196.1820560896], [623.7188408064001, 16.2902897664, 649.0993972992001, 30.1342297088], [526.1968869888001, 81.888027648, 576.9059332608, 117.622590464], [604.3788943104, 159.280053504, 656.8721127936, 188.2625081344], [494.85724400640004, 154.630205696, 542.0026225152, 196.1820560896], [349.3146972672, 126.5197143552, 398.37011719680004, 159.2598877184]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048937_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include five hats.", "boxes_value": [[77.31469726720002, 16.2902897664, 384.87211279359997, 196.1820560896], [351.71884080640007, 16.2902897664, 377.09939729920006, 30.1342297088], [254.1968869888001, 81.888027648, 304.9059332608, 117.622590464], [332.3788943104, 159.280053504, 384.87211279359997, 188.2625081344], [222.85724400640004, 154.630205696, 270.00262251519996, 196.1820560896], [77.31469726720002, 126.5197143552, 126.37011719680004, 159.2598877184]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048939.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[209.20532224, 173.882751456, 433.062499968, 478.701477072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048939_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[56.20532223999999, 76.882751456, 280.062499968, 381.701477072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048939.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, four chairs, and a desk.", "boxes_value": [[209.20532224, 173.882751456, 433.062499968, 478.701477072], [263.151611328, 290.086242672, 412.307617216, 478.701477072], [213.523559552, 213.24255369600002, 313.999145536, 315.028747536], [209.20532224, 255.19079592, 309.757690432, 367.464050304], [145.049194304, 223.11273192, 281.38098144, 345.873046896], [360.404968256, 173.882751456, 433.062499968, 262.58154297600004], [393.43115232, 202.662658704, 474.10925292800005, 292.305053712]], "boxes_seq": [[0], [0], [1], [2, 3, 5, 6], [4]]}, {"image_path": "objects365_v1_00048939_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, four chairs, and a desk.", "boxes_value": [[56.20532223999999, 76.882751456, 280.062499968, 381.701477072], [110.151611328, 193.08624267200003, 259.307617216, 381.701477072], [60.523559551999995, 116.24255369600002, 160.99914553600001, 218.02874753600003], [56.20532223999999, 158.19079592, 156.757690432, 270.464050304], [0, 126.11273191999999, 128.38098144000003, 248.873046896], [207.40496825600002, 76.882751456, 280.062499968, 165.58154297600004], [240.43115232000002, 105.662658704, 321.10925292800005, 195.30505371200002]], "boxes_seq": [[0], [0], [1], [2, 3, 5, 6], [4]]}, {"image_path": "objects365_v1_00048940.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference.", "boxes_value": [[378.54272462250003, 257.3446044672, 616.2181396484999, 362.1148071424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048940_crop.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference.", "boxes_value": [[59.54272462250003, 26.344604467199986, 297.2181396484999, 131.11480714240002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048940.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, three people, and a hat.", "boxes_value": [[378.54272462250003, 257.3446044672, 616.2181396484999, 362.1148071424], [555.1353759427, 257.3446044672, 567.3807373068, 283.1021117952], [439.5062255613, 286.5372924928, 532.1785888546, 369.0691528192], [545.6793213063, 328.1320800768, 567.7990722723, 362.1148071424], [580.3277587519, 309.4458008064, 616.2181396484999, 360.8831787008], [378.54272462250003, 278.7877197312, 400.99658203640007, 298.1028442624]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048940_crop.jpg", "text": "Analyze and describe the region in the included photo . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, three people, and a hat.", "boxes_value": [[59.54272462250003, 26.344604467199986, 297.2181396484999, 131.11480714240002], [236.13537594269997, 26.344604467199986, 248.38073730680003, 52.10211179520002], [120.50622556130003, 55.53729249280002, 213.17858885459998, 138.06915281919999], [226.67932130630004, 97.13208007679998, 248.7990722723, 131.11480714240002], [261.3277587519, 78.44580080639997, 297.2181396484999, 129.8831787008], [59.54272462250003, 47.78771973120001, 81.99658203640007, 67.10284426240003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048941.jpg", "text": "Please detail the contents of the chosen region in the visual input . Remember to mention the objects and their corresponding locations.", "boxes_value": [[34.152343764, 0.5194092032, 339.51782225899996, 142.3002929664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048941_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Remember to mention the objects and their corresponding locations.", "boxes_value": [[34.152343764, 0.5194092032, 339.51782225899996, 142.3002929664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048941.jpg", "text": "Please detail the contents of the chosen region in the visual input . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four lamps, and a projector.", "boxes_value": [[34.152343764, 0.5194092032, 339.51782225899996, 142.3002929664], [228.333862295, 0.5194092032, 308.121948211, 41.0272216576], [252.533325198, 61.1343994368, 303.03662111799997, 108.2771606528], [262.410644496, 109.0609131008, 304.15307615, 142.3002929664], [34.152343764, 61.6865234432, 96.76599124500001, 105.416748032], [304.39587401299997, 54.9846191616, 339.51782225899996, 69.2426147328]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048941_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four lamps, and a projector.", "boxes_value": [[34.152343764, 0.5194092032, 339.51782225899996, 142.3002929664], [228.333862295, 0.5194092032, 308.121948211, 41.0272216576], [252.533325198, 61.1343994368, 303.03662111799997, 108.2771606528], [262.410644496, 109.0609131008, 304.15307615, 142.3002929664], [34.152343764, 61.6865234432, 96.76599124500001, 105.416748032], [304.39587401299997, 54.9846191616, 339.51782225899996, 69.2426147328]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048942.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[394.9411621298, 202.1376953344, 521.2358398754, 341.2168579072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048942_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[31.941162129799977, 35.13769533440001, 158.23583987539996, 174.2168579072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048942.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include four paddles, three people, two helmets, and a boat.", "boxes_value": [[394.9411621298, 202.1376953344, 521.2358398754, 341.2168579072], [398.28552243089996, 264.1092529152, 596.6606445436, 294.448974592], [415.9559325857, 303.7843017728, 625.3333740199, 318.787414528], [372.6135253672, 324.121887232, 575.3228759681, 342.1256713728], [433.18811038750005, 220.809997568, 574.7142333959, 265.0949707264], [361.161743202, 229.7657470464, 502.7911376982, 374.2052612096], [395.31481937219996, 171.986450176, 482.8708496274, 298.2467651584], [394.9411621298, 202.1376953344, 521.2358398754, 341.2168579072], [471.094604526, 201.6348266496, 504.1101074074, 221.2058715648], [428.8892822474, 229.8851928576, 462.07495113519997, 255.412658688], [207.5498046882, 227.2363891712, 527.6573486456999, 433.111999488]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7], [8, 9], [10]]}, {"image_path": "objects365_v1_00048942_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include four paddles, three people, two helmets, and a boat.", "boxes_value": [[31.941162129799977, 35.13769533440001, 158.23583987539996, 174.2168579072], [35.28552243089996, 97.10925291519999, 189, 127.44897459200001], [52.9559325857, 136.7843017728, 189, 151.787414528], [9.613525367199998, 157.121887232, 189, 175.1256713728], [70.18811038750005, 53.809997568, 189, 98.09497072639999], [0, 62.76574704640001, 139.79113769819998, 207.2052612096], [32.31481937219996, 4.986450176000005, 119.87084962739999, 131.24676515840002], [31.941162129799977, 35.13769533440001, 158.23583987539996, 174.2168579072], [108.09460452600001, 34.6348266496, 141.1101074074, 54.20587156479999], [65.88928224739999, 62.8851928576, 99.07495113519997, 88.412658688], [0, 60.23638917119999, 164.65734864569993, 208]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6, 7], [8, 9], [10]]}, {"image_path": "objects365_v1_00048944.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[386.077880832, 368.014587392, 768.3623046912, 484.5371093504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048944_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[96.077880832, 30.01458739200001, 478, 146.5371093504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048944.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a flower, a sink, and two cups.", "boxes_value": [[386.077880832, 368.014587392, 768.3623046912, 484.5371093504], [453.9752197632, 401.1974487552, 631.4780273664, 484.5371093504], [505.92724608000003, 386.0447387648, 558.9615478272, 434.7497558528], [575.358032256, 368.014587392, 768.3623046912, 478.0117797888], [386.077880832, 388.25897216, 409.41784665600005, 424.1399536128], [410.4630127104, 399.0581054464, 439.02844239359996, 437.3775634944]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048944_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a towel, a flower, a sink, and two cups.", "boxes_value": [[96.077880832, 30.01458739200001, 478, 146.5371093504], [163.97521976320002, 63.197448755200014, 341.47802736640006, 146.5371093504], [215.92724608000003, 48.0447387648, 268.9615478272, 96.74975585279998], [285.358032256, 30.01458739200001, 478, 140.01177978880003], [96.077880832, 50.258972159999985, 119.41784665600005, 86.13995361280001], [120.4630127104, 61.05810544640002, 149.02844239359996, 99.37756349440002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00048946.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[493.5357666077, 271.9005127168, 694.9183349381, 382.7577514496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048946_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[50.5357666077, 27.900512716799994, 251.91833493809997, 138.7577514496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048946.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[493.5357666077, 271.9005127168, 694.9183349381, 382.7577514496], [493.5357666077, 288.6400756736, 607.2298583754, 382.7577514496], [658.8280029022, 279.0225219584, 708.2586670221, 351.715515136], [628.9841308274999, 271.9005127168, 663.6862792896, 344.0809936384], [623.8342284926, 279.6664428544, 648.4173584174, 352.4095458816], [625.8215331992, 280.5181884928, 694.9183349381, 350.3273925632]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048946_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four chairs, and a desk.", "boxes_value": [[50.5357666077, 27.900512716799994, 251.91833493809997, 138.7577514496], [50.5357666077, 44.640075673599995, 164.2298583754, 138.7577514496], [215.82800290219996, 35.02252195839998, 265.2586670221, 107.71551513600002], [185.98413082749994, 27.900512716799994, 220.68627928959995, 100.0809936384], [180.8342284926, 35.666442854399975, 205.4173584174, 108.40954588160002], [182.82153319919996, 36.51818849279999, 251.91833493809997, 106.32739256320002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00048947.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[163.83514402379998, 300.0891113472, 296.66168215019997, 424.603088384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048947_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[33.835144023799984, 32.0891113472, 166.66168215019997, 156.603088384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048947.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, four people, and a book.", "boxes_value": [[163.83514402379998, 300.0891113472, 296.66168215019997, 424.603088384], [163.83514402379998, 300.0891113472, 220.4197997895, 401.9939575296], [198.1262817429, 369.7819213824, 300.7319946555, 459.6840820224], [227.22607419599998, 350.4385375744, 261.8128051953, 377.90448], [276.6694335909, 302.1603393536, 296.66168215019997, 329.0657348608], [213.73876950989998, 300.8065185792, 233.2139892498, 328.365844736], [224.65783689900002, 403.905517568, 255.5928344685, 424.603088384]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048947_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a lamp, four people, and a book.", "boxes_value": [[33.835144023799984, 32.0891113472, 166.66168215019997, 156.603088384], [33.835144023799984, 32.0891113472, 90.41979978949999, 133.99395752959998], [68.1262817429, 101.78192138240001, 170.7319946555, 187], [97.22607419599998, 82.43853757440002, 131.81280519529997, 109.90447999999998], [146.66943359089998, 34.16033935360002, 166.66168215019997, 61.06573486079998], [83.73876950989998, 32.806518579199974, 103.2139892498, 60.365844735999985], [94.65783689900002, 135.905517568, 125.5928344685, 156.603088384]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00048948.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[137.7751465097, 295.3672485376, 441.8441162084, 362.6685180416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048948_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object.", "boxes_value": [[76.7751465097, 17.367248537600005, 380.8441162084, 84.66851804160001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048948.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and five hockey sticks.", "boxes_value": [[137.7751465097, 295.3672485376, 441.8441162084, 362.6685180416], [277.37457276920003, 254.974487296, 306.6101073977, 323.6779174912], [222.31445311730002, 238.89501952, 263.2440796134, 369.480163584], [158.4835815207, 253.025512704, 197.4642333668, 341.2191772672], [137.7751465097, 302.4473876992, 159.4929199452, 329.7338256896], [195.6892700418, 308.016052224, 210.1677856427, 334.1887817216], [194.8340453765, 328.54052736, 228.7233886715, 362.6685180416], [303.6616821142, 295.3672485376, 349.2451172085, 324.7220459008], [401.2723388882, 296.7991332864, 441.8441162084, 328.0632324096]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048948_crop.jpg", "text": "What details can you provide for the rectangle defined by the coordinates in ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and five hockey sticks.", "boxes_value": [[76.7751465097, 17.367248537600005, 380.8441162084, 84.66851804160001], [216.37457276920003, 0, 245.6101073977, 45.67791749119999], [161.31445311730002, 0, 202.2440796134, 91.48016358400002], [97.4835815207, 0, 136.4642333668, 63.219177267199996], [76.7751465097, 24.44738769920002, 98.49291994519999, 51.733825689599996], [134.6892700418, 30.01605222400002, 149.1677856427, 56.18878172159998], [133.8340453765, 50.54052736, 167.7233886715, 84.66851804160001], [242.6616821142, 17.367248537600005, 288.2451172085, 46.722045900800026], [340.2723388882, 18.79913328639998, 380.8441162084, 50.063232409600005]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6, 7, 8]]}, {"image_path": "objects365_v1_00048949.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[331.7009277148, 261.5191650304, 492.20581052020003, 331.1472167936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048949_crop.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.7009277148, 17.519165030400018, 201.20581052020003, 87.14721679360002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048949.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, three helmets, a glasses, and a boat.", "boxes_value": [[331.7009277148, 261.5191650304, 492.20581052020003, 331.1472167936], [331.7009277148, 270.9949950976, 405.0114746398, 331.1472167936], [389.66003420190003, 192.0452270592, 508.39807131189997, 314.8559570432], [397.805664032, 261.2829590016, 479.26184078709997, 370.3087768576], [360.5926513798, 271.4550171136, 401.4996337674, 306.6250610176], [424.39404299179995, 261.5191650304, 464.44873049750004, 302.9875488256], [462.34509280189997, 311.9938964992, 492.20581052020003, 328.0927123968], [457.4114990172, 284.4700927488, 499.4761962596, 324.1978149376], [206.3096118643, 190.1182994432, 605.6885397787, 478.7689463808]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 7], [6], [8]]}, {"image_path": "objects365_v1_00048949_crop.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, three helmets, a glasses, and a boat.", "boxes_value": [[40.7009277148, 17.519165030400018, 201.20581052020003, 87.14721679360002], [40.7009277148, 26.994995097599997, 114.01147463979999, 87.14721679360002], [98.66003420190003, 0, 217.39807131189997, 70.85595704320002], [106.80566403199998, 17.282959001599977, 188.26184078709997, 104], [69.59265137979997, 27.45501711359998, 110.49963376739998, 62.62506101759999], [133.39404299179995, 17.519165030400018, 173.44873049750004, 58.987548825600015], [171.34509280189997, 67.99389649919999, 201.20581052020003, 84.09271239679998], [166.41149901720001, 40.47009274880003, 208.4761962596, 80.19781493760001], [0, 0, 241, 104]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 7], [6], [8]]}, {"image_path": "objects365_v1_00048951.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[591.4791259728, 169.4014892544, 765.5930176124, 315.298767104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048951_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[44.479125972800034, 37.40148925439999, 218.5930176124, 183.29876710399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048951.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four cymbals, a person, a hat, and a speaker.", "boxes_value": [[591.4791259728, 169.4014892544, 765.5930176124, 315.298767104], [574.2069091734, 222.1777954304, 611.1502685575999, 231.2937011712], [591.4791259728, 169.4014892544, 646.6544189604, 208.2640380928], [578.4323730658, 309.9600219648, 640.9354248136, 319.3782959104], [719.2451172039999, 270.4428100608, 765.5930176124, 278.9102172672], [667.775390657, 174.4963989504, 738.3037109602, 289.431457536], [713.5346679404, 289.710266112, 750.3181152495999, 315.298767104], [712.003906256, 233.3252563456, 747.2204589714, 279.992553728]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048951_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four cymbals, a person, a hat, and a speaker.", "boxes_value": [[44.479125972800034, 37.40148925439999, 218.5930176124, 183.29876710399998], [27.20690917340005, 90.17779543040001, 64.15026855759993, 99.29370117120001], [44.479125972800034, 37.40148925439999, 99.6544189604, 76.2640380928], [31.432373065799993, 177.96002196479998, 93.93542481359998, 187.37829591040003], [172.24511720399994, 138.4428100608, 218.5930176124, 146.91021726719998], [120.775390657, 42.49639895039999, 191.30371096019996, 157.43145753599998], [166.53466794040003, 157.710266112, 203.31811524959994, 183.29876710399998], [165.00390625600005, 101.3252563456, 200.22045897140003, 147.99255372800002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00048953.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[14.432373043200002, 322.4765014528, 137.5609741056, 510.8049926656]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048953_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[14.432373043200002, 47.476501452799994, 137.5609741056, 235.80499266560003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048953.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a picture, a moniter, three chairs, and two desks.", "boxes_value": [[14.432373043200002, 322.4765014528, 137.5609741056, 510.8049926656], [27.2094116352, 322.4765014528, 44.537902848, 343.9638671872], [121.0988159232, 362.3682250752, 137.5609741056, 386.6873168896], [122.1143188224, 363.2224121344, 138.1038207744, 385.02142336], [14.432373043200002, 470.1047973888, 43.1619262464, 510.8049926656], [15.6294555648, 480.0803222528, 127.7544556032, 510.0069580288], [115.7838134784, 473.695983872, 146.109436032, 511.2039794688], [86.25622556159999, 452.5478515712, 110.99554444799999, 480.4793701376], [94.63568117759999, 458.932189952, 178.430175744, 480.8783569408]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6, 7], [5, 8]]}, {"image_path": "objects365_v1_00048953_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a picture, a moniter, three chairs, and two desks.", "boxes_value": [[14.432373043200002, 47.476501452799994, 137.5609741056, 235.80499266560003], [27.2094116352, 47.476501452799994, 44.537902848, 68.96386718719998], [121.0988159232, 87.36822507519997, 137.5609741056, 111.68731688960003], [122.1143188224, 88.22241213439997, 138.1038207744, 110.02142335999997], [14.432373043200002, 195.10479738880002, 43.1619262464, 235.80499266560003], [15.6294555648, 205.0803222528, 127.7544556032, 235.0069580288], [115.7838134784, 198.695983872, 146.109436032, 236.20397946880001], [86.25622556159999, 177.5478515712, 110.99554444799999, 205.47937013759997], [94.63568117759999, 183.932189952, 168, 205.87835694080002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 6, 7], [5, 8]]}, {"image_path": "objects365_v1_00048957.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each object you identify.", "boxes_value": [[289.8580322304, 0, 510.3580322304, 277.8052368406]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048957_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each object you identify.", "boxes_value": [[55.8580322304, 0, 276.3580322304, 277.8052368406]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048957.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, a picture, a person, and two gloves.", "boxes_value": [[289.8580322304, 0, 510.3580322304, 277.8052368406], [289.8580322304, 0, 510.3580322304, 74.841247551], [135.94036864, 107.7000732318, 400.54040524799996, 160.44714357840002], [453.2825317376, 152.0097656056, 486.5882568192, 212.3201293948], [458.7014770688, 159.889038057, 483.3475341824, 207.76483152740002], [267.0226440192, 239.496582052, 308.5237426688, 277.8052368406], [307.9916381696, 238.4324340874, 359.0698852352, 277.8052368406]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048957_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two lamps, a picture, a person, and two gloves.", "boxes_value": [[55.8580322304, 0, 276.3580322304, 277.8052368406], [55.8580322304, 0, 276.3580322304, 74.841247551], [0, 107.7000732318, 166.54040524799996, 160.44714357840002], [219.28253173759998, 152.0097656056, 252.5882568192, 212.3201293948], [224.7014770688, 159.889038057, 249.3475341824, 207.76483152740002], [33.022644019200015, 239.496582052, 74.52374266880003, 277.8052368406], [73.99163816959998, 238.4324340874, 125.06988523519999, 277.8052368406]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048971.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[296.3823242058, 93.042419456, 607.120971654, 478.812194816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048971_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object.", "boxes_value": [[78.38232420579999, 93.042419456, 389.12097165399996, 478.812194816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048971.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a chair, a cabinet, a bed, two handbags, a hat, and a stuffed toy.", "boxes_value": [[296.3823242058, 93.042419456, 607.120971654, 478.812194816], [492.76562502210004, 415.5701904384, 575.9331054783, 478.812194816], [480.63708497550004, 319.4077758976, 638.308837894, 461.4855956992], [406.9991455103, 204.1860961792, 607.120971654, 371.3874511872], [103.7842407406, 161.7360229376, 463.310546882, 467.5499267584], [439.1489257972, 172.952758784, 500.0561523722, 218.7550048768], [296.3823242058, 93.042419456, 343.1591186467, 158.3349609472], [538.2294921662, 200.1856078848, 599.6439208706, 225.3433227776], [508.2783203004, 187.4503173632, 534.7623291067, 221.7236327936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7], [8]]}, {"image_path": "objects365_v1_00048971_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a chair, a cabinet, a bed, two handbags, a hat, and a stuffed toy.", "boxes_value": [[78.38232420579999, 93.042419456, 389.12097165399996, 478.812194816], [274.76562502210004, 415.5701904384, 357.93310547830004, 478.812194816], [262.63708497550004, 319.4077758976, 420.308837894, 461.4855956992], [188.99914551030002, 204.1860961792, 389.12097165399996, 371.3874511872], [0, 161.7360229376, 245.31054688199998, 467.5499267584], [221.14892579719998, 172.952758784, 282.0561523722, 218.7550048768], [78.38232420579999, 93.042419456, 125.1591186467, 158.3349609472], [320.2294921662, 200.1856078848, 381.6439208706, 225.3433227776], [290.2783203004, 187.4503173632, 316.7623291067, 221.7236327936]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6], [7], [8]]}, {"image_path": "objects365_v1_00048973.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[437.23474119969995, 179.9075927552, 682.2435303047, 320.2854614016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048973_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations.", "boxes_value": [[62.234741199699954, 35.9075927552, 307.2435303047, 176.2854614016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048973.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two cars, and three traffic lights.", "boxes_value": [[437.23474119969995, 179.9075927552, 682.2435303047, 320.2854614016], [437.23474119969995, 218.6800537088, 477.87683104769997, 256.4191894528], [465.29711917070006, 233.1950683648, 534.0018310255999, 286.4169922048], [559.1612549162, 223.518371584, 593.0296631001, 320.2854614016], [667.2059326232, 182.8561401344, 682.2435303047, 226.7894897664], [661.6036376904, 179.9075927552, 678.1156005617, 212.931335424]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048973_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Please mention the objects and their locations. For your reference, objects involved in this region include two cars, and three traffic lights.", "boxes_value": [[62.234741199699954, 35.9075927552, 307.2435303047, 176.2854614016], [62.234741199699954, 74.68005370879999, 102.87683104769997, 112.41918945280003], [90.29711917070006, 89.1950683648, 159.00183102559993, 142.4169922048], [184.16125491620005, 79.518371584, 218.02966310010004, 176.2854614016], [292.20593262320006, 38.85614013439999, 307.2435303047, 82.78948976640001], [286.6036376904, 35.9075927552, 303.11560056170003, 68.931335424]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00048974.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[260.662178469, 396.3495005696, 524.343292396, 501.4596995584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048974_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[66.66217846900003, 26.349500569600025, 330.34329239600004, 131.45969955840002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048974.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two boots, and a sneakers.", "boxes_value": [[260.662178469, 396.3495005696, 524.343292396, 501.4596995584], [92.044128407, 54.4622192128, 628.7296142820001, 512.012817408], [179.423522971, 16.3319702016, 464.970947287, 501.62493895680007], [260.662178469, 396.3495005696, 355.376146587, 501.4596995584], [406.883029707, 328.3757554176, 465.25195948, 487.5347497472], [497.728780549, 444.0114030592, 524.343292396, 469.8194146304]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048974_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two boots, and a sneakers.", "boxes_value": [[66.66217846900003, 26.349500569600025, 330.34329239600004, 131.45969955840002], [0, 0, 396, 142], [0, 0, 270.970947287, 131.62493895680007], [66.66217846900003, 26.349500569600025, 161.376146587, 131.45969955840002], [212.883029707, 0, 271.25195948, 117.53474974720001], [303.728780549, 74.0114030592, 330.34329239600004, 99.81941463039999]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00048976.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[257.772705088, 160.617248544, 420.281738304, 257.945190432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048976_crop.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.77270508800001, 24.617248544000006, 203.281738304, 121.945190432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048976.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a potted plant, four people, and a moniter.", "boxes_value": [[257.772705088, 160.617248544, 420.281738304, 257.945190432], [295.191833472, 183.065002464, 314.766723648, 214.60229491200002], [260.969482432, 160.617248544, 278.84368896, 207.768066384], [374.78521728, 182.991333024, 408.59460448, 217.11529540799998], [257.772705088, 238.783264176, 276.711914048, 257.945190432], [298.547607424, 219.84405518399998, 339.099731456, 310.52923584], [352.022888192, 225.414428688, 391.238159168, 277.552795392], [358.99102784, 176.95684814400002, 420.281738304, 223.45318603200002]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048976_crop.jpg", "text": "In , what elements can be found within the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a potted plant, four people, and a moniter.", "boxes_value": [[40.77270508800001, 24.617248544000006, 203.281738304, 121.945190432], [78.19183347199998, 47.065002464, 97.76672364799998, 78.60229491200002], [43.96948243200001, 24.617248544000006, 61.84368896000001, 71.76806638400001], [157.78521727999998, 46.991333024, 191.59460448, 81.11529540799998], [40.77270508800001, 102.78326417599999, 59.71191404799998, 121.945190432], [81.54760742399998, 83.84405518399998, 122.09973145599997, 146], [135.02288819199998, 89.41442868799999, 174.23815916799998, 141.552795392], [141.99102784000002, 40.95684814400002, 203.281738304, 87.45318603200002]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00048980.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[117.6322631602, 321.9396972544, 337.7283325507, 418.9305419776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048980_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify.", "boxes_value": [[55.6322631602, 24.939697254400016, 275.7283325507, 121.9305419776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048980.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two people, a bottle, and a cup.", "boxes_value": [[117.6322631602, 321.9396972544, 337.7283325507, 418.9305419776], [222.06134035219998, 323.0349731328, 260.2308349662, 376.1403808768], [303.8176879883, 318.740600576, 333.68951415920003, 374.2167968768], [254.2867431333, 270.7117919744, 323.3779297186, 486.2191161856], [321.6616210676, 321.9396972544, 337.7283325507, 377.7932739072], [117.6322631602, 348.5624999936, 146.1598510771, 418.9305419776], [143.1077270647, 378.9890136576, 161.7962036455, 418.8112182784]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048980_crop.jpg", "text": "Can you give me a description of the region in image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, two people, a bottle, and a cup.", "boxes_value": [[55.6322631602, 24.939697254400016, 275.7283325507, 121.9305419776], [160.06134035219998, 26.03497313280002, 198.2308349662, 79.14038087680001], [241.8176879883, 21.74060057600002, 271.68951415920003, 77.2167968768], [192.2867431333, 0, 261.3779297186, 146], [259.6616210676, 24.939697254400016, 275.7283325507, 80.79327390719999], [55.6322631602, 51.5624999936, 84.1598510771, 121.9305419776], [81.10772706469999, 81.98901365760003, 99.79620364549999, 121.81121827840002]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00048983.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[139.1895141772, 149.8649371136, 385.4758221612, 309.0111694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048983_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[62.18951417720001, 39.86493711360001, 308.4758221612, 199.0111694336]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048983.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, and four street lights.", "boxes_value": [[139.1895141772, 149.8649371136, 385.4758221612, 309.0111694336], [139.1895141772, 270.7764282368, 155.5783081314, 309.0111694336], [336.3914704586, 192.0602569728, 348.44727619360003, 220.7645561856], [376.00340346, 154.1705819648, 385.4758221612, 186.8934831104], [287.8812047718, 149.8649371136, 299.362924491, 194.643643904], [221.8613165356, 189.7639130112, 239.06890531899998, 223.6561137152]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048983_crop.jpg", "text": "I would like to know about the section of the image enclosed by the rectangle . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, and four street lights.", "boxes_value": [[62.18951417720001, 39.86493711360001, 308.4758221612, 199.0111694336], [62.18951417720001, 160.77642823679997, 78.57830813140001, 199.0111694336], [259.3914704586, 82.0602569728, 271.44727619360003, 110.76455618559999], [299.00340346, 44.17058196479999, 308.4758221612, 76.89348311040001], [210.88120477180001, 39.86493711360001, 222.362924491, 84.64364390399999], [144.8613165356, 79.7639130112, 162.06890531899998, 113.65611371520001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048984.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[119.8328857716, 259.2782593024, 418.4942627308, 311.360900864]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048984_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[74.8328857716, 13.278259302399988, 373.4942627308, 65.36090086399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048984.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two street lights, and a traffic light.", "boxes_value": [[119.8328857716, 259.2782593024, 418.4942627308, 311.360900864], [119.8328857716, 268.1493530112, 141.2365722616, 305.9694213632], [325.3090820436, 269.6442871296, 339.49609377400003, 301.1708374016], [191.658508338, 188.696716288, 223.5723266568, 316.6743774208], [224.7907104124, 279.7494507008, 237.4332885876, 304.9014892544], [407.317748994, 259.2782593024, 418.4942627308, 311.360900864]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048984_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two street lights, and a traffic light.", "boxes_value": [[74.8328857716, 13.278259302399988, 373.4942627308, 65.36090086399997], [74.8328857716, 22.14935301119999, 96.2365722616, 59.96942136320001], [280.3090820436, 23.644287129600002, 294.49609377400003, 55.17083740160001], [146.658508338, 0, 178.5723266568, 70.6743774208], [179.7907104124, 33.749450700800026, 192.4332885876, 58.90148925440002], [362.317748994, 13.278259302399988, 373.4942627308, 65.36090086399997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00048985.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[319.7887573422, 236.8427124224, 493.2388916357, 512.646972672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048985_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[43.78875734219997, 69.84271242240001, 217.23889163569999, 345]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048985.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a book, a bottle, a wine glass, and two cars.", "boxes_value": [[319.7887573422, 236.8427124224, 493.2388916357, 512.646972672], [405.73120118689997, 82.073791488, 642.5675048891001, 511.52142336], [397.33251950259995, 412.1437378048, 493.2388916357, 462.1178588672], [336.3329467489, 327.177612288, 420.7955322397, 512.646972672], [319.7887573422, 386.3884887552, 374.6458740266, 512.646972672], [229.6174316418, 252.1440429568, 407.7849121111, 368.8150024192], [325.3134155284, 236.8427124224, 418.6314697568, 280.757080064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048985_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a book, a bottle, a wine glass, and two cars.", "boxes_value": [[43.78875734219997, 69.84271242240001, 217.23889163569999, 345], [129.73120118689997, 0, 260, 344.52142336], [121.33251950259995, 245.14373780480003, 217.23889163569999, 295.1178588672], [60.332946748899985, 160.17761228799998, 144.79553223969998, 345], [43.78875734219997, 219.3884887552, 98.64587402659998, 345], [0, 85.1440429568, 131.78491211110003, 201.81500241920003], [49.313415528400014, 69.84271242240001, 142.63146975680002, 113.75708006399998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00048987.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference.", "boxes_value": [[207.66027833599998, 374.00903318400003, 548.5985107199999, 456.60900878399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048987_crop.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference.", "boxes_value": [[85.66027833599998, 21.00903318400003, 426.5985107199999, 103.60900878399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048987.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include two vans, a car, a suv, and a trolley.", "boxes_value": [[207.66027833599998, 374.00903318400003, 548.5985107199999, 456.60900878399997], [207.66027833599998, 378.00341798399995, 277.38488768, 424.86938476800003], [280.095642112, 389.392700208, 469.26843264, 455.479248048], [351.78039552, 374.710083024, 490.56213376000005, 454.48596192], [466.926391616, 401.46624753599997, 512.8786620799999, 456.60900878399997], [415.278808576, 374.00903318400003, 548.5985107199999, 429.16625976]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00048987_crop.jpg", "text": "In the image , please describe the bounding box . Give coordinates for the items you reference. For your reference, objects involved in this region include two vans, a car, a suv, and a trolley.", "boxes_value": [[85.66027833599998, 21.00903318400003, 426.5985107199999, 103.60900878399997], [85.66027833599998, 25.003417983999952, 155.38488768000002, 71.86938476800003], [158.095642112, 36.39270020800001, 347.26843264, 102.47924804799999], [229.78039552, 21.710083024000028, 368.56213376000005, 101.48596192000002], [344.926391616, 48.46624753599997, 390.8786620799999, 103.60900878399997], [293.278808576, 21.00903318400003, 426.5985107199999, 76.16625976]], "boxes_seq": [[0], [0], [1, 5], [2], [3], [4]]}, {"image_path": "objects365_v1_00048988.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[499.18078611, 296.7145996288, 571.4470214712, 506.5867309568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048988_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[18.180786109999985, 52.71459962879999, 90.44702147119995, 262.5867309568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048988.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a handbag, a hat, and a traffic light.", "boxes_value": [[499.18078611, 296.7145996288, 571.4470214712, 506.5867309568], [519.1314697536, 402.7655639552, 555.4868164211999, 496.5358276608], [437.1102294984, 413.1845092864, 553.7133789396, 511.6099853312], [548.177978502, 476.1579589632, 571.4470214712, 506.5867309568], [522.4030761576, 404.20281984, 541.0183105188, 422.8181152256], [499.18078611, 296.7145996288, 516.430542006, 332.6318969856]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048988_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a handbag, a hat, and a traffic light.", "boxes_value": [[18.180786109999985, 52.71459962879999, 90.44702147119995, 262.5867309568], [38.13146975359996, 158.76556395519998, 74.48681642119993, 252.5358276608], [0, 169.1845092864, 72.71337893960003, 267.6099853312], [67.17797850199997, 232.15795896319997, 90.44702147119995, 262.5867309568], [41.403076157600026, 160.20281984000002, 60.018310518800035, 178.81811522560002], [18.180786109999985, 52.71459962879999, 35.430542005999996, 88.63189698560001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00048990.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[60.5706787374, 3.9633788928, 229.66448972709998, 103.6079711744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048990_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[42.5706787374, 3.9633788928, 211.66448972709998, 103.6079711744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048990.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[60.5706787374, 3.9633788928, 229.66448972709998, 103.6079711744], [207.0180054024, 90.0200805888, 229.66448972709998, 103.6079711744], [157.1956786776, 16.0415038976, 185.88128663560002, 36.4233398272], [114.9222412123, 3.9633788928, 142.0980224839, 25.1000976384], [98.3148193241, 68.8833007616, 121.7161865302, 84.7359008768], [60.5706787374, 62.8442382848, 85.48181150139999, 79.451721216]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048990_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include five lamps.", "boxes_value": [[42.5706787374, 3.9633788928, 211.66448972709998, 103.6079711744], [189.0180054024, 90.0200805888, 211.66448972709998, 103.6079711744], [139.1956786776, 16.0415038976, 167.88128663560002, 36.4233398272], [96.9222412123, 3.9633788928, 124.0980224839, 25.1000976384], [80.3148193241, 68.8833007616, 103.7161865302, 84.7359008768], [42.5706787374, 62.8442382848, 67.48181150139999, 79.451721216]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00048995.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[624.0469970688, 72.516052224, 767.1362304768, 258.8149413888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048995_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations.", "boxes_value": [[36.04699706880001, 47.516052224000006, 179.13623047680005, 233.81494138879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00048995.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a backpack, a hat, a handbag, two bicycles, and a van.", "boxes_value": [[624.0469970688, 72.516052224, 767.1362304768, 258.8149413888], [624.0469970688, 114.6591186432, 666.2132568576001, 164.123352064], [713.128662144, 72.516052224, 737.2020263424, 86.3035278336], [728.8857421824, 148.6754760704, 757.1173095936, 173.6242675712], [589.2972412416, 143.822387712, 697.182983424, 253.3959961088], [686.8377685248, 139.5986938368, 767.1362304768, 258.8149413888], [667.4912109312, 79.2419433472, 705.3571777536, 121.0458984448]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00048995_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a backpack, a hat, a handbag, two bicycles, and a van.", "boxes_value": [[36.04699706880001, 47.516052224000006, 179.13623047680005, 233.81494138879998], [36.04699706880001, 89.6591186432, 78.21325685760007, 139.123352064], [125.12866214400003, 47.516052224000006, 149.20202634240002, 61.3035278336], [140.8857421824, 123.67547607040001, 169.1173095936, 148.6242675712], [1.297241241599977, 118.822387712, 109.18298342399999, 228.3959961088], [98.83776852480003, 114.59869383680001, 179.13623047680005, 233.81494138879998], [79.49121093120004, 54.24194334720001, 117.35717775360001, 96.0458984448]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049002.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Specify the location of each mentioned object.", "boxes_value": [[442.1668701132, 180.5803222528, 765.1115722292, 239.317871104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049002_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Specify the location of each mentioned object.", "boxes_value": [[81.16687011319999, 15.580322252799988, 404.11157222919996, 74.317871104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049002.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Specify the location of each mentioned object. For your reference, objects involved in this region include four benches, and a person.", "boxes_value": [[442.1668701132, 180.5803222528, 765.1115722292, 239.317871104], [442.1668701132, 180.5803222528, 477.629882776, 196.116516096], [514.4439697052, 185.96545408, 550.5825195012, 201.5016479744], [585.3702392916, 192.6828002816, 622.5219726184, 211.5964355584], [729.7414550983999, 220.4537964032, 765.1115722292, 239.317871104], [727.1789551124, 214.3297119232, 741.7250976916, 238.7888794112]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049002_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Specify the location of each mentioned object. For your reference, objects involved in this region include four benches, and a person.", "boxes_value": [[81.16687011319999, 15.580322252799988, 404.11157222919996, 74.317871104], [81.16687011319999, 15.580322252799988, 116.62988277599999, 31.116516095999998], [153.44396970519995, 20.96545408, 189.58251950119995, 36.50164797439999], [224.3702392916, 27.68280028160001, 261.5219726184, 46.5964355584], [368.7414550983999, 55.45379640319999, 404.11157222919996, 74.317871104], [366.1789551124, 49.329711923199994, 380.7250976916, 73.78887941120001]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049005.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[444.2892262247, 227.6073846272, 661.2422026279, 305.4459228672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049005_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations.", "boxes_value": [[54.28922622469997, 19.60738462719999, 271.24220262790004, 97.44592286720001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049005.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, a person, two ballons, and two hats.", "boxes_value": [[444.2892262247, 227.6073846272, 661.2422026279, 305.4459228672], [475.2703857083, 172.515319808, 530.2481689589, 348.4439697408], [626.6981200971001, 225.8555297792, 675.7933349937, 360.2535400448], [495.4309082158, 285.3305664, 536.2650146348, 305.4459228672], [580.1671142297, 287.336242688, 621.2093505557, 308.9974365184], [444.2892262247, 227.6073846272, 465.7097103185, 254.9859104256], [636.6291109154, 228.4550600704, 661.2422026279, 254.5449373184]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049005_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a potted plant, a person, two ballons, and two hats.", "boxes_value": [[54.28922622469997, 19.60738462719999, 271.24220262790004, 97.44592286720001], [85.27038570830001, 0, 140.2481689589, 116], [236.69812009710006, 17.855529779199998, 285.7933349937, 116], [105.4309082158, 77.33056640000001, 146.26501463479997, 97.44592286720001], [190.16711422970002, 79.33624268800003, 231.20935055569998, 100.99743651839998], [54.28922622469997, 19.60738462719999, 75.70971031850002, 46.9859104256], [246.62911091540002, 20.45506007040001, 271.24220262790004, 46.5449373184]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049006.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[13.6476440519, 392.4260864512, 334.3383178518, 512.0847168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049006_crop.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[13.6476440519, 30.42608645119998, 334.3383178518, 150]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049006.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[13.6476440519, 392.4260864512, 334.3383178518, 512.0847168], [13.6476440519, 392.4260864512, 39.9763183864, 450.4385375744], [64.5200805447, 399.7891845632, 118.07000735390001, 511.7978515456], [99.3115844699, 404.7935791104, 149.2277831928, 512.0847168], [287.5014037974, 405.1760253952, 329.9588622942, 511.8934326272], [317.8906250073, 421.6843261952, 334.3383178518, 467.502929664]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049006_crop.jpg", "text": "Regarding the image , what's going on in the section ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[13.6476440519, 30.42608645119998, 334.3383178518, 150], [13.6476440519, 30.42608645119998, 39.9763183864, 88.43853757440002], [64.5200805447, 37.789184563200024, 118.07000735390001, 149.7978515456], [99.3115844699, 42.7935791104, 149.2277831928, 150], [287.5014037974, 43.176025395199986, 329.9588622942, 149.8934326272], [317.8906250073, 59.684326195200015, 334.3383178518, 105.50292966400002]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049009.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[258.667175296, 99.83056641600001, 405.708251968, 300.168640128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049009_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[37.66717529599998, 50.83056641600001, 184.708251968, 251.168640128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049009.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[258.667175296, 99.83056641600001, 405.708251968, 300.168640128], [258.667175296, 253.64135740800003, 286.104125952, 281.078308128], [354.62908934399996, 246.065124528, 405.708251968, 300.168640128], [308.59069824, 217.16510011199998, 349.924438464, 299.83258056], [256.002807616, 238.213562016, 302.136230464, 301.276733376], [365.13903808000003, 99.83056641600001, 392.84191897600004, 207.028625472]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049009_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a flower, and four people.", "boxes_value": [[37.66717529599998, 50.83056641600001, 184.708251968, 251.168640128], [37.66717529599998, 204.64135740800003, 65.104125952, 232.078308128], [133.62908934399996, 197.065124528, 184.708251968, 251.168640128], [87.59069824, 168.16510011199998, 128.924438464, 250.83258056], [35.002807615999984, 189.213562016, 81.136230464, 252.27673337599998], [144.13903808000003, 50.83056641600001, 171.84191897600004, 158.028625472]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049010.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[291.7449340907, 36.377380352, 585.0286865103, 300.2649536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049010_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates.", "boxes_value": [[73.74493409069999, 36.377380352, 367.02868651029996, 300.2649536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049010.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, a moniter, and an extractor.", "boxes_value": [[291.7449340907, 36.377380352, 585.0286865103, 300.2649536], [345.0499267866, 36.377380352, 585.0286865103, 202.04449464319998], [349.0892333865, 201.1370849792, 431.44726563800003, 311.0721435648], [432.192626953, 224.9874267648, 495.9176025414, 300.2649536], [291.7449340907, 248.2513427968, 326.23681643410004, 285.4484863488], [397.5407714617, 189.8499756032, 480.8790283065, 232.388305664]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049010_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a cabinet, two people, a moniter, and an extractor.", "boxes_value": [[73.74493409069999, 36.377380352, 367.02868651029996, 300.2649536], [127.0499267866, 36.377380352, 367.02868651029996, 202.04449464319998], [131.08923338649998, 201.1370849792, 213.44726563800003, 311.0721435648], [214.192626953, 224.9874267648, 277.9176025414, 300.2649536], [73.74493409069999, 248.2513427968, 108.23681643410004, 285.4484863488], [179.5407714617, 189.8499756032, 262.8790283065, 232.388305664]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049013.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each object you identify.", "boxes_value": [[8.1853637776, 82.121459968, 568.8170166098, 186.1003418112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049013_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each object you identify.", "boxes_value": [[8.1853637776, 26.121459967999996, 568.8170166098, 130.1003418112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049013.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a storage box, two people, and a barrel.", "boxes_value": [[8.1853637776, 82.121459968, 568.8170166098, 186.1003418112], [0, 98.4320678912, 117.6465454415, 194.8751220736], [308.86224361850003, 82.121459968, 423.54260256640003, 148.124511744], [543.4716797094001, 136.6574096896, 568.8170166098, 183.7272338944], [7.9152221833, 63.5632324096, 22.919738759799998, 103.4085083136], [8.1853637776, 147.6378173952, 40.1926880054, 186.1003418112]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049013_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a storage box, two people, and a barrel.", "boxes_value": [[8.1853637776, 26.121459967999996, 568.8170166098, 130.1003418112], [0, 42.432067891200006, 117.6465454415, 138.8751220736], [308.86224361850003, 26.121459967999996, 423.54260256640003, 92.12451174399999], [543.4716797094001, 80.6574096896, 568.8170166098, 127.7272338944], [7.9152221833, 7.563232409599998, 22.919738759799998, 47.408508313599995], [8.1853637776, 91.63781739519999, 40.1926880054, 130.1003418112]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049015.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations.", "boxes_value": [[277.999389668, 289.38189696, 365.786010744, 472.4583130112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049015_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations.", "boxes_value": [[21.999389667999992, 46.381896960000006, 109.78601074400001, 229.4583130112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049015.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations. For your reference, objects involved in this region include a person, two bracelets, a necklace, and a desk.", "boxes_value": [[277.999389668, 289.38189696, 365.786010744, 472.4583130112], [122.54425052100001, 55.1958618112, 471.90344239499996, 512.0064697344], [306.51123048700003, 398.177307136, 363.53503417, 472.4583130112], [293.00561521, 420.6867065344, 365.786010744, 467.2061157376], [277.999389668, 289.38189696, 338.774658222, 379.4194946048], [0, 207.7481079296, 579.5900879229999, 419.1160278528]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049015_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please mention the objects and their locations. For your reference, objects involved in this region include a person, two bracelets, a necklace, and a desk.", "boxes_value": [[21.999389667999992, 46.381896960000006, 109.78601074400001, 229.4583130112], [0, 0, 131, 269], [50.511230487000034, 155.17730713600002, 107.53503417000002, 229.4583130112], [37.005615209999974, 177.6867065344, 109.78601074400001, 224.2061157376], [21.999389667999992, 46.381896960000006, 82.77465822200003, 136.41949460479998], [0, 0, 131, 176.11602785280002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049016.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention.", "boxes_value": [[268.0351562539, 80.6210937344, 459.1384277133, 482.1170043904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049016_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention.", "boxes_value": [[48.03515625390003, 80.6210937344, 239.13842771330002, 482.1170043904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049016.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a book, a picture, two people, and a suv.", "boxes_value": [[268.0351562539, 80.6210937344, 459.1384277133, 482.1170043904], [414.10559081509996, 380.7941894656, 507.69006346820004, 481.9872436736], [268.0351562539, 190.5117797888, 405.99987795109996, 331.0432739328], [301.0432738932, 273.7032470528, 442.74768067680003, 391.7992553472], [278.1456909021, 80.6210937344, 358.25463864510004, 198.781860352], [412.01208496419997, 344.5961913856, 459.1384277133, 482.1170043904]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049016_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a book, a picture, two people, and a suv.", "boxes_value": [[48.03515625390003, 80.6210937344, 239.13842771330002, 482.1170043904], [194.10559081509996, 380.7941894656, 286, 481.9872436736], [48.03515625390003, 190.5117797888, 185.99987795109996, 331.0432739328], [81.04327389320002, 273.7032470528, 222.74768067680003, 391.7992553472], [58.1456909021, 80.6210937344, 138.25463864510004, 198.781860352], [192.01208496419997, 344.5961913856, 239.13842771330002, 482.1170043904]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049017.jpg", "text": "Describe the selected rectangular area in the photo . Please mention the objects and their locations.", "boxes_value": [[247.11822506160001, 61.2561035264, 377.5074463216, 119.840759296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049017_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please mention the objects and their locations.", "boxes_value": [[33.118225061600015, 15.256103526399997, 163.5074463216, 73.840759296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049017.jpg", "text": "Describe the selected rectangular area in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include two cymbals, and three microphones.", "boxes_value": [[247.11822506160001, 61.2561035264, 377.5074463216, 119.840759296], [276.9975336488, 84.7006982144, 370.291057588, 101.0202650624], [295.3892822108, 108.1820068352, 368.35058594839995, 119.840759296], [235.9868164284, 57.995910656, 270.1198730392, 74.8436279296], [350.2136230168, 61.2561035264, 377.5074463216, 79.3290405376], [247.11822506160001, 63.0375366144, 280.2719726524, 99.7320556544]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049017_crop.jpg", "text": "Describe the selected rectangular area in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include two cymbals, and three microphones.", "boxes_value": [[33.118225061600015, 15.256103526399997, 163.5074463216, 73.840759296], [62.997533648800015, 38.700698214400006, 156.291057588, 55.0202650624], [81.38928221079999, 62.1820068352, 154.35058594839995, 73.840759296], [21.986816428400004, 11.995910656, 56.1198730392, 28.843627929600004], [136.2136230168, 15.256103526399997, 163.5074463216, 33.329040537599994], [33.118225061600015, 17.037536614399997, 66.2719726524, 53.7320556544]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049020.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[328.2927856221, 289.0497436672, 472.2962646402, 414.0394287104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049020_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[36.292785622099984, 32.049743667200005, 180.29626464019998, 157.0394287104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049020.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a guitar, a piano, and two people.", "boxes_value": [[328.2927856221, 289.0497436672, 472.2962646402, 414.0394287104], [367.12994385729996, 379.0627441152, 407.5255127021, 414.0394287104], [334.6540527356, 334.2313232384, 388.7589111601, 352.040832512], [398.3724365283, 342.2368774656, 472.2962646402, 408.2207031296], [374.65191650250006, 326.3249511936, 422.8905029284, 410.8990478336], [328.2927856221, 289.0497436672, 377.7843017351, 395.2371826176]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049020_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a stool, a guitar, a piano, and two people.", "boxes_value": [[36.292785622099984, 32.049743667200005, 180.29626464019998, 157.0394287104], [75.12994385729996, 122.06274411520002, 115.5255127021, 157.0394287104], [42.654052735599976, 77.23132323840002, 96.75891116010001, 95.04083251200001], [106.3724365283, 85.2368774656, 180.29626464019998, 151.2207031296], [82.65191650250006, 69.32495119359999, 130.8905029284, 153.89904783359998], [36.292785622099984, 32.049743667200005, 85.78430173509997, 138.2371826176]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049021.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[149.9801025203, 107.8565673984, 355.00927731359997, 344.7196655104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049021_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[51.980102520299994, 59.8565673984, 257.00927731359997, 296.7196655104]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049021.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and two boats.", "boxes_value": [[149.9801025203, 107.8565673984, 355.00927731359997, 344.7196655104], [337.0187987959, 319.5330200064, 355.00927731359997, 344.7196655104], [326.48156737429997, 318.7619629056, 340.1029052811, 342.4066162176], [305.1867065221, 315.6411743232, 321.9288940439, 343.8385009664], [149.9801025203, 107.8565673984, 337.3848877152, 154.2760009728], [291.72216799700004, 125.2769775616, 342.243774418, 134.530029312]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049021_crop.jpg", "text": "Please elucidate the area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and two boats.", "boxes_value": [[51.980102520299994, 59.8565673984, 257.00927731359997, 296.7196655104], [239.0187987959, 271.5330200064, 257.00927731359997, 296.7196655104], [228.48156737429997, 270.7619629056, 242.1029052811, 294.4066162176], [207.1867065221, 267.6411743232, 223.92889404390002, 295.8385009664], [51.980102520299994, 59.8565673984, 239.38488771520002, 106.2760009728], [193.72216799700004, 77.2769775616, 244.243774418, 86.53002931200001]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049022.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[82.053771939, 434.929138176, 321.455566422, 511.8227538944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049022_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[60.053771939, 19.92913817599998, 299.455566422, 96.82275389440002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049022.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a bus, and a car.", "boxes_value": [[82.053771939, 434.929138176, 321.455566422, 511.8227538944], [190.2672729675, 436.9431152128, 204.90515139150003, 482.6370849792], [82.053771939, 461.8376464896, 107.21990969550001, 511.8227538944], [105.137146017, 468.4328613376, 128.04699705750002, 490.6484985344], [173.1013183215, 434.929138176, 321.455566422, 478.7907715072], [173.1013183215, 458.1997070336, 193.134338361, 483.2409668096]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049022_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a bus, and a car.", "boxes_value": [[60.053771939, 19.92913817599998, 299.455566422, 96.82275389440002], [168.2672729675, 21.943115212800024, 182.90515139150003, 67.63708497919998], [60.053771939, 46.83764648959999, 85.21990969550001, 96.82275389440002], [83.137146017, 53.4328613376, 106.04699705750002, 75.6484985344], [151.1013183215, 19.92913817599998, 299.455566422, 63.79077150720002], [151.1013183215, 43.19970703360002, 171.134338361, 68.24096680960002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049023.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for each element you describe.", "boxes_value": [[430.3726354187, 215.9394531328, 683.0028076080999, 511.8941650432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049023_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for each element you describe.", "boxes_value": [[63.372635418699986, 74.9394531328, 316, 370.8941650432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049023.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a hat, and a boat.", "boxes_value": [[430.3726354187, 215.9394531328, 683.0028076080999, 511.8941650432], [583.0286865012, 275.5399170048, 683.0028076080999, 511.8941650432], [647.6846923785, 215.9394531328, 668.0102539132, 280.981201152], [429.1180420075, 291.0295410176, 557.4814453071, 511.9627075072], [430.3726354187, 290.8128128512, 475.5961724054, 340.3433533952], [423.4633789376, 258.7261352448, 625.3365478495, 307.4766845952]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049023_crop.jpg", "text": "Please elaborate on the area with the coordinates in the visual . Provide the coordinates for each element you describe. For your reference, objects involved in this region include three people, a hat, and a boat.", "boxes_value": [[63.372635418699986, 74.9394531328, 316, 370.8941650432], [216.0286865012, 134.53991700479997, 316, 370.8941650432], [280.6846923785, 74.9394531328, 301.0102539132, 139.98120115199998], [62.11804200749998, 150.02954101760002, 190.4814453071, 370.9627075072], [63.372635418699986, 149.81281285120002, 108.59617240540001, 199.34335339519998], [56.4633789376, 117.72613524479999, 258.3365478495, 166.47668459520003]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049025.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[0.2567138816, 190.90423584869998, 234.2925414912, 516.8616943314]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049025_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[0.2567138816, 81.90423584869998, 234.2925414912, 407.86169433140003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049025.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, and four moniters.", "boxes_value": [[0.2567138816, 190.90423584869998, 234.2925414912, 516.8616943314], [0.2567138816, 190.90423584869998, 160.6161499136, 516.8616943314], [184.502197248, 271.9638061294, 234.379699712, 317.28259279860004], [182.0822143488, 319.1895752166, 234.2925414912, 366.1261596817], [124.8618164224, 289.65649415229996, 177.5995483648, 338.7025146681], [72.65148928, 291.765991209, 124.598144512, 337.9114379751]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049025_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, and four moniters.", "boxes_value": [[0.2567138816, 81.90423584869998, 234.2925414912, 407.86169433140003], [0.2567138816, 81.90423584869998, 160.6161499136, 407.86169433140003], [184.502197248, 162.96380612939998, 234.379699712, 208.28259279860004], [182.0822143488, 210.18957521660002, 234.2925414912, 257.1261596817], [124.8618164224, 180.65649415229996, 177.5995483648, 229.70251466809998], [72.65148928, 182.765991209, 124.598144512, 228.9114379751]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049027.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[310.73248287999996, 276.32843016, 461.03723142399997, 330.437927232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049027_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify.", "boxes_value": [[37.732482879999964, 14.328430159999982, 188.03723142399997, 68.43792723199999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049027.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a desk, an oven, and an induction cooker.", "boxes_value": [[310.73248287999996, 276.32843016, 461.03723142399997, 330.437927232], [310.73248287999996, 278.770690896, 347.36926272, 330.437927232], [410.309448256, 290.98291017599996, 461.03723142399997, 326.68029787200004], [0, 304.77325440000004, 566.059326144, 478.697265648], [345.02954099199997, 297.52874755199997, 408.28588863999994, 329.760192864], [343.478332544, 276.32843016, 451.203613312, 299.597106912]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049027_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each object you identify. For your reference, objects involved in this region include two cabinets, a desk, an oven, and an induction cooker.", "boxes_value": [[37.732482879999964, 14.328430159999982, 188.03723142399997, 68.43792723199999], [37.732482879999964, 16.77069089600002, 74.36926272, 68.43792723199999], [137.309448256, 28.98291017599996, 188.03723142399997, 64.68029787200004], [0, 42.77325440000004, 225, 81], [72.02954099199997, 35.52874755199997, 135.28588863999994, 67.76019286399998], [70.47833254400001, 14.328430159999982, 178.20361331200002, 37.597106912000015]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049029.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations.", "boxes_value": [[381.1125488436, 219.3130492928, 479.27868651750003, 388.0387573248]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049029_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations.", "boxes_value": [[25.112548843599996, 42.3130492928, 123.27868651750003, 211.03875732479997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049029.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, two bottles, and a speaker.", "boxes_value": [[381.1125488436, 219.3130492928, 479.27868651750003, 388.0387573248], [381.1125488436, 261.762756352, 413.0927734218, 292.3525390848], [437.657348607, 259.9088134656, 471.7232665734, 291.657348608], [404.3203124997, 311.765380864, 415.93664549339996, 349.9020996096], [466.3472900187, 343.326782208, 479.27868651750003, 388.0387573248], [416.62890623699997, 219.3130492928, 435.7852783131, 241.707214336]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049029_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, two bottles, and a speaker.", "boxes_value": [[25.112548843599996, 42.3130492928, 123.27868651750003, 211.03875732479997], [25.112548843599996, 84.762756352, 57.09277342180002, 115.35253908480001], [81.65734860700002, 82.90881346560002, 115.7232665734, 114.657348608], [48.32031249969998, 134.765380864, 59.936645493399965, 172.9020996096], [110.3472900187, 166.326782208, 123.27868651750003, 211.03875732479997], [60.62890623699997, 42.3130492928, 79.78527831309998, 64.70721433599999]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049030.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object.", "boxes_value": [[8.005249028, 84.0895995904, 190.6633911149, 311.2677612544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049030_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object.", "boxes_value": [[8.005249028, 57.0895995904, 190.6633911149, 284.2677612544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049030.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and two street lights.", "boxes_value": [[8.005249028, 84.0895995904, 190.6633911149, 311.2677612544], [173.1468506023, 229.146118144, 190.6633911149, 282.7967529472], [51.501708980800004, 231.0515746816, 86.1826171748, 311.2677612544], [30.190124524300003, 235.0604858368, 52.4282837192, 309.0363769344], [8.005249028, 84.0895995904, 35.954711936399995, 280.5120239104], [182.33074952020002, 117.0459594752, 202.3761596858, 159.82574464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049030_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and two street lights.", "boxes_value": [[8.005249028, 57.0895995904, 190.6633911149, 284.2677612544], [173.1468506023, 202.146118144, 190.6633911149, 255.7967529472], [51.501708980800004, 204.0515746816, 86.1826171748, 284.2677612544], [30.190124524300003, 208.0604858368, 52.4282837192, 282.0363769344], [8.005249028, 57.0895995904, 35.954711936399995, 253.5120239104], [182.33074952020002, 90.0459594752, 202.3761596858, 132.82574464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049033.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please mention the objects and their locations.", "boxes_value": [[7.337097180600001, 0, 809.4577636794, 294.9296875008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049033_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please mention the objects and their locations.", "boxes_value": [[7.337097180600001, 0, 809.4577636794, 294.9296875008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049033.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please mention the objects and their locations. For your reference, objects involved in this region include two benches, two bracelets, five people, a barrel, three glasses, a boat, and a camera.", "boxes_value": [[7.337097180600001, 0, 809.4577636794, 294.9296875008], [106.09893801979999, 91.8547363328, 308.8642578428, 142.843261696], [515.2912597998, 91.1848144384, 707.4415282924, 144.8826904064], [755.5700683608001, 191.0285033984, 765.04943845, 224.412231424], [707.5384521678, 115.0046386688, 800.5360107778, 297.9338378752], [590.3547363032001, 95.2468872192, 804.9644775744, 308.1533813248], [77.23437502440001, 211.93670656, 103.9509277136, 231.4689941504], [242.8454589858, 0.0270385664, 359.9847411952, 150.5486450176], [463.144775357, 0.1503296, 559.5893554283999, 151.7335204864], [70.3789673092, 198.9404907008, 284.575317378, 343.838012672], [418.77307125500005, 99.7062377984, 468.4145507954, 151.9835815424], [437.6696866278, 202.4242832896, 512.9914454126, 226.0190511104], [688.6834555568, 121.7501577216, 723.2117397098, 131.7510797312], [746.5012871162, 133.0453662208, 794.6889186192001, 155.0570002944], [7.337097180600001, 0, 809.4577636794, 294.9296875008], [95.802185028, 180.8237304832, 126.74261473060001, 209.9224853504]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5, 7, 8, 9], [10], [11, 12, 13], [14], [15]]}, {"image_path": "objects365_v1_00049033_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Please mention the objects and their locations. For your reference, objects involved in this region include two benches, two bracelets, five people, a barrel, three glasses, a boat, and a camera.", "boxes_value": [[7.337097180600001, 0, 809.4577636794, 294.9296875008], [106.09893801979999, 91.8547363328, 308.8642578428, 142.843261696], [515.2912597998, 91.1848144384, 707.4415282924, 144.8826904064], [755.5700683608001, 191.0285033984, 765.04943845, 224.412231424], [707.5384521678, 115.0046386688, 800.5360107778, 297.9338378752], [590.3547363032001, 95.2468872192, 804.9644775744, 308.1533813248], [77.23437502440001, 211.93670656, 103.9509277136, 231.4689941504], [242.8454589858, 0.0270385664, 359.9847411952, 150.5486450176], [463.144775357, 0.1503296, 559.5893554283999, 151.7335204864], [70.3789673092, 198.9404907008, 284.575317378, 343.838012672], [418.77307125500005, 99.7062377984, 468.4145507954, 151.9835815424], [437.6696866278, 202.4242832896, 512.9914454126, 226.0190511104], [688.6834555568, 121.7501577216, 723.2117397098, 131.7510797312], [746.5012871162, 133.0453662208, 794.6889186192001, 155.0570002944], [7.337097180600001, 0, 809.4577636794, 294.9296875008], [95.802185028, 180.8237304832, 126.74261473060001, 209.9224853504]], "boxes_seq": [[0], [0], [1, 2], [3, 6], [4, 5, 7, 8, 9], [10], [11, 12, 13], [14], [15]]}, {"image_path": "objects365_v1_00049034.jpg", "text": "Could you please provide a description of the rectangular area in ? Specify the location of each mentioned object.", "boxes_value": [[115.48474119020001, 398.185913088, 338.7156982296, 511.7060547072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049034_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Specify the location of each mentioned object.", "boxes_value": [[56.48474119020001, 29.185913088000007, 279.7156982296, 142.7060547072]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049034.jpg", "text": "Could you please provide a description of the rectangular area in ? Specify the location of each mentioned object. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[115.48474119020001, 398.185913088, 338.7156982296, 511.7060547072], [115.48474119020001, 404.2809448448, 151.29309079840002, 465.9932251136], [177.19702149379998, 405.0427856384, 211.4816284288, 469.8026123264], [240.4331054442, 398.185913088, 286.14587404179997, 469.0407714816], [309.0023193142, 416.4710083072, 338.7156982296, 460.660095232], [250.3375244336, 461.4219360256, 311.2879638874, 511.7060547072], [85.771423325, 467.5169677824, 152.05499269839999, 513.2298584064]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049034_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Specify the location of each mentioned object. For your reference, objects involved in this region include six potted plants.", "boxes_value": [[56.48474119020001, 29.185913088000007, 279.7156982296, 142.7060547072], [56.48474119020001, 35.28094484479999, 92.29309079840002, 96.99322511359998], [118.19702149379998, 36.042785638400005, 152.4816284288, 100.80261232639998], [181.4331054442, 29.185913088000007, 227.14587404179997, 100.04077148160002], [250.0023193142, 47.47100830720001, 279.7156982296, 91.660095232], [191.3375244336, 92.42193602560002, 252.2879638874, 142.7060547072], [26.771423325, 98.5169677824, 93.05499269839999, 143]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049036.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[401.69165036000004, 172.6784057856, 582.447631843, 347.049865728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049036_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference.", "boxes_value": [[45.69165036000004, 43.678405785600006, 226.44763184299995, 218.049865728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049036.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a bracelet, and a lifesaver.", "boxes_value": [[401.69165036000004, 172.6784057856, 582.447631843, 347.049865728], [189.50720211499998, 160.7113647616, 550.131103522, 440.586914048], [431.45410154, 125.7824707072, 648.678710911, 344.7225952256], [502.54577634800006, 176.3735351808, 579.068115241, 289.4290771456], [566.1696777660001, 325.2489624064, 582.447631843, 347.049865728], [401.69165036000004, 172.6784057856, 520.743041973, 310.7089843712]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049036_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a bracelet, and a lifesaver.", "boxes_value": [[45.69165036000004, 43.678405785600006, 226.44763184299995, 218.049865728], [0, 31.71136476160001, 194.131103522, 261], [75.45410154000001, 0, 271, 215.7225952256], [146.54577634800006, 47.37353518079999, 223.068115241, 160.4290771456], [210.16967776600006, 196.24896240639998, 226.44763184299995, 218.049865728], [45.69165036000004, 43.678405785600006, 164.743041973, 181.70898437120002]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049037.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[104.47009274999999, 233.3095092872, 346.65075685, 591.7940673956]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049037_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[61.47009274999999, 90.3095092872, 303.65075685, 448.7940673956]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049037.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a potted plant, a candle, a person, and a handbag.", "boxes_value": [[104.47009274999999, 233.3095092872, 346.65075685, 591.7940673956], [25.90283205, 353.7664184552, 297.31695555, 584.7065429736], [132.24639895, 332.3389892648, 187.00537110000002, 379.9555053712], [104.47009274999999, 233.3095092872, 120.28186035, 282.3416748108], [228.13226319999998, 303.80572510200005, 267.02593995, 380.62066652240003], [252.60284425, 452.71142580320003, 346.65075685, 591.7940673956]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049037_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a potted plant, a candle, a person, and a handbag.", "boxes_value": [[61.47009274999999, 90.3095092872, 303.65075685, 448.7940673956], [0, 210.76641845519998, 254.31695555, 441.7065429736], [89.24639895000001, 189.33898926479998, 144.00537110000002, 236.9555053712], [61.47009274999999, 90.3095092872, 77.28186035, 139.3416748108], [185.13226319999998, 160.80572510200005, 224.02593995, 237.62066652240003], [209.60284425, 309.71142580320003, 303.65075685, 448.7940673956]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049038.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[9.250122042, 141.1385498112, 216.986450229, 319.7529296896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049038_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[9.250122042, 45.138549811199994, 216.986450229, 223.75292968960002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049038.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five pictures.", "boxes_value": [[9.250122042, 141.1385498112, 216.986450229, 319.7529296896], [169.42065431400002, 141.1385498112, 216.986450229, 207.6335449088], [170.391357459, 226.0774536192, 198.0571899375, 319.7529296896], [9.250122042, 148.904357888, 35.945190405, 238.2116088832], [65.167297344, 238.347473152, 122.559814467, 312.6968994304], [132.800415057, 230.4293212672, 161.2171631205, 269.6567993344]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049038_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five pictures.", "boxes_value": [[9.250122042, 45.138549811199994, 216.986450229, 223.75292968960002], [169.42065431400002, 45.138549811199994, 216.986450229, 111.63354490879999], [170.391357459, 130.0774536192, 198.0571899375, 223.75292968960002], [9.250122042, 52.90435788799999, 35.945190405, 142.2116088832], [65.167297344, 142.347473152, 122.559814467, 216.69689943039998], [132.800415057, 134.4293212672, 161.2171631205, 173.65679933439998]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049039.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[158.284301772, 302.7864990208, 403.2111816384, 377.4801025536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049039_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[61.28430177199999, 18.786499020800022, 306.2111816384, 93.48010255359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049039.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, two desks, a trash bin can, and a cup.", "boxes_value": [[158.284301772, 302.7864990208, 403.2111816384, 377.4801025536], [246.9131469864, 252.3180541952, 331.08380129520003, 347.3997192192], [183.2466430776, 275.1185302528, 550.6818847751999, 511.5420532224], [63.9308471592, 242.6850586112, 423.10351560719994, 399.8676147712], [158.284301772, 312.6965331968, 189.0347289984, 377.4801025536], [382.9033202856, 302.7864990208, 403.2111816384, 318.2591552512]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049039_crop.jpg", "text": "I'd like a thorough description of the area in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a picture, two desks, a trash bin can, and a cup.", "boxes_value": [[61.28430177199999, 18.786499020800022, 306.2111816384, 93.48010255359998], [149.9131469864, 0, 234.08380129520003, 63.399719219199994], [86.2466430776, 0, 367, 112], [0, 0, 326.10351560719994, 112], [61.28430177199999, 28.696533196799976, 92.0347289984, 93.48010255359998], [285.9033202856, 18.786499020800022, 306.2111816384, 34.25915525120001]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049040.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object.", "boxes_value": [[44.8407592704, 0, 228.2713012992, 511.4346924032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049040_crop.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object.", "boxes_value": [[44.8407592704, 0, 228.2713012992, 511.4346924032]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049040.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a cabinet, a person, a cup, a moniter, and a camera.", "boxes_value": [[44.8407592704, 0, 228.2713012992, 511.4346924032], [179.1913452288, 0, 221.9680175616, 141.39019776], [44.8407592704, 370.1190795776, 221.5988159232, 511.4346924032], [143.60906979840001, 187.1614380032, 228.2713012992, 336.8798217728], [111.1525268736, 396.3361206272, 130.35198973439998, 429.2495117312], [9.5416260096, 276.5213623296, 68.4674682624, 342.5041503744], [177.14495846399998, 318.1575317504, 218.7811889664, 349.2082519552]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049040_crop.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, a cabinet, a person, a cup, a moniter, and a camera.", "boxes_value": [[44.8407592704, 0, 228.2713012992, 511.4346924032], [179.1913452288, 0, 221.9680175616, 141.39019776], [44.8407592704, 370.1190795776, 221.5988159232, 511.4346924032], [143.60906979840001, 187.1614380032, 228.2713012992, 336.8798217728], [111.1525268736, 396.3361206272, 130.35198973439998, 429.2495117312], [9.5416260096, 276.5213623296, 68.4674682624, 342.5041503744], [177.14495846399998, 318.1575317504, 218.7811889664, 349.2082519552]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049041.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[700.06677248, 88.65490725, 991.25048832, 343.8845215]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049041_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations.", "boxes_value": [[73.06677248000005, 64.65490725, 364.25048832000004, 319.8845215]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049041.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a boat, and four street lights.", "boxes_value": [[700.06677248, 88.65490725, 991.25048832, 343.8845215], [700.06677248, 327.43981935, 746.6600341504, 343.8845215], [946.1988525056, 88.65490725, 991.25048832, 249.32861330000003], [891.554687488, 145.8388672, 918.4042969088, 234.2576294], [871.797607424, 169.69561769999999, 887.6855469056, 229.6032715], [860.1368408064, 180.62768555, 871.797607424, 228.5829468]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049041_crop.jpg", "text": "Within the input image , what can be found in the region defined by ? Please mention the objects and their locations. For your reference, objects involved in this region include a boat, and four street lights.", "boxes_value": [[73.06677248000005, 64.65490725, 364.25048832000004, 319.8845215], [73.06677248000005, 303.43981935, 119.66003415039995, 319.8845215], [319.19885250560003, 64.65490725, 364.25048832000004, 225.32861330000003], [264.554687488, 121.83886720000001, 291.4042969088, 210.2576294], [244.79760742400003, 145.69561769999999, 260.68554690559995, 205.6032715], [233.13684080639996, 156.62768555, 244.79760742400003, 204.5829468]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049043.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[63.120971648, 294.0817260544, 193.677856448, 381.248901376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049043_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[33.120971648, 22.081726054400008, 163.677856448, 109.24890137599999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049043.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a pillow, a wine glass, a bowl, and a plate.", "boxes_value": [[63.120971648, 294.0817260544, 193.677856448, 381.248901376], [0, 291.2230224384, 223.162841792, 512.2442627072], [63.120971648, 294.0817260544, 170.759704576, 381.248901376], [174.45727539199999, 313.1923828224, 193.677856448, 360.2398071296], [140.606079104, 333.2736205824, 177.612915008, 352.4942016512], [128.557312, 347.0435790848, 191.09600832, 361.100463872]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049043_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include a bench, a pillow, a wine glass, a bowl, and a plate.", "boxes_value": [[33.120971648, 22.081726054400008, 163.677856448, 109.24890137599999], [0, 19.223022438399994, 193.162841792, 131], [33.120971648, 22.081726054400008, 140.759704576, 109.24890137599999], [144.45727539199999, 41.19238282240002, 163.677856448, 88.23980712960002], [110.606079104, 61.273620582399985, 147.612915008, 80.4942016512], [98.557312, 75.0435790848, 161.09600832, 89.10046387199998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049045.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[333.15045162819996, 204.0991211008, 682.445312478, 511.1160278528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049045_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each mentioned object.", "boxes_value": [[88.15045162819996, 77.0991211008, 437.445312478, 384.1160278528]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049045.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a storage box, two chairs, a cabinet, and two flags.", "boxes_value": [[333.15045162819996, 204.0991211008, 682.445312478, 511.1160278528], [333.15045162819996, 287.0584106496, 399.56018069159995, 369.3326415872], [598.1906738502, 369.0191650304, 681.3558349315, 511.1160278528], [568.4494628918, 342.031799296, 664.8330077841, 446.1260376064], [398.0716552726, 204.0991211008, 512.0847168003, 299.4376220672], [633.7458496374, 334.083251968, 681.8557128646, 426.348632832], [563.3656006116, 259.2693481472, 682.445312478, 372.1744995328]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049045_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a storage box, two chairs, a cabinet, and two flags.", "boxes_value": [[88.15045162819996, 77.0991211008, 437.445312478, 384.1160278528], [88.15045162819996, 160.05841064959998, 154.56018069159995, 242.3326415872], [353.1906738502, 242.01916503040002, 436.3558349315, 384.1160278528], [323.44946289179995, 215.03179929599997, 419.8330077841, 319.1260376064], [153.07165527260003, 77.0991211008, 267.08471680030004, 172.43762206719998], [388.74584963740006, 207.083251968, 436.85571286460004, 299.348632832], [318.36560061160003, 132.2693481472, 437.445312478, 245.17449953279998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049047.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object.", "boxes_value": [[123.32745362499999, 153.30822755, 230.28161618750002, 366.11468505]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049047_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object.", "boxes_value": [[27.32745362499999, 53.30822755, 134.28161618750002, 266.11468505]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049047.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object. For your reference, objects involved in this region include a stool, a chair, a pillow, a mirror, and a picture.", "boxes_value": [[123.32745362499999, 153.30822755, 230.28161618750002, 366.11468505], [123.32745362499999, 304.8796997, 157.006713875, 366.11468505], [144.75970456250002, 243.64471435000002, 215.94537356249998, 323.25024414999996], [152.909973125, 272.04187010000004, 193.587463375, 295.00500489999996], [168.10546875, 153.30822755, 230.28161618750002, 255.7720337], [199.528442375, 178.25030519999999, 213.037109375, 215.2214966]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049047_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Specify the location of each mentioned object. For your reference, objects involved in this region include a stool, a chair, a pillow, a mirror, and a picture.", "boxes_value": [[27.32745362499999, 53.30822755, 134.28161618750002, 266.11468505], [27.32745362499999, 204.8796997, 61.006713875, 266.11468505], [48.75970456250002, 143.64471435000002, 119.94537356249998, 223.25024414999996], [56.90997312499999, 172.04187010000004, 97.587463375, 195.00500489999996], [72.10546875, 53.30822755, 134.28161618750002, 155.7720337], [103.528442375, 78.25030519999999, 117.037109375, 115.2214966]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049048.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations.", "boxes_value": [[388.3234863427, 320.6741332992, 682.6379394461, 511.934814464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049048_crop.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations.", "boxes_value": [[74.3234863427, 48.67413329919998, 368.63793944609995, 239.934814464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049048.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, a van, and a street lights.", "boxes_value": [[388.3234863427, 320.6741332992, 682.6379394461, 511.934814464], [659.8869628831, 427.6225586176, 682.6379394461, 511.934814464], [388.3234863427, 398.618408192, 445.4370116861, 429.5293579264], [450.8471679958, 390.6201782272, 499.97717286339997, 421.6773681664], [540.3916015315, 320.6741332992, 574.4317627009, 387.3361206272], [511.5943603653, 390.4592895488, 538.215576167, 412.1943359488], [498.1153564581, 393.8290405376, 520.5242920168, 415.5640869376]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00049048_crop.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four cars, a van, and a street lights.", "boxes_value": [[74.3234863427, 48.67413329919998, 368.63793944609995, 239.934814464], [345.88696288309995, 155.62255861760002, 368.63793944609995, 239.934814464], [74.3234863427, 126.618408192, 131.4370116861, 157.5293579264], [136.84716799580002, 118.62017822719997, 185.97717286339997, 149.67736816640002], [226.3916015315, 48.67413329919998, 260.4317627009, 115.33612062719999], [197.59436036530002, 118.45928954879997, 224.215576167, 140.19433594880002], [184.1153564581, 121.82904053760001, 206.52429201680002, 143.5640869376]], "boxes_seq": [[0], [0], [1, 2, 5, 6], [3], [4]]}, {"image_path": "objects365_v1_00049049.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[373.5349121004, 147.702392576, 684.0148925804999, 512.0140380672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049049_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify.", "boxes_value": [[78.53491210039999, 91.702392576, 388, 456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049049.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a pickup truck, a van, a bus, and a car.", "boxes_value": [[373.5349121004, 147.702392576, 684.0148925804999, 512.0140380672], [642.149780245, 224.3394165248, 683.0975341758001, 512.0140380672], [10.8648681927, 229.9298095616, 660.4132080312, 483.292785664], [373.5349121004, 147.702392576, 684.0148925804999, 350.6719970816], [272.2965088175, 145.598571776, 455.4033202978, 247.9743652352], [454.0288086152, 171.5192870912, 518.3338623189001, 208.3875122176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049049_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, a pickup truck, a van, a bus, and a car.", "boxes_value": [[78.53491210039999, 91.702392576, 388, 456], [347.149780245, 168.3394165248, 388, 456], [0, 173.9298095616, 365.41320803120004, 427.292785664], [78.53491210039999, 91.702392576, 388, 294.6719970816], [0, 89.598571776, 160.4033202978, 191.9743652352], [159.02880861519998, 115.5192870912, 223.33386231890006, 152.3875122176]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049050.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[230.541076689, 48.6343994368, 627.248168932, 418.6868285952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049050_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[99.541076689, 48.6343994368, 496.248168932, 418.6868285952]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049050.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three cabinets, a chair, and two pens.", "boxes_value": [[230.541076689, 48.6343994368, 627.248168932, 418.6868285952], [428.99401853, 308.561523456, 607.945434558, 509.7437744128], [400.524536104, 48.6343994368, 627.248168932, 372.8381958144], [352.332031282, 102.303283712, 424.62072753099994, 319.1693725696], [230.541076689, 238.118408192, 365.475463892, 361.885376], [307.402832044, 405.932922368, 367.42114255, 418.6868285952], [408.184570332, 364.5260009984, 448.814941421, 376.1009521664]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049050_crop.jpg", "text": "Can you provide a description of the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three cabinets, a chair, and two pens.", "boxes_value": [[99.541076689, 48.6343994368, 496.248168932, 418.6868285952], [297.99401853, 308.561523456, 476.945434558, 509.7437744128], [269.524536104, 48.6343994368, 496.248168932, 372.8381958144], [221.332031282, 102.303283712, 293.62072753099994, 319.1693725696], [99.541076689, 238.118408192, 234.475463892, 361.885376], [176.40283204399998, 405.932922368, 236.42114255, 418.6868285952], [277.184570332, 364.5260009984, 317.814941421, 376.1009521664]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049051.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[113.2817993216, 283.10021970959997, 227.7934570496, 374.99169921419997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049051_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[29.281799321600005, 23.10021970959997, 143.7934570496, 114.99169921419997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049051.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, three people, and a handbag.", "boxes_value": [[113.2817993216, 283.10021970959997, 227.7934570496, 374.99169921419997], [113.2817993216, 295.948974612, 176.3563232256, 374.99169921419997], [29.6312866304, 336.1219482216, 270.7840576, 848.9078369334], [155.6596069376, 287.24780275439997, 196.306945792, 336.1905517266], [184.2786865152, 283.10021970959997, 223.2669677568, 354.85522461420004], [205.3774413824, 286.5516357408, 227.7934570496, 326.942749044]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049051_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, three people, and a handbag.", "boxes_value": [[29.281799321600005, 23.10021970959997, 143.7934570496, 114.99169921419997], [29.281799321600005, 35.94897461199997, 92.3563232256, 114.99169921419997], [0, 76.12194822160001, 172, 137], [71.65960693759999, 27.24780275439997, 112.306945792, 76.1905517266], [100.27868651520001, 23.10021970959997, 139.2669677568, 94.85522461420004], [121.37744138240001, 26.55163574080001, 143.7934570496, 66.94274904399998]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049052.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[315.27441408, 480.767089856, 415.3364868096, 590.565307648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049052_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[25.274414079999985, 27.767089855999984, 125.33648680959999, 137.56530764800004]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049052.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two leather shoes, and a high heels.", "boxes_value": [[315.27441408, 480.767089856, 415.3364868096, 590.565307648], [211.9754028544, 113.95153811200001, 372.6160278528, 590.930542016], [341.3117065216, 155.965270976, 438.519836416, 540.678955072], [345.7877197312, 520.2727050880001, 407.0416259584, 542.149047872], [315.27441408, 549.945800768, 344.497070336, 590.565307648], [372.1544799744, 480.767089856, 415.3364868096, 523.545532224]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049052_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two leather shoes, and a high heels.", "boxes_value": [[25.274414079999985, 27.767089855999984, 125.33648680959999, 137.56530764800004], [0, 0, 82.61602785280002, 137.930542016], [51.3117065216, 0, 148.51983641599998, 87.67895507200001], [55.78771973120001, 67.27270508800007, 117.0416259584, 89.14904787199998], [25.274414079999985, 96.94580076800003, 54.49707033599998, 137.56530764800004], [82.15447997439998, 27.767089855999984, 125.33648680959999, 70.545532224]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049057.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for each element you describe.", "boxes_value": [[336.549804672, 178.5288086016, 695.3986816512, 349.8214721536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049057_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for each element you describe.", "boxes_value": [[90.549804672, 43.52880860159999, 449.39868165120004, 214.82147215359998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049057.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two backpacks, and a hat.", "boxes_value": [[336.549804672, 178.5288086016, 695.3986816512, 349.8214721536], [336.549804672, 204.0938720768, 414.97375488, 349.8214721536], [625.8865966848, 191.6173095936, 695.3986816512, 285.4884033024], [368.78967283199995, 191.067810048, 408.9218750208, 285.9008788992], [373.4625244416, 203.4373169152, 389.6804199168, 222.1290893312], [636.5096435712, 178.5288086016, 678.8367919872001, 227.7295532032]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049057_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two backpacks, and a hat.", "boxes_value": [[90.549804672, 43.52880860159999, 449.39868165120004, 214.82147215359998], [90.549804672, 69.09387207680001, 168.97375488, 214.82147215359998], [379.8865966848, 56.6173095936, 449.39868165120004, 150.4884033024], [122.78967283199995, 56.06781004800001, 162.92187502079997, 150.9008788992], [127.46252444160001, 68.4373169152, 143.68041991680002, 87.12908933119999], [390.50964357119994, 43.52880860159999, 432.83679198720006, 92.7295532032]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049059.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[303.2738036736, 293.6711425536, 535.060424832, 354.2887572992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049059_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[58.273803673600014, 15.67114255360002, 290.06042483199997, 76.28875729919997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049059.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three umbrellas, and a chair.", "boxes_value": [[303.2738036736, 293.6711425536, 535.060424832, 354.2887572992], [347.102783232, 322.788574208, 361.9895019264, 344.7723999232], [306.051757824, 299.9693603328, 366.98901365759997, 352.3958129664], [398.97558597119996, 295.7974853632, 473.39697262079994, 354.0941772288], [474.9916992, 293.6711425536, 535.060424832, 353.3854370304], [303.2738036736, 328.790466304, 332.12707522560004, 354.2887572992]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049059_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, three umbrellas, and a chair.", "boxes_value": [[58.273803673600014, 15.67114255360002, 290.06042483199997, 76.28875729919997], [102.10278323199998, 44.788574208, 116.98950192640001, 66.7723999232], [61.05175782399999, 21.969360332800022, 121.98901365759997, 74.3958129664], [153.97558597119996, 17.797485363199996, 228.39697262079994, 76.09417722879999], [229.99169920000003, 15.67114255360002, 290.06042483199997, 75.3854370304], [58.273803673600014, 50.790466304000006, 87.12707522560004, 76.28875729919997]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049060.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[286.2692870966, 272.0358886912, 454.037475612, 344.5896606208]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049060_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[42.26928709660001, 19.0358886912, 210.03747561199998, 91.58966062079998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049060.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[286.2692870966, 272.0358886912, 454.037475612, 344.5896606208], [347.3867187401, 62.9650878976, 454.2962646535, 345.13592529920004], [286.2692870966, 295.9730834944, 331.5287475611, 319.105712896], [352.44873048430003, 272.0358886912, 370.15014646139997, 315.484985344], [348.93041990430004, 324.6114502144, 390.8413086073, 344.5896606208], [425.7238769225, 286.0468749824, 454.037475612, 332.332031232]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049060_crop.jpg", "text": "Please elucidate the area of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, and four sneakers.", "boxes_value": [[42.26928709660001, 19.0358886912, 210.03747561199998, 91.58966062079998], [103.38671874009998, 0, 210.29626465349997, 92.13592529920004], [42.26928709660001, 42.97308349439999, 87.52874756109998, 66.105712896], [108.44873048430003, 19.0358886912, 126.15014646139997, 62.484985343999995], [104.93041990430004, 71.61145021440001, 146.8413086073, 91.58966062079998], [181.72387692249998, 33.04687498240003, 210.03747561199998, 79.33203123200002]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049061.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[104.9139404544, 303.999694848, 366.3181152, 369.8659057664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049061_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention.", "boxes_value": [[65.9139404544, 16.99969484799999, 327.3181152, 82.86590576639998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049061.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five street lights.", "boxes_value": [[104.9139404544, 303.999694848, 366.3181152, 369.8659057664], [104.9139404544, 303.999694848, 137.846984832, 369.8659057664], [152.720031744, 255.1312256, 167.5930176, 361.367004416], [160.1565551616, 281.1589965824, 202.6508788992, 369.3347167744], [248.3322753792, 311.967346176, 271.7041015296, 356.0552368128], [349.9752197376, 313.2877807616, 366.3181152, 351.4212036096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049061_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five street lights.", "boxes_value": [[65.9139404544, 16.99969484799999, 327.3181152, 82.86590576639998], [65.9139404544, 16.99969484799999, 98.846984832, 82.86590576639998], [113.72003174400001, 0, 128.5930176, 74.36700441599999], [121.15655516160001, 0, 163.6508788992, 82.33471677440002], [209.3322753792, 24.967346175999978, 232.7041015296, 69.05523681279999], [310.9752197376, 26.287780761600004, 327.3181152, 64.42120360960001]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049062.jpg", "text": "Kindly describe what I should be seeing in the area of image . Specify the location of each mentioned object.", "boxes_value": [[352.16467284040004, 135.537902848, 496.70446775740004, 271.3184204288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049062_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Specify the location of each mentioned object.", "boxes_value": [[36.164672840400044, 34.53790284799999, 180.70446775740004, 170.31842042879998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049062.jpg", "text": "Kindly describe what I should be seeing in the area of image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[352.16467284040004, 135.537902848, 496.70446775740004, 271.3184204288], [352.16467284040004, 150.8292846592, 373.4526367082, 180.2127075328], [387.244873057, 135.537902848, 404.3352050514, 182.6113281024], [389.793457039, 196.7890625024, 428.0860595648, 271.3184204288], [418.06323241760003, 215.5499267584, 449.15991213099994, 265.664489728], [457.89782711540005, 212.9799194112, 496.70446775740004, 266.9494628864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049062_crop.jpg", "text": "Kindly describe what I should be seeing in the area of image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[36.164672840400044, 34.53790284799999, 180.70446775740004, 170.31842042879998], [36.164672840400044, 49.8292846592, 57.452636708199975, 79.21270753280001], [71.244873057, 34.53790284799999, 88.33520505140001, 81.6113281024], [73.79345703899997, 95.78906250239999, 112.0860595648, 170.31842042879998], [102.06323241760003, 114.5499267584, 133.15991213099994, 164.66448972799998], [141.89782711540005, 111.9799194112, 180.70446775740004, 165.9494628864]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049064.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each mentioned object.", "boxes_value": [[0.47729495040000003, 116.3714599424, 269.9783935488, 323.2693481472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049064_crop.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each mentioned object.", "boxes_value": [[0.47729495040000003, 52.371459942399994, 269.9783935488, 259.2693481472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049064.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a cup, and a bottle.", "boxes_value": [[0.47729495040000003, 116.3714599424, 269.9783935488, 323.2693481472], [0.47729495040000003, 163.0839233536, 44.8529663232, 259.4116821504], [103.2990112512, 207.4596557824, 275.39013672960004, 452.0671386624], [194.21502689279998, 192.306945792, 269.9783935488, 323.2693481472], [243.63781739520002, 242.4583130112, 259.0307617536, 279.9007568384], [71.499572736, 116.3714599424, 109.94427486720001, 156.8665161216]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049064_crop.jpg", "text": "Detail the chosen region in the depicted scene . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a cup, and a bottle.", "boxes_value": [[0.47729495040000003, 52.371459942399994, 269.9783935488, 259.2693481472], [0.47729495040000003, 99.0839233536, 44.8529663232, 195.4116821504], [103.2990112512, 143.4596557824, 275.39013672960004, 310], [194.21502689279998, 128.306945792, 269.9783935488, 259.2693481472], [243.63781739520002, 178.4583130112, 259.0307617536, 215.9007568384], [71.499572736, 52.371459942399994, 109.94427486720001, 92.8665161216]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049065.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[221.03637695370003, 337.4204711936, 546.5129394186, 447.5073242112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049065_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[82.03637695370003, 28.420471193600008, 407.5129394186, 138.5073242112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049065.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two flowers, a desk, two vases, and a bakset.", "boxes_value": [[221.03637695370003, 337.4204711936, 546.5129394186, 447.5073242112], [331.8741455337, 399.7228393472, 380.2730712858, 437.6002807808], [434.6843261607, 337.4204711936, 546.5129394186, 444.73986816], [221.03637695370003, 360.5650024448, 247.24652101380002, 373.6700439552], [220.3466186691, 372.2905883648, 250.69525145280002, 392.982788096], [382.442138694, 358.1296996864, 401.0834960748, 384.6578369024], [343.2177734139, 435.3337402368, 374.40966800160004, 447.5073242112]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6]]}, {"image_path": "objects365_v1_00049065_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two flowers, a desk, two vases, and a bakset.", "boxes_value": [[82.03637695370003, 28.420471193600008, 407.5129394186, 138.5073242112], [192.87414553370002, 90.72283934720002, 241.27307128579997, 128.6002807808], [295.6843261607, 28.420471193600008, 407.5129394186, 135.73986816000001], [82.03637695370003, 51.56500244479997, 108.24652101380002, 64.67004395520001], [81.3466186691, 63.29058836479999, 111.69525145280002, 83.98278809599998], [243.442138694, 49.12969968639999, 262.0834960748, 75.65783690239999], [204.21777341389998, 126.3337402368, 235.40966800160004, 138.5073242112]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5], [6]]}, {"image_path": "objects365_v1_00049066.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[70.6722820608, 187.8456014848, 299.6387939328, 431.152954112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049066_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates.", "boxes_value": [[57.6722820608, 60.8456014848, 286.6387939328, 304.152954112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049066.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two benches, two people, a hat, and a glasses.", "boxes_value": [[70.6722820608, 187.8456014848, 299.6387939328, 431.152954112], [151.91827392, 272.3081665024, 516.9068603136, 510.7916870144], [85.822631808, 264.085144064, 299.6387939328, 431.152954112], [200.743652352, 214.490966784, 256.8880614912, 274.1010742272], [0, 186.0722045952, 153.6101074176, 358.6642456064], [70.6722820608, 187.8456014848, 146.33249840640002, 236.7004840448], [218.6359554048, 216.086307072, 244.427605248, 232.9500781056]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049066_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two benches, two people, a hat, and a glasses.", "boxes_value": [[57.6722820608, 60.8456014848, 286.6387939328, 304.152954112], [138.91827392, 145.3081665024, 343, 364], [72.822631808, 137.08514406400002, 286.6387939328, 304.152954112], [187.743652352, 87.490966784, 243.8880614912, 147.1010742272], [0, 59.07220459519999, 140.6101074176, 231.66424560640002], [57.6722820608, 60.8456014848, 133.33249840640002, 109.70048404479999], [205.6359554048, 89.08630707200001, 231.427605248, 105.9500781056]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049070.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[230.847694336, 104.2501220352, 512.0040283136, 767.2554931968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049070_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference.", "boxes_value": [[70.84769433599999, 104.2501220352, 352, 767.2554931968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049070.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two high heels, and a handbag.", "boxes_value": [[230.847694336, 104.2501220352, 512.0040283136, 767.2554931968], [413.7766723584, 305.549072256, 510.9061279232, 767.2554931968], [374.9554443264, 104.2501220352, 510.442993152, 457.7739257856], [246.843765504, 559.2125246976, 301.4888727552, 591.7169419008], [475.9868412416, 172.8660331008, 512.0040283136, 230.0000610048], [230.847694336, 408.12917191680003, 251.1007526912, 436.25841960959997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049070_crop.jpg", "text": "Describe the selected rectangular area in the photo . Give coordinates for the items you reference. For your reference, objects involved in this region include two cabinets, two high heels, and a handbag.", "boxes_value": [[70.84769433599999, 104.2501220352, 352, 767.2554931968], [253.7766723584, 305.549072256, 350.9061279232, 767.2554931968], [214.95544432640003, 104.2501220352, 350.442993152, 457.7739257856], [86.843765504, 559.2125246976, 141.48887275520002, 591.7169419008], [315.9868412416, 172.8660331008, 352, 230.0000610048], [70.84769433599999, 408.12917191680003, 91.1007526912, 436.25841960959997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049071.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify.", "boxes_value": [[0, 74.2694091776, 329.88342282689996, 467.2103271424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049071_crop.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify.", "boxes_value": [[0, 74.2694091776, 329.88342282689996, 467.2103271424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049071.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, an air conditioner, and three people.", "boxes_value": [[0, 74.2694091776, 329.88342282689996, 467.2103271424], [0, 336.7502441472, 121.5542602755, 467.2103271424], [69.3439941177, 95.8235473408, 114.1399536389, 121.0484619264], [229.7383423122, 74.2694091776, 329.88342282689996, 384.6528320512], [112.3497314792, 40.445617664, 341.1580810478, 451.6373291008], [51.334167509900006, 148.5491943424, 128.2668456772, 466.2279663104]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049071_crop.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, an air conditioner, and three people.", "boxes_value": [[0, 74.2694091776, 329.88342282689996, 467.2103271424], [0, 336.7502441472, 121.5542602755, 467.2103271424], [69.3439941177, 95.8235473408, 114.1399536389, 121.0484619264], [229.7383423122, 74.2694091776, 329.88342282689996, 384.6528320512], [112.3497314792, 40.445617664, 341.1580810478, 451.6373291008], [51.334167509900006, 148.5491943424, 128.2668456772, 466.2279663104]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049072.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[121.13623043799998, 176.8389282304, 680.5961914118001, 512.1539306496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049072_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object.", "boxes_value": [[121.13623043799998, 83.8389282304, 680.5961914118001, 419]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049072.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people, and two hats.", "boxes_value": [[121.13623043799998, 176.8389282304, 680.5961914118001, 512.1539306496], [0.0082397088, 100.487304704, 239.5160522358, 511.987609856], [85.51141358720001, 159.8770752, 223.13494874420002, 403.4624023552], [239.14758302599998, 309.8494873088, 442.0982666206, 512.1539306496], [553.8675536889999, 416.0452880896, 580.2537841815999, 446.411499008], [574.7016601473999, 346.0758666752, 680.5961914118001, 461.05615232], [121.13623043799998, 176.8389282304, 184.7372436824, 221.5315551744], [361.4898681314, 309.4825439232, 414.0767822388, 358.688842752]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00049072_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Specify the location of each mentioned object. For your reference, objects involved in this region include five people, and two hats.", "boxes_value": [[121.13623043799998, 83.8389282304, 680.5961914118001, 419], [0.0082397088, 7.487304703999996, 239.5160522358, 418.987609856], [85.51141358720001, 66.87707520000001, 223.13494874420002, 310.4624023552], [239.14758302599998, 216.8494873088, 442.0982666206, 419], [553.8675536889999, 323.0452880896, 580.2537841815999, 353.411499008], [574.7016601473999, 253.07586667520002, 680.5961914118001, 368.05615232], [121.13623043799998, 83.8389282304, 184.7372436824, 128.5315551744], [361.4898681314, 216.4825439232, 414.0767822388, 265.688842752]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6, 7]]}, {"image_path": "objects365_v1_00049074.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[12.034790063600001, 424.8471069184, 771.982543922, 510.4341430784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049074_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each mentioned object.", "boxes_value": [[12.034790063600001, 21.847106918400016, 771.982543922, 107.43414307839998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049074.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three desks, and three chairs.", "boxes_value": [[12.034790063600001, 424.8471069184, 771.982543922, 510.4341430784], [518.1226806364, 479.5025024512, 771.982543922, 509.9656982528], [376.8677978472, 435.001464832, 492.9179687704, 510.4341430784], [159.99884029359998, 424.8471069184, 284.75280764120004, 509.7088623104], [1.8803711308, 421.9458618368, 204.9683227164, 501.0050659328], [12.034790063600001, 471.9924926976, 141.8659668004, 510.4341430784], [706.9440918308, 366.8704834048, 772.4385985988, 466.5058593792]], "boxes_seq": [[0], [0], [1, 4, 6], [2, 3, 5]]}, {"image_path": "objects365_v1_00049074_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include three desks, and three chairs.", "boxes_value": [[12.034790063600001, 21.847106918400016, 771.982543922, 107.43414307839998], [518.1226806364, 76.50250245119997, 771.982543922, 106.9656982528], [376.8677978472, 32.00146483200001, 492.9179687704, 107.43414307839998], [159.99884029359998, 21.847106918400016, 284.75280764120004, 106.70886231039998], [1.8803711308, 18.94586183680002, 204.9683227164, 98.0050659328], [12.034790063600001, 68.99249269760003, 141.8659668004, 107.43414307839998], [706.9440918308, 0, 772, 63.50585937919999]], "boxes_seq": [[0], [0], [1, 4, 6], [2, 3, 5]]}, {"image_path": "objects365_v1_00049075.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[307.05322262000004, 273.8169555456, 508.81176758600003, 424.9742431744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049075_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[51.05322262000004, 37.816955545600024, 252.81176758600003, 188.9742431744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049075.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two tea pots, a plate, and a pot.", "boxes_value": [[307.05322262000004, 273.8169555456, 508.81176758600003, 424.9742431744], [307.05322262000004, 324.2811889664, 425.8194579936, 424.9742431744], [322.6534424192, 273.8169555456, 354.9016113132, 332.1706543104], [348.7590332004, 321.9047241216, 391.75659177719996, 332.910034176], [371.6290283124, 291.5949096448, 406.4279785412, 331.2122192384], [470.242675764, 353.398803712, 508.81176758600003, 391.5060424704]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049075_crop.jpg", "text": "Analyze and describe the region in the included photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, two tea pots, a plate, and a pot.", "boxes_value": [[51.05322262000004, 37.816955545600024, 252.81176758600003, 188.9742431744], [51.05322262000004, 88.28118896640001, 169.8194579936, 188.9742431744], [66.65344241920002, 37.816955545600024, 98.90161131320002, 96.17065431039998], [92.75903320039998, 85.9047241216, 135.75659177719996, 96.91003417600001], [115.62902831240001, 55.5949096448, 150.4279785412, 95.2122192384], [214.242675764, 117.39880371200002, 252.81176758600003, 155.50604247040002]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049076.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe.", "boxes_value": [[70.3262939648, 499.9447021555, 512.4133300736, 682.5318603256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049076_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe.", "boxes_value": [[70.3262939648, 45.94470215550001, 512, 228.53186032559995]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049076.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, a tea pot, a gas stove, an oven, and two cups.", "boxes_value": [[70.3262939648, 499.9447021555, 512.4133300736, 682.5318603256], [0, 583.0418701087, 195.0139160064, 680.3533935776], [480.5824585216, 534.8408203028, 512.4133300736, 682.1723632756], [209.2003173888, 436.169555641, 305.1452636672, 551.3034668176], [151.068969728, 499.9447021555, 495.417907712, 629.5605468699], [213.146240256, 588.1994629219, 491.0640869376, 682.5318603256], [67.5845337088, 461.1776123203, 125.1613769728, 530.1131591878], [70.3262939648, 527.3714599731, 123.9863281152, 582.9898681962]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049076_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two cabinets, a tea pot, a gas stove, an oven, and two cups.", "boxes_value": [[70.3262939648, 45.94470215550001, 512, 228.53186032559995], [0, 129.04187010869998, 195.0139160064, 226.35339357759995], [480.5824585216, 80.84082030280001, 512, 228.1723632756], [209.2003173888, 0, 305.1452636672, 97.3034668176], [151.068969728, 45.94470215550001, 495.417907712, 175.5605468699], [213.146240256, 134.19946292190002, 491.0640869376, 228.53186032559995], [67.5845337088, 7.1776123203, 125.1613769728, 76.11315918779997], [70.3262939648, 73.37145997309995, 123.9863281152, 128.98986819619995]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049077.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates.", "boxes_value": [[125.5527954132, 137.507446272, 190.31072997479998, 330.8842773504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049077_crop.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates.", "boxes_value": [[16.552795413200002, 48.50744627200001, 81.31072997479998, 241.8842773504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049077.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, a flower, a vase, and a cup.", "boxes_value": [[125.5527954132, 137.507446272, 190.31072997479998, 330.8842773504], [125.5527954132, 137.507446272, 190.31072997479998, 246.3139038208], [129.1687011762, 243.3554687488, 206.0892944358, 332.1099853312], [149.22064207559998, 288.718872064, 182.09271240660001, 311.7293090816], [159.3738403152, 305.2153320448, 176.55548094780002, 330.8842773504], [160.4869384788, 304.7243652096, 175.07531736299998, 332.1096191488]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049077_crop.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two cabinets, a flower, a vase, and a cup.", "boxes_value": [[16.552795413200002, 48.50744627200001, 81.31072997479998, 241.8842773504], [16.552795413200002, 48.50744627200001, 81.31072997479998, 157.3139038208], [20.16870117619999, 154.3554687488, 97, 243.1099853312], [40.22064207559998, 199.71887206399998, 73.09271240660001, 222.72930908159998], [50.3738403152, 216.2153320448, 67.55548094780002, 241.8842773504], [51.4869384788, 215.72436520960002, 66.07531736299998, 243.1096191488]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049078.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[421.28503417810003, 78.7228393472, 510.7552490589, 420.1620483584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049078_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[23.285034178100034, 78.7228393472, 112.7552490589, 420.1620483584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049078.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a guitar, a drum, a person, a bracelet, and a glasses.", "boxes_value": [[421.28503417810003, 78.7228393472, 510.7552490589, 420.1620483584], [320.6624756198, 252.3375854592, 672.4374999817, 395.0706176512], [467.0769042638, 363.2242431488, 510.7552490589, 420.1620483584], [358.5555419808, 50.5809936384, 577.8731689482, 511.9373169152], [439.302368202, 135.8922119168, 460.2198486132, 151.8847046144], [421.28503417810003, 78.7228393472, 480.56274414390003, 97.4421386752]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049078_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a guitar, a drum, a person, a bracelet, and a glasses.", "boxes_value": [[23.285034178100034, 78.7228393472, 112.7552490589, 420.1620483584], [0, 252.3375854592, 135, 395.0706176512], [69.07690426379997, 363.2242431488, 112.7552490589, 420.1620483584], [0, 50.5809936384, 135, 505], [41.302368202000025, 135.8922119168, 62.219848613199986, 151.8847046144], [23.285034178100034, 78.7228393472, 82.56274414390003, 97.4421386752]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049081.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each object you identify.", "boxes_value": [[257.3853759744, 184.0598754816, 349.8062744064, 326.7785644544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049081_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each object you identify.", "boxes_value": [[23.385375974400006, 36.05987548159999, 115.80627440640001, 178.7785644544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049081.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two bottles, a wine glass, a cup, a napkin, a chair, and a desk.", "boxes_value": [[257.3853759744, 184.0598754816, 349.8062744064, 326.7785644544], [306.0106201344, 184.0598754816, 331.8293457408, 243.4429321216], [257.3853759744, 203.8542480384, 294.39221191679997, 266.6798095872], [271.9837646592, 268.936584448, 281.876342784, 299.5136108544], [270.7233886464, 294.2255248896, 314.4374999808, 326.7785644544], [281.7591552768, 209.518371584, 349.8062744064, 256.2286987264], [265.61242675200003, 158.771362304, 350.38293457919997, 237.7752685568], [0, 229.4065551872, 768.5390625024, 513.7078857216]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049081_crop.jpg", "text": "Can you generate a description of the contents within the selected region in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two bottles, a wine glass, a cup, a napkin, a chair, and a desk.", "boxes_value": [[23.385375974400006, 36.05987548159999, 115.80627440640001, 178.7785644544], [72.01062013440003, 36.05987548159999, 97.82934574080002, 95.4429321216], [23.385375974400006, 55.85424803839999, 60.392211916799965, 118.6798095872], [37.983764659200006, 120.93658444800002, 47.876342783999974, 151.51361085439999], [36.7233886464, 146.22552488960002, 80.4374999808, 178.7785644544], [47.759155276800016, 61.51837158399999, 115.80627440640001, 108.22869872640001], [31.612426752000033, 10.771362304000007, 116.38293457919997, 89.7752685568], [0, 81.40655518720001, 138, 214]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049083.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[154.2589111296, 502.93957516800003, 434.8214111232, 767.0423583744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049083_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[70.2589111296, 66.93957516800003, 350.8214111232, 331.0423583744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049083.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include two high heels, two leather shoes, and a sneakers.", "boxes_value": [[154.2589111296, 502.93957516800003, 434.8214111232, 767.0423583744], [244.787109376, 502.93957516800003, 335.315246592, 617.4090576384], [340.5524292096, 603.9420165888, 434.8214111232, 716.1671142911999], [277.7064208896, 689.9812012032, 381.7015991296, 767.0423583744], [154.2589111296, 575.5117187328, 274.7137451008, 693.7220458751999], [83.9312133632, 657.061889664, 227.57922365440004, 767.0423583744]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3]]}, {"image_path": "objects365_v1_00049083_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include two high heels, two leather shoes, and a sneakers.", "boxes_value": [[70.2589111296, 66.93957516800003, 350.8214111232, 331.0423583744], [160.787109376, 66.93957516800003, 251.315246592, 181.4090576384], [256.5524292096, 167.94201658880002, 350.8214111232, 280.16711429119994], [193.7064208896, 253.98120120320004, 297.7015991296, 331.0423583744], [70.2589111296, 139.51171873279998, 190.7137451008, 257.7220458751999], [0, 221.06188966399998, 143.57922365440004, 331.0423583744]], "boxes_seq": [[0], [0], [1, 4], [2, 5], [3]]}, {"image_path": "objects365_v1_00049085.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 35.8793334784, 145.39477537690001, 389.8934936576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049085_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates.", "boxes_value": [[0, 35.8793334784, 145.39477537690001, 389.8934936576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049085.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a power outlet, a nightstand, a carpet, and a telephone.", "boxes_value": [[0, 35.8793334784, 145.39477537690001, 389.8934936576], [0, 35.8793334784, 64.2764892777, 129.2440185344], [17.5966797029, 169.5045776384, 47.66827394839999, 187.3249511936], [0.6171264941, 233.3010253824, 145.39477537690001, 389.8934936576], [0, 215.6565551616, 681.855834985, 511.6215209984], [25.4368286435, 237.432495104, 73.7615966596, 264.5998535168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049085_crop.jpg", "text": "Can you give me a description of the region in image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a lamp, a power outlet, a nightstand, a carpet, and a telephone.", "boxes_value": [[0, 35.8793334784, 145.39477537690001, 389.8934936576], [0, 35.8793334784, 64.2764892777, 129.2440185344], [17.5966797029, 169.5045776384, 47.66827394839999, 187.3249511936], [0.6171264941, 233.3010253824, 145.39477537690001, 389.8934936576], [0, 215.6565551616, 181, 478], [25.4368286435, 237.432495104, 73.7615966596, 264.5998535168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049086.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations.", "boxes_value": [[141.6203003195, 274.6932983296, 350.65527343170004, 357.4566650368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049086_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations.", "boxes_value": [[52.62030031949999, 20.693298329599997, 261.65527343170004, 103.45666503680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049086.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a hat, a speaker, and three tripods.", "boxes_value": [[141.6203003195, 274.6932983296, 350.65527343170004, 357.4566650368], [276.43182172219997, 276.7728859136, 326.8751143573, 323.4597631488], [186.4310913223, 277.7460937728, 209.5077514494, 311.8995971584], [141.6203003195, 274.6932983296, 165.9007568256, 328.700195328], [189.72729494670003, 313.042724608, 219.00000000609998, 351.1652832256], [318.2108764675, 293.8000488448, 350.65527343170004, 357.4566650368]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049086_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Please mention the objects and their locations. For your reference, objects involved in this region include a hat, a speaker, and three tripods.", "boxes_value": [[52.62030031949999, 20.693298329599997, 261.65527343170004, 103.45666503680002], [187.43182172219997, 22.77288591360002, 237.8751143573, 69.4597631488], [97.4310913223, 23.74609377280001, 120.50775144939999, 57.89959715840001], [52.62030031949999, 20.693298329599997, 76.90075682560001, 74.700195328], [100.72729494670003, 59.042724608000015, 130.00000000609998, 97.16528322559998], [229.2108764675, 39.80004884480002, 261.65527343170004, 103.45666503680002]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049088.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[59.279296896000005, 196.4436035072, 475.3395995904, 328.392395008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049088_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[59.279296896000005, 33.44360350720001, 475.3395995904, 165.392395008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049088.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two gloves, three sneakers, and three hockey sticks.", "boxes_value": [[59.279296896000005, 196.4436035072, 475.3395995904, 328.392395008], [165.6512451072, 104.6141357568, 331.4118652416, 321.9691772416], [315.4945068288, 135.9000854528, 490.58605954560005, 317.029296896], [166.0202636544, 215.673217792, 202.8578491392, 246.4869995008], [181.5430298112, 196.4436035072, 199.38256834560002, 215.2098998784], [189.8530273536, 297.5353393664, 231.21545410559997, 316.9503783936], [308.2424316672, 297.3243408384, 331.456054656, 322.0151367168], [451.10559083519996, 289.2211303936, 475.3395995904, 316.5089721856], [59.279296896000005, 205.1547851776, 196.2665405184, 328.392395008], [229.87683102719998, 186.8218994176, 477.3704834304, 299.3653564416], [358.7161865472, 215.3397216768, 505.88830563839997, 323.8092041216]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6, 7], [8, 9, 10]]}, {"image_path": "objects365_v1_00049088_crop.jpg", "text": "Describe what can be found within the bounds of in the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, two gloves, three sneakers, and three hockey sticks.", "boxes_value": [[59.279296896000005, 33.44360350720001, 475.3395995904, 165.392395008], [165.6512451072, 0, 331.4118652416, 158.9691772416], [315.4945068288, 0, 490.58605954560005, 154.029296896], [166.0202636544, 52.673217792, 202.8578491392, 83.48699950080001], [181.5430298112, 33.44360350720001, 199.38256834560002, 52.209899878399995], [189.8530273536, 134.53533936640002, 231.21545410559997, 153.95037839359998], [308.2424316672, 134.32434083840002, 331.456054656, 159.0151367168], [451.10559083519996, 126.22113039359999, 475.3395995904, 153.5089721856], [59.279296896000005, 42.15478517759999, 196.2665405184, 165.392395008], [229.87683102719998, 23.821899417600008, 477.3704834304, 136.3653564416], [358.7161865472, 52.339721676799996, 505.88830563839997, 160.80920412159998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6, 7], [8, 9, 10]]}, {"image_path": "objects365_v1_00049090.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[32.393916229300004, 21.9731848192, 126.55159301249999, 214.802780672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049090_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[24.393916229300004, 21.9731848192, 118.55159301249999, 214.802780672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049090.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a hat, a mask, a gloves, and a glasses.", "boxes_value": [[32.393916229300004, 21.9731848192, 126.55159301249999, 214.802780672], [0, 21.10266112, 126.9461059699, 512.0395507712], [32.393916229300004, 21.9731848192, 101.6424573405, 71.8028228608], [37.157043417, 68.1388788736, 88.0858646333, 114.1886681088], [84.76160453, 184.0171935744, 126.55159301249999, 214.802780672], [51.7269672984, 63.8000374272, 100.8594334248, 85.216753408]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049090_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, a hat, a mask, a gloves, and a glasses.", "boxes_value": [[24.393916229300004, 21.9731848192, 118.55159301249999, 214.802780672], [0, 21.10266112, 118.9461059699, 263], [24.393916229300004, 21.9731848192, 93.6424573405, 71.8028228608], [29.157043416999997, 68.1388788736, 80.0858646333, 114.1886681088], [76.76160453, 184.0171935744, 118.55159301249999, 214.802780672], [43.7269672984, 63.8000374272, 92.8594334248, 85.216753408]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049092.jpg", "text": "What sort of things can be seen in the region of the photo ? Please mention the objects and their locations.", "boxes_value": [[121.8828735681, 153.5877074944, 250.7396240231, 395.8171997184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049092_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Please mention the objects and their locations.", "boxes_value": [[32.882873568099996, 60.58770749440001, 161.7396240231, 302.8171997184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049092.jpg", "text": "What sort of things can be seen in the region of the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[121.8828735681, 153.5877074944, 250.7396240231, 395.8171997184], [121.8828735681, 163.1834106368, 230.862792942, 395.8171997184], [205.5026855519, 153.5877074944, 250.7396240231, 334.5354614272], [234.6302490133, 179.6658935296, 269.4071045009, 273.2673950208], [0.1646118107, 164.0117797888, 213.88543701249998, 488.296142592], [204.5751953203, 154.8936767488, 222.93383788260002, 181.9176635904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049092_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[32.882873568099996, 60.58770749440001, 161.7396240231, 302.8171997184], [32.882873568099996, 70.1834106368, 141.862792942, 302.8171997184], [116.50268555189999, 60.58770749440001, 161.7396240231, 241.5354614272], [145.6302490133, 86.6658935296, 180.4071045009, 180.2673950208], [0, 71.0117797888, 124.88543701249998, 363], [115.5751953203, 61.893676748800004, 133.93383788260002, 88.9176635904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049093.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference.", "boxes_value": [[554.8247070564, 309.29119872, 615.67065427, 385.4829101568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049093_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference.", "boxes_value": [[15.82470705640003, 19.29119872000001, 76.67065427, 95.48291015680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049093.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, three chairs, and a desk.", "boxes_value": [[554.8247070564, 309.29119872, 615.67065427, 385.4829101568], [532.3126220296, 348.264648448, 613.9978027608, 422.6607055872], [532.7979736432, 324.9784545792, 574.600952158, 384.0562134016], [554.8247070564, 323.116516096, 615.67065427, 385.4829101568], [601.3083495948, 329.2066650624, 616.404052726, 400.8840331776], [580.1633301196, 309.29119872, 612.9416504048, 323.116516096]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049093_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Give coordinates for the items you reference. For your reference, objects involved in this region include a potted plant, three chairs, and a desk.", "boxes_value": [[15.82470705640003, 19.29119872000001, 76.67065427, 95.48291015680002], [0, 58.264648448, 74.99780276080003, 114], [0, 34.97845457919999, 35.60095215800004, 94.05621340160002], [15.82470705640003, 33.116516096, 76.67065427, 95.48291015680002], [62.308349594800006, 39.20666506240002, 77.40405272600003, 110.88403317759997], [41.163330119600005, 19.29119872000001, 73.94165040480004, 33.116516096]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049095.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[0, 209.2731323392, 98.2868042133, 512.1943359488]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049095_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[0, 76.2731323392, 98.2868042133, 379]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049095.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a storage box, a keyboard, and two computer boxes.", "boxes_value": [[0, 209.2731323392, 98.2868042133, 512.1943359488], [0, 209.2731323392, 98.2868042133, 277.8638916096], [0, 428.3516845568, 64.3772583045, 512.1943359488], [28.252807650900003, 215.5151367168, 102.07806394709999, 231.1999511552], [13.9247436483, 275.0941161984, 76.54150386810001, 426.417907712], [0.076843257, 291.8909912064, 36.054443391, 442.5841064448]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049095_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, a storage box, a keyboard, and two computer boxes.", "boxes_value": [[0, 76.2731323392, 98.2868042133, 379], [0, 76.2731323392, 98.2868042133, 144.8638916096], [0, 295.3516845568, 64.3772583045, 379], [28.252807650900003, 82.51513671679999, 102.07806394709999, 98.19995115520001], [13.9247436483, 142.0941161984, 76.54150386810001, 293.417907712], [0.076843257, 158.89099120639997, 36.054443391, 309.5841064448]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049101.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[75.534179712, 225.876709008, 247.41552736, 376.20550536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049101_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe.", "boxes_value": [[43.534179712, 37.876709008000006, 215.41552736, 188.20550536000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049101.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a vase, a cabinet, and two chairs.", "boxes_value": [[75.534179712, 225.876709008, 247.41552736, 376.20550536], [105.70770265600001, 225.876709008, 168.87359616, 280.684265136], [127.7990112, 256.589050272, 142.8858032, 285.146118144], [75.534179712, 276.525146496, 174.13696288, 340.105041504], [179.52508544, 266.28765868799996, 247.41552736, 312.086792016], [92.237365696, 281.91326904, 197.84472659199997, 376.20550536]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049101_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a flower, a vase, a cabinet, and two chairs.", "boxes_value": [[43.534179712, 37.876709008000006, 215.41552736, 188.20550536000002], [73.70770265600001, 37.876709008000006, 136.87359616, 92.68426513600002], [95.7990112, 68.58905027200001, 110.8858032, 97.14611814400001], [43.534179712, 88.52514649599999, 142.13696288, 152.10504150399998], [147.52508544, 78.28765868799996, 215.41552736, 124.086792016], [60.237365696, 93.91326903999999, 165.84472659199997, 188.20550536000002]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049103.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[206.9957885625, 113.2216796672, 567.3883056408, 447.624389632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049103_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[90.9957885625, 84.2216796672, 451, 418.624389632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049103.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, three pictures, a lamp, and a bed.", "boxes_value": [[206.9957885625, 113.2216796672, 567.3883056408, 447.624389632], [469.493286129, 259.6311035392, 567.3883056408, 447.624389632], [524.0718994014001, 113.2216796672, 566.5219726761, 196.3891601408], [536.2005615309, 201.5871582208, 566.5219726761, 235.3739623936], [206.9957885625, 192.0575561728, 255.51013180770002, 276.957702656], [198.7568359623, 115.5642700288, 228.0351562587, 188.7601928704], [0, 0.75781248, 455.1047973612, 512.3029785088]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00049103_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, three pictures, a lamp, and a bed.", "boxes_value": [[90.9957885625, 84.2216796672, 451, 418.624389632], [353.493286129, 230.6311035392, 451, 418.624389632], [408.07189940140006, 84.2216796672, 450.52197267609995, 167.3891601408], [420.2005615309, 172.5871582208, 450.52197267609995, 206.3739623936], [90.9957885625, 163.0575561728, 139.51013180770002, 247.95770265599998], [82.75683596229999, 86.5642700288, 112.0351562587, 159.7601928704], [0, 0, 339.1047973612, 483]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4], [6]]}, {"image_path": "objects365_v1_00049105.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object.", "boxes_value": [[240.0247802637, 13.6707763712, 602.4659424022, 432.81378176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049105_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object.", "boxes_value": [[91.0247802637, 13.6707763712, 453.46594240219997, 432.81378176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049105.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object. For your reference, objects involved in this region include a couch, two pillows, and three pictures.", "boxes_value": [[240.0247802637, 13.6707763712, 602.4659424022, 432.81378176], [78.9312744065, 237.0852050944, 592.0140380703999, 510.944152832], [240.0247802637, 285.4132080128, 367.0596923524, 383.768005376], [409.17297362579995, 287.9293823488, 553.3515625256, 432.81378176], [458.1705322262, 13.6707763712, 602.4659424022, 173.1918334976], [340.84344480379997, 26.8314819584, 445.63183593350004, 162.071166976], [242.3244629148, 32.2052612096, 327.4090575984, 154.9061279232]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049105_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Specify the location of each mentioned object. For your reference, objects involved in this region include a couch, two pillows, and three pictures.", "boxes_value": [[91.0247802637, 13.6707763712, 453.46594240219997, 432.81378176], [0, 237.0852050944, 443.0140380703999, 510.944152832], [91.0247802637, 285.4132080128, 218.05969235240002, 383.768005376], [260.17297362579995, 287.9293823488, 404.3515625256, 432.81378176], [309.1705322262, 13.6707763712, 453.46594240219997, 173.1918334976], [191.84344480379997, 26.8314819584, 296.63183593350004, 162.071166976], [93.3244629148, 32.2052612096, 178.4090575984, 154.9061279232]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049106.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object.", "boxes_value": [[466.65148922879996, 0, 749.6646728448, 336.048278784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049106_crop.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object.", "boxes_value": [[71.65148922879996, 0, 354.66467284479995, 336.048278784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049106.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a sneakers, a hat, and a pickup truck.", "boxes_value": [[466.65148922879996, 0, 749.6646728448, 336.048278784], [466.65148922879996, 87.3616943104, 593.9489746176, 336.048278784], [673.4146728192001, 0, 749.6646728448, 223.171630848], [569.2667236608, 263.3386840576, 592.5532226304, 307.2755126784], [674.6782226688, 0.2435913216, 718.3253174015999, 16.1865844736], [567.5683593983999, 0.5304565248, 733.7364502272, 71.3838500864]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049106_crop.jpg", "text": "Can you provide a description of the area in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include two people, a sneakers, a hat, and a pickup truck.", "boxes_value": [[71.65148922879996, 0, 354.66467284479995, 336.048278784], [71.65148922879996, 87.3616943104, 198.94897461760002, 336.048278784], [278.41467281920006, 0, 354.66467284479995, 223.171630848], [174.26672366080004, 263.3386840576, 197.5532226304, 307.2755126784], [279.6782226688, 0.2435913216, 323.32531740159993, 16.1865844736], [172.56835939839993, 0.5304565248, 338.73645022719995, 71.3838500864]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049107.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[136.4045410304, 73.4666747924, 512.3405151232, 242.52178953720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049107_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[94.4045410304, 42.4666747924, 470, 211.52178953720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049107.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[136.4045410304, 73.4666747924, 512.3405151232, 242.52178953720002], [136.4045410304, 73.8926391724, 169.1711426048, 108.020935094], [259.6985473536, 155.6199951284, 444.9010619904, 214.0300292936], [265.2762450944, 73.4666747924, 300.4024047616, 111.466674796], [279.5085449216, 121.73242185480001, 310.171325696, 159.53240963800002], [449.144958464, 208.56433104439998, 512.3405151232, 242.52178953720002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049107_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include five lamps.", "boxes_value": [[94.4045410304, 42.4666747924, 470, 211.52178953720002], [94.4045410304, 42.892639172399996, 127.1711426048, 77.020935094], [217.69854735360002, 124.6199951284, 402.9010619904, 183.0300292936], [223.2762450944, 42.4666747924, 258.4024047616, 80.466674796], [237.5085449216, 90.73242185480001, 268.171325696, 128.53240963800002], [407.144958464, 177.56433104439998, 470, 211.52178953720002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049108.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[130.2084350464, 246.9505615229, 358.3025512448, 355.2944946231]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049108_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[57.20843504640001, 27.95056152289999, 285.3025512448, 136.29449462309998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049108.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a potted plant, a picture, two people, and two umbrellas.", "boxes_value": [[130.2084350464, 246.9505615229, 358.3025512448, 355.2944946231], [185.4478149632, 253.7380371406, 257.7728881664, 355.2944946231], [346.0562134016, 246.9505615229, 358.3025512448, 297.4054565569], [235.6156616192, 282.11029053709996, 256.9709472768, 371.35632324510004], [179.150634752, 284.4975585792, 194.1376953344, 315.1742553995], [121.7781982208, 267.1687622204, 186.6441650176, 312.12335203739997], [130.2084350464, 279.5798950366, 188.7517089792, 284.9658813526]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049108_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a potted plant, a picture, two people, and two umbrellas.", "boxes_value": [[57.20843504640001, 27.95056152289999, 285.3025512448, 136.29449462309998], [112.44781496319999, 34.7380371406, 184.77288816639998, 136.29449462309998], [273.0562134016, 27.95056152289999, 285.3025512448, 78.40545655689999], [162.6156616192, 63.11029053709996, 183.97094727680002, 152.35632324510004], [106.150634752, 65.49755857920002, 121.13769533440001, 96.1742553995], [48.77819822079999, 48.168762220400026, 113.64416501759999, 93.12335203739997], [57.20843504640001, 60.57989503660002, 115.7517089792, 65.96588135259998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049112.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for each element you describe.", "boxes_value": [[601.866210944, 188.26989744, 639.61901856, 356.093078592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049112_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for each element you describe.", "boxes_value": [[9.866210944000045, 42.269897439999994, 47.61901855999997, 210.09307859199998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049112.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a flower, a vase, a desk, and a cabinet.", "boxes_value": [[601.866210944, 188.26989744, 639.61901856, 356.093078592], [601.866210944, 188.26989744, 630.594238272, 230.726501472], [601.980712896, 245.23071288, 639.4555664, 293.245300272], [614.472412096, 280.363342272, 637.113403328, 313.153747536], [601.976928704, 301.464599616, 639.61901856, 356.093078592], [602.980712896, 280.21160889600003, 639.61901856, 307.911865248]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049112_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a lamp, a flower, a vase, a desk, and a cabinet.", "boxes_value": [[9.866210944000045, 42.269897439999994, 47.61901855999997, 210.09307859199998], [9.866210944000045, 42.269897439999994, 38.594238271999984, 84.726501472], [9.980712896, 99.23071288, 47.45556639999995, 147.245300272], [22.47241209599997, 134.363342272, 45.113403328000004, 167.15374753600003], [9.976928703999988, 155.464599616, 47.61901855999997, 210.09307859199998], [10.980712896, 134.21160889600003, 47.61901855999997, 161.91186524800003]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049114.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[5.1187744399, 248.371398912, 125.3679199308, 328.999572736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049114_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[5.1187744399, 20.37139891199999, 125.3679199308, 100.999572736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049114.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[5.1187744399, 248.371398912, 125.3679199308, 328.999572736], [77.56396482230001, 261.4826660352, 125.3679199308, 328.999572736], [46.0231933867, 271.339172352, 91.36303707740001, 314.7077026304], [8.5685424866, 304.3583984128, 50.9514160174, 335.4063110144], [0, 305.8368530432, 31.238464366099997, 346.7412719616], [5.1187744399, 253.5974731264, 38.6308594065, 297.4588622848], [32.1126708991, 248.371398912, 46.2168579001, 276.8798828032]], "boxes_seq": [[0], [0], [1, 2, 4, 5, 6], [3]]}, {"image_path": "objects365_v1_00049114_crop.jpg", "text": "What's the story in the section of the included visual ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five chairs, and a desk.", "boxes_value": [[5.1187744399, 20.37139891199999, 125.3679199308, 100.999572736], [77.56396482230001, 33.4826660352, 125.3679199308, 100.999572736], [46.0231933867, 43.33917235199999, 91.36303707740001, 86.70770263039998], [8.5685424866, 76.3583984128, 50.9514160174, 107.40631101439999], [0, 77.8368530432, 31.238464366099997, 118.74127196159998], [5.1187744399, 25.59747312639999, 38.6308594065, 69.45886228479998], [32.1126708991, 20.37139891199999, 46.2168579001, 48.87988280320002]], "boxes_seq": [[0], [0], [1, 2, 4, 5, 6], [3]]}, {"image_path": "objects365_v1_00049115.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[248.648681666, 115.360168448, 417.11120604999996, 512.0256347648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049115_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object.", "boxes_value": [[42.64868166599999, 99.360168448, 211.11120604999996, 496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049115.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a handbag, a leather shoes, a bow tie, a hat, a glasses, and a canned.", "boxes_value": [[248.648681666, 115.360168448, 417.11120604999996, 512.0256347648], [376.66101072600003, 150.9182129152, 476.88049316600006, 511.9220580864], [248.648681666, 115.360168448, 417.11120604999996, 512.0256347648], [229.04174801199997, 157.3858642432, 289.965087874, 496.7443237376], [244.57409669, 379.126708992, 276.521362271, 451.336242688], [378.318725554, 476.9836425728, 409.500244153, 491.8657226752], [330.99365237399996, 205.3688964608, 359.507324253, 218.4265746944], [321.933227539, 116.0971679744, 379.76000978300004, 173.3909912064], [400.279174834, 168.3278198272, 432.25708010100004, 178.4541625856], [333.05578614, 294.665344256, 349.295165989, 332.265380864]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8], [9]]}, {"image_path": "objects365_v1_00049115_crop.jpg", "text": "In , can you tell me more about the area specified by the bounding box ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a handbag, a leather shoes, a bow tie, a hat, a glasses, and a canned.", "boxes_value": [[42.64868166599999, 99.360168448, 211.11120604999996, 496], [170.66101072600003, 134.9182129152, 253, 495.9220580864], [42.64868166599999, 99.360168448, 211.11120604999996, 496], [23.04174801199997, 141.3858642432, 83.965087874, 480.7443237376], [38.574096690000005, 363.126708992, 70.52136227099999, 435.336242688], [172.31872555400003, 460.9836425728, 203.50024415299998, 475.8657226752], [124.99365237399996, 189.3688964608, 153.50732425299998, 202.4265746944], [115.93322753899997, 100.0971679744, 173.76000978300004, 157.3909912064], [194.279174834, 152.3278198272, 226.25708010100004, 162.4541625856], [127.05578614000001, 278.665344256, 143.295165989, 316.265380864]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7], [8], [9]]}, {"image_path": "objects365_v1_00049116.jpg", "text": "Please detail the contents of the chosen region in the visual input . Provide the coordinates for all objects that you mention.", "boxes_value": [[506.5391845632, 226.9155884032, 681.8826904084, 341.8204345856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049116_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Provide the coordinates for all objects that you mention.", "boxes_value": [[44.539184563200024, 28.91558840319999, 219.88269040839998, 143.82043458560003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049116.jpg", "text": "Please detail the contents of the chosen region in the visual input . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a bus, a car, and a truck.", "boxes_value": [[506.5391845632, 226.9155884032, 681.8826904084, 341.8204345856], [506.5391845632, 248.1799926784, 538.5881347724, 285.4461059584], [542.6873779007001, 228.8016357376, 559.4571533284001, 273.5209350656], [56.9773559749, 156.513549824, 638.1574706701999, 420.209838848], [633.0745849434, 264.3636474368, 681.2342529404, 341.8204345856], [668.0245361262, 226.9155884032, 681.8826904084, 274.0924072448]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049116_crop.jpg", "text": "Please detail the contents of the chosen region in the visual input . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, a bus, a car, and a truck.", "boxes_value": [[44.539184563200024, 28.91558840319999, 219.88269040839998, 143.82043458560003], [44.539184563200024, 50.1799926784, 76.58813477240005, 87.44610595839998], [80.68737790070008, 30.80163573760001, 97.45715332840007, 75.52093506559999], [0, 0, 176.15747067019993, 172], [171.07458494340005, 66.36364743680002, 219.23425294039998, 143.82043458560003], [206.02453612620002, 28.91558840319999, 219.88269040839998, 76.09240724479997]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049117.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[501.6114502092, 96.9483032064, 682.7335205150999, 269.265808128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049117_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[45.61145020919997, 43.9483032064, 226.73352051509994, 216.265808128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049117.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cymbals, two drums, and a person.", "boxes_value": [[501.6114502092, 96.9483032064, 682.7335205150999, 269.265808128], [504.12707519450004, 96.9483032064, 602.2348632854, 167.3847046144], [572.6767577969999, 130.2797851648, 682.7335205150999, 151.66229248], [501.6114502092, 188.138244608, 578.9656982422999, 203.8606567424], [592.172485328, 183.7359619072, 682.1046142517, 279.3281860096], [556.3254394626, 240.9655151616, 597.2036133148, 269.265808128], [655.1405029443999, 217.2966308352, 683.6490478651, 320.430603008]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049117_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three cymbals, two drums, and a person.", "boxes_value": [[45.61145020919997, 43.9483032064, 226.73352051509994, 216.265808128], [48.12707519450004, 43.9483032064, 146.23486328540002, 114.3847046144], [116.67675779699994, 77.27978516479999, 226.73352051509994, 98.66229247999999], [45.61145020919997, 135.138244608, 122.96569824229994, 150.8606567424], [136.172485328, 130.7359619072, 226.1046142517, 226.32818600960002], [100.3254394626, 187.9655151616, 141.2036133148, 216.265808128], [199.14050294439994, 164.2966308352, 227, 259]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049118.jpg", "text": "Share some details about the objects or environment within the bounding box in . Specify the location of each mentioned object.", "boxes_value": [[315.6667480576, 333.405029293, 488.8022460928, 582.734619137]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049118_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Specify the location of each mentioned object.", "boxes_value": [[43.66674805759999, 62.405029292999984, 216.80224609279998, 311.734619137]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049118.jpg", "text": "Share some details about the objects or environment within the bounding box in . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a necklace, and a leather shoes.", "boxes_value": [[315.6667480576, 333.405029293, 488.8022460928, 582.734619137], [397.5841064448, 333.405029293, 488.8022460928, 582.734619137], [356.367004416, 357.72985840999996, 413.8006592, 580.707519508], [288.1223144448, 317.188476572, 364.4752807424, 585.437377912], [315.6667480576, 377.906372089, 338.7494507008, 397.888305653], [404.2869873152, 556.568115269, 442.0407714816, 570.051635738]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049118_crop.jpg", "text": "Share some details about the objects or environment within the bounding box in . Specify the location of each mentioned object. For your reference, objects involved in this region include three people, a necklace, and a leather shoes.", "boxes_value": [[43.66674805759999, 62.405029292999984, 216.80224609279998, 311.734619137], [125.5841064448, 62.405029292999984, 216.80224609279998, 311.734619137], [84.36700441599999, 86.72985840999996, 141.80065919999998, 309.707519508], [16.122314444799997, 46.18847657200001, 92.4752807424, 314.437377912], [43.66674805759999, 106.906372089, 66.74945070080003, 126.88830565299997], [132.2869873152, 285.56811526900003, 170.04077148160002, 299.05163573799996]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049121.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each object you identify.", "boxes_value": [[0.9259643519999999, 238.14074707199998, 265.390747072, 462.171569808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049121_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each object you identify.", "boxes_value": [[0.9259643519999999, 56.14074707199998, 265.390747072, 280.171569808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049121.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a radiator, a desk, and a couch.", "boxes_value": [[0.9259643519999999, 238.14074707199998, 265.390747072, 462.171569808], [146.80249024, 238.846618656, 202.56719968, 329.90545656], [201.155456512, 238.14074707199998, 265.390747072, 329.90545656], [89.02026368, 255.736999488, 289.20275878399997, 314.858215344], [66.93725587200001, 270.13861084800004, 126.94757081600001, 280.140319824], [0.9259643519999999, 256.802978496, 153.618835456, 462.171569808]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049121_crop.jpg", "text": "For the image , can you assess and describe what's happening at ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a radiator, a desk, and a couch.", "boxes_value": [[0.9259643519999999, 56.14074707199998, 265.390747072, 280.171569808], [146.80249024, 56.846618656000004, 202.56719968, 147.90545656], [201.155456512, 56.14074707199998, 265.390747072, 147.90545656], [89.02026368, 73.73699948800001, 289.20275878399997, 132.85821534399997], [66.93725587200001, 88.13861084800004, 126.94757081600001, 98.14031982400002], [0.9259643519999999, 74.80297849599998, 153.618835456, 280.171569808]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049122.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object.", "boxes_value": [[61.555725125399995, 192.1808471552, 396.7486572232, 433.1813354496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049122_crop.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object.", "boxes_value": [[61.555725125399995, 61.18084715520001, 396.7486572232, 302.1813354496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049122.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object. For your reference, objects involved in this region include two street lights, two cars, and a suv.", "boxes_value": [[61.555725125399995, 192.1808471552, 396.7486572232, 433.1813354496], [100.187194822, 192.1808471552, 128.1673584078, 379.3568725504], [61.555725125399995, 369.290893568, 182.0308838087, 416.3197632], [151.2898559615, 368.1910400512, 329.3962402426, 433.1813354496], [340.0212402476, 358.7421264896, 396.7486572232, 400.5413208064], [171.8890991033, 299.7216186368, 181.6402588006, 335.4758910976]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00049122_crop.jpg", "text": "Please describe the area in the image for me. Specify the location of each mentioned object. For your reference, objects involved in this region include two street lights, two cars, and a suv.", "boxes_value": [[61.555725125399995, 61.18084715520001, 396.7486572232, 302.1813354496], [100.187194822, 61.18084715520001, 128.1673584078, 248.3568725504], [61.555725125399995, 238.290893568, 182.0308838087, 285.3197632], [151.2898559615, 237.1910400512, 329.3962402426, 302.1813354496], [340.0212402476, 227.74212648960003, 396.7486572232, 269.5413208064], [171.8890991033, 168.72161863679997, 181.6402588006, 204.47589109760003]], "boxes_seq": [[0], [0], [1, 5], [2, 3], [4]]}, {"image_path": "objects365_v1_00049123.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[210.25146484159998, 19.3547973632, 510.21325686240004, 226.8284912128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049123_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations.", "boxes_value": [[75.25146484159998, 19.3547973632, 375.21325686240004, 226.8284912128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049123.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a gun, two people, two helmets, and a glasses.", "boxes_value": [[210.25146484159998, 19.3547973632, 510.21325686240004, 226.8284912128], [210.25146484159998, 133.6629638656, 422.1668701432, 226.8284912128], [464.1361083632, 53.6869507072, 538.623657208, 258.6817627136], [222.87823485200002, 18.4281616384, 408.11169431760004, 333.5066528256], [248.67059322560002, 19.3547973632, 326.5009765472, 68.9915161088], [253.3947143728, 59.6492919808, 284.48284912319997, 73.3501586944], [484.06823730720004, 53.184753408, 510.21325686240004, 71.08587648]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00049123_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Please mention the objects and their locations. For your reference, objects involved in this region include a gun, two people, two helmets, and a glasses.", "boxes_value": [[75.25146484159998, 19.3547973632, 375.21325686240004, 226.8284912128], [75.25146484159998, 133.6629638656, 287.1668701432, 226.8284912128], [329.1361083632, 53.6869507072, 403.62365720800005, 258.6817627136], [87.87823485200002, 18.4281616384, 273.11169431760004, 278], [113.67059322560002, 19.3547973632, 191.50097654720003, 68.9915161088], [118.3947143728, 59.6492919808, 149.48284912319997, 73.3501586944], [349.06823730720004, 53.184753408, 375.21325686240004, 71.08587648]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 6], [5]]}, {"image_path": "objects365_v1_00049126.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[0.1378784256, 591.8144531081, 512.0953369088, 683.5930175503]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049126_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[0.1378784256, 23.814453108099997, 512, 115]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049126.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two carpets, a couch, a storage box, a bowl, and a side table.", "boxes_value": [[0.1378784256, 591.8144531081, 512.0953369088, 683.5930175503], [0.1378784256, 591.8144531081, 499.0947265536, 683.5930175503], [125.4127197184, 513.0396728523999, 376.7112427008, 662.7493896559], [477.8032226816, 566.4614257643, 511.290893568, 625.9191894687], [471.0481567232, 634.3603515674, 512.0953369088, 682.9390869182], [400.400695808, 615.0117187412, 437.2906493952, 636.9645996275], [373.0021362176, 562.4865722352, 463.6478881792, 654.7226562482]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5], [6]]}, {"image_path": "objects365_v1_00049126_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include two carpets, a couch, a storage box, a bowl, and a side table.", "boxes_value": [[0.1378784256, 23.814453108099997, 512, 115], [0.1378784256, 23.814453108099997, 499.0947265536, 115], [125.4127197184, 0, 376.7112427008, 94.74938965590002], [477.8032226816, 0, 511.290893568, 57.91918946869998], [471.0481567232, 66.36035156740002, 512, 114.93908691820002], [400.400695808, 47.01171874119996, 437.2906493952, 68.96459962749998], [373.0021362176, 0, 463.6478881792, 86.7226562482]], "boxes_seq": [[0], [0], [1, 4], [2], [3], [5], [6]]}, {"image_path": "objects365_v1_00049129.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[4.9171753278, 136.5040283136, 243.1119384549, 192.2512206848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049129_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[4.9171753278, 14.504028313600003, 243.1119384549, 70.25122068479999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049129.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five lamps.", "boxes_value": [[4.9171753278, 136.5040283136, 243.1119384549, 192.2512206848], [93.26562499299999, 160.0789794816, 130.3521728437, 192.2512206848], [137.65966793730001, 136.5040283136, 176.9083251913, 183.0491943424], [200.80621336570002, 138.764648448, 230.1941527997, 168.7985229312], [213.71185305039998, 162.6625366016, 243.1119384549, 189.5447387648], [4.9171753278, 159.064025856, 23.186645534300002, 183.5617065472]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049129_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include five lamps.", "boxes_value": [[4.9171753278, 14.504028313600003, 243.1119384549, 70.25122068479999], [93.26562499299999, 38.07897948159999, 130.3521728437, 70.25122068479999], [137.65966793730001, 14.504028313600003, 176.9083251913, 61.0491943424], [200.80621336570002, 16.764648448000003, 230.1941527997, 46.79852293120001], [213.71185305039998, 40.66253660160001, 243.1119384549, 67.5447387648], [4.9171753278, 37.064025856, 23.186645534300002, 61.561706547200004]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049132.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[283.98242187700004, 245.0669555712, 664.818603526, 318.9545898496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049132_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[95.98242187700004, 19.066955571199998, 476.81860352599995, 92.95458984959998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049132.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include a sink, a faucet, a tea pot, a gas stove, a blender, and a kettle.", "boxes_value": [[283.98242187700004, 245.0669555712, 664.818603526, 318.9545898496], [294.73059084700003, 314.8206787072, 347.369140657, 321.986145024], [283.98242187700004, 266.867309568, 302.44726565900004, 318.9545898496], [439.963378932, 262.4784545792, 468.007568336, 299.2259521536], [489.83264160600004, 292.9616699392, 625.863159152, 313.0302734336], [634.523803683, 245.0669555712, 664.818603526, 307.3554687488], [410.212524414, 265.982788096, 428.233642606, 294.2775268352]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049132_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include a sink, a faucet, a tea pot, a gas stove, a blender, and a kettle.", "boxes_value": [[95.98242187700004, 19.066955571199998, 476.81860352599995, 92.95458984959998], [106.73059084700003, 88.82067870719999, 159.369140657, 95.986145024], [95.98242187700004, 40.867309567999996, 114.44726565900004, 92.95458984959998], [251.963378932, 36.47845457919999, 280.007568336, 73.22595215360002], [301.83264160600004, 66.96166993920002, 437.863159152, 87.03027343359997], [446.523803683, 19.066955571199998, 476.81860352599995, 81.35546874879998], [222.21252441399997, 39.98278809599998, 240.233642606, 68.27752683519998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049133.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[158.1702880768, 158.3560791163, 350.8574829056, 295.3055420211]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049133_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[49.170288076800006, 34.35607911630001, 241.85748290560002, 171.3055420211]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049133.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a wine glass, two cups, and a bottle.", "boxes_value": [[158.1702880768, 158.3560791163, 350.8574829056, 295.3055420211], [199.470397952, 132.11334227650002, 321.7935791104, 397.3925781566], [158.1702880768, 247.79650881179998, 181.6848144384, 295.3055420211], [249.3215942144, 159.5217284869, 284.290893568, 214.30688474], [219.5977172992, 158.3560791163, 251.0700683776, 215.47253419769999], [331.4593505792, 161.34399409830002, 350.8574829056, 216.4096680083]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049133_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a cabinet, a wine glass, two cups, and a bottle.", "boxes_value": [[49.170288076800006, 34.35607911630001, 241.85748290560002, 171.3055420211], [90.47039795200001, 8.11334227650002, 212.7935791104, 205], [49.170288076800006, 123.79650881179998, 72.6848144384, 171.3055420211], [140.3215942144, 35.5217284869, 175.290893568, 90.30688473999999], [110.59771729920001, 34.35607911630001, 142.0700683776, 91.47253419769999], [222.45935057920002, 37.343994098300016, 241.85748290560002, 92.4096680083]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049134.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object.", "boxes_value": [[251.8840332288, 124.1602783232, 455.14160156160005, 328.0596923904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049134_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object.", "boxes_value": [[50.88403322880001, 51.160278323200004, 254.14160156160005, 255.05969239040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049134.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, four people, and a hat.", "boxes_value": [[251.8840332288, 124.1602783232, 455.14160156160005, 328.0596923904], [251.8840332288, 269.1392822272, 305.07617187840003, 328.0596923904], [302.3645019648, 167.4007568384, 347.1550292736, 267.1779785216], [301.3739013888, 198.3601074176, 398.0389404672, 327.6652832256], [382.46972659200003, 157.2572021248, 411.2359619328, 199.2808838144], [340.96118161920003, 123.3619384832, 514.3193359104, 511.8735962112], [399.0833740032, 124.1602783232, 455.14160156160005, 175.623657216]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049134_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, four people, and a hat.", "boxes_value": [[50.88403322880001, 51.160278323200004, 254.14160156160005, 255.05969239040002], [50.88403322880001, 196.1392822272, 104.07617187840003, 255.05969239040002], [101.36450196480001, 94.40075683840001, 146.1550292736, 194.17797852159998], [100.3739013888, 125.3601074176, 197.0389404672, 254.66528322559998], [181.46972659200003, 84.25720212479999, 210.23596193280002, 126.2808838144], [139.96118161920003, 50.36193848320001, 304, 306], [198.0833740032, 51.160278323200004, 254.14160156160005, 102.623657216]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049135.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each mentioned object.", "boxes_value": [[249.75677491199997, 279.1837768704, 356.1729736704, 386.386413568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049135_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each mentioned object.", "boxes_value": [[26.75677491199997, 27.18377687039998, 133.17297367039998, 134.38641356800002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049135.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and three backpacks.", "boxes_value": [[249.75677491199997, 279.1837768704, 356.1729736704, 386.386413568], [321.5382080256, 280.0084228608, 356.1729736704, 386.386413568], [276.733032192, 279.1837768704, 307.2446289408, 369.0690307584], [244.02252195839998, 273.136474624, 275.0838623232, 349.2778320384], [329.4027099648, 292.6139526144, 353.5443115008, 322.8592529408], [287.2609863168, 288.5812378112, 303.795043968, 319.6330566656], [249.75677491199997, 284.5485229568, 267.3583984128, 305.5186157056]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049135_crop.jpg", "text": "Kindly give an overview of the section in photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, and three backpacks.", "boxes_value": [[26.75677491199997, 27.18377687039998, 133.17297367039998, 134.38641356800002], [98.5382080256, 28.00842286080001, 133.17297367039998, 134.38641356800002], [53.733032191999996, 27.18377687039998, 84.2446289408, 117.0690307584], [21.022521958399977, 21.136474624000016, 52.08386232319998, 97.27783203839999], [106.40270996480001, 40.61395261439998, 130.5443115008, 70.85925294079999], [64.26098631679997, 36.581237811200026, 80.79504396800002, 67.63305666560001], [26.75677491199997, 32.548522956800014, 44.3583984128, 53.518615705599984]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049138.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[233.68225096449999, 279.4661860352, 312.6995849245, 368.7212524544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049138_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention.", "boxes_value": [[20.682250964499985, 22.466186035199996, 99.69958492450002, 111.72125245439997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049138.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, four cups, and a bottle.", "boxes_value": [[233.68225096449999, 279.4661860352, 312.6995849245, 368.7212524544], [252.5551518485, 279.4661860352, 284.1595638225, 307.8689782272], [233.68225096449999, 315.8702392832, 264.437377926, 354.68737792], [259.9584961185, 343.3408203264, 272.4993896295, 368.7212524544], [274.291015621, 328.4111328256, 303.851806619, 384.8453369344], [253.6877441455, 320.44635008, 279.79333498249997, 358.836975104], [292.73645022750003, 280.0814209024, 312.6995849245, 365.8569335808]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049138_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a handbag, four cups, and a bottle.", "boxes_value": [[20.682250964499985, 22.466186035199996, 99.69958492450002, 111.72125245439997], [39.5551518485, 22.466186035199996, 71.15956382249999, 50.86897822719999], [20.682250964499985, 58.87023928320002, 51.43737792600001, 97.68737792000002], [46.95849611850002, 86.34082032639998, 59.4993896295, 111.72125245439997], [61.291015620999985, 71.41113282560002, 90.851806619, 127.8453369344], [40.687744145500005, 63.44635008, 66.79333498249997, 101.83697510399998], [79.73645022750003, 23.081420902399998, 99.69958492450002, 108.85693358079999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049139.jpg", "text": "Kindly give an overview of the section in photo . Please point out the objects and their coordinates.", "boxes_value": [[8.6694336074, 308.1874389504, 399.57568356219997, 509.5062255616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049139_crop.jpg", "text": "Kindly give an overview of the section in photo . Please point out the objects and their coordinates.", "boxes_value": [[8.6694336074, 51.18743895040001, 399.57568356219997, 252.5062255616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049139.jpg", "text": "Kindly give an overview of the section in photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a nightstand, a lamp, a potted plant, a bed, and two pillows.", "boxes_value": [[8.6694336074, 308.1874389504, 399.57568356219997, 509.5062255616], [8.6694336074, 423.3973999104, 236.7414550824, 509.5062255616], [116.7900390584, 313.2669067264, 197.1519775144, 442.3588867072], [45.832214361, 371.4010619904, 154.4062499886, 487.6693115392], [197.3962402387, 209.5247192576, 680.719238293, 509.1284179456], [315.4704589706, 316.4273071104, 421.37585445919996, 422.3327026176], [252.35076904680002, 308.1874389504, 399.57568356219997, 418.0340576256]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049139_crop.jpg", "text": "Kindly give an overview of the section in photo . Please point out the objects and their coordinates. For your reference, objects involved in this region include a nightstand, a lamp, a potted plant, a bed, and two pillows.", "boxes_value": [[8.6694336074, 51.18743895040001, 399.57568356219997, 252.5062255616], [8.6694336074, 166.3973999104, 236.7414550824, 252.5062255616], [116.7900390584, 56.26690672640001, 197.1519775144, 185.35888670719999], [45.832214361, 114.40106199040002, 154.4062499886, 230.6693115392], [197.3962402387, 0, 497, 252.1284179456], [315.4704589706, 59.42730711040002, 421.37585445919996, 165.33270261759998], [252.35076904680002, 51.18743895040001, 399.57568356219997, 161.03405762559998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049140.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for all objects that you mention.", "boxes_value": [[391.32189944, 357.6817016832, 531.3208007884999, 423.736511232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049140_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for all objects that you mention.", "boxes_value": [[35.32189943999998, 16.681701683200004, 175.32080078849992, 82.736511232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049140.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five sheep.", "boxes_value": [[391.32189944, 357.6817016832, 531.3208007884999, 423.736511232], [440.7236327841, 357.6817016832, 457.86608886930003, 381.7804565504], [507.04467771250006, 358.2089843712, 531.3208007884999, 380.6599120896], [493.9025878701, 383.9454345728, 538.9870605245, 410.777038592], [455.0241699228, 396.9049072128, 478.387695309, 422.093750016], [391.32189944, 396.7224121344, 435.6762695458, 423.736511232]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049140_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five sheep.", "boxes_value": [[35.32189943999998, 16.681701683200004, 175.32080078849992, 82.736511232], [84.7236327841, 16.681701683200004, 101.86608886930003, 40.780456550400004], [151.04467771250006, 17.208984371200017, 175.32080078849992, 39.65991208960003], [137.90258787009998, 42.945434572800025, 182.98706052449995, 69.777038592], [99.02416992280001, 55.904907212800026, 122.38769530899998, 81.093750016], [35.32189943999998, 55.722412134399974, 79.67626954579998, 82.736511232]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049143.jpg", "text": "Fill me in about the selected portion within the presented image . Specify the location of each mentioned object.", "boxes_value": [[224.3828735393, 134.602355968, 439.97241208040003, 159.3599243264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049143_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Specify the location of each mentioned object.", "boxes_value": [[54.38287353929999, 6.602355968000012, 269.97241208040003, 31.359924326400005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049143.jpg", "text": "Fill me in about the selected portion within the presented image . Specify the location of each mentioned object. For your reference, objects involved in this region include a picture, a cabinet, a person, a cup, and a bowl.", "boxes_value": [[224.3828735393, 134.602355968, 439.97241208040003, 159.3599243264], [417.89941403309996, 134.602355968, 439.97241208040003, 159.3599243264], [241.06091311970002, 0.2499999744, 343.71984862520003, 158.3447265792], [415.43896486129995, 124.3587646464, 429.7818603623, 149.191162112], [224.3828735393, 135.8148803584, 240.5069579786, 158.2094116352], [396.1660156749, 139.0050049024, 417.35607913219997, 158.9605712896]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049143_crop.jpg", "text": "Fill me in about the selected portion within the presented image . Specify the location of each mentioned object. For your reference, objects involved in this region include a picture, a cabinet, a person, a cup, and a bowl.", "boxes_value": [[54.38287353929999, 6.602355968000012, 269.97241208040003, 31.359924326400005], [247.89941403309996, 6.602355968000012, 269.97241208040003, 31.359924326400005], [71.06091311970002, 0, 173.71984862520003, 30.3447265792], [245.43896486129995, 0, 259.7818603623, 21.191162112], [54.38287353929999, 7.814880358399989, 70.50695797860001, 30.209411635200013], [226.1660156749, 11.005004902400003, 247.35607913219997, 30.96057128960001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049146.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[299.8665161182, 0.737976064, 425.7459716993, 263.7598266368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049146_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each object you identify.", "boxes_value": [[31.866516118200025, 0.737976064, 157.74597169930001, 263.7598266368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049146.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three lamps, a picture, and two candles.", "boxes_value": [[299.8665161182, 0.737976064, 425.7459716993, 263.7598266368], [329.004455579, 0.737976064, 425.7459716993, 141.2869873152], [325.2487182851, 110.305908224, 391.9178466505, 191.9180908032], [370.0780029472, 228.1263427584, 383.2968749666, 266.6334838784], [403.41247560790003, 229.2758178816, 416.63134762730004, 263.7598266368], [299.8665161182, 115.0305175552, 317.3416748219, 170.4594116096], [402.5328369027, 110.6617431552, 420.8270263942, 170.4594116096]], "boxes_seq": [[0], [0], [1, 5, 6], [2], [3, 4]]}, {"image_path": "objects365_v1_00049146_crop.jpg", "text": "Can you generate a description for the selected region in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three lamps, a picture, and two candles.", "boxes_value": [[31.866516118200025, 0.737976064, 157.74597169930001, 263.7598266368], [61.00445557900002, 0.737976064, 157.74597169930001, 141.2869873152], [57.24871828509998, 110.305908224, 123.9178466505, 191.9180908032], [102.07800294719999, 228.1263427584, 115.2968749666, 266.6334838784], [135.41247560790003, 229.2758178816, 148.63134762730004, 263.7598266368], [31.866516118200025, 115.0305175552, 49.34167482190003, 170.4594116096], [134.5328369027, 110.6617431552, 152.8270263942, 170.4594116096]], "boxes_seq": [[0], [0], [1, 5, 6], [2], [3, 4]]}, {"image_path": "objects365_v1_00049148.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[352.09973141750004, 154.9030151168, 449.6302490012, 247.9215698432]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049148_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object.", "boxes_value": [[25.09973141750004, 23.903015116799992, 122.63024900120001, 116.92156984319999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049148.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a hat, and a gloves.", "boxes_value": [[352.09973141750004, 154.9030151168, 449.6302490012, 247.9215698432], [423.6322021263, 168.9421997056, 449.6302490012, 244.0180663808], [358.10656738689994, 155.147277824, 449.3649902422, 310.0742797824], [352.09973141750004, 207.2191161856, 365.0609130692, 247.9215698432], [388.96984865179996, 154.9030151168, 406.1477051051, 174.0684814336], [424.8361816176, 186.3447265792, 448.40856936, 202.8573608448]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049148_crop.jpg", "text": "Describe the bbox in the provided photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a hat, and a gloves.", "boxes_value": [[25.09973141750004, 23.903015116799992, 122.63024900120001, 116.92156984319999], [96.6322021263, 37.94219970559999, 122.63024900120001, 113.01806638080001], [31.10656738689994, 24.147277824000014, 122.36499024220001, 140], [25.09973141750004, 76.2191161856, 38.06091306920001, 116.92156984319999], [61.969848651799964, 23.903015116799992, 79.14770510509999, 43.0684814336], [97.8361816176, 55.3447265792, 121.40856936, 71.85736084480001]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049149.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[0, 20.9187012096, 193.5769653248, 687.9880371456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049149_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object.", "boxes_value": [[0, 20.9187012096, 193.5769653248, 687.9880371456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049149.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a glasses, two hats, and two sneakers.", "boxes_value": [[0, 20.9187012096, 193.5769653248, 687.9880371456], [157.5613403136, 73.6558227456, 182.0004882944, 90.3773803776], [79.0986938368, 20.9187012096, 193.5769653248, 108.3851928576], [0.2872924672, 270.75183106559996, 54.5582885888, 299.91235353599996], [0, 602.2918701312001, 48.8416137728, 623.2786864896], [51.90216064, 639.0187987968, 89.940795904, 687.9880371456]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049149_crop.jpg", "text": "Please provide details for the area within the bounding box in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a glasses, two hats, and two sneakers.", "boxes_value": [[0, 20.9187012096, 193.5769653248, 687.9880371456], [157.5613403136, 73.6558227456, 182.0004882944, 90.3773803776], [79.0986938368, 20.9187012096, 193.5769653248, 108.3851928576], [0.2872924672, 270.75183106559996, 54.5582885888, 299.91235353599996], [0, 602.2918701312001, 48.8416137728, 623.2786864896], [51.90216064, 639.0187987968, 89.940795904, 687.9880371456]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049150.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[236.826477056, 48.0747070361, 342.7321777152, 488.67407229990005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049150_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please point out the objects and their coordinates.", "boxes_value": [[26.826477055999987, 48.0747070361, 132.7321777152, 488.67407229990005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049150.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a boots, a gloves, a helmet, and a horse.", "boxes_value": [[236.826477056, 48.0747070361, 342.7321777152, 488.67407229990005], [209.968078592, 46.5299682894, 344.4824828928, 478.9948730833], [301.3454589952, 371.2563476586, 342.7321777152, 488.67407229990005], [236.826477056, 240.45983886800002, 271.6882323968, 269.2960815444], [257.4852905472, 48.0747070361, 307.8411254784, 89.3923339926], [5.59899904, 130.5647583255, 493.0900268544, 680.708496063]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049150_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a boots, a gloves, a helmet, and a horse.", "boxes_value": [[26.826477055999987, 48.0747070361, 132.7321777152, 488.67407229990005], [0, 46.5299682894, 134.48248289280002, 478.9948730833], [91.34545899519998, 371.2563476586, 132.7321777152, 488.67407229990005], [26.826477055999987, 240.45983886800002, 61.688232396800004, 269.2960815444], [47.48529054720001, 48.0747070361, 97.84112547839999, 89.3923339926], [0, 130.5647583255, 159, 598]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049151.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[2.4093017594, 225.6606445568, 139.3305053595, 331.3367920128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049151_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[2.4093017594, 26.66064455680001, 139.3305053595, 132.3367920128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049151.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a bottle, a dog, and two chairs.", "boxes_value": [[2.4093017594, 225.6606445568, 139.3305053595, 331.3367920128], [61.503356949, 166.3996581888, 140.3211670113, 311.1785278464], [5.9406738564, 181.296875008, 64.3757324069, 322.104125952], [4.952148419, 292.9395141632, 16.214965849000002, 319.8166503936], [46.8423461818, 282.5822143488, 99.1336059879, 331.3367920128], [2.4093017594, 232.5866699264, 65.8086547579, 317.2966308352], [68.4724731249, 225.6606445568, 139.3305053595, 310.370666496]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049151_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a bottle, a dog, and two chairs.", "boxes_value": [[2.4093017594, 26.66064455680001, 139.3305053595, 132.3367920128], [61.503356949, 0, 140.3211670113, 112.17852784640002], [5.9406738564, 0, 64.3757324069, 123.104125952], [4.952148419, 93.93951416319999, 16.214965849000002, 120.81665039360001], [46.8423461818, 83.5822143488, 99.1336059879, 132.3367920128], [2.4093017594, 33.58666992639999, 65.8086547579, 118.2966308352], [68.4724731249, 26.66064455680001, 139.3305053595, 111.37066649600001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049152.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[96.0845337088, 610.4836425984, 163.8355713024, 709.3374023424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049152_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[17.084533708799995, 25.483642598400024, 84.83557130240001, 124.33740234239997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049152.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a backpack.", "boxes_value": [[96.0845337088, 610.4836425984, 163.8355713024, 709.3374023424], [75.1082763776, 608.3137207295999, 111.0332031488, 702.5864257536], [96.0845337088, 614.8236083712001, 125.7407226368, 708.3730468608], [126.2229003776, 610.4836425984, 153.2268676608, 709.3374023424], [146.4758910976, 613.3769531135999, 163.8355713024, 676.5468750336], [124.4776001024, 639.6025390847999, 145.2857666048, 663.5136718848]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049152_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a backpack.", "boxes_value": [[17.084533708799995, 25.483642598400024, 84.83557130240001, 124.33740234239997], [0, 23.313720729599936, 32.0332031488, 117.5864257536], [17.084533708799995, 29.823608371200066, 46.7407226368, 123.37304686079995], [47.2229003776, 25.483642598400024, 74.2268676608, 124.33740234239997], [67.4758910976, 28.37695311359994, 84.83557130240001, 91.54687503360003], [45.477600102400004, 54.6025390847999, 66.28576660479999, 78.51367188480003]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049153.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[133.12121584000002, 330.3280029184, 291.55334472, 492.473449728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049153_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.12121584000002, 41.32800291839999, 198.55334471999998, 203.473449728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049153.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[133.12121584000002, 330.3280029184, 291.55334472, 492.473449728], [274.84375, 350.1320190464, 311.97631832, 472.6694336], [253.18310544, 330.3280029184, 291.55334472, 454.7220459008], [178.29913328, 337.7545165824, 225.33361816, 478.8581542912], [133.12121584000002, 343.3244018688, 184.487854, 492.473449728], [172.1090088, 420.0886840832, 199.8773804, 458.4955444224]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049153_crop.jpg", "text": "Please provide information about the area within the bounding box in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four people, and a handbag.", "boxes_value": [[40.12121584000002, 41.32800291839999, 198.55334471999998, 203.473449728], [181.84375, 61.13201904639999, 218.97631832000002, 183.6694336], [160.18310544, 41.32800291839999, 198.55334471999998, 165.72204590080003], [85.29913328, 48.754516582400015, 132.33361816, 189.85815429119998], [40.12121584000002, 54.32440186880001, 91.487854, 203.473449728], [79.1090088, 131.0886840832, 106.87738039999999, 169.49554442239997]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049156.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[0.1268921112, 273.4902953984, 682.4223632682, 510.64916992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049156_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe.", "boxes_value": [[0.1268921112, 59.49029539840001, 682.4223632682, 296.64916992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049156.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, a chair, a desk, a cabinet, a picture, a stool, and two stuffed toys.", "boxes_value": [[0.1268921112, 273.4902953984, 682.4223632682, 510.64916992], [266.0118408104, 193.0273437696, 679.3923339643001, 446.9727783424], [420.4062499695, 396.2618408448, 648.0363769329999, 506.756042496], [380.9940185511, 470.7249145344, 682.4223632682, 510.64916992], [105.278259288, 270.8153686528, 246.43170165090004, 353.24468992], [0.1268921112, 350.436584448, 56.385803254699994, 424.6186523648], [9.4669799472, 407.1917114368, 58.847167988, 455.3374633984], [244.2410278488, 273.4902953984, 271.5274658435, 315.3945312256], [261.2950439715, 271.0540161024, 288.09423830369997, 309.0601806848]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049156_crop.jpg", "text": "Please provide details for the area within the bounding box in . Provide the coordinates for each element you describe. For your reference, objects involved in this region include a couch, a chair, a desk, a cabinet, a picture, a stool, and two stuffed toys.", "boxes_value": [[0.1268921112, 59.49029539840001, 682.4223632682, 296.64916992], [266.0118408104, 0, 679.3923339643001, 232.97277834239998], [420.4062499695, 182.26184084480002, 648.0363769329999, 292.756042496], [380.9940185511, 256.7249145344, 682.4223632682, 296.64916992], [105.278259288, 56.815368652799975, 246.43170165090004, 139.24468991999998], [0.1268921112, 136.43658444800002, 56.385803254699994, 210.61865236480003], [9.4669799472, 193.1917114368, 58.847167988, 241.33746339840002], [244.2410278488, 59.49029539840001, 271.5274658435, 101.39453122560002], [261.2950439715, 57.0540161024, 288.09423830369997, 95.0601806848]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049157.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[375.50402832919997, 168.4090576384, 671.706665049, 352.7449032704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049157_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference.", "boxes_value": [[74.50402832919997, 46.409057638399986, 370.706665049, 230.74490327040002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049157.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two leather shoes, a hat, a helmet, two sports cars, and a car.", "boxes_value": [[375.50402832919997, 168.4090576384, 671.706665049, 352.7449032704], [375.1236572412, 169.3114013696, 426.28247066939997, 350.4413452288], [612.2518310202, 175.53338624, 671.706665049, 275.0857544192], [397.9978027054, 337.4070434816, 425.74023435960004, 350.2112427008], [375.5905761414, 337.8643188224, 405.31457518720003, 349.9063720448], [375.50402832919997, 168.4090576384, 404.8824463218, 183.6228637696], [557.0853271166001, 212.236816384, 579.942749039, 240.3453369344], [580.3980713120001, 210.211035392, 765.977172817, 377.7752410624], [438.50242316900005, 201.7399291904, 644.307422613, 352.7449032704], [566.7426757934, 200.1101074432, 765.977172817, 277.994323712]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6], [7, 8], [9]]}, {"image_path": "objects365_v1_00049157_crop.jpg", "text": "What's the story in the section of the included visual ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, two leather shoes, a hat, a helmet, two sports cars, and a car.", "boxes_value": [[74.50402832919997, 46.409057638399986, 370.706665049, 230.74490327040002], [74.12365724120002, 47.31140136959999, 125.28247066939997, 228.4413452288], [311.25183102020003, 53.53338624, 370.706665049, 153.0857544192], [96.99780270539998, 215.4070434816, 124.74023435960004, 228.21124270080003], [74.59057614139999, 215.86431882239998, 104.31457518720003, 227.90637204479998], [74.50402832919997, 46.409057638399986, 103.88244632179999, 61.6228637696], [256.08532711660007, 90.23681638400001, 278.942749039, 118.3453369344], [279.39807131200007, 88.21103539200001, 444, 255.77524106240003], [137.50242316900005, 79.7399291904, 343.30742261299997, 230.74490327040002], [265.74267579340005, 78.11010744320001, 444, 155.99432371199998]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6], [7, 8], [9]]}, {"image_path": "objects365_v1_00049158.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[1.0352782848, 238.560363776, 417.79833984000004, 510.4368896512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049158_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify.", "boxes_value": [[1.0352782848, 68.560363776, 417.79833984000004, 340.4368896512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049158.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, two pillows, and a vase.", "boxes_value": [[1.0352782848, 238.560363776, 417.79833984000004, 510.4368896512], [244.19158932480002, 340.2492675584, 417.79833984000004, 507.40216064], [174.5411986944, 248.6505737216, 313.338012672, 408.5929565184], [357.1976318208, 249.4064331264, 434.85607910399995, 318.9754638848], [183.72973632, 238.560363776, 260.0410156032, 313.8018798592], [1.0352782848, 421.37475584, 134.6285400576, 510.4368896512]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049158_crop.jpg", "text": "In the image , elaborate on the details found within the section . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, two pillows, and a vase.", "boxes_value": [[1.0352782848, 68.560363776, 417.79833984000004, 340.4368896512], [244.19158932480002, 170.2492675584, 417.79833984000004, 337.40216064], [174.5411986944, 78.65057372160001, 313.338012672, 238.5929565184], [357.1976318208, 79.4064331264, 434.85607910399995, 148.97546388479998], [183.72973632, 68.560363776, 260.0410156032, 143.80187985920003], [1.0352782848, 251.37475583999998, 134.6285400576, 340.4368896512]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049161.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[114.9003296256, 195.5147704832, 310.9931640576, 349.9046020608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049161_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[49.900329625599994, 39.51477048320001, 245.99316405759998, 193.90460206080002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049161.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include four benches, and a truck.", "boxes_value": [[114.9003296256, 195.5147704832, 310.9931640576, 349.9046020608], [114.9003296256, 283.0270385664, 139.3441161984, 316.6372680704], [130.17773437440002, 287.1010131968, 156.6585083136, 323.257507328], [209.62011717119998, 302.8876342784, 248.2255859712, 335.479431168], [240.5289917184, 310.7802123776, 286.0672607232, 349.9046020608], [279.0046386432, 195.5147704832, 310.9931640576, 206.734619136]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049161_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include four benches, and a truck.", "boxes_value": [[49.900329625599994, 39.51477048320001, 245.99316405759998, 193.90460206080002], [49.900329625599994, 127.0270385664, 74.3441161984, 160.6372680704], [65.17773437440002, 131.1010131968, 91.65850831360001, 167.25750732799997], [144.62011717119998, 146.88763427840001, 183.2255859712, 179.47943116800002], [175.5289917184, 154.78021237759998, 221.06726072319998, 193.90460206080002], [214.0046386432, 39.51477048320001, 245.99316405759998, 50.73461913599999]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049164.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[144.80761721529998, 139.0891723776, 283.61236573499997, 201.5565185536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049164_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[34.807617215299985, 16.089172377599994, 173.61236573499997, 78.5565185536]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049164.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two benches, three suvs, and a car.", "boxes_value": [[144.80761721529998, 139.0891723776, 283.61236573499997, 201.5565185536], [215.29351807679998, 186.80548096, 258.9948730437, 201.5565185536], [261.2075805624, 187.1737670656, 295.5048217707, 201.5565185536], [144.80761721529998, 154.1249389568, 183.9761352463, 178.8884887552], [161.4521484059, 139.0891723776, 181.15203857010002, 158.5115356672], [172.42895510780002, 143.5031128064, 202.15716550270002, 162.053527808], [246.8683471708, 144.5733642752, 283.61236573499997, 162.4102782976]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6], [4]]}, {"image_path": "objects365_v1_00049164_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two benches, three suvs, and a car.", "boxes_value": [[34.807617215299985, 16.089172377599994, 173.61236573499997, 78.5565185536], [105.29351807679998, 63.80548096000001, 148.9948730437, 78.5565185536], [151.20758056239998, 64.1737670656, 185.5048217707, 78.5565185536], [34.807617215299985, 31.12493895680001, 73.9761352463, 55.8884887552], [51.452148405900004, 16.089172377599994, 71.15203857010002, 35.51153566720001], [62.42895510780002, 20.503112806399997, 92.15716550270002, 39.05352780800001], [136.8683471708, 21.573364275199992, 173.61236573499997, 39.4102782976]], "boxes_seq": [[0], [0], [1, 2], [3, 5, 6], [4]]}, {"image_path": "objects365_v1_00049165.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object.", "boxes_value": [[389.6750488203, 268.6177368064, 535.4870605605, 403.0534667776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049165_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object.", "boxes_value": [[36.67504882029999, 33.61773680639999, 182.48706056050003, 168.05346677760002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049165.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, and two boats.", "boxes_value": [[389.6750488203, 268.6177368064, 535.4870605605, 403.0534667776], [389.6750488203, 268.6177368064, 535.4870605605, 346.822692864], [437.9919433668, 284.0708007936, 487.2746582334, 327.9240112128], [439.5783691134, 362.9591064576, 486.11645507789996, 403.0534667776], [430.4166259716, 321.3398437376, 490.6827392373, 349.9052734464], [439.4739990069, 347.8151245312, 484.0639648407, 373.9420166144]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049165_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a paddle, two people, and two boats.", "boxes_value": [[36.67504882029999, 33.61773680639999, 182.48706056050003, 168.05346677760002], [36.67504882029999, 33.61773680639999, 182.48706056050003, 111.82269286399998], [84.9919433668, 49.07080079359997, 134.27465823339998, 92.9240112128], [86.57836911340002, 127.9591064576, 133.11645507789996, 168.05346677760002], [77.41662597160001, 86.3398437376, 137.68273923729998, 114.90527344639997], [86.47399900689999, 112.81512453120001, 131.0639648407, 138.94201661440002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049166.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object.", "boxes_value": [[197.1189575168, 8.6642456064, 307.7803344896, 273.186340352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049166_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object.", "boxes_value": [[28.11895751680001, 8.6642456064, 138.78033448960002, 273.186340352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049166.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a barrel, a sneakers, and two leather shoes.", "boxes_value": [[197.1189575168, 8.6642456064, 307.7803344896, 273.186340352], [197.1189575168, 8.6642456064, 307.7803344896, 273.186340352], [255.3807983616, 220.2470703104, 307.2375488512, 256.4547729408], [177.5194092032, 242.4151001088, 220.1683959808, 271.5552978432], [220.5543213056, 250.906249984, 245.448974592, 265.1868896256], [271.308532736, 258.8185424896, 295.0452880896, 271.9412841984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049166_crop.jpg", "text": "What is happening within the rectangle defined by coordinates in the image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a barrel, a sneakers, and two leather shoes.", "boxes_value": [[28.11895751680001, 8.6642456064, 138.78033448960002, 273.186340352], [28.11895751680001, 8.6642456064, 138.78033448960002, 273.186340352], [86.3807983616, 220.2470703104, 138.23754885120002, 256.4547729408], [8.519409203200013, 242.4151001088, 51.1683959808, 271.5552978432], [51.55432130560001, 250.906249984, 76.44897459200001, 265.1868896256], [102.30853273600002, 258.8185424896, 126.04528808959998, 271.9412841984]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049168.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[271.5153198496, 191.35565184, 682.4667968576999, 386.3818359296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049168_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[103.51531984960002, 49.35565184000001, 514.4667968576999, 244.38183592960002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049168.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, four storage boxes, and a moniter.", "boxes_value": [[271.5153198496, 191.35565184, 682.4667968576999, 386.3818359296], [271.5153198496, 191.35565184, 397.67468262729994, 386.3818359296], [395.35986326190005, 191.9343261696, 508.20886227709997, 367.8630371328], [621.0076904345001, 339.8055419904, 682.7919922134, 400.2891235328], [627.1861572007, 299.1579589632, 682.4667968576999, 360.291931136], [641.1484374966, 270.2498168832, 682.4794921885, 313.4921874944], [644.7320556487, 227.2463989248, 682.7183838008, 278.8505248768], [646.0086669992, 147.3772582912, 683.0260009903999, 233.9599609344]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00049168_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, four storage boxes, and a moniter.", "boxes_value": [[103.51531984960002, 49.35565184000001, 514.4667968576999, 244.38183592960002], [103.51531984960002, 49.35565184000001, 229.67468262729994, 244.38183592960002], [227.35986326190005, 49.93432616960001, 340.20886227709997, 225.8630371328], [453.0076904345001, 197.8055419904, 514.7919922134, 258.2891235328], [459.1861572007, 157.15795896319997, 514.4667968576999, 218.29193113600002], [473.14843749659997, 128.24981688320003, 514.4794921885, 171.4921874944], [476.7320556487, 85.24639892479999, 514.7183838008, 136.85052487680002], [478.00866699920005, 5.3772582912000075, 515, 91.95996093439999]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6], [7]]}, {"image_path": "objects365_v1_00049169.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object.", "boxes_value": [[0.047302257800000004, 240.9365234176, 497.11437986600004, 352.6668701184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049169_crop.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object.", "boxes_value": [[0.047302257800000004, 27.9365234176, 497.11437986600004, 139.66687011840003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049169.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[0.047302257800000004, 240.9365234176, 497.11437986600004, 352.6668701184], [37.4604492286, 311.8907470848, 79.4976806376, 352.6668701184], [0.047302257800000004, 290.872131328, 24.0085449173, 336.6926879744], [413.3475341693, 241.3844604416, 445.5999755544, 262.886108416], [449.6315917792, 233.3213500928, 469.341430658, 262.886108416], [478.7484130849, 240.9365234176, 497.11437986600004, 274.9807739392]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049169_crop.jpg", "text": "Please share details about the rectangular region within the image . Specify the location of each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[0.047302257800000004, 27.9365234176, 497.11437986600004, 139.66687011840003], [37.4604492286, 98.89074708480001, 79.4976806376, 139.66687011840003], [0.047302257800000004, 77.87213132800002, 24.0085449173, 123.69268797439997], [413.3475341693, 28.384460441599998, 445.5999755544, 49.88610841600001], [449.6315917792, 20.321350092800003, 469.341430658, 49.88610841600001], [478.7484130849, 27.9365234176, 497.11437986600004, 61.98077393919999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049171.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[221.3477172736, 427.9997558625, 434.5464477696, 577.8555908174001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049171_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates.", "boxes_value": [[53.34771727360001, 37.999755862500024, 266.5464477696, 187.85559081740007]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049171.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[221.3477172736, 427.9997558625, 434.5464477696, 577.8555908174001], [221.3477172736, 538.1987304501, 248.7469482496, 577.8555908174001], [286.0399170048, 513.683593778, 308.2889404416, 551.1773681474999], [303.7291259904, 427.9997558625, 325.1745605632, 485.47363282550003], [341.4731445248, 453.30541988830004, 358.6295165952, 472.60632324019997], [415.6744384512, 489.3337402373, 434.5464477696, 509.06359866569994]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049171_crop.jpg", "text": "In the image , please describe the bounding box . Please point out the objects and their coordinates. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[53.34771727360001, 37.999755862500024, 266.5464477696, 187.85559081740007], [53.34771727360001, 148.19873045010002, 80.7469482496, 187.85559081740007], [118.03991700479997, 123.68359377800004, 140.2889404416, 161.17736814749992], [135.7291259904, 37.999755862500024, 157.17456056319998, 95.47363282550003], [173.47314452479998, 63.30541988830004, 190.62951659520002, 82.60632324019997], [247.6744384512, 99.33374023729999, 266.5464477696, 119.06359866569994]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049173.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[284.7216797188, 0, 682.5200195644, 469.2934570496001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049173_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations.", "boxes_value": [[99.72167971879998, 0, 497.52001956439995, 469.2934570496001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049173.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a chair, a bed, two lamps, a bowl, and a cup.", "boxes_value": [[284.7216797188, 0, 682.5200195644, 469.2934570496001], [338.9141235471, 0.2442016768, 471.1750488619, 68.5769653248], [475.94738767060005, 204.7714843648, 682.5200195644, 469.2934570496001], [0.0805664068, 311.8074340864, 586.3920898125999, 512.9259033088], [284.7216797188, 6.7119140864, 315.7460327371, 58.4191284224], [501.8918456825, 0, 535.3493652326999, 41.3861694464], [492.38098141439997, 73.6333618176, 549.3627929398, 88.7202148352], [275.8618774145, 59.3891601408, 296.8786010724, 98.3319091712]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049173_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a picture, a chair, a bed, two lamps, a bowl, and a cup.", "boxes_value": [[99.72167971879998, 0, 497.52001956439995, 469.2934570496001], [153.91412354710002, 0.2442016768, 286.1750488619, 68.5769653248], [290.94738767060005, 204.7714843648, 497.52001956439995, 469.2934570496001], [0, 311.8074340864, 401.3920898125999, 512], [99.72167971879998, 6.7119140864, 130.7460327371, 58.4191284224], [316.8918456825, 0, 350.34936523269994, 41.3861694464], [307.38098141439997, 73.6333618176, 364.3627929398, 88.7202148352], [90.86187741449999, 59.3891601408, 111.87860107239999, 98.3319091712]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049175.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[44.8856201216, 461.864501937, 396.4237670912, 660.1347655964]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049175_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention.", "boxes_value": [[44.8856201216, 49.864501937, 396.4237670912, 248.13476559640003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049175.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, and a picture.", "boxes_value": [[44.8856201216, 461.864501937, 396.4237670912, 660.1347655964], [266.7392578048, 480.2932129229, 349.027709952, 606.8708496133], [253.7256469504, 461.864501937, 307.9420776448, 544.0288086149], [95.9461059584, 484.3544921792, 157.2476196352, 646.8404541097], [44.8856201216, 473.2381591638, 105.547546368, 660.1347655964], [352.5166625792, 469.7050780941, 396.4237670912, 488.19226071]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049175_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, and a picture.", "boxes_value": [[44.8856201216, 49.864501937, 396.4237670912, 248.13476559640003], [266.7392578048, 68.29321292290001, 349.027709952, 194.87084961330004], [253.7256469504, 49.864501937, 307.9420776448, 132.02880861489996], [95.9461059584, 72.35449217920001, 157.2476196352, 234.84045410969998], [44.8856201216, 61.23815916379999, 105.547546368, 248.13476559640003], [352.5166625792, 57.705078094099974, 396.4237670912, 76.19226071000003]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049176.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[261.4354247863, 275.350402816, 386.6666259434, 334.90216064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049176_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference.", "boxes_value": [[31.43542478630002, 15.350402815999985, 156.6666259434, 74.90216063999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049176.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a backpack, and a handbag.", "boxes_value": [[261.4354247863, 275.350402816, 386.6666259434, 334.90216064], [266.844421362, 255.7058105344, 309.00836183819996, 394.1760253952], [262.5321655597, 272.9547119104, 280.260192856, 364.4696045056], [306.13354489899996, 275.350402816, 323.8615722636, 297.390625024], [372.2896728523, 287.9877319168, 386.6666259434, 313.7149658112], [261.4354247863, 279.664184576, 291.3244628987, 334.90216064]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049176_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, a backpack, and a handbag.", "boxes_value": [[31.43542478630002, 15.350402815999985, 156.6666259434, 74.90216063999998], [36.84442136199999, 0, 79.00836183819996, 89], [32.53216555969999, 12.954711910400022, 50.260192856, 89], [76.13354489899996, 15.350402815999985, 93.86157226360001, 37.390625023999974], [142.2896728523, 27.987731916799987, 156.6666259434, 53.71496581119999], [31.43542478630002, 19.664184576000025, 61.324462898700006, 74.90216063999998]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049177.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[416.32739254710003, 251.5789794816, 574.1557617179001, 492.4365234176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049177_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[40.32739254710003, 60.57897948159999, 198.15576171790008, 301.4365234176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049177.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a storage box, a person, a bracelet, and a pen.", "boxes_value": [[416.32739254710003, 251.5789794816, 574.1557617179001, 492.4365234176], [0, 225.886108416, 623.4384765748, 511.030761728], [522.2672119187, 251.5789794816, 559.1994628924, 335.5159301632], [416.60839841750004, 38.5369872896, 682.7426757886001, 511.7624511488], [549.870849593, 444.0795898368, 574.1557617179001, 492.4365234176], [416.32739254710003, 317.0076294144, 481.0404052926, 381.6774291968]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049177_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a storage box, a person, a bracelet, and a pen.", "boxes_value": [[40.32739254710003, 60.57897948159999, 198.15576171790008, 301.4365234176], [0, 34.88610841600001, 237, 320.030761728], [146.2672119187, 60.57897948159999, 183.1994628924, 144.51593016319998], [40.60839841750004, 0, 237, 320.7624511488], [173.870849593, 253.07958983679998, 198.15576171790008, 301.4365234176], [40.32739254710003, 126.00762941440001, 105.04040529259998, 190.6774291968]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049179.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[456.2324218523, 312.119567872, 557.2750244027, 409.32940672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049179_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[26.23242185229998, 25.119567872000005, 127.27502440269996, 122.32940672000001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049179.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a flag, and a handbag.", "boxes_value": [[456.2324218523, 312.119567872, 557.2750244027, 409.32940672], [456.2324218523, 341.2355346432, 472.8636474516, 368.8496093696], [492.9466552646, 336.21478272, 514.2847900483999, 372.9289550848], [518.050415065, 329.3112182784, 557.2750244027, 409.32940672], [481.9005127249, 312.119567872, 491.6939697456, 348.845153792], [525.6406250049, 356.5387573248, 541.2241210912, 377.5676879872]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049179_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three people, a flag, and a handbag.", "boxes_value": [[26.23242185229998, 25.119567872000005, 127.27502440269996, 122.32940672000001], [26.23242185229998, 54.235534643200026, 42.8636474516, 81.84960936959999], [62.9466552646, 49.21478272000002, 84.28479004839994, 85.92895508480001], [88.05041506500004, 42.31121827840002, 127.27502440269996, 122.32940672000001], [51.90051272490001, 25.119567872000005, 61.693969745599986, 61.84515379200002], [95.64062500490002, 69.53875732479997, 111.2241210912, 90.56768798719997]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049180.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify.", "boxes_value": [[64.4247436482, 360.3794555904, 602.367431673, 513.2933349376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049180_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify.", "boxes_value": [[64.4247436482, 38.37945559040003, 602.367431673, 190]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049180.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, and three carpets.", "boxes_value": [[64.4247436482, 360.3794555904, 602.367431673, 513.2933349376], [323.5185547059, 360.3794555904, 503.836547826, 513.0057373184], [486.4487304793, 429.286682112, 602.367431673, 511.717712384], [404.0648193578, 352.6515503104, 623.619140593, 513.6995849728], [169.4529418834, 481.1144409088, 292.8052978743, 512.3994140672], [64.4247436482, 494.969238272, 179.28540038970002, 512.3994140672], [291.4644775572, 466.8127441408, 348.6713866903, 513.2933349376]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049180_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each object you identify. For your reference, objects involved in this region include two chairs, a desk, and three carpets.", "boxes_value": [[64.4247436482, 38.37945559040003, 602.367431673, 190], [323.5185547059, 38.37945559040003, 503.836547826, 190], [486.4487304793, 107.286682112, 602.367431673, 189.71771238399998], [404.0648193578, 30.65155031040001, 623.619140593, 190], [169.4529418834, 159.11444090880002, 292.8052978743, 190], [64.4247436482, 172.96923827199998, 179.28540038970002, 190], [291.4644775572, 144.81274414080002, 348.6713866903, 190]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049182.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[129.3600463872, 173.13787845119998, 271.9051513856, 537.1448974848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049182_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention.", "boxes_value": [[36.360046387199986, 91.13787845119998, 178.9051513856, 455.1448974848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049182.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a cabinet, a bowl, and a bottle.", "boxes_value": [[129.3600463872, 173.13787845119998, 271.9051513856, 537.1448974848], [167.13488768, 358.4812012032, 271.9051513856, 537.1448974848], [195.7767944192, 347.6782226688, 283.4739990016, 510.3680419584], [129.3600463872, 173.13787845119998, 249.4798583808, 320.4675293184], [129.4503783936, 342.4770508032, 200.2892455936, 361.6173095424], [183.3648071168, 253.866577152, 202.755615232, 311.4331054848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049182_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two chairs, a cabinet, a bowl, and a bottle.", "boxes_value": [[36.360046387199986, 91.13787845119998, 178.9051513856, 455.1448974848], [74.13488767999999, 276.4812012032, 178.9051513856, 455.1448974848], [102.7767944192, 265.6782226688, 190.47399900160002, 428.3680419584], [36.360046387199986, 91.13787845119998, 156.4798583808, 238.4675293184], [36.450378393600005, 260.4770508032, 107.28924559359999, 279.6173095424], [90.3648071168, 171.866577152, 109.755615232, 229.43310548480002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049190.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[446.9797362969, 169.1538696192, 539.0874023767001, 453.100769024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049190_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[23.9797362969, 71.15386961920001, 116.08740237670008, 355.100769024]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049190.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, two cups, and a bottle.", "boxes_value": [[446.9797362969, 169.1538696192, 539.0874023767001, 453.100769024], [54.199707055199994, 252.1636962816, 585.1081543116, 510.99426268159993], [342.5817871031, 315.3427734528, 551.4805908467999, 512.0133056512], [476.3875732478, 212.1195068416, 512.8583984371, 292.8049316352], [501.3675537095, 169.1538696192, 539.0874023767001, 281.5639037952], [446.9797362969, 435.3402099712, 466.42443846879996, 453.100769024]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049190_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a chair, two cups, and a bottle.", "boxes_value": [[23.9797362969, 71.15386961920001, 116.08740237670008, 355.100769024], [0, 154.1636962816, 139, 412.99426268159993], [0, 217.34277345279997, 128.48059084679994, 414], [53.38757324779999, 114.11950684160001, 89.85839843710005, 194.80493163519998], [78.36755370949999, 71.15386961920001, 116.08740237670008, 183.56390379520002], [23.9797362969, 337.3402099712, 43.42443846879996, 355.100769024]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049191.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference.", "boxes_value": [[248.9761963, 273.04681395, 356.12805175, 328.2177124]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049191_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference.", "boxes_value": [[26.976196299999998, 14.04681395, 134.12805175, 69.21771239999998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049191.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a couch, a pillow, two potted plants, a cup, and a bottle.", "boxes_value": [[248.9761963, 273.04681395, 356.12805175, 328.2177124], [166.6986084, 264.96698000000004, 313.76428225, 354.3864746], [265.74053955, 280.63134764999995, 304.5911865, 316.48065185], [304.60980225000003, 273.04681395, 356.12805175, 326.75335695], [291.9523926, 171.7302246, 378.2111206, 321.7147217], [248.9761963, 306.5802002, 265.68640135, 328.2177124], [253.26086425, 294.5831299, 266.11486815, 318.36303710000004]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049191_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a couch, a pillow, two potted plants, a cup, and a bottle.", "boxes_value": [[26.976196299999998, 14.04681395, 134.12805175, 69.21771239999998], [0, 5.966980000000035, 91.76428225000001, 83], [43.740539549999994, 21.631347649999952, 82.59118649999999, 57.480651850000015], [82.60980225000003, 14.04681395, 134.12805175, 67.75335695000001], [69.9523926, 0, 156.21112060000002, 62.714721699999984], [26.976196299999998, 47.58020019999998, 43.68640134999998, 69.21771239999998], [31.260864249999997, 35.58312990000002, 44.11486815000001, 59.36303710000004]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049192.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations.", "boxes_value": [[205.299194312, 3.4132690432, 389.201477035, 171.2407226368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049192_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations.", "boxes_value": [[46.299194312, 3.4132690432, 230.20147703499998, 171.2407226368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049192.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a mirror, a flower, a vase, a lamp, and a car.", "boxes_value": [[205.299194312, 3.4132690432, 389.201477035, 171.2407226368], [196.93139647799998, 27.4364013568, 262.823425309, 178.438903808], [258.70513917299996, 3.4132690432, 336.9519043, 167.4569091584], [269.163330094, 3.6915283456, 389.201477035, 126.5083618304], [318.623474139, 114.282287616, 339.74133299, 159.2965698048], [205.299194312, 148.796447744, 223.66265869199998, 171.2407226368], [274.447570809, 119.6234130944, 325.416503908, 163.7697754112]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049192_crop.jpg", "text": "Please, can you help me understand what's inside the region in image ? Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a mirror, a flower, a vase, a lamp, and a car.", "boxes_value": [[46.299194312, 3.4132690432, 230.20147703499998, 171.2407226368], [37.93139647799998, 27.4364013568, 103.82342530900002, 178.438903808], [99.70513917299996, 3.4132690432, 177.95190430000002, 167.4569091584], [110.163330094, 3.6915283456, 230.20147703499998, 126.5083618304], [159.623474139, 114.282287616, 180.74133299, 159.2965698048], [46.299194312, 148.796447744, 64.66265869199998, 171.2407226368], [115.44757080900001, 119.6234130944, 166.41650390799998, 163.7697754112]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049195.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations.", "boxes_value": [[287.0662841678, 80.8364868096, 491.5137939358, 455.3631591936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049195_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations.", "boxes_value": [[52.066284167800006, 80.8364868096, 256.5137939358, 455.3631591936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049195.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a baseball bat, a baseball glove, a person, a helmet, and two sneakers.", "boxes_value": [[287.0662841678, 80.8364868096, 491.5137939358, 455.3631591936], [324.7241821396, 11.7449951232, 486.2275390852, 159.503356928], [461.4865722879, 290.0805664256, 516.4664306974, 351.9328613376], [287.0662841678, 80.8364868096, 491.5137939358, 455.3631591936], [314.8934936278, 80.0488281088, 389.9312744214, 125.3638305792], [379.6988525494, 329.0377197056, 441.0932617293, 377.7635497984], [374.3389892862, 437.6962890752, 421.1157226397, 457.186645504]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049195_crop.jpg", "text": "In , what elements can be found within the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a baseball bat, a baseball glove, a person, a helmet, and two sneakers.", "boxes_value": [[52.066284167800006, 80.8364868096, 256.5137939358, 455.3631591936], [89.72418213959998, 11.7449951232, 251.22753908520002, 159.503356928], [226.4865722879, 290.0805664256, 281.4664306974, 351.9328613376], [52.066284167800006, 80.8364868096, 256.5137939358, 455.3631591936], [79.89349362780001, 80.0488281088, 154.93127442140002, 125.3638305792], [144.6988525494, 329.0377197056, 206.09326172930002, 377.7635497984], [139.3389892862, 437.6962890752, 186.11572263969998, 457.186645504]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049196.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference.", "boxes_value": [[168.12060544000002, 139.112426736, 535.94543456, 257.725280784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049196_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference.", "boxes_value": [[92.12060544000002, 30.112426736000003, 459.94543455999997, 148.725280784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049196.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a moniter, and three pictures.", "boxes_value": [[168.12060544000002, 139.112426736, 535.94543456, 257.725280784], [508.61694336, 232.2600708, 535.94543456, 257.725280784], [168.12060544000002, 139.112426736, 192.71746828800002, 233.907165504], [281.02722169599997, 165.09161376, 311.969238272, 191.449646016], [235.76013184, 161.653564464, 267.275207488, 189.73065187199998], [324.57525632, 165.66461184, 352.652282688, 192.59564207999998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049196_crop.jpg", "text": "In the image , elaborate on the details found within the section . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, a moniter, and three pictures.", "boxes_value": [[92.12060544000002, 30.112426736000003, 459.94543455999997, 148.725280784], [432.61694336, 123.2600708, 459.94543455999997, 148.725280784], [92.12060544000002, 30.112426736000003, 116.71746828800002, 124.907165504], [205.02722169599997, 56.09161376, 235.96923827199998, 82.449646016], [159.76013184, 52.653564464, 191.27520748799998, 80.73065187199998], [248.57525632, 56.66461183999999, 276.652282688, 83.59564207999998]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049198.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[1.4320068608, 1.0971679782, 252.419372544, 106.8347778064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049198_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object.", "boxes_value": [[1.4320068608, 1.0971679782, 252.419372544, 106.8347778064]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049198.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[1.4320068608, 1.0971679782, 252.419372544, 106.8347778064], [1.4320068608, 4.4320068604, 62.5679931392, 106.8347778064], [223.814086912, 0.6110229580000001, 268.1376953344, 88.4939575044], [218.3884887552, 1.0971679782, 252.419372544, 49.9779662872], [6.99983232, 82.1375784684, 39.4086023168, 100.282139811], [29.0991924736, 85.4365896216, 61.6769276416, 106.88016211739999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049198_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, and two sneakers.", "boxes_value": [[1.4320068608, 1.0971679782, 252.419372544, 106.8347778064], [1.4320068608, 4.4320068604, 62.5679931392, 106.8347778064], [223.814086912, 0.6110229580000001, 268.1376953344, 88.4939575044], [218.3884887552, 1.0971679782, 252.419372544, 49.9779662872], [6.99983232, 82.1375784684, 39.4086023168, 100.282139811], [29.0991924736, 85.4365896216, 61.6769276416, 106.88016211739999]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049202.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.4642333805, 128.2752685568, 346.42626954680003, 466.89019776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049202_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Remember to mention the objects and their corresponding locations.", "boxes_value": [[0.4642333805, 85.2752685568, 346.42626954680003, 423.89019776]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049202.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, a lamp, a car, and three traffic signs.", "boxes_value": [[0.4642333805, 128.2752685568, 346.42626954680003, 466.89019776], [284.29931637970003, 370.0872802816, 385.3057861147, 393.2345581056], [328.3894043071, 179.0181884928, 346.42626954680003, 204.7851562496], [312.2261962631, 325.62524416, 378.1754150218, 347.7885131776], [0.4642333805, 335.46777344, 95.1196899143, 466.89019776], [45.5375976211, 160.5904541184, 144.4394531606, 191.5797118976], [162.9011230745, 198.1731567616, 201.8024902391, 211.3600463872], [166.2080688469, 128.2752685568, 246.13690184740003, 152.0117187584]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00049202_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two desks, a lamp, a car, and three traffic signs.", "boxes_value": [[0.4642333805, 85.2752685568, 346.42626954680003, 423.89019776], [284.29931637970003, 327.0872802816, 385.3057861147, 350.2345581056], [328.3894043071, 136.0181884928, 346.42626954680003, 161.7851562496], [312.2261962631, 282.62524416, 378.1754150218, 304.7885131776], [0.4642333805, 292.46777344, 95.1196899143, 423.89019776], [45.5375976211, 117.5904541184, 144.4394531606, 148.5797118976], [162.9011230745, 155.1731567616, 201.8024902391, 168.3600463872], [166.2080688469, 85.2752685568, 246.13690184740003, 109.01171875840001]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00049204.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please mention the objects and their locations.", "boxes_value": [[32.012512192, 50.411926271999995, 352.272216768, 324.88739016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049204_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please mention the objects and their locations.", "boxes_value": [[32.012512192, 50.411926271999995, 352.272216768, 324.88739016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049204.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two street lights.", "boxes_value": [[32.012512192, 50.411926271999995, 352.272216768, 324.88739016], [318.390502912, 246.209655744, 352.272216768, 300.11242675200003], [318.38824460800004, 211.90136716799998, 327.925048832, 244.067932128], [32.012512192, 265.455505392, 46.079223616, 324.88739016], [217.40631104, 61.515869136, 232.42932128, 183.332641584], [70.11578368, 50.411926271999995, 94.936340352, 183.98583983999998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049204_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two street lights.", "boxes_value": [[32.012512192, 50.411926271999995, 352.272216768, 324.88739016], [318.390502912, 246.209655744, 352.272216768, 300.11242675200003], [318.38824460800004, 211.90136716799998, 327.925048832, 244.067932128], [32.012512192, 265.455505392, 46.079223616, 324.88739016], [217.40631104, 61.515869136, 232.42932128, 183.332641584], [70.11578368, 50.411926271999995, 94.936340352, 183.98583983999998]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049209.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[52.965332065800006, 214.6440429568, 757.7825927661, 272.3528442368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049209_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[52.965332065800006, 14.644042956800007, 757.7825927661, 72.35284423680002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049209.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three helmets, and two glasses.", "boxes_value": [[52.965332065800006, 214.6440429568, 757.7825927661, 272.3528442368], [681.7456054859999, 218.9214477312, 757.7825927661, 272.3528442368], [145.89874266660001, 220.4036865024, 170.6229858102, 237.856079104], [52.965332065800006, 214.6440429568, 85.7323608339, 229.7299194368], [549.4335937761, 245.2604980224, 581.4705810506999, 262.1942749184], [542.5686034791, 226.4959716864, 591.5393066286, 266.7709960704]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4]]}, {"image_path": "objects365_v1_00049209_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three helmets, and two glasses.", "boxes_value": [[52.965332065800006, 14.644042956800007, 757.7825927661, 72.35284423680002], [681.7456054859999, 18.921447731200004, 757.7825927661, 72.35284423680002], [145.89874266660001, 20.403686502400006, 170.6229858102, 37.856079104], [52.965332065800006, 14.644042956800007, 85.7323608339, 29.729919436800003], [549.4335937761, 45.260498022399986, 581.4705810506999, 62.194274918400026], [542.5686034791, 26.495971686399997, 591.5393066286, 66.77099607039997]], "boxes_seq": [[0], [0], [1, 2, 5], [3, 4]]}, {"image_path": "objects365_v1_00049211.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference.", "boxes_value": [[0, 212.19036863999997, 167.7755936256, 598.9067382528001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049211_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference.", "boxes_value": [[0, 97.19036863999997, 167.7755936256, 483.9067382528001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049211.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, a person, a lantern, and a bakset.", "boxes_value": [[0, 212.19036863999997, 167.7755936256, 598.9067382528001], [0.4136352768, 377.4461670144, 64.8156127744, 544.3192138752], [0.0368041984, 553.3588866816, 19.5540771328, 598.9067382528001], [59.4418945536, 370.791259776, 121.6891479552, 565.0466308608001], [0, 212.19036863999997, 11.5447387648, 256.945312512], [115.2675072512, 494.2432866048, 167.7755936256, 543.19150272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049211_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Give coordinates for the items you reference. For your reference, objects involved in this region include a flower, a vase, a person, a lantern, and a bakset.", "boxes_value": [[0, 97.19036863999997, 167.7755936256, 483.9067382528001], [0.4136352768, 262.4461670144, 64.8156127744, 429.31921387520003], [0.0368041984, 438.3588866816, 19.5540771328, 483.9067382528001], [59.4418945536, 255.791259776, 121.6891479552, 450.04663086080006], [0, 97.19036863999997, 11.5447387648, 141.945312512], [115.2675072512, 379.2432866048, 167.7755936256, 428.19150272]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049212.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[167.923278784, 195.647033712, 314.80157471999996, 239.60961912000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049212_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object.", "boxes_value": [[36.92327878399999, 11.647033711999995, 183.80157471999996, 55.60961912000002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049212.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a storage box, a barrel, a cup, and a bottle.", "boxes_value": [[167.923278784, 195.647033712, 314.80157471999996, 239.60961912000002], [136.68554688, 150.92657472000002, 564.993164032, 404.218872048], [267.992004416, 213.93414307199998, 314.80157471999996, 237.33892824], [167.923278784, 195.647033712, 193.08953856, 236.772399888], [243.70660403199997, 220.341064464, 264.473815936, 238.325012208], [191.039184576, 201.07250976, 202.600341824, 239.60961912000002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049212_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a storage box, a barrel, a cup, and a bottle.", "boxes_value": [[36.92327878399999, 11.647033711999995, 183.80157471999996, 55.60961912000002], [5.685546880000004, 0, 220, 66], [136.992004416, 29.934143071999983, 183.80157471999996, 53.33892824], [36.92327878399999, 11.647033711999995, 62.089538559999994, 52.772399887999995], [112.70660403199997, 36.341064464, 133.473815936, 54.325012208000004], [60.039184576, 17.072509760000003, 71.600341824, 55.60961912000002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049214.jpg", "text": "What can I find in the bbox of the provided image ? Specify the location of each mentioned object.", "boxes_value": [[272.2593994468, 206.2298584064, 409.2514648737, 395.438476544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049214_crop.jpg", "text": "What can I find in the bbox of the provided image ? Specify the location of each mentioned object.", "boxes_value": [[34.25939944679999, 48.22985840640001, 171.2514648737, 237.43847654400003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049214.jpg", "text": "What can I find in the bbox of the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, and six people.", "boxes_value": [[272.2593994468, 206.2298584064, 409.2514648737, 395.438476544], [272.2593994468, 206.2298584064, 287.7239990297, 226.9523926016], [385.78735353109994, 336.0687256064, 413.71020505200005, 395.8523559424], [330.9926147586, 243.3078613504, 349.2530517561, 322.8709716992], [347.5139160157, 239.3948974592, 378.8175049099, 345.4790649344], [379.6870117199, 240.2644653568, 409.2514648737, 302.0019531264], [346.1658935319, 338.7636108288, 372.8605956941, 395.438476544], [337.5414428509, 352.7269897216, 347.3979492408, 407.7591552512]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00049214_crop.jpg", "text": "What can I find in the bbox of the provided image ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, and six people.", "boxes_value": [[34.25939944679999, 48.22985840640001, 171.2514648737, 237.43847654400003], [34.25939944679999, 48.22985840640001, 49.723999029699996, 68.9523926016], [147.78735353109994, 178.0687256064, 175.71020505200005, 237.85235594239998], [92.99261475859998, 85.3078613504, 111.25305175609998, 164.87097169920003], [109.51391601569998, 81.3948974592, 140.81750490989998, 187.47906493440001], [141.6870117199, 82.2644653568, 171.2514648737, 144.0019531264], [108.1658935319, 180.76361082879998, 134.86059569410003, 237.43847654400003], [99.54144285090001, 194.72698972159998, 109.39794924080002, 249.7591552512]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00049215.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[260.6479492132, 258.9522094592, 441.6230468664, 441.403747584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049215_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[45.64794921319998, 45.95220945919999, 226.6230468664, 228.40374758399997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049215.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a potted plant, and two people.", "boxes_value": [[260.6479492132, 258.9522094592, 441.6230468664, 441.403747584], [391.233886694, 288.7911376896, 441.6230468664, 386.7305908224], [331.6185912935, 287.3717041152, 372.071777343, 385.311157248], [260.6479492132, 288.7911376896, 318.1341552824, 382.4723510784], [318.5239868212, 258.9522094592, 385.15307615570003, 441.403747584], [398.0593261664, 247.4453735424, 437.4853515894, 289.7984008704], [291.7792968452, 259.7129516544, 314.8132324537, 294.6826782208]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049215_crop.jpg", "text": "Please share details about the rectangular region within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three chairs, a potted plant, and two people.", "boxes_value": [[45.64794921319998, 45.95220945919999, 226.6230468664, 228.40374758399997], [176.23388669399998, 75.79113768960002, 226.6230468664, 173.73059082240002], [116.6185912935, 74.37170411519998, 157.071777343, 172.31115724799997], [45.64794921319998, 75.79113768960002, 103.1341552824, 169.47235107839998], [103.52398682120003, 45.95220945919999, 170.15307615570003, 228.40374758399997], [183.0593261664, 34.445373542400006, 222.4853515894, 76.79840087039997], [76.77929684520001, 46.7129516544, 99.81323245369998, 81.68267822080003]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049222.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations.", "boxes_value": [[433.457275392, 268.66833494400004, 640.237670912, 480.586059552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049222_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations.", "boxes_value": [[52.457275391999985, 53.66833494400004, 259, 265]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049222.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include a person, a slippers, three chairs, and a desk.", "boxes_value": [[433.457275392, 268.66833494400004, 640.237670912, 480.586059552], [416.416015616, 188.347900368, 543.255371072, 419.643249504], [429.208007808, 397.482238752, 481.51635744, 419.673706032], [558.039184576, 321.968811024, 640.237670912, 480.586059552], [536.20532224, 268.66833494400004, 632.531494144, 389.397216816], [433.457275392, 305.27227785599996, 582.441894528, 454.899047856], [397.495483392, 270.59484864, 468.134765632, 383.617614768]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049222_crop.jpg", "text": "Please tell me more about the rectangular section in the photo . Please mention the objects and their locations. For your reference, objects involved in this region include a person, a slippers, three chairs, and a desk.", "boxes_value": [[52.457275391999985, 53.66833494400004, 259, 265], [35.41601561599998, 0, 162.255371072, 204.64324950399998], [48.20800780799999, 182.482238752, 100.51635743999998, 204.67370603199998], [177.03918457600003, 106.96881102399999, 259, 265], [155.20532224, 53.66833494400004, 251.53149414400002, 174.39721681600003], [52.457275391999985, 90.27227785599996, 201.44189452800003, 239.89904785599998], [16.495483391999983, 55.59484864000001, 87.13476563199998, 168.617614768]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049223.jpg", "text": "Please describe the region in the picture . Please point out the objects and their coordinates.", "boxes_value": [[529.6845702884, 181.0911865344, 647.8385009644, 213.7996826112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049223_crop.jpg", "text": "Please describe the region in the picture . Please point out the objects and their coordinates.", "boxes_value": [[29.684570288399982, 9.09118653440001, 147.83850096440005, 41.79968261120001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049223.jpg", "text": "Please describe the region in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two vans, and a car.", "boxes_value": [[529.6845702884, 181.0911865344, 647.8385009644, 213.7996826112], [564.3582763324, 186.479003904, 586.5063476876, 239.5001220608], [552.9486084052, 185.4722290176, 564.6938476711999, 228.0905151488], [582.9146728856, 181.0911865344, 647.8385009644, 213.7996826112], [559.321044886, 185.983825664, 575.2406005552, 208.5647583232], [529.6845702884, 191.9244995072, 554.2489013608, 207.3585205248]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049223_crop.jpg", "text": "Please describe the region in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, two vans, and a car.", "boxes_value": [[29.684570288399982, 9.09118653440001, 147.83850096440005, 41.79968261120001], [64.35827633240001, 14.479003903999995, 86.50634768760005, 49], [52.94860840520005, 13.4722290176, 64.69384767119993, 49], [82.9146728856, 9.09118653440001, 147.83850096440005, 41.79968261120001], [59.32104488599998, 13.983825663999994, 75.24060055519999, 36.56475832320001], [29.684570288399982, 19.92449950720001, 54.248901360800005, 35.358520524800014]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049224.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object.", "boxes_value": [[261.526550271, 63.561950689599996, 486.56701660199997, 349.08123777120005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049224_crop.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object.", "boxes_value": [[56.52655027100002, 63.561950689599996, 281.56701660199997, 349.08123777120005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049224.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, a desk, two chairs, a book, and a bakset.", "boxes_value": [[261.526550271, 63.561950689599996, 486.56701660199997, 349.08123777120005], [283.409118648, 63.561950689599996, 341.977172844, 191.67956544], [131.49816891449998, 153.24426271, 479.24609374199997, 423.62475584], [329.827941879, 157.81994629279998, 501.2091064545, 442.42413331599994], [344.722534203, 125.7905273592, 486.56701660199997, 349.08123777120005], [261.526550271, 190.7761840864, 325.38897705899996, 208.5884399324], [329.79313671150004, 134.501308744, 412.74922121400004, 180.3052817676]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049224_crop.jpg", "text": "In the provided image , please explain the content within the region . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a potted plant, a desk, two chairs, a book, and a bakset.", "boxes_value": [[56.52655027100002, 63.561950689599996, 281.56701660199997, 349.08123777120005], [78.409118648, 63.561950689599996, 136.977172844, 191.67956544], [0, 153.24426271, 274.24609374199997, 420], [124.82794187899998, 157.81994629279998, 296.2091064545, 420], [139.722534203, 125.7905273592, 281.56701660199997, 349.08123777120005], [56.52655027100002, 190.7761840864, 120.38897705899996, 208.5884399324], [124.79313671150004, 134.501308744, 207.74922121400004, 180.3052817676]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049225.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates.", "boxes_value": [[399.76464842089996, 221.2378540032, 646.1093750077, 332.7175292928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049225_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates.", "boxes_value": [[61.76464842089996, 28.2378540032, 308.1093750077, 139.7175292928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049225.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates. For your reference, objects involved in this region include two chairs, a stool, a hat, and a handbag.", "boxes_value": [[399.76464842089996, 221.2378540032, 646.1093750077, 332.7175292928], [399.76464842089996, 221.2378540032, 418.98706052489996, 280.8275757056], [420.42883302300004, 230.1282348544, 469.20581055810004, 288.036010752], [593.8769531495, 278.1262206976, 646.1093750077, 332.7175292928], [429.51110837090005, 269.2003784192, 462.3692627054, 295.1760864256], [565.2690429983, 271.4447631872, 588.2078857462, 316.1044311552]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049225_crop.jpg", "text": "Detail the chosen region in the depicted scene . Please point out the objects and their coordinates. For your reference, objects involved in this region include two chairs, a stool, a hat, and a handbag.", "boxes_value": [[61.76464842089996, 28.2378540032, 308.1093750077, 139.7175292928], [61.76464842089996, 28.2378540032, 80.98706052489996, 87.8275757056], [82.42883302300004, 37.128234854400006, 131.20581055810004, 95.03601075199998], [255.87695314949997, 85.12622069759999, 308.1093750077, 139.7175292928], [91.51110837090005, 76.20037841919998, 124.36926270539999, 102.17608642559998], [227.26904299830005, 78.44476318720001, 250.20788574619996, 123.10443115520002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049229.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify.", "boxes_value": [[208.08227538, 11.4338989322, 587.17199709, 215.02453610860002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049229_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify.", "boxes_value": [[95.08227538, 11.4338989322, 474.17199709, 215.02453610860002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049229.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a clock, a picture, a cabinet, a person, and a blackboard.", "boxes_value": [[208.08227538, 11.4338989322, 587.17199709, 215.02453610860002], [208.08227538, 11.4338989322, 261.97344971, 90.42504880860001], [315.86462404, 6.266235331, 372.70874025, 79.35156248320001], [526.9197997699999, 112.66851805799999, 587.17199709, 215.02453610860002], [323.81927487, 17.1187744386, 366.34765622, 74.1568603588], [227.40441892, 89.70739743979999, 429.28430174, 206.4127807764]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049229_crop.jpg", "text": "Please provide details for the area marked as in this photographic . Include the coordinates for each object you identify. For your reference, objects involved in this region include a clock, a picture, a cabinet, a person, and a blackboard.", "boxes_value": [[95.08227538, 11.4338989322, 474.17199709, 215.02453610860002], [95.08227538, 11.4338989322, 148.97344971, 90.42504880860001], [202.86462404000002, 6.266235331, 259.70874025, 79.35156248320001], [413.91979976999994, 112.66851805799999, 474.17199709, 215.02453610860002], [210.81927487000002, 17.1187744386, 253.34765621999998, 74.1568603588], [114.40441892000001, 89.70739743979999, 316.28430174, 206.4127807764]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049230.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[371.447875968, 250.2387695104, 624.7145995776, 511.9973144576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049230_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[63.447875968000005, 66.2387695104, 316.7145995776, 327.9973144576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049230.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[371.447875968, 250.2387695104, 624.7145995776, 511.9973144576], [371.447875968, 467.2182617088, 412.86328128, 511.9973144576], [382.5676269312, 250.2387695104, 414.99401856, 360.5418701312], [590.4746093568, 343.0847168, 624.7145995776, 478.1770019328], [524.6337890304, 308.230834944, 552.5640869376, 345.2921142784], [493.3355712768, 482.7295532032, 513.7280273664, 502.8792114176]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049230_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a hat.", "boxes_value": [[63.447875968000005, 66.2387695104, 316.7145995776, 327.9973144576], [63.447875968000005, 283.2182617088, 104.86328128000002, 327.9973144576], [74.56762693119998, 66.2387695104, 106.99401855999997, 176.54187013120003], [282.4746093568, 159.08471680000002, 316.7145995776, 294.1770019328], [216.63378903039995, 124.23083494399998, 244.5640869376, 161.2921142784], [185.3355712768, 298.7295532032, 205.72802736640006, 318.8792114176]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049231.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[56.049804672, 335.066711424, 259.050598144, 391.466979984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049231_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[51.049804672, 15.066711424000005, 254.050598144, 71.46697998399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049231.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, and a crane.", "boxes_value": [[56.049804672, 335.066711424, 259.050598144, 391.466979984], [56.049804672, 339.943786608, 72.731933568, 362.787780768], [158.75988768, 370.359252912, 179.86761472, 391.466979984], [220.768615744, 335.066711424, 259.050598144, 347.82739257599997], [95.119567872, 275.33526609600005, 143.81927488, 385.609313952], [136.542297344, 291.56854247999996, 219.94750976, 428.711364768]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049231_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, and a crane.", "boxes_value": [[51.049804672, 15.066711424000005, 254.050598144, 71.46697998399998], [51.049804672, 19.943786607999982, 67.731933568, 42.787780768000005], [153.75988768, 50.35925291199999, 174.86761472, 71.46697998399998], [215.768615744, 15.066711424000005, 254.050598144, 27.827392575999966], [90.119567872, 0, 138.81927488, 65.60931395199998], [131.542297344, 0, 214.94750976, 85]], "boxes_seq": [[0], [0], [1, 2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049232.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[451.01196289070003, 268.0350951936, 501.2929687529, 392.0612792832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049232_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[13.011962890700033, 31.0350951936, 63.29296875289998, 155.0612792832]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049232.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include an american football, two people, a hat, and a sneakers.", "boxes_value": [[451.01196289070003, 268.0350951936, 501.2929687529, 392.0612792832], [460.59521485799996, 356.4946289152, 481.4445800604, 392.0612792832], [459.6447754249, 177.3625488384, 717.1466064752, 393.480163584], [400.2521972513, 269.364379904, 524.0428466711, 392.4311523328], [451.01196289070003, 268.0350951936, 488.17614742940003, 321.3765869056], [471.56164550719996, 369.0341796864, 501.2929687529, 388.27209472]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049232_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include an american football, two people, a hat, and a sneakers.", "boxes_value": [[13.011962890700033, 31.0350951936, 63.29296875289998, 155.0612792832], [22.595214857999963, 119.4946289152, 43.44458006040003, 155.0612792832], [21.64477542489999, 0, 75, 156.48016358400002], [0, 32.364379903999975, 75, 155.43115233280002], [13.011962890700033, 31.0350951936, 50.17614742940003, 84.37658690559999], [33.561645507199955, 132.03417968640002, 63.29296875289998, 151.27209471999998]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049233.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[438.9403076267, 297.9671630848, 622.6140136831, 450.3549804544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049233_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[45.94030762670002, 38.967163084800006, 229.61401368309998, 191.3549804544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049233.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three stools, and three desks.", "boxes_value": [[438.9403076267, 297.9671630848, 622.6140136831, 450.3549804544], [569.3857421723, 376.5765380608, 622.6140136831, 450.3549804544], [480.7843017388, 369.8387451392, 531.9912109129, 398.8110961664], [470.8117675821, 334.7431640576, 504.22546387159997, 373.8496093696], [438.9403076267, 297.9671630848, 476.0058593689, 375.8048095744], [496.656616191, 333.444152832, 597.2631836186, 396.9851684352], [468.747924831, 394.7794799616, 620.7125244032, 509.2550048768]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049233_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three stools, and three desks.", "boxes_value": [[45.94030762670002, 38.967163084800006, 229.61401368309998, 191.3549804544], [176.38574217229996, 117.57653806079998, 229.61401368309998, 191.3549804544], [87.78430173880002, 110.8387451392, 138.9912109129, 139.81109616639998], [77.8117675821, 75.74316405759998, 111.22546387159997, 114.84960936959999], [45.94030762670002, 38.967163084800006, 83.00585936890002, 116.8048095744], [103.65661619100001, 74.44415283199999, 204.2631836186, 137.98516843520002], [75.74792483099998, 135.77947996159998, 227.71252440319995, 229]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049234.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[490.7628173636, 296.401733376, 771.4500732112, 461.7794189312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049234_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify.", "boxes_value": [[70.76281736359999, 41.40173337599998, 351.4500732112, 206.77941893119998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049234.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, three cups, and a bottle.", "boxes_value": [[490.7628173636, 296.401733376, 771.4500732112, 461.7794189312], [761.2677001584, 296.401733376, 771.4500732112, 333.3126831104], [490.7628173636, 410.5919799808, 522.0140381184, 461.7794189312], [554.8818359056, 393.0805053952, 587.2106933903999, 441.5738525184], [611.9093017424, 337.991821312, 642.6396484616, 366.6027831808], [654.965698222, 357.6473388544, 678.0769042708, 392.9782104576]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049234_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a hat, three cups, and a bottle.", "boxes_value": [[70.76281736359999, 41.40173337599998, 351.4500732112, 206.77941893119998], [341.2677001584, 41.40173337599998, 351.4500732112, 78.31268311039997], [70.76281736359999, 155.59197998079998, 102.01403811839998, 206.77941893119998], [134.88183590560004, 138.08050539520002, 167.2106933903999, 186.57385251839997], [191.90930174239998, 82.99182131200001, 222.63964846160002, 111.60278318079997], [234.96569822200001, 102.6473388544, 258.0769042708, 137.97821045760003]], "boxes_seq": [[0], [0], [1], [2, 4, 5], [3]]}, {"image_path": "objects365_v1_00049236.jpg", "text": "In , what elements can be found within the coordinates ? Specify the location of each mentioned object.", "boxes_value": [[76.1019286938, 404.5246582272, 221.5643310253, 446.8602294784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049236_crop.jpg", "text": "In , what elements can be found within the coordinates ? Specify the location of each mentioned object.", "boxes_value": [[37.101928693800005, 11.524658227200007, 182.5643310253, 53.860229478400015]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049236.jpg", "text": "In , what elements can be found within the coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include five cars, a sports car, and a van.", "boxes_value": [[76.1019286938, 404.5246582272, 221.5643310253, 446.8602294784], [91.2536620983, 398.6032714752, 121.8079834032, 413.99884032], [156.8625488253, 404.5246582272, 189.1932983391, 422.6440429568], [117.71661378009999, 412.3392944128, 156.3481445422, 428.8211059712], [76.1019286938, 418.3056030208, 112.4215088074, 435.0111084032], [67.40588378310001, 433.1834106368, 111.8781738054, 455.555419904], [124.4680786358, 426.5714111488, 172.1104736381, 446.8602294784], [171.929321288, 412.8945923072, 221.5643310253, 440.5199584768]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049236_crop.jpg", "text": "In , what elements can be found within the coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include five cars, a sports car, and a van.", "boxes_value": [[37.101928693800005, 11.524658227200007, 182.5643310253, 53.860229478400015], [52.2536620983, 5.6032714752000174, 82.8079834032, 20.99884032], [117.8625488253, 11.524658227200007, 150.1932983391, 29.64404295679998], [78.71661378009999, 19.339294412799973, 117.3481445422, 35.821105971199984], [37.101928693800005, 25.305603020799992, 73.4215088074, 42.01110840320001], [28.40588378310001, 40.18341063679998, 72.8781738054, 62.55541990400002], [85.4680786358, 33.571411148799996, 133.1104736381, 53.860229478400015], [132.929321288, 19.894592307200014, 182.5643310253, 47.519958476800014]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049237.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[776.9420166144, 207.5259399168, 1021.1181640704, 504.8566894592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049237_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[61.94201661440002, 74.52593991680001, 306.1181640704, 371.8566894592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049237.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four boats, and a street lights.", "boxes_value": [[776.9420166144, 207.5259399168, 1021.1181640704, 504.8566894592], [825.1826171904, 322.6085204992, 960.1494140928, 504.8566894592], [776.9420166144, 307.4913330176, 829.8698730496, 341.2838134784], [987.0882568192, 277.3966674944, 1021.1181640704, 292.6100463616], [920.6300048384, 233.7584228352, 964.668701184, 248.5714111488], [876.5183105024, 207.5259399168, 885.5510253568, 262.1736450048]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049237_crop.jpg", "text": "Tell me about the region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four boats, and a street lights.", "boxes_value": [[61.94201661440002, 74.52593991680001, 306.1181640704, 371.8566894592], [110.18261719040004, 189.60852049919998, 245.14941409280004, 371.8566894592], [61.94201661440002, 174.49133301760003, 114.86987304959996, 208.28381347840002], [272.08825681919996, 144.3966674944, 306.1181640704, 159.61004636159998], [205.63000483840005, 100.75842283520001, 249.66870118400004, 115.5714111488], [161.51831050240003, 74.52593991680001, 170.55102535679998, 129.1736450048]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049239.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[135.039672832, 590.225097693, 196.0745849856, 725.011718739]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049239_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations.", "boxes_value": [[16.039672832000008, 34.22509769299995, 77.07458498560001, 169.011718739]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049239.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include a fan, a storage box, a towel, a cabinet, and a barrel.", "boxes_value": [[135.039672832, 590.225097693, 196.0745849856, 725.011718739], [126.6719970816, 649.068969703, 171.3813476352, 736.500610384], [138.731262208, 619.3103027650001, 192.7891235328, 651.005004881], [135.039672832, 590.225097693, 164.791931136, 635.8452148509999], [114.56329344, 521.840209968, 277.8043823104, 726.785156251], [181.7243041792, 696.823608404, 196.0745849856, 725.011718739]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049239_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Please mention the objects and their locations. For your reference, objects involved in this region include a fan, a storage box, a towel, a cabinet, and a barrel.", "boxes_value": [[16.039672832000008, 34.22509769299995, 77.07458498560001, 169.011718739], [7.671997081599997, 93.068969703, 52.3813476352, 180.50061038399997], [19.731262208000004, 63.310302765000074, 73.7891235328, 95.00500488099999], [16.039672832000008, 34.22509769299995, 45.79193113599999, 79.84521485099992], [0, 0, 92, 170.785156251], [62.724304179200004, 140.82360840399997, 77.07458498560001, 169.011718739]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049242.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[387.4267578164, 235.2639770624, 589.0146484197, 339.7119751168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049242_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.42675781640003, 26.263977062399988, 252.01464841970005, 130.71197511679998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049242.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, three chairs, a couch, and a napkin.", "boxes_value": [[387.4267578164, 235.2639770624, 589.0146484197, 339.7119751168], [448.3853759777, 235.2639770624, 533.0360107657, 339.7119751168], [373.9746094009, 257.7919921664, 444.97204591269997, 385.4506835968], [471.59594727039996, 248.2346801664, 528.9399414018, 334.2506714112], [535.7667236522, 241.4080200192, 589.0146484197, 331.5200195072], [425.8572998114, 202.4959716864, 660.0119628794, 305.5786743296], [387.4267578164, 294.2645874176, 425.45874021559996, 303.6884765696]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049242_crop.jpg", "text": "Could you give me a breakdown of the content in the specified area of the picture ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, three chairs, a couch, and a napkin.", "boxes_value": [[50.42675781640003, 26.263977062399988, 252.01464841970005, 130.71197511679998], [111.3853759777, 26.263977062399988, 196.03601076569998, 130.71197511679998], [36.97460940090002, 48.79199216640001, 107.97204591269997, 156], [134.59594727039996, 39.23468016640001, 191.93994140179996, 125.25067141120002], [198.76672365219997, 32.408020019199995, 252.01464841970005, 122.5200195072], [88.85729981140003, 0, 302, 96.5786743296], [50.42675781640003, 85.26458741760001, 88.45874021559996, 94.68847656960003]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049243.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[374.98327636, 119.7284546045, 592.5684814699999, 296.1135254063]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049243_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe.", "boxes_value": [[54.98327635999999, 44.7284546045, 272.56848146999994, 221.11352540630003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049243.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two fans, a lamp, and three umbrellas.", "boxes_value": [[374.98327636, 119.7284546045, 592.5684814699999, 296.1135254063], [441.38073728, 119.7284546045, 528.09143065, 165.67767332969999], [537.72583011, 161.2309570529, 592.5684814699999, 188.6522826923], [459.19116207999997, 135.8330688246, 476.37182614, 160.44317625349998], [374.98327636, 229.3780517525, 388.30200191999995, 296.1135254063], [478.90686032, 240.48736574799997, 488.05163575000006, 277.9517212031], [491.88659666999996, 239.7498779263, 499.99890136, 276.7717285368]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049243_crop.jpg", "text": "Offer a thorough description of the area within the illustration . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two fans, a lamp, and three umbrellas.", "boxes_value": [[54.98327635999999, 44.7284546045, 272.56848146999994, 221.11352540630003], [121.38073728, 44.7284546045, 208.09143065, 90.67767332969999], [217.72583010999995, 86.23095705290001, 272.56848146999994, 113.65228269229999], [139.19116207999997, 60.83306882459999, 156.37182614, 85.44317625349998], [54.98327635999999, 154.3780517525, 68.30200191999995, 221.11352540630003], [158.90686032000002, 165.48736574799997, 168.05163575000006, 202.95172120310002], [171.88659666999996, 164.7498779263, 179.99890136, 201.77172853680003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049244.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object.", "boxes_value": [[0, 0.24816896, 448.31445312500006, 511.4924926976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049244_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object.", "boxes_value": [[0, 0.24816896, 448.31445312500006, 511.4924926976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049244.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object. For your reference, objects involved in this region include three lamps, a person, and a leather shoes.", "boxes_value": [[0, 0.24816896, 448.31445312500006, 511.4924926976], [418.304321281, 16.3649291776, 448.31445312500006, 40.3562621952], [271.617919958, 1.8120727552, 291.200561498, 52.167236352], [174.11425777899998, 0.24816896, 226.608764613, 20.2531127808], [0, 345.4736938496, 135.114562968, 511.4924926976], [111.77054388500001, 448.1418457088, 162.83874514599998, 503.960626432]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049244_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Specify the location of each mentioned object. For your reference, objects involved in this region include three lamps, a person, and a leather shoes.", "boxes_value": [[0, 0.24816896, 448.31445312500006, 511.4924926976], [418.304321281, 16.3649291776, 448.31445312500006, 40.3562621952], [271.617919958, 1.8120727552, 291.200561498, 52.167236352], [174.11425777899998, 0.24816896, 226.608764613, 20.2531127808], [0, 345.4736938496, 135.114562968, 511.4924926976], [111.77054388500001, 448.1418457088, 162.83874514599998, 503.960626432]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049245.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates.", "boxes_value": [[104.2112427008, 264.8649902592, 303.7712402432, 461.63000486399994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049245_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates.", "boxes_value": [[50.2112427008, 49.8649902592, 249.7712402432, 246.63000486399994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049245.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, and four people.", "boxes_value": [[104.2112427008, 264.8649902592, 303.7712402432, 461.63000486399994], [156.839904768, 410.8941650688, 238.9698486272, 460.2698974464], [102.821533184, 186.8810424576, 439.8779296768, 642.3240966912], [124.8861694464, 217.76989747200003, 347.8151245312, 573.0867919872001], [104.2112427008, 264.8649902592, 155.079467776, 461.63000486399994], [271.3497314304, 282.1937256192, 303.7712402432, 325.7950439424]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049245_crop.jpg", "text": "Please provide insights on the specified area within the graphic . Please point out the objects and their coordinates. For your reference, objects involved in this region include a desk, and four people.", "boxes_value": [[50.2112427008, 49.8649902592, 249.7712402432, 246.63000486399994], [102.839904768, 195.89416506880002, 184.9698486272, 245.26989744640002], [48.821533184, 0, 299, 295], [70.8861694464, 2.769897472000025, 293.8151245312, 295], [50.2112427008, 49.8649902592, 101.079467776, 246.63000486399994], [217.34973143040003, 67.1937256192, 249.7712402432, 110.79504394240001]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049246.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[60.667602534000004, 272.0711059456, 326.65039060500004, 377.7612304896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049246_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates.", "boxes_value": [[60.667602534000004, 27.07110594559998, 326.65039060500004, 132.7612304896]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049246.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a book, a pen, a cup, and a laptop.", "boxes_value": [[60.667602534000004, 272.0711059456, 326.65039060500004, 377.7612304896], [17.193054186, 167.3485718016, 179.664489774, 479.49066163199996], [280.94116211700003, 356.7573242368, 326.65039060500004, 368.9818725376], [293.09106442, 307.40100096, 310.729126015, 329.1729736192], [60.667602534000004, 333.2145385984, 95.39508055, 376.851135232], [135.65991208, 272.0711059456, 285.252075185, 377.7612304896]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049246_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a book, a pen, a cup, and a laptop.", "boxes_value": [[60.667602534000004, 27.07110594559998, 326.65039060500004, 132.7612304896], [17.193054186, 0, 179.664489774, 159], [280.94116211700003, 111.7573242368, 326.65039060500004, 123.9818725376], [293.09106442, 62.401000959999976, 310.729126015, 84.17297361919998], [60.667602534000004, 88.2145385984, 95.39508055, 131.851135232], [135.65991208, 27.07110594559998, 285.252075185, 132.7612304896]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049248.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[44.2699584773, 250.1914062336, 279.4406127996, 388.6142578176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049248_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[44.2699584773, 35.19140623359999, 279.4406127996, 173.6142578176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049248.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two benches, and three people.", "boxes_value": [[44.2699584773, 250.1914062336, 279.4406127996, 388.6142578176], [228.0143432861, 348.007568384, 279.4406127996, 362.5437011968], [172.34527586020002, 346.847045888, 226.4340210279, 362.8945923072], [231.6978760056, 250.1914062336, 255.8292236399, 291.8037109248], [86.7088623229, 360.1278076416, 104.7308959848, 388.6142578176], [44.2699584773, 320.0143432704, 62.0012817267, 360.9998779392]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049248_crop.jpg", "text": "What does the area within the given visual contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two benches, and three people.", "boxes_value": [[44.2699584773, 35.19140623359999, 279.4406127996, 173.6142578176], [228.0143432861, 133.00756838400002, 279.4406127996, 147.5437011968], [172.34527586020002, 131.84704588800003, 226.4340210279, 147.8945923072], [231.6978760056, 35.19140623359999, 255.8292236399, 76.80371092479999], [86.7088623229, 145.1278076416, 104.7308959848, 173.6142578176], [44.2699584773, 105.01434327039999, 62.0012817267, 145.99987793920002]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049249.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[243.71185300000002, 332.7857055744, 506.91650387500005, 512.4345702912]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049249_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[66.71185300000002, 45.785705574400026, 329.91650387500005, 225]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049249.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, and four chairs.", "boxes_value": [[243.71185300000002, 332.7857055744, 506.91650387500005, 512.4345702912], [148.300170875, 354.5118408192, 439.141235375, 512.4345702912], [243.71185300000002, 374.2521972736, 393.738464375, 512.4345702912], [387.1583251875, 343.9836425728, 506.91650387500005, 512.4345702912], [355.808776875, 337.2977905152, 428.5660400625, 398.2108764672], [325.3522949375, 332.7857055744, 385.701355, 373.958435072]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049249_crop.jpg", "text": "Explain the content within the rectangular region of the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a desk, and four chairs.", "boxes_value": [[66.71185300000002, 45.785705574400026, 329.91650387500005, 225], [0, 67.51184081920002, 262.141235375, 225], [66.71185300000002, 87.25219727360002, 216.73846437499998, 225], [210.1583251875, 56.98364257280002, 329.91650387500005, 225], [178.80877687499998, 50.297790515200006, 251.5660400625, 111.21087646720002], [148.3522949375, 45.785705574400026, 208.70135499999998, 86.95843507199999]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049250.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[329.9039917134, 419.0344848384, 448.2904053058, 477.9473876992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049250_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[29.90399171339999, 15.034484838399976, 148.2904053058, 73.94738769920002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049250.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a bench, a spoon, a fork, a napkin, and a desk.", "boxes_value": [[329.9039917134, 419.0344848384, 448.2904053058, 477.9473876992], [0, 2.9063110144, 446.8377685316, 508.9271850496], [333.7818603353, 459.1392211968, 448.2904053058, 477.9473876992], [339.8645629885, 419.0344848384, 442.064208997, 433.086914048], [329.9039917134, 429.5916748288, 444.28833006039997, 457.9958496256], [320.69183346560004, 407.3289184768, 680.7337646784999, 513.2687988224]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049250_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a bench, a spoon, a fork, a napkin, and a desk.", "boxes_value": [[29.90399171339999, 15.034484838399976, 148.2904053058, 73.94738769920002], [0, 0, 146.83776853159998, 88], [33.781860335299996, 55.13922119680001, 148.2904053058, 73.94738769920002], [39.86456298849998, 15.034484838399976, 142.06420899699998, 29.086914047999983], [29.90399171339999, 25.591674828800024, 144.28833006039997, 53.99584962559999], [20.69183346560004, 3.328918476800027, 177, 88]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049251.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for each element you describe.", "boxes_value": [[195.734191872, 105.2549438176, 329.2523803648, 170.01715091719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049251_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for each element you describe.", "boxes_value": [[33.734191872, 16.254943817599994, 167.2523803648, 81.01715091719998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049251.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[195.734191872, 105.2549438176, 329.2523803648, 170.01715091719998], [195.734191872, 131.0246581656, 216.4049682432, 170.01715091719998], [215.3087768576, 134.15655516639998, 240.3641967616, 167.6682129196], [218.23388672, 105.2549438176, 233.7437133824, 132.92108156959998], [269.9132690432, 117.5510253872, 289.3154907136, 159.5891723948], [308.0836791808, 131.090209998, 329.2523803648, 155.69158935040002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049251_crop.jpg", "text": "Please enlighten me about the area in the photograph . Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people.", "boxes_value": [[33.734191872, 16.254943817599994, 167.2523803648, 81.01715091719998], [33.734191872, 42.02465816559999, 54.40496824319999, 81.01715091719998], [53.30877685760001, 45.15655516639998, 78.3641967616, 78.6682129196], [56.23388671999999, 16.254943817599994, 71.74371338239999, 43.92108156959998], [107.91326904319999, 28.5510253872, 127.31549071360001, 70.58917239479999], [146.0836791808, 42.090209998000006, 167.2523803648, 66.69158935040002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049252.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[248.83190915039998, 82.5568237056, 553.9062500174, 217.0830688256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049252_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[76.83190915039998, 34.556823705599996, 381.9062500174, 169.0830688256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049252.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two plates, and a coffee machine.", "boxes_value": [[248.83190915039998, 82.5568237056, 553.9062500174, 217.0830688256], [252.7501831436, 160.9216308736, 278.0010986092, 217.0830688256], [248.83190915039998, 82.5568237056, 273.6474609326, 137.8475341824], [419.1685791092, 117.3774413824, 440.04663083440005, 133.6429443584], [334.755981415, 88.5954589696, 374.5837402262, 130.781433088], [494.01135251219995, 152.8368530432, 553.9062500174, 205.402893056]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049252_crop.jpg", "text": "Can you analyze the content of the area within the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include two people, two plates, and a coffee machine.", "boxes_value": [[76.83190915039998, 34.556823705599996, 381.9062500174, 169.0830688256], [80.7501831436, 112.92163087360001, 106.00109860920003, 169.0830688256], [76.83190915039998, 34.556823705599996, 101.64746093259998, 89.84753418240001], [247.16857910919998, 69.3774413824, 268.04663083440005, 85.6429443584], [162.755981415, 40.5954589696, 202.5837402262, 82.781433088], [322.01135251219995, 104.8368530432, 381.9062500174, 157.402893056]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049255.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[486.2729492281, 0, 658.8588513157, 180.4948436992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049255_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify.", "boxes_value": [[43.2729492281, 0, 215.8588513157, 180.4948436992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049255.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two slippers, a bench, and a chair.", "boxes_value": [[486.2729492281, 0, 658.8588513157, 180.4948436992], [486.2729492281, 0, 555.7072754232, 166.6318969856], [538.1660156380001, 0, 675.5727539284, 180.518798848], [537.2156363689, 164.0318521856, 579.7450310894, 180.4948436992], [625.9328683195, 155.8003564544, 658.8588513157, 176.379095808], [461.0067138361, 69.883789056, 680.7337646784999, 114.0680542208], [534.4481200957, 77.0487670784, 630.5787353352, 222.7374267392]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049255_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include two people, two slippers, a bench, and a chair.", "boxes_value": [[43.2729492281, 0, 215.8588513157, 180.4948436992], [43.2729492281, 0, 112.70727542320003, 166.6318969856], [95.16601563800009, 0, 232.57275392839995, 180.518798848], [94.21563636890005, 164.0318521856, 136.74503108939996, 180.4948436992], [182.93286831950002, 155.8003564544, 215.8588513157, 176.379095808], [18.006713836100005, 69.883789056, 237.73376467849994, 114.0680542208], [91.4481200957, 77.0487670784, 187.5787353352, 222.7374267392]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049256.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations.", "boxes_value": [[457.891601583, 198.7584838656, 682.2572021206, 360.259399424]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049256_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations.", "boxes_value": [[56.89160158300001, 40.7584838656, 281.2572021206, 202.25939942399998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049256.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, three people, a leather shoes, and a candy.", "boxes_value": [[457.891601583, 198.7584838656, 682.2572021206, 360.259399424], [531.5794677779, 198.7584838656, 560.4361571958, 235.8600463872], [457.891601583, 231.2223510528, 512.5133056522001, 315.7314452992], [444.5998535319, 195.9171142656, 479.0172119012, 256.4741211136], [458.5410156044, 192.649597184, 479.81323241900003, 218.5285644288], [478.8304443218, 155.805053696, 527.0021972525, 288.5574951424], [652.5406494429, 331.285644544, 682.2572021206, 360.259399424], [425.5187987961, 255.028137216, 488.6273193477, 342.7603759616]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049256_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Please mention the objects and their locations. For your reference, objects involved in this region include two chairs, three people, a leather shoes, and a candy.", "boxes_value": [[56.89160158300001, 40.7584838656, 281.2572021206, 202.25939942399998], [130.57946777790005, 40.7584838656, 159.43615719579998, 77.86004638719999], [56.89160158300001, 73.22235105280001, 111.51330565220007, 157.7314452992], [43.59985353190001, 37.91711426559999, 78.01721190120003, 98.4741211136], [57.5410156044, 34.64959718399999, 78.81323241900003, 60.528564428799996], [77.83044432179997, 0, 126.00219725249997, 130.5574951424], [251.54064944289996, 173.28564454399998, 281.2572021206, 202.25939942399998], [24.518798796099986, 97.028137216, 87.6273193477, 184.7603759616]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049260.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[0, 396.4425659392, 105.7500000116, 511.0171508736]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049260_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify.", "boxes_value": [[0, 29.442565939199994, 105.7500000116, 144.01715087359997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049260.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two desks, and two chairs.", "boxes_value": [[0, 396.4425659392, 105.7500000116, 511.0171508736], [48.004089339400004, 349.2962036224, 209.34454347690001, 512.0695800832], [0, 458.3225097728, 105.7500000116, 511.0171508736], [4.7116089075000005, 396.4425659392, 60.3068847402, 427.8659667968], [11.4797363473, 428.8328247296, 56.439392117699995, 461.7065429504], [37.101928732, 404.6610107392, 191.80175780140002, 426.899108864]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00049260_crop.jpg", "text": "In the image , please describe the bounding box . Include the coordinates for each object you identify. For your reference, objects involved in this region include a person, two desks, and two chairs.", "boxes_value": [[0, 29.442565939199994, 105.7500000116, 144.01715087359997], [48.004089339400004, 0, 132, 145], [0, 91.3225097728, 105.7500000116, 144.01715087359997], [4.7116089075000005, 29.442565939199994, 60.3068847402, 60.86596679680002], [11.4797363473, 61.83282472960002, 56.439392117699995, 94.70654295039998], [37.101928732, 37.66101073919998, 132, 59.89910886400003]], "boxes_seq": [[0], [0], [1], [2, 5], [3, 4]]}, {"image_path": "objects365_v1_00049263.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify.", "boxes_value": [[67.0629901568, 202.0432129024, 263.1711425728, 512.486572288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049263_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify.", "boxes_value": [[49.0629901568, 78.0432129024, 245.17114257280002, 388]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049263.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify. For your reference, objects involved in this region include two skateboards, a person, and two sneakers.", "boxes_value": [[67.0629901568, 202.0432129024, 263.1711425728, 512.486572288], [82.8678588672, 452.77807616, 263.1304931904, 512.486572288], [216.3792114176, 202.0432129024, 263.1711425728, 213.7411499008], [53.079284684799994, 28.117675776, 220.6905517696, 489.0485839872], [67.0629901568, 426.7494225408, 132.9149561344, 470.040992768], [139.92697100799998, 437.4198799872, 186.87698381439998, 487.113724672]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049263_crop.jpg", "text": "Regarding the coordinates in image , can you provide a description? Include the coordinates for each object you identify. For your reference, objects involved in this region include two skateboards, a person, and two sneakers.", "boxes_value": [[49.0629901568, 78.0432129024, 245.17114257280002, 388], [64.8678588672, 328.77807616, 245.1304931904, 388], [198.3792114176, 78.0432129024, 245.17114257280002, 89.7411499008], [35.079284684799994, 0, 202.6905517696, 365.0485839872], [49.0629901568, 302.7494225408, 114.9149561344, 346.040992768], [121.92697100799998, 313.4198799872, 168.87698381439998, 363.113724672]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049264.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[361.4490966528, 225.2276000768, 767.9997558528, 294.4851074048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049264_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[102.4490966528, 18.2276000768, 508.99975585280004, 87.48510740479998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049264.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three umbrellas.", "boxes_value": [[361.4490966528, 225.2276000768, 767.9997558528, 294.4851074048], [666.9233398272, 239.486450176, 699.0058593792, 294.4851074048], [731.5975342080001, 225.2276000768, 767.9997558528, 249.1621704192], [508.1295165696, 130.7046508544, 767.9366455296, 313.154724096], [355.5231933696, 238.4852295168, 397.7941894656, 247.5715332096], [361.4490966528, 250.731994624, 371.32543948800003, 282.3364868096]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00049264_crop.jpg", "text": "Describe the bbox in the provided photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two people, and three umbrellas.", "boxes_value": [[102.4490966528, 18.2276000768, 508.99975585280004, 87.48510740479998], [407.92333982720004, 32.486450176000005, 440.00585937920005, 87.48510740479998], [472.59753420800007, 18.2276000768, 508.99975585280004, 42.16217041920001], [249.1295165696, 0, 508.9366455296, 104], [96.5231933696, 31.48522951679999, 138.79418946560003, 40.571533209600005], [102.4490966528, 43.73199462400001, 112.32543948800003, 75.33648680959999]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00049270.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations.", "boxes_value": [[142.1256713728, 81.8472289925, 294.4900512768, 549.4724121137]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049270_crop.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations.", "boxes_value": [[38.125671372800014, 81.8472289925, 190.4900512768, 549.4724121137]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049270.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a hat, a glasses, a belt, and a sneakers.", "boxes_value": [[142.1256713728, 81.8472289925, 294.4900512768, 549.4724121137], [56.4144897536, 51.230468760099996, 465.8659667968, 600.5624999837999], [217.5089721856, 68.0681762713, 337.5747070464, 412.1900024165], [246.042419456, 68.50653079050001, 297.2986450432, 100.80499265670001], [248.1460571136, 81.8472289925, 294.4900512768, 97.63153073650001], [177.23272704, 272.1271362269, 258.6809081856, 318.4683838163], [142.1256713728, 491.1948241931, 225.680358912, 549.4724121137]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049270_crop.jpg", "text": "Please enlighten me about the area in the photograph . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a hat, a glasses, a belt, and a sneakers.", "boxes_value": [[38.125671372800014, 81.8472289925, 190.4900512768, 549.4724121137], [0, 51.230468760099996, 228, 600.5624999837999], [113.50897218559999, 68.0681762713, 228, 412.1900024165], [142.042419456, 68.50653079050001, 193.2986450432, 100.80499265670001], [144.1460571136, 81.8472289925, 190.4900512768, 97.63153073650001], [73.23272703999999, 272.1271362269, 154.6809081856, 318.4683838163], [38.125671372800014, 491.1948241931, 121.680358912, 549.4724121137]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049271.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[314.24914547550003, 180.0658000896, 530.669189475, 315.2769775616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049271_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference.", "boxes_value": [[54.24914547550003, 34.0658000896, 270.66918947500005, 169.2769775616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049271.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include two pictures, three people, and a hat.", "boxes_value": [[314.24914547550003, 180.0658000896, 530.669189475, 315.2769775616], [314.24914547550003, 246.8353271296, 395.0754394185, 315.2769775616], [396.705078144, 249.7685547008, 452.11022946, 314.9510497792], [277.970703093, 178.7427978752, 367.7269287105, 304.4016113152], [459.55456540649993, 180.123657216, 530.669189475, 302.3303222784], [403.580200158, 269.7249755648, 433.50842282850004, 307.7692260864], [491.91313799700004, 180.0658000896, 529.291775763, 195.0634016256]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049271_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give coordinates for the items you reference. For your reference, objects involved in this region include two pictures, three people, and a hat.", "boxes_value": [[54.24914547550003, 34.0658000896, 270.66918947500005, 169.2769775616], [54.24914547550003, 100.83532712959999, 135.07543941850003, 169.2769775616], [136.70507814400003, 103.7685547008, 192.11022946000003, 168.9510497792], [17.970703092999997, 32.74279787520001, 107.72692871049998, 158.4016113152], [199.55456540649993, 34.123657216, 270.66918947500005, 156.3303222784], [143.58020015800003, 123.72497556479999, 173.50842282850004, 161.7692260864], [231.91313799700004, 34.0658000896, 269.29177576300003, 49.063401625599994]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049273.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[303.5775146243, 212.8259887616, 694.9658203196, 310.7066650624]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049273_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object.", "boxes_value": [[98.57751462430002, 24.8259887616, 489.96582031959997, 122.70666506240002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049273.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two chairs, a person, and three moniters.", "boxes_value": [[303.5775146243, 212.8259887616, 694.9658203196, 310.7066650624], [0.2539062784, 1.3062744064, 642.7452392831, 510.4255371264], [683.5316162423001, 268.0076294144, 704.3464355538999, 306.5227051008], [668.4866943068, 271.3175048704, 694.9658203196, 310.3340453888], [668.8237304645, 248.750366208, 696.2030029063, 309.1380004864], [303.5775146243, 212.8259887616, 410.99523922860004, 310.7066650624], [433.5472412166, 225.5181884928, 493.78161622830004, 293.066650368], [510.13085938330005, 228.0996704256, 548.8529052910001, 283.6012573184]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00049273_crop.jpg", "text": "What does the area within the given visual contain? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, two chairs, a person, and three moniters.", "boxes_value": [[98.57751462430002, 24.8259887616, 489.96582031959997, 122.70666506240002], [0, 0, 437.7452392831, 147], [478.53161624230006, 80.00762941440001, 499.34643555389994, 118.52270510080001], [463.4866943068, 83.3175048704, 489.96582031959997, 122.3340453888], [463.82373046450004, 60.750366208, 491.20300290629996, 121.13800048640002], [98.57751462430002, 24.8259887616, 205.99523922860004, 122.70666506240002], [228.5472412166, 37.51818849279999, 288.78161622830004, 105.06665036800001], [305.13085938330005, 40.09967042560001, 343.85290529100007, 95.60125731839997]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5, 6, 7]]}, {"image_path": "objects365_v1_00049274.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 101.8303833088, 193.80175784280001, 471.026672384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049274_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 92.8303833088, 193.80175784280001, 462.026672384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049274.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, a sandals, two leather shoes, and a moniter.", "boxes_value": [[0, 101.8303833088, 193.80175784280001, 471.026672384], [20.415588417200002, 101.8303833088, 37.2525634804, 135.1981811712], [161.5399169966, 115.6060790784, 178.8579101858, 149.454956032], [166.4562987946, 412.5451660288, 193.80175784280001, 427.9777831936], [123.40740963740001, 442.8688964608, 171.0589599306, 460.7382812672], [136.1325683216, 451.2620849664, 177.55694582040002, 471.026672384], [0, 306.9626464768, 64.9720459278, 407.5803222528]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049274_crop.jpg", "text": "What's inside the area of the provided graphic ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, a sandals, two leather shoes, and a moniter.", "boxes_value": [[0, 92.8303833088, 193.80175784280001, 462.026672384], [20.415588417200002, 92.8303833088, 37.2525634804, 126.19818117119999], [161.5399169966, 106.6060790784, 178.8579101858, 140.454956032], [166.4562987946, 403.5451660288, 193.80175784280001, 418.9777831936], [123.40740963740001, 433.8688964608, 171.0589599306, 451.7382812672], [136.1325683216, 442.2620849664, 177.55694582040002, 462.026672384], [0, 297.9626464768, 64.9720459278, 398.5803222528]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049277.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[169.5210571346, 236.5166625792, 267.1716308268, 511.7278442496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049277_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations.", "boxes_value": [[24.5210571346, 69.51666257919999, 122.17163082680003, 344.7278442496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049277.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a truck, and two machinery vehicles.", "boxes_value": [[169.5210571346, 236.5166625792, 267.1716308268, 511.7278442496], [243.896789537, 356.9218139648, 257.945556619, 384.8096923648], [245.9936523378, 360.9058227712, 267.1716308268, 406.8264160256], [169.5210571346, 236.5166625792, 202.46368408099997, 280.7291870208], [196.8287963512, 266.8585815552, 239.74096676439999, 371.7550048768], [180.5638427418, 470.89385984, 244.583251927, 511.7278442496]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049277_crop.jpg", "text": "Describe what's happening within the coordinates of the given image . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a truck, and two machinery vehicles.", "boxes_value": [[24.5210571346, 69.51666257919999, 122.17163082680003, 344.7278442496], [98.89678953699999, 189.92181396479998, 112.945556619, 217.80969236480001], [100.9936523378, 193.9058227712, 122.17163082680003, 239.8264160256], [24.5210571346, 69.51666257919999, 57.46368408099997, 113.7291870208], [51.8287963512, 99.85858155519998, 94.74096676439999, 204.7550048768], [35.56384274179999, 303.89385984, 99.58325192699999, 344.7278442496]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049278.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[232.5496418325, 70.290161152, 665.5646923874999, 410.5825454592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049278_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object.", "boxes_value": [[108.54964183249999, 70.290161152, 541.5646923874999, 410.5825454592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049278.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an american football, two sneakers, a helmet, and a belt.", "boxes_value": [[232.5496418325, 70.290161152, 665.5646923874999, 410.5825454592], [552.70202634, 70.290161152, 585.1994628599999, 95.4922485248], [598.0040520075, 343.36485248, 665.5646923874999, 410.5825454592], [232.5496418325, 106.7599936512, 326.49255704250004, 217.025037056], [597.1208236875, 268.515422208, 625.2118268625001, 292.6499460096], [423.728396505, 75.6248976384, 463.62467529, 96.0717405184]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049278_crop.jpg", "text": "Can you discuss the entities within the region of image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include an american football, two sneakers, a helmet, and a belt.", "boxes_value": [[108.54964183249999, 70.290161152, 541.5646923874999, 410.5825454592], [428.70202634, 70.290161152, 461.1994628599999, 95.4922485248], [474.00405200750004, 343.36485248, 541.5646923874999, 410.5825454592], [108.54964183249999, 106.7599936512, 202.49255704250004, 217.025037056], [473.1208236875, 268.515422208, 501.21182686250006, 292.6499460096], [299.728396505, 75.6248976384, 339.62467529, 96.0717405184]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049281.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[135.0236816157, 150.0542602752, 743.7677002103, 451.6072387584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049281_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[135.0236816157, 76.0542602752, 743.7677002103, 377.6072387584]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049281.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two helmets, two gloves, a boots, and a sneakers.", "boxes_value": [[135.0236816157, 150.0542602752, 743.7677002103, 451.6072387584], [255.79260251109997, 154.3121337856, 299.1456299006, 194.18139648], [135.0236816157, 150.0542602752, 192.6986084213, 198.4392700416], [260.4376220651, 224.6317138432, 310.37097171839997, 266.82342528], [222.292846721, 404.0593261568, 271.3348389008, 451.6072387584], [719.7297363624, 412.773681664, 743.7677002103, 430.0571899392], [666.6364746178999, 161.5099487232, 744.5683593437001, 223.1921997312]], "boxes_seq": [[0], [0], [1, 6], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049281_crop.jpg", "text": "Please describe the region in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two helmets, two gloves, a boots, and a sneakers.", "boxes_value": [[135.0236816157, 76.0542602752, 743.7677002103, 377.6072387584], [255.79260251109997, 80.3121337856, 299.1456299006, 120.18139647999999], [135.0236816157, 76.0542602752, 192.6986084213, 124.4392700416], [260.4376220651, 150.6317138432, 310.37097171839997, 192.82342527999998], [222.292846721, 330.0593261568, 271.3348389008, 377.6072387584], [719.7297363624, 338.773681664, 743.7677002103, 356.0571899392], [666.6364746178999, 87.50994872320001, 744.5683593437001, 149.1921997312]], "boxes_seq": [[0], [0], [1, 6], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049286.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[137.6488037024, 62.6921386496, 293.5219726252, 478.3538208256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049286_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates.", "boxes_value": [[39.64880370239999, 62.6921386496, 195.52197262520002, 478.3538208256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049286.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a golf club, a person, a hat, a gloves, and two sneakers.", "boxes_value": [[137.6488037024, 62.6921386496, 293.5219726252, 478.3538208256], [174.6749669832, 310.67217792, 191.5989622796, 493.827415552], [137.6488037024, 62.6921386496, 293.5219726252, 478.3538208256], [204.1361084148, 63.7026366976, 253.2784423708, 93.9046630912], [276.313842806, 269.9977417216, 293.7183837844, 305.3187866112], [204.6480102664, 459.4571533312, 236.8976440304, 479.9331054592], [250.206970196, 458.2971191296, 294.20935061719996, 476.7254638592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049286_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a golf club, a person, a hat, a gloves, and two sneakers.", "boxes_value": [[39.64880370239999, 62.6921386496, 195.52197262520002, 478.3538208256], [76.6749669832, 310.67217792, 93.59896227959999, 493.827415552], [39.64880370239999, 62.6921386496, 195.52197262520002, 478.3538208256], [106.13610841479999, 63.7026366976, 155.2784423708, 93.9046630912], [178.31384280600003, 269.9977417216, 195.71838378439998, 305.3187866112], [106.64801026640001, 459.4571533312, 138.8976440304, 479.9331054592], [152.206970196, 458.2971191296, 196.20935061719996, 476.7254638592]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049287.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[61.119201672, 320.8692626944, 529.9288330127999, 352.3829956096]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049287_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe.", "boxes_value": [[61.119201672, 8.869262694399993, 529.9288330127999, 40.382995609600016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049287.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five boats.", "boxes_value": [[61.119201672, 320.8692626944, 529.9288330127999, 352.3829956096], [78.3072509856, 325.7791137792, 201.4625244096, 352.3829956096], [61.119201672, 332.6948852736, 90.70336912319999, 343.0686034944], [167.3532715104, 325.20281984, 241.12158203520002, 341.723815936], [305.9508056352, 327.0686034944, 357.5457763488, 338.4675293184], [407.7408447552, 320.8692626944, 529.9288330127999, 349.266418432]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049287_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five boats.", "boxes_value": [[61.119201672, 8.869262694399993, 529.9288330127999, 40.382995609600016], [78.3072509856, 13.779113779199974, 201.4625244096, 40.382995609600016], [61.119201672, 20.694885273599994, 90.70336912319999, 31.06860349440001], [167.3532715104, 13.202819840000018, 241.12158203520002, 29.723815935999994], [305.9508056352, 15.068603494400008, 357.5457763488, 26.46752931840001], [407.7408447552, 8.869262694399993, 529.9288330127999, 37.26641843200002]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049292.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[42.0380248772, 470.6295776256, 319.8677978404, 511.6528320512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049292_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify.", "boxes_value": [[42.0380248772, 10.629577625600007, 319.8677978404, 51.652832051199994]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049292.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three suvs, two cars, and a van.", "boxes_value": [[42.0380248772, 470.6295776256, 319.8677978404, 511.6528320512], [75.5083617936, 481.4929809408, 105.78204349280001, 499.9204711936], [42.0380248772, 482.809265152, 94.123840356, 508.570129408], [104.4731445128, 482.4067382784, 174.5836181532, 511.6528320512], [161.31866454040002, 470.6295776256, 246.2337646564, 511.2411499008], [229.2097167704, 479.0390624768, 319.8677978404, 511.651367168], [298.3217773244, 476.5480956928, 333.8153076104, 508.392395008]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00049292_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three suvs, two cars, and a van.", "boxes_value": [[42.0380248772, 10.629577625600007, 319.8677978404, 51.652832051199994], [75.5083617936, 21.49298094080001, 105.78204349280001, 39.92047119360001], [42.0380248772, 22.809265152000023, 94.123840356, 48.570129408000014], [104.4731445128, 22.406738278399985, 174.5836181532, 51.652832051199994], [161.31866454040002, 10.629577625600007, 246.2337646564, 51.241149900799996], [229.2097167704, 19.039062476799984, 319.8677978404, 51.65136716799998], [298.3217773244, 16.54809569280002, 333.8153076104, 48.392395007999994]], "boxes_seq": [[0], [0], [1, 2, 4], [3, 5], [6]]}, {"image_path": "objects365_v1_00049297.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[407.5053710592, 280.0148315648, 741.5235595775999, 511.5796508672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049297_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object.", "boxes_value": [[83.5053710592, 58.01483156479998, 417.52355957759994, 289.5796508672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049297.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a chair, two desks, two lamps, and a vase.", "boxes_value": [[407.5053710592, 280.0148315648, 741.5235595775999, 511.5796508672], [413.21936033279997, 283.021301248, 587.896484352, 387.1558227456], [439.4149169664, 331.0478515712, 735.6943359744, 511.5796508672], [568.3621826304001, 324.7749633536, 646.6923828480001, 349.9525756928], [407.5053710592, 299.597412096, 420.79357908479994, 333.1674804736], [423.5910644736, 268.1254272512, 447.36987302399996, 298.8980102656], [588.6440429568, 280.0148315648, 630.6066894336, 333.866882304], [690.8756103168, 338.3713378816, 741.5235595775999, 426.1610107392]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00049297_crop.jpg", "text": "Please describe the area in the image for me. Include the coordinates for each mentioned object. For your reference, objects involved in this region include a couch, a chair, two desks, two lamps, and a vase.", "boxes_value": [[83.5053710592, 58.01483156479998, 417.52355957759994, 289.5796508672], [89.21936033279997, 61.021301247999986, 263.896484352, 165.1558227456], [115.41491696640003, 109.0478515712, 411.6943359744, 289.5796508672], [244.36218263040007, 102.77496335360001, 322.6923828480001, 127.9525756928], [83.5053710592, 77.59741209600003, 96.79357908479994, 111.1674804736], [99.5910644736, 46.125427251199994, 123.36987302399996, 76.89801026560002], [264.64404295680004, 58.01483156479998, 306.60668943359997, 111.866882304], [366.8756103168, 116.37133788160003, 417.52355957759994, 204.16101073919998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6], [7]]}, {"image_path": "objects365_v1_00049298.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[340.0104980276, 68.6790160896, 580.8579101283, 196.2626953216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049298_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[61.01049802760002, 32.6790160896, 301.8579101283, 160.2626953216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049298.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, three people, and a cow.", "boxes_value": [[340.0104980276, 68.6790160896, 580.8579101283, 196.2626953216], [340.0104980276, 68.6790160896, 370.64208984690003, 95.2263794176], [468.694335968, 173.3964233216, 485.2723388606, 192.832763648], [432.6800537278, 177.3980102656, 456.11791990750004, 196.2626953216], [553.8913574375, 151.8421020672, 580.8579101283, 191.1361694208], [213.8212280459, 108.7564697088, 508.2875976292, 468.880859392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049298_crop.jpg", "text": "What's going on in the section of contained within the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, three people, and a cow.", "boxes_value": [[61.01049802760002, 32.6790160896, 301.8579101283, 160.2626953216], [61.01049802760002, 32.6790160896, 91.64208984690003, 59.2263794176], [189.69433596800002, 137.3964233216, 206.27233886059997, 156.832763648], [153.6800537278, 141.3980102656, 177.11791990750004, 160.2626953216], [274.8913574375, 115.84210206719999, 301.8579101283, 155.1361694208], [0, 72.7564697088, 229.28759762919998, 192]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049299.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[563.6356200959999, 272.2120971776, 599.3763427583999, 344.6611938304]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049299_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations.", "boxes_value": [[9.635620095999911, 18.212097177600015, 45.376342758399915, 90.66119383040001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049299.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a knife, a spoon, a plate, a wine glass, and a desk.", "boxes_value": [[563.6356200959999, 272.2120971776, 599.3763427583999, 344.6611938304], [567.5557861632, 177.0944214016, 761.8773193728, 452.5947875840001], [563.6356200959999, 314.8772582912, 593.4195556608, 344.6611938304], [575.9904784896, 316.6422729728, 599.3763427583999, 341.3518676992], [545.1357421824, 289.7937011712, 589.5128173824, 307.679504384], [572.1558838272, 272.2120971776, 599.2738036992, 321.8922729472], [333.5864257536, 237.9755249152, 656.1115723008, 511.0421752832]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049299_crop.jpg", "text": "Please describe the area in the image for me. Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, a knife, a spoon, a plate, a wine glass, and a desk.", "boxes_value": [[9.635620095999911, 18.212097177600015, 45.376342758399915, 90.66119383040001], [13.555786163199969, 0, 54, 108], [9.635620095999911, 60.87725829120001, 39.41955566080003, 90.66119383040001], [21.990478489600036, 62.642272972800015, 45.376342758399915, 87.3518676992], [0, 35.793701171199984, 35.512817382400044, 53.67950438399998], [18.1558838272, 18.212097177600015, 45.273803699200016, 67.89227294720001], [0, 0, 54, 108]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049300.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify.", "boxes_value": [[442.84228512410004, 408.7935791104, 682.9296874722, 512.0626220544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049300_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify.", "boxes_value": [[60.842285124100044, 26.793579110400003, 300.92968747220004, 130]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049300.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[442.84228512410004, 408.7935791104, 682.9296874722, 512.0626220544], [442.84228512410004, 421.4035644416, 469.15893551679994, 454.0251464704], [469.29602051509994, 416.880432128, 482.5913086235, 449.7761230336], [493.4195556539, 408.7935791104, 514.3905029443, 452.6545410048], [645.9804687295, 421.6400146432, 662.0194091594, 453.5830077952], [649.5040283011, 412.2658691584, 682.9296874722, 512.0626220544]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049300_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five people.", "boxes_value": [[60.842285124100044, 26.793579110400003, 300.92968747220004, 130], [60.842285124100044, 39.4035644416, 87.15893551679994, 72.02514647039999], [87.29602051509994, 34.880432127999995, 100.59130862350003, 67.77612303360002], [111.41955565389998, 26.793579110400003, 132.3905029443, 70.65454100480002], [263.9804687295, 39.640014643200004, 280.01940915939997, 71.58300779519999], [267.5040283011, 30.265869158399994, 300.92968747220004, 130]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049301.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[0.63720704, 327.3724365312, 56.6296996864, 475.52648924159996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049301_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify.", "boxes_value": [[0.63720704, 37.37243653119998, 56.6296996864, 185.52648924159996]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049301.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and a handbag.", "boxes_value": [[0.63720704, 327.3724365312, 56.6296996864, 475.52648924159996], [0.63720704, 334.1566162176, 42.7006835712, 422.00598144], [2.9611816448, 327.3724365312, 43.3476562432, 401.5147705344], [32.2763672064, 344.5346679552, 54.5301513728, 414.6174316032], [13.5079955968, 391.01123043840005, 59.1953124864, 493.19348144639997], [29.5125122048, 414.695922816, 56.6296996864, 475.52648924159996]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049301_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and a handbag.", "boxes_value": [[0.63720704, 37.37243653119998, 56.6296996864, 185.52648924159996], [0.63720704, 44.1566162176, 42.7006835712, 132.00598144000003], [2.9611816448, 37.37243653119998, 43.3476562432, 111.51477053439999], [32.2763672064, 54.53466795520001, 54.5301513728, 124.61743160319998], [13.5079955968, 101.01123043840005, 59.1953124864, 203.19348144639997], [29.5125122048, 124.695922816, 56.6296996864, 185.52648924159996]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049304.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[395.4730224693, 70.2921142784, 611.7449950914, 252.90789795839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049304_crop.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[54.47302246930002, 46.29211427840001, 270.7449950914, 228.90789795839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049304.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a tea pot, three glasses, two potted plants, a plate, and a person.", "boxes_value": [[395.4730224693, 70.2921142784, 611.7449950914, 252.90789795839999], [395.4730224693, 70.2921142784, 611.7449950914, 252.90789795839999], [460.2280273665, 69.4067993088, 537.9672851316, 99.0740356608], [464.3485107309, 95.7777099776, 534.1214599545001, 108.1390381056], [457.7557373016, 103.7438964736, 495.9385986666, 134.2352294912], [550.099731417, 187.3459472896, 570.9373779489, 216.3068237312], [440.1297607368, 212.437011712, 498.8588866854, 240.5906372096], [470.70532223159995, 185.1915283456, 496.7398681266, 214.2533569536], [503.2905273771, 199.9718017536, 524.8148193387, 235.1557006848]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 7], [6], [8]]}, {"image_path": "objects365_v1_00049304_crop.jpg", "text": "What does the area look like in the context of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a tea pot, three glasses, two potted plants, a plate, and a person.", "boxes_value": [[54.47302246930002, 46.29211427840001, 270.7449950914, 228.90789795839999], [54.47302246930002, 46.29211427840001, 270.7449950914, 228.90789795839999], [119.22802736649999, 45.406799308800004, 196.96728513159997, 75.0740356608], [123.34851073089999, 71.7777099776, 193.1214599545001, 84.1390381056], [116.75573730159999, 79.7438964736, 154.9385986666, 110.23522949119999], [209.09973141700004, 163.3459472896, 229.9373779489, 192.3068237312], [99.1297607368, 188.437011712, 157.85888668540002, 216.5906372096], [129.70532223159995, 161.1915283456, 155.73986812660002, 190.2533569536], [162.29052737709998, 175.9718017536, 183.81481933869998, 211.1557006848]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5, 7], [6], [8]]}, {"image_path": "objects365_v1_00049305.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for each element you describe.", "boxes_value": [[313.68878175559996, 255.7706909184, 351.61938475730005, 417.2435913216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049305_crop.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for each element you describe.", "boxes_value": [[9.688781755599962, 40.77069091839999, 47.619384757300054, 202.24359132159998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049305.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two candles, and three bottles.", "boxes_value": [[313.68878175559996, 255.7706909184, 351.61938475730005, 417.2435913216], [343.5410155934, 297.3898315264, 360.9732665979, 404.5343627776], [336.3129882595, 305.8933715968, 351.61938475730005, 398.156738304], [313.68878175559996, 332.3466186752, 338.8558349611, 417.2435913216], [322.9946288892, 255.7706909184, 334.0275878897, 283.7471313408], [330.56396484649997, 286.8373412864, 353.108398453, 342.3117676032]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049305_crop.jpg", "text": "Please enlighten me about the region in the given photo . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two candles, and three bottles.", "boxes_value": [[9.688781755599962, 40.77069091839999, 47.619384757300054, 202.24359132159998], [39.541015593400004, 82.38983152639997, 56.97326659790002, 189.5343627776], [32.3129882595, 90.89337159680002, 47.619384757300054, 183.156738304], [9.688781755599962, 117.34661867519998, 34.855834961100015, 202.24359132159998], [18.994628889199987, 40.77069091839999, 30.027587889699987, 68.74713134080002], [26.56396484649997, 71.83734128639998, 49.10839845300001, 127.3117676032]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049306.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[614.211791974, 9.757507328, 724.8905029288, 221.7418823168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049306_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[28.211791973999993, 9.757507328, 138.89050292879995, 221.7418823168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049306.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three cabinets, a boots, a fire extinguisher, and two chairs.", "boxes_value": [[614.211791974, 9.757507328, 724.8905029288, 221.7418823168], [620.1496582292, 9.757507328, 680.8983154552001, 61.37109376], [614.211791974, 60.9143066624, 672.2199707128, 111.1576538112], [621.0631103224, 111.6144409088, 666.738891608, 137.1928100352], [636.59460452, 191.1666870272, 664.8685303064001, 221.7418823168], [706.6540527432, 66.8145752064, 724.8905029288, 99.4191894528], [576.0491943588, 129.6914062336, 675.6966552656, 223.22558592], [699.5386962672, 194.49285888, 714.822021492, 223.22558592]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049306_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three cabinets, a boots, a fire extinguisher, and two chairs.", "boxes_value": [[28.211791973999993, 9.757507328, 138.89050292879995, 221.7418823168], [34.149658229199986, 9.757507328, 94.89831545520008, 61.37109376], [28.211791973999993, 60.9143066624, 86.21997071279998, 111.1576538112], [35.06311032240001, 111.6144409088, 80.73889160800002, 137.1928100352], [50.59460451999996, 191.1666870272, 78.86853030640009, 221.7418823168], [120.65405274320005, 66.8145752064, 138.89050292879995, 99.4191894528], [0, 129.6914062336, 89.69665526560004, 223.22558592], [113.53869626719995, 194.49285888, 128.82202149199998, 223.22558592]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049307.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify.", "boxes_value": [[1.89849856, 499.31079098879997, 512.59899904, 575.3326416384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049307_crop.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify.", "boxes_value": [[1.89849856, 19.310790988799965, 512, 95.33264163839999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049307.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, two people, a sandals, and a leather shoes.", "boxes_value": [[1.89849856, 499.31079098879997, 512.59899904, 575.3326416384], [1.89849856, 506.101562496, 512.59899904, 575.3326416384], [61.5842285056, 243.85357662720003, 344.7144164864, 709.7738036736], [106.6984252928, 251.63183592960002, 428.720031744, 708.218139648], [422.6691283968, 499.31079098879997, 454.3436279296, 518.3154296832], [483.7336425984, 494.46191408640004, 511.5814819328, 521.0665283328]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049307_crop.jpg", "text": "What's the story in the section of the included visual ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a carpet, two people, a sandals, and a leather shoes.", "boxes_value": [[1.89849856, 19.310790988799965, 512, 95.33264163839999], [1.89849856, 26.101562495999985, 512, 95.33264163839999], [61.5842285056, 0, 344.7144164864, 114], [106.6984252928, 0, 428.720031744, 114], [422.6691283968, 19.310790988799965, 454.3436279296, 38.31542968320002], [483.7336425984, 14.461914086400043, 511.5814819328, 41.066528332799976]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049308.jpg", "text": "Could you please provide a description of the rectangular area in ? Please mention the objects and their locations.", "boxes_value": [[338.9525146624, 117.526306128, 432.9481201152, 352.000854513]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049308_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Please mention the objects and their locations.", "boxes_value": [[23.952514662400006, 59.526306128, 117.94812011520003, 294.000854513]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049308.jpg", "text": "Could you please provide a description of the rectangular area in ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a person, a faucet, a bottle, and a cup.", "boxes_value": [[338.9525146624, 117.526306128, 432.9481201152, 352.000854513], [338.9525146624, 117.526306128, 432.9481201152, 141.18505863], [141.9503173632, 67.060791028, 380.5858154496, 657.975952172], [381.41644288, 248.339965797, 393.6595459072, 273.008911098], [356.718139648, 290.7718506, 371.9155273216, 333.788207984], [376.7525024256, 332.741699213, 398.9746703872, 352.000854513]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049308_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Please mention the objects and their locations. For your reference, objects involved in this region include a lamp, a person, a faucet, a bottle, and a cup.", "boxes_value": [[23.952514662400006, 59.526306128, 117.94812011520003, 294.000854513], [23.952514662400006, 59.526306128, 117.94812011520003, 83.18505862999999], [0, 9.060791027999997, 65.58581544959998, 352], [66.41644287999998, 190.339965797, 78.65954590720003, 215.008911098], [41.718139647999976, 232.7718506, 56.915527321599996, 275.788207984], [61.75250242560003, 274.741699213, 83.97467038719998, 294.000854513]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049309.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[14.9349364979, 72.7730102784, 146.2866210679, 243.4816894464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049309_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations.", "boxes_value": [[14.9349364979, 42.773010278399994, 146.2866210679, 213.4816894464]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049309.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two wild birds.", "boxes_value": [[14.9349364979, 72.7730102784, 146.2866210679, 243.4816894464], [93.8055419951, 72.7730102784, 146.2866210679, 231.8064574976], [46.0955200132, 85.4956664832, 94.6007080116, 141.1574096896], [0.7709350889000001, 87.0859985408, 61.9988403432, 183.3012695552], [78.1709594895, 77.554687488, 211.6692504967, 188.3528442368], [14.9349364979, 167.2741699072, 80.3328857533, 243.4816894464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049309_crop.jpg", "text": "In the provided image , please explain the content within the region . Please mention the objects and their locations. For your reference, objects involved in this region include three people, and two wild birds.", "boxes_value": [[14.9349364979, 42.773010278399994, 146.2866210679, 213.4816894464], [93.8055419951, 42.773010278399994, 146.2866210679, 201.8064574976], [46.0955200132, 55.4956664832, 94.6007080116, 111.1574096896], [0.7709350889000001, 57.085998540800006, 61.9988403432, 153.3012695552], [78.1709594895, 47.554687488, 179, 158.3528442368], [14.9349364979, 137.2741699072, 80.3328857533, 213.4816894464]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049310.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[333.574923913, 160.2291870208, 636.7174072490001, 398.8020019712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049310_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object.", "boxes_value": [[76.57492391300002, 60.2291870208, 379.7174072490001, 298.8020019712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049310.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include a soccer, two people, and three sneakers.", "boxes_value": [[333.574923913, 160.2291870208, 636.7174072490001, 398.8020019712], [425.65698244300006, 259.6693725696, 456.838500965, 290.1422118912], [323.871948246, 178.2346801664, 438.094360319, 393.1752929792], [526.433837853, 160.2291870208, 636.7174072490001, 398.8020019712], [333.574923913, 369.470833664, 350.775863823, 392.4053314048], [402.924543252, 351.9969306624, 429.68145728200005, 385.5795880448], [596.974049518, 340.7559575552, 618.92912235, 381.188124928]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049310_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Specify the location of each mentioned object. For your reference, objects involved in this region include a soccer, two people, and three sneakers.", "boxes_value": [[76.57492391300002, 60.2291870208, 379.7174072490001, 298.8020019712], [168.65698244300006, 159.6693725696, 199.83850096499998, 190.14221189120002], [66.87194824599999, 78.23468016640001, 181.09436031899997, 293.1752929792], [269.433837853, 60.2291870208, 379.7174072490001, 298.8020019712], [76.57492391300002, 269.470833664, 93.77586382300001, 292.4053314048], [145.92454325199998, 251.99693066240002, 172.68145728200005, 285.5795880448], [339.974049518, 240.75595755519998, 361.92912234999994, 281.188124928]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049313.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[547.1752929792, 304.2565918208, 712.6396484352, 498.2583007744]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049313_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object.", "boxes_value": [[42.17529297919998, 49.2565918208, 207.6396484352, 243.25830077440003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049313.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[547.1752929792, 304.2565918208, 712.6396484352, 498.2583007744], [644.5483398143999, 351.9443969536, 712.6396484352, 498.2583007744], [601.8142089984, 304.2565918208, 631.0913085696, 365.1938476544], [685.5563964672, 322.3970947072, 701.5037842176, 352.7562866176], [547.1752929792, 316.21246336, 589.7396239871999, 416.1266479616], [646.538330112, 474.913391104, 672.2915039232, 487.217712384]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049313_crop.jpg", "text": "Describe the selected rectangular area in the photo . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four people, and a sneakers.", "boxes_value": [[42.17529297919998, 49.2565918208, 207.6396484352, 243.25830077440003], [139.54833981439992, 96.9443969536, 207.6396484352, 243.25830077440003], [96.81420899839998, 49.2565918208, 126.09130856959996, 110.19384765439997], [180.5563964672, 67.39709470719998, 196.5037842176, 97.75628661759998], [42.17529297919998, 61.212463360000015, 84.73962398719993, 161.12664796159999], [141.53833011200004, 219.91339110400003, 167.29150392320003, 232.21771238399998]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049315.jpg", "text": "In the image , please describe the bounding box . Specify the location of each mentioned object.", "boxes_value": [[0, 137.7707519488, 104.6085205248, 512.6434326016]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049315_crop.jpg", "text": "In the image , please describe the bounding box . Specify the location of each mentioned object.", "boxes_value": [[0, 93.77075194880001, 104.6085205248, 468]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049315.jpg", "text": "In the image , please describe the bounding box . Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a stool, a desk, a chair, and a trash bin can.", "boxes_value": [[0, 137.7707519488, 104.6085205248, 512.6434326016], [9.521057126399999, 137.7707519488, 42.5522460672, 192.2883300864], [57.84436032, 374.410766592, 86.3592529152, 412.9244384768], [0.0738525696, 357.0056152576, 54.1411133184, 411.8134765568], [88.931762688, 353.3850097664, 104.6085205248, 413.1977539072], [0, 478.2225341952, 58.83819578879999, 512.6434326016]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049315_crop.jpg", "text": "In the image , please describe the bounding box . Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a stool, a desk, a chair, and a trash bin can.", "boxes_value": [[0, 93.77075194880001, 104.6085205248, 468], [9.521057126399999, 93.77075194880001, 42.5522460672, 148.2883300864], [57.84436032, 330.410766592, 86.3592529152, 368.9244384768], [0.0738525696, 313.0056152576, 54.1411133184, 367.8134765568], [88.931762688, 309.3850097664, 104.6085205248, 369.1977539072], [0, 434.2225341952, 58.83819578879999, 468]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049317.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object.", "boxes_value": [[166.8919677392, 155.0534057472, 289.3394775024, 246.0032348672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049317_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object.", "boxes_value": [[30.8919677392, 23.053405747200003, 153.33947750239997, 114.0032348672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049317.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, and four flags.", "boxes_value": [[166.8919677392, 155.0534057472, 289.3394775024, 246.0032348672], [219.64843746480003, 155.0534057472, 238.0537109104, 239.7714233344], [166.8919677392, 161.6707153408, 194.8300781504, 223.840637184], [189.1693115328, 218.5000610304, 207.35693361839998, 246.0032348672], [262.7753906344, 164.47491456, 289.3394775024, 223.7332763648], [250.06091308560002, 215.3327026176, 271.17590328, 262.3306884608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049317_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a lamp, and four flags.", "boxes_value": [[30.8919677392, 23.053405747200003, 153.33947750239997, 114.0032348672], [83.64843746480003, 23.053405747200003, 102.05371091040001, 107.7714233344], [30.8919677392, 29.6707153408, 58.8300781504, 91.840637184], [53.16931153280001, 86.50006103039999, 71.35693361839998, 114.0032348672], [126.77539063440003, 32.47491456, 153.33947750239997, 91.73327636479999], [114.06091308560002, 83.3327026176, 135.17590328, 130.3306884608]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049318.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each mentioned object.", "boxes_value": [[552.2655029379999, 55.834350592, 653.1801964449, 193.709865472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049318_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each mentioned object.", "boxes_value": [[25.26550293799994, 34.834350592, 126.18019644490005, 172.709865472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049318.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two gloves, and two helmets.", "boxes_value": [[552.2655029379999, 55.834350592, 653.1801964449, 193.709865472], [581.5417480442, 63.038940416, 710.9312743839, 349.4805297664], [498.55395508400005, 53.2232055808, 610.0966796862999, 337.8800658944], [540.7802413196999, 166.7085191168, 575.9638138497, 201.0738690048], [583.7369287539001, 158.1171816448, 620.5569465107001, 193.709865472], [609.7650146772, 65.1358032384, 653.1801964449, 109.1530761728], [552.2655029379999, 55.834350592, 586.5115966422001, 98.9591064576]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049318_crop.jpg", "text": "Would you kindly provide a description for the content within the rectangular area of ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, two gloves, and two helmets.", "boxes_value": [[25.26550293799994, 34.834350592, 126.18019644490005, 172.709865472], [54.54174804419995, 42.038940416, 151, 207], [0, 32.2232055808, 83.09667968629992, 207], [13.78024131969994, 145.7085191168, 48.963813849700045, 180.0738690048], [56.73692875390009, 137.1171816448, 93.55694651070007, 172.709865472], [82.76501467720004, 44.1358032384, 126.18019644490005, 88.1530761728], [25.26550293799994, 34.834350592, 59.51159664220006, 77.9591064576]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049320.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[427.4017334248, 197.3543090688, 493.18164059500003, 500.11260984320006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049320_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[17.4017334248, 76.3543090688, 83.18164059500003, 379.11260984320006]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049320.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two high heels, and a sneakers.", "boxes_value": [[427.4017334248, 197.3543090688, 493.18164059500003, 500.11260984320006], [431.78710936299996, 168.5364990464, 514.4816894547, 452.95599365119995], [427.4017334248, 197.3543090688, 493.18164059500003, 500.11260984320006], [436.7478027016, 480.4523926016, 452.44262696830003, 499.8529662976], [457.45617678900004, 478.2725830144, 480.1264648248, 496.1472167936], [476.6943359361, 422.8522338816, 500.25622555869995, 445.8845825024]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049320_crop.jpg", "text": "In the given photograph , can you explain the area with coordinates ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two high heels, and a sneakers.", "boxes_value": [[17.4017334248, 76.3543090688, 83.18164059500003, 379.11260984320006], [21.787109362999956, 47.536499046399996, 99, 331.95599365119995], [17.4017334248, 76.3543090688, 83.18164059500003, 379.11260984320006], [26.747802701599994, 359.4523926016, 42.44262696830003, 378.8529662976], [47.45617678900004, 357.2725830144, 70.1264648248, 375.1472167936], [66.6943359361, 301.8522338816, 90.25622555869995, 324.8845825024]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049321.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[231.70697025180002, 1.3937988096, 408.14929200740005, 394.311035136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049321_crop.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[44.706970251800016, 1.3937988096, 221.14929200740005, 394.311035136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049321.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, two wine glasses, a bottle, and a napkin.", "boxes_value": [[231.70697025180002, 1.3937988096, 408.14929200740005, 394.311035136], [231.70697025180002, 1.3937988096, 407.36230467980005, 120.33538816], [391.6269531294, 151.4772948992, 408.14929200740005, 217.1910400512], [271.8095703226, 356.4401855488, 297.13806151899996, 412.5381469696], [299.0141601324, 363.569702144, 320.40270995320003, 410.8495483392], [252.48480221379998, 381.0182495232, 271.4343261492, 411.9752807424], [293.7231445098, 373.5330810368, 341.1394042682, 394.311035136]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049321_crop.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two lamps, two wine glasses, a bottle, and a napkin.", "boxes_value": [[44.706970251800016, 1.3937988096, 221.14929200740005, 394.311035136], [44.706970251800016, 1.3937988096, 220.36230467980005, 120.33538816], [204.62695312940002, 151.4772948992, 221.14929200740005, 217.1910400512], [84.8095703226, 356.4401855488, 110.13806151899996, 412.5381469696], [112.01416013239998, 363.569702144, 133.40270995320003, 410.8495483392], [65.48480221379998, 381.0182495232, 84.43432614919999, 411.9752807424], [106.72314450980002, 373.5330810368, 154.1394042682, 394.311035136]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049322.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates.", "boxes_value": [[144.216613772, 56.3507262976, 769.928796329, 303.4262262272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049322_crop.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates.", "boxes_value": [[144.216613772, 56.3507262976, 769.928796329, 303.4262262272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049322.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a guitar, two people, a bracelet, and a hat.", "boxes_value": [[144.216613772, 56.3507262976, 769.928796329, 303.4262262272], [505.523559541, 191.0674438656, 622.47790528, 439.456115712], [529.1730340820001, 250.1964550144, 769.928796329, 303.4262262272], [11.750000030999999, 0.4675293184, 521.046020495, 511.7050781184], [144.216613772, 256.9878540288, 182.679016135, 302.8153686528], [536.975097659, 58.3348388864, 738.022216778, 441.5808105472], [612.09290989, 56.3507262976, 694.877688428, 107.2184817664]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00049322_crop.jpg", "text": "I need details about the area located within image . Please point out the objects and their coordinates. For your reference, objects involved in this region include a chair, a guitar, two people, a bracelet, and a hat.", "boxes_value": [[144.216613772, 56.3507262976, 769.928796329, 303.4262262272], [505.523559541, 191.0674438656, 622.47790528, 365], [529.1730340820001, 250.1964550144, 769.928796329, 303.4262262272], [11.750000030999999, 0.4675293184, 521.046020495, 365], [144.216613772, 256.9878540288, 182.679016135, 302.8153686528], [536.975097659, 58.3348388864, 738.022216778, 365], [612.09290989, 56.3507262976, 694.877688428, 107.2184817664]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4], [6]]}, {"image_path": "objects365_v1_00049323.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates.", "boxes_value": [[127.82342528, 451.456909211, 253.5549926912, 509.86791994600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049323_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates.", "boxes_value": [[31.823425279999995, 15.456909210999981, 157.5549926912, 73.86791994600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049323.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, and two backpacks.", "boxes_value": [[127.82342528, 451.456909211, 253.5549926912, 509.86791994600003], [122.0372924928, 449.713256839, 154.8672485376, 536.732543963], [149.5193481216, 461.918579122, 176.2548217856, 509.28674319600003], [214.9049072128, 451.456909211, 253.5549926912, 509.86791994600003], [127.82342528, 464.207519545, 146.8252563456, 491.96301269500003], [223.5362548736, 462.621459993, 243.7711792128, 492.97387693400003]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049323_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, and two backpacks.", "boxes_value": [[31.823425279999995, 15.456909210999981, 157.5549926912, 73.86791994600003], [26.037292492800006, 13.713256838999996, 58.867248537600005, 88], [53.51934812159999, 25.918579121999983, 80.2548217856, 73.28674319600003], [118.9049072128, 15.456909210999981, 157.5549926912, 73.86791994600003], [31.823425279999995, 28.207519545000025, 50.825256345599996, 55.96301269500003], [127.5362548736, 26.621459992999974, 147.7711792128, 56.97387693400003]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049324.jpg", "text": "Can you give me a description of the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[71.173034691, 86.4658203136, 309.540466321, 471.646118144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049324_crop.jpg", "text": "Can you give me a description of the region in image ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[60.173034691, 86.4658203136, 298.540466321, 471.646118144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049324.jpg", "text": "Can you give me a description of the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a flower, a vase, and two pillows.", "boxes_value": [[71.173034691, 86.4658203136, 309.540466321, 471.646118144], [71.173034691, 167.5969238528, 235.80072022899998, 471.646118144], [83.776123051, 210.9199828992, 383.099182115, 477.15997312], [201.89990236, 10.0757446144, 353.191955586, 231.8053588992], [239.102905263, 86.4658203136, 309.540466321, 231.8053588992], [85.745178203, 234.1884765696, 188.516479471, 265.1655883776], [82.829711909, 261.5212402176, 231.520080571, 307.804748544]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049324_crop.jpg", "text": "Can you give me a description of the region in image ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a flower, a vase, and two pillows.", "boxes_value": [[60.173034691, 86.4658203136, 298.540466321, 471.646118144], [60.173034691, 167.5969238528, 224.80072022899998, 471.646118144], [72.776123051, 210.9199828992, 358, 477.15997312], [190.89990236, 10.0757446144, 342.191955586, 231.8053588992], [228.102905263, 86.4658203136, 298.540466321, 231.8053588992], [74.745178203, 234.1884765696, 177.516479471, 265.1655883776], [71.829711909, 261.5212402176, 220.520080571, 307.804748544]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049327.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations.", "boxes_value": [[0, 199.617248512, 242.90643307669998, 358.3375854592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049327_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations.", "boxes_value": [[0, 40.617248512, 242.90643307669998, 199.3375854592]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049327.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations. For your reference, objects involved in this region include four pictures, a glasses, and a hat.", "boxes_value": [[0, 199.617248512, 242.90643307669998, 358.3375854592], [213.4589233195, 268.9967041024, 242.90643307669998, 301.7910156288], [125.6153564195, 212.5971679744, 202.8563842939, 358.3375854592], [50.309814423199995, 254.4445190656, 104.9471435517, 307.2205810688], [0, 199.617248512, 20.9743041671, 297.306945792], [224.9156493968, 294.5693359616, 246.2880859432, 315.486450176], [0, 294.1130981376, 56.1918335086, 350.3889160192]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049327_crop.jpg", "text": "Can you give me a visual rundown of the area in ? Please mention the objects and their locations. For your reference, objects involved in this region include four pictures, a glasses, and a hat.", "boxes_value": [[0, 40.617248512, 242.90643307669998, 199.3375854592], [213.4589233195, 109.99670410239997, 242.90643307669998, 142.79101562879998], [125.6153564195, 53.59716797440001, 202.8563842939, 199.3375854592], [50.309814423199995, 95.44451906559999, 104.9471435517, 148.22058106880002], [0, 40.617248512, 20.9743041671, 138.30694579200002], [224.9156493968, 135.56933596160002, 246.2880859432, 156.486450176], [0, 135.1130981376, 56.1918335086, 191.3889160192]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049328.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[424.04113766399996, 347.7987060736, 513.6684570624, 446.083251968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049328_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates.", "boxes_value": [[23.04113766399996, 24.798706073599988, 112.66845706239997, 123.08325196800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049328.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three wine glasses, a bottle, and a desk.", "boxes_value": [[424.04113766399996, 347.7987060736, 513.6684570624, 446.083251968], [481.58593751039996, 375.298034688, 513.6684570624, 446.083251968], [467.32702632959996, 347.7987060736, 504.50207516160003, 426.222656256], [449.5034179584, 371.7333373952, 484.6413573888, 438.444580096], [424.04113766399996, 380.8997192192, 454.5958252032, 435.3890991104], [92.5632324096, 251.1362304512, 673.82507328, 512.6593017344]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049328_crop.jpg", "text": "Can you help me by describing the part of that lies within the bounding box ? Please point out the objects and their coordinates. For your reference, objects involved in this region include three wine glasses, a bottle, and a desk.", "boxes_value": [[23.04113766399996, 24.798706073599988, 112.66845706239997, 123.08325196800001], [80.58593751039996, 52.29803468799997, 112.66845706239997, 123.08325196800001], [66.32702632959996, 24.798706073599988, 103.50207516160003, 103.222656256], [48.50341795840001, 48.73333739520001, 83.64135738879997, 115.44458009599998], [23.04113766399996, 57.899719219199994, 53.59582520319998, 112.38909911040002], [0, 0, 135, 147]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049329.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention.", "boxes_value": [[143.34252928, 28.671508769400003, 382.50805663999995, 220.7058715869]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049329_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention.", "boxes_value": [[60.34252928000001, 28.671508769400003, 299.50805663999995, 220.7058715869]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049329.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, a cabinet, a picture, and a person.", "boxes_value": [[143.34252928, 28.671508769400003, 382.50805663999995, 220.7058715869], [207.605957056, 192.2412719514, 258.75329587199997, 220.7058715869], [162.685241728, 132.7423705818, 200.934570304, 328.4364624114], [267.048828096, 74.0304565644, 295.913635264, 114.1412963754], [363.764648448, 68.0325927519, 382.50805663999995, 85.2764892672], [260.301208512, 50.4138183465, 285.042480448, 69.9069213927], [143.34252928, 28.671508769400003, 166.95922854399998, 50.788696284900006], [270.773437504, 80.48663328090001, 292.014343232, 110.8692016698]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3], [7]]}, {"image_path": "objects365_v1_00049329_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four lamps, a cabinet, a picture, and a person.", "boxes_value": [[60.34252928000001, 28.671508769400003, 299.50805663999995, 220.7058715869], [124.605957056, 192.2412719514, 175.75329587199997, 220.7058715869], [79.685241728, 132.7423705818, 117.934570304, 268], [184.04882809600002, 74.0304565644, 212.913635264, 114.1412963754], [280.764648448, 68.0325927519, 299.50805663999995, 85.2764892672], [177.30120851200002, 50.4138183465, 202.042480448, 69.9069213927], [60.34252928000001, 28.671508769400003, 83.95922854399998, 50.788696284900006], [187.77343750400001, 80.48663328090001, 209.014343232, 110.8692016698]], "boxes_seq": [[0], [0], [1, 4, 5, 6], [2], [3], [7]]}, {"image_path": "objects365_v1_00049331.jpg", "text": "In the provided image , would you mind describing the selected area ? Please point out the objects and their coordinates.", "boxes_value": [[549.625366213, 226.4061889536, 755.6783447290001, 378.3869628928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049331_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please point out the objects and their coordinates.", "boxes_value": [[51.62536621300001, 38.40618895360001, 257.67834472900006, 190.3869628928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049331.jpg", "text": "In the provided image , would you mind describing the selected area ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a glasses, a tie, a tea pot, and a cup.", "boxes_value": [[549.625366213, 226.4061889536, 755.6783447290001, 378.3869628928], [501.64526365800003, 192.4152221696, 673.197631799, 350.8385619968], [549.625366213, 226.4061889536, 602.109130854, 240.7390747136], [570.070922883, 278.046813952, 594.099609411, 340.0155029504], [673.213012703, 300.1441650176, 730.257568348, 371.8017577984], [726.536621118, 337.280700672, 755.6783447290001, 378.3869628928]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049331_crop.jpg", "text": "In the provided image , would you mind describing the selected area ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, a glasses, a tie, a tea pot, and a cup.", "boxes_value": [[51.62536621300001, 38.40618895360001, 257.67834472900006, 190.3869628928], [3.645263658000033, 4.415222169600014, 175.19763179899996, 162.83856199680002], [51.62536621300001, 38.40618895360001, 104.109130854, 52.73907471359999], [72.07092288299998, 90.04681395199998, 96.09960941099996, 152.0155029504], [175.213012703, 112.14416501760002, 232.25756834799995, 183.8017577984], [228.53662111799997, 149.28070067200002, 257.67834472900006, 190.3869628928]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049333.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.0146484376, 327.3513183744, 318.0589599876, 423.5974731264]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049333_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.0146484376, 24.351318374400023, 318.0589599876, 120.59747312640002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049333.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a carpet, a book, a cup, and a laptop.", "boxes_value": [[0.0146484376, 327.3513183744, 318.0589599876, 423.5974731264], [0, 362.2800903168, 219.7097778407, 511.2076416], [251.04705810040002, 339.8977660928, 318.0589599876, 365.4260864512], [110.7554321041, 360.2087402496, 187.31402591230002, 387.8054199296], [75.1322631888, 344.4309692416, 98.1699218986, 385.039672832], [0.0146484376, 327.3513183744, 128.67413333, 423.5974731264]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049333_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a carpet, a book, a cup, and a laptop.", "boxes_value": [[0.0146484376, 24.351318374400023, 318.0589599876, 120.59747312640002], [0, 59.2800903168, 219.7097778407, 144], [251.04705810040002, 36.8977660928, 318.0589599876, 62.42608645119998], [110.7554321041, 57.2087402496, 187.31402591230002, 84.80541992960002], [75.1322631888, 41.430969241599996, 98.1699218986, 82.03967283200001], [0.0146484376, 24.351318374400023, 128.67413333, 120.59747312640002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049334.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please point out the objects and their coordinates.", "boxes_value": [[356.6094970461, 62.8052368384, 547.480957042, 272.3908691456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049334_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please point out the objects and their coordinates.", "boxes_value": [[48.60949704609999, 52.8052368384, 239.480957042, 262.3908691456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049334.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a vase, and two pictures.", "boxes_value": [[356.6094970461, 62.8052368384, 547.480957042, 272.3908691456], [356.6094970461, 177.2251587072, 398.8524170044, 257.6878662144], [445.61596675469997, 199.2852783104, 486.30004883169994, 270.482360832], [360.05981443769997, 108.344421376, 390.5728759499, 138.8574829056], [524.2042236098, 62.8052368384, 547.480957042, 130.911193856], [502.83996577820005, 240.1174926848, 526.0767821967, 272.3908691456]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00049334_crop.jpg", "text": "Give me a vivid description of what's happening in the area within the snapshot . Please point out the objects and their coordinates. For your reference, objects involved in this region include two lamps, a vase, and two pictures.", "boxes_value": [[48.60949704609999, 52.8052368384, 239.480957042, 262.3908691456], [48.60949704609999, 167.2251587072, 90.85241700440002, 247.6878662144], [137.61596675469997, 189.2852783104, 178.30004883169994, 260.482360832], [52.05981443769997, 98.344421376, 82.57287594989998, 128.8574829056], [216.2042236098, 52.8052368384, 239.480957042, 120.91119385600001], [194.83996577820005, 230.1174926848, 218.07678219670004, 262.3908691456]], "boxes_seq": [[0], [0], [1, 3], [2], [4, 5]]}, {"image_path": "objects365_v1_00049338.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference.", "boxes_value": [[179.2239990312, 70.5852661248, 328.75354002489996, 204.35913088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049338_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference.", "boxes_value": [[38.22399903120001, 33.5852661248, 187.75354002489996, 167.35913088]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049338.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, two potted plants, two umbrellas, and a traffic light.", "boxes_value": [[179.2239990312, 70.5852661248, 328.75354002489996, 204.35913088], [297.3095703159, 129.8192749056, 328.75354002489996, 174.2401733632], [224.26770020549998, 70.5852661248, 255.95098874330003, 128.6076660224], [179.2239990312, 81.273559552, 205.94482423589997, 134.7152710144], [215.2779540777, 187.5115356672, 275.5746459991, 204.35913088], [280.4515990982, 184.4080200192, 340.5266113488, 201.255615232], [195.67840576569998, 147.7539673088, 214.2828369426, 181.9185180672]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049338_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a lamp, two potted plants, two umbrellas, and a traffic light.", "boxes_value": [[38.22399903120001, 33.5852661248, 187.75354002489996, 167.35913088], [156.30957031589998, 92.8192749056, 187.75354002489996, 137.2401733632], [83.26770020549998, 33.5852661248, 114.95098874330003, 91.6076660224], [38.22399903120001, 44.273559551999995, 64.94482423589997, 97.7152710144], [74.2779540777, 150.5115356672, 134.57464599910003, 167.35913088], [139.4515990982, 147.4080200192, 199.52661134879997, 164.255615232], [54.67840576569998, 110.75396730879999, 73.2828369426, 144.9185180672]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049339.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[368.8925781504, 295.5426635776, 665.1805420032, 511.6667480576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049339_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify.", "boxes_value": [[74.89257815040003, 54.542663577600024, 371.1805420032, 270.6667480576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049339.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, and six chairs.", "boxes_value": [[368.8925781504, 295.5426635776, 665.1805420032, 511.6667480576], [257.10986327039996, 327.0281982464, 614.1663818495999, 512.124389632], [368.8925781504, 295.5426635776, 418.53588864, 333.414855936], [504.6257324544, 295.9721679872, 557.70532224, 335.0141601792], [594.1153564415999, 313.0804443136, 665.1805420032, 406.0794067456], [444.13537597440006, 389.2190551552, 626.2530517248, 511.6667480576], [609.7816161792, 357.4195556864, 701.41760256, 511.7755737088], [620.4368896512, 336.6416015872, 700.3520507904, 496.4718017536]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00049339_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, and six chairs.", "boxes_value": [[74.89257815040003, 54.542663577600024, 371.1805420032, 270.6667480576], [0, 86.0281982464, 320.16638184959993, 271], [74.89257815040003, 54.542663577600024, 124.53588864, 92.41485593599998], [210.6257324544, 54.97216798720001, 263.70532224, 94.01416017920002], [300.11535644159994, 72.08044431360003, 371.1805420032, 165.0794067456], [150.13537597440006, 148.2190551552, 332.2530517248, 270.6667480576], [315.7816161792, 116.41955568639997, 407.41760256, 270.7755737088], [326.4368896512, 95.6416015872, 406.35205079039997, 255.4718017536]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6, 7]]}, {"image_path": "objects365_v1_00049341.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify.", "boxes_value": [[247.27728271400002, 332.1441039872, 395.58215335099993, 383.5904541184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049341_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify.", "boxes_value": [[37.27728271400002, 13.144103987200026, 185.58215335099993, 64.59045411839998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049341.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and an umbrella.", "boxes_value": [[247.27728271400002, 332.1441039872, 395.58215335099993, 383.5904541184], [247.27728271400002, 332.1441039872, 297.02294922299996, 383.5904541184], [303.400634768, 332.994445824, 345.493164093, 383.5904541184], [338.26513674800003, 333.4196167168, 378.656860351, 383.5904541184], [372.402709987, 357.7107544064, 395.58215335099993, 382.4967651328], [302.653930656, 262.642456064, 413.358520498, 377.447204608]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049341_crop.jpg", "text": "I'd like a thorough description of the area in the image . Include the coordinates for each object you identify. For your reference, objects involved in this region include three chairs, a desk, and an umbrella.", "boxes_value": [[37.27728271400002, 13.144103987200026, 185.58215335099993, 64.59045411839998], [37.27728271400002, 13.144103987200026, 87.02294922299996, 64.59045411839998], [93.40063476799997, 13.994445824000024, 135.49316409300002, 64.59045411839998], [128.26513674800003, 14.419616716800022, 168.65686035099998, 64.59045411839998], [162.40270998699998, 38.710754406399985, 185.58215335099993, 63.49676513280002], [92.653930656, 0, 203.35852049800002, 58.44720460799999]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049342.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[457.47253418400004, 115.173278832, 711.328491216, 279.180053712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049342_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention.", "boxes_value": [[63.47253418400004, 41.173278831999994, 317.328491216, 205.18005371200002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049342.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[457.47253418400004, 115.173278832, 711.328491216, 279.180053712], [683.754760728, 115.173278832, 711.328491216, 173.928588864], [671.900634792, 118.78106688, 686.331787104, 177.79406735999999], [578.356079112, 145.324035648, 666.820312488, 279.180053712], [487.90051272, 136.29028319999998, 523.6837158239999, 174.264404304], [457.47253418400004, 142.86273192, 491.064941376, 175.238098128]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049342_crop.jpg", "text": "Care to elaborate on the targeted area in the visual illustration ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five people.", "boxes_value": [[63.47253418400004, 41.173278831999994, 317.328491216, 205.18005371200002], [289.75476072799995, 41.173278831999994, 317.328491216, 99.928588864], [277.900634792, 44.78106688, 292.331787104, 103.79406735999999], [184.35607911199997, 71.324035648, 272.820312488, 205.18005371200002], [93.90051272, 62.290283199999976, 129.68371582399993, 100.26440430400001], [63.47253418400004, 68.86273191999999, 97.06494137599998, 101.23809812799999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049343.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[203.298950166, 415.0415038976, 486.270751934, 455.1051635712]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049343_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[71.298950166, 10.041503897600023, 354.270751934, 50.10516357120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049343.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and two bicycles.", "boxes_value": [[203.298950166, 415.0415038976, 486.270751934, 455.1051635712], [474.71765134000003, 415.6929931776, 486.270751934, 442.3701171712], [348.092163112, 415.0415038976, 357.219238278, 448.836242688], [360.179321298, 415.0415038976, 370.29309080400003, 447.109497088], [419.18298338200003, 416.6423340032, 430.96313472599996, 449.4453735424], [203.298950166, 433.31628416, 232.49328614400002, 455.1051635712], [411.643676766, 432.5136718848, 444.004150388, 450.0413208064]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049343_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include four people, and two bicycles.", "boxes_value": [[71.298950166, 10.041503897600023, 354.270751934, 50.10516357120002], [342.71765134000003, 10.692993177599988, 354.270751934, 37.37011717119998], [216.09216311199998, 10.041503897600023, 225.21923827799998, 43.83624268800003], [228.179321298, 10.041503897600023, 238.29309080400003, 42.10949708800001], [287.18298338200003, 11.642334003200006, 298.96313472599996, 44.44537354239998], [71.298950166, 28.31628416000001, 100.49328614400002, 50.10516357120002], [279.643676766, 27.513671884799976, 312.004150388, 45.041320806399995]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049344.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[268.05651858, 130.4718628, 722.11791996, 427.76428225]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049344_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference.", "boxes_value": [[114.05651857999999, 74.4718628, 568.11791996, 371.76428225]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049344.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, two plates, two bowls, and a bottle.", "boxes_value": [[268.05651858, 130.4718628, 722.11791996, 427.76428225], [268.05651858, 130.4718628, 722.11791996, 427.76428225], [387.70239258, 290.78009035, 423.92248533000003, 302.15502929999997], [486.81579590999996, 281.82293699999997, 524.1821289300001, 303.4281616], [437.85314937, 281.50079345, 471.03186033000003, 303.16101075], [572.37048342, 152.0484009, 587.38220217, 180.6003418], [591.1575927299999, 166.72058105000002, 631.24853517, 176.74328615000002]], "boxes_seq": [[0], [0], [1], [2, 6], [3, 4], [5]]}, {"image_path": "objects365_v1_00049344_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Give coordinates for the items you reference. For your reference, objects involved in this region include a cabinet, two plates, two bowls, and a bottle.", "boxes_value": [[114.05651857999999, 74.4718628, 568.11791996, 371.76428225], [114.05651857999999, 74.4718628, 568.11791996, 371.76428225], [233.70239257999998, 234.78009035000002, 269.92248533000003, 246.15502929999997], [332.81579590999996, 225.82293699999997, 370.1821289300001, 247.4281616], [283.85314937, 225.50079345, 317.03186033000003, 247.16101075], [418.37048342, 96.04840089999999, 433.38220217, 124.6003418], [437.1575927299999, 110.72058105000002, 477.24853516999997, 120.74328615000002]], "boxes_seq": [[0], [0], [1], [2, 6], [3, 4], [5]]}, {"image_path": "objects365_v1_00049347.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[392.07775879999997, 387.092956536, 537.940307645, 425.3872680585]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049347_crop.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[37.07775879999997, 10.092956535999974, 182.94030764499996, 48.38726805850001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049347.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, three sneakers, and a skiboard.", "boxes_value": [[392.07775879999997, 387.092956536, 537.940307645, 425.3872680585], [447.07055666, 141.62695311599998, 551.58483887, 423.751281723], [520.29907228, 387.092956536, 537.940307645, 409.03686524249997], [480.71398927499996, 398.28002930099996, 504.80932619, 425.3872680585], [392.07775879999997, 390.104858385, 414.02160644, 413.339599629], [472.71826173, 351.937805175, 545.1729736049999, 455.1696777375]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049347_crop.jpg", "text": "I'd like a thorough description of the area in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, three sneakers, and a skiboard.", "boxes_value": [[37.07775879999997, 10.092956535999974, 182.94030764499996, 48.38726805850001], [92.07055666000002, 0, 196.58483887, 46.75128172299998], [165.29907228000002, 10.092956535999974, 182.94030764499996, 32.03686524249997], [125.71398927499996, 21.280029300999956, 149.80932618999998, 48.38726805850001], [37.07775879999997, 13.104858385, 59.02160644000003, 36.33959962900002], [117.71826173, 0, 190.17297360499992, 57]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049351.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[401.8577880842, 129.5566406144, 480.89367676, 187.6898193408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049351_crop.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each object you identify.", "boxes_value": [[19.857788084200024, 14.55664061440001, 98.89367676, 72.6898193408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049351.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two people, a telephone, and a keyboard.", "boxes_value": [[401.8577880842, 129.5566406144, 480.89367676, 187.6898193408], [384.8314819321, 115.5474243072, 427.630249025, 163.101623552], [416.6027832113, 83.3788452352, 542.8892822248, 305.1502685696], [163.5655517755, 152.3869628928, 519.6928711107, 512.3089599488], [439.68444821979995, 133.386108416, 480.89367676, 187.6898193408], [401.8577880842, 129.5566406144, 436.9727172888, 178.2644653568]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049351_crop.jpg", "text": "What can I find in the bbox of the provided image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, two people, a telephone, and a keyboard.", "boxes_value": [[19.857788084200024, 14.55664061440001, 98.89367676, 72.6898193408], [2.8314819321000186, 0.5474243072000036, 45.63024902500001, 48.10162355200001], [34.60278321129999, 0, 118, 87], [0, 37.3869628928, 118, 87], [57.68444821979995, 18.386108416000013, 98.89367676, 72.6898193408], [19.857788084200024, 14.55664061440001, 54.97271728880003, 63.2644653568]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049352.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[114.22985840639998, 87.423584, 388.60144043519995, 465.273559552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049352_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference.", "boxes_value": [[69.22985840639998, 87.423584, 343.60144043519995, 465.273559552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049352.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a hat, two leather shoes, a sneakers, and a horse.", "boxes_value": [[114.22985840639998, 87.423584, 388.60144043519995, 465.273559552], [129.5290526976, 46.4569091584, 406.2531738624, 378.8439330816], [72.7058105088, 147.2506103296, 230.44439700479998, 379.1846923776], [85.6571655168, 146.9218139648, 159.0426025728, 182.1954956288], [129.7285156608, 292.3361816576, 159.97167966720002, 312.84002688], [328.615844736, 357.4359130624, 363.984985344, 376.914550784], [112.02166748159999, 362.823120128, 143.2130126592, 379.4584961024], [114.22985840639998, 87.423584, 388.60144043519995, 465.273559552]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049352_crop.jpg", "text": "Please describe the region in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a hat, two leather shoes, a sneakers, and a horse.", "boxes_value": [[69.22985840639998, 87.423584, 343.60144043519995, 465.273559552], [84.52905269760001, 46.4569091584, 361.2531738624, 378.8439330816], [27.7058105088, 147.2506103296, 185.44439700479998, 379.1846923776], [40.657165516800006, 146.9218139648, 114.0426025728, 182.1954956288], [84.72851566080001, 292.3361816576, 114.97167966720002, 312.84002688], [283.615844736, 357.4359130624, 318.984985344, 376.914550784], [67.02166748159999, 362.823120128, 98.21301265919999, 379.4584961024], [69.22985840639998, 87.423584, 343.60144043519995, 465.273559552]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049353.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[559.303466784, 56.3063354368, 839.315917916, 337.9335327232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049353_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object.", "boxes_value": [[70.30346678399997, 56.3063354368, 350.315917916, 337.9335327232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049353.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a pillow, two lamps, and a plate.", "boxes_value": [[559.303466784, 56.3063354368, 839.315917916, 337.9335327232], [656.292724618, 181.3584594944, 826.352172888, 309.638671872], [688.3599853640001, 236.3034668032, 787.3623046719999, 273.4833984512], [803.189331028, 56.3063354368, 835.9842529340001, 135.2363891712], [559.303466784, 74.5476073984, 578.886108404, 133.295288064], [768.8967284600001, 309.0951538176, 839.315917916, 337.9335327232]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049353_crop.jpg", "text": "What insights can you provide about the area in the selected picture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a chair, a pillow, two lamps, and a plate.", "boxes_value": [[70.30346678399997, 56.3063354368, 350.315917916, 337.9335327232], [167.29272461799997, 181.3584594944, 337.35217288800004, 309.638671872], [199.35998536400007, 236.3034668032, 298.36230467199994, 273.4833984512], [314.18933102799997, 56.3063354368, 346.9842529340001, 135.2363891712], [70.30346678399997, 74.5476073984, 89.88610840399997, 133.295288064], [279.8967284600001, 309.0951538176, 350.315917916, 337.9335327232]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049354.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[137.469909675, 238.1155395382, 265.9953003, 486.4935302862]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049354_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention.", "boxes_value": [[32.469909675, 62.11553953820001, 160.9953003, 310.4935302862]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049354.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, and two hockey sticks.", "boxes_value": [[137.469909675, 238.1155395382, 265.9953003, 486.4935302862], [162.422180175, 393.995422378, 230.6867676, 501.8638305422], [153.7438965, 260.8991088808, 231.4971924, 370.28790280879997], [137.469909675, 238.1155395382, 200.395935075, 337.3868408306], [212.78302005, 301.8807373008, 265.9953003, 341.6655883836], [219.573913575, 464.3875732442, 248.94763185, 486.4935302862]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049354_crop.jpg", "text": "Help me visualize the section of the photo enclosed by the bounding box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, and two hockey sticks.", "boxes_value": [[32.469909675, 62.11553953820001, 160.9953003, 310.4935302862], [57.422180174999994, 217.995422378, 125.6867676, 325.8638305422], [48.743896500000005, 84.89910888079999, 126.49719239999999, 194.28790280879997], [32.469909675, 62.11553953820001, 95.39593507500001, 161.38684083060002], [107.78302005, 125.88073730079998, 160.9953003, 165.66558838359998], [114.573913575, 288.3875732442, 143.94763185, 310.4935302862]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049355.jpg", "text": "Please enlighten me about the region in the given photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[280.80139161600005, 132.816711424, 530.9377441536, 409.9492187648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049355_crop.jpg", "text": "Please enlighten me about the region in the given photo . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[62.801391616000046, 69.816711424, 312.93774415359997, 346.9492187648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049355.jpg", "text": "Please enlighten me about the region in the given photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a flag, a slippers, and a street lights.", "boxes_value": [[280.80139161600005, 132.816711424, 530.9377441536, 409.9492187648], [518.3898925824001, 181.1677856256, 530.9377441536, 206.2635497984], [440.374877952, 178.4400024576, 452.3771972352, 205.1724242944], [512.1341552639999, 132.816711424, 527.6386718976, 162.8953857536], [514.6885986048001, 384.3884887552, 527.25231936, 409.9492187648], [280.80139161600005, 154.781860352, 319.9423828224, 345.4945678848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049355_crop.jpg", "text": "Please enlighten me about the region in the given photo . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two lamps, a flag, a slippers, and a street lights.", "boxes_value": [[62.801391616000046, 69.816711424, 312.93774415359997, 346.9492187648], [300.3898925824001, 118.1677856256, 312.93774415359997, 143.2635497984], [222.37487795200002, 115.4400024576, 234.37719723520001, 142.1724242944], [294.1341552639999, 69.816711424, 309.63867189760003, 99.89538575360001], [296.68859860480006, 321.3884887552, 309.25231936, 346.9492187648], [62.801391616000046, 91.781860352, 101.94238282240002, 282.4945678848]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049356.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[51.2150268778, 253.1202392576, 220.35668945839998, 511.367004416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049356_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[43.2150268778, 65.12023925759999, 212.35668945839998, 323.367004416]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049356.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a belt, and two sneakers.", "boxes_value": [[51.2150268778, 253.1202392576, 220.35668945839998, 511.367004416], [110.2363281516, 253.1202392576, 220.35668945839998, 511.367004416], [51.2150268778, 292.6077880832, 69.9039306806, 350.1402587648], [127.09887698680001, 361.7600097792, 173.76428220940002, 375.7370605568], [110.651184071, 481.1837768704, 141.13336180320002, 505.0949096448], [160.4813232612, 484.651794432, 183.8449096706, 511.1183471616]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049356_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, a belt, and two sneakers.", "boxes_value": [[43.2150268778, 65.12023925759999, 212.35668945839998, 323.367004416], [102.2363281516, 65.12023925759999, 212.35668945839998, 323.367004416], [43.2150268778, 104.60778808319998, 61.9039306806, 162.14025876480002], [119.09887698680001, 173.7600097792, 165.76428220940002, 187.73706055679997], [102.651184071, 293.1837768704, 133.13336180320002, 317.0949096448], [152.4813232612, 296.651794432, 175.8449096706, 323.1183471616]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049358.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[177.0678710824, 163.414553088, 338.7045898228, 361.0053100544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049358_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates.", "boxes_value": [[41.06787108239999, 49.41455308799999, 202.70458982280002, 247.0053100544]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049358.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, two people, a helmet, and a boat.", "boxes_value": [[177.0678710824, 163.414553088, 338.7045898228, 361.0053100544], [177.0678710824, 222.8465575936, 338.7045898228, 361.0053100544], [135.07354739090002, 136.7066650624, 264.8596191159, 301.8889770496], [206.5599975586, 164.4683837952, 361.3316650478, 330.3447876096], [287.77981999220003, 163.414553088, 325.7855590797, 191.9188574208], [100.3713989288, 211.3163451904, 691.0021972858001, 464.6421508608]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049358_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Please point out the objects and their coordinates. For your reference, objects involved in this region include a paddle, two people, a helmet, and a boat.", "boxes_value": [[41.06787108239999, 49.41455308799999, 202.70458982280002, 247.0053100544], [41.06787108239999, 108.84655759360001, 202.70458982280002, 247.0053100544], [0, 22.706665062399992, 128.8596191159, 187.8889770496], [70.5599975586, 50.4683837952, 225.33166504780002, 216.34478760960002], [151.77981999220003, 49.41455308799999, 189.78555907970002, 77.91885742080001], [0, 97.3163451904, 243, 296]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049359.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each mentioned object.", "boxes_value": [[98.143493632, 77.6333007534, 351.3205566464, 207.3728637498]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049359_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each mentioned object.", "boxes_value": [[64.143493632, 32.63330075339999, 317.3205566464, 162.3728637498]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049359.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a handbag, two skating and skiing shoes, a car, and a suv.", "boxes_value": [[98.143493632, 77.6333007534, 351.3205566464, 207.3728637498], [90.8367920128, 1.4067382854, 156.3082885632, 209.57250978439998], [265.0863647232, 127.06140136619999, 351.3205566464, 190.07867430160002], [116.9132080128, 149.4661865424, 148.4624023552, 194.59344479819998], [98.143493632, 156.2552489974, 143.6701049856, 207.3728637498], [183.7380981248, 77.6333007534, 276.5118408192, 113.03967284119999], [143.8498534912, 70.01416018799999, 223.626281728, 106.7651367286]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049359_crop.jpg", "text": "I'd appreciate it if you could describe the portion of that lies within the rectangle . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a person, a handbag, two skating and skiing shoes, a car, and a suv.", "boxes_value": [[64.143493632, 32.63330075339999, 317.3205566464, 162.3728637498], [56.836792012800004, 0, 122.3082885632, 164.57250978439998], [231.0863647232, 82.06140136619999, 317.3205566464, 145.07867430160002], [82.9132080128, 104.46618654240001, 114.4624023552, 149.59344479819998], [64.143493632, 111.2552489974, 109.6701049856, 162.3728637498], [149.7380981248, 32.63330075339999, 242.51184081920002, 68.03967284119999], [109.84985349120001, 25.01416018799999, 189.626281728, 61.7651367286]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049360.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[271.57556148599997, 275.4996337664, 401.2740478184, 406.4070434816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049360_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[32.57556148599997, 33.499633766399995, 162.2740478184, 164.4070434816]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049360.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a person, two leather shoes, and a bicycle.", "boxes_value": [[271.57556148599997, 275.4996337664, 401.2740478184, 406.4070434816], [334.301513696, 347.043151872, 450.4694824192, 410.6589965824], [335.9613036828, 264.160400384, 412.9339599352, 415.0267944448], [377.0601806628, 384.4631958016, 401.2740478184, 406.4070434816], [354.73791501200003, 396.9484863488, 376.3034667652, 413.59552], [271.57556148599997, 275.4996337664, 400.58764651120003, 400.507873536]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049360_crop.jpg", "text": "Tell me what you see in the area within the context of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a person, two leather shoes, and a bicycle.", "boxes_value": [[32.57556148599997, 33.499633766399995, 162.2740478184, 164.4070434816], [95.30151369599997, 105.04315187200001, 194, 168.6589965824], [96.96130368280001, 22.160400384000013, 173.9339599352, 173.02679444479998], [138.0601806628, 142.46319580160002, 162.2740478184, 164.4070434816], [115.73791501200003, 154.94848634879997, 137.30346676520003, 171.59552000000002], [32.57556148599997, 33.499633766399995, 161.58764651120003, 158.50787353599998]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049362.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[162.7620239054, 230.16711424, 320.7227172913, 440.230957056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049362_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[39.762023905400014, 53.16711423999999, 197.7227172913, 263.230957056]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049362.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, two people, and three cows.", "boxes_value": [[162.7620239054, 230.16711424, 320.7227172913, 440.230957056], [250.1439209168, 288.4674072064, 294.951660139, 310.5946655232], [272.5400390926, 213.9386596864, 310.7515869045, 270.0724487168], [145.3935547193, 217.9965210112, 181.9143676541, 298.1393432576], [162.7620239054, 259.7771606528, 315.5388793835, 440.230957056], [226.3388671754, 260.7299194368, 332.7926025206, 384.0], [255.95056152680002, 230.16711424, 320.7227172913, 276.5285033984]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049362_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a flower, two people, and three cows.", "boxes_value": [[39.762023905400014, 53.16711423999999, 197.7227172913, 263.230957056], [127.1439209168, 111.46740720640003, 171.951660139, 133.59466552319998], [149.5400390926, 36.9386596864, 187.75158690450002, 93.07244871680001], [22.3935547193, 40.9965210112, 58.914367654100005, 121.13934325759999], [39.762023905400014, 82.77716065279998, 192.5388793835, 263.230957056], [103.3388671754, 83.7299194368, 209.79260252059998, 207.0], [132.95056152680002, 53.16711423999999, 197.7227172913, 99.5285033984]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049364.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please mention the objects and their locations.", "boxes_value": [[304.2606031104, 192.6292621312, 767.9290885632, 512.0286490112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049364_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please mention the objects and their locations.", "boxes_value": [[116.26060311039998, 80.6292621312, 579.9290885632, 400]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049364.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include two people, three sneakers, and a hat.", "boxes_value": [[304.2606031104, 192.6292621312, 767.9290885632, 512.0286490112], [286.4331054336, 172.7120361472, 407.07360837120007, 413.4183960064], [697.349243136, 191.24700928, 767.517578112, 511.2438354432], [304.2606031104, 402.9191723008, 342.6216289536, 412.5094287872], [388.57104437759995, 348.7056681984, 405.8292755712, 386.2422683648], [738.500704128, 192.6292621312, 767.9290885632, 217.3015440384], [697.8212441088, 495.407054336, 747.2305204992, 512.0286490112]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00049364_crop.jpg", "text": "Please provide a detailed account of the area covered by the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include two people, three sneakers, and a hat.", "boxes_value": [[116.26060311039998, 80.6292621312, 579.9290885632, 400], [98.43310543360002, 60.712036147199996, 219.07360837120007, 301.4183960064], [509.34924313600004, 79.24700927999999, 579.517578112, 399.2438354432], [116.26060311039998, 290.9191723008, 154.6216289536, 300.5094287872], [200.57104437759995, 236.70566819840002, 217.82927557120001, 274.2422683648], [550.500704128, 80.6292621312, 579.9290885632, 105.30154403840001], [509.8212441088, 383.407054336, 559.2305204992, 400]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 6], [5]]}, {"image_path": "objects365_v1_00049365.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each object you identify.", "boxes_value": [[496.62731930549995, 230.7197876224, 716.3807372814, 442.4335327232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049365_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each object you identify.", "boxes_value": [[55.62731930549995, 53.71978762239999, 275.3807372814, 265.4335327232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049365.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a stool, a picture, a lamp, a piano, and a book.", "boxes_value": [[496.62731930549995, 230.7197876224, 716.3807372814, 442.4335327232], [603.7346191275, 370.7304077312, 705.4022217090001, 459.279663104], [626.0621337801, 230.7197876224, 661.8295898721, 261.014587392], [632.3165282934, 202.7703857664, 686.8472900304, 262.382751488], [496.62731930549995, 261.6607666176, 716.3807372814, 442.4335327232], [594.261718734, 269.9171142656, 661.2460937664, 318.9948120064]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049365_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a stool, a picture, a lamp, a piano, and a book.", "boxes_value": [[55.62731930549995, 53.71978762239999, 275.3807372814, 265.4335327232], [162.73461912749997, 193.7304077312, 264.40222170900006, 282.279663104], [185.0621337801, 53.71978762239999, 220.82958987209997, 84.01458739200001], [191.31652829339998, 25.77038576640001, 245.84729003040002, 85.382751488], [55.62731930549995, 84.66076661760002, 275.3807372814, 265.4335327232], [153.26171873400006, 92.91711426559999, 220.2460937664, 141.99481200640002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049366.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please point out the objects and their coordinates.", "boxes_value": [[453.176513664, 350.4835815424, 696.3699951360001, 470.050903296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049366_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please point out the objects and their coordinates.", "boxes_value": [[61.176513664000026, 30.483581542399975, 304.36999513600006, 150.050903296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049366.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three street lights.", "boxes_value": [[453.176513664, 350.4835815424, 696.3699951360001, 470.050903296], [489.38867189760003, 414.4225463808, 505.2840576, 457.1833496064], [468.3061523712, 350.4835815424, 491.0495605248, 398.1968384], [453.176513664, 359.098571776, 479.59374996480005, 470.050903296], [614.7625732608, 366.39501952, 637.358276352, 448.441223168], [675.5294189568, 367.9306640384, 696.3699951360001, 440.10498048]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049366_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Please point out the objects and their coordinates. For your reference, objects involved in this region include two people, and three street lights.", "boxes_value": [[61.176513664000026, 30.483581542399975, 304.36999513600006, 150.050903296], [97.38867189760003, 94.42254638079999, 113.28405759999998, 137.1833496064], [76.30615237120003, 30.483581542399975, 99.04956052479997, 78.19683839999999], [61.176513664000026, 39.09857177600003, 87.59374996480005, 150.050903296], [222.76257326079997, 46.395019520000005, 245.35827635199996, 128.44122316800002], [283.5294189568, 47.93066403839998, 304.36999513600006, 120.10498048]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049367.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[99.617248512, 304.1891479199, 365.0847734272, 681.8847656356]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049367_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations.", "boxes_value": [[66.617248512, 95.1891479199, 332.0847734272, 472.88476563560005]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049367.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, two gloves, two sneakers, and a skiboard.", "boxes_value": [[99.617248512, 304.1891479199, 365.0847734272, 681.8847656356], [96.7592163328, 51.806823726699996, 366.7390136832, 645.0518798924], [106.4028936192, 317.79892754459996, 149.414160896, 373.5180692521], [307.9536659456, 304.1891479199, 365.0847734272, 357.0435870303], [209.2729246208, 585.8859819633, 248.1961857024, 637.6129473493], [123.744179968, 587.4224265257, 158.0581074432, 640.1736881728], [99.617248512, 530.5983886833, 285.407409664, 681.8847656356]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049367_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a person, two gloves, two sneakers, and a skiboard.", "boxes_value": [[66.617248512, 95.1891479199, 332.0847734272, 472.88476563560005], [63.759216332799994, 0, 333.7390136832, 436.05187989240005], [73.4028936192, 108.79892754459996, 116.414160896, 164.51806925210002], [274.9536659456, 95.1891479199, 332.0847734272, 148.0435870303], [176.2729246208, 376.88598196329997, 215.1961857024, 428.61294734930004], [90.744179968, 378.42242652569996, 125.05810744319999, 431.1736881728], [66.617248512, 321.59838868329996, 252.407409664, 472.88476563560005]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049370.jpg", "text": "What can you tell me about the selected region in the photo ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 382.677124045, 271.7986450432, 682.0878906552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049370_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[0, 75.67712404500003, 271.7986450432, 375.0878906552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049370.jpg", "text": "What can you tell me about the selected region in the photo ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, a towel, a cup, a wine glass, and a desk.", "boxes_value": [[0, 382.677124045, 271.7986450432, 682.0878906552], [111.6535034368, 344.8314208802, 357.651062016, 482.6136474362], [0, 378.5377196971, 19.4044189696, 474.9262695451], [0, 382.677124045, 53.7021484544, 476.7001953248], [61.3895874048, 364.3455810786, 87.408569344, 432.94104004860003], [2.2497558528, 495.469116192, 271.7986450432, 630.9207763395], [23.1689453056, 443.4108887028, 183.5447387648, 682.0878906552], [127.3105468928, 378.6835937443, 207.7667846656, 498.8587646663], [1.389770496, 477.2916259929, 510.3420410368, 681.7111816637]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00049370_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four chairs, a towel, a cup, a wine glass, and a desk.", "boxes_value": [[0, 75.67712404500003, 271.7986450432, 375.0878906552], [111.6535034368, 37.83142088020003, 339, 175.61364743619998], [0, 71.53771969709999, 19.4044189696, 167.9262695451], [0, 75.67712404500003, 53.7021484544, 169.7001953248], [61.3895874048, 57.34558107859999, 87.408569344, 125.94104004860003], [2.2497558528, 188.469116192, 271.7986450432, 323.9207763395], [23.1689453056, 136.41088870279998, 183.5447387648, 375.0878906552], [127.3105468928, 71.68359374430003, 207.7667846656, 191.85876466629998], [1.389770496, 170.29162599289998, 339, 374.7111816637]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5], [6], [7], [8]]}, {"image_path": "objects365_v1_00049371.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[135.2725219513, 359.8041381888, 407.11633304139997, 462.1308593664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049371_crop.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations.", "boxes_value": [[68.2725219513, 25.804138188799982, 340.11633304139997, 128.1308593664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049371.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, two people, and two sneakers.", "boxes_value": [[135.2725219513, 359.8041381888, 407.11633304139997, 462.1308593664], [0.0729370236, 287.4713744896, 535.8394775635, 512.9143066624], [286.67462157430003, 113.8339843584, 682.966064462, 511.9593505792], [65.6756591727, 54.2034912256, 329.2675781189, 511.9547119104], [135.2725219513, 359.8041381888, 223.4214477697, 462.1308593664], [283.8311767375, 391.7897949184, 407.11633304139997, 431.8574829056]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049371_crop.jpg", "text": "What can I find in the bbox of the provided image ? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, two people, and two sneakers.", "boxes_value": [[68.2725219513, 25.804138188799982, 340.11633304139997, 128.1308593664], [0, 0, 408, 153], [219.67462157430003, 0, 408, 153], [0, 0, 262.2675781189, 153], [68.2725219513, 25.804138188799982, 156.4214477697, 128.1308593664], [216.83117673750002, 57.78979491839999, 340.11633304139997, 97.85748290560002]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049373.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object.", "boxes_value": [[268.0377197578, 189.0566406144, 435.6522216622, 511.8074951168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049373_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object.", "boxes_value": [[42.037719757800005, 81.05664061440001, 209.65222166220002, 403.8074951168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049373.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a person, a watch, a bottle, and a cup.", "boxes_value": [[268.0377197578, 189.0566406144, 435.6522216622, 511.8074951168], [0.45758056269999997, 204.2308959744, 445.9140625298, 512.119995136], [289.4274902295, 8.611389184, 682.71240236, 511.3264160256], [401.0568847624, 377.0462035968, 435.6522216622, 435.0299072512], [323.67913818259996, 189.0566406144, 355.5673827947, 240.9889526272], [268.0377197578, 476.3573608448, 344.424316408, 511.8074951168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049373_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Specify the location of each mentioned object. For your reference, objects involved in this region include a cabinet, a person, a watch, a bottle, and a cup.", "boxes_value": [[42.037719757800005, 81.05664061440001, 209.65222166220002, 403.8074951168], [0, 96.2308959744, 219.9140625298, 404], [63.427490229499995, 0, 251, 403.3264160256], [175.0568847624, 269.0462035968, 209.65222166220002, 327.0299072512], [97.67913818259996, 81.05664061440001, 129.5673827947, 132.9889526272], [42.037719757800005, 368.3573608448, 118.42431640799998, 403.8074951168]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049376.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[70.80303955000001, 197.2714844009, 136.58709715, 307.4077758773]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049376_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object.", "boxes_value": [[16.80303955000001, 28.271484400899993, 82.58709715, 138.40777587730003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049376.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a helmet, three bicycles, and a bakset.", "boxes_value": [[70.80303955000001, 197.2714844009, 136.58709715, 307.4077758773], [107.0845337, 285.6619262567, 130.76190185, 303.07171628789996], [70.80303955000001, 219.2987671098, 112.4761963, 307.4077758773], [102.35559079999999, 197.2714844009, 136.58709715, 274.9622192177], [108.90423585, 200.8434448034, 157.42370605, 299.6684570208], [91.0923462, 244.57464601430001, 110.4918823, 263.0788574372]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049376_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a helmet, three bicycles, and a bakset.", "boxes_value": [[16.80303955000001, 28.271484400899993, 82.58709715, 138.40777587730003], [53.084533699999994, 116.66192625669999, 76.76190184999999, 134.07171628789996], [16.80303955000001, 50.298767109799996, 58.4761963, 138.40777587730003], [48.35559079999999, 28.271484400899993, 82.58709715, 105.96221921770001], [54.904235850000006, 31.84344480339999, 99, 130.66845702080002], [37.092346199999994, 75.57464601430001, 56.4918823, 94.07885743719999]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049378.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[665.882934563, 292.285949696, 755.1632080300001, 350.5987548672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049378_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates.", "boxes_value": [[22.88293456300005, 15.285949695999989, 112.16320803000008, 73.5987548672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049378.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a car.", "boxes_value": [[665.882934563, 292.285949696, 755.1632080300001, 350.5987548672], [737.3308105240001, 294.2808838144, 755.1632080300001, 320.3137206784], [696.443481426, 293.2770996224, 728.821166997, 337.7137451008], [665.882934563, 299.3892211712, 696.939086921, 350.5987548672], [669.1868896789999, 292.285949696, 699.912597692, 318.8818359296], [446.19873043900003, 309.1302490112, 769.3757324329999, 511.1159057408]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049378_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Please point out the objects and their coordinates. For your reference, objects involved in this region include four people, and a car.", "boxes_value": [[22.88293456300005, 15.285949695999989, 112.16320803000008, 73.5987548672], [94.33081052400007, 17.2808838144, 112.16320803000008, 43.31372067839999], [53.44348142599995, 16.277099622399987, 85.82116699699998, 60.7137451008], [22.88293456300005, 22.389221171200006, 53.93908692100001, 73.5987548672], [26.186889678999933, 15.285949695999989, 56.91259769199996, 41.881835929600015], [0, 32.13024901120002, 126.37573243299994, 88]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049380.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe.", "boxes_value": [[416.09167483579995, 132.6712646656, 598.6190185296, 356.7153320448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049380_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe.", "boxes_value": [[46.09167483579995, 56.671264665600006, 228.6190185296, 280.7153320448]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049380.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bed, two pillows, a flower, a vase, and a person.", "boxes_value": [[416.09167483579995, 132.6712646656, 598.6190185296, 356.7153320448], [40.0086669739, 128.1027831808, 680.2141113, 510.2536621055999], [495.6291504001, 184.8930053632, 598.6190185296, 356.7153320448], [517.3657226593, 277.014587392, 631.7413330176, 358.7854614016], [500.19445797559996, 132.6712646656, 577.9642333735, 195.111389184], [528.6103515558, 191.3724975616, 555.5306396707, 223.9011841024], [416.09167483579995, 165.6028442624, 437.49035647679995, 209.351257344]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049380_crop.jpg", "text": "What does the area within the given visual contain? Provide the coordinates for each element you describe. For your reference, objects involved in this region include a bed, two pillows, a flower, a vase, and a person.", "boxes_value": [[46.09167483579995, 56.671264665600006, 228.6190185296, 280.7153320448], [0, 52.1027831808, 274, 336], [125.6291504001, 108.89300536319999, 228.6190185296, 280.7153320448], [147.3657226593, 201.014587392, 261.7413330176, 282.7854614016], [130.19445797559996, 56.671264665600006, 207.96423337349995, 119.11138918399999], [158.61035155579998, 115.3724975616, 185.5306396707, 147.9011841024], [46.09167483579995, 89.6028442624, 67.49035647679995, 133.351257344]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049383.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[57.607177705, 53.8555908096, 620.697998074, 511.7053833216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049383_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object.", "boxes_value": [[57.607177705, 53.8555908096, 620.697998074, 511.7053833216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049383.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a necklace, a glasses, a tea pot, and two microphones.", "boxes_value": [[57.607177705, 53.8555908096, 620.697998074, 511.7053833216], [506.749999987, 148.56677248, 770.3330078289999, 512.2220459008], [260.143554671, 53.8555908096, 558.5731201049999, 511.7053833216], [1.027954081, 127.6333007872, 258.356445347, 511.66229248], [51.656738286999996, 300.2797241344, 156.032287565, 379.0537719808], [15.026184096, 192.1498413056, 125.522583032, 233.4990844928], [57.607177705, 374.716247552, 188.561035124, 511.5777587712], [173.973815939, 298.926513664, 277.032470715, 411.9585571328], [524.15954591, 261.6202392576, 620.697998074, 413.3345947136]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049383_crop.jpg", "text": "In the submitted image , please give a synopsis of the area . Include the coordinates for each mentioned object. For your reference, objects involved in this region include three people, a necklace, a glasses, a tea pot, and two microphones.", "boxes_value": [[57.607177705, 53.8555908096, 620.697998074, 511.7053833216], [506.749999987, 148.56677248, 761, 512], [260.143554671, 53.8555908096, 558.5731201049999, 511.7053833216], [1.027954081, 127.6333007872, 258.356445347, 511.66229248], [51.656738286999996, 300.2797241344, 156.032287565, 379.0537719808], [15.026184096, 192.1498413056, 125.522583032, 233.4990844928], [57.607177705, 374.716247552, 188.561035124, 511.5777587712], [173.973815939, 298.926513664, 277.032470715, 411.9585571328], [524.15954591, 261.6202392576, 620.697998074, 413.3345947136]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049384.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[0.1856079137, 160.811889664, 480.3785400446, 511.6377563648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049384_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify.", "boxes_value": [[0.1856079137, 87.811889664, 480.3785400446, 438.6377563648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049384.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a picture, a moniter, a monkey, a ring, a flute, a cabinet, and two cars.", "boxes_value": [[0.1856079137, 160.811889664, 480.3785400446, 511.6377563648], [0.1856079137, 160.811889664, 480.3785400446, 511.6377563648], [189.2850342106, 158.4893188608, 303.0726318377, 201.8591919104], [256.3298339539, 392.626281728, 297.214782688, 422.4612426752], [327.7203368974, 256.9691162112, 392.91503908109996, 328.8163451904], [373.4019775565, 203.3125000192, 457.0302734248, 227.4215698432], [83.2160644566, 109.3488159232, 119.5187377779, 223.02392576], [70.98583985479999, 158.0039062528, 86.7012939316, 196.3495483392], [0, 178.6158447104, 47.06207272229999, 250.888610816]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049384_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Include the coordinates for each object you identify. For your reference, objects involved in this region include a picture, a moniter, a monkey, a ring, a flute, a cabinet, and two cars.", "boxes_value": [[0.1856079137, 87.811889664, 480.3785400446, 438.6377563648], [0.1856079137, 87.811889664, 480.3785400446, 438.6377563648], [189.2850342106, 85.48931886080001, 303.0726318377, 128.8591919104], [256.3298339539, 319.626281728, 297.214782688, 349.4612426752], [327.7203368974, 183.9691162112, 392.91503908109996, 255.8163451904], [373.4019775565, 130.3125000192, 457.0302734248, 154.4215698432], [83.2160644566, 36.34881592319999, 119.5187377779, 150.02392576], [70.98583985479999, 85.0039062528, 86.7012939316, 123.3495483392], [0, 105.61584471040001, 47.06207272229999, 177.888610816]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6], [7, 8]]}, {"image_path": "objects365_v1_00049385.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[653.0113525072, 313.7877807616, 683.20141603, 478.7719116288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049385_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object.", "boxes_value": [[8.011352507199945, 41.787780761600004, 38.20141603000002, 206.7719116288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049385.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a street lights, a traffic light, and a traffic sign.", "boxes_value": [[653.0113525072, 313.7877807616, 683.20141603, 478.7719116288], [662.563232388, 417.2659301888, 691.1706543, 487.1497802752], [665.83264158, 425.8481445376, 683.20141603, 478.7719116288], [610.3946533367999, 270.6399536128, 700.2382812776, 461.4508056576], [659.9790039216, 384.1057739264, 673.8090820696, 406.108337408], [653.0113525072, 313.7877807616, 675.3664550395999, 337.413085952]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049385_crop.jpg", "text": "Can you break down the region in the image for me? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, a street lights, a traffic light, and a traffic sign.", "boxes_value": [[8.011352507199945, 41.787780761600004, 38.20141603000002, 206.7719116288], [17.56323238799996, 145.26593018879998, 45, 215.1497802752], [20.832641579999972, 153.84814453759998, 38.20141603000002, 206.7719116288], [0, 0, 45, 189.45080565759997], [14.979003921599997, 112.10577392639999, 28.80908206959998, 134.108337408], [8.011352507199945, 41.787780761600004, 30.36645503959994, 65.41308595200002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049386.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[341.99011233079995, 134.5124511744, 457.4227294594, 276.5143432704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049386_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[28.990112330799946, 35.51245117440001, 144.4227294594, 177.5143432704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049386.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a boat.", "boxes_value": [[341.99011233079995, 134.5124511744, 457.4227294594, 276.5143432704], [356.5302734627, 155.1110229504, 395.9100341732, 291.7283935744], [341.99011233079995, 134.5124511744, 370.7675781199, 270.52392576], [379.1412353672, 202.6263427584, 401.1740722632, 276.5143432704], [436.7604980324, 170.1596069376, 457.4227294594, 195.9873657344], [231.50347897470002, 176.7256469504, 504.58020020669994, 260.5595703296]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049386_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include four people, and a boat.", "boxes_value": [[28.990112330799946, 35.51245117440001, 144.4227294594, 177.5143432704], [43.530273462699995, 56.11102295040001, 82.91003417320002, 192.7283935744], [28.990112330799946, 35.51245117440001, 57.76757811990001, 171.52392576], [66.14123536720001, 103.6263427584, 88.1740722632, 177.5143432704], [123.7604980324, 71.15960693759999, 144.4227294594, 96.98736573439999], [0, 77.7256469504, 173, 161.55957032959998]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049387.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.4955444368, 447.9545898496, 532.2254638816, 498.2288818176]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049387_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[89.4955444368, 12.954589849599984, 532.2254638816, 63.228881817599984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049387.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, two vases, and three flowers.", "boxes_value": [[89.4955444368, 447.9545898496, 532.2254638816, 498.2288818176], [226.7360839744, 459.5827026432, 272.100097636, 491.165222144], [89.4955444368, 481.9776001024, 109.59350581839999, 497.4817504768], [82.604797332, 457.8599853568, 119.3554076824, 491.739440896], [510.40478515399997, 447.9545898496, 532.2254638816, 476.6659546112], [509.25634765419994, 438.1926879744, 532.2254638816, 455.4195556864], [153.254333465, 457.580627456, 197.66003421119999, 498.2288818176]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5, 6]]}, {"image_path": "objects365_v1_00049387_crop.jpg", "text": "Could you please share some information on the region in this photograph ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a potted plant, two vases, and three flowers.", "boxes_value": [[89.4955444368, 12.954589849599984, 532.2254638816, 63.228881817599984], [226.7360839744, 24.58270264319998, 272.100097636, 56.16522214399998], [89.4955444368, 46.977600102400004, 109.59350581839999, 62.481750476800016], [82.604797332, 22.859985356799996, 119.3554076824, 56.73944089600002], [510.40478515399997, 12.954589849599984, 532.2254638816, 41.66595461119999], [509.25634765419994, 3.1926879743999734, 532.2254638816, 20.419555686399974], [153.254333465, 22.580627456000002, 197.66003421119999, 63.228881817599984]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5, 6]]}, {"image_path": "objects365_v1_00049388.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[8.884704598399999, 187.7112427008, 674.1323241873, 400.388732928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049388_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[8.884704598399999, 53.7112427008, 674.1323241873, 266.388732928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049388.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two hats, and three sneakers.", "boxes_value": [[8.884704598399999, 187.7112427008, 674.1323241873, 400.388732928], [51.691467327000005, 223.4570312704, 77.2915039261, 249.0570678784], [8.884704598399999, 374.133850112, 30.7146606195, 395.3737793024], [47.529602089, 387.4088134656, 79.3894653215, 400.388732928], [425.2995605274, 187.7112427008, 446.6184081975, 204.938598656], [649.1234130775999, 367.9661865472, 674.1323241873, 381.1287841792]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00049388_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two hats, and three sneakers.", "boxes_value": [[8.884704598399999, 53.7112427008, 674.1323241873, 266.388732928], [51.691467327000005, 89.4570312704, 77.2915039261, 115.0570678784], [8.884704598399999, 240.133850112, 30.7146606195, 261.3737793024], [47.529602089, 253.40881346560002, 79.3894653215, 266.388732928], [425.2995605274, 53.7112427008, 446.6184081975, 70.93859865600001], [649.1234130775999, 233.96618654719998, 674.1323241873, 247.1287841792]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00049389.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify.", "boxes_value": [[15.6307372839, 70.7256469504, 266.3508910943, 133.4755859456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049389_crop.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify.", "boxes_value": [[15.6307372839, 15.725646950400005, 266.3508910943, 78.47558594559999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049389.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five hats.", "boxes_value": [[15.6307372839, 70.7256469504, 266.3508910943, 133.4755859456], [15.6307372839, 117.8408813568, 42.040588354, 130.3063964672], [54.2947997792, 70.7256469504, 106.2695312697, 99.670898432], [140.0742187637, 109.8122558464, 175.3578491184, 133.4755859456], [170.075866706, 107.910766592, 196.4857788363, 122.0664672768], [218.6013183582, 79.5039062528, 266.3508910943, 109.0747070464]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049389_crop.jpg", "text": "What's inside the area of the provided graphic ? Include the coordinates for each object you identify. For your reference, objects involved in this region include five hats.", "boxes_value": [[15.6307372839, 15.725646950400005, 266.3508910943, 78.47558594559999], [15.6307372839, 62.8408813568, 42.040588354, 75.30639646719999], [54.2947997792, 15.725646950400005, 106.2695312697, 44.670898432], [140.0742187637, 54.81225584640001, 175.3578491184, 78.47558594559999], [170.075866706, 52.910766592, 196.4857788363, 67.0664672768], [218.6013183582, 24.503906252799993, 266.3508910943, 54.07470704639999]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049390.jpg", "text": "Fill me in on the details of the rectangular box within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[153.9779663011, 130.2220459008, 517.4852294915, 368.2592773632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049390_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[90.9779663011, 60.2220459008, 454.48522949150004, 298.2592773632]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049390.jpg", "text": "Fill me in on the details of the rectangular box within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a person, two stools, and a chair.", "boxes_value": [[153.9779663011, 130.2220459008, 517.4852294915, 368.2592773632], [153.9779663011, 130.2220459008, 219.4904175013, 197.47375488], [443.8562011512, 133.1207885824, 517.4852294915, 204.4308471808], [261.4304198989, 138.6958007808, 330.2150268399, 270.5803222528], [437.9458007521, 217.025512704, 516.2626952807, 369.3081665024], [322.9050903212, 216.5982055424, 404.3297118922, 368.2592773632], [227.2035522786, 242.0647583232, 316.93682862090003, 446.8757324288]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049390_crop.jpg", "text": "Fill me in on the details of the rectangular box within the image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include two cabinets, a person, two stools, and a chair.", "boxes_value": [[90.9779663011, 60.2220459008, 454.48522949150004, 298.2592773632], [90.9779663011, 60.2220459008, 156.4904175013, 127.47375488], [380.8562011512, 63.120788582399996, 454.48522949150004, 134.4308471808], [198.43041989890003, 68.6958007808, 267.2150268399, 200.5803222528], [374.9458007521, 147.025512704, 453.2626952807, 299.3081665024], [259.9050903212, 146.5982055424, 341.3297118922, 298.2592773632], [164.2035522786, 172.0647583232, 253.93682862090003, 357]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049391.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[90.9836425728, 55.6468506112, 425.455688448, 250.6063232512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049391_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[83.9836425728, 49.6468506112, 418.455688448, 244.6063232512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049391.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a bracelet, and a helmet.", "boxes_value": [[90.9836425728, 55.6468506112, 425.455688448, 250.6063232512], [220.2049560576, 74.257507328, 294.99890135040005, 234.7307739136], [187.54852293119998, 65.8300170752, 247.9454345472, 230.8681640448], [90.9836425728, 55.6468506112, 158.40344240640002, 231.219299328], [327.911987328, 230.0037231616, 349.6182861312, 250.6063232512], [367.4016113664, 66.4417724416, 425.455688448, 125.5143432704]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049391_crop.jpg", "text": "Can you share some insights about the rectangular region in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three people, a bracelet, and a helmet.", "boxes_value": [[83.9836425728, 49.6468506112, 418.455688448, 244.6063232512], [213.2049560576, 68.257507328, 287.99890135040005, 228.7307739136], [180.54852293119998, 59.830017075200004, 240.9454345472, 224.8681640448], [83.9836425728, 49.6468506112, 151.40344240640002, 225.219299328], [320.911987328, 224.0037231616, 342.6182861312, 244.6063232512], [360.4016113664, 60.441772441599994, 418.455688448, 119.5143432704]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049392.jpg", "text": "Please help me understand the content present within the rectangle in . Please mention the objects and their locations.", "boxes_value": [[533.35534668, 340.716674816, 649.661865255, 376.395751936]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049392_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Please mention the objects and their locations.", "boxes_value": [[29.355346680000025, 9.716674816000022, 145.66186525499995, 45.39575193600001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049392.jpg", "text": "Please help me understand the content present within the rectangle in . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a car, a suv, and two trucks.", "boxes_value": [[533.35534668, 340.716674816, 649.661865255, 376.395751936], [596.203124985, 356.7346801664, 612.8244629, 396.1198119936], [622.58044433, 354.2053222912, 644.4409179649999, 399.191162112], [533.35534668, 355.6860961792, 578.33618167, 376.395751936], [559.727050805, 354.5751342592, 599.558105445, 373.3761596928], [600.52722171, 342.3641357312, 628.341064455, 370.178039552], [627.371948255, 340.716674816, 649.661865255, 371.1471557632]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049392_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Please mention the objects and their locations. For your reference, objects involved in this region include two people, a car, a suv, and two trucks.", "boxes_value": [[29.355346680000025, 9.716674816000022, 145.66186525499995, 45.39575193600001], [92.20312498500004, 25.734680166399983, 108.82446289999996, 54], [118.58044432999998, 23.20532229119999, 140.44091796499993, 54], [29.355346680000025, 24.68609617919998, 74.33618166999997, 45.39575193600001], [55.727050804999976, 23.575134259200013, 95.55810544500002, 42.3761596928], [96.52722171000005, 11.364135731200008, 124.34106445500004, 39.17803955199997], [123.371948255, 9.716674816000022, 145.66186525499995, 40.147155763199976]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049393.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[124.604042112, 147.435852032, 384.485839872, 264.9158935552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049393_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[65.604042112, 29.435852032000014, 325.485839872, 146.9158935552]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049393.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, three people, and a coffee machine.", "boxes_value": [[124.604042112, 147.435852032, 384.485839872, 264.9158935552], [228.8652953856, 182.6025390592, 370.3585204992, 351.990234368], [199.66064455679998, 188.5913085952, 221.7349243392, 209.9172973568], [238.1970825216, 147.435852032, 274.488647424, 190.8361206272], [355.67712399360005, 216.2776489472, 384.485839872, 264.9158935552], [124.604042112, 180.1068793856, 152.69262696959998, 246.112923392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049393_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a cabinet, three people, and a coffee machine.", "boxes_value": [[65.604042112, 29.435852032000014, 325.485839872, 146.9158935552], [169.8652953856, 64.60253905920001, 311.3585204992, 176], [140.66064455679998, 70.59130859519999, 162.7349243392, 91.91729735679999], [179.1970825216, 29.435852032000014, 215.48864742400002, 72.83612062719999], [296.67712399360005, 98.27764894719999, 325.485839872, 146.9158935552], [65.604042112, 62.106879385599996, 93.69262696959998, 128.112923392]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049396.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object.", "boxes_value": [[255.14428713, 300.1767578112, 682.5463867111, 511.8742065664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049396_crop.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object.", "boxes_value": [[107.14428713000001, 53.17675781119999, 534.5463867111, 264.8742065664]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049396.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two beds, a nightstand, a chair, and a pillow.", "boxes_value": [[255.14428713, 300.1767578112, 682.5463867111, 511.8742065664], [334.9934082179, 339.3494262784, 667.2408447145999, 510.04534912], [463.015380842, 413.1144409088, 681.8719482527, 511.8742065664], [629.567382812, 378.0164184576, 682.5463867111, 428.6920165888], [255.14428713, 300.1767578112, 324.5111084054, 431.7167358464], [526.8964843566, 325.215820288, 649.0256347477, 368.2765502976]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049396_crop.jpg", "text": "Please describe the content within the area displayed in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two beds, a nightstand, a chair, and a pillow.", "boxes_value": [[107.14428713000001, 53.17675781119999, 534.5463867111, 264.8742065664], [186.99340821790003, 92.34942627840002, 519.2408447145999, 263.04534912], [315.015380842, 166.11444090880002, 533.8719482527, 264.8742065664], [481.567382812, 131.01641845760003, 534.5463867111, 181.69201658880002], [107.14428713000001, 53.17675781119999, 176.51110840540002, 184.7167358464], [378.8964843566, 78.21582028799997, 501.0256347477, 121.27655029760001]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049398.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for all objects that you mention.", "boxes_value": [[359.2753906176, 204.835083008, 600.4265136384, 249.258667008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049398_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for all objects that you mention.", "boxes_value": [[61.27539061760001, 11.835083007999998, 302.4265136384, 56.258667008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049398.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two hats, a sneakers, and a boat.", "boxes_value": [[359.2753906176, 204.835083008, 600.4265136384, 249.258667008], [436.97277834240003, 171.3004760576, 563.02392576, 240.6053466624], [359.2753906176, 204.835083008, 392.30664061440007, 228.245544448], [523.0178222592, 221.5083618304, 534.9942627072, 249.258667008], [578.8105468416, 226.4742431744, 600.4265136384, 241.6638793728], [97.83898928640001, 206.6903686656, 482.235351552, 285.9473266688]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049398_crop.jpg", "text": "Please help me understand the content present within the rectangle in . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a person, two hats, a sneakers, and a boat.", "boxes_value": [[61.27539061760001, 11.835083007999998, 302.4265136384, 56.258667008], [138.97277834240003, 0, 265.02392576, 47.605346662399995], [61.27539061760001, 11.835083007999998, 94.30664061440007, 35.245544448000004], [225.0178222592, 28.50836183039999, 236.99426270720005, 56.258667008], [280.81054684159994, 33.47424317439999, 302.4265136384, 48.66387937280001], [0, 13.690368665600005, 184.235351552, 67]], "boxes_seq": [[0], [0], [1], [2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049399.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[694.4570312644, 125.8593750016, 832.9858398578999, 386.5864258048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049399_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates.", "boxes_value": [[35.45703126440003, 65.8593750016, 173.98583985789992, 326.5864258048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049399.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two boots, a handbag, and two swings.", "boxes_value": [[694.4570312644, 125.8593750016, 832.9858398578999, 386.5864258048], [713.3185794195, 260.1622985216, 735.2155659096, 294.8669564416], [751.3284428605, 256.4439422976, 769.5070731662, 293.627504384], [769.5070731662, 273.3831205888, 792.2302602234, 302.3036688384], [744.0120849791999, 121.917480448, 910.697143516, 355.0513915904], [694.4570312644, 125.8593750016, 832.9858398578999, 386.5864258048]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049399_crop.jpg", "text": "In the photo , can you delve into the details of the region ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two boots, a handbag, and two swings.", "boxes_value": [[35.45703126440003, 65.8593750016, 173.98583985789992, 326.5864258048], [54.3185794195, 200.1622985216, 76.21556590959995, 234.8669564416], [92.32844286049999, 196.44394229760002, 110.50707316620003, 233.62750438400002], [110.50707316620003, 213.3831205888, 133.23026022340002, 242.30366883839997], [85.01208497919993, 61.917480448000006, 208, 295.0513915904], [35.45703126440003, 65.8593750016, 173.98583985789992, 326.5864258048]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049401.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[0, 423.225280768, 150.8239135543, 511.974182144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049401_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates.", "boxes_value": [[0, 22.225280768000005, 150.8239135543, 110.974182144]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049401.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a carpet, a person, and three sneakers.", "boxes_value": [[0, 423.225280768, 150.8239135543, 511.974182144], [0, 423.225280768, 150.8239135543, 511.974182144], [23.0897216825, 306.6869507072, 83.8475341556, 511.595642112], [29.8004575885, 479.9674945024, 67.8258917252, 497.6880851456], [44.1984375176, 499.5339800064, 75.9477840396, 512.0860651008], [0, 433.201232896, 22.629882780299997, 447.8144531456]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049401_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a carpet, a person, and three sneakers.", "boxes_value": [[0, 22.225280768000005, 150.8239135543, 110.974182144], [0, 22.225280768000005, 150.8239135543, 110.974182144], [23.0897216825, 0, 83.8475341556, 110.59564211200001], [29.8004575885, 78.96749450239997, 67.8258917252, 96.68808514559998], [44.1984375176, 98.53398000639999, 75.9477840396, 111], [0, 32.20123289600002, 22.629882780299997, 46.8144531456]], "boxes_seq": [[0], [0], [1], [2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049403.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[113.9075927552, 143.30718995200002, 202.2849731584, 303.676574704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049403_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference.", "boxes_value": [[22.9075927552, 40.307189952000016, 111.28497315839999, 200.67657470400002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049403.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a hat, three cups, and a chair.", "boxes_value": [[113.9075927552, 143.30718995200002, 202.2849731584, 303.676574704], [118.6521606656, 131.4569701968, 175.4514160128, 196.8374023632], [114.974487296, 143.30718995200002, 138.2662963712, 185.804443344], [148.2790527488, 131.4636840688, 167.579467776, 152.9788818528], [182.8796386816, 254.7532348384, 202.2849731584, 303.676574704], [169.4848022528, 208.876281744, 181.8178100736, 236.8310546624], [113.9075927552, 202.18237304960002, 125.8902587904, 229.99926756960002], [102.6590576128, 224.21289061279998, 175.4447021568, 370.67187500800003]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00049403_crop.jpg", "text": "Could you give me a description of the rectangular region found in ? Give coordinates for the items you reference. For your reference, objects involved in this region include two people, a hat, three cups, and a chair.", "boxes_value": [[22.9075927552, 40.307189952000016, 111.28497315839999, 200.67657470400002], [27.652160665599993, 28.4569701968, 84.4514160128, 93.8374023632], [23.974487296000007, 40.307189952000016, 47.26629637120001, 82.80444334399999], [57.27905274880001, 28.463684068800006, 76.579467776, 49.97888185279999], [91.8796386816, 151.7532348384, 111.28497315839999, 200.67657470400002], [78.4848022528, 105.87628174400001, 90.81781007359999, 133.8310546624], [22.9075927552, 99.18237304960002, 34.8902587904, 126.99926756960002], [11.659057612799998, 121.21289061279998, 84.44470215679999, 240]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5, 6], [7]]}, {"image_path": "objects365_v1_00049406.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.211914087, 0.6757812736, 715.3713379059, 512.0603027456]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049406_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0.211914087, 0.6757812736, 715.3713379059, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049406.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a chair, two people, and a trash bin can.", "boxes_value": [[0.211914087, 0.6757812736, 715.3713379059, 512.0603027456], [0.211914087, 0.6757812736, 193.8000488718, 510.5602417152], [0.34497073860000005, 397.5792846848, 121.39477543569998, 511.292785664], [208.8948974432, 12.2725830144, 715.3713379059, 511.3493042176], [670.9863281677, 71.5344848384, 732.6116943292, 174.6238403072], [155.2795410521, 459.6029052928, 219.0086670329, 512.0603027456]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049406_crop.jpg", "text": "Could you please describe the contents of the bounding box in the given image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a cabinet, a chair, two people, and a trash bin can.", "boxes_value": [[0.211914087, 0.6757812736, 715.3713379059, 512], [0.211914087, 0.6757812736, 193.8000488718, 510.5602417152], [0.34497073860000005, 397.5792846848, 121.39477543569998, 511.292785664], [208.8948974432, 12.2725830144, 715.3713379059, 511.3493042176], [670.9863281677, 71.5344848384, 732.6116943292, 174.6238403072], [155.2795410521, 459.6029052928, 219.0086670329, 512]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049408.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[520.225219756, 352.7626342912, 717.30908202, 471.8138427904]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049408_crop.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations.", "boxes_value": [[50.225219756, 30.762634291200015, 247.30908202, 149.81384279039997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049408.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two benches, and four people.", "boxes_value": [[520.225219756, 352.7626342912, 717.30908202, 471.8138427904], [612.330078112, 386.6125488128, 678.474731448, 417.2496948224], [525.715576188, 385.3933105664, 571.437744136, 414.0458374144], [520.225219756, 372.3011474432, 529.994506832, 411.802856448], [533.39245604, 379.097106944, 550.382446268, 400.3345947136], [664.336669948, 352.7626342912, 717.30908202, 471.8138427904], [691.642089864, 338.0177612288, 721.131835964, 461.983947776]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049408_crop.jpg", "text": "Please enlighten me about the region in the given photo . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two benches, and four people.", "boxes_value": [[50.225219756, 30.762634291200015, 247.30908202, 149.81384279039997], [142.33007811200002, 64.61254881280001, 208.47473144799994, 95.24969482239999], [55.715576188, 63.393310566399975, 101.43774413599999, 92.04583741440001], [50.225219756, 50.30114744320002, 59.994506831999956, 89.802856448], [63.392456039999956, 57.09710694400002, 80.38244626799997, 78.33459471359998], [194.336669948, 30.762634291200015, 247.30908202, 149.81384279039997], [221.642089864, 16.017761228799998, 251.13183596399995, 139.98394777599998]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049409.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for each element you describe.", "boxes_value": [[74.2791137792, 248.29547116299997, 437.7261962752, 682.7139892422]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049409_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for each element you describe.", "boxes_value": [[74.2791137792, 109.29547116299997, 437.7261962752, 543.7139892422]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049409.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, two desks, a lamp, a cabinet, a picture, and a bottle.", "boxes_value": [[74.2791137792, 248.29547116299997, 437.7261962752, 682.7139892422], [208.898376448, 457.25683593089997, 401.1356811776, 682.0235595682], [153.9773559808, 434.1748046918, 437.7261962752, 682.7139892422], [167.8262939648, 324.5537109402, 275.5266113536, 481.3701171907], [74.2791137792, 316.7454833706, 123.9869384704, 482.1000976872], [90.5102539264, 178.2736816419, 112.8280029184, 317.2527465827], [198.5064697344, 446.8649902479, 385.5613403136, 660.7656249666], [412.3558960128, 248.29547116299997, 435.5579223552, 277.1223144371], [243.4713134592, 242.40124509179998, 285.3940429824, 296.0750732235]], "boxes_seq": [[0], [0], [1, 6], [2, 4], [3], [5], [7], [8]]}, {"image_path": "objects365_v1_00049409_crop.jpg", "text": "Help me grasp the context of the region within image . Provide the coordinates for each element you describe. For your reference, objects involved in this region include two chairs, two desks, a lamp, a cabinet, a picture, and a bottle.", "boxes_value": [[74.2791137792, 109.29547116299997, 437.7261962752, 543.7139892422], [208.898376448, 318.25683593089997, 401.1356811776, 543.0235595682], [153.9773559808, 295.1748046918, 437.7261962752, 543.7139892422], [167.8262939648, 185.5537109402, 275.5266113536, 342.3701171907], [74.2791137792, 177.74548337060003, 123.9869384704, 343.1000976872], [90.5102539264, 39.2736816419, 112.8280029184, 178.2527465827], [198.5064697344, 307.8649902479, 385.5613403136, 521.7656249666], [412.3558960128, 109.29547116299997, 435.5579223552, 138.1223144371], [243.4713134592, 103.40124509179998, 285.3940429824, 157.0750732235]], "boxes_seq": [[0], [0], [1, 6], [2, 4], [3], [5], [7], [8]]}, {"image_path": "objects365_v1_00049410.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[501.3176269824, 0, 768.3898925568, 511.122619648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049410_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference.", "boxes_value": [[67.31762698239999, 0, 334, 511.122619648]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049410.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include six cabinets, a towel, and a person.", "boxes_value": [[501.3176269824, 0, 768.3898925568, 511.122619648], [514.7915039232, 106.6060790784, 584.15332032, 237.1158447104], [586.891235328, 96.5668335104, 685.4581298687999, 236.20318602239996], [653.5151367168, 103.8681030144, 736.5668945663999, 237.11584471039998], [726.5275878912, 105.6934203904, 768.5097656063999, 237.11584471039998], [665.2453613568, 349.7037963776, 767.1058349568, 422.6950073344], [501.3176269824, 0, 768.3898925568, 511.122619648], [673.5218505984, 292.4366455296, 704.5267333632, 351.8626708992], [379.7232665856, 107.1748046848, 624.593627904, 512.0371093504]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7], [8]]}, {"image_path": "objects365_v1_00049410_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include six cabinets, a towel, and a person.", "boxes_value": [[67.31762698239999, 0, 334, 511.122619648], [80.79150392320003, 106.6060790784, 150.15332032000003, 237.1158447104], [152.891235328, 96.5668335104, 251.45812986879992, 236.20318602239996], [219.51513671680004, 103.8681030144, 302.5668945663999, 237.11584471039998], [292.5275878912, 105.6934203904, 334, 237.11584471039998], [231.2453613568, 349.7037963776, 333.1058349568, 422.6950073344], [67.31762698239999, 0, 334, 511.122619648], [239.52185059839996, 292.4366455296, 270.52673336320004, 351.8626708992], [0, 107.1748046848, 190.59362790399996, 512]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6], [7], [8]]}, {"image_path": "objects365_v1_00049414.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[39.98876955, 46.547912625, 179.93536375, 471.27832035]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049414_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object.", "boxes_value": [[34.98876955, 46.547912625, 174.93536375, 471.27832035]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049414.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a chair, two people, a bottle, and two laptops.", "boxes_value": [[39.98876955, 46.547912625, 179.93536375, 471.27832035], [41.53356935, 46.547912625, 88.78234864999999, 61.50006104999999], [55.6849365, 273.748352025, 210.3059082, 373.9822998], [55.4418335, 199.09832759999998, 214.65216065, 552.4437255749999], [84.4526367, 112.01922607499999, 266.62023925, 317.65625002499996], [39.98876955, 382.26733394999997, 79.95288085, 471.27832035], [85.8789673, 381.21752932500004, 273.91674804999997, 492.20373532499997], [82.8546753, 293.10754395000004, 179.93536375, 359.13366697500004]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049414_crop.jpg", "text": "Can you elaborate on the content of the bounding box in ? Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, a chair, two people, a bottle, and two laptops.", "boxes_value": [[34.98876955, 46.547912625, 174.93536375, 471.27832035], [36.53356935, 46.547912625, 83.78234864999999, 61.50006104999999], [50.6849365, 273.748352025, 205.3059082, 373.9822998], [50.4418335, 199.09832759999998, 209, 552.4437255749999], [79.4526367, 112.01922607499999, 209, 317.65625002499996], [34.98876955, 382.26733394999997, 74.95288085, 471.27832035], [80.8789673, 381.21752932500004, 209, 492.20373532499997], [77.8546753, 293.10754395000004, 174.93536375, 359.13366697500004]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6, 7]]}, {"image_path": "objects365_v1_00049415.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[166.836792, 241.47052, 356.7437744, 489.20910645]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049415_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object.", "boxes_value": [[47.836792, 62.47051999999999, 237.7437744, 310.20910645]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049415.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a potted plant, two cabinets, and two people.", "boxes_value": [[166.836792, 241.47052, 356.7437744, 489.20910645], [194.16540528, 241.47052, 356.7437744, 489.20910645], [199.12158200000002, 360.0079956, 236.80371096, 393.42425534999995], [186.3239136, 393.42425534999995, 242.4916992, 424.70758055000005], [164.14678952, 377.57318115, 174.90679928, 419.9407959], [166.836792, 320.07421875, 177.93310544000002, 360.08813475]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049415_crop.jpg", "text": "In the image , elaborate on the details found within the section . Specify the location of each mentioned object. For your reference, objects involved in this region include a potted plant, two cabinets, and two people.", "boxes_value": [[47.836792, 62.47051999999999, 237.7437744, 310.20910645], [75.16540527999999, 62.47051999999999, 237.7437744, 310.20910645], [80.12158200000002, 181.00799560000002, 117.80371095999999, 214.42425534999995], [67.3239136, 214.42425534999995, 123.4916992, 245.70758055000005], [45.14678952, 198.57318114999998, 55.90679928, 240.9407959], [47.836792, 141.07421875, 58.93310544000002, 181.08813475]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049417.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[373.4675293011, 133.0043335168, 518.8231201484, 450.5356445184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049417_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[36.4675293011, 80.00433351679999, 181.8231201484, 397.5356445184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049417.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a power outlet, two pictures, and a lamp.", "boxes_value": [[373.4675293011, 133.0043335168, 518.8231201484, 450.5356445184], [354.2661133001, 330.2603759616, 475.5295410427, 507.017211904], [489.42382814219997, 426.1286621184, 518.8231201484, 450.5356445184], [391.43139649740004, 187.7681274368, 432.4770507603, 273.6775512576], [373.4675293011, 133.0043335168, 425.422241242, 169.8226318336], [460.75744629269997, 191.1614380032, 491.05712893460003, 266.349365248]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049417_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a cabinet, a power outlet, two pictures, and a lamp.", "boxes_value": [[36.4675293011, 80.00433351679999, 181.8231201484, 397.5356445184], [17.266113300100017, 277.2603759616, 138.52954104269998, 454.017211904], [152.42382814219997, 373.1286621184, 181.8231201484, 397.5356445184], [54.431396497400044, 134.7681274368, 95.47705076030002, 220.6775512576], [36.4675293011, 80.00433351679999, 88.42224124199998, 116.82263183360001], [123.75744629269997, 138.1614380032, 154.05712893460003, 213.34936524800003]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049420.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[28.6384277523, 72.5113525248, 420.6705322434, 167.4080810496]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049420_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify.", "boxes_value": [[28.6384277523, 24.511352524800003, 420.6705322434, 119.40808104960001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049420.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pictures, a potted plant, a router, and a moniter.", "boxes_value": [[28.6384277523, 72.5113525248, 420.6705322434, 167.4080810496], [248.38812257179998, 87.7580566528, 325.6245727753, 167.4080810496], [116.46862793250001, 72.5113525248, 244.1237792878, 163.548278784], [185.4916382091, 149.262756352, 212.2559814418, 166.7898559488], [28.6384277523, 108.6541748224, 81.9346923912, 156.0878296064], [382.92443848159996, 81.5504150528, 420.6705322434, 122.6721801728]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00049420_crop.jpg", "text": "What details can you provide about the region in the snapshot ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two pictures, a potted plant, a router, and a moniter.", "boxes_value": [[28.6384277523, 24.511352524800003, 420.6705322434, 119.40808104960001], [248.38812257179998, 39.75805665279999, 325.6245727753, 119.40808104960001], [116.46862793250001, 24.511352524800003, 244.1237792878, 115.54827878399999], [185.4916382091, 101.262756352, 212.2559814418, 118.78985594880001], [28.6384277523, 60.654174822399995, 81.9346923912, 108.08782960639999], [382.92443848159996, 33.550415052800005, 420.6705322434, 74.6721801728]], "boxes_seq": [[0], [0], [1, 3], [2], [4], [5]]}, {"image_path": "objects365_v1_00049422.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[359.7502441104, 147.4586791936, 647.0307617464, 320.7512206848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049422_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[72.75024411039999, 43.458679193600005, 360.0307617464, 216.7512206848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049422.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a pillow, a bed, a cabinet, a cup, and a bottle.", "boxes_value": [[359.7502441104, 147.4586791936, 647.0307617464, 320.7512206848], [385.1461181615, 241.3041381888, 533.9534912224999, 399.6777344], [470.5802001886, 293.4192504832, 529.9296874932, 320.7512206848], [410.625122055, 0.3366088704, 683.0046386628, 511.6431274496], [590.1258545184, 255.4577636864, 647.0307617464, 310.465881344], [401.7954101353, 152.2574462976, 425.2235107127, 182.3235473408], [359.7502441104, 147.4586791936, 379.3797607549, 182.123657216]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049422_crop.jpg", "text": "What objects or scenery can be found in the area in the image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a pillow, a bed, a cabinet, a cup, and a bottle.", "boxes_value": [[72.75024411039999, 43.458679193600005, 360.0307617464, 216.7512206848], [98.1461181615, 137.3041381888, 246.9534912224999, 260], [183.58020018859997, 189.41925048320002, 242.92968749320005, 216.7512206848], [123.62512205500002, 0, 396, 260], [303.12585451840005, 151.4577636864, 360.0307617464, 206.46588134400002], [114.79541013530002, 48.25744629760001, 138.22351071269998, 78.32354734079999], [72.75024411039999, 43.458679193600005, 92.37976075490002, 78.123657216]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049424.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[93.04345702399999, 186.529296864, 238.334533696, 480.229248048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049424_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[37.04345702399999, 73.529296864, 182.334533696, 367]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049424.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, three leather shoes, and a gloves.", "boxes_value": [[93.04345702399999, 186.529296864, 238.334533696, 480.229248048], [9.91711424, 108.051818832, 289.709838848, 478.888732896], [184.084167488, 186.529296864, 208.40332031999998, 223.943298336], [206.532592768, 225.813964848, 278.866394048, 259.486572288], [200.920532224, 285.052856448, 238.334533696, 313.113342288], [93.04345702399999, 437.8267212, 187.202026368, 480.229248048]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00049424_crop.jpg", "text": "What's happening in the section of the photo enclosed by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, three leather shoes, and a gloves.", "boxes_value": [[37.04345702399999, 73.529296864, 182.334533696, 367], [0, 0, 218, 365.888732896], [128.084167488, 73.529296864, 152.40332031999998, 110.943298336], [150.532592768, 112.81396484800001, 218, 146.486572288], [144.920532224, 172.052856448, 182.334533696, 200.113342288], [37.04345702399999, 324.8267212, 131.202026368, 367]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00049427.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference.", "boxes_value": [[124.7978515456, 756.0449218399999, 494.111389184, 800.48486328]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049427_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference.", "boxes_value": [[92.7978515456, 12.044921839999915, 462.111389184, 56]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049427.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three suvs, a van, and two cars.", "boxes_value": [[124.7978515456, 756.0449218399999, 494.111389184, 800.48486328], [124.7978515456, 759.034668, 139.0736694272, 796.15173336], [288.9234619392, 762.53149416, 354.8370361344, 781.9840088000001], [264.4289550848, 768.31896976, 346.3164673024, 799.6680908], [334.3342285312, 756.0449218399999, 417.0132446208, 800.48486328], [372.1598510592, 779.6083984, 494.111389184, 800.0715332000001], [438.7715453952, 767.2277832, 499.5406494208, 795.5749512], [479.8748168704, 762.62146, 512.4739990016, 799.29541016]], "boxes_seq": [[0], [0], [1], [2, 3, 7], [4], [5, 6]]}, {"image_path": "objects365_v1_00049427_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Give coordinates for the items you reference. For your reference, objects involved in this region include a person, three suvs, a van, and two cars.", "boxes_value": [[92.7978515456, 12.044921839999915, 462.111389184, 56], [92.7978515456, 15.03466800000001, 107.0736694272, 52.15173335999998], [256.9234619392, 18.531494159999966, 322.8370361344, 37.98400880000008], [232.4289550848, 24.31896975999996, 314.3164673024, 55.66809079999996], [302.3342285312, 12.044921839999915, 385.0132446208, 56], [340.1598510592, 35.608398400000056, 462.111389184, 56], [406.7715453952, 23.227783199999976, 467.5406494208, 51.57495119999999], [447.8748168704, 18.621459999999956, 480, 55.29541015999996]], "boxes_seq": [[0], [0], [1], [2, 3, 7], [4], [5, 6]]}, {"image_path": "objects365_v1_00049428.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[501.73864742399996, 17.226013184, 633.7044677376, 411.8576659968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049428_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations.", "boxes_value": [[33.738647423999964, 17.226013184, 165.7044677376, 411.8576659968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049428.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, three people, a flag, and a sneakers.", "boxes_value": [[501.73864742399996, 17.226013184, 633.7044677376, 411.8576659968], [588.7257080064, 395.400695808, 609.604858368, 416.2798461952], [536.5628662272, 17.226013184, 575.2880859648001, 126.5023803904], [558.8581542912, 245.0948486144, 633.7044677376, 411.8576659968], [501.73864742399996, 207.343383808, 556.8885497855999, 356.0512084992], [530.381591808, 298.686340352, 559.0267333632, 342.2269286912], [568.0445556479999, 384.9536132608, 579.90515136, 411.3106079232]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049428_crop.jpg", "text": "Please tell me about the area in the image . What does it contain? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, three people, a flag, and a sneakers.", "boxes_value": [[33.738647423999964, 17.226013184, 165.7044677376, 411.8576659968], [120.72570800640005, 395.400695808, 141.604858368, 416.2798461952], [68.56286622719995, 17.226013184, 107.28808596480008, 126.5023803904], [90.85815429119998, 245.0948486144, 165.7044677376, 411.8576659968], [33.738647423999964, 207.343383808, 88.88854978559993, 356.0512084992], [62.38159180800005, 298.686340352, 91.02673336320004, 342.2269286912], [100.04455564799991, 384.9536132608, 111.90515135999999, 411.3106079232]], "boxes_seq": [[0], [0], [1], [2, 3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049429.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[218.6401367315, 399.7928466944, 556.2396239959, 512.4354248192]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049429_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention.", "boxes_value": [[84.64013673150001, 28.792846694399998, 422.23962399590005, 141]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049429.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a person, two cups, and a chair.", "boxes_value": [[218.6401367315, 399.7928466944, 556.2396239959, 512.4354248192], [193.55999752990002, 290.6729736192, 552.3041992372, 512.57043456], [350.8247070175, 131.5304565248, 683.4313964600001, 510.6148681728], [363.1146240435, 399.7928466944, 403.7303466765, 449.4342651392], [270.3190917936, 412.4852905472, 335.4733886535, 452.818908672], [218.6401367315, 461.1168212992, 556.2396239959, 512.4354248192]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049429_crop.jpg", "text": "Could you please provide a description of the rectangular area in ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, a person, two cups, and a chair.", "boxes_value": [[84.64013673150001, 28.792846694399998, 422.23962399590005, 141], [59.559997529900016, 0, 418.3041992372, 141], [216.82470701749997, 0, 506, 139.61486817280002], [229.1146240435, 28.792846694399998, 269.7303466765, 78.43426513920002], [136.31909179360002, 41.48529054720001, 201.47338865350002, 81.81890867200002], [84.64013673150001, 90.11682129920001, 422.23962399590005, 141]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049431.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[361.3249511524, 117.6211547648, 454.9256591936, 216.171630848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049431_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify.", "boxes_value": [[24.32495115239999, 25.621154764799996, 117.92565919359998, 124.171630848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049431.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a vase, a cabinet, and two bottles.", "boxes_value": [[361.3249511524, 117.6211547648, 454.9256591936, 216.171630848], [361.3249511524, 169.9965820416, 429.22949217170003, 216.171630848], [435.4136962745, 115.1380004864, 456.3171386452, 179.2118530048], [236.8484496961, 175.7755126784, 496.25988771820005, 308.4025268736], [418.7543945259, 150.5410766848, 448.01647948170006, 186.7123412992], [431.3533935716, 117.6211547648, 454.9256591936, 179.3967895552]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049431_crop.jpg", "text": "In relation to the picture , please describe the content of the area marked by . Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a vase, a cabinet, and two bottles.", "boxes_value": [[24.32495115239999, 25.621154764799996, 117.92565919359998, 124.171630848], [24.32495115239999, 77.99658204159999, 92.22949217170003, 124.171630848], [98.41369627450001, 23.138000486400003, 119.31713864519998, 87.21185300479999], [0, 83.77551267839999, 141, 148], [81.75439452590001, 58.541076684800004, 111.01647948170006, 94.7123412992], [94.35339357160001, 25.621154764799996, 117.92565919359998, 87.3967895552]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049432.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[291.5034179826, 218.4897460736, 646.7225341597, 508.4660644352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049432_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations.", "boxes_value": [[89.50341798260001, 73.4897460736, 444.7225341597, 363.4660644352]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049432.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, and five boats.", "boxes_value": [[291.5034179826, 218.4897460736, 646.7225341597, 508.4660644352], [523.8317871338, 242.7680664064, 576.2403564678, 344.6915283456], [297.9940796093, 279.6687622144, 580.1883545052, 410.708068864], [458.670898436, 249.172363264, 511.06579588579996, 269.370361344], [553.0716552607, 218.4897460736, 646.7225341597, 284.7305908224], [611.8732909854, 207.259704576, 681.6641845703, 298.6816406016], [291.5034179826, 372.9840088064, 560.8829345426, 508.4660644352]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049432_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Please mention the objects and their locations. For your reference, objects involved in this region include a person, and five boats.", "boxes_value": [[89.50341798260001, 73.4897460736, 444.7225341597, 363.4660644352], [321.83178713380005, 97.76806640640001, 374.2403564678, 199.69152834559998], [95.9940796093, 134.66876221439998, 378.1883545052, 265.708068864], [256.670898436, 104.17236326400001, 309.06579588579996, 124.370361344], [351.0716552607, 73.4897460736, 444.7225341597, 139.73059082240002], [409.8732909854, 62.25970457599999, 479.6641845703, 153.6816406016], [89.50341798260001, 227.98400880640003, 358.88293454259997, 363.4660644352]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049437.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[324.470581074, 228.0526123008, 532.9921875195, 307.6076660224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049437_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify.", "boxes_value": [[52.470581073999995, 20.052612300800007, 260.9921875195, 99.6076660224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049437.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a backpack, and a trash bin can.", "boxes_value": [[324.470581074, 228.0526123008, 532.9921875195, 307.6076660224], [461.73413085100003, 230.760436992, 475.53320315450003, 256.5361938432], [362.935791046, 228.794555648, 396.92797848649997, 307.2138061312], [324.470581074, 228.0526123008, 343.8596191675, 307.6076660224], [368.82158634750004, 240.1060272128, 389.502737338, 272.1448595456], [516.6845703455, 245.8063964672, 532.9921875195, 269.9140624896]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049437_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each object you identify. For your reference, objects involved in this region include three people, a backpack, and a trash bin can.", "boxes_value": [[52.470581073999995, 20.052612300800007, 260.9921875195, 99.6076660224], [189.73413085100003, 22.760436991999995, 203.53320315450003, 48.53619384320001], [90.93579104600002, 20.794555648, 124.92797848649997, 99.21380613119999], [52.470581073999995, 20.052612300800007, 71.8596191675, 99.6076660224], [96.82158634750004, 32.1060272128, 117.50273733799997, 64.14485954560001], [244.6845703455, 37.80639646719999, 260.9921875195, 61.914062489599985]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049438.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[482.54125977600006, 86.4579467776, 767.4357910271999, 512.4425048576]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049438_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention.", "boxes_value": [[71.54125977600006, 86.4579467776, 356.4357910271999, 512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049438.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a stool, a trombone, and two people.", "boxes_value": [[482.54125977600006, 86.4579467776, 767.4357910271999, 512.4425048576], [443.0496826368, 459.2923584, 536.9416503552, 511.8718872064], [533.185913088, 481.0753174016, 560.2268066304, 511.8718872064], [339.3718262016, 97.3501586944, 767.7329101824, 352.4259643392], [510.7756347648, 86.4579467776, 767.4357910271999, 512.4425048576], [482.54125977600006, 423.7371215872, 564.3176269824, 512.2480468992]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049438_crop.jpg", "text": "In the image , could you provide a description for the coordinates ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a chair, a stool, a trombone, and two people.", "boxes_value": [[71.54125977600006, 86.4579467776, 356.4357910271999, 512], [32.049682636800014, 459.2923584, 125.94165035519995, 511.8718872064], [122.185913088, 481.0753174016, 149.2268066304, 511.8718872064], [0, 97.3501586944, 356.7329101824, 352.4259643392], [99.77563476479997, 86.4579467776, 356.4357910271999, 512], [71.54125977600006, 423.7371215872, 153.31762698240004, 512]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049441.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[130.7226562545, 178.598144512, 309.4648437271, 357.055847168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049441_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference.", "boxes_value": [[44.72265625450001, 45.598144512000005, 223.4648437271, 224.055847168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049441.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, a pillow, and a lamp.", "boxes_value": [[130.7226562545, 178.598144512, 309.4648437271, 357.055847168], [130.7226562545, 182.6212768768, 180.7244873318, 227.4505005056], [203.71380617019997, 178.598144512, 266.3598022636, 243.54302976], [277.8544922129, 199.2885131776, 309.4648437271, 234.9220580864], [229.00213620239998, 302.740600576, 319.8100586096, 355.6161499136], [188.329162597, 298.6700439552, 236.7910766716, 357.055847168]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049441_crop.jpg", "text": "Please describe the section of the picture defined by the bbox . Give coordinates for the items you reference. For your reference, objects involved in this region include three pictures, a pillow, and a lamp.", "boxes_value": [[44.72265625450001, 45.598144512000005, 223.4648437271, 224.055847168], [44.72265625450001, 49.62127687680001, 94.72448733179999, 94.45050050559999], [117.71380617019997, 45.598144512000005, 180.3598022636, 110.54302976], [191.85449221290003, 66.28851317760001, 223.4648437271, 101.9220580864], [143.00213620239998, 169.74060057600002, 233.8100586096, 222.6161499136], [102.32916259699999, 165.67004395520001, 150.7910766716, 224.055847168]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049442.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[417.1026946712, 229.8296935424, 665.3602286864, 423.1660662272]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049442_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object.", "boxes_value": [[62.10269467120003, 48.82969354240001, 310.3602286864, 242.16606622720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049442.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, a person, a belt, and a leather shoes.", "boxes_value": [[417.1026946712, 229.8296935424, 665.3602286864, 423.1660662272], [440.08850097719994, 240.0650024448, 529.2725829927999, 414.252685568], [528.0994873048, 251.9523926016, 632.5267334068, 414.7361449984], [599.7202148179999, 119.4564819456, 686.0325927828, 389.2733154304], [622.3558684036, 229.8296935424, 665.3602286864, 241.7753260032], [417.1026946712, 401.5730292736, 435.96557754040003, 423.1660662272]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049442_crop.jpg", "text": "I would like to know more about the rectangular region within the picture . Can you describe it? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two chairs, a person, a belt, and a leather shoes.", "boxes_value": [[62.10269467120003, 48.82969354240001, 310.3602286864, 242.16606622720002], [85.08850097719994, 59.0650024448, 174.27258299279993, 233.252685568], [173.09948730480005, 70.9523926016, 277.52673340679996, 233.7361449984], [244.72021481799993, 0, 331.03259278279995, 208.27331543039998], [267.35586840359997, 48.82969354240001, 310.3602286864, 60.77532600320001], [62.10269467120003, 220.5730292736, 80.96557754040003, 242.16606622720002]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049443.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[10.7211914299, 0, 250.3944701979, 318.7498779136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049443_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[10.7211914299, 0, 250.3944701979, 318.7498779136]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049443.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[10.7211914299, 0, 250.3944701979, 318.7498779136], [193.10076905629998, 210.4217529344, 221.43554688150002, 227.2652588032], [10.7211914299, 0, 48.50067137580001, 29.9226684416], [140.17150880670002, 23.2556762624, 172.3952026391, 64.9242553856], [217.73992920540002, 55.1655273472, 250.3944701979, 90.269165056], [189.5161132886, 257.8421020672, 212.9958496358, 318.7498779136]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049443_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include four lamps, and a person.", "boxes_value": [[10.7211914299, 0, 250.3944701979, 318.7498779136], [193.10076905629998, 210.4217529344, 221.43554688150002, 227.2652588032], [10.7211914299, 0, 48.50067137580001, 29.9226684416], [140.17150880670002, 23.2556762624, 172.3952026391, 64.9242553856], [217.73992920540002, 55.1655273472, 250.3944701979, 90.269165056], [189.5161132886, 257.8421020672, 212.9958496358, 318.7498779136]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049445.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[374.91552731520005, 230.1882934784, 642.1783447233, 511.8564453376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049445_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object.", "boxes_value": [[66.91552731520005, 71.1882934784, 334.17834472330003, 352.8564453376]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049445.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, two glasses, a sneakers, and a backpack.", "boxes_value": [[374.91552731520005, 230.1882934784, 642.1783447233, 511.8564453376], [374.91552731520005, 361.486572288, 632.6651611377, 511.8564453376], [426.92065430279996, 230.1882934784, 486.6549072168, 250.7123412992], [562.8382568321999, 318.5936889856, 599.8288574352, 336.6520385536], [620.8143310335, 425.3576049664, 642.1783447233, 451.7910156288], [422.25866701290005, 454.4573364224, 596.772216777, 511.5635986432]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049445_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, two glasses, a sneakers, and a backpack.", "boxes_value": [[66.91552731520005, 71.1882934784, 334.17834472330003, 352.8564453376], [66.91552731520005, 202.486572288, 324.6651611377, 352.8564453376], [118.92065430279996, 71.1882934784, 178.65490721679998, 91.7123412992], [254.83825683219993, 159.5936889856, 291.8288574352, 177.6520385536], [312.8143310335, 266.3576049664, 334.17834472330003, 292.7910156288], [114.25866701290005, 295.4573364224, 288.77221677700004, 352.5635986432]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049446.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[284.54919435240004, 365.3334961152, 521.8581542713999, 512.5795898368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049446_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify.", "boxes_value": [[59.549194352400036, 37.33349611519998, 296.85815427139994, 184]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049446.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a carpet, two couches, a pillow, and a glasses.", "boxes_value": [[284.54919435240004, 365.3334961152, 521.8581542713999, 512.5795898368], [109.4891357611, 313.619567872, 445.0894775177, 511.1391601664], [0, 372.1640625152, 465.7885742112, 510.8020630016], [236.3801269777, 227.5612182528, 651.8100586176, 444.717834496], [316.6336059899, 315.6827392512, 683.2821044758999, 512.3825683456], [330.9015503003, 449.395446784, 521.8581542713999, 512.5795898368], [284.54919435240004, 365.3334961152, 324.6436157328, 384.6647339008]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049446_crop.jpg", "text": "What does the selected region in the image encompass? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a carpet, two couches, a pillow, and a glasses.", "boxes_value": [[59.549194352400036, 37.33349611519998, 296.85815427139994, 184], [0, 0, 220.0894775177, 183.13916016640002], [0, 44.16406251519999, 240.7885742112, 182.8020630016], [11.380126977700002, 0, 356, 116.71783449600002], [91.63360598989999, 0, 356, 184], [105.90155030030002, 121.395446784, 296.85815427139994, 184], [59.549194352400036, 37.33349611519998, 99.64361573280001, 56.6647339008]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049449.jpg", "text": "What sort of things can be seen in the region of the photo ? Specify the location of each mentioned object.", "boxes_value": [[0, 258.8739623936, 344.5958252295, 510.3533325312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049449_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Specify the location of each mentioned object.", "boxes_value": [[0, 62.87396239359998, 344.5958252295, 314.3533325312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049449.jpg", "text": "What sort of things can be seen in the region of the photo ? Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, three benches, and a desk.", "boxes_value": [[0, 258.8739623936, 344.5958252295, 510.3533325312], [149.0145263806, 260.1494751232, 228.0974120876, 349.436584448], [286.3465576384, 258.8739623936, 344.5958252295, 346.460327168], [32.9022827377, 321.1668701184, 135.6607666338, 385.7136840704], [0, 358.3458251776, 112.94030758619999, 478.1447143424], [37.5447387368, 411.43072512, 199.5388183906, 510.3533325312], [100.8944701911, 350.3150635008, 529.4010009624001, 510.791625984]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049449_crop.jpg", "text": "What sort of things can be seen in the region of the photo ? Specify the location of each mentioned object. For your reference, objects involved in this region include two chairs, three benches, and a desk.", "boxes_value": [[0, 62.87396239359998, 344.5958252295, 314.3533325312], [149.0145263806, 64.14947512319998, 228.0974120876, 153.43658444800002], [286.3465576384, 62.87396239359998, 344.5958252295, 150.460327168], [32.9022827377, 125.16687011840003, 135.6607666338, 189.7136840704], [0, 162.34582517759998, 112.94030758619999, 282.1447143424], [37.5447387368, 215.43072511999998, 199.5388183906, 314.3533325312], [100.8944701911, 154.31506350080002, 430, 314.791625984]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049450.jpg", "text": "I am interested in the region of the image ; please describe it. Include the coordinates for each object you identify.", "boxes_value": [[120.7067260536, 254.7282714624, 359.230346696, 327.229370112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049450_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Include the coordinates for each object you identify.", "boxes_value": [[59.70672605359999, 18.728271462399988, 298.230346696, 91.22937011200003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049450.jpg", "text": "I am interested in the region of the image ; please describe it. Include the coordinates for each object you identify. For your reference, objects involved in this region include four storage boxes, and a plate.", "boxes_value": [[120.7067260536, 254.7282714624, 359.230346696, 327.229370112], [190.99920657420003, 267.6691283968, 263.2523193254, 317.2757568512], [292.9084472663, 254.7282714624, 359.230346696, 309.1876831232], [120.7067260536, 256.6287231488, 178.544433565, 309.3165283328], [126.6489868318, 228.106018048, 196.7672729151, 297.0358886912], [178.664367682, 280.8180541952, 296.55578610829997, 327.229370112]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049450_crop.jpg", "text": "I am interested in the region of the image ; please describe it. Include the coordinates for each object you identify. For your reference, objects involved in this region include four storage boxes, and a plate.", "boxes_value": [[59.70672605359999, 18.728271462399988, 298.230346696, 91.22937011200003], [129.99920657420003, 31.669128396799977, 202.25231932539998, 81.27575685120001], [231.90844726630002, 18.728271462399988, 298.230346696, 73.18768312319997], [59.70672605359999, 20.62872314880002, 117.54443356499999, 73.31652833279998], [65.6489868318, 0, 135.7672729151, 61.0358886912], [117.664367682, 44.81805419519998, 235.55578610829997, 91.22937011200003]], "boxes_seq": [[0], [0], [1, 2, 3, 4], [5]]}, {"image_path": "objects365_v1_00049455.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[290.0016479316, 358.649902336, 478.6657715135, 459.204284672]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049455_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates.", "boxes_value": [[48.00164793160002, 25.649902336000025, 236.66577151349998, 126.20428467199997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049455.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two potted plants, two people, and a traffic light.", "boxes_value": [[290.0016479316, 358.649902336, 478.6657715135, 459.204284672], [434.2305908508, 367.40930176, 478.6657715135, 448.321228032], [359.9508056867, 376.6942749184, 383.8264160087, 445.0051879936], [348.9802245916, 401.7853393408, 366.3394164782, 459.204284672], [430.6257324227, 389.7673950208, 455.99694823859994, 457.6782226432], [290.0016479316, 358.649902336, 309.9628295806, 374.8682861568]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049455_crop.jpg", "text": "Can you provide some context for the area within the picture ? Please point out the objects and their coordinates. For your reference, objects involved in this region include two potted plants, two people, and a traffic light.", "boxes_value": [[48.00164793160002, 25.649902336000025, 236.66577151349998, 126.20428467199997], [192.2305908508, 34.409301760000005, 236.66577151349998, 115.32122803200002], [117.9508056867, 43.694274918400026, 141.8264160087, 112.00518799359998], [106.9802245916, 68.78533934080002, 124.33941647820001, 126.20428467199997], [188.62573242270003, 56.767395020799995, 213.99694823859994, 124.6782226432], [48.00164793160002, 25.649902336000025, 67.96282958059999, 41.868286156800025]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049456.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Specify the location of each mentioned object.", "boxes_value": [[580.8314209268, 108.613586432, 772.3089599584, 273.142761216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049456_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Specify the location of each mentioned object.", "boxes_value": [[48.83142092679998, 41.613586432000005, 240, 206.142761216]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049456.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two hats, and a cell phone.", "boxes_value": [[580.8314209268, 108.613586432, 772.3089599584, 273.142761216], [541.9207763872, 110.7963867136, 688.5511474408, 511.998107904], [674.3641357104001, 168.0740966912, 748.1362304408, 418.47375488], [693.212524434, 118.11578368, 771.9653320276, 494.77923584], [580.8314209268, 108.613586432, 666.0112305016, 148.7321166848], [749.4760741916, 116.7182006784, 772.3089599584, 137.2985839616], [651.7869872955999, 240.9065551872, 684.6678467052001, 273.142761216]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049456_crop.jpg", "text": "In , can you paint a picture of the area enclosed by coordinates ? Specify the location of each mentioned object. For your reference, objects involved in this region include three people, two hats, and a cell phone.", "boxes_value": [[48.83142092679998, 41.613586432000005, 240, 206.142761216], [9.920776387200021, 43.7963867136, 156.55114744080004, 247], [142.3641357104001, 101.0740966912, 216.13623044079998, 247], [161.212524434, 51.11578368000001, 239.96533202759997, 247], [48.83142092679998, 41.613586432000005, 134.01123050160004, 81.73211668479999], [217.4760741916, 49.718200678399995, 240, 70.2985839616], [119.78698729559994, 173.9065551872, 152.66784670520008, 206.142761216]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049458.jpg", "text": "I'd like some information about the specific region in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[159.333557152, 390.8197021696, 683.052856409, 430.146545408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049458_crop.jpg", "text": "I'd like some information about the specific region in the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[131.333557152, 10.81970216960002, 655.052856409, 50.14654540800001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049458.jpg", "text": "I'd like some information about the specific region in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four lamps, and two air conditioners.", "boxes_value": [[159.333557152, 390.8197021696, 683.052856409, 430.146545408], [159.333557152, 395.588073728, 182.628540017, 429.122619648], [289.887695328, 393.796142592, 309.342895477, 427.3306884608], [512.0858153949999, 393.5401611264, 530.005004914, 430.146545408], [188.196105944, 348.9537964032, 243.826782199, 439.6636962816], [578.911865224, 344.3134155264, 643.093749991, 441.6928711168], [656.644042978, 390.8197021696, 683.052856409, 424.6116333056]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4, 5]]}, {"image_path": "objects365_v1_00049458_crop.jpg", "text": "I'd like some information about the specific region in the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include four lamps, and two air conditioners.", "boxes_value": [[131.333557152, 10.81970216960002, 655.052856409, 50.14654540800001], [131.333557152, 15.588073727999983, 154.628540017, 49.12261964800001], [261.887695328, 13.796142592000024, 281.342895477, 47.33068846079999], [484.08581539499994, 13.540161126399994, 502.005004914, 50.14654540800001], [160.196105944, 0, 215.826782199, 59], [550.911865224, 0, 615.093749991, 59], [628.644042978, 10.81970216960002, 655.052856409, 44.61163330559998]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4, 5]]}, {"image_path": "objects365_v1_00049460.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please point out the objects and their coordinates.", "boxes_value": [[226.0892333697, 356.3255615488, 324.1932373006, 379.9717407232]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049460_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please point out the objects and their coordinates.", "boxes_value": [[25.089233369699997, 6.32556154880001, 123.1932373006, 29.971740723200014]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049460.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a carriage, two vans, and two horses.", "boxes_value": [[226.0892333697, 356.3255615488, 324.1932373006, 379.9717407232], [110.1005859649, 350.0702514688, 364.34045411610003, 445.5079955968], [226.0892333697, 356.3255615488, 290.6940307359, 379.9717407232], [295.0822143743, 358.191467264, 324.1932373006, 376.8434448384], [245.5588379065, 355.1334838784, 362.7135009684, 445.8532715008], [252.1675414809, 344.3192138752, 358.50793460179995, 440.4461669888]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049460_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Please point out the objects and their coordinates. For your reference, objects involved in this region include a carriage, two vans, and two horses.", "boxes_value": [[25.089233369699997, 6.32556154880001, 123.1932373006, 29.971740723200014], [0, 0.07025146879999511, 147, 35], [25.089233369699997, 6.32556154880001, 89.69403073590001, 29.971740723200014], [94.08221437430001, 8.191467263999982, 123.1932373006, 26.84344483839999], [44.5588379065, 5.1334838784, 147, 35], [51.16754148090001, 0, 147, 35]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049467.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 43.9675903476, 209.4345702912, 602.8687744361999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049467_crop.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for all objects that you mention.", "boxes_value": [[0, 43.9675903476, 209.4345702912, 602.8687744361999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049467.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, two potted plants, a cabinet, a picture, and a moniter.", "boxes_value": [[0, 43.9675903476, 209.4345702912, 602.8687744361999], [79.356384256, 319.3640747039, 182.9147338752, 533.9444580011], [0, 90.9960326963, 193.0177001984, 258.6906738593], [184.988769536, 159.35253906210002, 242.9538574336, 199.3439331026], [0, 240.7481689223, 102.0619506688, 602.8687744361999], [0, 43.9675903476, 76.013549824, 176.5982665914], [186.5291137536, 214.7108153974, 209.4345702912, 249.62780759560002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049467_crop.jpg", "text": "What's the story in the section of the included visual ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a desk, two potted plants, a cabinet, a picture, and a moniter.", "boxes_value": [[0, 43.9675903476, 209.4345702912, 602.8687744361999], [79.356384256, 319.3640747039, 182.9147338752, 533.9444580011], [0, 90.9960326963, 193.0177001984, 258.6906738593], [184.988769536, 159.35253906210002, 242.9538574336, 199.3439331026], [0, 240.7481689223, 102.0619506688, 602.8687744361999], [0, 43.9675903476, 76.013549824, 176.5982665914], [186.5291137536, 214.7108153974, 209.4345702912, 249.62780759560002]], "boxes_seq": [[0], [0], [1], [2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049468.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations.", "boxes_value": [[495.03674319360005, 126.790161152, 606.7381592064, 387.7984619008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049468_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations.", "boxes_value": [[28.036743193600046, 65.790161152, 139.73815920640004, 326.7984619008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049468.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include a couch, two pillows, two potted plants, a vase, and a tea pot.", "boxes_value": [[495.03674319360005, 126.790161152, 606.7381592064, 387.7984619008], [308.4858398208, 266.809265152, 642.640991232, 510.7425537024], [504.52355957759994, 367.055847168, 541.280639616, 403.812927232], [519.4809570048, 313.3295898624, 560.3979492096, 387.7984619008], [513.7525635072, 126.790161152, 593.8305663744001, 257.68255616], [495.03674319360005, 215.3989868032, 525.2093505792, 255.2275390464], [521.1414795264, 234.1594238464, 537.1170653952, 254.2576904192], [586.2631835904, 243.1957397504, 606.7381592064, 259.0638427648]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049468_crop.jpg", "text": "Please provide details for the area within the bounding box in . Please mention the objects and their locations. For your reference, objects involved in this region include a couch, two pillows, two potted plants, a vase, and a tea pot.", "boxes_value": [[28.036743193600046, 65.790161152, 139.73815920640004, 326.7984619008], [0, 205.80926515200002, 167, 392], [37.52355957759994, 306.055847168, 74.28063961600003, 342.812927232], [52.48095700479996, 252.32958986239998, 93.39794920960003, 326.7984619008], [46.752563507199966, 65.790161152, 126.83056637440006, 196.68255616], [28.036743193600046, 154.3989868032, 58.20935057919996, 194.2275390464], [54.14147952639996, 173.1594238464, 70.11706539520003, 193.2576904192], [119.26318359039999, 182.1957397504, 139.73815920640004, 198.06384276479997]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5], [6], [7]]}, {"image_path": "objects365_v1_00049469.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[0, 225.891235328, 214.19451905329998, 426.9108276224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049469_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference.", "boxes_value": [[0, 50.89123532799999, 214.19451905329998, 251.9108276224]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049469.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball glove, two people, and two helmets.", "boxes_value": [[0, 225.891235328, 214.19451905329998, 426.9108276224], [163.5293579321, 362.9647826944, 214.19451905329998, 396.9729004032], [0.8350219566999999, 226.8308716032, 27.9374389455, 426.9108276224], [0, 293.3344116224, 193.74072264900002, 496.9445190656], [0, 225.891235328, 27.496826169, 286.2730102784], [53.8704833898, 292.5194092032, 116.3344115879, 361.9237060608]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049469_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Give coordinates for the items you reference. For your reference, objects involved in this region include a baseball glove, two people, and two helmets.", "boxes_value": [[0, 50.89123532799999, 214.19451905329998, 251.9108276224], [163.5293579321, 187.96478269440001, 214.19451905329998, 221.97290040320001], [0.8350219566999999, 51.830871603199995, 27.9374389455, 251.9108276224], [0, 118.33441162240001, 193.74072264900002, 302], [0, 50.89123532799999, 27.496826169, 111.27301027840002], [53.8704833898, 117.51940920319998, 116.3344115879, 186.9237060608]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049473.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[309.89807131009997, 268.8032226816, 636.7382812256, 512.0722656256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049473_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify.", "boxes_value": [[81.89807131009997, 61.803222681600005, 408.73828122559996, 305]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049473.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, and three chairs.", "boxes_value": [[309.89807131009997, 268.8032226816, 636.7382812256, 512.0722656256], [572.2674560455, 311.2349243392, 682.5898437472, 444.1876220928], [309.89807131009997, 268.8032226816, 419.513305673, 369.2249145344], [327.64678954500005, 410.225708032, 388.60607909529995, 512.0722656256], [377.4549560406, 429.5542602752, 522.4190674067, 512.0722656256], [514.9849853247, 467.4678955008, 636.7382812256, 511.3288574464]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049473_crop.jpg", "text": "Can you provide a description of the area in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include two desks, and three chairs.", "boxes_value": [[81.89807131009997, 61.803222681600005, 408.73828122559996, 305], [344.2674560455, 104.2349243392, 454.5898437472, 237.18762209279998], [81.89807131009997, 61.803222681600005, 191.513305673, 162.2249145344], [99.64678954500005, 203.225708032, 160.60607909529995, 305], [149.4549560406, 222.55426027520002, 294.41906740670004, 305], [286.9849853247, 260.4678955008, 408.73828122559996, 304.3288574464]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049474.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[263.11865233230003, 128.7152710144, 683.1063232247, 213.2418823168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049474_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention.", "boxes_value": [[105.11865233230003, 21.715271014400003, 525, 106.2418823168]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049474.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five cars.", "boxes_value": [[263.11865233230003, 128.7152710144, 683.1063232247, 213.2418823168], [65.3453369458, 104.5059204096, 625.1104736200999, 392.540405248], [663.9340820468, 149.7532348416, 682.4777831859001, 213.2418823168], [642.2473144208, 135.9240112128, 683.1063232247, 188.09783936], [592.902221652, 137.1812133888, 632.5040283262, 183.383361792], [263.11865233230003, 128.7152710144, 373.2872314392, 174.1066284032]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049474_crop.jpg", "text": "What is taking place within the specified area in this capture ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include five cars.", "boxes_value": [[105.11865233230003, 21.715271014400003, 525, 106.2418823168], [0, 0, 467.1104736200999, 127], [505.9340820468, 42.753234841600005, 524.4777831859001, 106.2418823168], [484.24731442079997, 28.924011212799996, 525, 81.09783936], [434.902221652, 30.18121338879999, 474.5040283262, 76.38336179199999], [105.11865233230003, 21.715271014400003, 215.2872314392, 67.1066284032]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049476.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[86.1352539146, 388.3831176704, 390.5635986484, 503.9101562368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049476_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations.", "boxes_value": [[76.1352539146, 29.383117670399997, 380.5635986484, 144.9101562368]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049476.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[86.1352539146, 388.3831176704, 390.5635986484, 503.9101562368], [327.20019529120003, 402.7280883712, 390.5635986484, 502.6060790784], [267.0585937796, 399.4295044096, 335.25488277979997, 502.6060790784], [192.9172363466, 394.366577152, 269.7818603668, 503.4498901504], [133.19323727699998, 395.287109376, 196.5993652242, 503.9101562368], [86.1352539146, 388.3831176704, 144.1289062276, 503.4498901504]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049476_crop.jpg", "text": "In the image , what do you observe within the rectangular box defined by the coordinates ? Please mention the objects and their locations. For your reference, objects involved in this region include five people.", "boxes_value": [[76.1352539146, 29.383117670399997, 380.5635986484, 144.9101562368], [317.20019529120003, 43.72808837119999, 380.5635986484, 143.6060790784], [257.0585937796, 40.429504409599986, 325.25488277979997, 143.6060790784], [182.9172363466, 35.36657715199999, 259.7818603668, 144.4498901504], [123.19323727699998, 36.28710937599999, 186.5993652242, 144.9101562368], [76.1352539146, 29.383117670399997, 134.1289062276, 144.4498901504]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049478.jpg", "text": "Regarding the image , what's going on in the section ? Give coordinates for the items you reference.", "boxes_value": [[0.2819824128, 434.41259767279996, 511.9662475776, 647.2760009479001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049478_crop.jpg", "text": "Regarding the image , what's going on in the section ? Give coordinates for the items you reference.", "boxes_value": [[0.2819824128, 53.41259767279996, 511.9662475776, 266]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049478.jpg", "text": "Regarding the image , what's going on in the section ? Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, two chairs, two cabinets, a plate, and a laptop.", "boxes_value": [[0.2819824128, 434.41259767279996, 511.9662475776, 647.2760009479001], [0.2819824128, 434.41259767279996, 511.9662475776, 647.2760009479001], [46.18139648, 411.36022952139996, 146.692626944, 500.10949708439995], [302.9739379712, 489.6744384594, 414.009765632, 592.0665283413], [381.3970336768, 433.280151373, 491.5316772352, 532.722167956], [130.7814941184, 419.8310546999, 189.872985856, 463.05712890539996], [208.4282836992, 453.41455075370004, 246.537475584, 463.847412102], [336.386840832, 467.6168212914, 384.5349731328, 508.1038818149]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6], [7]]}, {"image_path": "objects365_v1_00049478_crop.jpg", "text": "Regarding the image , what's going on in the section ? Give coordinates for the items you reference. For your reference, objects involved in this region include a carpet, two chairs, two cabinets, a plate, and a laptop.", "boxes_value": [[0.2819824128, 53.41259767279996, 511.9662475776, 266], [0.2819824128, 53.41259767279996, 511.9662475776, 266], [46.18139648, 30.36022952139996, 146.692626944, 119.10949708439995], [302.9739379712, 108.6744384594, 414.009765632, 211.0665283413], [381.3970336768, 52.280151373000024, 491.5316772352, 151.72216795600002], [130.7814941184, 38.831054699899994, 189.872985856, 82.05712890539996], [208.4282836992, 72.41455075370004, 246.537475584, 82.84741210200002], [336.386840832, 86.61682129140002, 384.5349731328, 127.10388181489998]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5], [6], [7]]}, {"image_path": "objects365_v1_00049479.jpg", "text": "Help me understand what's happening in the selected bounding box within . Specify the location of each mentioned object.", "boxes_value": [[151.87432862330002, 346.9334106624, 390.767211918, 512.3729248256]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049479_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Specify the location of each mentioned object.", "boxes_value": [[59.874328623300016, 41.93341066239998, 298.767211918, 207]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049479.jpg", "text": "Help me understand what's happening in the selected bounding box within . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a person, two cups, and a bottle.", "boxes_value": [[151.87432862330002, 346.9334106624, 390.767211918, 512.3729248256], [164.72167971919998, 355.9676513792, 390.767211918, 512.3729248256], [199.3726806409, 243.4833374208, 415.9588622757, 511.8001709056], [151.87432862330002, 346.9334106624, 194.6577758486, 423.8195190272], [217.59960938519998, 403.3578491392, 257.9027710014, 448.0014038016], [364.2702636634, 400.2738647552, 399.3474121218, 446.2840576]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049479_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Specify the location of each mentioned object. For your reference, objects involved in this region include a desk, a person, two cups, and a bottle.", "boxes_value": [[59.874328623300016, 41.93341066239998, 298.767211918, 207], [72.72167971919998, 50.96765137919999, 298.767211918, 207], [107.3726806409, 0, 323.9588622757, 206.8001709056], [59.874328623300016, 41.93341066239998, 102.65777584860001, 118.81951902719999], [125.59960938519998, 98.35784913920003, 165.90277100140003, 143.00140380160002], [272.2702636634, 95.27386475520001, 307.3474121218, 141.28405759999998]], "boxes_seq": [[0], [0], [1], [2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049480.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference.", "boxes_value": [[255.00854490970002, 155.165649408, 683.0207519305, 255.8873901568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049480_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference.", "boxes_value": [[107.00854490970002, 26.165649408000007, 535, 126.8873901568]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049480.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include five umbrellas, and a street lights.", "boxes_value": [[255.00854490970002, 155.165649408, 683.0207519305, 255.8873901568], [527.2961425579, 215.422729472, 618.3415527392, 255.8873901568], [552.8063965017001, 170.5598144512, 670.2418213157999, 233.4559326208], [625.8187255644, 155.165649408, 683.0207519305, 196.9497680896], [241.2349853683, 220.531799296, 332.90032958349997, 256.8654785024], [255.00854490970002, 234.0678710784, 339.78710935419997, 253.7783203328], [401.8813476541, 157.7050781184, 439.5323486236, 241.1193847808]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049480_crop.jpg", "text": "Can you provide some context for the area within the picture ? Give coordinates for the items you reference. For your reference, objects involved in this region include five umbrellas, and a street lights.", "boxes_value": [[107.00854490970002, 26.165649408000007, 535, 126.8873901568], [379.2961425579, 86.42272947199999, 470.3415527392, 126.8873901568], [404.8063965017001, 41.5598144512, 522.2418213157999, 104.45593262080001], [477.8187255644, 26.165649408000007, 535, 67.94976808960001], [93.23498536829999, 91.531799296, 184.90032958349997, 127.86547850239998], [107.00854490970002, 105.0678710784, 191.78710935419997, 124.7783203328], [253.8813476541, 28.705078118399996, 291.5323486236, 112.1193847808]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049481.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[639.8869628571, 73.7944405504, 771.0142241789999, 345.1314770944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049481_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations.", "boxes_value": [[32.8869628571, 68.7944405504, 164, 340.1314770944]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049481.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a helmet, a gloves, and a sneakers.", "boxes_value": [[639.8869628571, 73.7944405504, 771.0142241789999, 345.1314770944], [639.8869628571, 119.8105468928, 704.1148681302, 322.6353759744], [696.5089111107, 121.500732416, 737.9189453253, 233.899536128], [693.973632813, 70.7944946176, 770.8780517781, 363.2003784192], [760.7834690541, 73.7944405504, 771.0142241789999, 112.0093566464], [714.1959636741, 210.69903232, 731.5584661005, 246.8709123584], [699.8560067651999, 288.7395249664, 727.7741474484001, 345.1314770944]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049481_crop.jpg", "text": "I would like a detailed description of the portion of enclosed by the bbox . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include three people, a helmet, a gloves, and a sneakers.", "boxes_value": [[32.8869628571, 68.7944405504, 164, 340.1314770944], [32.8869628571, 114.8105468928, 97.11486813019997, 317.6353759744], [89.50891111069996, 116.500732416, 130.9189453253, 228.899536128], [86.973632813, 65.7944946176, 163.87805177810003, 358.2003784192], [153.7834690541, 68.7944405504, 164, 107.0093566464], [107.19596367409997, 205.69903232, 124.55846610050003, 241.8709123584], [92.85600676519994, 283.7395249664, 120.77414744840007, 340.1314770944]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6]]}, {"image_path": "objects365_v1_00049482.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[579.0723876864, 262.1718139904, 763.209838848, 347.7113037312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049482_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference.", "boxes_value": [[46.07238768640002, 22.171813990399983, 230.209838848, 107.71130373120002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049482.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[579.0723876864, 262.1718139904, 763.209838848, 347.7113037312], [579.0723876864, 317.0283203072, 623.2259521536, 347.7113037312], [629.2592773632, 301.2875976704, 668.3751220992, 317.8906860544], [668.9379883007999, 284.4030761472, 704.114013696, 301.850402816], [736.4760741888, 262.1718139904, 757.5816650496, 288.061401344], [727.7523193344, 297.347900416, 763.209838848, 326.3329467904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049482_crop.jpg", "text": "I'd like to know more about the area in the given image . Can you describe it? Give coordinates for the items you reference. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[46.07238768640002, 22.171813990399983, 230.209838848, 107.71130373120002], [46.07238768640002, 77.02832030719998, 90.22595215360002, 107.71130373120002], [96.2592773632, 61.287597670399975, 135.37512209919998, 77.89068605440002], [135.93798830079993, 44.40307614720001, 171.11401369600003, 61.850402815999985], [203.4760741888, 22.171813990399983, 224.58166504960002, 48.06140134399999], [194.7523193344, 57.347900416000016, 230.209838848, 86.3329467904]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049483.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each object you identify.", "boxes_value": [[120.6767577894, 225.331519744, 666.5988769641, 512.1448974848]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049483_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each object you identify.", "boxes_value": [[120.6767577894, 72.33151974399999, 666.5988769641, 359]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049483.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a faucet, a sink, a bakset, and a toiletry.", "boxes_value": [[120.6767577894, 225.331519744, 666.5988769641, 512.1448974848], [120.6767577894, 274.847045888, 666.5988769641, 512.1448974848], [390.8709717071, 167.3674926592, 459.6041260042, 265.0975341568], [253.4452514807, 246.3939209216, 477.3054199292, 367.8925781504], [263.5890894138, 225.331519744, 360.22748100330006, 266.248053248], [548.9584960756999, 262.8607788032, 593.4635009436, 373.233154304]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049483_crop.jpg", "text": "In the photograph , can you describe the objects or scenery enclosed by ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a cabinet, a faucet, a sink, a bakset, and a toiletry.", "boxes_value": [[120.6767577894, 72.33151974399999, 666.5988769641, 359], [120.6767577894, 121.84704588800003, 666.5988769641, 359], [390.8709717071, 14.367492659199996, 459.6041260042, 112.09753415680001], [253.4452514807, 93.3939209216, 477.3054199292, 214.89257815040003], [263.5890894138, 72.33151974399999, 360.22748100330006, 113.24805324800002], [548.9584960756999, 109.86077880319999, 593.4635009436, 220.23315430399998]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049485.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[114.6709595019, 328.7535400448, 522.8228759941, 511.774291968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049485_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[102.6709595019, 46.75354004479999, 510.82287599409995, 229.774291968]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049485.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, two people, and two sneakers.", "boxes_value": [[114.6709595019, 328.7535400448, 522.8228759941, 511.774291968], [212.9512328909, 476.5245361152, 243.1372070291, 505.1625366016], [114.6709595019, 328.7535400448, 198.2357787845, 510.6632079872], [431.875976542, 304.3093872128, 508.050659192, 511.8001709056], [136.0183105511, 485.296142592, 161.5728149721, 511.774291968], [506.47705075550004, 349.7897949184, 522.8228759941, 379.917480448]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049485_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a soccer, two people, and two sneakers.", "boxes_value": [[102.6709595019, 46.75354004479999, 510.82287599409995, 229.774291968], [200.9512328909, 194.52453611520002, 231.1372070291, 223.16253660159998], [102.6709595019, 46.75354004479999, 186.2357787845, 228.6632079872], [419.875976542, 22.309387212800004, 496.050659192, 229.8001709056], [124.0183105511, 203.29614259200002, 149.5728149721, 229.774291968], [494.47705075550004, 67.78979491839999, 510.82287599409995, 97.91748044799999]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049487.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[216.2946167154, 356.8943481344, 613.1262207009, 400.4065551872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049487_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object.", "boxes_value": [[99.2946167154, 10.89434813439999, 496.12622070090003, 54.40655518720001]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049487.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[216.2946167154, 356.8943481344, 613.1262207009, 400.4065551872], [216.2946167154, 371.6884765696, 250.6692504571, 400.4065551872], [367.2820434511, 356.8943481344, 393.3894043072, 394.7500000256], [410.3591308756, 364.2914428928, 434.2908935668, 399.536315904], [533.0637207008, 365.5968017408, 564.8276366959, 384.74218752], [587.8890381132001, 356.8943481344, 613.1262207009, 385.177307136]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049487_crop.jpg", "text": "What can you tell me about the selected region in the photo ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include five sneakers.", "boxes_value": [[99.2946167154, 10.89434813439999, 496.12622070090003, 54.40655518720001], [99.2946167154, 25.688476569600027, 133.6692504571, 54.40655518720001], [250.28204345109998, 10.89434813439999, 276.3894043072, 48.7500000256], [293.3591308756, 18.291442892799978, 317.2908935668, 53.53631590399999], [416.0637207008, 19.596801740800004, 447.82763669589997, 38.742187520000016], [470.88903811320006, 10.89434813439999, 496.12622070090003, 39.177307136000024]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049488.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please point out the objects and their coordinates.", "boxes_value": [[52.383422847999995, 92.8393554432, 403.88598630399997, 217.5233154048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049488_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please point out the objects and their coordinates.", "boxes_value": [[52.383422847999995, 31.839355443200006, 403.88598630399997, 156.5233154048]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049488.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two helmets, and two glasses.", "boxes_value": [[52.383422847999995, 92.8393554432, 403.88598630399997, 217.5233154048], [240.414306624, 90.1865234432, 478.82897952, 415.7186889728], [52.383422847999995, 149.21246336, 108.093261696, 206.911926272], [57.689147968, 201.6062011904, 100.134704576, 217.5233154048], [320.32122803199996, 92.8393554432, 401.896362304, 176.4041747968], [328.942993152, 146.5595703296, 403.88598630399997, 189.0051879936]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00049488_crop.jpg", "text": "Could you offer a description of the contents in the selected area of the image ? Please point out the objects and their coordinates. For your reference, objects involved in this region include a person, two helmets, and two glasses.", "boxes_value": [[52.383422847999995, 31.839355443200006, 403.88598630399997, 156.5233154048], [240.414306624, 29.186523443200002, 478.82897952, 187], [52.383422847999995, 88.21246335999999, 108.093261696, 145.911926272], [57.689147968, 140.6062011904, 100.134704576, 156.5233154048], [320.32122803199996, 31.839355443200006, 401.896362304, 115.40417479679999], [328.942993152, 85.5595703296, 403.88598630399997, 128.0051879936]], "boxes_seq": [[0], [0], [1], [2, 4], [3, 5]]}, {"image_path": "objects365_v1_00049490.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[403.0826415894, 256.55847168, 606.2849121093, 410.2502441472]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049490_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe.", "boxes_value": [[51.0826415894, 38.558471680000025, 254.28491210929997, 192.25024414720002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049490.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[403.0826415894, 256.55847168, 606.2849121093, 410.2502441472], [403.0826415894, 256.5720825344, 423.95874026, 325.6886596608], [429.0366210994, 267.2922363392, 465.9929198884, 376.1860961792], [479.2320556982, 256.55847168, 501.1309814286, 355.2723999232], [508.6286620801, 262.6640625152, 555.4281006075, 410.2502441472], [546.5173340043, 268.2333373952, 607.792236352, 465.9431152128], [580.2091064357, 315.4791259648, 606.2849121093, 367.0381469696]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049490_crop.jpg", "text": "What can be observed in the rectangular region in the photograph ? Provide the coordinates for each element you describe. For your reference, objects involved in this region include five people, and a handbag.", "boxes_value": [[51.0826415894, 38.558471680000025, 254.28491210929997, 192.25024414720002], [51.0826415894, 38.57208253440001, 71.95874026000001, 107.6886596608], [77.03662109940001, 49.292236339199974, 113.99291988840002, 158.18609617919998], [127.23205569819999, 38.558471680000025, 149.13098142860002, 137.2723999232], [156.6286620801, 44.66406251519999, 203.42810060750003, 192.25024414720002], [194.51733400429998, 50.23333739520001, 255.79223635200003, 230], [228.20910643570005, 97.4791259648, 254.28491210929997, 149.0381469696]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049491.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[126.7341308416, 552.6613769472, 370.2326660096, 718.5372314112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049491_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object.", "boxes_value": [[61.734130841600006, 41.66137694719998, 305.2326660096, 207.53723141119997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049491.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two sandals, and three leather shoes.", "boxes_value": [[126.7341308416, 552.6613769472, 370.2326660096, 718.5372314112], [126.7341308416, 644.4643554816, 170.8626098688, 700.4129638656], [137.3721923584, 552.6613769472, 187.410766592, 633.4322509824], [250.4514770432, 642.1003418112, 300.8840331776, 657.4665527039999], [276.4557495296, 694.5029296896, 370.2326660096, 718.5372314112], [287.456115712, 587.8935546624, 303.0009765376, 607.609008768]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00049491_crop.jpg", "text": "Could you tell me more about the area in the snapshot ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two sandals, and three leather shoes.", "boxes_value": [[61.734130841600006, 41.66137694719998, 305.2326660096, 207.53723141119997], [61.734130841600006, 133.46435548160002, 105.8626098688, 189.41296386559998], [72.37219235840001, 41.66137694719998, 122.41076659199999, 122.43225098239998], [185.4514770432, 131.1003418112, 235.88403317759997, 146.46655270399992], [211.45574952959998, 183.50292968960002, 305.2326660096, 207.53723141119997], [222.45611571199998, 76.89355466239999, 238.00097653760002, 96.60900876799997]], "boxes_seq": [[0], [0], [1, 5], [2, 3, 4]]}, {"image_path": "objects365_v1_00049492.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[76.60748289600001, 261.6487426916, 238.405395504, 371.3656615966]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049492_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations.", "boxes_value": [[40.60748289600001, 27.648742691599978, 202.405395504, 137.36566159659998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049492.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two helmets, and a gloves.", "boxes_value": [[76.60748289600001, 261.6487426916, 238.405395504, 371.3656615966], [130.79907225600002, 84.25543215980001, 414.343444848, 436.06042482199996], [77.540771472, 260.5330199879, 315.327941904, 599.5860595681], [76.60748289600001, 271.97973633050003, 110.640563952, 312.3657226734], [84.159606912, 357.3634643359, 108.496704096, 371.3656615966], [191.032714848, 261.6487426916, 238.405395504, 316.83013915739997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049492_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, two helmets, and a gloves.", "boxes_value": [[40.60748289600001, 27.648742691599978, 202.405395504, 137.36566159659998], [94.79907225600002, 0, 242, 164], [41.540771472, 26.53301998789999, 242, 164], [40.60748289600001, 37.97973633050003, 74.640563952, 78.3657226734], [48.159606912, 123.3634643359, 72.496704096, 137.36566159659998], [155.032714848, 27.648742691599978, 202.405395504, 82.83013915739997]], "boxes_seq": [[0], [0], [1, 2], [3, 5], [4]]}, {"image_path": "objects365_v1_00049493.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[125.14672855, 484.667358432, 284.2774048, 719.5909423728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049493_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[40.146728550000006, 59.667358432000015, 199.2774048, 294.5909423728]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049493.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a flower, a bowl, and a car.", "boxes_value": [[125.14672855, 484.667358432, 284.2774048, 719.5909423728], [0.594635, 392.9853515824, 262.5140991, 751.5240478448001], [99.5419922, 638.6076659936, 368.4460449, 751.5240478448001], [187.39648440000002, 393.3867187232, 500.39245605, 751.6488037424], [125.14672855, 595.3913574528, 284.2774048, 719.5909423728], [129.76678465, 484.667358432, 153.37976075, 507.397583032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049493_crop.jpg", "text": "I'm curious about the area in the following image . Can you describe it? Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include a chair, a desk, a flower, a bowl, and a car.", "boxes_value": [[40.146728550000006, 59.667358432000015, 199.2774048, 294.5909423728], [0, 0, 177.5140991, 326.52404784480007], [14.541992199999996, 213.6076659936, 239, 326.52404784480007], [102.39648440000002, 0, 239, 326.6488037424], [40.146728550000006, 170.39135745279998, 199.2774048, 294.5909423728], [44.766784650000005, 59.667358432000015, 68.37976075, 82.397583032]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049494.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[576.5083007623, 127.9188232192, 693.6882323873, 389.8210449408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049494_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention.", "boxes_value": [[29.508300762299996, 65.9188232192, 146.6882323873, 327.8210449408]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049494.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two glasses, and a scissors.", "boxes_value": [[576.5083007623, 127.9188232192, 693.6882323873, 389.8210449408], [492.25378416399997, 79.6063232512, 670.5739745917, 311.4224853504], [571.7901611674, 89.5478515712, 768.2156982073, 512.332153344], [576.5083007623, 127.9188232192, 623.0657958608, 142.8588867072], [659.8647460935999, 223.3192749056, 693.6882323873, 250.3097533952], [617.6373291203, 355.3121337856, 659.5410156265, 389.8210449408]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049494_crop.jpg", "text": "Would you mind describing the rectangular area in the provided image ? Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two people, two glasses, and a scissors.", "boxes_value": [[29.508300762299996, 65.9188232192, 146.6882323873, 327.8210449408], [0, 17.606323251199996, 123.5739745917, 249.4224853504], [24.790161167400015, 27.5478515712, 175, 393], [29.508300762299996, 65.9188232192, 76.0657958608, 80.85888670720001], [112.86474609359993, 161.3192749056, 146.6882323873, 188.3097533952], [70.63732912030002, 293.3121337856, 112.54101562649998, 327.8210449408]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049495.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[146.86895752270001, 173.3637085184, 385.5290527663, 441.4031372288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049495_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates.", "boxes_value": [[59.868957522700015, 67.36370851839999, 298.5290527663, 335.4031372288]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049495.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a street lights, a car, a bus, and a hockey stick.", "boxes_value": [[146.86895752270001, 173.3637085184, 385.5290527663, 441.4031372288], [291.4202880719, 259.9891357184, 317.7061156991, 310.2316284416], [373.0970459229, 299.2514648576, 402.0383300816, 367.889465344], [157.1047363397, 356.4661865472, 221.1292114578, 441.4031372288], [146.86895752270001, 174.0938720768, 162.7918701175, 284.515747072], [361.0217285214, 186.8516845568, 385.5290527663, 202.9954223616], [301.7745971677, 173.3637085184, 321.2453613219, 189.9309082112], [199.32727049800002, 401.565185536, 277.2326050039, 431.1350708224]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049495_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Please point out the objects and their coordinates. For your reference, objects involved in this region include three people, a street lights, a car, a bus, and a hockey stick.", "boxes_value": [[59.868957522700015, 67.36370851839999, 298.5290527663, 335.4031372288], [204.42028807190002, 153.9891357184, 230.7061156991, 204.23162844159998], [286.0970459229, 193.2514648576, 315.0383300816, 261.889465344], [70.1047363397, 250.46618654719998, 134.1292114578, 335.4031372288], [59.868957522700015, 68.09387207680001, 75.7918701175, 178.515747072], [274.0217285214, 80.8516845568, 298.5290527663, 96.99542236159999], [214.7745971677, 67.36370851839999, 234.24536132190002, 83.9309082112], [112.32727049800002, 295.565185536, 190.2326050039, 325.1350708224]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049497.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[489.71557613539994, 63.580200192, 862.9754639095, 348.2221069312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049497_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object.", "boxes_value": [[93.71557613539994, 63.580200192, 466.97546390950004, 348.2221069312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049497.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[489.71557613539994, 63.580200192, 862.9754639095, 348.2221069312], [732.4765625107, 220.4064941568, 791.9462890427, 348.2221069312], [487.0527343407, 120.106750464, 553.179565405, 245.2595214848], [489.71557613539994, 156.9425048576, 530.1018066713, 271.0001830912], [834.6273193774999, 80.7136840704, 862.9754639095, 138.9674682368], [818.7398681922999, 63.580200192, 851.137695327, 121.522460928]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049497_crop.jpg", "text": "Describe the image content present in the specified rectangular area of . Include the coordinates for each mentioned object. For your reference, objects involved in this region include five people.", "boxes_value": [[93.71557613539994, 63.580200192, 466.97546390950004, 348.2221069312], [336.4765625107, 220.4064941568, 395.94628904269996, 348.2221069312], [91.0527343407, 120.106750464, 157.17956540499995, 245.2595214848], [93.71557613539994, 156.9425048576, 134.10180667129998, 271.0001830912], [438.62731937749993, 80.7136840704, 466.97546390950004, 138.9674682368], [422.7398681922999, 63.580200192, 455.13769532699996, 121.522460928]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049499.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference.", "boxes_value": [[298.928831278, 101.9488525312, 493.85949705400003, 282.5740220928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049499_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference.", "boxes_value": [[48.92883127800002, 45.948852531200004, 243.85949705400003, 226.57402209280002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049499.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[298.928831278, 101.9488525312, 493.85949705400003, 282.5740220928], [264.152099596, 190.0193481216, 456.42675778700004, 400.8768921088], [381.717773437, 162.899963392, 467.77978516499996, 280.6689453056], [437.898071303, 101.9488525312, 493.85949705400003, 234.70166016], [298.928831278, 237.9507313152, 365.379742805, 282.5740220928], [381.082229143, 172.1116808192, 427.697998343, 189.697032192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049499_crop.jpg", "text": "Would you kindly describe the content of the area enclosed by in the image ? Give coordinates for the items you reference. For your reference, objects involved in this region include three people, and two hats.", "boxes_value": [[48.92883127800002, 45.948852531200004, 243.85949705400003, 226.57402209280002], [14.152099596000028, 134.0193481216, 206.42675778700004, 271], [131.717773437, 106.89996339199999, 217.77978516499996, 224.6689453056], [187.898071303, 45.948852531200004, 243.85949705400003, 178.70166016], [48.92883127800002, 181.9507313152, 115.37974280499998, 226.57402209280002], [131.082229143, 116.11168081919999, 177.697998343, 133.697032192]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049500.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[295.16772460659996, 323.3372192256, 766.0924072526, 512.3161620992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049500_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Remember to mention the objects and their corresponding locations.", "boxes_value": [[118.16772460659996, 47.337219225599995, 589, 236]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049500.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five stools, and two desks.", "boxes_value": [[295.16772460659996, 323.3372192256, 766.0924072526, 512.3161620992], [275.201904273, 333.5739746304, 343.1810302728, 488.071899392], [295.16772460659996, 343.5568847872, 371.2282714812, 512.3161620992], [290.41394040299997, 287.9376220672, 489.5975341988, 450.9923705856], [488.58227542419996, 323.3372192256, 766.0924072526, 511.936279296], [390.5506591684, 390.5342407168, 491.9982910306, 442.49517824], [454.38842776940004, 413.7929687552, 577.610107455, 511.7764892672], [360.3637695398, 441.0106201088, 503.38012692819996, 512.271362304]], "boxes_seq": [[0], [0], [1, 2, 5, 6, 7], [3, 4]]}, {"image_path": "objects365_v1_00049500_crop.jpg", "text": "I'm curious about the area in . Can you provide a description of it? Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include five stools, and two desks.", "boxes_value": [[118.16772460659996, 47.337219225599995, 589, 236], [98.20190427300003, 57.573974630400016, 166.1810302728, 212.07189939199998], [118.16772460659996, 67.55688478719998, 194.2282714812, 236], [113.41394040299997, 11.937622067199982, 312.5975341988, 174.99237058559999], [311.58227542419996, 47.337219225599995, 589, 235.936279296], [213.5506591684, 114.53424071680001, 314.9982910306, 166.49517823999997], [277.38842776940004, 137.79296875519998, 400.61010745500005, 235.77648926720002], [183.3637695398, 165.01062010880003, 326.38012692819996, 236]], "boxes_seq": [[0], [0], [1, 2, 5, 6, 7], [3, 4]]}, {"image_path": "objects365_v1_00049501.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify.", "boxes_value": [[7.486633319999999, 39.3695678838, 201.16070556, 248.7551269532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049501_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify.", "boxes_value": [[7.486633319999999, 39.3695678838, 201.16070556, 248.7551269532]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049501.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a power outlet, two people, a glasses, a pen, and a bakset.", "boxes_value": [[7.486633319999999, 39.3695678838, 201.16070556, 248.7551269532], [0.16741944, 174.1149597082, 355.095459, 265.943115235], [7.486633319999999, 120.50665282039999, 23.96124268, 141.68829346840002], [130.92932128, 0.1077880944, 246.04022215999998, 211.29913330399998], [26.958221440000003, 70.65960694660001, 153.67303468, 200.6235046256], [154.74499512, 39.3695678838, 201.16070556, 72.5236205932], [13.34780884, 222.5870666376, 61.829711919999994, 248.7551269532], [22.368225080000002, 194.17691040999998, 136.44763183999999, 261.6859130746]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049501_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a desk, a power outlet, two people, a glasses, a pen, and a bakset.", "boxes_value": [[7.486633319999999, 39.3695678838, 201.16070556, 248.7551269532], [0.16741944, 174.1149597082, 249, 265.943115235], [7.486633319999999, 120.50665282039999, 23.96124268, 141.68829346840002], [130.92932128, 0.1077880944, 246.04022215999998, 211.29913330399998], [26.958221440000003, 70.65960694660001, 153.67303468, 200.6235046256], [154.74499512, 39.3695678838, 201.16070556, 72.5236205932], [13.34780884, 222.5870666376, 61.829711919999994, 248.7551269532], [22.368225080000002, 194.17691040999998, 136.44763183999999, 261.6859130746]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5], [6], [7]]}, {"image_path": "objects365_v1_00049502.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[134.0153198006, 174.2443237376, 247.4468383639, 265.9880981504]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049502_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[29.015319800599997, 23.244323737600013, 142.4468383639, 114.98809815039999]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049502.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two paddles, two people, and a helmet.", "boxes_value": [[134.0153198006, 174.2443237376, 247.4468383639, 265.9880981504], [111.69274904299999, 237.0188598784, 156.2617187243, 260.4886474752], [134.0153198006, 216.2981567488, 212.4122924971, 248.6197509632], [153.04077145230002, 191.811889664, 244.4498291211, 249.5044555776], [163.5303344852, 174.579040512, 247.4468383639, 265.9880981504], [205.20849613069998, 174.2443237376, 230.5018310839, 192.1944580096]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049502_crop.jpg", "text": "I'd like some information about the specific region in the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include two paddles, two people, and a helmet.", "boxes_value": [[29.015319800599997, 23.244323737600013, 142.4468383639, 114.98809815039999], [6.692749042999992, 86.01885987840001, 51.26171872430001, 109.48864747520003], [29.015319800599997, 65.29815674880001, 107.4122924971, 97.6197509632], [48.04077145230002, 40.811889664000006, 139.4498291211, 98.5044555776], [58.5303344852, 23.579040512000006, 142.4468383639, 114.98809815039999], [100.20849613069998, 23.244323737600013, 125.50183108389999, 41.1944580096]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049504.jpg", "text": "What's the story in the section of the included visual ? Please mention the objects and their locations.", "boxes_value": [[119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049504_crop.jpg", "text": "What's the story in the section of the included visual ? Please mention the objects and their locations.", "boxes_value": [[119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049504.jpg", "text": "What's the story in the section of the included visual ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, two barrels, a hat, and an airplane.", "boxes_value": [[119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872], [219.6440429838, 230.6816406016, 286.8504028255, 406.7376098816], [651.0567626867, 213.5013427712, 682.1030273695, 470.3844604416], [183.0477905134, 288.3884277248, 216.3923340091, 328.0288086016], [151.2389526424, 284.7302856192, 181.9288329794, 319.9722290176], [234.1317138582, 229.4077758976, 256.0755615231, 249.459899904], [119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049504_crop.jpg", "text": "What's the story in the section of the included visual ? Please mention the objects and their locations. For your reference, objects involved in this region include two people, two barrels, a hat, and an airplane.", "boxes_value": [[119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872], [219.6440429838, 230.6816406016, 286.8504028255, 406.7376098816], [651.0567626867, 213.5013427712, 682.1030273695, 429], [183.0477905134, 288.3884277248, 216.3923340091, 328.0288086016], [151.2389526424, 284.7302856192, 181.9288329794, 319.9722290176], [234.1317138582, 229.4077758976, 256.0755615231, 249.459899904], [119.2171630893, 70.3817138688, 675.9372558658, 357.8683471872]], "boxes_seq": [[0], [0], [1, 2], [3, 4], [5], [6]]}, {"image_path": "objects365_v1_00049505.jpg", "text": "Please enlighten me about the area in the photograph . Include the coordinates for each mentioned object.", "boxes_value": [[0.038269056, 204.431762688, 32.2056274176, 512.0181884928]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049505_crop.jpg", "text": "Please enlighten me about the area in the photograph . Include the coordinates for each mentioned object.", "boxes_value": [[0.038269056, 77.43176268799999, 32.2056274176, 385]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049505.jpg", "text": "Please enlighten me about the area in the photograph . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, four people, and a tent.", "boxes_value": [[0.038269056, 204.431762688, 32.2056274176, 512.0181884928], [0, 359.7688598528, 206.93615723520003, 511.0691528192], [17.021850624, 155.3790283264, 187.167236352, 512.040405248], [0.0287475456, 232.4270019584, 19.312927257600002, 300.7250366464], [0.038269056, 459.5968627712, 27.2334594816, 512.0181884928], [0.0825195264, 204.431762688, 32.2056274176, 284.1659546112], [0.46124267520000006, 232.6007079936, 18.57110592, 273.1451416064]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00049505_crop.jpg", "text": "Please enlighten me about the area in the photograph . Include the coordinates for each mentioned object. For your reference, objects involved in this region include a desk, four people, and a tent.", "boxes_value": [[0.038269056, 77.43176268799999, 32.2056274176, 385], [0, 232.7688598528, 40, 384.0691528192], [17.021850624, 28.379028326400004, 40, 385], [0.0287475456, 105.42700195840001, 19.312927257600002, 173.72503664639999], [0.038269056, 332.5968627712, 27.2334594816, 385], [0.0825195264, 77.43176268799999, 32.2056274176, 157.1659546112], [0.46124267520000006, 105.6007079936, 18.57110592, 146.1451416064]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 6], [5]]}, {"image_path": "objects365_v1_00049506.jpg", "text": "What does the selected region in the image encompass? Specify the location of each mentioned object.", "boxes_value": [[278.9015502848, 472.2407226232, 512.418334976, 599.9632568616]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049506_crop.jpg", "text": "What does the selected region in the image encompass? Specify the location of each mentioned object.", "boxes_value": [[58.90155028480001, 32.240722623199986, 292, 159.96325686160003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049506.jpg", "text": "What does the selected region in the image encompass? Specify the location of each mentioned object. For your reference, objects involved in this region include a gloves, and four belts.", "boxes_value": [[278.9015502848, 472.2407226232, 512.418334976, 599.9632568616], [404.3457031168, 570.1358642708001, 432.2171631104, 599.9632568616], [381.8786010624, 506.693969708, 417.8631591936, 522.7722167708], [479.1134643712, 472.2407226232, 512.418334976, 486.78771972920003], [278.9015502848, 490.6158446984, 323.6908569088, 510.9049072436], [351.9086914048, 472.87719725119996, 388.4851684352, 481.7060546648]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049506_crop.jpg", "text": "What does the selected region in the image encompass? Specify the location of each mentioned object. For your reference, objects involved in this region include a gloves, and four belts.", "boxes_value": [[58.90155028480001, 32.240722623199986, 292, 159.96325686160003], [184.3457031168, 130.13586427080008, 212.2171631104, 159.96325686160003], [161.87860106239998, 66.693969708, 197.86315919359998, 82.77221677080001], [259.1134643712, 32.240722623199986, 292, 46.787719729200035], [58.90155028480001, 50.6158446984, 103.69085690880001, 70.90490724360001], [131.90869140479998, 32.87719725119996, 168.48516843520002, 41.70605466479998]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5]]}, {"image_path": "objects365_v1_00049508.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[371.66406247369997, 218.5966186496, 678.8249511798, 331.6289672704]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049508_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object.", "boxes_value": [[77.66406247369997, 28.596618649600003, 384.8249511798, 141.62896727039998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049508.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a luggage, a trash bin can, and three cars.", "boxes_value": [[371.66406247369997, 218.5966186496, 678.8249511798, 331.6289672704], [375.05993649299995, 259.4567260672, 395.5747070589, 306.0094604288], [371.66406247369997, 272.5092163072, 396.75073245299996, 289.5784911872], [660.6096191608, 304.477539072, 678.8249511798, 331.6289672704], [444.4439697099, 270.9869995008, 578.4119872737, 331.2726440448], [461.5826416336, 218.5966186496, 560.3361816365, 263.3802490368], [449.424560546, 197.3339843584, 548.6702881069, 236.7042236416]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049508_crop.jpg", "text": "Tell me more about the scenery or objects within the rectangular region in . Specify the location of each mentioned object. For your reference, objects involved in this region include a person, a luggage, a trash bin can, and three cars.", "boxes_value": [[77.66406247369997, 28.596618649600003, 384.8249511798, 141.62896727039998], [81.05993649299995, 69.45672606720001, 101.5747070589, 116.00946042880003], [77.66406247369997, 82.5092163072, 102.75073245299996, 99.57849118719997], [366.6096191608, 114.47753907200001, 384.8249511798, 141.62896727039998], [150.4439697099, 80.98699950079998, 284.4119872737, 141.27264404480002], [167.58264163360002, 28.596618649600003, 266.3361816365, 73.38024903680002], [155.424560546, 7.333984358399988, 254.6702881069, 46.70422364160001]], "boxes_seq": [[0], [0], [1], [2], [3], [4, 5, 6]]}, {"image_path": "objects365_v1_00049509.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[249.7072304736, 379.4411490816, 424.4592163032, 403.9563149312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049509_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object.", "boxes_value": [[43.70723047359999, 6.441149081599974, 218.45921630319998, 30.956314931199984]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049509.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[249.7072304736, 379.4411490816, 424.4592163032, 403.9563149312], [334.031066892, 132.9163818496, 444.313842786, 399.5277709824], [245.9185790976, 160.7713012736, 323.7986450196, 404.0755004928], [303.1419597948, 388.3823737344, 324.3547417392, 403.6877987328], [249.7072304736, 384.3546302976, 289.4476321836, 403.9563149312], [351.74846635919994, 380.7766113792, 391.8239262216, 401.744362496], [393.6839686668, 379.4411490816, 424.4592163032, 400.7297938944]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049509_crop.jpg", "text": "Give me a comprehensive description of the specified area in the picture . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two people, and four sneakers.", "boxes_value": [[43.70723047359999, 6.441149081599974, 218.45921630319998, 30.956314931199984], [128.031066892, 0, 238.313842786, 26.5277709824], [39.9185790976, 0, 117.79864501959997, 31.075500492800018], [97.14195979480002, 15.382373734400005, 118.3547417392, 30.68779873279999], [43.70723047359999, 11.35463029760001, 83.44763218359998, 30.956314931199984], [145.74846635919994, 7.776611379200006, 185.8239262216, 28.744362496000008], [187.6839686668, 6.441149081599974, 218.45921630319998, 27.729793894400018]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049510.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[101.01727296, 116.77264406399999, 453.224731456, 285.725463888]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049510_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations.", "boxes_value": [[89.01727296, 42.77264406399999, 441.224731456, 211.72546388799998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049510.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a cabinet, and two people.", "boxes_value": [[101.01727296, 116.77264406399999, 453.224731456, 285.725463888], [101.01727296, 116.77264406399999, 210.121459968, 250.05670166399997], [218.96777344, 120.90087892799998, 309.789672832, 246.518188464], [285.45831296, 259.005249024, 324.46984864, 285.725463888], [322.12054444800003, 201.716125488, 391.25646969599995, 283.605346656], [414.389770496, 203.729797344, 453.224731456, 243.42785644799997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049510_crop.jpg", "text": "Help me understand what's happening in the selected bounding box within . Please mention the objects and their locations. For your reference, objects involved in this region include two pictures, a cabinet, and two people.", "boxes_value": [[89.01727296, 42.77264406399999, 441.224731456, 211.72546388799998], [89.01727296, 42.77264406399999, 198.121459968, 176.05670166399997], [206.96777344, 46.90087892799998, 297.789672832, 172.518188464], [273.45831296, 185.00524902400002, 312.46984864, 211.72546388799998], [310.12054444800003, 127.71612548799999, 379.25646969599995, 209.605346656], [402.389770496, 129.729797344, 441.224731456, 169.42785644799997]], "boxes_seq": [[0], [0], [1, 2], [3], [4, 5]]}, {"image_path": "objects365_v1_00049512.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify.", "boxes_value": [[269.47747801, 270.7059936768, 467.8984375, 397.491821312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049512_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify.", "boxes_value": [[50.47747801000003, 31.705993676800006, 248.8984375, 158.491821312]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049512.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a flower, a vase, a desk, and a moniter.", "boxes_value": [[269.47747801, 270.7059936768, 467.8984375, 397.491821312], [355.46655274, 280.2398071296, 467.8984375, 397.491821312], [252.83355714500001, 306.2899780096, 311.483703635, 345.9184570368], [269.47747801, 339.5778808832, 290.876892085, 383.1692505088], [295.01348877, 312.633117696, 358.192626935, 371.0568237056], [306.502807585, 270.7059936768, 353.69726561000004, 304.9532470784]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049512_crop.jpg", "text": "Can you provide some context for the area within the picture ? Include the coordinates for each object you identify. For your reference, objects involved in this region include a chair, a flower, a vase, a desk, and a moniter.", "boxes_value": [[50.47747801000003, 31.705993676800006, 248.8984375, 158.491821312], [136.46655274, 41.239807129600024, 248.8984375, 158.491821312], [33.833557145000015, 67.28997800960002, 92.48370363499998, 106.91845703680002], [50.47747801000003, 100.57788088320001, 71.87689208500001, 144.16925050880002], [76.01348876999998, 73.633117696, 139.19262693500002, 132.05682370559998], [87.50280758500003, 31.705993676800006, 134.69726561000004, 65.95324707840001]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049513.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object.", "boxes_value": [[129.2619018887, 148.7246704128, 307.5988769357, 297.3472289792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049513_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object.", "boxes_value": [[45.26190188870001, 37.72467041280001, 223.5988769357, 186.3472289792]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049513.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two benches, a desk, a person, and an electric drill.", "boxes_value": [[129.2619018887, 148.7246704128, 307.5988769357, 297.3472289792], [98.1890869469, 246.8996581888, 164.7213744865, 278.703552256], [222.84576414469998, 249.0930786304, 257.5741577088, 279.0691528192], [129.2619018887, 218.7514038272, 235.6403808789, 297.3472289792], [271.8510131885, 148.7246704128, 307.5988769357, 244.5675659264], [175.6632690287, 214.407409664, 196.9781494383, 235.4309082112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049513_crop.jpg", "text": "What is taking place within the specified area in this capture ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include two benches, a desk, a person, and an electric drill.", "boxes_value": [[45.26190188870001, 37.72467041280001, 223.5988769357, 186.3472289792], [14.189086946900005, 135.8996581888, 80.7213744865, 167.70355225600002], [138.84576414469998, 138.0930786304, 173.5741577088, 168.06915281919999], [45.26190188870001, 107.7514038272, 151.6403808789, 186.3472289792], [187.85101318850002, 37.72467041280001, 223.5988769357, 133.5675659264], [91.6632690287, 103.407409664, 112.9781494383, 124.4309082112]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049514.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[213.8302002176, 243.6766967808, 511.4057006592, 610.1644286976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049514_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference.", "boxes_value": [[74.83020021760001, 91.6766967808, 372.4057006592, 458.1644286976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049514.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[213.8302002176, 243.6766967808, 511.4057006592, 610.1644286976], [379.84600832, 243.6766967808, 511.4057006592, 610.1644286976], [213.8302002176, 246.8090820096, 303.4160766464, 504.290161152], [205.7037353472, 465.1336670208, 226.2108764672, 510.1599121152], [243.3533325312, 472.52111815679996, 261.24102784, 503.0793457152], [263.8496704, 460.22338867200006, 282.4827270656, 489.6635741952]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049514_crop.jpg", "text": "Describe the bbox in the provided photo . Give coordinates for the items you reference. For your reference, objects involved in this region include two people, and three sneakers.", "boxes_value": [[74.83020021760001, 91.6766967808, 372.4057006592, 458.1644286976], [240.84600832, 91.6766967808, 372.4057006592, 458.1644286976], [74.83020021760001, 94.80908200959999, 164.41607664639997, 352.290161152], [66.7037353472, 313.1336670208, 87.2108764672, 358.1599121152], [104.35333253120001, 320.52111815679996, 122.24102784000002, 351.0793457152], [124.84967039999998, 308.22338867200006, 143.4827270656, 337.6635741952]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5]]}, {"image_path": "objects365_v1_00049515.jpg", "text": "Please give me some details about the rectangle in the image . Please mention the objects and their locations.", "boxes_value": [[365.83642581600003, 92.9341430784, 794.987792934, 442.6276855296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049515_crop.jpg", "text": "Please give me some details about the rectangle in the image . Please mention the objects and their locations.", "boxes_value": [[107.83642581600003, 87.9341430784, 536.987792934, 437.6276855296]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049515.jpg", "text": "Please give me some details about the rectangle in the image . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a glasses, and a hat.", "boxes_value": [[365.83642581600003, 92.9341430784, 794.987792934, 442.6276855296], [405.0827636625, 93.8030395392, 726.6799316445, 512.0379638784], [751.4462890305, 195.055542016, 794.987792934, 442.6276855296], [365.83642581600003, 163.9423827968, 389.7380371365, 201.9287719936], [527.6694335970001, 144.0298461696, 604.5351562589999, 180.9075927552], [499.23352053750006, 92.9341430784, 639.6356201445, 154.2489624064]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049515_crop.jpg", "text": "Please give me some details about the rectangle in the image . Please mention the objects and their locations. For your reference, objects involved in this region include three people, a glasses, and a hat.", "boxes_value": [[107.83642581600003, 87.9341430784, 536.987792934, 437.6276855296], [147.08276366249999, 88.8030395392, 468.6799316445, 507], [493.4462890305, 190.055542016, 536.987792934, 437.6276855296], [107.83642581600003, 158.9423827968, 131.7380371365, 196.9287719936], [269.6694335970001, 139.0298461696, 346.5351562589999, 175.9075927552], [241.23352053750006, 87.9341430784, 381.63562014449997, 149.2489624064]], "boxes_seq": [[0], [0], [1, 2, 3], [4], [5]]}, {"image_path": "objects365_v1_00049517.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please mention the objects and their locations.", "boxes_value": [[111.3311831412, 167.119140608, 390.8989258043, 498.9697092608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049517_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please mention the objects and their locations.", "boxes_value": [[70.3311831412, 83.11914060800001, 349.8989258043, 414.9697092608]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049517.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a person, two leather shoes, and a slippers.", "boxes_value": [[111.3311831412, 167.119140608, 390.8989258043, 498.9697092608], [239.08032226310002, 264.9879760896, 300.48345950000004, 372.3051147264], [290.7539062815, 167.119140608, 390.8989258043, 498.7254028288], [310.7140096105, 470.2721147904, 340.8092651616, 498.161326336], [339.0074126047, 472.7932128768, 385.5725097341, 498.9697092608], [111.3311831412, 363.5478672384, 141.9919014955, 380.1922572288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049517_crop.jpg", "text": "Help me understand the objects or scenery within the bounding box in the image . Please mention the objects and their locations. For your reference, objects involved in this region include a cabinet, a person, two leather shoes, and a slippers.", "boxes_value": [[70.3311831412, 83.11914060800001, 349.8989258043, 414.9697092608], [198.08032226310002, 180.9879760896, 259.48345950000004, 288.3051147264], [249.7539062815, 83.11914060800001, 349.8989258043, 414.7254028288], [269.7140096105, 386.2721147904, 299.8092651616, 414.161326336], [298.0074126047, 388.7932128768, 344.5725097341, 414.9697092608], [70.3311831412, 279.5478672384, 100.9919014955, 296.1922572288]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5]]}, {"image_path": "objects365_v1_00049519.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[0.523620608, 333.02355955039997, 243.9647826944, 533.6622314389]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049519_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention.", "boxes_value": [[0.523620608, 51.023559550399966, 243.9647826944, 251.6622314389]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049519.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a snowboard, two people, and two sneakers.", "boxes_value": [[0.523620608, 333.02355955039997, 243.9647826944, 533.6622314389], [0.523620608, 450.36120604909996, 235.0818481664, 489.58032228580004], [159.1072997888, 334.7503661805, 357.6702270464, 392.3977050665], [148.9481201152, 356.1050415106, 212.5327758848, 497.2709961209], [50.9717407232, 412.49145506639996, 131.752258304, 533.6622314389], [216.3340453888, 333.02355955039997, 243.9647826944, 351.1649780313], [77.9746703872, 512.177246065, 101.0541381632, 532.4732666276]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049519_crop.jpg", "text": "Explain the content within the rectangular region of the image . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include a bench, a snowboard, two people, and two sneakers.", "boxes_value": [[0.523620608, 51.023559550399966, 243.9647826944, 251.6622314389], [0.523620608, 168.36120604909996, 235.0818481664, 207.58032228580004], [159.1072997888, 52.75036618050001, 304, 110.39770506650001], [148.9481201152, 74.10504151060002, 212.5327758848, 215.2709961209], [50.9717407232, 130.49145506639996, 131.752258304, 251.6622314389], [216.3340453888, 51.023559550399966, 243.9647826944, 69.1649780313], [77.9746703872, 230.17724606499996, 101.0541381632, 250.47326662759997]], "boxes_seq": [[0], [0], [1], [2], [3, 4], [5, 6]]}, {"image_path": "objects365_v1_00049520.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[159.955078144, 38.460937488, 626.1108398719999, 347.76910401600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049520_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object.", "boxes_value": [[116.955078144, 38.460937488, 583.1108398719999, 347.76910401600003]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049520.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, four people, a leather shoes, a handbag, and a hat.", "boxes_value": [[159.955078144, 38.460937488, 626.1108398719999, 347.76910401600003], [473.549438464, 214.95489504, 634.9678954880001, 373.803039552], [97.585510272, 96.86010744, 274.165832512, 342.45593260799996], [341.610656768, 36.266540544, 449.897460928, 479.809326192], [426.578369152, 38.460937488, 467.260009792, 107.19879148800001], [159.955078144, 76.66748044799999, 186.405517568, 103.79614257600001], [192.40942380799999, 335.49383544, 216.409912128, 348.155761728], [218.45745849600002, 279.270446784, 278.76605222399996, 347.76910401600003], [591.928955072, 130.546508784, 626.1108398719999, 162.269287104]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00049520_crop.jpg", "text": "What can be seen in the bounding box in the context of the provided image ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include a bench, four people, a leather shoes, a handbag, and a hat.", "boxes_value": [[116.955078144, 38.460937488, 583.1108398719999, 347.76910401600003], [430.549438464, 214.95489504, 591.9678954880001, 373.803039552], [54.58551027199999, 96.86010744, 231.165832512, 342.45593260799996], [298.610656768, 36.266540544, 406.897460928, 425], [383.578369152, 38.460937488, 424.260009792, 107.19879148800001], [116.955078144, 76.66748044799999, 143.405517568, 103.79614257600001], [149.40942380799999, 335.49383544, 173.409912128, 348.155761728], [175.45745849600002, 279.270446784, 235.76605222399996, 347.76910401600003], [548.928955072, 130.546508784, 583.1108398719999, 162.269287104]], "boxes_seq": [[0], [0], [1], [2, 3, 4, 5], [6], [7], [8]]}, {"image_path": "objects365_v1_00049523.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[167.7620239468, 425.9233398272, 235.841308598, 450.7066039808]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049523_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about.", "boxes_value": [[17.762023946800014, 6.923339827199982, 85.84130859800001, 31.706603980800026]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049523.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three knives, a plate, and a desk.", "boxes_value": [[167.7620239468, 425.9233398272, 235.841308598, 450.7066039808], [158.50561526279998, 421.145812992, 202.3988647677, 444.4360962048], [167.7620239468, 425.9233398272, 214.34259030200002, 447.4220580864], [189.55932618830002, 431.8952026112, 235.841308598, 450.7066039808], [202.9960327248, 433.3881836032, 233.75115963870002, 450.7066039808], [51.3106079266, 376.166809088, 615.5555420029, 510.5108642816]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049523_crop.jpg", "text": "I'd appreciate a breakdown of the area in the displayed image . Give the top-left and bottom-right coordinates for each item you talk about. For your reference, objects involved in this region include three knives, a plate, and a desk.", "boxes_value": [[17.762023946800014, 6.923339827199982, 85.84130859800001, 31.706603980800026], [8.505615262799978, 2.1458129920000033, 52.39886476769999, 25.43609620479998], [17.762023946800014, 6.923339827199982, 64.34259030200002, 28.4220580864], [39.55932618830002, 12.895202611199977, 85.84130859800001, 31.706603980800026], [52.99603272479999, 14.388183603199991, 83.75115963870002, 31.706603980800026], [0, 0, 102, 37]], "boxes_seq": [[0], [0], [1, 2, 4], [3], [5]]}, {"image_path": "objects365_v1_00049524.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[0, 205.8856201216, 322.6932983731, 319.3413696512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049524_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object.", "boxes_value": [[0, 28.8856201216, 322.6932983731, 142.3413696512]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049524.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, three umbrellas, and a street lights.", "boxes_value": [[0, 205.8856201216, 322.6932983731, 319.3413696512], [297.74493404969996, 239.1333618176, 338.1771239962, 269.3035278336], [186.1949462579, 232.043457024, 235.98022460389998, 263.5330810368], [214.4577636925, 207.071899392, 304.7615356601, 317.3889160192], [80.7106322901, 193.892395008, 237.3998413333, 277.3623657472], [0, 208.0481567232, 100.72387698050001, 319.3413696512], [294.1551513445, 205.8856201216, 322.6932983731, 294.2936401408]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049524_crop.jpg", "text": "Kindly describe the objects or scenery in the bounding box within . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, three umbrellas, and a street lights.", "boxes_value": [[0, 28.8856201216, 322.6932983731, 142.3413696512], [297.74493404969996, 62.13336181759999, 338.1771239962, 92.30352783360001], [186.1949462579, 55.04345702399999, 235.98022460389998, 86.53308103680001], [214.4577636925, 30.071899392000006, 304.7615356601, 140.3889160192], [80.7106322901, 16.892395007999994, 237.3998413333, 100.36236574719999], [0, 31.04815672320001, 100.72387698050001, 142.3413696512], [294.1551513445, 28.8856201216, 322.6932983731, 117.2936401408]], "boxes_seq": [[0], [0], [1, 2], [3, 4, 5], [6]]}, {"image_path": "objects365_v1_00049525.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[23.5929565184, 576.1398925824001, 449.6477661184, 744.9554443008]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049525_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object.", "boxes_value": [[23.5929565184, 43.13989258240008, 449.6477661184, 211.95544430079997]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049525.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include six sneakers.", "boxes_value": [[23.5929565184, 576.1398925824001, 449.6477661184, 744.9554443008], [414.9260253696, 714.8632812288, 449.6477661184, 744.9554443008], [308.446044928, 668.484985344, 345.1518554624, 731.3148193536], [251.5148925952, 576.1398925824001, 290.9134521344, 628.2933349632001], [225.2087402496, 561.9676513536, 258.3175659008, 623.758178688], [123.5084838912, 651.9694824192001, 179.2752685568, 681.6601562879999], [23.5929565184, 632.0895995904, 57.9308471808, 696.118164096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049525_crop.jpg", "text": "In the photo , what can you see within the region defined by the bounding box ? Include the coordinates for each mentioned object. For your reference, objects involved in this region include six sneakers.", "boxes_value": [[23.5929565184, 43.13989258240008, 449.6477661184, 211.95544430079997], [414.9260253696, 181.86328122880002, 449.6477661184, 211.95544430079997], [308.446044928, 135.48498534400005, 345.1518554624, 198.3148193536], [251.5148925952, 43.13989258240008, 290.9134521344, 95.2933349632001], [225.2087402496, 28.96765135359999, 258.3175659008, 90.75817868800004], [123.5084838912, 118.96948241920006, 179.2752685568, 148.66015628799994], [23.5929565184, 99.08959959039998, 57.9308471808, 163.118164096]], "boxes_seq": [[0], [0], [1, 2, 3, 4, 5, 6]]}, {"image_path": "objects365_v1_00049527.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify.", "boxes_value": [[128.34939461, 56.1691073024, 557.7115478656, 226.280377856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049527_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify.", "boxes_value": [[107.34939460999999, 43.1691073024, 536.7115478656, 213.280377856]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049527.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, three helmets, and a gloves.", "boxes_value": [[128.34939461, 56.1691073024, 557.7115478656, 226.280377856], [314.42443850120003, 179.3481445376, 383.7014160112, 215.1810302976], [287.0853829444, 56.1691073024, 363.5714682244, 150.3775781888], [439.124796316, 83.2190642688, 513.7030029472, 153.9066162176], [128.34939461, 167.3651117056, 168.7814400076, 226.280377856], [511.76147459400005, 111.1925048832, 557.7115478656, 136.4326782464]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00049527_crop.jpg", "text": "Could you describe the content of the bbox in the image ? Include the coordinates for each object you identify. For your reference, objects involved in this region include an american football, three helmets, and a gloves.", "boxes_value": [[107.34939460999999, 43.1691073024, 536.7115478656, 213.280377856], [293.42443850120003, 166.3481445376, 362.7014160112, 202.1810302976], [266.0853829444, 43.1691073024, 342.5714682244, 137.3775781888], [418.124796316, 70.2190642688, 492.70300294720005, 140.9066162176], [107.34939460999999, 154.3651117056, 147.7814400076, 213.280377856], [490.76147459400005, 98.1925048832, 536.7115478656, 123.4326782464]], "boxes_seq": [[0], [0], [1], [2, 3, 5], [4]]}, {"image_path": "objects365_v1_00049528.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[127.2910071296, 61.42832919359999, 416.3870849536, 238.7406616128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049528_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations.", "boxes_value": [[72.2910071296, 44.42832919359999, 361.3870849536, 221.7406616128]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049528.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a glasses, a hat, a bottle, and a plate.", "boxes_value": [[127.2910071296, 61.42832919359999, 416.3870849536, 238.7406616128], [221.4714966016, 126.76788333, 416.3870849536, 238.7406616128], [127.2910071296, 61.42832919359999, 262.11435264, 150.2009023716], [327.0292967424, 123.56913041819999, 489.0392428032, 262.2762760572], [348.21960448, 91.39117431240001, 369.2917480448, 141.27825924180001], [227.3846435328, 167.7868652718, 262.1096191488, 196.72436526840002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049528_crop.jpg", "text": "Explain what can be found in the bounding box in the context of the image . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include a desk, a glasses, a hat, a bottle, and a plate.", "boxes_value": [[72.2910071296, 44.42832919359999, 361.3870849536, 221.7406616128], [166.4714966016, 109.76788333, 361.3870849536, 221.7406616128], [72.2910071296, 44.42832919359999, 207.11435264, 133.2009023716], [272.0292967424, 106.56913041819999, 433, 245.2762760572], [293.21960448, 74.39117431240001, 314.2917480448, 124.27825924180001], [172.3846435328, 150.7868652718, 207.1096191488, 179.72436526840002]], "boxes_seq": [[0], [0], [1], [2], [3], [4], [5]]}, {"image_path": "objects365_v1_00049529.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each mentioned object.", "boxes_value": [[340.5203857152, 35.6494140416, 667.6259765760001, 223.3079834112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049529_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each mentioned object.", "boxes_value": [[82.5203857152, 35.6494140416, 409.6259765760001, 223.3079834112]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049529.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, and three vases.", "boxes_value": [[340.5203857152, 35.6494140416, 667.6259765760001, 223.3079834112], [577.9675292928, 35.6494140416, 650.8765869312, 113.9774170112], [617.3778076416, 87.375488256, 667.6259765760001, 206.5916748288], [340.5203857152, 116.4406128128, 360.71813967360004, 162.7476806656], [327.71203614719997, 77.0302734336, 375.004394496, 118.903747584], [426.3819580416, 161.0140990976, 461.02856448, 223.3079834112]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00049529_crop.jpg", "text": "I would like a description of the content within the bbox in . Include the coordinates for each mentioned object. For your reference, objects involved in this region include two flowers, and three vases.", "boxes_value": [[82.5203857152, 35.6494140416, 409.6259765760001, 223.3079834112], [319.9675292928, 35.6494140416, 392.8765869312, 113.9774170112], [359.37780764160004, 87.375488256, 409.6259765760001, 206.5916748288], [82.5203857152, 116.4406128128, 102.71813967360004, 162.7476806656], [69.71203614719997, 77.0302734336, 117.00439449599997, 118.903747584], [168.3819580416, 161.0140990976, 203.02856448, 223.3079834112]], "boxes_seq": [[0], [0], [1, 4], [2, 3, 5]]}, {"image_path": "objects365_v1_00049531.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[13.1321411328, 256.4244995072, 129.1852416768, 344.5963134976]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049531_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations.", "boxes_value": [[13.1321411328, 22.424499507199982, 129.1852416768, 110.59631349760002]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049531.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a trash bin can, a street lights, and two cars.", "boxes_value": [[13.1321411328, 256.4244995072, 129.1852416768, 344.5963134976], [33.52392576, 259.9990844928, 71.6522827008, 344.5963134976], [69.2692260864, 256.4244995072, 99.6527709696, 343.1069336064], [114.91174318080002, 274.566040064, 129.1852416768, 299.391662592], [97.92449948160001, 186.2263794176, 114.5192871168, 309.6167602688], [101.633483904, 250.6746215936, 129.852294912, 266.0458374144], [13.1321411328, 256.7148437504, 49.4159546112, 270.8920288256]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049531_crop.jpg", "text": "Tell me what you see within the designated area in the picture . Remember to mention the objects and their corresponding locations. For your reference, objects involved in this region include two people, a trash bin can, a street lights, and two cars.", "boxes_value": [[13.1321411328, 22.424499507199982, 129.1852416768, 110.59631349760002], [33.52392576, 25.999084492800023, 71.6522827008, 110.59631349760002], [69.2692260864, 22.424499507199982, 99.6527709696, 109.10693360639999], [114.91174318080002, 40.56604006399999, 129.1852416768, 65.39166259199999], [97.92449948160001, 0, 114.5192871168, 75.61676026880002], [101.633483904, 16.674621593599994, 129.852294912, 32.04583741440001], [13.1321411328, 22.714843750400007, 49.4159546112, 36.89202882559999]], "boxes_seq": [[0], [0], [1, 2], [3], [4], [5, 6]]}, {"image_path": "objects365_v1_00049532.jpg", "text": "In the displayed image , help me understand the region defined by . Specify the location of each mentioned object.", "boxes_value": [[491.8056640336, 106.4652099584, 655.2321777106, 414.4747924992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049532_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Specify the location of each mentioned object.", "boxes_value": [[41.80566403360001, 77.4652099584, 205.2321777106, 385.4747924992]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049532.jpg", "text": "In the displayed image , help me understand the region defined by . Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, two people, and two bicycles.", "boxes_value": [[491.8056640336, 106.4652099584, 655.2321777106, 414.4747924992], [537.6501464712, 106.4652099584, 595.7159424076, 135.4981689344], [635.2532959122, 367.971557632, 655.2321777106, 392.4116821504], [491.9913330206, 250.914978048, 504.212280264, 275.9810790912], [491.8056640336, 367.3331298816, 526.9514160092, 414.4747924992], [561.0240478262, 353.1481933824, 581.9492187418, 396.261169408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049532_crop.jpg", "text": "In the displayed image , help me understand the region defined by . Specify the location of each mentioned object. For your reference, objects involved in this region include a lamp, two people, and two bicycles.", "boxes_value": [[41.80566403360001, 77.4652099584, 205.2321777106, 385.4747924992], [87.65014647119995, 77.4652099584, 145.71594240759998, 106.49816893440001], [185.25329591219997, 338.971557632, 205.2321777106, 363.4116821504], [41.991333020599996, 221.914978048, 54.212280264000015, 246.9810790912], [41.80566403360001, 338.3331298816, 76.95141600919999, 385.4747924992], [111.02404782619999, 324.1481933824, 131.9492187418, 367.261169408]], "boxes_seq": [[0], [0], [1], [2, 3], [4, 5]]}, {"image_path": "objects365_v1_00049533.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[462.5501709195, 234.0207519744, 765.5751953145, 301.9854736384]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049533_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object.", "boxes_value": [[76.55017091949998, 17.020751974400014, 379, 84.98547363839998]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049533.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four chairs, and two desks.", "boxes_value": [[462.5501709195, 234.0207519744, 765.5751953145, 301.9854736384], [464.42761227000005, 240.779663104, 504.23022459899994, 253.1710204928], [531.265869126, 240.4041747968, 580.455810531, 337.2821044736], [587.2147217009999, 234.0207519744, 630.7722168075001, 301.9854736384], [620.6339110995, 241.15515136, 765.5751953145, 253.5465088], [462.5501709195, 252.4200439296, 544.7836913895001, 264.0604248064], [477.569946294, 255.4240112128, 524.5069580325, 273.4478149632]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4, 5]]}, {"image_path": "objects365_v1_00049533_crop.jpg", "text": "Describe the visual elements within the selected area of the image . Include the coordinates for each mentioned object. For your reference, objects involved in this region include four chairs, and two desks.", "boxes_value": [[76.55017091949998, 17.020751974400014, 379, 84.98547363839998], [78.42761227000005, 23.779663104000008, 118.23022459899994, 36.17102049280001], [145.26586912599998, 23.404174796799992, 194.455810531, 101], [201.21472170099992, 17.020751974400014, 244.77221680750006, 84.98547363839998], [234.63391109949998, 24.15515135999999, 379, 36.5465088], [76.55017091949998, 35.42004392960001, 158.78369138950006, 47.06042480640002], [91.56994629399998, 38.424011212799996, 138.50695803250005, 56.44781496320002]], "boxes_seq": [[0], [0], [1, 2, 3, 6], [4, 5]]}, {"image_path": "objects365_v1_00049534.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention.", "boxes_value": [[403.150512663, 237.7738647552, 554.460937488, 491.0952758784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049534_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention.", "boxes_value": [[38.15051266299997, 63.77386475520001, 189.460937488, 317.0952758784]], "boxes_seq": [[0]]}, {"image_path": "objects365_v1_00049534.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three boats, two sailboats, and a truck.", "boxes_value": [[403.150512663, 237.7738647552, 554.460937488, 491.0952758784], [401.771850624, 304.7564697088, 438.93811031399997, 343.151367168], [403.150512663, 326.4373169152, 437.802124023, 345.172363264], [456.901000956, 316.110229504, 485.79382320749994, 343.3970947072], [468.1032714945, 237.7738647552, 507.5794678095, 365.5503540224], [468.94201657650007, 357.5639037952, 512.4733886925, 491.0952758784], [514.7513428095, 334.1616821248, 554.460937488, 350.5750732288]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}, {"image_path": "objects365_v1_00049534_crop.jpg", "text": "Please explain what is contained in the portion of defined by the box . Provide the coordinates for all objects that you mention. For your reference, objects involved in this region include three boats, two sailboats, and a truck.", "boxes_value": [[38.15051266299997, 63.77386475520001, 189.460937488, 317.0952758784], [36.771850624000024, 130.7564697088, 73.93811031399997, 169.15136716799998], [38.15051266299997, 152.43731691519997, 72.80212402299998, 171.172363264], [91.90100095600002, 142.11022950400002, 120.79382320749994, 169.39709470719998], [103.1032714945, 63.77386475520001, 142.5794678095, 191.55035402239997], [103.94201657650007, 183.56390379520002, 147.47338869249995, 317.0952758784], [149.7513428095, 160.1616821248, 189.460937488, 176.57507322880002]], "boxes_seq": [[0], [0], [1, 2, 3], [4, 5], [6]]}]