{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "sky", "1": "snowboarder", "2": "windows", "3": "resting", "4": "doughnut", "5": "plate", "6": "soccer ball", "7": "picnic table", "8": "10", "9": "out", "10": "lady", "11": "woman", "12": "white", "13": "girl", "14": "tent", "15": "security", "16": "green", "17": "platform", "18": "brick", "19": "tv", "20": "king", "21": "gray and black", "22": "roof", "23": "bedroom", "24": "shelter", "25": "street", "26": "0", "27": "clock", "28": "happy", "29": "jeep", "30": "snowboarding", "31": "no", "32": "yellow", "33": "dirt", "34": "not there", "35": "not sure", "36": "table", "37": "window", "38": "large", "39": "hair", "40": "protection", "41": "in car", "42": "sleeping", "43": "at table", "44": "curtain", "45": "brown", "46": "exit", "47": "ball", "48": "lying down", "49": "human", "50": "french", "51": "train", "52": "neon", "53": "skateboarding", "54": "skier", "55": "plastic", "56": "black and white", "57": "low", "58": "2", "59": "bicycles", "60": "wine tasting", "61": "wine", "62": "full", "63": "soccer", "64": "suv", "65": "ice cream", "66": "sun", "67": "down", "68": "yes", "69": "8:35", "70": "woods", "71": "cage", "72": "red", "73": "desert", "74": "tabby", "75": "9:35", "76": "don't know", "77": "solid", "78": "on street", "79": "shadows", "80": "giraffes", "81": "beige", "82": "hat", "83": "fence", "84": "calico", "85": "car", "86": "park", "87": "black", "88": "bicycle", "89": "clock tower", "90": "name tag", "91": "tired", "92": "clear", "93": "shade", "94": "man", "95": "sidewalk", "96": "purple", "97": "walking", "98": "cloudy", "99": "bus", "100": "bike rack", "101": "canopy", "102": "red and yellow", "103": "big ben", "104": "shadow", "105": "5", "106": "necklace", "107": "boy", "108": "little girl", "109": "stripes", "110": "cat", "111": "crossing", "112": "tower", "113": "red and blue", "114": "style", "115": "screen", "116": "2010", "117": "talking on phone", "118": "trees", "119": "church", "120": "door", "121": "7", "122": "3", "123": "many", "124": "station", "125": "skiing", "126": "8", "127": "outside", "128": "unknown", "129": "blue", "130": "bricks", "131": "2000", "132": "snow", "133": "leather", "134": "giraffe", "135": "smile", "136": "hawaii", "137": "wall", "138": "chopsticks", "139": "right", "140": "curtains", "141": "bikes", "142": "wedding", "143": "net", "144": "white and black", "145": "double", "146": "chair", "147": "fashion", "148": "2013", "149": "6", "150": "donut", "151": "camera", "152": "7:45", "153": "women", "154": "blonde", "155": "person", "156": "queen", "157": "rack", "158": "photographer", "159": "watching", "160": "7:35", "161": "plain", "162": "forest", "163": "nothing", "164": "lanyard", "165": "monitor", "166": "they aren't", "167": "backpack", "168": "blue and white", "169": "4", "170": "small", "171": "tan", "172": "beagle", "173": "1", "174": "birthday", "175": "air", "176": "stand", "177": "africa", "178": "crown", "179": "talking", "180": "white and blue", "181": "can't tell", "182": "zoo", "183": "pink", "184": "natural", "185": "laying down", "186": "cup", "187": "lg", "188": "cross", "189": "snowboard", "190": "orange", "191": "skateboard", "192": "shrimp", "193": "ground", "194": "arrow", "195": "on road", "196": "smiling", "197": "gray", "198": "dog" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 26, "1": 173, "10": 8, "2": 58, "2000": 131, "2010": 116, "2013": 148, "3": 122, "4": 169, "5": 105, "6": 149, "7": 121, "7:35": 160, "7:45": 152, "8": 126, "8:35": 69, "9:35": 75, "africa": 177, "air": 175, "arrow": 194, "at table": 43, "backpack": 167, "ball": 47, "beagle": 172, "bedroom": 23, "beige": 81, "bicycle": 88, "bicycles": 59, "big ben": 103, "bike rack": 100, "bikes": 141, "birthday": 174, "black": 87, "black and white": 56, "blonde": 154, "blue": 129, "blue and white": 168, "boy": 107, "brick": 18, "bricks": 130, "brown": 45, "bus": 99, "cage": 71, "calico": 84, "camera": 151, "can't tell": 181, "canopy": 101, "car": 85, "cat": 110, "chair": 146, "chopsticks": 138, "church": 119, "clear": 92, "clock": 27, "clock tower": 89, "cloudy": 98, "cross": 188, "crossing": 111, "crown": 178, "cup": 186, "curtain": 44, "curtains": 140, "desert": 73, "dirt": 33, "dog": 198, "don't know": 76, "donut": 150, "door": 120, "double": 145, "doughnut": 4, "down": 67, "exit": 46, "fashion": 147, "fence": 83, "forest": 162, "french": 50, "full": 62, "giraffe": 134, "giraffes": 80, "girl": 13, "gray": 197, "gray and black": 21, "green": 16, "ground": 193, "hair": 39, "happy": 28, "hat": 82, "hawaii": 136, "human": 49, "ice cream": 65, "in car": 41, "jeep": 29, "king": 20, "lady": 10, "lanyard": 164, "large": 38, "laying down": 185, "leather": 133, "lg": 187, "little girl": 108, "low": 57, "lying down": 48, "man": 94, "many": 123, "monitor": 165, "name tag": 90, "natural": 184, "necklace": 106, "neon": 52, "net": 143, "no": 31, "not sure": 35, "not there": 34, "nothing": 163, "on road": 195, "on street": 78, "orange": 190, "out": 9, "outside": 127, "park": 86, "person": 155, "photographer": 158, "picnic table": 7, "pink": 183, "plain": 161, "plastic": 55, "plate": 5, "platform": 17, "protection": 40, "purple": 96, "queen": 156, "rack": 157, "red": 72, "red and blue": 113, "red and yellow": 102, "resting": 3, "right": 139, "roof": 22, "screen": 115, "security": 15, "shade": 93, "shadow": 104, "shadows": 79, "shelter": 24, "shrimp": 192, "sidewalk": 95, "skateboard": 191, "skateboarding": 53, "skier": 54, "skiing": 125, "sky": 0, "sleeping": 42, "small": 170, "smile": 135, "smiling": 196, "snow": 132, "snowboard": 189, "snowboarder": 1, "snowboarding": 30, "soccer": 63, "soccer ball": 6, "solid": 77, "stand": 176, "station": 124, "street": 25, "stripes": 109, "style": 114, "sun": 66, "suv": 64, "tabby": 74, "table": 36, "talking": 179, "talking on phone": 117, "tan": 171, "tent": 14, "they aren't": 166, "tired": 91, "tower": 112, "train": 51, "trees": 118, "tv": 19, "unknown": 128, "walking": 97, "wall": 137, "watching": 159, "wedding": 142, "white": 12, "white and black": 144, "white and blue": 180, "window": 37, "windows": 2, "wine": 61, "wine tasting": 60, "woman": 11, "women": 153, "woods": 70, "yellow": 32, "yes": 68, "zoo": 182 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.34.1", "type_vocab_size": 2, "vocab_size": 30522 }