{ "_name_or_path": "MCG-NJU/videomae-base", "architectures": [ "VideoMAEForVideoClassification" ], "attention_probs_dropout_prob": 0.0, "decoder_hidden_size": 384, "decoder_intermediate_size": 1536, "decoder_num_attention_heads": 6, "decoder_num_hidden_layers": 4, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "001", "1": "002", "2": "003", "3": "004", "4": "005", "5": "006", "6": "007", "7": "008", "8": "009", "9": "010", "10": "011", "11": "012", "12": "013", "13": "014", "14": "015", "15": "016", "16": "017", "17": "018", "18": "019", "19": "020", "20": "021", "21": "022", "22": "023", "23": "024", "24": "025", "25": "026", "26": "027", "27": "028", "28": "029", "29": "030", "30": "031", "31": "032", "32": "033", "33": "034", "34": "035", "35": "036", "36": "037", "37": "038", "38": "039", "39": "040", "40": "041", "41": "042", "42": "043", "43": "044", "44": "045", "45": "046", "46": "047", "47": "048", "48": "049", "49": "050", "50": "051", "51": "052", "52": "053", "53": "054", "54": "055", "55": "056", "56": "057", "57": "058", "58": "059", "59": "060", "60": "061", "61": "062", "62": "063", "63": "064", "64": "065", "65": "066", "66": "067", "67": "068", "68": "069", "69": "070", "70": "071", "71": "072", "72": "073", "73": "074", "74": "075", "75": "076", "76": "077", "77": "078", "78": "079", "79": "080", "80": "081", "81": "082", "82": "083", "83": "084", "84": "085", "85": "086", "86": "087", "87": "088", "88": "089", "89": "090", "90": "091", "91": "092", "92": "093", "93": "094", "94": "095", "95": "096", "96": "097", "97": "098", "98": "099", "99": "100", "100": "101", "101": "102", "102": "103", "103": "104", "104": "105", "105": "106", "106": "107", "107": "108", "108": "109", "109": "110", "110": "111", "111": "112", "112": "113", "113": "114", "114": "115", "115": "116", "116": "117", "117": "118", "118": "119", "119": "120", "120": "121", "121": "122", "122": "123", "123": "124" }, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "001": 0, "002": 1, "003": 2, "004": 3, "005": 4, "006": 5, "007": 6, "008": 7, "009": 8, "010": 9, "011": 10, "012": 11, "013": 12, "014": 13, "015": 14, "016": 15, "017": 16, "018": 17, "019": 18, "020": 19, "021": 20, "022": 21, "023": 22, "024": 23, "025": 24, "026": 25, "027": 26, "028": 27, "029": 28, "030": 29, "031": 30, "032": 31, "033": 32, "034": 33, "035": 34, "036": 35, "037": 36, "038": 37, "039": 38, "040": 39, "041": 40, "042": 41, "043": 42, "044": 43, "045": 44, "046": 45, "047": 46, "048": 47, "049": 48, "050": 49, "051": 50, "052": 51, "053": 52, "054": 53, "055": 54, "056": 55, "057": 56, "058": 57, "059": 58, "060": 59, "061": 60, "062": 61, "063": 62, "064": 63, "065": 64, "066": 65, "067": 66, "068": 67, "069": 68, "070": 69, "071": 70, "072": 71, "073": 72, "074": 73, "075": 74, "076": 75, "077": 76, "078": 77, "079": 78, "080": 79, "081": 80, "082": 81, "083": 82, "084": 83, "085": 84, "086": 85, "087": 86, "088": 87, "089": 88, "090": 89, "091": 90, "092": 91, "093": 92, "094": 93, "095": 94, "096": 95, "097": 96, "098": 97, "099": 98, "100": 99, "101": 100, "102": 101, "103": 102, "104": 103, "105": 104, "106": 105, "107": 106, "108": 107, "109": 108, "110": 109, "111": 110, "112": 111, "113": 112, "114": 113, "115": 114, "116": 115, "117": 116, "118": 117, "119": 118, "120": 119, "121": 120, "122": 121, "123": 122, "124": 123 }, "layer_norm_eps": 1e-12, "model_type": "videomae", "norm_pix_loss": true, "num_attention_heads": 12, "num_channels": 3, "num_frames": 16, "num_hidden_layers": 12, "patch_size": 16, "problem_type": "single_label_classification", "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.33.2", "tubelet_size": 2, "use_mean_pooling": false }