Spaces:

spark-ds549
/

Epik

Sleeping

App Files Files Community

Minh Q. Le committed on Dec 6, 2023

Commit

b63a495

1 Parent(s): 9250417

Push data

Browse files

Files changed (34) hide show

Model/COSMIC/feature_extraction/comet/config/atomic/changes.json +16 -0
Model/COSMIC/feature_extraction/comet/config/atomic/config_0.json +79 -0
Model/COSMIC/feature_extraction/comet/config/atomic/default.json +16 -0
Model/COSMIC/feature_extraction/comet/config/atomic/eval_changes.json +25 -0
Model/COSMIC/feature_extraction/comet/config/conceptnet/changes.json +16 -0
Model/COSMIC/feature_extraction/comet/config/conceptnet/config_0.json +61 -0
Model/COSMIC/feature_extraction/comet/config/conceptnet/default.json +23 -0
Model/COSMIC/feature_extraction/comet/config/conceptnet/eval_changes.json +17 -0
Model/COSMIC/feature_extraction/comet/config/default.json +54 -0
Model/COSMIC/feature_extraction/comet/data/atomic/README.md +33 -0
Model/COSMIC/feature_extraction/comet/data/atomic/sap2019atomic.pdf +0 -0
Model/COSMIC/feature_extraction/comet/model/encoder_bpe_40000.json +0 -0
Model/COSMIC/feature_extraction/comet/model/params_0.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_1.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_2.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_3.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_4.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_5.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_6.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_7.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_8.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_9.npy +3 -0
Model/COSMIC/feature_extraction/comet/model/params_shapes.json +1 -0
Model/COSMIC/feature_extraction/comet/model/vocab_40000.bpe +0 -0
Model/COSMIC/feature_extraction/epik-bin/input0/dict.txt +0 -0
Model/COSMIC/feature_extraction/epik-bin/input0/train.bin +3 -0
Model/COSMIC/feature_extraction/epik-bin/input0/train.idx +0 -0
Model/COSMIC/feature_extraction/epik-bin/input0/valid.bin +3 -0
Model/COSMIC/feature_extraction/epik-bin/input0/valid.idx +0 -0
Model/COSMIC/feature_extraction/epik-bin/label/dict.txt +20 -0
Model/COSMIC/feature_extraction/epik-bin/label/train.bin +3 -0
Model/COSMIC/feature_extraction/epik-bin/label/train.idx +0 -0
Model/COSMIC/feature_extraction/epik-bin/label/valid.bin +3 -0
Model/COSMIC/feature_extraction/epik-bin/label/valid.idx +0 -0

Model/COSMIC/feature_extraction/comet/config/atomic/changes.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}

Model/COSMIC/feature_extraction/comet/config/atomic/config_0.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+    "gpu_mode": "T",
+    "gpu_index": 0,
+    "gpu_indices": [
+        0,
+        1
+    ],
+    "multigpu": "F",
+    "topk_size": 10,
+    "beam_size": 1,
+    "gen_seqlength": 40,
+    "eval_sampler": "greedy",
+    "num_sequences": 1,
+    "generate_sequences": "full",
+    "evaluate_sequences": "full",
+    "random_seed": 123,
+    "optimizer": "adam",
+    "batch_size": 64,
+    "learning_rate": 6.25e-05,
+    "clip": 1,
+    "loss": "nll",
+    "weight_decay": 0,
+    "adam": {
+        "b2": 0.999,
+        "b1": 0.9,
+        "e": 1e-08
+    },
+    "model": "transformer",
+    "pretrain": "gpt",
+    "hidden_dim": 768,
+    "num_layers": 12,
+    "num_heads": 12,
+    "embedding_dropout": 0.1,
+    "attention_dropout": 0.1,
+    "residual_dropout": 0.1,
+    "output_dropout": 0.1,
+    "activation": "gelu",
+    "init": "pt",
+    "trainer": "iteration",
+    "iterations": 50000,
+    "cycle": 500,
+    "save_strategy": "best",
+    "epochs": 20,
+    "toy": "F",
+    "do_gen": "F",
+    "save": "T",
+    "test_save": "F",
+    "dataset": "atomic",
+    "categories": [
+        "oReact",
+        "oEffect",
+        "oWant",
+        "xAttr",
+        "xEffect",
+        "xIntent",
+        "xNeed",
+        "xReact",
+        "xWant"
+    ],
+    "eval_categories": [
+        "oReact",
+        "oEffect",
+        "oWant",
+        "xAttr",
+        "xEffect",
+        "xIntent",
+        "xNeed",
+        "xReact",
+        "xWant"
+    ],
+    "exp": "generation",
+    "labels": "individual",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T"
+}

Model/COSMIC/feature_extraction/comet/config/atomic/default.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "dataset": "atomic",
+    "categories": ["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"],
+    "eval_categories": ["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"],
+    "exp": "generation",
+    "labels": "individual",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "batch_size": 64,
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T",
+    "evaluate_sequences": 10000
+}

Model/COSMIC/feature_extraction/comet/config/atomic/eval_changes.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+    "base": {
+        "0": {
+            "gpu_index": 0,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "1": {
+            "gpu_index": 1,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "2": {
+            "gpu_index": 2,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "3": {
+            "gpu_index": 3,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        }
+    }
+}

Model/COSMIC/feature_extraction/comet/config/conceptnet/changes.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}

Model/COSMIC/feature_extraction/comet/config/conceptnet/config_0.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+    "gpu_mode": "T",
+    "gpu_index": 0,
+    "gpu_indices": [
+        0,
+        1
+    ],
+    "multigpu": "F",
+    "topk_size": 10,
+    "beam_size": 1,
+    "gen_seqlength": 40,
+    "eval_sampler": "greedy",
+    "num_sequences": 1,
+    "generate_sequences": "full",
+    "evaluate_sequences": "full",
+    "random_seed": 123,
+    "optimizer": "adam",
+    "batch_size": 64,
+    "learning_rate": 1e-05,
+    "clip": 1,
+    "loss": "nll",
+    "weight_decay": 0,
+    "adam": {
+        "b2": 0.999,
+        "b1": 0.9,
+        "e": 1e-08
+    },
+    "model": "transformer",
+    "pretrain": "gpt",
+    "hidden_dim": 768,
+    "num_layers": 12,
+    "num_heads": 12,
+    "embedding_dropout": 0.1,
+    "attention_dropout": 0.1,
+    "residual_dropout": 0.1,
+    "output_dropout": 0.1,
+    "activation": "gelu",
+    "init": "pt",
+    "trainer": "iteration",
+    "iterations": 100000,
+    "cycle": 500,
+    "save_strategy": "best",
+    "epochs": 20,
+    "toy": "F",
+    "do_gen": "T",
+    "save": "T",
+    "test_save": "F",
+    "dataset": "conceptnet",
+    "exp": "generation",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T",
+    "relation_format": "language",
+    "training_set_size": 100,
+    "development_set_versions_to_use": "12",
+    "max_event_1_size": 10,
+    "max_event_2_size": 15
+}

Model/COSMIC/feature_extraction/comet/config/conceptnet/default.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "dataset": "conceptnet",
+    "exp": "generation",
+    "do_gen": "T",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "batch_size": 64,
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T",
+    "generate_sequences": "full",
+    "evaluate_sequences": "full",
+    "relation_format": "language",
+    "training_set_size": 100,
+    "development_set_versions_to_use": "12",
+    "max_event_1_size": 10,
+    "max_event_2_size": 15,
+    "eval_sampler": "greedy",
+    "iterations": 100000,
+    "learning_rate": 1e-5
+}

Model/COSMIC/feature_extraction/comet/config/conceptnet/eval_changes.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}

Model/COSMIC/feature_extraction/comet/config/default.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "gpu_mode": "T",
+    "gpu_index": 0,
+    "gpu_indices": [0, 1],
+    "multigpu": "F",
+    "topk_size": 10,
+    "beam_size": 1,
+    "gen_seqlength": 40,
+    "eval_sampler": "greedy",
+    "num_sequences": 1,
+    "generate_sequences": 1000,
+    "evaluate_sequences": 10000,
+    "random_seed": 123,
+    "optimizer": "adam",
+    "batch_size": 64,
+    "learning_rate": 6.25e-5,
+    "clip": 1,
+    "loss": "nll",
+    "weight_decay": 0,
+    "adam": {
+        "b2": 0.999,
+        "b1": 0.9,
+        "e": 1e-8
+    },
+    "model": "transformer",
+    "pretrain": "gpt",
+    "hidden_dim": 768,
+    "num_layers": 12,
+    "num_heads": 12,
+    "embedding_dropout": 0.1,
+    "attention_dropout": 0.1,
+    "residual_dropout": 0.1,
+    "output_dropout": 0.1,
+    "activation": "gelu",
+    "init": "pt",
+    "trainer": "iteration",
+    "iterations": 50000,
+    "cycle": 500,
+    "save_strategy": "best",
+    "epochs": 20,
+    "toy": "F",
+    "do_gen": "F",
+    "save": "T",
+    "test_save": "F"
+}

Model/COSMIC/feature_extraction/comet/data/atomic/README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+# ATOMIC
+This tarball contains the ATOMIC knowledge graph.
+Files present:
+- `v4_atomic_all_agg.csv`: contains one event per line, with all annotations aggregated into one list (but not de-duplicated, so there might be repeats).
+- `v4_atomic_all.csv`: keeps track of which worker did which annotations. Each line is the answers from one worker only, so there are multiple lines for the same event.
+- `v4_atomic_trn.csv`, `v4_atomic_dev.csv`, `v4_atomic_tst.csv`: same as above, but split based on train/dev/test split.
+All files are CSVs containing the following columns:
+- event: just a string representation of the event.
+- oEffect,oReact,oWant,xAttr,xEffect,xIntent,xNeed,xReact,xWant: annotations for each of the dimensions, stored in a json-dumped list of strings.
+**Note**: `[""none""]` (the CSV-escaped form of the JSON list `["none"]`) means the worker explicitly responded with the empty response, whereas `[]` means the worker did not annotate this dimension.
+- prefix: json-dumped list that represents the prefix of content words (used to make a better trn/dev/tst split).
+- split: string rep of which split the event belongs to.
+Suggested code for loading the data into a pandas dataframe:
+```python
+import pandas as pd
+import json
+df = pd.read_csv("v4_atomic_all.csv",index_col=0)
+df.iloc[:,:9] = df.iloc[:,:9].apply(lambda col: col.apply(json.loads))
+```
+**_Disclaimer/Content warning_**: the events in ATOMIC have been automatically extracted from blogs, stories, and books written at various times.
+The events might depict violent or problematic actions, which we left in the corpus for the sake of learning the (probably negative but still important) commonsense implications associated with the events.
+We removed a small set of truly outdated events, but might have missed some, so please email us ([email protected]) if you have any concerns.
+## Paper
+Please cite the following work when using this data:
+> Maarten Sap, Ronan LeBras, Emily Allaway, Chandra Bhagavatula, Nicholas Lourie, Hannah Rashkin, Brendan Roof, Noah A. Smith & Yejin Choi (2019).
+> ATOMIC: An Atlas of Machine Commonsense for If-Then Reasoning. AAAI

Model/COSMIC/feature_extraction/comet/data/atomic/sap2019atomic.pdf ADDED Viewed

Binary file (745 kB). View file

Model/COSMIC/feature_extraction/comet/model/encoder_bpe_40000.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Model/COSMIC/feature_extraction/comet/model/params_0.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d9cd095b901dfbfbe0ce5e01d151dfe0b791e955d71149969ba65a6eab4480f
+size 46614044

Model/COSMIC/feature_extraction/comet/model/params_1.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca074893c040fa69cbf2fc95c06feda45a4e1492d03b645e2076e89ccf7ddd9f
+size 46614044

Model/COSMIC/feature_extraction/comet/model/params_2.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:966c25fbd632f0df18c4d4380ba57f23410f43311a96616f00b3d05ae6592f58
+size 46614044

Model/COSMIC/feature_extraction/comet/model/params_3.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40df0d328f5d3d1b2bec768855a5d2eeeaf2b2124758ef98116f76a02526fd92
+size 46614044

Model/COSMIC/feature_extraction/comet/model/params_4.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:139f098dcd620ccf0200530e9ce9ff1c342714ff881a0c7258ac9faac4a06e6a
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_5.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad27b5cb245db9a29657270ff637d3ff1c15fd9df3683324a2936674cef8c3c5
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_6.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af5bb5c76ddfea50683e0b9895fe704ae689853ed8bb3f1b3fee4daff2f27d45
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_7.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27f55501d895ce1adb9b254aa762519a242edf2bcd2b43298b89538b5591566c
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_8.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17a2b695128ea0aae98a360351b92769b879bc0f2835862949b6405b0ce88569
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_9.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1355fcd519db223f65db7fa7b79dcaf9b4c653915ffe4bd417d87f7903225c1
+size 46614040

Model/COSMIC/feature_extraction/comet/model/params_shapes.json ADDED Viewed

	@@ -0,0 +1 @@

+ [[512, 768], [40478, 768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768], [1, 768, 2304], [2304], [1, 768, 768], [768], [768], [768], [1, 768, 3072], [3072], [1, 3072, 768], [768], [768], [768]]

Model/COSMIC/feature_extraction/comet/model/vocab_40000.bpe ADDED Viewed

The diff for this file is too large to render. See raw diff

Model/COSMIC/feature_extraction/epik-bin/input0/dict.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

Model/COSMIC/feature_extraction/epik-bin/input0/train.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed51da33e9e82f6caf591795fdd8113dbc0c4a53a418f7eeb06b114f5f5b6b28
+size 1775332

Model/COSMIC/feature_extraction/epik-bin/input0/train.idx ADDED Viewed

Binary file (820 kB). View file

Model/COSMIC/feature_extraction/epik-bin/input0/valid.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bb9cf83ea51adf9bdf7bbb90186d232b7d8c5e8c6fa5d4fa75daf61b1ba0707
+size 223948

Model/COSMIC/feature_extraction/epik-bin/input0/valid.idx ADDED Viewed

Binary file (105 kB). View file

Model/COSMIC/feature_extraction/epik-bin/label/dict.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+0 14125
+1 7674
+2 7674
+3 6735
+4 5564
+5 4875
+6 3589
+7 3475
+8 3045
+9 2900
+10 2823
+11 1918
+12 1616
+13 1423
+14 931
+madeupword0000 0
+madeupword0001 0
+madeupword0002 0
+madeupword0003 0
+madeupword0004 0

Model/COSMIC/feature_extraction/epik-bin/label/train.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84917004eb53f27db1e68c2b6ff14eafb193e5f3c43cbe995a6fc1cf22a4b442
+size 273468

Model/COSMIC/feature_extraction/epik-bin/label/train.idx ADDED Viewed

Binary file (820 kB). View file

Model/COSMIC/feature_extraction/epik-bin/label/valid.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5637d7a2e1353adf06ec34cdfeaf6ce2f002b0de466ef15afe84e2c0dd3f24a6
+size 35136

Model/COSMIC/feature_extraction/epik-bin/label/valid.idx ADDED Viewed

Binary file (105 kB). View file