File size: 2,150 Bytes
fffb0cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
{
  "builder_name": "google_wellformed_query",
  "citation": "@misc{faruqui2018identifying,\n      title={Identifying Well-formed Natural Language Questions},\n      author={Manaal Faruqui and Dipanjan Das},\n      year={2018},\n      eprint={1808.09419},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n",
  "config_name": "default",
  "dataset_name": "google_wellformed_query",
  "dataset_size": 1230988,
  "description": "Google's query wellformedness dataset was created by crowdsourcing well-formedness annotations for 25,100 queries from the Paralex corpus. Every query was annotated by five raters each with 1/0 rating of whether or not the query is well-formed.\n",
  "download_checksums": {
    "https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/train.tsv": {
      "num_bytes": 805818,
      "checksum": null
    },
    "https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/test.tsv": {
      "num_bytes": 178070,
      "checksum": null
    },
    "https://raw.githubusercontent.com/google-research-datasets/query-wellformedness/master/dev.tsv": {
      "num_bytes": 173131,
      "checksum": null
    }
  },
  "download_size": 1157019,
  "features": {
    "rating": {
      "dtype": "float32",
      "_type": "Value"
    },
    "content": {
      "dtype": "string",
      "_type": "Value"
    }
  },
  "homepage": "https://github.com/google-research-datasets/query-wellformedness",
  "license": "",
  "size_in_bytes": 2388007,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 857383,
      "num_examples": 17500,
      "dataset_name": "google_wellformed_query"
    },
    "test": {
      "name": "test",
      "num_bytes": 189499,
      "num_examples": 3850,
      "dataset_name": "google_wellformed_query"
    },
    "validation": {
      "name": "validation",
      "num_bytes": 184106,
      "num_examples": 3750,
      "dataset_name": "google_wellformed_query"
    }
  },
  "version": {
    "version_str": "0.0.0",
    "major": 0,
    "minor": 0,
    "patch": 0
  }
}