File size: 269 Bytes
3bd5293 |
1 2 3 4 5 6 7 8 9 10 11 12 |
import pandas as pd
import json
# Input/output locations, relative to the working directory the script is
# launched from.
SOURCE_JSON = "data/ccs_synthetic_filtered_large.json"
TARGET_FEATHER = "data/ccs_synthetic.feather"


def build_caption_frame(records):
    """Build the caption table from decoded JSON records.

    Two columns are added on top of whatever ``pd.DataFrame(records)``
    produces:

    * ``index``    -- 1-based row number (the DataFrame's positional index + 1).
    * ``nr_words`` -- number of whitespace-separated tokens in ``caption``.

    Args:
        records: The object returned by ``json.load``; anything accepted by
            the ``pd.DataFrame`` constructor. It must yield a ``caption``
            column whose values support ``.split()``.
            NOTE(review): presumably a list of dicts with string captions --
            confirm against the data file.

    Returns:
        The resulting ``pd.DataFrame``.

    Raises:
        KeyError: If the records produce no ``caption`` column.
        AttributeError: If a caption value has no ``.split()`` (e.g. a
            missing value).
    """
    df = pd.DataFrame(records)
    df["index"] = df.index + 1
    # str.split() with no argument splits on runs of whitespace, so an empty
    # or blank caption counts as 0 words.
    df["nr_words"] = df["caption"].apply(lambda caption: len(caption.split()))
    return df


def main():
    """Read the source JSON, derive the extra columns, and write Feather."""
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # encoding when decoding the file.
    with open(SOURCE_JSON, encoding="utf-8") as f:
        records = json.load(f)
    build_caption_frame(records).to_feather(TARGET_FEATHER)


if __name__ == "__main__":
    main()
|