File size: 269 Bytes
3bd5293
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
import pandas as pd
import json

# Convert the filtered synthetic-caption JSON file into a Feather file,
# adding an "index" column and a per-caption word count along the way.

# JSON text is UTF-8 by specification; decode it explicitly so the result
# does not depend on the platform's locale encoding (e.g. cp1252 on Windows).
with open("data/ccs_synthetic_filtered_large.json", encoding="utf-8") as f:
    d = json.load(f)

# NOTE(review): assumes the JSON is shaped so that DataFrame(d) produces a
# "caption" column holding strings -- confirm against the data file.
df = pd.DataFrame(d)

# Existing row label plus one (1-based numbering when the frame carries the
# default 0..n-1 index).
df["index"] = df.index + 1

# Number of whitespace-separated tokens in each caption.
df["nr_words"] = df["caption"].apply(lambda x: len(x.split()))

df.to_feather("data/ccs_synthetic.feather")