File size: 679 Bytes
589c7b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Stack the train and test frames so each encoding is computed over the
# values of both sets.
frames = [train, test]
df = pd.concat(frames)

# Columns that get replaced by a frequency-rank label.
list_frequency_encoding = [
    'AppVersion',
    'AvSigVersion',
    'Census_OSVersion',
    'EngineVersion',
    'OsBuildLab',
]

def frequency_encoding(feature, data=None):
    """Build a frequency-rank label for every distinct value of a column.

    Values are ranked by descending occurrence count (0 = most frequent).
    All values that occur at most once share a single extra label, equal to
    the number of values that occur more than once.

    The ranking is derived directly from the ``value_counts`` result rather
    than from column names generated by ``reset_index`` (``'index'``,
    ``'level_0'``), because those names differ between pandas versions.

    Args:
        feature: Name of the column to encode.
        data: DataFrame holding the column. Defaults to the module-level
            ``df`` when omitted.

    Returns:
        dict mapping each distinct non-missing value to its integer label.
        Missing values are not included, since ``value_counts`` skips them.
    """
    source = df if data is None else data

    # value_counts() sorts by descending count, so the values seen more than
    # once form a prefix and their position is their frequency rank.
    counts = source[feature].value_counts()
    frequent = counts.index[(counts > 1).to_numpy()]

    # Every rare value collapses into one label placed right after the
    # last frequent rank (0 when there are no frequent values at all).
    rare_label = len(frequent)

    encoding = {value: rare_label for value in counts.index}
    encoding.update((value, rank) for rank, value in enumerate(frequent))
    return encoding

# Replace every listed column with its frequency-rank label.
for feature in tqdm(list_frequency_encoding):
    freq_enc_dict = frequency_encoding(feature)
    # Passing the dict straight to ``map`` performs the lookup inside pandas
    # instead of calling a Python lambda per row; values that are not keys
    # of the dict still come back as NaN.
    # NOTE: ``astype('int64')`` raises if any NaN is left, which happens
    # when the column contains a value absent from the dict (for example a
    # missing value, which ``value_counts`` does not count).
    df[feature] = df[feature].map(freq_enc_dict).astype('int64')