# Spaces: Runtime error
import math
from pathlib import Path
from typing import Dict, List

import spacy
from spacy import Language
# Shared spaCy pipeline, loaded once when this module is imported.
# NOTE(review): "hu_core_news_trf" is presumably the Hungarian transformer
# model package; it must be installed or spacy.load will fail at import.
NLP: Language = spacy.load("hu_core_news_trf")
def _compute_idf(freq_file: Path) -> Dict[str, float]:
    """Build a smoothed IDF-style weight table from a word-frequency list.

    Every non-blank line of *freq_file* is split on whitespace: the first
    field is the word and the last field is its integer frequency. When a
    word appears on several lines, the last occurrence wins.

    The weight of a word with frequency ``f`` is
    ``log2(ref / (f + 1)) + 1``, where ``ref`` is the frequency listed for
    the token ``"a"``. If the list has no ``"a"`` entry, the largest
    frequency in the list is used as ``ref`` instead.

    Args:
        freq_file: Path to the UTF-8 encoded frequency list.

    Returns:
        Mapping from word to weight; empty when the file has no entries.

    Raises:
        ValueError: If the last field of a line is not an integer.
    """
    freqs: Dict[str, int] = {}
    # Decode explicitly so non-ASCII words do not depend on the platform locale.
    with freq_file.open(encoding="utf-8") as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            # The original guarded this store with `not line.isalpha()`, which
            # is always true for a line whose last field parsed as an integer,
            # so every entry is kept.
            # NOTE(review): if only alphabetic words were meant to be kept,
            # filter on the word itself -- confirm before changing.
            freqs[fields[0]] = int(fields[-1])

    if not freqs:
        return {}

    # "a" is the reference token; fall back to the most frequent entry so a
    # list without it still yields a table instead of raising KeyError.
    ref = freqs["a"] if "a" in freqs else max(freqs.values())
    return {word: math.log2(ref / (count + 1)) + 1 for word, count in freqs.items()}
# Word -> weight table, computed once at import time from "freq.list" in the
# "resources" directory that sits beside this module's parent directory.
IDF: Dict[str, float] = _compute_idf(Path(__file__).parent.parent / "resources" / "freq.list")