"""Detect language via polyglot and fastlid.""" # pylint: disable= from typing import Any, Callable, List, Optional from polyglot.text import Detector import polyglot.detect.base from polyglot.detect.base import UnknownLanguage from fastlid import fastlid from logzero import logger polyglot.detect.base.logger.setLevel("ERROR") def with_func_attrs(**attrs: Any) -> Callable: """Define func_attrs.""" def with_attrs(fct: Callable) -> Callable: for key, val in attrs.items(): setattr(fct, key, val) return fct return with_attrs # @with_func_attrs(set_languages=None) # def detect(text: str) -> str: def detect(text: str, set_languages: Optional[List[str]] = None) -> str: """Detect language via polyglot and fastlid.""" # if not text.strip(): return "en" try: _ = [(elm.code[:2], elm.confidence) for elm in Detector(text).languages] detect.lang_conf = _ lang, conf = _[0] except UnknownLanguage: if set_languages is None: def_lang = "en" else: # def_lang = set_languages[-1] def_lang = set_languages[0] logger.warning(" UnknownLanguage exception: probably snippet too short, setting to %s", def_lang) lang, conf = def_lang, 0 except Exception as exc: logger.error(exc) lang, conf = "en", 0 del conf # if set_languages is None, # trust polyglot.text.Detector if set_languages is None: return lang # set_languages is set if not isinstance(set_languages, (list, tuple)): logger.warning("set_languages (%s) ought to be a list/tuple") if lang in set_languages: return lang # lang not in set_languages, use fastlid fastlid.set_languages = set_languages lang, _ = fastlid(text) return lang