from functools import lru_cache
from typing import List

from zemberek import TurkishMorphology

# Shared Turkish morphology analyzer, built once when the module is imported.
# NOTE(review): confirm that `create_with_default_resources` is the factory
# exposed by the installed zemberek version -- TODO verify.
MORPHOLOGY = TurkishMorphology.create_with_default_resources()

# Backward-compatible alias. This name used to be bound to a second analyzer
# built with `TurkishMorphology.create()` that nothing in this file used;
# it now points at the shared instance so the analyzer is constructed once.
morphology = MORPHOLOGY


@lru_cache(maxsize=1000)  # Memoize per word so repeated words skip re-analysis.
def is_verb_or_verbform_zemberek(word: str) -> bool:
    """Report whether the first Zemberek analysis of a word is a verb.

    Only the first analysis (``result[0]``) is inspected: the word counts as
    a verb when the text "Verb" is contained in that analysis's
    ``primary_pos.value``.

    Results are memoized by ``lru_cache``; this includes the ``False``
    returned from the error path below.

    Args:
        word: The word to analyze.

    Returns:
        True if the first analysis is a verb. False when the input is not a
        string or is blank, when the analyzer yields no (truthy) result, or
        when the analysis raises.
    """
    # Nothing to analyze: answer directly instead of sending blank or
    # non-string input to the analyzer and relying on the broad except below.
    if not isinstance(word, str) or not word.strip():
        return False

    try:
        result = MORPHOLOGY.analyze(word)
        # `result and ...` short-circuits on a falsy result, so the index
        # access only happens when the analyzer returned something truthy.
        return bool(result and "Verb" in result[0].primary_pos.value)
    except Exception as e:
        # Best-effort: report the failure and treat the word as a non-verb
        # rather than aborting the caller's whole batch.
        print(f"Error analyzing word '{word}': {str(e)}")
        return False


def filter_verbs(words: List[str]) -> List[str]:
    """Keep only the words that ``is_verb_or_verbform_zemberek`` accepts.

    Args:
        words: Words to filter.

    Returns:
        A new list with the accepted words, in their original order.
    """
    return [word for word in words if is_verb_or_verbform_zemberek(word)]


def main():
    """Run a small demonstration on a fixed list of sample words."""
    words = ["geliyor", "gitti", "yapmak", "kitap"]
    filtered_words = filter_verbs(words)
    print(f"Verbs found: {filtered_words}")


if __name__ == "__main__":
    main()