# src/ontology.py
import re
from typing import Dict, List, Pattern


class OntologyRegistry:
    """Registry of regex patterns and NER-label mappings.

    Attributes:
        temporal_patterns: Regex sources for clock times, month-name dates,
            the word "tomorrow", and "in N days/weeks/months" offsets.
        location_patterns: Regex sources for preposition + capitalised
            place name, "<Name> Base", "Headquarters" and "HQ".
        entity_types: Maps short NER labels ('PER', 'ORG', 'LOC', 'MISC')
            to ontology type names.
    """

    def __init__(self) -> None:
        self.temporal_patterns: List[str] = [
            # Clock time with an optional meridiem. The whitespace lives
            # INSIDE the optional group so a bare time followed by another
            # word ("3:30 tomorrow") matches "3:30", not "3:30 " with a
            # trailing space.
            r'\b\d{1,2}:\d{2}(?:\s*(?:AM|PM|am|pm))?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]

        self.location_patterns: List[str] = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]

        self.entity_types: Dict[str, str] = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }

        # Compile once at construction so validate_pattern() does not have
        # to go through re's pattern lookup on every call.
        self._compiled_patterns: Dict[str, List[Pattern[str]]] = {
            'temporal': [re.compile(p) for p in self.temporal_patterns],
            'location': [re.compile(p) for p in self.location_patterns],
        }

    def validate_pattern(self, text: str, pattern_type: str) -> List[str]:
        """Return every substring of *text* matched by a pattern family.

        Args:
            text: Input text to scan.
            pattern_type: Pattern family to apply ('temporal' or 'location').

        Returns:
            The full text of each match (``match.group()``). Results are
            grouped by pattern in declaration order, and left-to-right
            within each pattern, so they are not globally sorted by
            position in *text*. For the preposition-based location pattern
            the match includes the preposition (e.g. ``"in Paris"``).
            An unrecognised *pattern_type* yields an empty list.
        """
        compiled = self._compiled_patterns.get(pattern_type, [])
        return [
            found.group()
            for regex in compiled
            for found in regex.finditer(text)
        ]

    def get_entity_type(self, ner_type: str) -> str:
        """Map an NER label to its ontology type name.

        Labels missing from ``entity_types`` fall back to 'miscellaneous'.
        """
        return self.entity_types.get(ner_type, 'miscellaneous')