import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Optional

@dataclass
class FileInfo:
    """A file path paired with the text read from that file, if any."""

    # Location of the file.
    path: Path
    # Decoded text of the file; defaults to None when no content is attached.
    content: Optional[str] = None

class FileScanner:
    """Collect the text of source-code files found under a base directory.

    A file is picked up when its extension is listed in
    ``TARGET_EXTENSIONS`` and no path component *below the base directory*
    is listed in ``EXCLUDED_DIRS``.
    """

    # File extensions to scan (lower-case, including the leading dot).
    TARGET_EXTENSIONS = {
        '.py', '.js', '.java', '.cpp', '.hpp', '.c', '.h',
        '.go', '.rs', '.php', '.rb', '.ts', '.scala', '.kt',
        '.cs', '.swift', '.m', '.sh', '.pl', '.r'
    }

    # Directory names that are excluded from scanning.
    EXCLUDED_DIRS = {
        '.git', '__pycache__', 'node_modules', 'venv', '.env',
        'build', 'dist', 'target', 'bin', 'obj'
    }

    # Encodings tried, in order, when decoding a file: UTF-8 first, then
    # cp932 as the fallback.
    ENCODINGS = ('utf-8', 'cp932')

    def __init__(self, base_dir: Path):
        """Store *base_dir* as the root directory of every scan."""
        self.base_dir = base_dir

    def _should_scan_file(self, path: Path) -> bool:
        """Return True if *path* is a scan target.

        A path qualifies when its suffix (compared case-insensitively) is in
        ``TARGET_EXTENSIONS`` and none of its components below ``base_dir``
        is in ``EXCLUDED_DIRS``.
        """
        try:
            # Judge exclusions only on the part below base_dir; otherwise a
            # project that itself lives under a directory such as "build" or
            # "bin" would have every one of its files rejected.
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # Not located under base_dir (e.g. already a relative path):
            # fall back to checking the path exactly as given.
            relative = path

        if not self.EXCLUDED_DIRS.isdisjoint(relative.parts):
            return False
        return path.suffix.lower() in self.TARGET_EXTENSIONS

    def _read_file_content(self, file_path: Path) -> Optional[str]:
        """Return the decoded text of *file_path*, or None if unreadable.

        Each encoding in ``ENCODINGS`` is tried in order. None is returned
        when the file cannot be opened/read or when no encoding decodes it.
        """
        for encoding in self.ENCODINGS:
            try:
                return file_path.read_text(encoding=encoding)
            except UnicodeDecodeError:
                # Wrong encoding guess; try the next candidate.
                continue
            except OSError:
                # The file itself is unreadable; another encoding won't help.
                return None
        return None

    def scan_files(self) -> List[FileInfo]:
        """Scan ``base_dir`` recursively and return the readable target files.

        Returns:
            One ``FileInfo`` per target file whose content could be decoded,
            with ``path`` relative to ``base_dir``, sorted by the string form
            of that path. Files that cannot be read are silently skipped.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"Directory not found: {self.base_dir}")

        files: List[FileInfo] = []

        for dirpath, dirnames, filenames in os.walk(self.base_dir):
            # Prune excluded directories in place so os.walk never descends
            # into them (avoids crawling e.g. a huge node_modules tree).
            dirnames[:] = [
                name for name in dirnames if name not in self.EXCLUDED_DIRS
            ]

            for filename in filenames:
                entry = Path(dirpath, filename)
                # is_file() filters out non-regular entries such as FIFOs
                # or broken symlinks that os.walk reports as "files".
                if not (entry.is_file() and self._should_scan_file(entry)):
                    continue

                content = self._read_file_content(entry)
                if content is not None:
                    files.append(FileInfo(
                        path=entry.relative_to(self.base_dir),
                        content=content
                    ))

        return sorted(files, key=lambda info: str(info.path))