Spaces:
Running
Running
from pathlib import Path | |
from typing import List, Dict, Optional | |
from dataclasses import dataclass | |
# The decorator is required: bare annotations do not create an __init__, and
# instances are built with keyword arguments (path=..., content=...).
@dataclass
class FileInfo:
    """Record describing one file: its path and, optionally, its text."""

    # Path identifying the file.
    path: Path
    # Decoded text of the file; defaults to None when no text is attached.
    content: Optional[str] = None
class FileScanner:
    """Collect the text of source-code files found under a base directory.

    A file is scanned when its extension is listed in ``TARGET_EXTENSIONS``
    and no component of its path below ``base_dir`` is listed in
    ``EXCLUDED_DIRS``.
    """

    # File extensions to scan (compared against the lower-cased suffix).
    TARGET_EXTENSIONS = {
        '.py', '.js', '.java', '.cpp', '.hpp', '.c', '.h',
        '.go', '.rs', '.php', '.rb', '.ts', '.scala', '.kt',
        '.cs', '.swift', '.m', '.sh', '.pl', '.r'
    }

    # Directory names excluded from scanning.
    EXCLUDED_DIRS = {
        '.git', '__pycache__', 'node_modules', 'venv', '.env',
        'build', 'dist', 'target', 'bin', 'obj'
    }

    def __init__(self, base_dir: Path):
        """Remember the directory that ``scan_files`` will walk.

        Args:
            base_dir: Root directory of the scan.
        """
        self.base_dir = base_dir

    def _should_scan_file(self, path: Path) -> bool:
        """Return True when *path* passes the directory and extension filters.

        Args:
            path: Path of a candidate file, normally located under
                ``base_dir``.

        Returns:
            False if any component of the path below ``base_dir`` is in
            ``EXCLUDED_DIRS`` or the suffix is not in ``TARGET_EXTENSIONS``;
            True otherwise.
        """
        # Only components *below* base_dir take part in the exclusion test.
        # Testing the full path would reject every file whenever base_dir
        # itself sits under a directory such as "build" or "bin".
        try:
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # The path is not under base_dir; judge it as given.
            relative = path

        if any(part in self.EXCLUDED_DIRS for part in relative.parts):
            return False
        return path.suffix.lower() in self.TARGET_EXTENSIONS

    def _read_file_content(self, file_path: Path) -> Optional[str]:
        """Read *file_path* as text, trying UTF-8 first and then cp932.

        Args:
            file_path: File to read.

        Returns:
            The decoded text, or None when the file cannot be opened/read
            or cannot be decoded with either encoding.
        """
        for encoding in ('utf-8', 'cp932'):
            try:
                with file_path.open('r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                # Wrong encoding guess; fall through to the next candidate.
                continue
            except OSError:
                # Unreadable file: retrying with another encoding cannot help.
                return None
        return None

    def scan_files(self) -> List["FileInfo"]:
        """Scan ``base_dir`` recursively and return the readable source files.

        Returns:
            One ``FileInfo`` per accepted file whose content could be read,
            with ``path`` relative to ``base_dir``, sorted by the string
            form of that path.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"Directory not found: {self.base_dir}")

        files = []
        for entry in self.base_dir.rglob('*'):
            # Run the pure string filter first so that rejected entries do
            # not cost a filesystem stat in is_file().
            if not (self._should_scan_file(entry) and entry.is_file()):
                continue
            content = self._read_file_content(entry)
            if content is None:
                continue
            files.append(FileInfo(
                path=entry.relative_to(self.base_dir),
                content=content
            ))
        return sorted(files, key=lambda info: str(info.path))