repository_reader / core /file_scanner.py
DeL-TaiseiOzaki
first commit
227e75d
raw
history blame
2.09 kB
from pathlib import Path
from typing import List, Dict, Optional
from dataclasses import dataclass
@dataclass
class FileInfo:
    """One scanned file: where it lives and the text that was read from it."""

    # Location of the file; FileScanner.scan_files stores it relative to the
    # scanner's base directory.
    path: Path
    # Decoded text of the file, or None when no content has been attached.
    content: Optional[str] = None
class FileScanner:
    """Collect the text of source-code files found below a base directory.

    A file is picked up when its extension is listed in TARGET_EXTENSIONS and
    none of the path components *below* the base directory is listed in
    EXCLUDED_DIRS. Files that cannot be read or decoded are skipped silently.
    """

    # File extensions (lower-case, leading dot) that are scanned.
    TARGET_EXTENSIONS = {
        '.py', '.js', '.java', '.cpp', '.hpp', '.c', '.h',
        '.go', '.rs', '.php', '.rb', '.ts', '.scala', '.kt',
        '.cs', '.swift', '.m', '.sh', '.pl', '.r'
    }

    # Directory names that exclude a file from the scan.
    EXCLUDED_DIRS = {
        '.git', '__pycache__', 'node_modules', 'venv', '.env',
        'build', 'dist', 'target', 'bin', 'obj'
    }

    def __init__(self, base_dir: Path):
        """Remember the directory to scan.

        Args:
            base_dir: Root of the tree to scan. A plain string path is
                accepted as well and converted to a Path.
        """
        self.base_dir = Path(base_dir)

    def _should_scan_file(self, path: Path) -> bool:
        """Return True when *path* is a scan target.

        Args:
            path: Path of a candidate file, either located under the base
                directory or already relative to it.

        Returns:
            False if any path component below the base directory is an
            excluded name, otherwise whether the (case-insensitive) suffix
            is one of the target extensions.
        """
        # Only look at the part of the path below base_dir. Checking the full
        # path would reject every file whenever the base directory itself
        # happens to live under e.g. ".../build/" or ".../bin/".
        try:
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # Not located under base_dir (e.g. already relative): use as is.
            relative = path

        if any(part in self.EXCLUDED_DIRS for part in relative.parts):
            return False
        return path.suffix.lower() in self.TARGET_EXTENSIONS

    def _read_file_content(self, file_path: Path) -> Optional[str]:
        """Read *file_path* as text, trying UTF-8 first and then cp932.

        Args:
            file_path: File to read.

        Returns:
            The decoded text, or None when the file cannot be opened/read or
            cannot be decoded with either encoding.
        """
        # Try UTF-8 first; if decoding fails, fall back to cp932.
        for encoding in ('utf-8', 'cp932'):
            try:
                with file_path.open('r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue
            except OSError:
                return None
        return None

    def scan_files(self) -> List["FileInfo"]:
        """Scan the base directory recursively and return the readable targets.

        Returns:
            One FileInfo per scanned file, with ``path`` relative to the base
            directory and ``content`` holding the decoded text, sorted by the
            string form of the relative path.

        Raises:
            FileNotFoundError: If the base directory does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"Directory not found: {self.base_dir}")

        files = []
        for entry in self.base_dir.rglob('*'):
            if not (entry.is_file() and self._should_scan_file(entry)):
                continue
            content = self._read_file_content(entry)
            if content is None:
                # Unreadable or undecodable file: leave it out of the result.
                continue
            files.append(FileInfo(
                path=entry.relative_to(self.base_dir),
                content=content
            ))

        return sorted(files, key=lambda x: str(x.path))