Spaces:

MKJ-TOE
/

repository_reader

Running

repository_reader / core /file_scanner.py

DeL-TaiseiOzaki

first commit

227e75d 17 days ago

2.09 kB

	from pathlib import Path
	from typing import List, Dict, Optional
	from dataclasses import dataclass

	@dataclass
	class FileInfo:
	    """A scanned file: where it is and, optionally, its decoded text."""

	    # Location of the file.
	    path: Path
	    # Decoded text of the file; None when no content is attached.
	    content: Optional[str] = None

	class FileScanner:
	    """Collect the text of source files found below a base directory.

	    A file is picked up when its extension is listed in
	    ``TARGET_EXTENSIONS`` and none of the directories between ``base_dir``
	    and the file is named in ``EXCLUDED_DIRS``.
	    """

	    # File extensions that are scanned (compared against the lower-cased suffix).
	    TARGET_EXTENSIONS = {
	        '.py', '.js', '.java', '.cpp', '.hpp', '.c', '.h',
	        '.go', '.rs', '.php', '.rb', '.ts', '.scala', '.kt',
	        '.cs', '.swift', '.m', '.sh', '.pl', '.r'
	    }

	    # Directory names that are left out of the scan.
	    EXCLUDED_DIRS = {
	        '.git', '__pycache__', 'node_modules', 'venv', '.env',
	        'build', 'dist', 'target', 'bin', 'obj'
	    }

	    def __init__(self, base_dir: Path):
	        """Store the root directory that ``scan_files`` walks.

	        Args:
	            base_dir: Root of the scan. Anything ``Path`` accepts (for
	                example a plain string) is converted to a ``Path``.
	        """
	        self.base_dir = Path(base_dir)

	    def _should_scan_file(self, path: Path) -> bool:
	        """Return True when ``path`` is a scan target.

	        Exclusion is judged on the portion of ``path`` below ``base_dir``.
	        Checking the full path would reject every file whenever the base
	        directory itself sits under a directory such as ``build`` or
	        ``venv``. A path that is not below ``base_dir`` is checked in full.
	        """
	        try:
	            relative = path.relative_to(self.base_dir)
	        except ValueError:
	            # Not located under base_dir: fall back to the path as given.
	            relative = path

	        if any(part in self.EXCLUDED_DIRS for part in relative.parts):
	            return False
	        return path.suffix.lower() in self.TARGET_EXTENSIONS

	    def _read_file_content(self, file_path: Path) -> Optional[str]:
	        """Return the text of ``file_path``, or None when it cannot be read.

	        UTF-8 is tried first; if decoding fails, cp932 is tried. None is
	        returned when both decodings fail or the file cannot be opened.
	        """
	        for encoding in ('utf-8', 'cp932'):
	            try:
	                with file_path.open('r', encoding=encoding) as handle:
	                    return handle.read()
	            except UnicodeDecodeError:
	                # Wrong encoding guess: move on to the next candidate.
	                continue
	            except OSError:
	                return None
	        return None

	    def scan_files(self) -> List["FileInfo"]:
	        """Read every target file below ``base_dir``.

	        Returns:
	            One ``FileInfo`` per readable target file, with ``path`` relative
	            to ``base_dir``, sorted by the string form of that path.

	        Raises:
	            FileNotFoundError: If ``base_dir`` does not exist.
	        """
	        import os  # local import so this block does not rely on file-level imports

	        if not self.base_dir.exists():
	            raise FileNotFoundError(f"Directory not found: {self.base_dir}")

	        files = []

	        for dirpath, dirnames, filenames in os.walk(self.base_dir):
	            # Prune in place so os.walk never descends into excluded trees
	            # (.git, node_modules, ...) instead of walking and discarding them.
	            dirnames[:] = [name for name in dirnames if name not in self.EXCLUDED_DIRS]

	            for filename in filenames:
	                entry = Path(dirpath) / filename
	                if not (entry.is_file() and self._should_scan_file(entry)):
	                    continue
	                content = self._read_file_content(entry)
	                if content is not None:
	                    files.append(FileInfo(
	                        path=entry.relative_to(self.base_dir),
	                        content=content
	                    ))

	        return sorted(files, key=lambda info: str(info.path))