Add --skip-binary option to CLI and update save_repo_to_text to handle binary files

This commit is contained in:
Luke Craig 2025-08-11 14:19:24 -04:00
parent 77209f30aa
commit 5c6a95ddfe
3 changed files with 21 additions and 7 deletions

View file

@@ -74,6 +74,11 @@ def parse_args() -> argparse.Namespace:
help="List of files or directories to ignore in both tree and content sections. " help="List of files or directories to ignore in both tree and content sections. "
"Supports wildcards (e.g., '*')." "Supports wildcards (e.g., '*')."
) )
parser.add_argument(
'--skip-binary',
action='store_true',
help='Skip binary files in the output.'
)
return parser.parse_args() return parser.parse_args()
def main() -> NoReturn: def main() -> NoReturn:
@@ -95,7 +100,8 @@ def main() -> NoReturn:
path=args.input_dir, path=args.input_dir,
output_dir=args.output_dir, output_dir=args.output_dir,
to_stdout=args.stdout, to_stdout=args.stdout,
cli_ignore_patterns=args.ignore_patterns cli_ignore_patterns=args.ignore_patterns,
skip_binary=args.skip_binary
) )
logging.debug('repo-to-text script finished') logging.debug('repo-to-text script finished')

View file

@@ -261,7 +261,8 @@ def save_repo_to_text(
path: str = '.', path: str = '.',
output_dir: Optional[str] = None, output_dir: Optional[str] = None,
to_stdout: bool = False, to_stdout: bool = False,
cli_ignore_patterns: Optional[List[str]] = None cli_ignore_patterns: Optional[List[str]] = None,
skip_binary: bool = False
) -> str: ) -> str:
"""Save repository structure and contents to a text file or multiple files.""" """Save repository structure and contents to a text file or multiple files."""
# pylint: disable=too-many-locals # pylint: disable=too-many-locals
@@ -285,7 +286,8 @@ def save_repo_to_text(
gitignore_spec, gitignore_spec,
content_ignore_spec, content_ignore_spec,
tree_and_content_ignore_spec, tree_and_content_ignore_spec,
maximum_word_count_per_file maximum_word_count_per_file,
skip_binary
) )
if to_stdout: if to_stdout:
@@ -352,11 +354,12 @@ def save_repo_to_text(
return output_filepaths[0] return output_filepaths[0]
return "" return ""
def _read_file_content(file_path: str) -> str: def _read_file_content(file_path: str, skip_binary: bool = False) -> str:
"""Read file content, handling binary files and broken symlinks. """Read file content, handling binary files and broken symlinks.
Args: Args:
file_path: Path to the file to read file_path: Path to the file to read
skip_binary: Whether to skip binary files
Returns: Returns:
str: File content or appropriate message for special cases str: File content or appropriate message for special cases
@@ -365,6 +368,9 @@ def _read_file_content(file_path: str) -> str:
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
return f.read() return f.read()
except UnicodeDecodeError: except UnicodeDecodeError:
if skip_binary:
logging.debug('Skipping binary file: %s', file_path)
return "binary content skipped"
logging.debug('Handling binary file contents: %s', file_path) logging.debug('Handling binary file contents: %s', file_path)
with open(file_path, 'rb') as f_bin: with open(file_path, 'rb') as f_bin:
binary_content: bytes = f_bin.read() binary_content: bytes = f_bin.read()
@@ -386,7 +392,8 @@ def generate_output_content(
gitignore_spec: Optional[PathSpec], gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec], content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec],
maximum_word_count_per_file: Optional[int] = None maximum_word_count_per_file: Optional[int] = None,
skip_binary: bool = False
) -> List[str]: ) -> List[str]:
"""Generate the output content for the repository, potentially split into segments.""" """Generate the output content for the repository, potentially split into segments."""
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
@@ -453,7 +460,7 @@ def generate_output_content(
cleaned_relative_path = relative_path.replace('./', '', 1) cleaned_relative_path = relative_path.replace('./', '', 1)
_add_chunk_to_output(f'\n<content full_path="{cleaned_relative_path}">\n') _add_chunk_to_output(f'\n<content full_path="{cleaned_relative_path}">\n')
file_content = _read_file_content(file_path) file_content = _read_file_content(file_path, skip_binary)
_add_chunk_to_output(file_content) _add_chunk_to_output(file_content)
_add_chunk_to_output('\n</content>\n') _add_chunk_to_output('\n</content>\n')

View file

@@ -84,7 +84,8 @@ def test_main_normal_execution(mock_save_repo: MagicMock) -> None:
path='.', path='.',
output_dir=None, output_dir=None,
to_stdout=True, to_stdout=True,
cli_ignore_patterns=None cli_ignore_patterns=None,
skip_binary=False
) )
@patch('repo_to_text.cli.cli.create_default_settings_file') @patch('repo_to_text.cli.cli.create_default_settings_file')