From 5c6a95ddfe4168094c4d7eeaac9206242048781f Mon Sep 17 00:00:00 2001 From: Luke Craig Date: Mon, 11 Aug 2025 14:19:24 -0400 Subject: [PATCH] Add --skip-binary option to CLI and update save_repo_to_text to handle binary files --- repo_to_text/cli/cli.py | 8 +++++++- repo_to_text/core/core.py | 17 ++++++++++++----- tests/test_cli.py | 3 ++- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/repo_to_text/cli/cli.py b/repo_to_text/cli/cli.py index 911dd1a..6276f43 100644 --- a/repo_to_text/cli/cli.py +++ b/repo_to_text/cli/cli.py @@ -74,6 +74,11 @@ def parse_args() -> argparse.Namespace: help="List of files or directories to ignore in both tree and content sections. " "Supports wildcards (e.g., '*')." ) + parser.add_argument( + '--skip-binary', + action='store_true', + help='Skip binary files in the output.' + ) return parser.parse_args() def main() -> NoReturn: @@ -95,7 +100,8 @@ def main() -> NoReturn: path=args.input_dir, output_dir=args.output_dir, to_stdout=args.stdout, - cli_ignore_patterns=args.ignore_patterns + cli_ignore_patterns=args.ignore_patterns, + skip_binary=args.skip_binary ) logging.debug('repo-to-text script finished') diff --git a/repo_to_text/core/core.py b/repo_to_text/core/core.py index ccc9460..b5255ea 100644 --- a/repo_to_text/core/core.py +++ b/repo_to_text/core/core.py @@ -261,7 +261,8 @@ def save_repo_to_text( path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, - cli_ignore_patterns: Optional[List[str]] = None + cli_ignore_patterns: Optional[List[str]] = None, + skip_binary: bool = False ) -> str: """Save repository structure and contents to a text file or multiple files.""" # pylint: disable=too-many-locals @@ -285,7 +286,8 @@ def save_repo_to_text( gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec, - maximum_word_count_per_file + maximum_word_count_per_file, + skip_binary ) if to_stdout: @@ -352,11 +354,12 @@ def save_repo_to_text( return output_filepaths[0] return "" -def _read_file_content(file_path: str) -> str: +def _read_file_content(file_path: str, skip_binary: bool = False) -> str: """Read file content, handling binary files and broken symlinks. Args: file_path: Path to the file to read + skip_binary: Whether to skip binary files Returns: str: File content or appropriate message for special cases @@ -365,6 +368,9 @@ def _read_file_content(file_path: str) -> str: with open(file_path, 'r', encoding='utf-8') as f: return f.read() except UnicodeDecodeError: + if skip_binary: + logging.debug('Skipping binary file: %s', file_path) + return "binary content skipped" logging.debug('Handling binary file contents: %s', file_path) with open(file_path, 'rb') as f_bin: binary_content: bytes = f_bin.read() @@ -386,7 +392,8 @@ def generate_output_content( gitignore_spec: Optional[PathSpec], content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec], - maximum_word_count_per_file: Optional[int] = None + maximum_word_count_per_file: Optional[int] = None, + skip_binary: bool = False ) -> List[str]: """Generate the output content for the repository, potentially split into segments.""" # pylint: disable=too-many-arguments @@ -453,7 +460,7 @@ def generate_output_content( cleaned_relative_path = relative_path.replace('./', '', 1) _add_chunk_to_output(f'\n\n') - file_content = _read_file_content(file_path) + file_content = _read_file_content(file_path, skip_binary) _add_chunk_to_output(file_content) _add_chunk_to_output('\n\n') diff --git a/tests/test_cli.py b/tests/test_cli.py index 7382ce7..924ceb4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -84,7 +84,8 @@ def test_main_normal_execution(mock_save_repo: MagicMock) -> None: path='.', output_dir=None, to_stdout=True, - cli_ignore_patterns=None + cli_ignore_patterns=None, + skip_binary=False ) @patch('repo_to_text.cli.cli.create_default_settings_file')