mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-06 03:22:23 -08:00
Add --skip-binary option to CLI and update save_repo_to_text to handle binary files
This commit is contained in:
parent
77209f30aa
commit
5c6a95ddfe
3 changed files with 21 additions and 7 deletions
|
|
@@ -74,6 +74,11 @@ def parse_args() -> argparse.Namespace:
|
||||||
help="List of files or directories to ignore in both tree and content sections. "
|
help="List of files or directories to ignore in both tree and content sections. "
|
||||||
"Supports wildcards (e.g., '*')."
|
"Supports wildcards (e.g., '*')."
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--skip-binary',
|
||||||
|
action='store_true',
|
||||||
|
help='Skip binary files in the output.'
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
def main() -> NoReturn:
|
def main() -> NoReturn:
|
||||||
|
|
@@ -95,7 +100,8 @@ def main() -> NoReturn:
|
||||||
path=args.input_dir,
|
path=args.input_dir,
|
||||||
output_dir=args.output_dir,
|
output_dir=args.output_dir,
|
||||||
to_stdout=args.stdout,
|
to_stdout=args.stdout,
|
||||||
cli_ignore_patterns=args.ignore_patterns
|
cli_ignore_patterns=args.ignore_patterns,
|
||||||
|
skip_binary=args.skip_binary
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.debug('repo-to-text script finished')
|
logging.debug('repo-to-text script finished')
|
||||||
|
|
|
||||||
|
|
@@ -261,7 +261,8 @@ def save_repo_to_text(
|
||||||
path: str = '.',
|
path: str = '.',
|
||||||
output_dir: Optional[str] = None,
|
output_dir: Optional[str] = None,
|
||||||
to_stdout: bool = False,
|
to_stdout: bool = False,
|
||||||
cli_ignore_patterns: Optional[List[str]] = None
|
cli_ignore_patterns: Optional[List[str]] = None,
|
||||||
|
skip_binary: bool = False
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Save repository structure and contents to a text file or multiple files."""
|
"""Save repository structure and contents to a text file or multiple files."""
|
||||||
# pylint: disable=too-many-locals
|
# pylint: disable=too-many-locals
|
||||||
|
|
@@ -285,7 +286,8 @@ def save_repo_to_text(
|
||||||
gitignore_spec,
|
gitignore_spec,
|
||||||
content_ignore_spec,
|
content_ignore_spec,
|
||||||
tree_and_content_ignore_spec,
|
tree_and_content_ignore_spec,
|
||||||
maximum_word_count_per_file
|
maximum_word_count_per_file,
|
||||||
|
skip_binary
|
||||||
)
|
)
|
||||||
|
|
||||||
if to_stdout:
|
if to_stdout:
|
||||||
|
|
@@ -352,11 +354,12 @@ def save_repo_to_text(
|
||||||
return output_filepaths[0]
|
return output_filepaths[0]
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _read_file_content(file_path: str) -> str:
|
def _read_file_content(file_path: str, skip_binary: bool = False) -> str:
|
||||||
"""Read file content, handling binary files and broken symlinks.
|
"""Read file content, handling binary files and broken symlinks.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path: Path to the file to read
|
file_path: Path to the file to read
|
||||||
|
skip_binary: Whether to skip binary files
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: File content or appropriate message for special cases
|
str: File content or appropriate message for special cases
|
||||||
|
|
@@ -365,6 +368,9 @@ def _read_file_content(file_path: str) -> str:
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
if skip_binary:
|
||||||
|
logging.debug('Skipping binary file: %s', file_path)
|
||||||
|
return "binary content skipped"
|
||||||
logging.debug('Handling binary file contents: %s', file_path)
|
logging.debug('Handling binary file contents: %s', file_path)
|
||||||
with open(file_path, 'rb') as f_bin:
|
with open(file_path, 'rb') as f_bin:
|
||||||
binary_content: bytes = f_bin.read()
|
binary_content: bytes = f_bin.read()
|
||||||
|
|
@@ -386,7 +392,8 @@ def generate_output_content(
|
||||||
gitignore_spec: Optional[PathSpec],
|
gitignore_spec: Optional[PathSpec],
|
||||||
content_ignore_spec: Optional[PathSpec],
|
content_ignore_spec: Optional[PathSpec],
|
||||||
tree_and_content_ignore_spec: Optional[PathSpec],
|
tree_and_content_ignore_spec: Optional[PathSpec],
|
||||||
maximum_word_count_per_file: Optional[int] = None
|
maximum_word_count_per_file: Optional[int] = None,
|
||||||
|
skip_binary: bool = False
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Generate the output content for the repository, potentially split into segments."""
|
"""Generate the output content for the repository, potentially split into segments."""
|
||||||
# pylint: disable=too-many-arguments
|
# pylint: disable=too-many-arguments
|
||||||
|
|
@@ -453,7 +460,7 @@ def generate_output_content(
|
||||||
cleaned_relative_path = relative_path.replace('./', '', 1)
|
cleaned_relative_path = relative_path.replace('./', '', 1)
|
||||||
|
|
||||||
_add_chunk_to_output(f'\n<content full_path="{cleaned_relative_path}">\n')
|
_add_chunk_to_output(f'\n<content full_path="{cleaned_relative_path}">\n')
|
||||||
file_content = _read_file_content(file_path)
|
file_content = _read_file_content(file_path, skip_binary)
|
||||||
_add_chunk_to_output(file_content)
|
_add_chunk_to_output(file_content)
|
||||||
_add_chunk_to_output('\n</content>\n')
|
_add_chunk_to_output('\n</content>\n')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -84,7 +84,8 @@ def test_main_normal_execution(mock_save_repo: MagicMock) -> None:
|
||||||
path='.',
|
path='.',
|
||||||
output_dir=None,
|
output_dir=None,
|
||||||
to_stdout=True,
|
to_stdout=True,
|
||||||
cli_ignore_patterns=None
|
cli_ignore_patterns=None,
|
||||||
|
skip_binary=False
|
||||||
)
|
)
|
||||||
|
|
||||||
@patch('repo_to_text.cli.cli.create_default_settings_file')
|
@patch('repo_to_text.cli.cli.create_default_settings_file')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue