From dbfa602cd3bf3903a986c05a181171240cc643f8 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Mon, 16 Dec 2024 01:29:31 +0100 Subject: [PATCH] Refactor divide logic by files and more tests --- repo_to_text/cli/__init__.py | 3 + repo_to_text/cli/cli.py | 71 ++++++++ repo_to_text/core/__init__.py | 3 + repo_to_text/core/core.py | 226 ++++++++++++++++++++++++++ repo_to_text/utils/__init__.py | 3 + repo_to_text/utils/utils.py | 82 ++++++++++ tests/test_cli.py | 107 +++++++++++++ tests/test_core.py | 285 +++++++++++++++++++++++++++++++++ tests/test_utils.py | 142 ++++++++++++++++ 9 files changed, 922 insertions(+) create mode 100644 repo_to_text/cli/__init__.py create mode 100644 repo_to_text/cli/cli.py create mode 100644 repo_to_text/core/__init__.py create mode 100644 repo_to_text/core/core.py create mode 100644 repo_to_text/utils/__init__.py create mode 100644 repo_to_text/utils/utils.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_core.py create mode 100644 tests/test_utils.py diff --git a/repo_to_text/cli/__init__.py b/repo_to_text/cli/__init__.py new file mode 100644 index 0000000..da7c121 --- /dev/null +++ b/repo_to_text/cli/__init__.py @@ -0,0 +1,3 @@ +from .cli import create_default_settings_file, parse_args, main + +__all__ = ['create_default_settings_file', 'parse_args', 'main'] \ No newline at end of file diff --git a/repo_to_text/cli/cli.py b/repo_to_text/cli/cli.py new file mode 100644 index 0000000..a988d79 --- /dev/null +++ b/repo_to_text/cli/cli.py @@ -0,0 +1,71 @@ +import argparse +import textwrap +import os +import logging +from typing import NoReturn + +from ..utils.utils import setup_logging +from ..core.core import save_repo_to_text + +def create_default_settings_file() -> None: + """Create a default .repo-to-text-settings.yaml file.""" + settings_file = '.repo-to-text-settings.yaml' + if os.path.exists(settings_file): + raise FileExistsError(f"The settings file '{settings_file}' already exists. 
Please remove it or rename it if you want to create a new default settings file.") + + default_settings = textwrap.dedent("""\ + # Details: https://github.com/kirill-markin/repo-to-text + # Syntax: gitignore rules + + # Ignore files and directories for all sections from gitignore file + # Default: True + gitignore-import-and-ignore: True + + # Ignore files and directories for tree + # and "Contents of ..." sections + ignore-tree-and-content: + - ".repo-to-text-settings.yaml" + + # Ignore files and directories for "Contents of ..." section + ignore-content: + - "README.md" + - "LICENSE" + """) + with open('.repo-to-text-settings.yaml', 'w') as f: + f.write(default_settings) + print("Default .repo-to-text-settings.yaml created.") + +def parse_args() -> argparse.Namespace: + """Parse command line arguments. + + Returns: + argparse.Namespace: Parsed command line arguments + """ + parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') + parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process') + parser.add_argument('--debug', action='store_true', help='Enable debug logging') + parser.add_argument('--output-dir', type=str, help='Directory to save the output file') + parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file') + parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file') + parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. 
Supports wildcards (e.g., '*').") + return parser.parse_args() + +def main() -> NoReturn: + """Main entry point for the CLI.""" + args = parse_args() + setup_logging(debug=args.debug) + logging.debug('repo-to-text script started') + + if args.create_settings: + create_default_settings_file() + logging.debug('.repo-to-text-settings.yaml file created') + else: + save_repo_to_text( + path=args.input_dir, + output_dir=args.output_dir, + to_stdout=args.stdout, + cli_ignore_patterns=args.ignore_patterns + ) + + logging.debug('repo-to-text script finished') + raise SystemExit(0) \ No newline at end of file diff --git a/repo_to_text/core/__init__.py b/repo_to_text/core/__init__.py new file mode 100644 index 0000000..2c937c6 --- /dev/null +++ b/repo_to_text/core/__init__.py @@ -0,0 +1,3 @@ +from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text + +__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text'] \ No newline at end of file diff --git a/repo_to_text/core/core.py b/repo_to_text/core/core.py new file mode 100644 index 0000000..5c3ac25 --- /dev/null +++ b/repo_to_text/core/core.py @@ -0,0 +1,226 @@ +import os +import subprocess +import logging +import yaml +from datetime import datetime, timezone +from typing import Tuple, Optional +import pathspec +from pathspec import PathSpec + +from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs + +def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str: + """Generate tree structure of the directory. 
+ + Args: + path: Directory path to generate tree for + gitignore_spec: PathSpec object for gitignore patterns + tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns + + Returns: + str: Generated tree structure + """ + if not check_tree_command(): + return "" + + logging.debug(f'Generating tree structure for path: {path}') + result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) + tree_output = result.stdout.decode('utf-8') + logging.debug(f'Tree output generated:\n{tree_output}') + + if not gitignore_spec and not tree_and_content_ignore_spec: + logging.debug('No .gitignore or ignore-tree-and-content specification found') + return tree_output + + logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') + filtered_lines = [] + + for line in tree_output.splitlines(): + idx = line.find('./') + if idx == -1: + idx = line.find(path) + if idx != -1: + full_path = line[idx:].strip() + else: + continue + + if full_path == '.': + continue + + relative_path = os.path.relpath(full_path, path) + relative_path = relative_path.replace(os.sep, '/') + if os.path.isdir(full_path): + relative_path += '/' + + if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): + display_line = line.replace('./', '', 1) + filtered_lines.append(display_line) + else: + logging.debug(f'Ignored: {relative_path}') + + filtered_tree_output = '\n'.join(filtered_lines) + logging.debug(f'Filtered tree structure:\n{filtered_tree_output}') + logging.debug('Tree structure filtering complete') + return filtered_tree_output + +def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: + """Load ignore specifications from various sources. 
+ + Args: + path: Base directory path + cli_ignore_patterns: List of patterns from command line + + Returns: + Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec + """ + gitignore_spec = None + content_ignore_spec = None + tree_and_content_ignore_list = [] + use_gitignore = True + + repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') + if os.path.exists(repo_settings_path): + logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') + with open(repo_settings_path, 'r') as f: + settings = yaml.safe_load(f) or {} + use_gitignore = settings.get('gitignore-import-and-ignore', True) + if settings.get('ignore-content'): + content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) + if settings.get('ignore-tree-and-content'): + tree_and_content_ignore_list.extend(settings['ignore-tree-and-content']) + + if cli_ignore_patterns: + tree_and_content_ignore_list.extend(cli_ignore_patterns) + + if use_gitignore: + gitignore_path = os.path.join(path, '.gitignore') + if os.path.exists(gitignore_path): + logging.debug(f'Loading .gitignore from path: {gitignore_path}') + with open(gitignore_path, 'r') as f: + gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) + + tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list) + return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec + +def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec], + content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool: + """Check if a file should be ignored based on various ignore specifications. 
+ + Args: + file_path: Full path to the file + relative_path: Path relative to the repository root + gitignore_spec: PathSpec object for gitignore patterns + content_ignore_spec: PathSpec object for content ignore patterns + tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns + + Returns: + bool: True if file should be ignored, False otherwise + """ + relative_path = relative_path.replace(os.sep, '/') + + if relative_path.startswith('./'): + relative_path = relative_path[2:] + + if os.path.isdir(file_path): + relative_path += '/' + + result = ( + is_ignored_path(file_path) or + bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or + bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or + bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or + os.path.basename(file_path).startswith('repo-to-text_') + ) + + logging.debug(f'Checking if file should be ignored:') + logging.debug(f' file_path: {file_path}') + logging.debug(f' relative_path: {relative_path}') + logging.debug(f' Result: {result}') + return result + +def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[list] = None) -> str: + """Save repository structure and contents to a text file. 
+ + Args: + path: Repository path + output_dir: Directory to save output file + to_stdout: Whether to output to stdout instead of file + cli_ignore_patterns: List of patterns from command line + + Returns: + str: Path to the output file or the output text if to_stdout is True + """ + logging.debug(f'Starting to save repo structure to text for path: {path}') + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns) + tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) + tree_structure = remove_empty_dirs(tree_structure, path) + logging.debug(f'Final tree structure to be written: {tree_structure}') + + timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') + output_file = f'repo-to-text_{timestamp}.txt' + + if output_dir: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + output_file = os.path.join(output_dir, output_file) + + output_content = [] + project_name = os.path.basename(os.path.abspath(path)) + output_content.append(f'Directory: {project_name}\n\n') + output_content.append('Directory Structure:\n') + output_content.append('```\n.\n') + + if os.path.exists(os.path.join(path, '.gitignore')): + output_content.append('├── .gitignore\n') + + output_content.append(tree_structure + '\n' + '```\n') + logging.debug('Tree structure written to output content') + + for root, _, files in os.walk(path): + for filename in files: + file_path = os.path.join(root, filename) + relative_path = os.path.relpath(file_path, path) + + if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): + continue + + relative_path = relative_path.replace('./', '', 1) + + output_content.append(f'\nContents of {relative_path}:\n') + output_content.append('```\n') + try: + with open(file_path, 'r', encoding='utf-8') as f: + output_content.append(f.read()) + except UnicodeDecodeError: + logging.debug(f'Could not 
decode file contents: {file_path}') + output_content.append('[Could not decode file contents]\n') + output_content.append('\n```\n') + + output_content.append('\n') + logging.debug('Repository contents written to output content') + + output_text = ''.join(output_content) + + if to_stdout: + print(output_text) + return output_text + + with open(output_file, 'w', encoding='utf-8') as file: + file.write(output_text) + + try: + import importlib.util + if importlib.util.find_spec("pyperclip"): + import pyperclip # type: ignore + pyperclip.copy(output_text) + logging.debug('Repository structure and contents copied to clipboard') + else: + print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") + print(" pip install pyperclip") + except Exception as e: + logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.') + logging.debug(f'Clipboard copy error: {e}') + + print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"") + + return output_file \ No newline at end of file diff --git a/repo_to_text/utils/__init__.py b/repo_to_text/utils/__init__.py new file mode 100644 index 0000000..51c6c6e --- /dev/null +++ b/repo_to_text/utils/__init__.py @@ -0,0 +1,3 @@ +from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs + +__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs'] \ No newline at end of file diff --git a/repo_to_text/utils/utils.py b/repo_to_text/utils/utils.py new file mode 100644 index 0000000..b2d663a --- /dev/null +++ b/repo_to_text/utils/utils.py @@ -0,0 +1,82 @@ +import os +import shutil +import logging +from typing import List + +def setup_logging(debug: bool = False) -> None: + """Set up logging configuration. 
+ + Args: + debug: If True, sets logging level to DEBUG, otherwise INFO + """ + logging_level = logging.DEBUG if debug else logging.INFO + logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s') + +def check_tree_command() -> bool: + """Check if the `tree` command is available, and suggest installation if not. + + Returns: + bool: True if tree command is available, False otherwise + """ + if shutil.which('tree') is None: + print("The 'tree' command is not found. Please install it using one of the following commands:") + print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree") + print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree") + return False + return True + +def is_ignored_path(file_path: str) -> bool: + """Check if a file path should be ignored based on predefined rules. + + Args: + file_path: Path to check + + Returns: + bool: True if path should be ignored, False otherwise + """ + ignored_dirs: List[str] = ['.git'] + ignored_files_prefix: List[str] = ['repo-to-text_'] + is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs) + is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) + result = is_ignored_dir or is_ignored_file + if result: + logging.debug(f'Path ignored: {file_path}') + return result + +def remove_empty_dirs(tree_output: str, path: str = '.') -> str: + """Remove empty directories from tree output. 
+ + Args: + tree_output: Output from tree command + path: Base path for the tree + + Returns: + str: Tree output with empty directories removed + """ + logging.debug('Removing empty directories from tree output') + lines = tree_output.splitlines() + non_empty_dirs = set() + filtered_lines = [] + + for line in lines: + parts = line.strip().split() + if parts: + full_path = parts[-1] + if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): + logging.debug(f'Directory is empty and will be removed: {full_path}') + continue + non_empty_dirs.add(os.path.dirname(full_path)) + filtered_lines.append(line) + + final_lines = [] + for line in filtered_lines: + parts = line.strip().split() + if parts: + full_path = parts[-1] + if os.path.isdir(full_path) and full_path not in non_empty_dirs: + logging.debug(f'Directory is empty and will be removed: {full_path}') + continue + final_lines.append(line) + + logging.debug('Empty directory removal complete') + return '\n'.join(final_lines) \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..23747e5 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,107 @@ +import os +import pytest +import tempfile +import shutil +from typing import Generator +from unittest.mock import patch, MagicMock +from repo_to_text.cli.cli import ( + create_default_settings_file, + parse_args, + main +) + +@pytest.fixture +def temp_dir() -> Generator[str, None, None]: + """Create a temporary directory for testing.""" + temp_path = tempfile.mkdtemp() + yield temp_path + shutil.rmtree(temp_path) + +def test_parse_args_defaults() -> None: + """Test parsing command line arguments with default values.""" + with patch('sys.argv', ['repo-to-text']): + args = parse_args() + assert args.input_dir == '.' 
+ assert not args.debug + assert args.output_dir is None + assert not args.create_settings + assert not args.stdout + assert args.ignore_patterns is None + +def test_parse_args_with_values() -> None: + """Test parsing command line arguments with provided values.""" + test_args = [ + 'repo-to-text', + 'input/path', + '--debug', + '--output-dir', 'output/path', + '--ignore-patterns', '*.log', 'temp/' + ] + with patch('sys.argv', test_args): + args = parse_args() + assert args.input_dir == 'input/path' + assert args.debug + assert args.output_dir == 'output/path' + assert args.ignore_patterns == ['*.log', 'temp/'] + +def test_create_default_settings_file(temp_dir: str) -> None: + """Test creation of default settings file.""" + os.chdir(temp_dir) + create_default_settings_file() + + settings_file = '.repo-to-text-settings.yaml' + assert os.path.exists(settings_file) + + with open(settings_file, 'r') as f: + content = f.read() + assert 'gitignore-import-and-ignore: True' in content + assert 'ignore-tree-and-content:' in content + assert 'ignore-content:' in content + +def test_create_default_settings_file_already_exists(temp_dir: str) -> None: + """Test handling of existing settings file.""" + os.chdir(temp_dir) + # Create the file first + create_default_settings_file() + + # Try to create it again + with pytest.raises(FileExistsError) as exc_info: + create_default_settings_file() + assert "already exists" in str(exc_info.value) + +@patch('repo_to_text.cli.cli.save_repo_to_text') +def test_main_normal_execution(mock_save_repo: MagicMock) -> None: + """Test main function with normal execution.""" + with patch('sys.argv', ['repo-to-text', '--stdout']): + with pytest.raises(SystemExit) as exc_info: + main() + assert exc_info.value.code == 0 + mock_save_repo.assert_called_once_with( + path='.', + output_dir=None, + to_stdout=True, + cli_ignore_patterns=None + ) + +@patch('repo_to_text.cli.cli.create_default_settings_file') +def 
test_main_create_settings(mock_create_settings: MagicMock) -> None: + """Test main function with create settings option.""" + with patch('sys.argv', ['repo-to-text', '--create-settings']): + with pytest.raises(SystemExit) as exc_info: + main() + assert exc_info.value.code == 0 + mock_create_settings.assert_called_once() + +@patch('repo_to_text.cli.cli.setup_logging') +@patch('repo_to_text.cli.cli.create_default_settings_file') +def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None: + """Test main function with debug logging enabled.""" + with patch('sys.argv', ['repo-to-text', '--debug', '--create-settings']): + with pytest.raises(SystemExit) as exc_info: + main() + assert exc_info.value.code == 0 + mock_setup_logging.assert_called_once_with(debug=True) + mock_create_settings.assert_called_once() + +if __name__ == "__main__": + pytest.main([__file__]) \ No newline at end of file diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..3e5cf9e --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,285 @@ +import os +import tempfile +import shutil +import pytest +from typing import Generator +from repo_to_text.core.core import ( + get_tree_structure, + load_ignore_specs, + should_ignore_file, + is_ignored_path, + remove_empty_dirs, + save_repo_to_text +) + +@pytest.fixture +def temp_dir() -> Generator[str, None, None]: + """Create a temporary directory for testing.""" + temp_path = tempfile.mkdtemp() + yield temp_path + shutil.rmtree(temp_path) + +@pytest.fixture +def sample_repo(temp_dir: str) -> str: + """Create a sample repository structure for testing.""" + # Create directories + os.makedirs(os.path.join(temp_dir, "src")) + os.makedirs(os.path.join(temp_dir, "tests")) + + # Create sample files + files = { + "README.md": "# Test Project", + ".gitignore": """ +*.pyc +__pycache__/ +.git/ +""", + "src/main.py": "print('Hello World')", + "tests/test_main.py": "def test_sample(): pass", + 
".repo-to-text-settings.yaml": """ +gitignore-import-and-ignore: True +ignore-tree-and-content: + - ".git/" + - ".repo-to-text-settings.yaml" +ignore-content: + - "README.md" +""" + } + + for file_path, content in files.items(): + full_path = os.path.join(temp_dir, file_path) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "w") as f: + f.write(content) + + return temp_dir + +def test_is_ignored_path() -> None: + """Test the is_ignored_path function.""" + assert is_ignored_path(".git/config") is True + assert is_ignored_path("repo-to-text_output.txt") is True + assert is_ignored_path("src/main.py") is False + assert is_ignored_path("normal_file.txt") is False + +def test_load_ignore_specs(sample_repo: str) -> None: + """Test loading ignore specifications from files.""" + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + + assert gitignore_spec is not None + assert content_ignore_spec is not None + assert tree_and_content_ignore_spec is not None + + # Test gitignore patterns + assert gitignore_spec.match_file("test.pyc") is True + assert gitignore_spec.match_file("__pycache__/cache.py") is True + assert gitignore_spec.match_file(".git/config") is True + + # Test content ignore patterns + assert content_ignore_spec.match_file("README.md") is True + + # Test tree and content ignore patterns + assert tree_and_content_ignore_spec.match_file(".git/config") is True + +def test_should_ignore_file(sample_repo: str) -> None: + """Test file ignoring logic.""" + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + + # Test various file paths + assert should_ignore_file( + ".git/config", + ".git/config", + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ) is True + + assert should_ignore_file( + "src/main.py", + "src/main.py", + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ) is False + +def 
test_get_tree_structure(sample_repo: str) -> None: + """Test tree structure generation.""" + gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec) + + # Basic structure checks + assert "src" in tree_output + assert "tests" in tree_output + assert "main.py" in tree_output + assert "test_main.py" in tree_output + assert ".git" not in tree_output + +def test_remove_empty_dirs(temp_dir: str) -> None: + """Test removal of empty directories from tree output.""" + # Create test directory structure + os.makedirs(os.path.join(temp_dir, "src")) + os.makedirs(os.path.join(temp_dir, "empty_dir")) + os.makedirs(os.path.join(temp_dir, "tests")) + + # Create some files + with open(os.path.join(temp_dir, "src/main.py"), "w") as f: + f.write("print('test')") + with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as f: + f.write("def test(): pass") + + # Create a mock tree output that matches the actual tree command format + tree_output = ( + f"{temp_dir}\n" + f"├── {os.path.join(temp_dir, 'src')}\n" + f"│ └── {os.path.join(temp_dir, 'src/main.py')}\n" + f"├── {os.path.join(temp_dir, 'empty_dir')}\n" + f"└── {os.path.join(temp_dir, 'tests')}\n" + f" └── {os.path.join(temp_dir, 'tests/test_main.py')}\n" + ) + + filtered_output = remove_empty_dirs(tree_output, temp_dir) + + # Check that empty_dir is removed but other directories remain + assert "empty_dir" not in filtered_output + assert os.path.join(temp_dir, "src") in filtered_output + assert os.path.join(temp_dir, "tests") in filtered_output + assert os.path.join(temp_dir, "src/main.py") in filtered_output + assert os.path.join(temp_dir, "tests/test_main.py") in filtered_output + +def test_save_repo_to_text(sample_repo: str) -> None: + """Test the main save_repo_to_text function.""" + # Create output directory + output_dir = os.path.join(sample_repo, "output") + os.makedirs(output_dir, exist_ok=True) + + 
# Create .git directory to ensure it's properly ignored + os.makedirs(os.path.join(sample_repo, ".git")) + with open(os.path.join(sample_repo, ".git/config"), "w") as f: + f.write("[core]\n\trepositoryformatversion = 0\n") + + # Test file output + output_file = save_repo_to_text(sample_repo, output_dir=output_dir) + assert os.path.exists(output_file) + assert os.path.dirname(output_file) == output_dir + + # Check file contents + with open(output_file, 'r') as f: + content = f.read() + + # Basic content checks + assert "Directory Structure:" in content + + # Check for expected files + assert "src/main.py" in content + assert "tests/test_main.py" in content + + # Check for file contents + assert "print('Hello World')" in content + assert "def test_sample(): pass" in content + + # Ensure ignored patterns are not in output + assert ".git/config" not in content # Check specific file + assert "repo-to-text_" not in content + assert ".repo-to-text-settings.yaml" not in content + + # Check that .gitignore content is not included + assert "*.pyc" not in content + assert "__pycache__" not in content + +def test_save_repo_to_text_stdout(sample_repo: str) -> None: + """Test save_repo_to_text with stdout output.""" + output = save_repo_to_text(sample_repo, to_stdout=True) + assert isinstance(output, str) + assert "Directory Structure:" in output + assert "src/main.py" in output + assert "tests/test_main.py" in output + +def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None: + """Test loading ignore specs with CLI patterns.""" + cli_patterns = ["*.log", "temp/"] + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns) + + assert tree_and_content_ignore_spec.match_file("test.log") is True + assert tree_and_content_ignore_spec.match_file("temp/file.txt") is True + assert tree_and_content_ignore_spec.match_file("normal.txt") is False + +def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None: + 
"""Test loading ignore specs when .gitignore is missing.""" + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(temp_dir) + assert gitignore_spec is None + assert content_ignore_spec is None + assert tree_and_content_ignore_spec is not None + +def test_get_tree_structure_with_special_chars(temp_dir: str) -> None: + """Test tree structure generation with special characters in paths.""" + # Create files with special characters + special_dir = os.path.join(temp_dir, "special chars") + os.makedirs(special_dir) + with open(os.path.join(special_dir, "file with spaces.txt"), "w") as f: + f.write("test") + + tree_output = get_tree_structure(temp_dir) + assert "special chars" in tree_output + assert "file with spaces.txt" in tree_output + +def test_should_ignore_file_edge_cases(sample_repo: str) -> None: + """Test edge cases for should_ignore_file function.""" + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + + # Test with dot-prefixed paths + assert should_ignore_file( + "./src/main.py", + "./src/main.py", + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ) is False + + # Test with absolute paths + abs_path = os.path.join(sample_repo, "src/main.py") + rel_path = "src/main.py" + assert should_ignore_file( + abs_path, + rel_path, + gitignore_spec, + content_ignore_spec, + tree_and_content_ignore_spec + ) is False + +def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None: + """Test handling of binary files in save_repo_to_text.""" + # Create a binary file + binary_path = os.path.join(temp_dir, "binary.bin") + binary_content = b'\x00\x01\x02\x03' + with open(binary_path, "wb") as f: + f.write(binary_content) + + output = save_repo_to_text(temp_dir, to_stdout=True) + + # Check that the binary file is listed in the structure + assert "binary.bin" in output + # Check that the file content section exists with raw binary content + expected_content = 
f"Contents of binary.bin:\n```\n{binary_content.decode('latin1')}\n```" + assert expected_content in output + +def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None: + """Test save_repo_to_text with custom output directory.""" + # Create a simple file structure + with open(os.path.join(temp_dir, "test.txt"), "w") as f: + f.write("test content") + + # Create custom output directory + output_dir = os.path.join(temp_dir, "custom_output") + output_file = save_repo_to_text(temp_dir, output_dir=output_dir) + + assert os.path.exists(output_file) + assert os.path.dirname(output_file) == output_dir + assert output_file.startswith(output_dir) + +def test_get_tree_structure_empty_directory(temp_dir: str) -> None: + """Test tree structure generation for empty directory.""" + tree_output = get_tree_structure(temp_dir) + # Should only contain the directory itself + assert tree_output.strip() == "" or tree_output.strip() == temp_dir + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..c6a5ff8 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,142 @@ +import logging +import pytest +from typing import Generator +from repo_to_text.utils.utils import setup_logging + +@pytest.fixture(autouse=True) +def reset_logger() -> Generator[None, None, None]: + """Reset root logger before each test.""" + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + root_logger.setLevel(logging.WARNING) # Default level + yield + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + root_logger.setLevel(logging.WARNING) # Reset after test + +def test_setup_logging_debug() -> None: + """Test setup_logging with debug mode.""" + root_logger = logging.getLogger() + root_logger.handlers.clear() # Clear existing handlers + root_logger.setLevel(logging.WARNING) # Reset to default + + setup_logging(debug=True) + assert 
len(root_logger.handlers) > 0 + assert root_logger.level == logging.DEBUG + +def test_setup_logging_info() -> None: + """Test setup_logging with info mode.""" + root_logger = logging.getLogger() + root_logger.handlers.clear() # Clear existing handlers + root_logger.setLevel(logging.WARNING) # Reset to default + + setup_logging(debug=False) + assert len(root_logger.handlers) > 0 + assert root_logger.level == logging.INFO + +def test_setup_logging_formatter() -> None: + """Test logging formatter setup.""" + setup_logging(debug=True) + logger = logging.getLogger() + handlers = logger.handlers + + # Check if there's at least one handler + assert len(handlers) > 0 + + # Check formatter + formatter = handlers[0].formatter + assert formatter is not None + + # Test format string + test_record = logging.LogRecord( + name='test', + level=logging.DEBUG, + pathname='test.py', + lineno=1, + msg='Test message', + args=(), + exc_info=None + ) + formatted = formatter.format(test_record) + assert 'Test message' in formatted + assert test_record.levelname in formatted + +def test_setup_logging_multiple_calls() -> None: + """Test that multiple calls to setup_logging don't create duplicate handlers.""" + root_logger = logging.getLogger() + root_logger.handlers.clear() + + setup_logging(debug=True) + initial_handler_count = len(root_logger.handlers) + + # Call setup_logging again + setup_logging(debug=True) + assert len(root_logger.handlers) == initial_handler_count, "Should not create duplicate handlers" + +def test_setup_logging_level_change() -> None: + """Test changing log levels between setup_logging calls.""" + root_logger = logging.getLogger() + root_logger.handlers.clear() + + # Start with debug + setup_logging(debug=True) + assert root_logger.level == logging.DEBUG + + # Clear handlers before next setup + root_logger.handlers.clear() + + # Switch to info + setup_logging(debug=False) + assert root_logger.level == logging.INFO + +def test_setup_logging_message_format() -> None: 
+ """Test the actual format of logged messages.""" + setup_logging(debug=True) + logger = logging.getLogger() + + # Create a temporary handler to capture output + import io + log_capture = io.StringIO() + handler = logging.StreamHandler(log_capture) + # Use formatter that includes pathname + handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')) + logger.addHandler(handler) + + # Ensure debug level is set + logger.setLevel(logging.DEBUG) + handler.setLevel(logging.DEBUG) + + # Log a test message + test_message = "Test log message" + logger.debug(test_message) + log_output = log_capture.getvalue() + + # Verify format components + assert test_message in log_output + assert "DEBUG" in log_output + assert "test_utils.py" in log_output + +def test_setup_logging_error_messages() -> None: + """Test logging of error messages.""" + setup_logging(debug=False) + logger = logging.getLogger() + + # Create a temporary handler to capture output + import io + log_capture = io.StringIO() + handler = logging.StreamHandler(log_capture) + handler.setFormatter(logger.handlers[0].formatter) + logger.addHandler(handler) + + # Log an error message + error_message = "Test error message" + logger.error(error_message) + log_output = log_capture.getvalue() + + # Error messages should always be logged regardless of debug setting + assert error_message in log_output + assert "ERROR" in log_output + +if __name__ == "__main__": + pytest.main([__file__]) \ No newline at end of file