Refactor: divide logic into files and add more tests

2026-03-11 09:57:13 -07:00 · 2024-12-16 01:29:31 +01:00 · 2024-12-16 01:29:31 +01:00 · dbfa602cd3
commit dbfa602cd3
parent 6a434e5174
9 changed files with 922 additions and 0 deletions
--- a/repo_to_text/cli/__init__.py
+++ b/repo_to_text/cli/__init__.py
@ -0,0 +1,3 @@
+from .cli import create_default_settings_file, parse_args, main
+
+__all__ = ['create_default_settings_file', 'parse_args', 'main'] 
--- a/repo_to_text/cli/cli.py
+++ b/repo_to_text/cli/cli.py
@ -0,0 +1,71 @@
+import argparse
+import textwrap
+import os
+import logging
+from typing import NoReturn
+
+from ..utils.utils import setup_logging
+from ..core.core import save_repo_to_text
+
+def create_default_settings_file() -> None:
+    """Create a default .repo-to-text-settings.yaml file in the current directory.
+
+    An existing settings file is never overwritten.
+
+    Raises:
+        FileExistsError: If the settings file already exists
+    """
+    settings_file = '.repo-to-text-settings.yaml'
+    if os.path.exists(settings_file):
+        raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.")
+
+    default_settings = textwrap.dedent("""\
+        # Details: https://github.com/kirill-markin/repo-to-text
+        # Syntax: gitignore rules
+
+        # Ignore files and directories for all sections from gitignore file
+        # Default: True
+        gitignore-import-and-ignore: True
+
+        # Ignore files and directories for tree
+        # and "Contents of ..." sections
+        ignore-tree-and-content:
+          - ".repo-to-text-settings.yaml"
+
+        # Ignore files and directories for "Contents of ..." section
+        ignore-content:
+          - "README.md"
+          - "LICENSE"
+    """)
+    # Write to the same name that was checked above, with a fixed encoding so
+    # the result does not depend on the platform default.
+    with open(settings_file, 'w', encoding='utf-8') as f:
+        f.write(default_settings)
+    print("Default .repo-to-text-settings.yaml created.")
+
+def parse_args() -> argparse.Namespace:
+    """Define the CLI options and parse the process arguments.
+
+    Returns:
+        argparse.Namespace: Parsed command line arguments
+    """
+    # Declarative table of (flags, add_argument options), registered in order.
+    argument_table = (
+        (('input_dir',), dict(nargs='?', default='.', help='Directory to process')),
+        (('--debug',), dict(action='store_true', help='Enable debug logging')),
+        (('--output-dir',), dict(type=str, help='Directory to save the output file')),
+        (('--create-settings', '--init'), dict(action='store_true', help='Create default .repo-to-text-settings.yaml file')),
+        (('--stdout',), dict(action='store_true', help='Output to stdout instead of a file')),
+        (('--ignore-patterns',), dict(nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').")),
+    )
+    cli_parser = argparse.ArgumentParser(description='Convert repository structure and contents to text')
+    for flags, options in argument_table:
+        cli_parser.add_argument(*flags, **options)
+    return cli_parser.parse_args()
+
+def main() -> NoReturn:
+    """Main entry point for the CLI.
+
+    Parses the arguments, configures logging, then either writes the default
+    settings file (``--create-settings``/``--init``) or converts the repository
+    to text. Always finishes by raising ``SystemExit(0)``.
+
+    Raises:
+        SystemExit: With exit code 0 once the requested action has completed
+    """
+    args = parse_args()
+    setup_logging(debug=args.debug)
+    logging.debug('repo-to-text script started')
+
+    if args.create_settings:
+        create_default_settings_file()
+        logging.debug('.repo-to-text-settings.yaml file created')
+    else:
+        save_repo_to_text(
+            path=args.input_dir,
+            output_dir=args.output_dir,
+            to_stdout=args.stdout,
+            cli_ignore_patterns=args.ignore_patterns
+        )
+
+    logging.debug('repo-to-text script finished')
+    # The bare `exit` name is installed by the `site` module for interactive
+    # use and may be missing (e.g. `python -S`); raising SystemExit always works.
+    raise SystemExit(0)
--- a/repo_to_text/core/__init__.py
+++ b/repo_to_text/core/__init__.py
@ -0,0 +1,3 @@
+from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
+
+__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text'] 
--- a/repo_to_text/core/core.py
+++ b/repo_to_text/core/core.py
@ -0,0 +1,226 @@
+import os
+import subprocess
+import logging
+import yaml
+from datetime import datetime, timezone
+from typing import Tuple, Optional
+import pathspec
+from pathspec import PathSpec
+
+from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
+
+def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str:
+    """Generate tree structure of the directory using the external ``tree`` command.
+
+    Runs ``tree -a -f --noreport`` on ``path``. When neither ignore spec is
+    truthy the raw output is returned; otherwise each line is mapped back to a
+    path and dropped if ``should_ignore_file`` says so. Lines in which no path
+    can be located are dropped as well.
+
+    Args:
+        path: Directory path to generate tree for
+        gitignore_spec: PathSpec object for gitignore patterns
+        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
+
+    Returns:
+        str: Generated tree structure, or an empty string when ``tree`` is not installed
+    """
+    if not check_tree_command():
+        return ""
+
+    logging.debug('Generating tree structure for path: %s', path)
+    result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
+    # File names are arbitrary bytes on POSIX; substitute undecodable bytes
+    # instead of aborting the whole run with UnicodeDecodeError.
+    tree_output = result.stdout.decode('utf-8', errors='replace')
+    # Lazy %-formatting: the (potentially large) tree is only rendered into
+    # the message when debug logging is actually enabled.
+    logging.debug('Tree output generated:\n%s', tree_output)
+
+    if not gitignore_spec and not tree_and_content_ignore_spec:
+        logging.debug('No .gitignore or ignore-tree-and-content specification found')
+        return tree_output
+
+    logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
+    filtered_lines = []
+
+    for line in tree_output.splitlines():
+        # `tree -f` prints the full path of every entry; find where it starts
+        # behind the line-drawing prefix.
+        idx = line.find('./')
+        if idx == -1:
+            idx = line.find(path)
+        if idx == -1:
+            continue
+        full_path = line[idx:].strip()
+
+        if full_path == '.':
+            continue
+
+        # No trailing '/' is added for directories here: should_ignore_file
+        # appends it itself, and doing it twice produced paths like "src//".
+        relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
+
+        if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
+            display_line = line.replace('./', '', 1)
+            filtered_lines.append(display_line)
+        else:
+            logging.debug('Ignored: %s', relative_path)
+
+    filtered_tree_output = '\n'.join(filtered_lines)
+    logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
+    logging.debug('Tree structure filtering complete')
+    return filtered_tree_output
+
+def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
+    """Load ignore specifications from various sources.
+
+    Sources, all relative to ``path``: ``.repo-to-text-settings.yaml`` (keys
+    ``gitignore-import-and-ignore``, ``ignore-content`` and
+    ``ignore-tree-and-content``), the command line patterns, and ``.gitignore``
+    unless the settings disable it.
+
+    Args:
+        path: Base directory path
+        cli_ignore_patterns: List of patterns from command line
+
+    Returns:
+        Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec
+    """
+    gitignore_spec = None
+    content_ignore_spec = None
+    tree_and_content_ignore_list = []
+    use_gitignore = True
+
+    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
+    if os.path.exists(repo_settings_path):
+        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
+        with open(repo_settings_path, 'r', encoding='utf-8') as f:
+            loaded = yaml.safe_load(f)
+        # An empty or comment-only file parses to None, and any other
+        # non-mapping document has no keys to read; treat both as "no settings".
+        if isinstance(loaded, dict):
+            settings = loaded
+        else:
+            if loaded is not None:
+                logging.warning(f'Ignoring {repo_settings_path}: expected a mapping at the top level')
+            settings = {}
+        use_gitignore = settings.get('gitignore-import-and-ignore', True)
+        # A key written without any items ("ignore-content:") loads as None.
+        if 'ignore-content' in settings:
+            content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'] or [])
+        if 'ignore-tree-and-content' in settings:
+            tree_and_content_ignore_list.extend(settings['ignore-tree-and-content'] or [])
+
+    if cli_ignore_patterns:
+        tree_and_content_ignore_list.extend(cli_ignore_patterns)
+
+    if use_gitignore:
+        gitignore_path = os.path.join(path, '.gitignore')
+        if os.path.exists(gitignore_path):
+            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
+            with open(gitignore_path, 'r', encoding='utf-8') as f:
+                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
+
+    tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list)
+    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
+
+def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec], 
+                      content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool:
+    """Check if a file should be ignored based on various ignore specifications.
+
+    A path is ignored when ``is_ignored_path`` rejects it, when any supplied
+    spec matches the normalized relative path, or when its base name starts
+    with ``repo-to-text_``.
+
+    Args:
+        file_path: Full path to the file
+        relative_path: Path relative to the repository root
+        gitignore_spec: PathSpec object for gitignore patterns
+        content_ignore_spec: PathSpec object for content ignore patterns
+        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
+
+    Returns:
+        bool: True if file should be ignored, False otherwise
+    """
+    relative_path = relative_path.replace(os.sep, '/')
+
+    if relative_path.startswith('./'):
+        relative_path = relative_path[2:]
+
+    # Directories are matched with a trailing slash; skip it when the caller
+    # already supplied one so the path does not become "dir//".
+    if os.path.isdir(file_path) and not relative_path.endswith('/'):
+        relative_path += '/'
+
+    result = (
+        is_ignored_path(file_path) or
+        bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or
+        bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
+        bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
+        os.path.basename(file_path).startswith('repo-to-text_')
+    )
+
+    logging.debug('Checking if file should be ignored:')
+    logging.debug(f'    file_path: {file_path}')
+    logging.debug(f'    relative_path: {relative_path}')
+    logging.debug(f'    Result: {result}')
+    return result
+
+def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[list] = None) -> str:
+    """Save repository structure and contents to a text file.
+
+    The output consists of a directory header, the filtered tree structure and
+    one "Contents of ..." section per file that is not ignored. When writing
+    to a file, the text is also copied to the clipboard if ``pyperclip`` is
+    installed.
+
+    Args:
+        path: Repository path
+        output_dir: Directory to save output file
+        to_stdout: Whether to output to stdout instead of file
+        cli_ignore_patterns: List of patterns from command line
+
+    Returns:
+        str: Path to the output file or the output text if to_stdout is True
+    """
+    logging.debug(f'Starting to save repo structure to text for path: {path}')
+    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns)
+    tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
+    tree_structure = remove_empty_dirs(tree_structure, path)
+    logging.debug(f'Final tree structure to be written: {tree_structure}')
+
+    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
+    output_file = f'repo-to-text_{timestamp}.txt'
+
+    if output_dir:
+        # exist_ok avoids the check-then-create race of a separate exists() test.
+        os.makedirs(output_dir, exist_ok=True)
+        output_file = os.path.join(output_dir, output_file)
+
+    output_content = []
+    project_name = os.path.basename(os.path.abspath(path))
+    output_content.append(f'Directory: {project_name}\n\n')
+    output_content.append('Directory Structure:\n')
+    output_content.append('```\n.\n')
+
+    # Listed by hand here rather than taken from the tree output.
+    if os.path.exists(os.path.join(path, '.gitignore')):
+        output_content.append('├── .gitignore\n')
+
+    output_content.append(tree_structure + '\n' + '```\n')
+    logging.debug('Tree structure written to output content')
+
+    for root, _, files in os.walk(path):
+        for filename in files:
+            file_path = os.path.join(root, filename)
+            relative_path = os.path.relpath(file_path, path)
+
+            if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
+                continue
+
+            # Strip only a leading "./"; replacing the first "./" anywhere
+            # would mangle names such as "pkg./mod.py".
+            if relative_path.startswith('./'):
+                relative_path = relative_path[2:]
+
+            output_content.append(f'\nContents of {relative_path}:\n')
+            output_content.append('```\n')
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    output_content.append(f.read())
+            except UnicodeDecodeError:
+                logging.debug(f'Could not decode file contents: {file_path}')
+                output_content.append('[Could not decode file contents]\n')
+            output_content.append('\n```\n')
+
+    output_content.append('\n')
+    logging.debug('Repository contents written to output content')
+
+    output_text = ''.join(output_content)
+
+    if to_stdout:
+        print(output_text)
+        return output_text
+
+    # Sources are read as UTF-8, so write UTF-8 too; the platform default
+    # encoding may be unable to represent some characters.
+    with open(output_file, 'w', encoding='utf-8') as file:
+        file.write(output_text)
+
+    # Clipboard copy is best-effort: any failure is logged and otherwise ignored.
+    try:
+        import importlib.util
+        if importlib.util.find_spec("pyperclip"):
+            import pyperclip # type: ignore
+            pyperclip.copy(output_text)
+            logging.debug('Repository structure and contents copied to clipboard')
+        else:
+            print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
+            print("     pip install pyperclip")
+    except Exception as e:
+        logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.')
+        logging.debug(f'Clipboard copy error: {e}')
+
+    print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"")
+
+    return output_file
--- a/repo_to_text/utils/__init__.py
+++ b/repo_to_text/utils/__init__.py
@ -0,0 +1,3 @@
+from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
+
+__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs'] 
--- a/repo_to_text/utils/utils.py
+++ b/repo_to_text/utils/utils.py
@ -0,0 +1,82 @@
+import os
+import shutil
+import logging
+from typing import List
+
+def setup_logging(debug: bool = False) -> None:
+    """Configure logging through ``logging.basicConfig``.
+
+    Args:
+        debug: Use the DEBUG level when True, INFO otherwise
+    """
+    if debug:
+        chosen_level = logging.DEBUG
+    else:
+        chosen_level = logging.INFO
+    logging.basicConfig(
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        level=chosen_level,
+    )
+
+def check_tree_command() -> bool:
+    """Report whether the `tree` executable can be found on PATH.
+
+    When it is missing, installation hints are printed to stdout.
+
+    Returns:
+        bool: True if tree command is available, False otherwise
+    """
+    if shutil.which('tree') is not None:
+        return True
+
+    install_hints = (
+        "The 'tree' command is not found. Please install it using one of the following commands:",
+        "For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree",
+        "For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree",
+    )
+    for hint in install_hints:
+        print(hint)
+    return False
+
+def is_ignored_path(file_path: str) -> bool:
+    """Apply the built-in ignore rules to a path.
+
+    A path is ignored when it contains the substring ``.git`` anywhere (so
+    ``.gitignore`` is matched too) or when the path string itself starts with
+    ``repo-to-text_``.
+
+    Args:
+        file_path: Path to check
+
+    Returns:
+        bool: True if path should be ignored, False otherwise
+    """
+    dir_markers: List[str] = ['.git']
+    file_prefixes: List[str] = ['repo-to-text_']
+
+    ignored = False
+    for marker in dir_markers:
+        if marker in file_path:
+            ignored = True
+            break
+    if not ignored:
+        # str.startswith accepts a tuple of candidate prefixes.
+        ignored = file_path.startswith(tuple(file_prefixes))
+
+    if ignored:
+        logging.debug(f'Path ignored: {file_path}')
+    return ignored
+
+def remove_empty_dirs(tree_output: str, path: str = '.') -> str:
+    """Remove empty directories from tree output.
+
+    A directory line is kept only if at least one kept line lies directly
+    inside it, so directories that are empty on disk, or whose entries were
+    all filtered out of the listing, disappear together with any parents that
+    become empty as a result. Lines are processed bottom-up because ``tree``
+    prints a directory before its contents. Blank lines are dropped.
+
+    Args:
+        tree_output: Output from tree command (full paths, as printed by ``tree -f``)
+        path: Base path for the tree (currently unused; kept for API compatibility)
+
+    Returns:
+        str: Tree output with empty directories removed
+    """
+    logging.debug('Removing empty directories from tree output')
+
+    def entry_path(line: str) -> str:
+        # The path follows the first branch connector ("── ", or "-- " in
+        # ASCII mode); the root line has no connector. Splitting on the
+        # connector instead of whitespace keeps names with spaces intact.
+        for connector in ('── ', '-- '):
+            _, found, tail = line.partition(connector)
+            if found:
+                return tail.strip()
+        return line.strip()
+
+    kept_reversed: List[str] = []
+    non_empty_dirs = set()
+
+    for line in reversed(tree_output.splitlines()):
+        full_path = entry_path(line)
+        if not full_path:
+            continue
+        if os.path.isdir(full_path) and os.path.normpath(full_path) not in non_empty_dirs:
+            logging.debug(f'Directory is empty and will be removed: {full_path}')
+            continue
+        # Every kept entry marks its parent directory as non-empty.
+        non_empty_dirs.add(os.path.normpath(os.path.dirname(full_path)))
+        kept_reversed.append(line)
+
+    logging.debug('Empty directory removal complete')
+    return '\n'.join(reversed(kept_reversed))
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -0,0 +1,107 @@
+import os
+import pytest
+import tempfile
+import shutil
+from typing import Generator
+from unittest.mock import patch, MagicMock
+from repo_to_text.cli.cli import (
+    create_default_settings_file,
+    parse_args,
+    main
+)
+
+@pytest.fixture
+def temp_dir() -> Generator[str, None, None]:
+    """Create a temporary directory for testing.
+
+    Some tests ``os.chdir`` into the directory, so the working directory seen
+    at setup is restored before the directory is deleted; otherwise the rest
+    of the session would run from a directory that no longer exists.
+    """
+    original_cwd = os.getcwd()
+    temp_path = tempfile.mkdtemp()
+    try:
+        yield temp_path
+    finally:
+        os.chdir(original_cwd)
+        shutil.rmtree(temp_path)
+
+def test_parse_args_defaults() -> None:
+    """With no CLI arguments every option falls back to its default."""
+    with patch('sys.argv', ['repo-to-text']):
+        parsed = parse_args()
+
+    assert parsed.input_dir == '.'
+    assert parsed.output_dir is None
+    assert parsed.ignore_patterns is None
+    # The three boolean switches are all off by default
+    for flag_value in (parsed.debug, parsed.create_settings, parsed.stdout):
+        assert not flag_value
+
+def test_parse_args_with_values() -> None:
+    """Explicit CLI values end up on the parsed namespace."""
+    argv = ['repo-to-text', 'input/path', '--debug']
+    argv += ['--output-dir', 'output/path']
+    argv += ['--ignore-patterns', '*.log', 'temp/']
+
+    with patch('sys.argv', argv):
+        parsed = parse_args()
+
+    assert parsed.input_dir == 'input/path'
+    assert parsed.debug
+    assert parsed.output_dir == 'output/path'
+    assert parsed.ignore_patterns == ['*.log', 'temp/']
+
+def test_create_default_settings_file(temp_dir: str) -> None:
+    """The default settings file is written into the current directory."""
+    os.chdir(temp_dir)
+    create_default_settings_file()
+
+    settings_file = '.repo-to-text-settings.yaml'
+    assert os.path.exists(settings_file)
+
+    with open(settings_file, 'r') as handle:
+        written = handle.read()
+    expected_fragments = (
+        'gitignore-import-and-ignore: True',
+        'ignore-tree-and-content:',
+        'ignore-content:',
+    )
+    for fragment in expected_fragments:
+        assert fragment in written
+
+def test_create_default_settings_file_already_exists(temp_dir: str) -> None:
+    """A second creation attempt must fail instead of overwriting the file."""
+    os.chdir(temp_dir)
+
+    # First call creates the file ...
+    create_default_settings_file()
+
+    # ... so the second one has to raise.
+    with pytest.raises(FileExistsError) as raised:
+        create_default_settings_file()
+    message = str(raised.value)
+    assert "already exists" in message
+
+@patch('repo_to_text.cli.cli.save_repo_to_text')
+def test_main_normal_execution(mock_save_repo: MagicMock) -> None:
+    """main() forwards the parsed options to save_repo_to_text and exits with 0."""
+    expected_kwargs = {
+        'path': '.',
+        'output_dir': None,
+        'to_stdout': True,
+        'cli_ignore_patterns': None,
+    }
+    with patch('sys.argv', ['repo-to-text', '--stdout']):
+        with pytest.raises(SystemExit) as exit_info:
+            main()
+    assert exit_info.value.code == 0
+    mock_save_repo.assert_called_once_with(**expected_kwargs)
+
+@patch('repo_to_text.cli.cli.create_default_settings_file')
+def test_main_create_settings(mock_create_settings: MagicMock) -> None:
+    """--create-settings triggers settings-file creation and exits with 0."""
+    argv = ['repo-to-text', '--create-settings']
+    with patch('sys.argv', argv), pytest.raises(SystemExit) as exit_info:
+        main()
+    assert exit_info.value.code == 0
+    mock_create_settings.assert_called_once()
+
+@patch('repo_to_text.cli.cli.setup_logging')
+@patch('repo_to_text.cli.cli.create_default_settings_file')
+def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None:
+    """--debug is passed on to setup_logging as debug=True."""
+    argv = ['repo-to-text', '--debug', '--create-settings']
+    with patch('sys.argv', argv), pytest.raises(SystemExit) as exit_info:
+        main()
+    assert exit_info.value.code == 0
+    mock_setup_logging.assert_called_once_with(debug=True)
+    mock_create_settings.assert_called_once()
+
+# Allow running this test module directly as a script; pytest is invoked on
+# this file only.
+if __name__ == "__main__":
+    pytest.main([__file__]) 
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -0,0 +1,285 @@
+import os
+import tempfile
+import shutil
+import pytest
+from typing import Generator
+from repo_to_text.core.core import (
+    get_tree_structure,
+    load_ignore_specs,
+    should_ignore_file,
+    is_ignored_path,
+    remove_empty_dirs,
+    save_repo_to_text
+)
+
+@pytest.fixture
+def temp_dir() -> Generator[str, None, None]:
+    """Yield a fresh scratch directory and delete it after the test."""
+    scratch_path = tempfile.mkdtemp()
+    yield scratch_path
+    # Teardown: executed once the test using the fixture has finished
+    shutil.rmtree(scratch_path)
+
+@pytest.fixture
+def sample_repo(temp_dir: str) -> str:
+    """Populate temp_dir with a small repository layout and return its path."""
+    for subdir in ("src", "tests"):
+        os.makedirs(os.path.join(temp_dir, subdir))
+
+    # Relative file name -> file content
+    fixture_files = {
+        "README.md": "# Test Project",
+        ".gitignore": """
+*.pyc
+__pycache__/
+.git/
+""",
+        "src/main.py": "print('Hello World')",
+        "tests/test_main.py": "def test_sample(): pass",
+        ".repo-to-text-settings.yaml": """
+gitignore-import-and-ignore: True
+ignore-tree-and-content:
+  - ".git/"
+  - ".repo-to-text-settings.yaml"
+ignore-content:
+  - "README.md"
+"""
+    }
+
+    for rel_name, text in fixture_files.items():
+        target = os.path.join(temp_dir, rel_name)
+        os.makedirs(os.path.dirname(target), exist_ok=True)
+        with open(target, "w") as handle:
+            handle.write(text)
+
+    return temp_dir
+
+def test_is_ignored_path() -> None:
+    """Check is_ignored_path against paths that must and must not be ignored."""
+    expectations = {
+        ".git/config": True,
+        "repo-to-text_output.txt": True,
+        "src/main.py": False,
+        "normal_file.txt": False,
+    }
+    for candidate, expected in expectations.items():
+        assert is_ignored_path(candidate) is expected
+
+def test_load_ignore_specs(sample_repo: str) -> None:
+    """All three specs are loaded from the sample repo and match as expected."""
+    specs = load_ignore_specs(sample_repo)
+    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = specs
+
+    for spec in specs:
+        assert spec is not None
+
+    # Patterns that come from .gitignore
+    for ignored in ("test.pyc", "__pycache__/cache.py", ".git/config"):
+        assert gitignore_spec.match_file(ignored) is True
+
+    # Pattern from the ignore-content setting
+    assert content_ignore_spec.match_file("README.md") is True
+
+    # Pattern from the ignore-tree-and-content setting
+    assert tree_and_content_ignore_spec.match_file(".git/config") is True
+
+def test_should_ignore_file(sample_repo: str) -> None:
+    """A .git file is ignored while a regular source file is kept."""
+    # (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)
+    specs = load_ignore_specs(sample_repo)
+
+    git_file = ".git/config"
+    assert should_ignore_file(git_file, git_file, *specs) is True
+
+    source_file = "src/main.py"
+    assert should_ignore_file(source_file, source_file, *specs) is False
+
+def test_get_tree_structure(sample_repo: str) -> None:
+    """The generated tree lists the sample files and hides anything under .git."""
+    gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
+    rendered = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec)
+
+    for expected_name in ("src", "tests", "main.py", "test_main.py"):
+        assert expected_name in rendered
+    assert ".git" not in rendered
+
+def test_remove_empty_dirs(temp_dir: str) -> None:
+    """An empty directory is dropped from the tree while populated ones stay."""
+    # Lay out three directories; only empty_dir receives no file
+    for dirname in ("src", "empty_dir", "tests"):
+        os.makedirs(os.path.join(temp_dir, dirname))
+
+    with open(os.path.join(temp_dir, "src/main.py"), "w") as handle:
+        handle.write("print('test')")
+    with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as handle:
+        handle.write("def test(): pass")
+
+    src_dir = os.path.join(temp_dir, 'src')
+    src_file = os.path.join(temp_dir, 'src/main.py')
+    empty_dir = os.path.join(temp_dir, 'empty_dir')
+    tests_dir = os.path.join(temp_dir, 'tests')
+    tests_file = os.path.join(temp_dir, 'tests/test_main.py')
+
+    # Hand-written listing in the format the tree command prints with -f
+    tree_output = (
+        f"{temp_dir}\n"
+        f"├── {src_dir}\n"
+        f"│   └── {src_file}\n"
+        f"├── {empty_dir}\n"
+        f"└── {tests_dir}\n"
+        f"    └── {tests_file}\n"
+    )
+
+    filtered_output = remove_empty_dirs(tree_output, temp_dir)
+
+    assert "empty_dir" not in filtered_output
+    for kept in (src_dir, tests_dir, src_file, tests_file):
+        assert kept in filtered_output
+
+def test_save_repo_to_text(sample_repo: str) -> None:
+    """End-to-end check of save_repo_to_text writing into an output directory."""
+    output_dir = os.path.join(sample_repo, "output")
+    os.makedirs(output_dir, exist_ok=True)
+
+    # A .git directory with content, which must not show up in the result
+    os.makedirs(os.path.join(sample_repo, ".git"))
+    with open(os.path.join(sample_repo, ".git/config"), "w") as handle:
+        handle.write("[core]\n\trepositoryformatversion = 0\n")
+
+    output_file = save_repo_to_text(sample_repo, output_dir=output_dir)
+    assert os.path.exists(output_file)
+    assert os.path.dirname(output_file) == output_dir
+
+    with open(output_file, 'r') as handle:
+        content = handle.read()
+
+    must_contain = (
+        "Directory Structure:",
+        # Listed files
+        "src/main.py",
+        "tests/test_main.py",
+        # Their contents
+        "print('Hello World')",
+        "def test_sample(): pass",
+    )
+    must_not_contain = (
+        # Ignored paths
+        ".git/config",
+        "repo-to-text_",
+        ".repo-to-text-settings.yaml",
+        # Content of .gitignore
+        "*.pyc",
+        "__pycache__",
+    )
+    for fragment in must_contain:
+        assert fragment in content
+    for fragment in must_not_contain:
+        assert fragment not in content
+
+def test_save_repo_to_text_stdout(sample_repo: str) -> None:
+    """With to_stdout=True the generated text itself is returned."""
+    rendered = save_repo_to_text(sample_repo, to_stdout=True)
+    assert isinstance(rendered, str)
+    for fragment in ("Directory Structure:", "src/main.py", "tests/test_main.py"):
+        assert fragment in rendered
+
+def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None:
+    """Patterns given on the command line end up in the tree-and-content spec."""
+    _, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, ["*.log", "temp/"])
+
+    expectations = {
+        "test.log": True,
+        "temp/file.txt": True,
+        "normal.txt": False,
+    }
+    for candidate, expected in expectations.items():
+        assert tree_and_content_ignore_spec.match_file(candidate) is expected
+
+def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None:
+    """In a directory without .gitignore or settings only the combined spec exists."""
+    specs = load_ignore_specs(temp_dir)
+    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = specs
+    assert gitignore_spec is None
+    assert content_ignore_spec is None
+    assert tree_and_content_ignore_spec is not None
+
+def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
+    """Directory and file names containing spaces appear in the tree output."""
+    dir_name = "special chars"
+    file_name = "file with spaces.txt"
+
+    spaced_dir = os.path.join(temp_dir, dir_name)
+    os.makedirs(spaced_dir)
+    with open(os.path.join(spaced_dir, file_name), "w") as handle:
+        handle.write("test")
+
+    rendered = get_tree_structure(temp_dir)
+    assert dir_name in rendered
+    assert file_name in rendered
+
+def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
+    """Dot-prefixed and absolute paths to a regular file are not ignored."""
+    # (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)
+    specs = load_ignore_specs(sample_repo)
+
+    # "./"-prefixed path used for both arguments
+    dotted = "./src/main.py"
+    assert should_ignore_file(dotted, dotted, *specs) is False
+
+    # Absolute file path combined with a repo-relative path
+    absolute = os.path.join(sample_repo, "src/main.py")
+    assert should_ignore_file(absolute, "src/main.py", *specs) is False
+
+def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
+    """Test handling of binary files in save_repo_to_text.
+
+    Covers both outcomes: bytes that happen to be valid UTF-8 are emitted as
+    text, and bytes that cannot be decoded are replaced by a placeholder.
+    """
+    # Control bytes 0x00-0x03 are valid UTF-8, so this file is read as text
+    binary_path = os.path.join(temp_dir, "binary.bin")
+    binary_content = b'\x00\x01\x02\x03'
+    with open(binary_path, "wb") as f:
+        f.write(binary_content)
+
+    # 0xFF never occurs in UTF-8, so reading this file raises UnicodeDecodeError
+    invalid_path = os.path.join(temp_dir, "invalid.bin")
+    with open(invalid_path, "wb") as f:
+        f.write(b'\xff\xfe\xfd')
+
+    output = save_repo_to_text(temp_dir, to_stdout=True)
+
+    # Check that the binary file is listed in the output
+    assert "binary.bin" in output
+    # Check that the decodable bytes are included verbatim
+    expected_content = f"Contents of binary.bin:\n```\n{binary_content.decode('latin1')}\n```"
+    assert expected_content in output
+    # Check that the undecodable file gets the placeholder instead of crashing
+    assert "Contents of invalid.bin:\n```\n[Could not decode file contents]\n" in output
+
+def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None:
+    """The output file is placed in output_dir, which is not created beforehand."""
+    with open(os.path.join(temp_dir, "test.txt"), "w") as handle:
+        handle.write("test content")
+
+    # Only the path is built here; the directory does not exist yet
+    target_dir = os.path.join(temp_dir, "custom_output")
+    written_path = save_repo_to_text(temp_dir, output_dir=target_dir)
+
+    assert os.path.exists(written_path)
+    assert os.path.dirname(written_path) == target_dir
+    assert written_path.startswith(target_dir)
+
+def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
+    """For an empty directory the tree is either blank or just the directory itself."""
+    rendered = get_tree_structure(temp_dir).strip()
+    assert rendered == "" or rendered == temp_dir
+
+# Allow running this test module directly as a script; pytest is invoked on
+# this file only.
+if __name__ == "__main__":
+    pytest.main([__file__])
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -0,0 +1,142 @@
+import logging
+import pytest
+from typing import Generator
+from repo_to_text.utils.utils import setup_logging
+
+@pytest.fixture(autouse=True)
+def reset_logger() -> Generator[None, None, None]:
+    """Strip the root logger's handlers and set it to WARNING before and after each test."""
+    root = logging.getLogger()
+    def _wipe() -> None:
+        for attached in list(root.handlers):  # iterate a copy: removal mutates the list
+            root.removeHandler(attached)
+        root.setLevel(logging.WARNING)  # Default level
+    _wipe()  # clean slate going into the test
+    yield
+    _wipe()  # undo whatever the test configured
+
+def test_setup_logging_debug() -> None:
+    """Check that setup_logging(debug=True) leaves a handler and DEBUG level on the root logger."""
+    root = logging.getLogger()
+    del root.handlers[:]  # start with no handlers attached
+    root.setLevel(logging.WARNING)  # and the default level
+
+    setup_logging(debug=True)
+    assert root.handlers
+    assert logging.DEBUG == root.level
+
+def test_setup_logging_info() -> None:
+    """Check that setup_logging(debug=False) leaves a handler and INFO level on the root logger."""
+    root = logging.getLogger()
+    del root.handlers[:]  # start with no handlers attached
+    root.setLevel(logging.WARNING)  # and the default level
+
+    setup_logging(debug=False)
+    assert root.handlers
+    assert logging.INFO == root.level
+
+def test_setup_logging_formatter() -> None:
+    """Check that the first root handler has a formatter that renders message and level."""
+    setup_logging(debug=True)
+    installed = logging.getLogger().handlers
+
+    # The root logger must have at least one handler after setup_logging
+    assert installed
+
+    # ... and the first of them needs a formatter attached
+    fmt = installed[0].formatter
+    assert fmt is not None
+
+    # Run a hand-built DEBUG record through that formatter
+    record = logging.LogRecord(
+        name='test',
+        level=logging.DEBUG,
+        pathname='test.py',
+        lineno=1,
+        msg='Test message',
+        args=(),
+        exc_info=None
+    )
+    rendered = fmt.format(record)
+
+    assert 'Test message' in rendered
+    assert record.levelname in rendered
+
+def test_setup_logging_multiple_calls() -> None:
+    """Check that calling setup_logging twice does not change the root handler count."""
+    root = logging.getLogger()
+    del root.handlers[:]
+
+    setup_logging(debug=True)
+    count_after_first = len(root.handlers)
+
+    # The second call must leave the number of handlers untouched
+    setup_logging(debug=True)
+    assert count_after_first == len(root.handlers), "Should not create duplicate handlers"
+
+def test_setup_logging_level_change() -> None:
+    """Check that the root level follows the debug flag across successive setup_logging calls."""
+    root = logging.getLogger()
+
+    # (debug flag, root level expected afterwards), exercised in this order
+    scenarios = (
+        (True, logging.DEBUG),
+        (False, logging.INFO),
+    )
+
+    for debug_flag, expected_level in scenarios:
+        # Drop existing handlers before each setup_logging call
+        root.handlers.clear()
+        setup_logging(debug=debug_flag)
+        assert root.level == expected_level
+
+def test_setup_logging_message_format() -> None:
+    """Check that a DEBUG record on the root logger is captured with its level and source file."""
+    import io
+
+    setup_logging(debug=True)
+    root = logging.getLogger()
+
+    # Capture output through an extra in-memory handler whose own format
+    # (not the one installed by setup_logging) includes the source pathname.
+    buffer = io.StringIO()
+    capture = logging.StreamHandler(buffer)
+    layout = logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')
+    capture.setFormatter(layout)
+    capture.setLevel(logging.DEBUG)
+    root.addHandler(capture)
+    root.setLevel(logging.DEBUG)  # make sure DEBUG records are not filtered out
+
+    # Emit one DEBUG record and read back what was captured
+    text = "Test log message"
+    root.debug(text)
+    captured = buffer.getvalue()
+
+    # The message, its level name and this file's name must all appear
+    assert text in captured
+    assert "DEBUG" in captured
+    assert "test_utils.py" in captured
+
+def test_setup_logging_error_messages() -> None:
+    """Check that an ERROR record is emitted when setup_logging runs with debug=False."""
+    import io
+
+    setup_logging(debug=False)
+    root = logging.getLogger()
+
+    # Reuse the first root handler's formatter on an in-memory capture handler
+    buffer = io.StringIO()
+    capture = logging.StreamHandler(buffer)
+    capture.setFormatter(root.handlers[0].formatter)
+    root.addHandler(capture)
+
+    text = "Test error message"
+    root.error(text)
+    captured = buffer.getvalue()
+
+    # ERROR records must get through even though debug mode is off
+    assert text in captured
+    assert "ERROR" in captured
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main([__file__]))  # hand pytest's exit code to the shell instead of dropping it