Refactor: divide logic into separate files and add more tests

This commit is contained in:
Kirill Markin 2024-12-16 01:29:31 +01:00
parent 6a434e5174
commit dbfa602cd3
No known key found for this signature in database
9 changed files with 922 additions and 0 deletions

View file

@ -0,0 +1,3 @@
from .cli import create_default_settings_file, parse_args, main
__all__ = ['create_default_settings_file', 'parse_args', 'main']

71
repo_to_text/cli/cli.py Normal file
View file

@ -0,0 +1,71 @@
import argparse
import textwrap
import os
import logging
from typing import NoReturn
from ..utils.utils import setup_logging
from ..core.core import save_repo_to_text
def create_default_settings_file() -> None:
    """Create a default .repo-to-text-settings.yaml file in the current directory.

    Raises:
        FileExistsError: If the settings file already exists.
    """
    settings_file = '.repo-to-text-settings.yaml'
    default_settings = textwrap.dedent("""\
        # Details: https://github.com/kirill-markin/repo-to-text
        # Syntax: gitignore rules
        # Ignore files and directories for all sections from gitignore file
        # Default: True
        gitignore-import-and-ignore: True
        # Ignore files and directories for tree
        # and "Contents of ..." sections
        ignore-tree-and-content:
        - ".repo-to-text-settings.yaml"
        # Ignore files and directories for "Contents of ..." section
        ignore-content:
        - "README.md"
        - "LICENSE"
    """)
    try:
        # Mode 'x' creates the file exclusively: it fails if the file already
        # exists, so there is no gap between an existence check and the write
        # in which another process could create (and then lose) the file.
        with open(settings_file, 'x', encoding='utf-8') as f:
            f.write(default_settings)
    except FileExistsError as err:
        raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.") from err
    print("Default .repo-to-text-settings.yaml created.")
def parse_args() -> argparse.Namespace:
    """Build the command line parser and parse the process arguments.

    Returns:
        argparse.Namespace: Parsed command line arguments
    """
    cli = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    # Each entry is (flag names, keyword options) and is registered in order.
    option_table = (
        (('input_dir',), {'nargs': '?', 'default': '.', 'help': 'Directory to process'}),
        (('--debug',), {'action': 'store_true', 'help': 'Enable debug logging'}),
        (('--output-dir',), {'type': str, 'help': 'Directory to save the output file'}),
        (('--create-settings', '--init'), {'action': 'store_true', 'help': 'Create default .repo-to-text-settings.yaml file'}),
        (('--stdout',), {'action': 'store_true', 'help': 'Output to stdout instead of a file'}),
        (('--ignore-patterns',), {'nargs': '*', 'help': "List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*')."}),
    )
    for flag_names, options in option_table:
        cli.add_argument(*flag_names, **options)
    return cli.parse_args()
def main() -> NoReturn:
    """Main entry point for the CLI.

    Parses the command line, configures logging, then either writes the
    default settings file (``--create-settings`` / ``--init``) or dumps the
    repository to text.

    Raises:
        SystemExit: Always, with exit code 0, once the selected action is done.
    """
    args = parse_args()
    setup_logging(debug=args.debug)
    logging.debug('repo-to-text script started')
    if args.create_settings:
        create_default_settings_file()
        logging.debug('.repo-to-text-settings.yaml file created')
    else:
        save_repo_to_text(
            path=args.input_dir,
            output_dir=args.output_dir,
            to_stdout=args.stdout,
            cli_ignore_patterns=args.ignore_patterns
        )
    logging.debug('repo-to-text script finished')
    # The bare ``exit`` builtin is injected by the ``site`` module and is not
    # available under ``python -S`` or in frozen builds; raising SystemExit
    # has the same effect without depending on it.
    raise SystemExit(0)

View file

@ -0,0 +1,3 @@
from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text']

226
repo_to_text/core/core.py Normal file
View file

@ -0,0 +1,226 @@
import os
import subprocess
import logging
import yaml
from datetime import datetime, timezone
from typing import Tuple, Optional
import pathspec
from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str:
    """Run the ``tree`` command on a directory and filter out ignored entries.

    Args:
        path: Directory path to generate tree for
        gitignore_spec: PathSpec object for gitignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        str: Generated tree structure; an empty string when ``tree`` is not installed
    """
    if not check_tree_command():
        return ""

    logging.debug(f'Generating tree structure for path: {path}')
    completed = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    tree_output = completed.stdout.decode('utf-8')
    logging.debug(f'Tree output generated:\n{tree_output}')

    # With no usable spec the raw output is returned untouched.
    if not (gitignore_spec or tree_and_content_ignore_spec):
        logging.debug('No .gitignore or ignore-tree-and-content specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
    kept_lines = []
    for raw_line in tree_output.splitlines():
        # Locate where the path starts: first a './' marker, otherwise the
        # base path itself.
        start = raw_line.find('./')
        if start < 0:
            start = raw_line.find(path)
        if start < 0:
            continue

        entry = raw_line[start:].strip()
        if entry == '.':
            continue

        rel = os.path.relpath(entry, path).replace(os.sep, '/')
        if os.path.isdir(entry):
            rel += '/'

        if should_ignore_file(entry, rel, gitignore_spec, None, tree_and_content_ignore_spec):
            logging.debug(f'Ignored: {rel}')
            continue
        # Only the first './' occurrence is removed from the displayed line.
        kept_lines.append(raw_line.replace('./', '', 1))

    filtered_tree_output = '\n'.join(kept_lines)
    logging.debug(f'Filtered tree structure:\n{filtered_tree_output}')
    logging.debug('Tree structure filtering complete')
    return filtered_tree_output
def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
    """Load ignore specifications from various sources.

    Sources are the ``.repo-to-text-settings.yaml`` file in ``path``, the
    patterns passed on the command line, and the ``.gitignore`` file in
    ``path`` (unless the settings file disables it).

    Args:
        path: Base directory path
        cli_ignore_patterns: List of patterns from command line

    Returns:
        Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_list = []
    use_gitignore = True

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
        with open(repo_settings_path, 'r') as f:
            settings = yaml.safe_load(f)
        # safe_load yields None for an empty file and may yield a list or a
        # scalar for malformed settings; fall back to "no settings" instead
        # of crashing on ``.get``.
        if not isinstance(settings, dict):
            if settings is not None:
                logging.warning(f'Ignoring {repo_settings_path}: expected a mapping at the top level')
            settings = {}
        use_gitignore = settings.get('gitignore-import-and-ignore', True)
        # A key written without any items parses as None; treat it as empty.
        if 'ignore-content' in settings:
            content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'] or [])
        if 'ignore-tree-and-content' in settings:
            tree_and_content_ignore_list.extend(settings['ignore-tree-and-content'] or [])

    if cli_ignore_patterns:
        tree_and_content_ignore_list.extend(cli_ignore_patterns)

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
            with open(gitignore_path, 'r') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list)
    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec],
                       content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool:
    """Check if a file should be ignored based on various ignore specifications.

    Args:
        file_path: Full path to the file
        relative_path: Path relative to the repository root
        gitignore_spec: PathSpec object for gitignore patterns
        content_ignore_spec: PathSpec object for content ignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        bool: True if file should be ignored, False otherwise
    """
    relative_path = relative_path.replace(os.sep, '/')
    if relative_path.startswith('./'):
        relative_path = relative_path[2:]
    # Directories are matched with a trailing slash. get_tree_structure
    # already appends one before calling here, so only add it when missing
    # to avoid producing 'dir//'.
    if os.path.isdir(file_path) and not relative_path.endswith('/'):
        relative_path += '/'

    result = (
        is_ignored_path(file_path) or
        bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or
        bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
        bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
        # Previously generated output files are never included.
        os.path.basename(file_path).startswith('repo-to-text_')
    )

    logging.debug('Checking if file should be ignored:')
    logging.debug(f' file_path: {file_path}')
    logging.debug(f' relative_path: {relative_path}')
    logging.debug(f' Result: {result}')
    return result
def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[list] = None) -> str:
    """Save repository structure and contents to a text file.

    The output consists of the filtered directory tree followed by a
    "Contents of ..." section for every file that is not ignored. When
    written to a file, the text is also copied to the clipboard if the
    optional ``pyperclip`` package is importable.

    Args:
        path: Repository path
        output_dir: Directory to save output file
        to_stdout: Whether to output to stdout instead of file
        cli_ignore_patterns: List of patterns from command line

    Returns:
        str: Path to the output file or the output text if to_stdout is True
    """
    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns)
    tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)
    logging.debug(f'Final tree structure to be written: {tree_structure}')

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo-to-text_{timestamp}.txt'
    if output_dir:
        # exist_ok avoids failing when the directory appears between an
        # existence check and its creation.
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    output_content = []
    project_name = os.path.basename(os.path.abspath(path))
    output_content.append(f'Directory: {project_name}\n\n')
    output_content.append('Directory Structure:\n')
    output_content.append('```\n.\n')

    # NOTE(review): .gitignore is listed by hand, presumably because the
    # '.git' substring rule in is_ignored_path filters it out of the tree.
    if os.path.exists(os.path.join(path, '.gitignore')):
        output_content.append('├── .gitignore\n')

    output_content.append(tree_structure + '\n' + '```\n')
    logging.debug('Tree structure written to output content')

    for root, _, files in os.walk(path):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, path)

            if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
                continue

            # Strip only a leading './'; replacing the first './' anywhere
            # would corrupt names such as 'a./b'.
            if relative_path.startswith('./'):
                relative_path = relative_path[2:]

            output_content.append(f'\nContents of {relative_path}:\n')
            output_content.append('```\n')
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    output_content.append(f.read())
            except UnicodeDecodeError:
                logging.debug(f'Could not decode file contents: {file_path}')
                output_content.append('[Could not decode file contents]\n')
            output_content.append('\n```\n')

    output_content.append('\n')
    logging.debug('Repository contents written to output content')

    output_text = ''.join(output_content)

    if to_stdout:
        print(output_text)
        return output_text

    # The text contains box-drawing characters and UTF-8 file contents, so
    # write it as UTF-8 instead of relying on the platform default encoding.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(output_text)

    # Clipboard copy is best effort: any failure is logged and ignored.
    try:
        import importlib.util
        if importlib.util.find_spec("pyperclip"):
            import pyperclip  # type: ignore
            pyperclip.copy(output_text)
            logging.debug('Repository structure and contents copied to clipboard')
        else:
            print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
            print(" pip install pyperclip")
    except Exception as e:
        logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.')
        logging.debug(f'Clipboard copy error: {e}')

    print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"")
    return output_file

View file

@ -0,0 +1,3 @@
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']

View file

@ -0,0 +1,82 @@
import os
import shutil
import logging
from typing import List
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger through ``logging.basicConfig``.

    Args:
        debug: If True, sets logging level to DEBUG, otherwise INFO
    """
    chosen_level = logging.INFO
    if debug:
        chosen_level = logging.DEBUG
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=chosen_level,
    )
def check_tree_command() -> bool:
    """Report whether the `tree` executable can be found on PATH.

    When it is missing, installation hints are printed to stdout.

    Returns:
        bool: True if tree command is available, False otherwise
    """
    if shutil.which('tree') is not None:
        return True

    install_hints = (
        "The 'tree' command is not found. Please install it using one of the following commands:",
        "For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree",
        "For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree",
    )
    for hint in install_hints:
        print(hint)
    return False
def is_ignored_path(file_path: str) -> bool:
    """Apply the built-in ignore rules to a path.

    A path is ignored when it contains the text ``.git`` anywhere or when it
    starts with the ``repo-to-text_`` output-file prefix.

    NOTE(review): the ``.git`` rule is a plain substring test, so paths such
    as ``.gitignore`` or ``.github/...`` are ignored as well.

    Args:
        file_path: Path to check

    Returns:
        bool: True if path should be ignored, False otherwise
    """
    ignored_dirs: List[str] = ['.git']
    ignored_files_prefix: List[str] = ['repo-to-text_']

    matched = (
        any(marker in file_path for marker in ignored_dirs)
        or file_path.startswith(tuple(ignored_files_prefix))
    )
    if not matched:
        return False
    logging.debug(f'Path ignored: {file_path}')
    return True
def _tree_entry_path(line: str) -> str:
    """Return the path portion of one line of ``tree -f`` output."""
    drawing_chars = '│├└─|`+\\-'
    for connector in ('── ', '-- '):
        head, found, tail = line.partition(connector)
        # Accept the connector only when everything before it is tree
        # decoration, so a path that merely contains '-- ' is not split.
        if found and all(ch.isspace() or ch in drawing_chars for ch in head):
            return tail.strip()
    return line.strip()


def remove_empty_dirs(tree_output: str, path: str = '.') -> str:
    """Remove empty directories from tree output.

    A directory line is kept only when at least one non-directory entry of
    the tree lies somewhere below it; directories whose files were all
    filtered out, or that never had any, are dropped. Paths in the tree are
    checked on disk as written (relative ones against the current working
    directory). Blank lines are dropped.

    Args:
        tree_output: Output from tree command
        path: Base path for the tree (currently unused; kept for callers)

    Returns:
        str: Tree output with empty directories removed
    """
    logging.debug('Removing empty directories from tree output')
    entries = [(line, _tree_entry_path(line)) for line in tree_output.splitlines()]

    # Pass 1: every ancestor of a non-directory entry counts as non-empty.
    non_empty_dirs = set()
    for _, entry in entries:
        if not entry or os.path.isdir(entry):
            continue
        parent = os.path.dirname(entry)
        # If a parent is already recorded, all of its ancestors are too.
        while parent and parent not in non_empty_dirs:
            non_empty_dirs.add(parent)
            parent = os.path.dirname(parent)

    # Pass 2: drop directory lines that have no surviving descendants.
    final_lines = []
    for line, entry in entries:
        if not entry:
            continue
        if os.path.isdir(entry) and entry not in non_empty_dirs:
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)

107
tests/test_cli.py Normal file
View file

@ -0,0 +1,107 @@
import os
import pytest
import tempfile
import shutil
from typing import Generator
from unittest.mock import patch, MagicMock
from repo_to_text.cli.cli import (
create_default_settings_file,
parse_args,
main
)
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
    """Yield a fresh scratch directory and delete it after the test."""
    scratch_path = tempfile.mkdtemp()
    yield scratch_path
    # Teardown: runs once the requesting test has finished.
    shutil.rmtree(scratch_path)
def test_parse_args_defaults() -> None:
    """Every option falls back to its default when no arguments are given."""
    with patch('sys.argv', ['repo-to-text']):
        parsed = parse_args()

    # Positional and valued options.
    assert parsed.input_dir == '.'
    assert parsed.output_dir is None
    assert parsed.ignore_patterns is None
    # Boolean switches are all off.
    assert not parsed.debug
    assert not parsed.create_settings
    assert not parsed.stdout
def test_parse_args_with_values() -> None:
    """Explicit command line values end up on the parsed namespace."""
    argv = ['repo-to-text', 'input/path', '--debug']
    argv += ['--output-dir', 'output/path']
    argv += ['--ignore-patterns', '*.log', 'temp/']

    with patch('sys.argv', argv):
        parsed = parse_args()

    assert parsed.input_dir == 'input/path'
    assert parsed.debug
    assert parsed.output_dir == 'output/path'
    assert parsed.ignore_patterns == ['*.log', 'temp/']
def test_create_default_settings_file(temp_dir: str) -> None:
    """Test creation of default settings file."""
    # Remember where we started so the working directory can be restored;
    # otherwise the process is left inside a directory that the temp_dir
    # fixture deletes afterwards.
    try:
        original_cwd = os.getcwd()
    except OSError:
        # The current directory may already be gone (e.g. removed by an
        # earlier test); fall back to a directory that exists.
        original_cwd = tempfile.gettempdir()
    os.chdir(temp_dir)
    try:
        create_default_settings_file()
        settings_file = '.repo-to-text-settings.yaml'
        assert os.path.exists(settings_file)
        with open(settings_file, 'r') as f:
            content = f.read()
        assert 'gitignore-import-and-ignore: True' in content
        assert 'ignore-tree-and-content:' in content
        assert 'ignore-content:' in content
    finally:
        os.chdir(original_cwd)
def test_create_default_settings_file_already_exists(temp_dir: str) -> None:
    """Test handling of existing settings file."""
    # Restore the working directory afterwards so the process is not left
    # inside a directory that the temp_dir fixture deletes.
    try:
        original_cwd = os.getcwd()
    except OSError:
        # The current directory may already be gone (e.g. removed by an
        # earlier test); fall back to a directory that exists.
        original_cwd = tempfile.gettempdir()
    os.chdir(temp_dir)
    try:
        # Create the file first
        create_default_settings_file()
        # Try to create it again
        with pytest.raises(FileExistsError) as exc_info:
            create_default_settings_file()
        assert "already exists" in str(exc_info.value)
    finally:
        os.chdir(original_cwd)
@patch('repo_to_text.cli.cli.save_repo_to_text')
def test_main_normal_execution(mock_save_repo: MagicMock) -> None:
    """main() forwards the parsed options to save_repo_to_text and exits 0."""
    expected_call = {
        'path': '.',
        'output_dir': None,
        'to_stdout': True,
        'cli_ignore_patterns': None,
    }
    with patch('sys.argv', ['repo-to-text', '--stdout']), pytest.raises(SystemExit) as exc_info:
        main()

    assert exc_info.value.code == 0
    mock_save_repo.assert_called_once_with(**expected_call)
@patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_create_settings(mock_create_settings: MagicMock) -> None:
    """--create-settings makes main() write the settings file and exit 0."""
    with patch('sys.argv', ['repo-to-text', '--create-settings']), pytest.raises(SystemExit) as exc_info:
        main()

    assert exc_info.value.code == 0
    mock_create_settings.assert_called_once()
@patch('repo_to_text.cli.cli.setup_logging')
@patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None:
    """--debug is passed through to setup_logging as debug=True."""
    argv = ['repo-to-text', '--debug', '--create-settings']
    with patch('sys.argv', argv), pytest.raises(SystemExit) as exc_info:
        main()

    assert exc_info.value.code == 0
    mock_setup_logging.assert_called_once_with(debug=True)
    mock_create_settings.assert_called_once()
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file directly fails
    # with a non-zero code when a test fails.
    raise SystemExit(pytest.main([__file__]))

285
tests/test_core.py Normal file
View file

@ -0,0 +1,285 @@
import os
import tempfile
import shutil
import pytest
from typing import Generator
from repo_to_text.core.core import (
get_tree_structure,
load_ignore_specs,
should_ignore_file,
is_ignored_path,
remove_empty_dirs,
save_repo_to_text
)
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
    """Yield a fresh scratch directory and delete it after the test."""
    scratch_path = tempfile.mkdtemp()
    yield scratch_path
    # Teardown: runs once the requesting test has finished.
    shutil.rmtree(scratch_path)
@pytest.fixture
def sample_repo(temp_dir: str) -> str:
    """Populate the temporary directory with a small fake repository.

    The layout is ``src/`` and ``tests/`` with one Python file each, a
    README, a .gitignore and a .repo-to-text-settings.yaml file.
    """
    # Create directories
    for folder in ("src", "tests"):
        os.makedirs(os.path.join(temp_dir, folder))

    gitignore_text = """
*.pyc
__pycache__/
.git/
"""
    settings_text = """
gitignore-import-and-ignore: True
ignore-tree-and-content:
- ".git/"
- ".repo-to-text-settings.yaml"
ignore-content:
- "README.md"
"""
    # Relative file name -> content, written in this order.
    sample_files = {
        "README.md": "# Test Project",
        ".gitignore": gitignore_text,
        "src/main.py": "print('Hello World')",
        "tests/test_main.py": "def test_sample(): pass",
        ".repo-to-text-settings.yaml": settings_text,
    }

    for relative_name, text in sample_files.items():
        target = os.path.join(temp_dir, relative_name)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "w") as handle:
            handle.write(text)

    return temp_dir
def test_is_ignored_path() -> None:
    """Built-in ignore rules: .git paths and generated output files."""
    should_be_ignored = (".git/config", "repo-to-text_output.txt")
    should_be_kept = ("src/main.py", "normal_file.txt")

    for candidate in should_be_ignored:
        assert is_ignored_path(candidate) is True
    for candidate in should_be_kept:
        assert is_ignored_path(candidate) is False
def test_load_ignore_specs(sample_repo: str) -> None:
    """All three specs are loaded from the sample repository's files."""
    loaded = load_ignore_specs(sample_repo)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = loaded

    assert gitignore_spec is not None
    assert content_ignore_spec is not None
    assert tree_and_content_ignore_spec is not None

    # Test gitignore patterns
    for ignored in ("test.pyc", "__pycache__/cache.py", ".git/config"):
        assert gitignore_spec.match_file(ignored) is True

    # Test content ignore patterns
    assert content_ignore_spec.match_file("README.md") is True

    # Test tree and content ignore patterns
    assert tree_and_content_ignore_spec.match_file(".git/config") is True
def test_should_ignore_file(sample_repo: str) -> None:
    """A .git file is ignored while an ordinary source file is not."""
    # (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)
    specs = load_ignore_specs(sample_repo)

    # Test various file paths
    git_file = ".git/config"
    assert should_ignore_file(git_file, git_file, *specs) is True

    source_file = "src/main.py"
    assert should_ignore_file(source_file, source_file, *specs) is False
def test_get_tree_structure(sample_repo: str) -> None:
    """The filtered tree lists the sample files and nothing under .git."""
    gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
    tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec)

    # Basic structure checks
    for expected in ("src", "tests", "main.py", "test_main.py"):
        assert expected in tree_output
    assert ".git" not in tree_output
def test_remove_empty_dirs(temp_dir: str) -> None:
    """An empty directory disappears from the tree; populated ones stay."""
    src_dir = os.path.join(temp_dir, 'src')
    empty_dir = os.path.join(temp_dir, 'empty_dir')
    tests_dir = os.path.join(temp_dir, 'tests')
    src_file = os.path.join(temp_dir, 'src/main.py')
    tests_file = os.path.join(temp_dir, 'tests/test_main.py')

    # Create test directory structure
    os.makedirs(os.path.join(temp_dir, "src"))
    os.makedirs(os.path.join(temp_dir, "empty_dir"))
    os.makedirs(os.path.join(temp_dir, "tests"))

    # Create some files
    with open(os.path.join(temp_dir, "src/main.py"), "w") as f:
        f.write("print('test')")
    with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as f:
        f.write("def test(): pass")

    # Create a mock tree output that matches the actual tree command format
    tree_output = "".join([
        f"{temp_dir}\n",
        f"├── {src_dir}\n",
        f"│ └── {src_file}\n",
        f"├── {empty_dir}\n",
        f"└── {tests_dir}\n",
        f" └── {tests_file}\n",
    ])

    filtered_output = remove_empty_dirs(tree_output, temp_dir)

    # Check that empty_dir is removed but other directories remain
    assert "empty_dir" not in filtered_output
    for kept in (src_dir, tests_dir, src_file, tests_file):
        assert kept in filtered_output
def test_save_repo_to_text(sample_repo: str) -> None:
    """End-to-end check of the file written by save_repo_to_text."""
    # Create output directory
    output_dir = os.path.join(sample_repo, "output")
    os.makedirs(output_dir, exist_ok=True)

    # Create .git directory to ensure it's properly ignored
    os.makedirs(os.path.join(sample_repo, ".git"))
    with open(os.path.join(sample_repo, ".git/config"), "w") as git_config:
        git_config.write("[core]\n\trepositoryformatversion = 0\n")

    # Test file output
    output_file = save_repo_to_text(sample_repo, output_dir=output_dir)
    assert os.path.exists(output_file)
    assert os.path.dirname(output_file) == output_dir

    # Check file contents
    with open(output_file, 'r') as written:
        content = written.read()

    # Header, expected file names and expected file contents.
    must_contain = (
        "Directory Structure:",
        "src/main.py",
        "tests/test_main.py",
        "print('Hello World')",
        "def test_sample(): pass",
    )
    for fragment in must_contain:
        assert fragment in content

    # Ignored paths, generated output names and .gitignore contents.
    must_not_contain = (
        ".git/config",
        "repo-to-text_",
        ".repo-to-text-settings.yaml",
        "*.pyc",
        "__pycache__",
    )
    for fragment in must_not_contain:
        assert fragment not in content
def test_save_repo_to_text_stdout(sample_repo: str) -> None:
    """With to_stdout=True the generated text itself is returned."""
    output = save_repo_to_text(sample_repo, to_stdout=True)

    assert isinstance(output, str)
    for fragment in ("Directory Structure:", "src/main.py", "tests/test_main.py"):
        assert fragment in output
def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None:
    """Patterns given on the command line join the tree-and-content spec."""
    cli_patterns = ["*.log", "temp/"]
    specs = load_ignore_specs(sample_repo, cli_patterns)
    tree_and_content_ignore_spec = specs[2]

    expectations = (
        ("test.log", True),
        ("temp/file.txt", True),
        ("normal.txt", False),
    )
    for candidate, expected in expectations:
        assert tree_and_content_ignore_spec.match_file(candidate) is expected
def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None:
    """In a bare directory only the tree-and-content spec is created."""
    loaded = load_ignore_specs(temp_dir)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = loaded

    assert gitignore_spec is None
    assert content_ignore_spec is None
    assert tree_and_content_ignore_spec is not None
def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
    """Names containing spaces show up unchanged in the tree output."""
    # Create files with special characters
    special_dir = os.path.join(temp_dir, "special chars")
    os.makedirs(special_dir)
    spaced_file = os.path.join(special_dir, "file with spaces.txt")
    with open(spaced_file, "w") as handle:
        handle.write("test")

    tree_output = get_tree_structure(temp_dir)

    for name in ("special chars", "file with spaces.txt"):
        assert name in tree_output
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
    """Dot-prefixed and absolute paths to a source file are not ignored."""
    # (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)
    specs = load_ignore_specs(sample_repo)

    # Test with dot-prefixed paths
    dotted = "./src/main.py"
    assert should_ignore_file(dotted, dotted, *specs) is False

    # Test with absolute paths
    abs_path = os.path.join(sample_repo, "src/main.py")
    rel_path = "src/main.py"
    assert should_ignore_file(abs_path, rel_path, *specs) is False
def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
    """A file holding raw bytes is listed and its content section is emitted."""
    # Create a binary file
    binary_content = b'\x00\x01\x02\x03'
    binary_path = os.path.join(temp_dir, "binary.bin")
    with open(binary_path, "wb") as handle:
        handle.write(binary_content)

    output = save_repo_to_text(temp_dir, to_stdout=True)

    # Check that the binary file is listed in the structure
    assert "binary.bin" in output

    # Check that the file content section exists with raw binary content
    decoded = binary_content.decode('latin1')
    expected_content = f"Contents of binary.bin:\n```\n{decoded}\n```"
    assert expected_content in output
def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None:
    """The output file is created inside a not-yet-existing output_dir."""
    # Create a simple file structure
    with open(os.path.join(temp_dir, "test.txt"), "w") as handle:
        handle.write("test content")

    # Custom output directory (not created beforehand)
    output_dir = os.path.join(temp_dir, "custom_output")
    output_file = save_repo_to_text(temp_dir, output_dir=output_dir)

    assert os.path.exists(output_file)
    assert os.path.dirname(output_file) == output_dir
    assert output_file.startswith(output_dir)
def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
    """For an empty directory the tree is blank or just the directory line."""
    stripped_tree = get_tree_structure(temp_dir).strip()
    # Should only contain the directory itself
    assert stripped_tree in ("", temp_dir)
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file directly fails
    # with a non-zero code when a test fails.
    raise SystemExit(pytest.main([__file__]))

142
tests/test_utils.py Normal file
View file

@ -0,0 +1,142 @@
import logging
import pytest
from typing import Generator
from repo_to_text.utils.utils import setup_logging
@pytest.fixture(autouse=True)
def reset_logger() -> Generator[None, None, None]:
    """Strip the root logger's handlers and level before and after each test."""

    def _restore_root_defaults() -> None:
        root_logger = logging.getLogger()
        # Iterate over a copy because removeHandler mutates the list.
        for attached in list(root_logger.handlers):
            root_logger.removeHandler(attached)
        root_logger.setLevel(logging.WARNING)  # Default level

    _restore_root_defaults()
    yield
    _restore_root_defaults()
def test_setup_logging_debug() -> None:
    """debug=True installs a handler and sets the root level to DEBUG."""
    root = logging.getLogger()
    # Start from a bare root logger at the default level.
    root.handlers.clear()
    root.setLevel(logging.WARNING)

    setup_logging(debug=True)

    assert len(root.handlers) > 0
    assert root.level == logging.DEBUG
def test_setup_logging_info() -> None:
    """debug=False installs a handler and sets the root level to INFO."""
    root = logging.getLogger()
    # Start from a bare root logger at the default level.
    root.handlers.clear()
    root.setLevel(logging.WARNING)

    setup_logging(debug=False)

    assert len(root.handlers) > 0
    assert root.level == logging.INFO
def test_setup_logging_formatter() -> None:
    """The first root handler formats records with message and level name."""
    setup_logging(debug=True)
    installed_handlers = logging.getLogger().handlers

    # Check if there's at least one handler
    assert len(installed_handlers) > 0

    # Check formatter
    formatter = installed_handlers[0].formatter
    assert formatter is not None

    # Format a hand-built record and inspect the rendered line.
    record_fields = dict(
        name='test',
        level=logging.DEBUG,
        pathname='test.py',
        lineno=1,
        msg='Test message',
        args=(),
        exc_info=None,
    )
    test_record = logging.LogRecord(**record_fields)
    formatted = formatter.format(test_record)

    assert 'Test message' in formatted
    assert test_record.levelname in formatted
def test_setup_logging_multiple_calls() -> None:
    """Calling setup_logging twice leaves the handler count unchanged."""
    root = logging.getLogger()
    root.handlers.clear()

    setup_logging(debug=True)
    handlers_after_first_call = len(root.handlers)

    # Call setup_logging again
    setup_logging(debug=True)
    handlers_after_second_call = len(root.handlers)

    assert handlers_after_second_call == handlers_after_first_call, "Should not create duplicate handlers"
def test_setup_logging_level_change() -> None:
    """The root level follows the debug flag across successive setups."""
    root = logging.getLogger()

    # Handlers are cleared before each setup call: debug first, then info.
    for debug_flag, expected_level in ((True, logging.DEBUG), (False, logging.INFO)):
        root.handlers.clear()
        setup_logging(debug=debug_flag)
        assert root.level == expected_level
def test_setup_logging_message_format() -> None:
    """A debug message reaches an extra handler with level and source file."""
    import io

    setup_logging(debug=True)
    root = logging.getLogger()

    # Capture output through an in-memory stream handler whose formatter
    # includes the pathname of the calling module.
    captured = io.StringIO()
    capture_handler = logging.StreamHandler(captured)
    capture_handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s'))
    root.addHandler(capture_handler)

    # Ensure debug level is set
    root.setLevel(logging.DEBUG)
    capture_handler.setLevel(logging.DEBUG)

    # Log a test message
    test_message = "Test log message"
    root.debug(test_message)
    log_output = captured.getvalue()

    # Verify format components
    for fragment in (test_message, "DEBUG", "test_utils.py"):
        assert fragment in log_output
def test_setup_logging_error_messages() -> None:
    """ERROR records are emitted even when debug logging is off."""
    import io

    setup_logging(debug=False)
    root = logging.getLogger()

    # Capture output with a handler that reuses the configured formatter.
    captured = io.StringIO()
    capture_handler = logging.StreamHandler(captured)
    capture_handler.setFormatter(root.handlers[0].formatter)
    root.addHandler(capture_handler)

    # Log an error message
    error_message = "Test error message"
    root.error(error_message)
    log_output = captured.getvalue()

    # Error messages should always be logged regardless of debug setting
    assert error_message in log_output
    assert "ERROR" in log_output
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file directly fails
    # with a non-zero code when a test fails.
    raise SystemExit(pytest.main([__file__]))