Merge pull request #23 from kirill-markin/refactoring/cleanup

Refactoring/cleanup
This commit is contained in:
Kirill Markin 2024-12-17 17:19:55 +01:00 committed by GitHub
commit 1817078e46
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 416 additions and 278 deletions

View file

@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.8", "3.11", "3.13"]
steps:
- uses: actions/checkout@v4
@ -28,6 +28,9 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .[dev]
- name: Run pylint
run: |
pylint repo_to_text
- name: Run tests
run: |
pytest tests/

View file

@ -180,7 +180,7 @@ To install `repo-to-text` locally for development, follow these steps:
2. Install the package with development dependencies:
```bash
pip install -e .[dev]
pip install -e ".[dev]"
```
### Requirements

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "repo-to-text"
version = "0.5.3"
version = "0.5.4"
authors = [
{ name = "Kirill Markin", email = "markinkirill@gmail.com" },
]
@ -41,4 +41,10 @@ dev = [
"isort",
"build",
"twine",
"pylint",
]
[tool.pylint]
disable = [
"C0303",
]

View file

@ -1,2 +1,4 @@
"""This is the main package for the repo_to_text package."""
__author__ = 'Kirill Markin'
__email__ = 'markinkirill@gmail.com'

View file

@ -1,3 +1,5 @@
"""This module contains the CLI interface for the repo_to_text package."""
from .cli import create_default_settings_file, parse_args, main
__all__ = ['create_default_settings_file', 'parse_args', 'main']

View file

@ -1,3 +1,7 @@
"""
CLI for repo-to-text
"""
import argparse
import textwrap
import os
@ -12,7 +16,10 @@ def create_default_settings_file() -> None:
"""Create a default .repo-to-text-settings.yaml file."""
settings_file = '.repo-to-text-settings.yaml'
if os.path.exists(settings_file):
raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.")
raise FileExistsError(
f"The settings file '{settings_file}' already exists. "
"Please remove it or rename it if you want to create a new default settings file."
)
default_settings = textwrap.dedent("""\
# Details: https://github.com/kirill-markin/repo-to-text
@ -32,7 +39,7 @@ def create_default_settings_file() -> None:
- "README.md"
- "LICENSE"
""")
with open('.repo-to-text-settings.yaml', 'w') as f:
with open('.repo-to-text-settings.yaml', 'w', encoding='utf-8') as f:
f.write(default_settings)
print("Default .repo-to-text-settings.yaml created.")
@ -42,13 +49,25 @@ def parse_args() -> argparse.Namespace:
Returns:
argparse.Namespace: Parsed command line arguments
"""
parser = argparse.ArgumentParser(description='Convert repository structure and contents to text')
parser = argparse.ArgumentParser(
description='Convert repository structure and contents to text'
)
parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process')
parser.add_argument('--debug', action='store_true', help='Enable debug logging')
parser.add_argument('--output-dir', type=str, help='Directory to save the output file')
parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file')
parser.add_argument(
'--create-settings',
'--init',
action='store_true',
help='Create default .repo-to-text-settings.yaml file'
)
parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file')
parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').")
parser.add_argument(
'--ignore-patterns',
nargs='*',
help="List of files or directories to ignore in both tree and content sections. "
"Supports wildcards (e.g., '*')."
)
return parser.parse_args()
def main() -> NoReturn:
@ -75,6 +94,6 @@ def main() -> NoReturn:
logging.debug('repo-to-text script finished')
sys.exit(0)
except Exception as e:
logging.error(f'Error occurred: {str(e)}')
except (FileNotFoundError, FileExistsError, PermissionError, OSError) as e:
logging.error('Error occurred: %s', str(e))
sys.exit(1)

View file

@ -1,3 +1,5 @@
"""This module contains the core functionality of the repo_to_text package."""
from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text']

View file

@ -1,69 +1,116 @@
"""
Core functionality for repo-to-text
"""
import os
import subprocess
from typing import Tuple, Optional, List, Dict, Any, Set
from datetime import datetime, timezone
from importlib.machinery import ModuleSpec
import logging
import yaml
from datetime import datetime, timezone
from typing import Tuple, Optional, List
import pathspec
from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
from ..utils.utils import check_tree_command, is_ignored_path
def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str:
"""Generate tree structure of the directory.
Args:
path: Directory path to generate tree for
gitignore_spec: PathSpec object for gitignore patterns
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
Returns:
str: Generated tree structure
"""
def get_tree_structure(
path: str = '.',
gitignore_spec: Optional[PathSpec] = None,
tree_and_content_ignore_spec: Optional[PathSpec] = None
) -> str:
"""Generate tree structure of the directory."""
if not check_tree_command():
return ""
logging.debug(f'Generating tree structure for path: {path}')
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
tree_output = result.stdout.decode('utf-8')
logging.debug(f'Tree output generated:\n{tree_output}')
logging.debug('Generating tree structure for path: %s', path)
tree_output = run_tree_command(path)
logging.debug('Tree output generated:\n%s', tree_output)
if not gitignore_spec and not tree_and_content_ignore_spec:
logging.debug('No .gitignore or ignore-tree-and-content specification found')
return tree_output
logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
filtered_lines: List[str] = []
logging.debug('Filtering tree output based on ignore specifications')
return filter_tree_output(tree_output, path, gitignore_spec, tree_and_content_ignore_spec)
for line in tree_output.splitlines():
idx = line.find('./')
if idx == -1:
idx = line.find(path)
if idx != -1:
full_path = line[idx:].strip()
else:
continue
def run_tree_command(path: str) -> str:
    """Run the external ``tree`` command on *path* and return its output.

    The command is invoked as ``tree -a -f --noreport <path>`` with an
    argument list (no shell), so *path* is never interpreted by a shell.

    Args:
        path: Directory to list.

    Returns:
        str: Text written by ``tree`` to standard output.

    Raises:
        subprocess.CalledProcessError: If ``tree`` exits with a non-zero
            status (``check=True``).
        FileNotFoundError: If the ``tree`` executable cannot be started.
    """
    result = subprocess.run(
        ['tree', '-a', '-f', '--noreport', path],
        stdout=subprocess.PIPE,
        check=True
    )
    # File names are not guaranteed to be valid UTF-8. A strict decode would
    # abort the whole run with UnicodeDecodeError because of a single odd
    # file name, so undecodable bytes are replaced instead.
    return result.stdout.decode('utf-8', errors='replace')
if full_path == '.':
continue
def filter_tree_output(
tree_output: str,
path: str,
gitignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
"""Filter the tree output based on ignore specifications."""
lines: List[str] = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
if os.path.isdir(full_path):
relative_path += '/'
filtered_lines = [
process_line(line, path, gitignore_spec, tree_and_content_ignore_spec, non_empty_dirs)
for line in lines
]
if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
display_line = line.replace('./', '', 1)
filtered_lines.append(display_line)
else:
logging.debug(f'Ignored: {relative_path}')
filtered_tree_output = '\n'.join(filtered_lines)
logging.debug(f'Filtered tree structure:\n{filtered_tree_output}')
logging.debug('Tree structure filtering complete')
filtered_tree_output = '\n'.join(filter(None, filtered_lines))
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
return filtered_tree_output
def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
def _dir_has_visible_file(
    dir_path: str,
    path: str,
    gitignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec]
) -> bool:
    """Return True if *dir_path* contains at least one non-ignored file."""
    for root, _subdirs, filenames in os.walk(dir_path):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            relative_file = os.path.relpath(file_path, path).replace(os.sep, '/')
            if not should_ignore_file(
                file_path,
                relative_file,
                gitignore_spec,
                None,
                tree_and_content_ignore_spec
            ):
                return True
    return False


def process_line(
    line: str,
    path: str,
    gitignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec],
    non_empty_dirs: Set[str]
) -> Optional[str]:
    """Process a single line of the tree output.

    Args:
        line: One raw line produced by the tree command.
        path: Base path the tree was generated for.
        gitignore_spec: PathSpec built from .gitignore, if any.
        tree_and_content_ignore_spec: PathSpec with extra ignore patterns, if any.
        non_empty_dirs: Shared set of relative directory paths known to contain
            at least one non-ignored file; updated in place.

    Returns:
        Optional[str]: The line to display (with the first ``./`` removed), or
        None when the line is the root entry, carries no path, is ignored, or
        is a directory without any non-ignored file.
    """
    full_path = extract_full_path(line, path)
    if not full_path or full_path == '.':
        return None

    relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
    if relative_path == '.':
        # Root entry of the listing (e.g. when *path* is absolute).
        return None

    if should_ignore_file(
        full_path,
        relative_path,
        gitignore_spec,
        None,
        tree_and_content_ignore_spec
    ):
        logging.debug('Ignored: %s', relative_path)
        return None

    if not os.path.isdir(full_path):
        mark_non_empty_dirs(relative_path, non_empty_dirs)
        return line.replace('./', '', 1)

    # Tree output is pre-order: a directory line arrives before the lines of
    # the files it contains, so the shared set cannot yet tell whether this
    # directory is empty. Decide by looking at the directory itself instead of
    # checking whether its *parent* was marked, which dropped every top-level
    # directory and depended on sibling ordering for nested ones.
    if relative_path in non_empty_dirs or _dir_has_visible_file(
        full_path, path, gitignore_spec, tree_and_content_ignore_spec
    ):
        non_empty_dirs.add(relative_path)
        return line.replace('./', '', 1)

    logging.debug('Directory has no visible files and is skipped: %s', relative_path)
    return None
def extract_full_path(line: str, path: str) -> Optional[str]:
    """Extract the full path from a line of tree output.

    The path is assumed to start at the earliest occurrence of either ``./``
    or the base *path* in the line; everything before it is treated as tree
    drawing characters.

    Args:
        line: One raw line produced by the tree command.
        path: Base path the tree was generated for.

    Returns:
        Optional[str]: The stripped path portion of the line, or None when the
        line contains neither ``./`` nor *path*.
    """
    dot_idx = line.find('./')
    # An empty base path matches everywhere; only use it as a last resort.
    path_idx = line.find(path) if path else -1

    candidates = [idx for idx in (dot_idx, path_idx) if idx != -1]
    if not candidates:
        return line.strip() if not path else None

    # Taking the earliest marker keeps entries such as '/base/v1./notes.txt'
    # or '../src' intact; preferring './' unconditionally would cut them down
    # to './notes.txt' and './src'.
    return line[min(candidates):].strip()
def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None:
    """Mark all parent directories of a file as non-empty.

    Args:
        relative_path: Path of a file; every ancestor directory named in this
            path is recorded.
        non_empty_dirs: Set that receives the ancestor directory paths; it is
            updated in place.
    """
    dir_path = os.path.dirname(relative_path)
    while dir_path:
        non_empty_dirs.add(dir_path)
        parent = os.path.dirname(dir_path)
        if parent == dir_path:
            # dirname() of a filesystem root returns the root itself; without
            # this guard an absolute path would loop forever.
            break
        dir_path = parent
def load_ignore_specs(
path: str = '.',
cli_ignore_patterns: Optional[List[str]] = None
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
"""Load ignore specifications from various sources.
Args:
@ -71,7 +118,8 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
cli_ignore_patterns: List of patterns from command line
Returns:
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
content_ignore_spec, and tree_and_content_ignore_spec
"""
gitignore_spec = None
content_ignore_spec = None
@ -80,14 +128,16 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
if os.path.exists(repo_settings_path):
logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
with open(repo_settings_path, 'r') as f:
settings = yaml.safe_load(f)
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
with open(repo_settings_path, 'r', encoding='utf-8') as f:
settings: Dict[str, Any] = yaml.safe_load(f)
use_gitignore = settings.get('gitignore-import-and-ignore', True)
if 'ignore-content' in settings:
content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'])
content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
'gitwildmatch', settings['ignore-content']
)
if 'ignore-tree-and-content' in settings:
tree_and_content_ignore_list.extend(settings['ignore-tree-and-content'])
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
if cli_ignore_patterns:
tree_and_content_ignore_list.extend(cli_ignore_patterns)
@ -95,15 +145,22 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
if use_gitignore:
gitignore_path = os.path.join(path, '.gitignore')
if os.path.exists(gitignore_path):
logging.debug(f'Loading .gitignore from path: {gitignore_path}')
with open(gitignore_path, 'r') as f:
logging.debug('Loading .gitignore from path: %s', gitignore_path)
with open(gitignore_path, 'r', encoding='utf-8') as f:
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
'gitwildmatch', tree_and_content_ignore_list
)
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool:
def should_ignore_file(
file_path: str,
relative_path: str,
gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> bool:
"""Check if a file should be ignored based on various ignore specifications.
Args:
@ -126,44 +183,73 @@ def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optio
result = (
is_ignored_path(file_path) or
bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or
bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
bool(
gitignore_spec and
gitignore_spec.match_file(relative_path)
) or
bool(
content_ignore_spec and
content_ignore_spec.match_file(relative_path)
) or
bool(
tree_and_content_ignore_spec and
tree_and_content_ignore_spec.match_file(relative_path)
) or
os.path.basename(file_path).startswith('repo-to-text_')
)
logging.debug(f'Checking if file should be ignored:')
logging.debug(f' file_path: {file_path}')
logging.debug(f' relative_path: {relative_path}')
logging.debug(f' Result: {result}')
logging.debug('Checking if file should be ignored:')
logging.debug(' file_path: %s', file_path)
logging.debug(' relative_path: %s', relative_path)
logging.debug(' Result: %s', result)
return result
def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[List[str]] = None) -> str:
"""Save repository structure and contents to a text file.
def save_repo_to_text(
path: str = '.',
output_dir: Optional[str] = None,
to_stdout: bool = False,
cli_ignore_patterns: Optional[List[str]] = None
) -> str:
"""Save repository structure and contents to a text file."""
logging.debug('Starting to save repo structure to text for path: %s', path)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
path, cli_ignore_patterns
)
tree_structure: str = get_tree_structure(
path, gitignore_spec, tree_and_content_ignore_spec
)
logging.debug('Final tree structure to be written: %s', tree_structure)
Args:
path: Repository path
output_dir: Directory to save output file
to_stdout: Whether to output to stdout instead of file
cli_ignore_patterns: List of patterns from command line
output_content = generate_output_content(
path,
tree_structure,
gitignore_spec,
content_ignore_spec,
tree_and_content_ignore_spec
)
Returns:
str: Path to the output file or the output text if to_stdout is True
"""
logging.debug(f'Starting to save repo structure to text for path: {path}')
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns)
tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
tree_structure = remove_empty_dirs(tree_structure, path)
logging.debug(f'Final tree structure to be written: {tree_structure}')
if to_stdout:
print(output_content)
return output_content
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
output_file = f'repo-to-text_{timestamp}.txt'
output_file = write_output_to_file(output_content, output_dir)
copy_to_clipboard(output_content)
if output_dir:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_file = os.path.join(output_dir, output_file)
print(
"[SUCCESS] Repository structure and contents successfully saved to "
f"file: \"./{output_file}\""
)
return output_file
def generate_output_content(
path: str,
tree_structure: str,
gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
"""Generate the output content for the repository."""
output_content: List[str] = []
project_name = os.path.basename(os.path.abspath(path))
output_content.append(f'Directory: {project_name}\n\n')
@ -181,7 +267,13 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, path)
if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
if should_ignore_file(
file_path,
relative_path,
gitignore_spec,
content_ignore_spec,
tree_and_content_ignore_spec
):
continue
relative_path = relative_path.replace('./', '', 1)
@ -192,35 +284,45 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
with open(file_path, 'r', encoding='utf-8') as f:
output_content.append(f.read())
except UnicodeDecodeError:
logging.debug(f'Could not decode file contents: {file_path}')
logging.debug('Could not decode file contents: %s', file_path)
output_content.append('[Could not decode file contents]\n')
output_content.append('\n```\n')
output_content.append('\n')
logging.debug('Repository contents written to output content')
output_text = ''.join(output_content)
return ''.join(output_content)
if to_stdout:
print(output_text)
return output_text
def write_output_to_file(output_content: str, output_dir: Optional[str]) -> str:
"""Write the output content to a file."""
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
output_file = f'repo-to-text_{timestamp}.txt'
with open(output_file, 'w') as file:
file.write(output_text)
if output_dir:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_file = os.path.join(output_dir, output_file)
with open(output_file, 'w', encoding='utf-8') as file:
file.write(output_content)
return output_file
def copy_to_clipboard(output_content: str) -> None:
"""Copy the output content to the clipboard if possible."""
try:
import importlib.util
if importlib.util.find_spec("pyperclip"):
import pyperclip # type: ignore
pyperclip.copy(output_text) # type: ignore
import importlib.util # pylint: disable=import-outside-toplevel
spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip")
if spec:
import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
pyperclip.copy(output_content) # type: ignore
logging.debug('Repository structure and contents copied to clipboard')
else:
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
print(" pip install pyperclip")
except Exception as e:
logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.')
logging.debug(f'Clipboard copy error: {e}')
print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"")
return output_file
except ImportError as e:
logging.warning(
'Could not copy to clipboard. You might be running this '
'script over SSH or without clipboard support.'
)
logging.debug('Clipboard copy error: %s', e)

View file

@ -1,3 +1,5 @@
"""This is the main entry point for the repo_to_text package."""
from repo_to_text.cli.cli import main
if __name__ == '__main__':

View file

@ -1,3 +1,5 @@
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
"""This module contains utility functions for the repo_to_text package."""
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
from .utils import setup_logging, check_tree_command, is_ignored_path
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']

View file

@ -1,7 +1,8 @@
import os
"""This module contains utility functions for the repo_to_text package."""
import shutil
import logging
from typing import List, Set
from typing import List
def setup_logging(debug: bool = False) -> None:
"""Set up logging configuration.
@ -19,7 +20,10 @@ def check_tree_command() -> bool:
bool: True if tree command is available, False otherwise
"""
if shutil.which('tree') is None:
print("The 'tree' command is not found. Please install it using one of the following commands:")
print(
"The 'tree' command is not found. "
+ "Please install it using one of the following commands:"
)
print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree")
print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree")
return False
@ -40,43 +44,5 @@ def is_ignored_path(file_path: str) -> bool:
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
result = is_ignored_dir or is_ignored_file
if result:
logging.debug(f'Path ignored: {file_path}')
logging.debug('Path ignored: %s', file_path)
return result
def remove_empty_dirs(tree_output: str, path: str = '.') -> str:
"""Remove empty directories from tree output.
Args:
tree_output: Output from tree command
path: Base path for the tree
Returns:
str: Tree output with empty directories removed
"""
logging.debug('Removing empty directories from tree output')
lines = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
filtered_lines: List[str] = []
for line in lines:
parts = line.strip().split()
if parts:
full_path = parts[-1]
if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)):
logging.debug(f'Directory is empty and will be removed: {full_path}')
continue
non_empty_dirs.add(os.path.dirname(full_path))
filtered_lines.append(line)
final_lines: List[str] = []
for line in filtered_lines:
parts = line.strip().split()
if parts:
full_path = parts[-1]
if os.path.isdir(full_path) and full_path not in non_empty_dirs:
logging.debug(f'Directory is empty and will be removed: {full_path}')
continue
final_lines.append(line)
logging.debug('Empty directory removal complete')
return '\n'.join(filtered_lines)

View file

@ -1,15 +1,19 @@
"""Test the CLI module."""
import os
import pytest
import tempfile
import shutil
from typing import Generator
from unittest.mock import patch, MagicMock
import pytest
from repo_to_text.cli.cli import (
create_default_settings_file,
parse_args,
main
)
# pylint: disable=redefined-outer-name
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
"""Create a temporary directory for testing."""
@ -52,7 +56,7 @@ def test_create_default_settings_file(temp_dir: str) -> None:
settings_file = '.repo-to-text-settings.yaml'
assert os.path.exists(settings_file)
with open(settings_file, 'r') as f:
with open(settings_file, 'r', encoding='utf-8') as f:
content = f.read()
assert 'gitignore-import-and-ignore: True' in content
assert 'ignore-tree-and-content:' in content
@ -94,7 +98,10 @@ def test_main_create_settings(mock_create_settings: MagicMock) -> None:
@patch('repo_to_text.cli.cli.setup_logging')
@patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None:
def test_main_with_debug_logging(
mock_create_settings: MagicMock,
mock_setup_logging: MagicMock
) -> None:
"""Test main function with debug logging enabled."""
with patch('sys.argv', ['repo-to-text', '--debug', '--create-settings']):
with pytest.raises(SystemExit) as exc_info:

View file

@ -1,17 +1,21 @@
"""Test the core module."""
import os
import tempfile
import shutil
import pytest
from typing import Generator
import pytest
from repo_to_text.core.core import (
get_tree_structure,
load_ignore_specs,
should_ignore_file,
is_ignored_path,
remove_empty_dirs,
save_repo_to_text
)
# pylint: disable=redefined-outer-name
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
"""Create a temporary directory for testing."""
@ -20,11 +24,12 @@ def temp_dir() -> Generator[str, None, None]:
shutil.rmtree(temp_path)
@pytest.fixture
def sample_repo(temp_dir: str) -> str:
def sample_repo(tmp_path: str) -> str:
"""Create a sample repository structure for testing."""
tmp_path_str = str(tmp_path)
# Create directories
os.makedirs(os.path.join(temp_dir, "src"))
os.makedirs(os.path.join(temp_dir, "tests"))
os.makedirs(os.path.join(tmp_path_str, "src"))
os.makedirs(os.path.join(tmp_path_str, "tests"))
# Create sample files
files = {
@ -47,12 +52,12 @@ ignore-content:
}
for file_path, content in files.items():
full_path = os.path.join(temp_dir, file_path)
full_path = os.path.join(tmp_path_str, file_path)
os.makedirs(os.path.dirname(full_path), exist_ok=True)
with open(full_path, "w") as f:
with open(full_path, "w", encoding='utf-8') as f:
f.write(content)
return temp_dir
return tmp_path_str
def test_is_ignored_path() -> None:
"""Test the is_ignored_path function."""
@ -63,7 +68,9 @@ def test_is_ignored_path() -> None:
def test_load_ignore_specs(sample_repo: str) -> None:
"""Test loading ignore specifications from files."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
assert gitignore_spec is not None
assert content_ignore_spec is not None
@ -82,7 +89,9 @@ def test_load_ignore_specs(sample_repo: str) -> None:
def test_should_ignore_file(sample_repo: str) -> None:
"""Test file ignoring logic."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
# Test various file paths
assert should_ignore_file(
@ -113,38 +122,6 @@ def test_get_tree_structure(sample_repo: str) -> None:
assert "test_main.py" in tree_output
assert ".git" not in tree_output
def test_remove_empty_dirs(temp_dir: str) -> None:
"""Test removal of empty directories from tree output."""
# Create test directory structure
os.makedirs(os.path.join(temp_dir, "src"))
os.makedirs(os.path.join(temp_dir, "empty_dir"))
os.makedirs(os.path.join(temp_dir, "tests"))
# Create some files
with open(os.path.join(temp_dir, "src/main.py"), "w") as f:
f.write("print('test')")
with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as f:
f.write("def test(): pass")
# Create a mock tree output that matches the actual tree command format
tree_output = (
f"{temp_dir}\n"
f"├── {os.path.join(temp_dir, 'src')}\n"
f"│ └── {os.path.join(temp_dir, 'src/main.py')}\n"
f"├── {os.path.join(temp_dir, 'empty_dir')}\n"
f"└── {os.path.join(temp_dir, 'tests')}\n"
f" └── {os.path.join(temp_dir, 'tests/test_main.py')}\n"
)
filtered_output = remove_empty_dirs(tree_output, temp_dir)
# Check that empty_dir is removed but other directories remain
assert "empty_dir" not in filtered_output
assert os.path.join(temp_dir, "src") in filtered_output
assert os.path.join(temp_dir, "tests") in filtered_output
assert os.path.join(temp_dir, "src/main.py") in filtered_output
assert os.path.join(temp_dir, "tests/test_main.py") in filtered_output
def test_save_repo_to_text(sample_repo: str) -> None:
"""Test the main save_repo_to_text function."""
# Create output directory
@ -153,7 +130,7 @@ def test_save_repo_to_text(sample_repo: str) -> None:
# Create .git directory to ensure it's properly ignored
os.makedirs(os.path.join(sample_repo, ".git"))
with open(os.path.join(sample_repo, ".git/config"), "w") as f:
with open(os.path.join(sample_repo, ".git/config"), "w", encoding='utf-8') as f:
f.write("[core]\n\trepositoryformatversion = 0\n")
# Test file output
@ -162,7 +139,7 @@ def test_save_repo_to_text(sample_repo: str) -> None:
assert os.path.dirname(output_file) == output_dir
# Check file contents
with open(output_file, 'r') as f:
with open(output_file, 'r', encoding='utf-8') as f:
content = f.read()
# Basic content checks
@ -204,7 +181,9 @@ def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None:
def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None:
"""Test loading ignore specs when .gitignore is missing."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(temp_dir)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
temp_dir
)
assert gitignore_spec is None
assert content_ignore_spec is None
assert tree_and_content_ignore_spec is not None
@ -214,7 +193,7 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
# Create files with special characters
special_dir = os.path.join(temp_dir, "special chars")
os.makedirs(special_dir)
with open(os.path.join(special_dir, "file with spaces.txt"), "w") as f:
with open(os.path.join(special_dir, "file with spaces.txt"), "w", encoding='utf-8') as f:
f.write("test")
tree_output = get_tree_structure(temp_dir)
@ -223,7 +202,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
"""Test edge cases for should_ignore_file function."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
# Test with dot-prefixed paths
assert should_ignore_file(
@ -264,7 +245,7 @@ def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None:
"""Test save_repo_to_text with custom output directory."""
# Create a simple file structure
with open(os.path.join(temp_dir, "test.txt"), "w") as f:
with open(os.path.join(temp_dir, "test.txt"), "w", encoding='utf-8') as f:
f.write("test content")
# Create custom output directory
@ -281,5 +262,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
# Should only contain the directory itself
assert tree_output.strip() == "" or tree_output.strip() == temp_dir
def test_empty_dirs_filtering(tmp_path: "os.PathLike[str]") -> None:
    """Test filtering of empty directories in tree structure generation."""
    # Create test directory structure with normalized paths
    base_path = os.path.normpath(tmp_path)
    src_path = os.path.join(base_path, "src")
    empty_dir_path = os.path.join(base_path, "empty_dir")
    tests_path = os.path.join(base_path, "tests")

    os.makedirs(src_path)
    os.makedirs(empty_dir_path)
    os.makedirs(tests_path)

    # Create some files
    with open(os.path.join(src_path, "main.py"), "w", encoding='utf-8') as f:
        f.write("print('test')")
    with open(os.path.join(tests_path, "test_main.py"), "w", encoding='utf-8') as f:
        f.write("def test(): pass")

    # get_tree_structure() returns the raw tree output when no ignore spec is
    # supplied, so pass a spec with a pattern that matches nothing to make
    # sure the filtering code path is the one under test.
    gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(
        base_path, cli_ignore_patterns=["__no_such_entry__"]
    )
    tree_output = get_tree_structure(
        base_path, gitignore_spec, tree_and_content_ignore_spec
    )

    # Print debug information
    print("\nTree output:")
    print(tree_output)

    # Every line carries the absolute base path, and pytest derives the
    # temporary directory name from the test name (which itself contains
    # "empty_dir"). Strip the base path so the checks look only at the part
    # of each line that is relative to the repository root.
    relative_lines = [
        line.replace(base_path, '', 1) for line in tree_output.splitlines()
    ]
    relative_listing = '\n'.join(relative_lines)

    # Basic structure checks for directories with files
    assert "src" in relative_listing
    assert "tests" in relative_listing
    assert "main.py" in relative_listing
    assert "test_main.py" in relative_listing

    # Check that empty directory is not included by checking each line
    for line in relative_lines:
        assert "empty_dir" not in line, f"Found empty_dir in line: {line}"
if __name__ == "__main__":
pytest.main([__file__])

View file

@ -1,6 +1,10 @@
"""Test the utils module."""
import logging
import pytest
from typing import Generator
import io
import pytest
from repo_to_text.utils.utils import setup_logging
@pytest.fixture(autouse=True)
@ -72,7 +76,8 @@ def test_setup_logging_multiple_calls() -> None:
# Call setup_logging again
setup_logging(debug=True)
assert len(root_logger.handlers) == initial_handler_count, "Should not create duplicate handlers"
assert len(root_logger.handlers) == \
initial_handler_count, "Should not create duplicate handlers"
def test_setup_logging_level_change() -> None:
"""Test changing log levels between setup_logging calls."""
@ -96,11 +101,12 @@ def test_setup_logging_message_format() -> None:
logger = logging.getLogger()
# Create a temporary handler to capture output
import io
log_capture = io.StringIO()
handler = logging.StreamHandler(log_capture)
# Use formatter that includes pathname
handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s'))
handler.setFormatter(
logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')
)
logger.addHandler(handler)
# Ensure debug level is set
@ -123,7 +129,6 @@ def test_setup_logging_error_messages() -> None:
logger = logging.getLogger()
# Create a temporary handler to capture output
import io
log_capture = io.StringIO()
handler = logging.StreamHandler(log_capture)
handler.setFormatter(logger.handlers[0].formatter)