linter cleanup

This commit is contained in:
Kirill Markin 2024-12-17 14:41:42 +01:00
parent 0cba3592f2
commit 5f283feefd
No known key found for this signature in database
GPG key ID: 03AB9530E15B9C1C
9 changed files with 295 additions and 191 deletions

View file

@ -1,2 +1,4 @@
"""This is the main package for the repo_to_text package."""
__author__ = 'Kirill Markin' __author__ = 'Kirill Markin'
__email__ = 'markinkirill@gmail.com' __email__ = 'markinkirill@gmail.com'

View file

@ -1,3 +1,5 @@
"""This module contains the CLI interface for the repo_to_text package."""
from .cli import create_default_settings_file, parse_args, main from .cli import create_default_settings_file, parse_args, main
__all__ = ['create_default_settings_file', 'parse_args', 'main'] __all__ = ['create_default_settings_file', 'parse_args', 'main']

View file

@ -1,3 +1,7 @@
"""
CLI for repo-to-text
"""
import argparse import argparse
import textwrap import textwrap
import os import os
@ -12,8 +16,11 @@ def create_default_settings_file() -> None:
"""Create a default .repo-to-text-settings.yaml file.""" """Create a default .repo-to-text-settings.yaml file."""
settings_file = '.repo-to-text-settings.yaml' settings_file = '.repo-to-text-settings.yaml'
if os.path.exists(settings_file): if os.path.exists(settings_file):
raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.") raise FileExistsError(
f"The settings file '{settings_file}' already exists. "
"Please remove it or rename it if you want to create a new default settings file."
)
default_settings = textwrap.dedent("""\ default_settings = textwrap.dedent("""\
# Details: https://github.com/kirill-markin/repo-to-text # Details: https://github.com/kirill-markin/repo-to-text
# Syntax: gitignore rules # Syntax: gitignore rules
@ -32,7 +39,7 @@ def create_default_settings_file() -> None:
- "README.md" - "README.md"
- "LICENSE" - "LICENSE"
""") """)
with open('.repo-to-text-settings.yaml', 'w') as f: with open('.repo-to-text-settings.yaml', 'w', encoding='utf-8') as f:
f.write(default_settings) f.write(default_settings)
print("Default .repo-to-text-settings.yaml created.") print("Default .repo-to-text-settings.yaml created.")
@ -42,13 +49,25 @@ def parse_args() -> argparse.Namespace:
Returns: Returns:
argparse.Namespace: Parsed command line arguments argparse.Namespace: Parsed command line arguments
""" """
parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') parser = argparse.ArgumentParser(
description='Convert repository structure and contents to text'
)
parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process') parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process')
parser.add_argument('--debug', action='store_true', help='Enable debug logging') parser.add_argument('--debug', action='store_true', help='Enable debug logging')
parser.add_argument('--output-dir', type=str, help='Directory to save the output file') parser.add_argument('--output-dir', type=str, help='Directory to save the output file')
parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file') parser.add_argument(
'--create-settings',
'--init',
action='store_true',
help='Create default .repo-to-text-settings.yaml file'
)
parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file') parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file')
parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').") parser.add_argument(
'--ignore-patterns',
nargs='*',
help="List of files or directories to ignore in both tree and content sections. "
"Supports wildcards (e.g., '*')."
)
return parser.parse_args() return parser.parse_args()
def main() -> NoReturn: def main() -> NoReturn:
@ -60,7 +79,7 @@ def main() -> NoReturn:
args = parse_args() args = parse_args()
setup_logging(debug=args.debug) setup_logging(debug=args.debug)
logging.debug('repo-to-text script started') logging.debug('repo-to-text script started')
try: try:
if args.create_settings: if args.create_settings:
create_default_settings_file() create_default_settings_file()
@ -72,9 +91,9 @@ def main() -> NoReturn:
to_stdout=args.stdout, to_stdout=args.stdout,
cli_ignore_patterns=args.ignore_patterns cli_ignore_patterns=args.ignore_patterns
) )
logging.debug('repo-to-text script finished') logging.debug('repo-to-text script finished')
sys.exit(0) sys.exit(0)
except Exception as e: except (FileNotFoundError, FileExistsError, PermissionError, OSError) as e:
logging.error(f'Error occurred: {str(e)}') logging.error('Error occurred: %s', str(e))
sys.exit(1) sys.exit(1)

View file

@ -1,15 +1,23 @@
"""
Core functionality for repo-to-text
"""
import os import os
import subprocess import subprocess
from typing import Tuple, Optional, List, Dict, Any
from datetime import datetime, timezone
import logging import logging
import yaml import yaml
from datetime import datetime, timezone
from typing import Tuple, Optional, List
import pathspec import pathspec
from pathspec import PathSpec from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str: def get_tree_structure(
path: str = '.',
gitignore_spec: Optional[PathSpec] = None,
tree_and_content_ignore_spec: Optional[PathSpec] = None
) -> str:
"""Generate tree structure of the directory. """Generate tree structure of the directory.
Args: Args:
@ -22,17 +30,23 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
""" """
if not check_tree_command(): if not check_tree_command():
return "" return ""
logging.debug(f'Generating tree structure for path: {path}') logging.debug('Generating tree structure for path: %s', path)
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) result = subprocess.run(
['tree', '-a', '-f', '--noreport', path],
stdout=subprocess.PIPE,
check=True
)
tree_output = result.stdout.decode('utf-8') tree_output = result.stdout.decode('utf-8')
logging.debug(f'Tree output generated:\n{tree_output}') logging.debug('Tree output generated:\n%s', tree_output)
if not gitignore_spec and not tree_and_content_ignore_spec: if not gitignore_spec and not tree_and_content_ignore_spec:
logging.debug('No .gitignore or ignore-tree-and-content specification found') logging.debug('No .gitignore or ignore-tree-and-content specification found')
return tree_output return tree_output
logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') logging.debug(
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
)
filtered_lines: List[str] = [] filtered_lines: List[str] = []
for line in tree_output.splitlines(): for line in tree_output.splitlines():
@ -43,7 +57,7 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
full_path = line[idx:].strip() full_path = line[idx:].strip()
else: else:
continue continue
if full_path == '.': if full_path == '.':
continue continue
@ -52,18 +66,27 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
if os.path.isdir(full_path): if os.path.isdir(full_path):
relative_path += '/' relative_path += '/'
if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): if not should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
display_line = line.replace('./', '', 1) display_line = line.replace('./', '', 1)
filtered_lines.append(display_line) filtered_lines.append(display_line)
else: else:
logging.debug(f'Ignored: {relative_path}') logging.debug('Ignored: %s', relative_path)
filtered_tree_output = '\n'.join(filtered_lines) filtered_tree_output = '\n'.join(filtered_lines)
logging.debug(f'Filtered tree structure:\n{filtered_tree_output}') logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
logging.debug('Tree structure filtering complete') logging.debug('Tree structure filtering complete')
return filtered_tree_output return filtered_tree_output
def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: def load_ignore_specs(
path: str = '.',
cli_ignore_patterns: Optional[List[str]] = None
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
"""Load ignore specifications from various sources. """Load ignore specifications from various sources.
Args: Args:
@ -71,7 +94,8 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
cli_ignore_patterns: List of patterns from command line cli_ignore_patterns: List of patterns from command line
Returns: Returns:
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
content_ignore_spec, and tree_and_content_ignore_spec
""" """
gitignore_spec = None gitignore_spec = None
content_ignore_spec = None content_ignore_spec = None
@ -80,14 +104,16 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
if os.path.exists(repo_settings_path): if os.path.exists(repo_settings_path):
logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
with open(repo_settings_path, 'r') as f: with open(repo_settings_path, 'r', encoding='utf-8') as f:
settings = yaml.safe_load(f) settings: Dict[str, Any] = yaml.safe_load(f)
use_gitignore = settings.get('gitignore-import-and-ignore', True) use_gitignore = settings.get('gitignore-import-and-ignore', True)
if 'ignore-content' in settings: if 'ignore-content' in settings:
content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
'gitwildmatch', settings['ignore-content']
)
if 'ignore-tree-and-content' in settings: if 'ignore-tree-and-content' in settings:
tree_and_content_ignore_list.extend(settings['ignore-tree-and-content']) tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
if cli_ignore_patterns: if cli_ignore_patterns:
tree_and_content_ignore_list.extend(cli_ignore_patterns) tree_and_content_ignore_list.extend(cli_ignore_patterns)
@ -95,15 +121,22 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
if use_gitignore: if use_gitignore:
gitignore_path = os.path.join(path, '.gitignore') gitignore_path = os.path.join(path, '.gitignore')
if os.path.exists(gitignore_path): if os.path.exists(gitignore_path):
logging.debug(f'Loading .gitignore from path: {gitignore_path}') logging.debug('Loading .gitignore from path: %s', gitignore_path)
with open(gitignore_path, 'r') as f: with open(gitignore_path, 'r', encoding='utf-8') as f:
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list) tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
'gitwildmatch', tree_and_content_ignore_list
)
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec], def should_ignore_file(
content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool: file_path: str,
relative_path: str,
gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> bool:
"""Check if a file should be ignored based on various ignore specifications. """Check if a file should be ignored based on various ignore specifications.
Args: Args:
@ -126,19 +159,33 @@ def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optio
result = ( result = (
is_ignored_path(file_path) or is_ignored_path(file_path) or
bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or bool(
bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or gitignore_spec and
bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or gitignore_spec.match_file(relative_path)
) or
bool(
content_ignore_spec and
content_ignore_spec.match_file(relative_path)
) or
bool(
tree_and_content_ignore_spec and
tree_and_content_ignore_spec.match_file(relative_path)
) or
os.path.basename(file_path).startswith('repo-to-text_') os.path.basename(file_path).startswith('repo-to-text_')
) )
logging.debug(f'Checking if file should be ignored:') logging.debug('Checking if file should be ignored:')
logging.debug(f' file_path: {file_path}') logging.debug(' file_path: %s', file_path)
logging.debug(f' relative_path: {relative_path}') logging.debug(' relative_path: %s', relative_path)
logging.debug(f' Result: {result}') logging.debug(' Result: %s', result)
return result return result
def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[List[str]] = None) -> str: def save_repo_to_text(
path: str = '.',
output_dir: Optional[str] = None,
to_stdout: bool = False,
cli_ignore_patterns: Optional[List[str]] = None
) -> str:
"""Save repository structure and contents to a text file. """Save repository structure and contents to a text file.
Args: Args:
@ -150,20 +197,24 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
Returns: Returns:
str: Path to the output file or the output text if to_stdout is True str: Path to the output file or the output text if to_stdout is True
""" """
logging.debug(f'Starting to save repo structure to text for path: {path}') logging.debug('Starting to save repo structure to text for path: %s', path)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns) gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) path, cli_ignore_patterns
tree_structure = remove_empty_dirs(tree_structure, path) )
logging.debug(f'Final tree structure to be written: {tree_structure}') tree_structure: str = get_tree_structure(
path, gitignore_spec, tree_and_content_ignore_spec
)
tree_structure = remove_empty_dirs(tree_structure)
logging.debug('Final tree structure to be written: %s', tree_structure)
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
output_file = f'repo-to-text_{timestamp}.txt' output_file = f'repo-to-text_{timestamp}.txt'
if output_dir: if output_dir:
if not os.path.exists(output_dir): if not os.path.exists(output_dir):
os.makedirs(output_dir) os.makedirs(output_dir)
output_file = os.path.join(output_dir, output_file) output_file = os.path.join(output_dir, output_file)
output_content: List[str] = [] output_content: List[str] = []
project_name = os.path.basename(os.path.abspath(path)) project_name = os.path.basename(os.path.abspath(path))
output_content.append(f'Directory: {project_name}\n\n') output_content.append(f'Directory: {project_name}\n\n')
@ -172,7 +223,7 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
if os.path.exists(os.path.join(path, '.gitignore')): if os.path.exists(os.path.join(path, '.gitignore')):
output_content.append('├── .gitignore\n') output_content.append('├── .gitignore\n')
output_content.append(tree_structure + '\n' + '```\n') output_content.append(tree_structure + '\n' + '```\n')
logging.debug('Tree structure written to output content') logging.debug('Tree structure written to output content')
@ -180,47 +231,59 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
for filename in files: for filename in files:
file_path = os.path.join(root, filename) file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, path) relative_path = os.path.relpath(file_path, path)
if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): if should_ignore_file(
file_path,
relative_path,
gitignore_spec,
content_ignore_spec,
tree_and_content_ignore_spec
):
continue continue
relative_path = relative_path.replace('./', '', 1) relative_path = relative_path.replace('./', '', 1)
output_content.append(f'\nContents of {relative_path}:\n') output_content.append(f'\nContents of {relative_path}:\n')
output_content.append('```\n') output_content.append('```\n')
try: try:
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
output_content.append(f.read()) output_content.append(f.read())
except UnicodeDecodeError: except UnicodeDecodeError:
logging.debug(f'Could not decode file contents: {file_path}') logging.debug('Could not decode file contents: %s', file_path)
output_content.append('[Could not decode file contents]\n') output_content.append('[Could not decode file contents]\n')
output_content.append('\n```\n') output_content.append('\n```\n')
output_content.append('\n') output_content.append('\n')
logging.debug('Repository contents written to output content') logging.debug('Repository contents written to output content')
output_text = ''.join(output_content) output_text = ''.join(output_content)
if to_stdout: if to_stdout:
print(output_text) print(output_text)
return output_text return output_text
with open(output_file, 'w') as file: with open(output_file, 'w', encoding='utf-8') as file:
file.write(output_text) file.write(output_text)
try: try:
import importlib.util import importlib.util # pylint: disable=import-outside-toplevel
if importlib.util.find_spec("pyperclip"): if importlib.util.find_spec("pyperclip"):
import pyperclip # type: ignore import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
pyperclip.copy(output_text) # type: ignore pyperclip.copy(output_text) # type: ignore
logging.debug('Repository structure and contents copied to clipboard') logging.debug('Repository structure and contents copied to clipboard')
else: else:
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
print(" pip install pyperclip") print(" pip install pyperclip")
except Exception as e: except (ImportError) as e:
logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.') logging.warning(
logging.debug(f'Clipboard copy error: {e}') 'Could not copy to clipboard. You might be running this '
'script over SSH or without clipboard support.'
print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"") )
logging.debug('Clipboard copy error: %s', e)
return output_file
print(
"[SUCCESS] Repository structure and contents successfully saved to "
f"file: \"./{output_file}\""
)
return output_file

View file

@ -1,3 +1,5 @@
"""This is the main entry point for the repo_to_text package."""
from repo_to_text.cli.cli import main from repo_to_text.cli.cli import main
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,3 +1,5 @@
"""This module contains utility functions for the repo_to_text package."""
import os import os
import shutil import shutil
import logging import logging
@ -19,7 +21,10 @@ def check_tree_command() -> bool:
bool: True if tree command is available, False otherwise bool: True if tree command is available, False otherwise
""" """
if shutil.which('tree') is None: if shutil.which('tree') is None:
print("The 'tree' command is not found. Please install it using one of the following commands:") print(
"The 'tree' command is not found. "
+ "Please install it using one of the following commands:"
)
print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree") print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree")
print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree") print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree")
return False return False
@ -40,43 +45,38 @@ def is_ignored_path(file_path: str) -> bool:
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
result = is_ignored_dir or is_ignored_file result = is_ignored_dir or is_ignored_file
if result: if result:
logging.debug(f'Path ignored: {file_path}') logging.debug('Path ignored: %s', file_path)
return result return result
def remove_empty_dirs(tree_output: str, path: str = '.') -> str: def remove_empty_dirs(tree_output: str) -> str:
"""Remove empty directories from tree output. """Remove empty directories from tree output."""
Args:
tree_output: Output from tree command
path: Base path for the tree
Returns:
str: Tree output with empty directories removed
"""
logging.debug('Removing empty directories from tree output') logging.debug('Removing empty directories from tree output')
lines = tree_output.splitlines() lines = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
filtered_lines: List[str] = [] filtered_lines: List[str] = []
# Track directories that have files or subdirectories
non_empty_dirs: Set[str] = set()
# First pass: identify non-empty directories
for line in reversed(lines):
stripped_line = line.strip()
if not stripped_line.endswith('/'):
# This is a file, mark its parent directory as non-empty
parent_dir: str = os.path.dirname(stripped_line)
while parent_dir:
non_empty_dirs.add(parent_dir)
parent_dir = os.path.dirname(parent_dir)
# Second pass: filter out empty directories
for line in lines: for line in lines:
parts = line.strip().split() stripped_line = line.strip()
if parts: if stripped_line.endswith('/'):
full_path = parts[-1] # This is a directory
if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): dir_path = stripped_line[:-1] # Remove trailing slash
logging.debug(f'Directory is empty and will be removed: {full_path}') if dir_path not in non_empty_dirs:
logging.debug('Directory is empty and will be removed: %s', dir_path)
continue continue
non_empty_dirs.add(os.path.dirname(full_path)) filtered_lines.append(line)
filtered_lines.append(line)
final_lines: List[str] = []
for line in filtered_lines:
parts = line.strip().split()
if parts:
full_path = parts[-1]
if os.path.isdir(full_path) and full_path not in non_empty_dirs:
logging.debug(f'Directory is empty and will be removed: {full_path}')
continue
final_lines.append(line)
logging.debug('Empty directory removal complete') logging.debug('Empty directory removal complete')
return '\n'.join(filtered_lines) return '\n'.join(filtered_lines)

View file

@ -1,9 +1,11 @@
"""Test the CLI module."""
import os import os
import pytest
import tempfile import tempfile
import shutil import shutil
from typing import Generator from typing import Generator
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
import pytest
from repo_to_text.cli.cli import ( from repo_to_text.cli.cli import (
create_default_settings_file, create_default_settings_file,
parse_args, parse_args,
@ -48,11 +50,11 @@ def test_create_default_settings_file(temp_dir: str) -> None:
"""Test creation of default settings file.""" """Test creation of default settings file."""
os.chdir(temp_dir) os.chdir(temp_dir)
create_default_settings_file() create_default_settings_file()
settings_file = '.repo-to-text-settings.yaml' settings_file = '.repo-to-text-settings.yaml'
assert os.path.exists(settings_file) assert os.path.exists(settings_file)
with open(settings_file, 'r') as f: with open(settings_file, 'r', encoding='utf-8') as f:
content = f.read() content = f.read()
assert 'gitignore-import-and-ignore: True' in content assert 'gitignore-import-and-ignore: True' in content
assert 'ignore-tree-and-content:' in content assert 'ignore-tree-and-content:' in content
@ -63,7 +65,7 @@ def test_create_default_settings_file_already_exists(temp_dir: str) -> None:
os.chdir(temp_dir) os.chdir(temp_dir)
# Create the file first # Create the file first
create_default_settings_file() create_default_settings_file()
# Try to create it again # Try to create it again
with pytest.raises(FileExistsError) as exc_info: with pytest.raises(FileExistsError) as exc_info:
create_default_settings_file() create_default_settings_file()
@ -94,7 +96,10 @@ def test_main_create_settings(mock_create_settings: MagicMock) -> None:
@patch('repo_to_text.cli.cli.setup_logging') @patch('repo_to_text.cli.cli.setup_logging')
@patch('repo_to_text.cli.cli.create_default_settings_file') @patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_logging: MagicMock) -> None: def test_main_with_debug_logging(
mock_create_settings: MagicMock,
mock_setup_logging: MagicMock
) -> None:
"""Test main function with debug logging enabled.""" """Test main function with debug logging enabled."""
with patch('sys.argv', ['repo-to-text', '--debug', '--create-settings']): with patch('sys.argv', ['repo-to-text', '--debug', '--create-settings']):
with pytest.raises(SystemExit) as exc_info: with pytest.raises(SystemExit) as exc_info:
@ -104,4 +109,4 @@ def test_main_with_debug_logging(mock_create_settings: MagicMock, mock_setup_log
mock_create_settings.assert_called_once() mock_create_settings.assert_called_once()
if __name__ == "__main__": if __name__ == "__main__":
pytest.main([__file__]) pytest.main([__file__])

View file

@ -1,8 +1,11 @@
"""Test the core module."""
import os import os
import tempfile import tempfile
import shutil import shutil
import pytest
from typing import Generator from typing import Generator
import pytest
from repo_to_text.core.core import ( from repo_to_text.core.core import (
get_tree_structure, get_tree_structure,
load_ignore_specs, load_ignore_specs,
@ -20,12 +23,13 @@ def temp_dir() -> Generator[str, None, None]:
shutil.rmtree(temp_path) shutil.rmtree(temp_path)
@pytest.fixture @pytest.fixture
def sample_repo(temp_dir: str) -> str: def sample_repo(tmp_path: str) -> str:
"""Create a sample repository structure for testing.""" """Create a sample repository structure for testing."""
tmp_path_str = str(tmp_path)
# Create directories # Create directories
os.makedirs(os.path.join(temp_dir, "src")) os.makedirs(os.path.join(tmp_path_str, "src"))
os.makedirs(os.path.join(temp_dir, "tests")) os.makedirs(os.path.join(tmp_path_str, "tests"))
# Create sample files # Create sample files
files = { files = {
"README.md": "# Test Project", "README.md": "# Test Project",
@ -45,14 +49,14 @@ ignore-content:
- "README.md" - "README.md"
""" """
} }
for file_path, content in files.items(): for file_path, content in files.items():
full_path = os.path.join(temp_dir, file_path) full_path = os.path.join(tmp_path_str, file_path)
os.makedirs(os.path.dirname(full_path), exist_ok=True) os.makedirs(os.path.dirname(full_path), exist_ok=True)
with open(full_path, "w") as f: with open(full_path, "w", encoding='utf-8') as f:
f.write(content) f.write(content)
return temp_dir return tmp_path_str
def test_is_ignored_path() -> None: def test_is_ignored_path() -> None:
"""Test the is_ignored_path function.""" """Test the is_ignored_path function."""
@ -64,26 +68,26 @@ def test_is_ignored_path() -> None:
def test_load_ignore_specs(sample_repo: str) -> None: def test_load_ignore_specs(sample_repo: str) -> None:
"""Test loading ignore specifications from files.""" """Test loading ignore specifications from files."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
assert gitignore_spec is not None assert gitignore_spec is not None
assert content_ignore_spec is not None assert content_ignore_spec is not None
assert tree_and_content_ignore_spec is not None assert tree_and_content_ignore_spec is not None
# Test gitignore patterns # Test gitignore patterns
assert gitignore_spec.match_file("test.pyc") is True assert gitignore_spec.match_file("test.pyc") is True
assert gitignore_spec.match_file("__pycache__/cache.py") is True assert gitignore_spec.match_file("__pycache__/cache.py") is True
assert gitignore_spec.match_file(".git/config") is True assert gitignore_spec.match_file(".git/config") is True
# Test content ignore patterns # Test content ignore patterns
assert content_ignore_spec.match_file("README.md") is True assert content_ignore_spec.match_file("README.md") is True
# Test tree and content ignore patterns # Test tree and content ignore patterns
assert tree_and_content_ignore_spec.match_file(".git/config") is True assert tree_and_content_ignore_spec.match_file(".git/config") is True
def test_should_ignore_file(sample_repo: str) -> None: def test_should_ignore_file(sample_repo: str) -> None:
"""Test file ignoring logic.""" """Test file ignoring logic."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
# Test various file paths # Test various file paths
assert should_ignore_file( assert should_ignore_file(
".git/config", ".git/config",
@ -92,7 +96,7 @@ def test_should_ignore_file(sample_repo: str) -> None:
content_ignore_spec, content_ignore_spec,
tree_and_content_ignore_spec tree_and_content_ignore_spec
) is True ) is True
assert should_ignore_file( assert should_ignore_file(
"src/main.py", "src/main.py",
"src/main.py", "src/main.py",
@ -105,7 +109,7 @@ def test_get_tree_structure(sample_repo: str) -> None:
"""Test tree structure generation.""" """Test tree structure generation."""
gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec) tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec)
# Basic structure checks # Basic structure checks
assert "src" in tree_output assert "src" in tree_output
assert "tests" in tree_output assert "tests" in tree_output
@ -113,74 +117,74 @@ def test_get_tree_structure(sample_repo: str) -> None:
assert "test_main.py" in tree_output assert "test_main.py" in tree_output
assert ".git" not in tree_output assert ".git" not in tree_output
def test_remove_empty_dirs(tmp_path: "os.PathLike[str]") -> None:
    """Test removal of empty directories from tree output.

    Creates ``src`` and ``tests`` (one file each) plus an empty ``empty_dir``
    under ``tmp_path``, hand-writes a tree listing for that layout, and checks
    that ``remove_empty_dirs`` drops only the ``empty_dir`` entry.

    Args:
        tmp_path: Temporary directory supplied by the fixture. pytest's
            built-in ``tmp_path`` yields a ``pathlib.Path``, not a ``str``,
            hence the path-like annotation.
    """
    # Normalise once so every path built below is a plain string.
    root = os.fspath(tmp_path)
    empty_dir_path = os.path.join(root, "empty_dir")

    # Create test directory structure
    for dir_name in ("src", "empty_dir", "tests"):
        os.makedirs(os.path.join(root, dir_name))

    # Create some files
    with open(os.path.join(root, "src/main.py"), "w", encoding='utf-8') as f:
        f.write("print('test')")
    with open(os.path.join(root, "tests/test_main.py"), "w", encoding='utf-8') as f:
        f.write("def test(): pass")

    # Create a mock tree output that matches the actual tree command format
    # NOTE(review): spacing inside these literals is copied from the reviewed
    # text, where whitespace may have been collapsed - confirm against real
    # `tree` output.
    tree_output = (
        f"{root}\n"
        f"├── {os.path.join(root, 'src')}\n"
        f"│ └── {os.path.join(root, 'src/main.py')}\n"
        f"├── {empty_dir_path}\n"
        f"└── {os.path.join(root, 'tests')}\n"
        f" └── {os.path.join(root, 'tests/test_main.py')}\n"
    )

    filtered_output = remove_empty_dirs(tree_output)

    # Check that empty_dir is removed but other directories remain.
    # Compare against the full path: the temporary directory is named after
    # this test, so the bare substring "empty_dir" occurs in every path under
    # it and cannot be used to detect the removed entry.
    assert empty_dir_path not in filtered_output
    assert os.path.join(root, "src") in filtered_output
    assert os.path.join(root, "tests") in filtered_output
    assert os.path.join(root, "src/main.py") in filtered_output
    assert os.path.join(root, "tests/test_main.py") in filtered_output
def test_save_repo_to_text(sample_repo: str) -> None:
    """Test the main save_repo_to_text function.

    Writes the dump for ``sample_repo`` into an ``output`` sub-directory, then
    checks which fragments the generated text contains and which it omits.
    """
    # Directory that receives the generated file
    target_dir = os.path.join(sample_repo, "output")
    os.makedirs(target_dir, exist_ok=True)

    # Add a .git directory with a config file; it must not appear in the dump
    os.makedirs(os.path.join(sample_repo, ".git"))
    git_config = os.path.join(sample_repo, ".git/config")
    with open(git_config, "w", encoding='utf-8') as cfg:
        cfg.write("[core]\n\trepositoryformatversion = 0\n")

    # Generate the file and check where it was written
    result_path = save_repo_to_text(sample_repo, output_dir=target_dir)
    assert os.path.exists(result_path)
    assert os.path.dirname(result_path) == target_dir

    with open(result_path, 'r', encoding='utf-8') as dump:
        text = dump.read()

    # Section header, expected file names, expected file contents
    must_appear = (
        "Directory Structure:",
        "src/main.py",
        "tests/test_main.py",
        "print('Hello World')",
        "def test_sample(): pass",
    )
    for fragment in must_appear:
        assert fragment in text

    # Ignored paths and .gitignore content must be left out
    must_be_absent = (
        ".git/config",
        "repo-to-text_",
        ".repo-to-text-settings.yaml",
        "*.pyc",
        "__pycache__",
    )
    for fragment in must_be_absent:
        assert fragment not in text
@ -197,14 +201,16 @@ def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None:
"""Test loading ignore specs with CLI patterns.""" """Test loading ignore specs with CLI patterns."""
cli_patterns = ["*.log", "temp/"] cli_patterns = ["*.log", "temp/"]
_, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns) _, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns)
assert tree_and_content_ignore_spec.match_file("test.log") is True assert tree_and_content_ignore_spec.match_file("test.log") is True
assert tree_and_content_ignore_spec.match_file("temp/file.txt") is True assert tree_and_content_ignore_spec.match_file("temp/file.txt") is True
assert tree_and_content_ignore_spec.match_file("normal.txt") is False assert tree_and_content_ignore_spec.match_file("normal.txt") is False
def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None:
    """Test loading ignore specs when .gitignore is missing.

    Expects the gitignore and content-ignore specs to be ``None`` while the
    combined tree-and-content spec is still returned.
    """
    specs = load_ignore_specs(temp_dir)
    git_spec, content_spec, combined_spec = specs

    assert git_spec is None
    assert content_spec is None
    assert combined_spec is not None
@ -214,9 +220,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
# Create files with special characters # Create files with special characters
special_dir = os.path.join(temp_dir, "special chars") special_dir = os.path.join(temp_dir, "special chars")
os.makedirs(special_dir) os.makedirs(special_dir)
with open(os.path.join(special_dir, "file with spaces.txt"), "w") as f: with open(os.path.join(special_dir, "file with spaces.txt"), "w", encoding='utf-8') as f:
f.write("test") f.write("test")
tree_output = get_tree_structure(temp_dir) tree_output = get_tree_structure(temp_dir)
assert "special chars" in tree_output assert "special chars" in tree_output
assert "file with spaces.txt" in tree_output assert "file with spaces.txt" in tree_output
@ -224,7 +230,7 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
def test_should_ignore_file_edge_cases(sample_repo: str) -> None: def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
"""Test edge cases for should_ignore_file function.""" """Test edge cases for should_ignore_file function."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
# Test with dot-prefixed paths # Test with dot-prefixed paths
assert should_ignore_file( assert should_ignore_file(
"./src/main.py", "./src/main.py",
@ -233,7 +239,7 @@ def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
content_ignore_spec, content_ignore_spec,
tree_and_content_ignore_spec tree_and_content_ignore_spec
) is False ) is False
# Test with absolute paths # Test with absolute paths
abs_path = os.path.join(sample_repo, "src/main.py") abs_path = os.path.join(sample_repo, "src/main.py")
rel_path = "src/main.py" rel_path = "src/main.py"
@ -252,9 +258,9 @@ def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
binary_content = b'\x00\x01\x02\x03' binary_content = b'\x00\x01\x02\x03'
with open(binary_path, "wb") as f: with open(binary_path, "wb") as f:
f.write(binary_content) f.write(binary_content)
output = save_repo_to_text(temp_dir, to_stdout=True) output = save_repo_to_text(temp_dir, to_stdout=True)
# Check that the binary file is listed in the structure # Check that the binary file is listed in the structure
assert "binary.bin" in output assert "binary.bin" in output
# Check that the file content section exists with raw binary content # Check that the file content section exists with raw binary content
@ -264,13 +270,13 @@ def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
def test_save_repo_to_text_custom_output_dir(temp_dir: str) -> None:
    """Test save_repo_to_text with custom output directory.

    The resulting file must exist and live directly inside the requested
    directory.
    """
    # Minimal repository content: a single text file
    source_file = os.path.join(temp_dir, "test.txt")
    with open(source_file, "w", encoding='utf-8') as handle:
        handle.write("test content")

    # Only the path is built here; the test itself does not create the directory
    custom_dir = os.path.join(temp_dir, "custom_output")
    written_path = save_repo_to_text(temp_dir, output_dir=custom_dir)

    assert os.path.exists(written_path)
    assert os.path.dirname(written_path) == custom_dir
    assert written_path.startswith(custom_dir)

View file

@ -1,6 +1,10 @@
"""Test the utils module."""
import logging import logging
import pytest
from typing import Generator from typing import Generator
import io
import pytest
from repo_to_text.utils.utils import setup_logging from repo_to_text.utils.utils import setup_logging
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@ -20,7 +24,7 @@ def test_setup_logging_debug() -> None:
root_logger = logging.getLogger() root_logger = logging.getLogger()
root_logger.handlers.clear() # Clear existing handlers root_logger.handlers.clear() # Clear existing handlers
root_logger.setLevel(logging.WARNING) # Reset to default root_logger.setLevel(logging.WARNING) # Reset to default
setup_logging(debug=True) setup_logging(debug=True)
assert len(root_logger.handlers) > 0 assert len(root_logger.handlers) > 0
assert root_logger.level == logging.DEBUG assert root_logger.level == logging.DEBUG
@ -30,7 +34,7 @@ def test_setup_logging_info() -> None:
root_logger = logging.getLogger() root_logger = logging.getLogger()
root_logger.handlers.clear() # Clear existing handlers root_logger.handlers.clear() # Clear existing handlers
root_logger.setLevel(logging.WARNING) # Reset to default root_logger.setLevel(logging.WARNING) # Reset to default
setup_logging(debug=False) setup_logging(debug=False)
assert len(root_logger.handlers) > 0 assert len(root_logger.handlers) > 0
assert root_logger.level == logging.INFO assert root_logger.level == logging.INFO
@ -40,14 +44,14 @@ def test_setup_logging_formatter() -> None:
setup_logging(debug=True) setup_logging(debug=True)
logger = logging.getLogger() logger = logging.getLogger()
handlers = logger.handlers handlers = logger.handlers
# Check if there's at least one handler # Check if there's at least one handler
assert len(handlers) > 0 assert len(handlers) > 0
# Check formatter # Check formatter
formatter = handlers[0].formatter formatter = handlers[0].formatter
assert formatter is not None assert formatter is not None
# Test format string # Test format string
test_record = logging.LogRecord( test_record = logging.LogRecord(
name='test', name='test',
@ -66,26 +70,27 @@ def test_setup_logging_multiple_calls() -> None:
"""Test that multiple calls to setup_logging don't create duplicate handlers.""" """Test that multiple calls to setup_logging don't create duplicate handlers."""
root_logger = logging.getLogger() root_logger = logging.getLogger()
root_logger.handlers.clear() root_logger.handlers.clear()
setup_logging(debug=True) setup_logging(debug=True)
initial_handler_count = len(root_logger.handlers) initial_handler_count = len(root_logger.handlers)
# Call setup_logging again # Call setup_logging again
setup_logging(debug=True) setup_logging(debug=True)
assert len(root_logger.handlers) == initial_handler_count, "Should not create duplicate handlers" assert len(root_logger.handlers) == \
initial_handler_count, "Should not create duplicate handlers"
def test_setup_logging_level_change() -> None:
    """Test changing log levels between setup_logging calls.

    Runs setup_logging with debug on, then off, clearing the root logger's
    handlers before each call, and checks the root level after each one.
    """
    root = logging.getLogger()

    # Debug first, then info; handlers are cleared before every setup call
    expectations = (
        (True, logging.DEBUG),
        (False, logging.INFO),
    )
    for debug_flag, expected_level in expectations:
        root.handlers.clear()
        setup_logging(debug=debug_flag)
        assert root.level == expected_level
@ -94,24 +99,25 @@ def test_setup_logging_message_format() -> None:
"""Test the actual format of logged messages.""" """Test the actual format of logged messages."""
setup_logging(debug=True) setup_logging(debug=True)
logger = logging.getLogger() logger = logging.getLogger()
# Create a temporary handler to capture output # Create a temporary handler to capture output
import io
log_capture = io.StringIO() log_capture = io.StringIO()
handler = logging.StreamHandler(log_capture) handler = logging.StreamHandler(log_capture)
# Use formatter that includes pathname # Use formatter that includes pathname
handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')) handler.setFormatter(
logging.Formatter('%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s')
)
logger.addHandler(handler) logger.addHandler(handler)
# Ensure debug level is set # Ensure debug level is set
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
handler.setLevel(logging.DEBUG) handler.setLevel(logging.DEBUG)
# Log a test message # Log a test message
test_message = "Test log message" test_message = "Test log message"
logger.debug(test_message) logger.debug(test_message)
log_output = log_capture.getvalue() log_output = log_capture.getvalue()
# Verify format components # Verify format components
assert test_message in log_output assert test_message in log_output
assert "DEBUG" in log_output assert "DEBUG" in log_output
@ -121,22 +127,21 @@ def test_setup_logging_error_messages() -> None:
"""Test logging of error messages.""" """Test logging of error messages."""
setup_logging(debug=False) setup_logging(debug=False)
logger = logging.getLogger() logger = logging.getLogger()
# Create a temporary handler to capture output # Create a temporary handler to capture output
import io
log_capture = io.StringIO() log_capture = io.StringIO()
handler = logging.StreamHandler(log_capture) handler = logging.StreamHandler(log_capture)
handler.setFormatter(logger.handlers[0].formatter) handler.setFormatter(logger.handlers[0].formatter)
logger.addHandler(handler) logger.addHandler(handler)
# Log an error message # Log an error message
error_message = "Test error message" error_message = "Test error message"
logger.error(error_message) logger.error(error_message)
log_output = log_capture.getvalue() log_output = log_capture.getvalue()
# Error messages should always be logged regardless of debug setting # Error messages should always be logged regardless of debug setting
assert error_message in log_output assert error_message in log_output
assert "ERROR" in log_output assert "ERROR" in log_output
if __name__ == "__main__":
    # Propagate pytest's result so running this file directly exits non-zero
    # when a test fails, instead of always reporting success to the shell.
    raise SystemExit(pytest.main([__file__]))