From e39e7a889603600da2d062fdafd452961fc3eda7 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Mon, 16 Dec 2024 10:24:02 +0100 Subject: [PATCH] strict-typing-and-error-handling --- repo_to_text/cli/cli.py | 37 +++-- repo_to_text/core/core.py | 14 +- repo_to_text/main.py | 304 +----------------------------------- repo_to_text/utils/utils.py | 8 +- tests/test_core.py | 2 +- tests/test_main.py | 190 ---------------------- 6 files changed, 36 insertions(+), 519 deletions(-) delete mode 100644 tests/test_main.py diff --git a/repo_to_text/cli/cli.py b/repo_to_text/cli/cli.py index a988d79..286ed8c 100644 --- a/repo_to_text/cli/cli.py +++ b/repo_to_text/cli/cli.py @@ -2,6 +2,7 @@ import argparse import textwrap import os import logging +import sys from typing import NoReturn from ..utils.utils import setup_logging @@ -51,21 +52,29 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() def main() -> NoReturn: - """Main entry point for the CLI.""" + """Main entry point for the CLI. + + Raises: + SystemExit: Always exits with code 0 on success + """ args = parse_args() setup_logging(debug=args.debug) logging.debug('repo-to-text script started') - if args.create_settings: - create_default_settings_file() - logging.debug('.repo-to-text-settings.yaml file created') - else: - save_repo_to_text( - path=args.input_dir, - output_dir=args.output_dir, - to_stdout=args.stdout, - cli_ignore_patterns=args.ignore_patterns - ) - - logging.debug('repo-to-text script finished') - exit(0) \ No newline at end of file + try: + if args.create_settings: + create_default_settings_file() + logging.debug('.repo-to-text-settings.yaml file created') + else: + save_repo_to_text( + path=args.input_dir, + output_dir=args.output_dir, + to_stdout=args.stdout, + cli_ignore_patterns=args.ignore_patterns + ) + + logging.debug('repo-to-text script finished') + sys.exit(0) + except Exception as e: + logging.error(f'Error occurred: {str(e)}') + sys.exit(1) \ No newline at end of file diff --git a/repo_to_text/core/core.py b/repo_to_text/core/core.py index 5c3ac25..003d262 100644 --- a/repo_to_text/core/core.py +++ b/repo_to_text/core/core.py @@ -3,7 +3,7 @@ import subprocess import logging import yaml from datetime import datetime, timezone -from typing import Tuple, Optional +from typing import Tuple, Optional, List import pathspec from pathspec import PathSpec @@ -33,7 +33,7 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non return tree_output logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') - filtered_lines = [] + filtered_lines: List[str] = [] for line in tree_output.splitlines(): idx = line.find('./') @@ -63,7 +63,7 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non logging.debug('Tree structure filtering complete') return filtered_tree_output -def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: +def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: """Load ignore specifications from various sources. Args: @@ -75,7 +75,7 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = Non """ gitignore_spec = None content_ignore_spec = None - tree_and_content_ignore_list = [] + tree_and_content_ignore_list: List[str] = [] use_gitignore = True repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') @@ -138,7 +138,7 @@ def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optio logging.debug(f' Result: {result}') return result -def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[list] = None) -> str: +def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[List[str]] = None) -> str: """Save repository structure and contents to a text file. Args: @@ -164,7 +164,7 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo os.makedirs(output_dir) output_file = os.path.join(output_dir, output_file) - output_content = [] + output_content: List[str] = [] project_name = os.path.basename(os.path.abspath(path)) output_content.append(f'Directory: {project_name}\n\n') output_content.append('Directory Structure:\n') @@ -212,7 +212,7 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo import importlib.util if importlib.util.find_spec("pyperclip"): import pyperclip # type: ignore - pyperclip.copy(output_text) + pyperclip.copy(output_text) # type: ignore logging.debug('Repository structure and contents copied to clipboard') else: print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") diff --git a/repo_to_text/main.py b/repo_to_text/main.py index 9934f0b..f911293 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -1,306 +1,4 @@ -import os -import subprocess -import shutil -import logging -import argparse -import yaml -from datetime import datetime, timezone -import textwrap - -# Importing the missing pathspec module -import pathspec - -def setup_logging(debug=False): - logging_level = logging.DEBUG if debug else logging.INFO - logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s') - -def check_tree_command(): - """Check if the `tree` command is available, and suggest installation if not.""" - if shutil.which('tree') is None: - print("The 'tree' command is not found. Please install it using one of the following commands:") - print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree") - print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree") - return False - return True - -def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str: - if not check_tree_command(): - return "" - - logging.debug(f'Generating tree structure for path: {path}') - result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) - tree_output = result.stdout.decode('utf-8') - logging.debug(f'Tree output generated:\n{tree_output}') - - if not gitignore_spec and not tree_and_content_ignore_spec: - logging.debug('No .gitignore or ignore-tree-and-content specification found') - return tree_output - - logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') - filtered_lines = [] - - for line in tree_output.splitlines(): - # Find the index where the path starts (look for './' or absolute path) - idx = line.find('./') - if idx == -1: - idx = line.find(path) - if idx != -1: - full_path = line[idx:].strip() - else: - # If neither './' nor the absolute path is found, skip the line - continue - - # Skip the root directory '.' - if full_path == '.': - continue - - # Normalize paths - relative_path = os.path.relpath(full_path, path) - relative_path = relative_path.replace(os.sep, '/') - if os.path.isdir(full_path): - relative_path += '/' - - # Check if the file should be ignored - if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): - # Remove './' from display output for clarity - display_line = line.replace('./', '', 1) - filtered_lines.append(display_line) - else: - logging.debug(f'Ignored: {relative_path}') - - filtered_tree_output = '\n'.join(filtered_lines) - logging.debug(f'Filtered tree structure:\n{filtered_tree_output}') - logging.debug('Tree structure filtering complete') - return filtered_tree_output - -def load_ignore_specs(path='.', cli_ignore_patterns=None): - gitignore_spec = None - content_ignore_spec = None - tree_and_content_ignore_list = [] - use_gitignore = True - - repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') - if os.path.exists(repo_settings_path): - logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') - with open(repo_settings_path, 'r') as f: - settings = yaml.safe_load(f) - use_gitignore = settings.get('gitignore-import-and-ignore', True) - if 'ignore-content' in settings: - content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) - if 'ignore-tree-and-content' in settings: - tree_and_content_ignore_list.extend(settings['ignore-tree-and-content']) - - if cli_ignore_patterns: - tree_and_content_ignore_list.extend(cli_ignore_patterns) - - if use_gitignore: - gitignore_path = os.path.join(path, '.gitignore') - if os.path.exists(gitignore_path): - logging.debug(f'Loading .gitignore from path: {gitignore_path}') - with open(gitignore_path, 'r') as f: - gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) - - tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list) - return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec - -def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): - # Normalize relative_path to use forward slashes - relative_path = relative_path.replace(os.sep, '/') - - # Remove leading './' if present - if relative_path.startswith('./'): - relative_path = relative_path[2:] - - # Append '/' to directories to match patterns ending with '/' - if os.path.isdir(file_path): - relative_path += '/' - - result = ( - is_ignored_path(file_path) or - (gitignore_spec and gitignore_spec.match_file(relative_path)) or - (content_ignore_spec and content_ignore_spec.match_file(relative_path)) or - (tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or - os.path.basename(file_path).startswith('repo-to-text_') - ) - - logging.debug(f'Checking if file should be ignored:') - logging.debug(f' file_path: {file_path}') - logging.debug(f' relative_path: {relative_path}') - logging.debug(f' Result: {result}') - return result - -def is_ignored_path(file_path: str) -> bool: - ignored_dirs = ['.git'] - ignored_files_prefix = ['repo-to-text_'] - is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs) - is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) - result = is_ignored_dir or is_ignored_file - if result: - logging.debug(f'Path ignored: {file_path}') - return result - -def remove_empty_dirs(tree_output: str, path='.') -> str: - logging.debug('Removing empty directories from tree output') - lines = tree_output.splitlines() - non_empty_dirs = set() - filtered_lines = [] - - for line in lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - non_empty_dirs.add(os.path.dirname(full_path)) - filtered_lines.append(line) - - final_lines = [] - for line in filtered_lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and full_path not in non_empty_dirs: - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - final_lines.append(line) - - logging.debug('Empty directory removal complete') - return '\n'.join(filtered_lines) - -def save_repo_to_text(path='.', output_dir=None, to_stdout=False, cli_ignore_patterns=None) -> str: - logging.debug(f'Starting to save repo structure to text for path: {path}') - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns) - tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) - tree_structure = remove_empty_dirs(tree_structure, path) - logging.debug(f'Final tree structure to be written: {tree_structure}') - - # Add timestamp to the output file name with a descriptive name - timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') - output_file = f'repo-to-text_{timestamp}.txt' - - # Determine the full path to the output file - if output_dir: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - output_file = os.path.join(output_dir, output_file) - - output_content = [] - project_name = os.path.basename(os.path.abspath(path)) - output_content.append(f'Directory: {project_name}\n\n') - output_content.append('Directory Structure:\n') - output_content.append('```\n.\n') - - # Insert .gitignore if it exists - if os.path.exists(os.path.join(path, '.gitignore')): - output_content.append('├── .gitignore\n') - - output_content.append(tree_structure + '\n' + '```\n') - logging.debug('Tree structure written to output content') - - for root, _, files in os.walk(path): - for filename in files: - file_path = os.path.join(root, filename) - relative_path = os.path.relpath(file_path, path) - - if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): - continue - - relative_path = relative_path.replace('./', '', 1) - - output_content.append(f'\nContents of {relative_path}:\n') - output_content.append('```\n') - try: - with open(file_path, 'r', encoding='utf-8') as f: - output_content.append(f.read()) - except UnicodeDecodeError: - logging.debug(f'Could not decode file contents: {file_path}') - output_content.append('[Could not decode file contents]\n') - output_content.append('\n```\n') - - output_content.append('\n') - logging.debug('Repository contents written to output content') - - output_text = ''.join(output_content) - - if to_stdout: - print(output_text) - return output_text - - with open(output_file, 'w') as file: - file.write(output_text) - - # Try to copy to clipboard if pyperclip is installed - try: - import importlib.util - if importlib.util.find_spec("pyperclip"): - # Import pyperclip only if it's available - import pyperclip # type: ignore - pyperclip.copy(output_text) - logging.debug('Repository structure and contents copied to clipboard') - else: - print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") - print(" pip install pyperclip") - except Exception as e: - logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.') - logging.debug(f'Clipboard copy error: {e}') - - print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"") - - return output_file - -def create_default_settings_file(): - settings_file = '.repo-to-text-settings.yaml' - if os.path.exists(settings_file): - raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.") - - default_settings = textwrap.dedent("""\ - # Details: https://github.com/kirill-markin/repo-to-text - # Syntax: gitignore rules - - # Ignore files and directories for all sections from gitignore file - # Default: True - gitignore-import-and-ignore: True - - # Ignore files and directories for tree - # and "Contents of ..." sections - ignore-tree-and-content: - - ".repo-to-text-settings.yaml" - - # Ignore files and directories for "Contents of ..." section - ignore-content: - - "README.md" - - "LICENSE" - """) - with open('.repo-to-text-settings.yaml', 'w') as f: - f.write(default_settings) - print("Default .repo-to-text-settings.yaml created.") - -def main(): - parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') - parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process') - parser.add_argument('--debug', action='store_true', help='Enable debug logging') - parser.add_argument('--output-dir', type=str, help='Directory to save the output file') - parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file') - parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file') - parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').") - args = parser.parse_args() - - setup_logging(debug=args.debug) - logging.debug('repo-to-text script started') - - if args.create_settings: - create_default_settings_file() - logging.debug('.repo-to-text-settings.yaml file created') - else: - save_repo_to_text( - path=args.input_dir, - output_dir=args.output_dir, - to_stdout=args.stdout, - cli_ignore_patterns=args.ignore_patterns - ) - - logging.debug('repo-to-text script finished') +from repo_to_text.cli.cli import main if __name__ == '__main__': main() diff --git a/repo_to_text/utils/utils.py b/repo_to_text/utils/utils.py index b2d663a..ea374a4 100644 --- a/repo_to_text/utils/utils.py +++ b/repo_to_text/utils/utils.py @@ -1,7 +1,7 @@ import os import shutil import logging -from typing import List +from typing import List, Set def setup_logging(debug: bool = False) -> None: """Set up logging configuration. @@ -55,8 +55,8 @@ def remove_empty_dirs(tree_output: str, path: str = '.') -> str: """ logging.debug('Removing empty directories from tree output') lines = tree_output.splitlines() - non_empty_dirs = set() - filtered_lines = [] + non_empty_dirs: Set[str] = set() + filtered_lines: List[str] = [] for line in lines: parts = line.strip().split() @@ -68,7 +68,7 @@ def remove_empty_dirs(tree_output: str, path: str = '.') -> str: non_empty_dirs.add(os.path.dirname(full_path)) filtered_lines.append(line) - final_lines = [] + final_lines: List[str] = [] for line in filtered_lines: parts = line.strip().split() if parts: diff --git a/tests/test_core.py b/tests/test_core.py index 3e5cf9e..aa05e3d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -196,7 +196,7 @@ def test_save_repo_to_text_stdout(sample_repo: str) -> None: def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None: """Test loading ignore specs with CLI patterns.""" cli_patterns = ["*.log", "temp/"] - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns) + _, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo, cli_patterns) assert tree_and_content_ignore_spec.match_file("test.log") is True assert tree_and_content_ignore_spec.match_file("temp/file.txt") is True diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index cb157ed..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -import tempfile -import shutil -import pytest -from pathlib import Path -from typing import Generator -from repo_to_text.main import ( - get_tree_structure, - load_ignore_specs, - should_ignore_file, - is_ignored_path, - remove_empty_dirs, - save_repo_to_text -) - -@pytest.fixture -def temp_dir() -> Generator[str, None, None]: - """Create a temporary directory for testing.""" - temp_path = tempfile.mkdtemp() - yield temp_path - shutil.rmtree(temp_path) - -@pytest.fixture -def sample_repo(temp_dir: str) -> str: - """Create a sample repository structure for testing.""" - # Create directories - os.makedirs(os.path.join(temp_dir, "src")) - os.makedirs(os.path.join(temp_dir, "tests")) - - # Create sample files - files = { - "README.md": "# Test Project", - ".gitignore": """ -*.pyc -__pycache__/ -.git/ -""", - "src/main.py": "print('Hello World')", - "tests/test_main.py": "def test_sample(): pass", - ".repo-to-text-settings.yaml": """ -gitignore-import-and-ignore: True -ignore-tree-and-content: - - ".git/" - - ".repo-to-text-settings.yaml" -ignore-content: - - "README.md" -""" - } - - for file_path, content in files.items(): - full_path = os.path.join(temp_dir, file_path) - os.makedirs(os.path.dirname(full_path), exist_ok=True) - with open(full_path, "w") as f: - f.write(content) - - return temp_dir - -def test_is_ignored_path() -> None: - """Test the is_ignored_path function.""" - assert is_ignored_path(".git/config") is True - assert is_ignored_path("repo-to-text_output.txt") is True - assert is_ignored_path("src/main.py") is False - assert is_ignored_path("normal_file.txt") is False - -def test_load_ignore_specs(sample_repo: str) -> None: - """Test loading ignore specifications from files.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - - assert gitignore_spec is not None - assert content_ignore_spec is not None - assert tree_and_content_ignore_spec is not None - - # Test gitignore patterns - assert gitignore_spec.match_file("test.pyc") is True - assert gitignore_spec.match_file("__pycache__/cache.py") is True - assert gitignore_spec.match_file(".git/config") is True - - # Test content ignore patterns - assert content_ignore_spec.match_file("README.md") is True - - # Test tree and content ignore patterns - assert tree_and_content_ignore_spec.match_file(".git/config") is True - -def test_should_ignore_file(sample_repo: str) -> None: - """Test file ignoring logic.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - - # Test various file paths - assert should_ignore_file( - ".git/config", - ".git/config", - gitignore_spec, - content_ignore_spec, - tree_and_content_ignore_spec - ) is True - - assert should_ignore_file( - "src/main.py", - "src/main.py", - gitignore_spec, - content_ignore_spec, - tree_and_content_ignore_spec - ) is False - -def test_get_tree_structure(sample_repo: str) -> None: - """Test tree structure generation.""" - gitignore_spec, _, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) - tree_output = get_tree_structure(sample_repo, gitignore_spec, tree_and_content_ignore_spec) - - # Basic structure checks - assert "src" in tree_output - assert "tests" in tree_output - assert "main.py" in tree_output - assert "test_main.py" in tree_output - assert ".git" not in tree_output - -def test_remove_empty_dirs(temp_dir: str) -> None: - """Test removal of empty directories from tree output.""" - # Create test directory structure - os.makedirs(os.path.join(temp_dir, "src")) - os.makedirs(os.path.join(temp_dir, "empty_dir")) - os.makedirs(os.path.join(temp_dir, "tests")) - - # Create some files - with open(os.path.join(temp_dir, "src/main.py"), "w") as f: - f.write("print('test')") - with open(os.path.join(temp_dir, "tests/test_main.py"), "w") as f: - f.write("def test(): pass") - - # Create a mock tree output that matches the actual tree command format - tree_output = ( - f"{temp_dir}\n" - f"├── {os.path.join(temp_dir, 'src')}\n" - f"│ └── {os.path.join(temp_dir, 'src/main.py')}\n" - f"├── {os.path.join(temp_dir, 'empty_dir')}\n" - f"└── {os.path.join(temp_dir, 'tests')}\n" - f" └── {os.path.join(temp_dir, 'tests/test_main.py')}\n" - ) - - filtered_output = remove_empty_dirs(tree_output, temp_dir) - - # Check that empty_dir is removed but other directories remain - assert "empty_dir" not in filtered_output - assert os.path.join(temp_dir, "src") in filtered_output - assert os.path.join(temp_dir, "tests") in filtered_output - assert os.path.join(temp_dir, "src/main.py") in filtered_output - assert os.path.join(temp_dir, "tests/test_main.py") in filtered_output - -def test_save_repo_to_text(sample_repo: str) -> None: - """Test the main save_repo_to_text function.""" - # Create output directory - output_dir = os.path.join(sample_repo, "output") - os.makedirs(output_dir, exist_ok=True) - - # Create .git directory to ensure it's properly ignored - os.makedirs(os.path.join(sample_repo, ".git")) - with open(os.path.join(sample_repo, ".git/config"), "w") as f: - f.write("[core]\n\trepositoryformatversion = 0\n") - - # Test file output - output_file = save_repo_to_text(sample_repo, output_dir=output_dir) - assert os.path.exists(output_file) - assert os.path.dirname(output_file) == output_dir - - # Check file contents - with open(output_file, 'r') as f: - content = f.read() - - # Basic content checks - assert "Directory Structure:" in content - - # Check for expected files - assert "src/main.py" in content - assert "tests/test_main.py" in content - - # Check for file contents - assert "print('Hello World')" in content - assert "def test_sample(): pass" in content - - # Ensure ignored patterns are not in output - assert ".git/config" not in content # Check specific file - assert "repo-to-text_" not in content - assert ".repo-to-text-settings.yaml" not in content - - # Check that .gitignore content is not included - assert "*.pyc" not in content - assert "__pycache__" not in content - -if __name__ == "__main__": - pytest.main([__file__])