diff --git a/repo_to_text/core/core.py b/repo_to_text/core/core.py index 9dd8958..2bf6464 100644 --- a/repo_to_text/core/core.py +++ b/repo_to_text/core/core.py @@ -4,14 +4,14 @@ Core functionality for repo-to-text import os import subprocess -from typing import Tuple, Optional, List, Dict, Any +from typing import Tuple, Optional, List, Dict, Any, Set from datetime import datetime, timezone import logging import yaml import pathspec from pathspec import PathSpec -from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs +from ..utils.utils import check_tree_command, is_ignored_path def get_tree_structure( path: str = '.', @@ -26,7 +26,7 @@ def get_tree_structure( tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns Returns: - str: Generated tree structure + str: Generated tree structure with empty directories and ignored files removed """ if not check_tree_command(): return "" @@ -47,9 +47,14 @@ def get_tree_structure( logging.debug( 'Filtering tree output based on .gitignore and ignore-tree-and-content specification' ) - filtered_lines: List[str] = [] + lines: List[str] = tree_output.splitlines() + non_empty_dirs: Set[str] = set() + current_path: List[str] = [] + + for line in lines: + indent_level = len(line) - len(line.lstrip('│ ├└')) + current_path = current_path[:indent_level] - for line in tree_output.splitlines(): idx = line.find('./') if idx == -1: idx = line.find(path) @@ -63,24 +68,66 @@ def get_tree_structure( relative_path = os.path.relpath(full_path, path) relative_path = relative_path.replace(os.sep, '/') - if os.path.isdir(full_path): - relative_path += '/' - if not should_ignore_file( + # Skip if file should be ignored + if should_ignore_file( full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec ): + logging.debug('Ignored: %s', relative_path) + continue + + # If this is a file, mark all parent directories as non-empty + if not os.path.isdir(full_path): + dir_path = os.path.dirname(relative_path) + while dir_path: + non_empty_dirs.add(dir_path) + dir_path = os.path.dirname(dir_path) + + # Second pass: build filtered output + filtered_lines: List[str] = [] + current_path = [] + + for line in lines: + indent_level = len(line) - len(line.lstrip('│ ├└')) + current_path = current_path[:indent_level] + + # Always include root path + if indent_level == 0: + filtered_lines.append(line) + continue + + idx = line.find('./') + if idx == -1: + idx = line.find(path) + if idx != -1: + full_path = line[idx:].strip() + else: + continue + + relative_path = os.path.relpath(full_path, path) + relative_path = relative_path.replace(os.sep, '/') + + # Skip if file should be ignored + if should_ignore_file( + full_path, + relative_path, + gitignore_spec, + None, + tree_and_content_ignore_spec + ): + continue + + # Include line if it's a file or a non-empty directory + if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs: display_line = line.replace('./', '', 1) filtered_lines.append(display_line) - else: - logging.debug('Ignored: %s', relative_path) filtered_tree_output = '\n'.join(filtered_lines) logging.debug('Filtered tree structure:\n%s', filtered_tree_output) - logging.debug('Tree structure filtering complete') return filtered_tree_output def load_ignore_specs( @@ -204,7 +251,6 @@ def save_repo_to_text( tree_structure: str = get_tree_structure( path, gitignore_spec, tree_and_content_ignore_spec ) - tree_structure = remove_empty_dirs(tree_structure) logging.debug('Final tree structure to be written: %s', tree_structure) timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') diff --git a/repo_to_text/utils/__init__.py b/repo_to_text/utils/__init__.py index 51c6c6e..3fd2aed 100644 --- a/repo_to_text/utils/__init__.py +++ b/repo_to_text/utils/__init__.py @@ -1,3 +1,5 @@ -from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs +"""This module contains utility functions for the repo_to_text package.""" -__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs'] \ No newline at end of file +from .utils import setup_logging, check_tree_command, is_ignored_path + +__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path'] diff --git a/repo_to_text/utils/utils.py b/repo_to_text/utils/utils.py index c37a058..e18ceb6 100644 --- a/repo_to_text/utils/utils.py +++ b/repo_to_text/utils/utils.py @@ -1,9 +1,8 @@ """This module contains utility functions for the repo_to_text package.""" -import os import shutil import logging -from typing import List, Set +from typing import List def setup_logging(debug: bool = False) -> None: """Set up logging configuration. @@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool: if result: logging.debug('Path ignored: %s', file_path) return result - -def remove_empty_dirs(tree_output: str) -> str: - """Remove empty directories from tree output.""" - logging.debug('Removing empty directories from tree output') - lines = tree_output.splitlines() - filtered_lines: List[str] = [] - - # Track directories that have files or subdirectories - non_empty_dirs: Set[str] = set() - - # First pass: identify non-empty directories - for line in reversed(lines): - stripped_line = line.strip() - if not stripped_line.endswith('/'): - # This is a file, mark its parent directory as non-empty - parent_dir: str = os.path.dirname(stripped_line) - while parent_dir: - non_empty_dirs.add(parent_dir) - parent_dir = os.path.dirname(parent_dir) - - # Second pass: filter out empty directories - for line in lines: - stripped_line = line.strip() - if stripped_line.endswith('/'): - # This is a directory - dir_path = stripped_line[:-1] # Remove trailing slash - if dir_path not in non_empty_dirs: - logging.debug('Directory is empty and will be removed: %s', dir_path) - continue - filtered_lines.append(line) - - logging.debug('Empty directory removal complete') - return '\n'.join(filtered_lines) diff --git a/tests/test_core.py b/tests/test_core.py index 5c38bdd..de18e44 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -11,7 +11,6 @@ from repo_to_text.core.core import ( load_ignore_specs, should_ignore_file, is_ignored_path, - remove_empty_dirs, save_repo_to_text ) @@ -67,7 +66,9 @@ def test_is_ignored_path() -> None: def test_load_ignore_specs(sample_repo: str) -> None: """Test loading ignore specifications from files.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) assert gitignore_spec is not None assert content_ignore_spec is not None @@ -86,7 +87,9 @@ def test_load_ignore_specs(sample_repo: str) -> None: def test_should_ignore_file(sample_repo: str) -> None: """Test file ignoring logic.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) # Test various file paths assert should_ignore_file( @@ -117,38 +120,6 @@ def test_get_tree_structure(sample_repo: str) -> None: assert "test_main.py" in tree_output assert ".git" not in tree_output -def test_remove_empty_dirs(tmp_path: str) -> None: - """Test removal of empty directories from tree output.""" - # Create test directory structure - os.makedirs(os.path.join(tmp_path, "src")) - os.makedirs(os.path.join(tmp_path, "empty_dir")) - os.makedirs(os.path.join(tmp_path, "tests")) - - # Create some files - with open(os.path.join(tmp_path, "src/main.py"), "w", encoding='utf-8') as f: - f.write("print('test')") - with open(os.path.join(tmp_path, "tests/test_main.py"), "w", encoding='utf-8') as f: - f.write("def test(): pass") - - # Create a mock tree output that matches the actual tree command format - tree_output = ( - f"{tmp_path}\n" - f"├── {os.path.join(tmp_path, 'src')}\n" - f"│ └── {os.path.join(tmp_path, 'src/main.py')}\n" - f"├── {os.path.join(tmp_path, 'empty_dir')}\n" - f"└── {os.path.join(tmp_path, 'tests')}\n" - f" └── {os.path.join(tmp_path, 'tests/test_main.py')}\n" - ) - - filtered_output = remove_empty_dirs(tree_output) - - # Check that empty_dir is removed but other directories remain - assert "empty_dir" not in filtered_output - assert os.path.join(tmp_path, "src") in filtered_output - assert os.path.join(tmp_path, "tests") in filtered_output - assert os.path.join(tmp_path, "src/main.py") in filtered_output - assert os.path.join(tmp_path, "tests/test_main.py") in filtered_output - def test_save_repo_to_text(sample_repo: str) -> None: """Test the main save_repo_to_text function.""" # Create output directory @@ -229,7 +200,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None: def test_should_ignore_file_edge_cases(sample_repo: str) -> None: """Test edge cases for should_ignore_file function.""" - gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( + sample_repo + ) # Test with dot-prefixed paths assert should_ignore_file( @@ -287,5 +260,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None: # Should only contain the directory itself assert tree_output.strip() == "" or tree_output.strip() == temp_dir +def test_empty_dirs_filtering(tmp_path: str) -> None: + """Test filtering of empty directories in tree structure generation.""" + # Create test directory structure with normalized paths + base_path = os.path.normpath(tmp_path) + src_path = os.path.join(base_path, "src") + empty_dir_path = os.path.join(base_path, "empty_dir") + tests_path = os.path.join(base_path, "tests") + + os.makedirs(src_path) + os.makedirs(empty_dir_path) + os.makedirs(tests_path) + + # Create some files + with open(os.path.join(src_path, "main.py"), "w", encoding='utf-8') as f: + f.write("print('test')") + with open(os.path.join(tests_path, "test_main.py"), "w", encoding='utf-8') as f: + f.write("def test(): pass") + + # Get tree structure directly using the function + tree_output = get_tree_structure(base_path) + + # Print debug information + print("\nTree output:") + print(tree_output) + + # Basic structure checks for directories with files + assert "src" in tree_output + assert "tests" in tree_output + assert "main.py" in tree_output + assert "test_main.py" in tree_output + + # Check that empty directory is not included by checking each line + for line in tree_output.splitlines(): + # Skip the root directory line + if base_path in line: + continue + # Check that no line contains 'empty_dir' + assert "empty_dir" not in line, f"Found empty_dir in line: {line}" + if __name__ == "__main__": pytest.main([__file__])