remove_empty_dirs new logic and linter cleanup

This commit is contained in:
Kirill Markin 2024-12-17 15:06:59 +01:00
parent 5f283feefd
commit a364328e60
No known key found for this signature in database
GPG key ID: 03AB9530E15B9C1C
4 changed files with 111 additions and 85 deletions

View file

@ -4,14 +4,14 @@ Core functionality for repo-to-text
import os
import subprocess
from typing import Tuple, Optional, List, Dict, Any
from typing import Tuple, Optional, List, Dict, Any, Set
from datetime import datetime, timezone
import logging
import yaml
import pathspec
from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
from ..utils.utils import check_tree_command, is_ignored_path
def get_tree_structure(
path: str = '.',
@ -26,7 +26,7 @@ def get_tree_structure(
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
Returns:
str: Generated tree structure
str: Generated tree structure with empty directories and ignored files removed
"""
if not check_tree_command():
return ""
@ -47,9 +47,14 @@ def get_tree_structure(
logging.debug(
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
)
filtered_lines: List[str] = []
lines: List[str] = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
current_path: List[str] = []
for line in lines:
indent_level = len(line) - len(line.lstrip('│ ├└'))
current_path = current_path[:indent_level]
for line in tree_output.splitlines():
idx = line.find('./')
if idx == -1:
idx = line.find(path)
@ -63,24 +68,66 @@ def get_tree_structure(
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
if os.path.isdir(full_path):
relative_path += '/'
if not should_ignore_file(
# Skip if file should be ignored
if should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
logging.debug('Ignored: %s', relative_path)
continue
# If this is a file, mark all parent directories as non-empty
if not os.path.isdir(full_path):
dir_path = os.path.dirname(relative_path)
while dir_path:
non_empty_dirs.add(dir_path)
dir_path = os.path.dirname(dir_path)
# Second pass: build filtered output
filtered_lines: List[str] = []
current_path = []
for line in lines:
indent_level = len(line) - len(line.lstrip('│ ├└'))
current_path = current_path[:indent_level]
# Always include root path
if indent_level == 0:
filtered_lines.append(line)
continue
idx = line.find('./')
if idx == -1:
idx = line.find(path)
if idx != -1:
full_path = line[idx:].strip()
else:
continue
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
# Skip if file should be ignored
if should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
continue
# Include line if it's a file or a non-empty directory
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
display_line = line.replace('./', '', 1)
filtered_lines.append(display_line)
else:
logging.debug('Ignored: %s', relative_path)
filtered_tree_output = '\n'.join(filtered_lines)
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
logging.debug('Tree structure filtering complete')
return filtered_tree_output
def load_ignore_specs(
@ -204,7 +251,6 @@ def save_repo_to_text(
tree_structure: str = get_tree_structure(
path, gitignore_spec, tree_and_content_ignore_spec
)
tree_structure = remove_empty_dirs(tree_structure)
logging.debug('Final tree structure to be written: %s', tree_structure)
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')

View file

@ -1,3 +1,5 @@
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
"""This module contains utility functions for the repo_to_text package."""
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
from .utils import setup_logging, check_tree_command, is_ignored_path
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']

View file

@ -1,9 +1,8 @@
"""This module contains utility functions for the repo_to_text package."""
import os
import shutil
import logging
from typing import List, Set
from typing import List
def setup_logging(debug: bool = False) -> None:
"""Set up logging configuration.
@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool:
if result:
logging.debug('Path ignored: %s', file_path)
return result
def remove_empty_dirs(tree_output: str) -> str:
    """Remove empty directories from tree output.

    A line is treated as a directory only when its text ends with ``/``;
    every other line is treated as a file. A directory is kept when at least
    one file line lies somewhere beneath it; otherwise its line is dropped.

    Args:
        tree_output: Text of a directory tree, one entry per line. Lines may
            carry tree-drawing prefixes such as ``├── `` or ``│   └── ``.

    Returns:
        str: The remaining lines joined with newlines, in their original order
        and with their original prefixes.
    """
    logging.debug('Removing empty directories from tree output')
    lines = tree_output.splitlines()

    # Tree-drawing characters that may precede the path on a line. They are
    # stripped before paths are compared, otherwise "├── ./src" (directory
    # line) never equals the parent "│   └── ./src" derived from a file line.
    tree_prefix_chars = '│├└─ \u00a0'
    entries: List[str] = [line.strip().lstrip(tree_prefix_chars) for line in lines]

    # Track directories that have files somewhere beneath them
    non_empty_dirs: Set[str] = set()

    # First pass: identify non-empty directories
    for entry in entries:
        if entry.endswith('/'):
            continue
        # This is a file: mark every ancestor directory as non-empty.
        # Stopping at an already-recorded ancestor is safe (its own ancestors
        # were recorded with it) and also ends the walk at the filesystem
        # root, where os.path.dirname('/') returns '/' forever.
        parent_dir: str = os.path.dirname(entry)
        while parent_dir and parent_dir not in non_empty_dirs:
            non_empty_dirs.add(parent_dir)
            parent_dir = os.path.dirname(parent_dir)

    # Second pass: filter out empty directories
    filtered_lines: List[str] = []
    for line, entry in zip(lines, entries):
        if entry.endswith('/'):
            dir_path = entry[:-1]  # Remove trailing slash
            if dir_path not in non_empty_dirs:
                logging.debug('Directory is empty and will be removed: %s', dir_path)
                continue
        filtered_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(filtered_lines)

View file

@ -11,7 +11,6 @@ from repo_to_text.core.core import (
load_ignore_specs,
should_ignore_file,
is_ignored_path,
remove_empty_dirs,
save_repo_to_text
)
@ -67,7 +66,9 @@ def test_is_ignored_path() -> None:
def test_load_ignore_specs(sample_repo: str) -> None:
"""Test loading ignore specifications from files."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
assert gitignore_spec is not None
assert content_ignore_spec is not None
@ -86,7 +87,9 @@ def test_load_ignore_specs(sample_repo: str) -> None:
def test_should_ignore_file(sample_repo: str) -> None:
"""Test file ignoring logic."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
# Test various file paths
assert should_ignore_file(
@ -117,38 +120,6 @@ def test_get_tree_structure(sample_repo: str) -> None:
assert "test_main.py" in tree_output
assert ".git" not in tree_output
def test_remove_empty_dirs(tmp_path: str) -> None:
    """Check that remove_empty_dirs filters a mocked tree listing.

    Builds three directories under ``tmp_path`` (two holding one file each,
    one left empty), feeds a hand-written tree listing of them to
    ``remove_empty_dirs`` and asserts on which paths remain in the result.
    """
    # Lay out the directories on disk
    for dir_name in ("src", "empty_dir", "tests"):
        os.makedirs(os.path.join(tmp_path, dir_name))

    # Populate the two directories that are expected to survive filtering
    file_contents = {
        "src/main.py": "print('test')",
        "tests/test_main.py": "def test(): pass",
    }
    for rel_file, text in file_contents.items():
        with open(os.path.join(tmp_path, rel_file), "w", encoding='utf-8') as handle:
            handle.write(text)

    # Mock tree output, one entry per line, using full paths
    mock_listing = (
        f"{tmp_path}\n"
        f"├── {os.path.join(tmp_path, 'src')}\n"
        f"│ └── {os.path.join(tmp_path, 'src/main.py')}\n"
        f"├── {os.path.join(tmp_path, 'empty_dir')}\n"
        f"└── {os.path.join(tmp_path, 'tests')}\n"
        f" └── {os.path.join(tmp_path, 'tests/test_main.py')}\n"
    )

    result = remove_empty_dirs(mock_listing)

    # The empty directory must be gone; everything else must still be listed
    assert "empty_dir" not in result
    for kept in ("src", "tests", "src/main.py", "tests/test_main.py"):
        assert os.path.join(tmp_path, kept) in result
def test_save_repo_to_text(sample_repo: str) -> None:
"""Test the main save_repo_to_text function."""
# Create output directory
@ -229,7 +200,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
"""Test edge cases for should_ignore_file function."""
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
sample_repo
)
# Test with dot-prefixed paths
assert should_ignore_file(
@ -287,5 +260,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
# Should only contain the directory itself
assert tree_output.strip() == "" or tree_output.strip() == temp_dir
def test_empty_dirs_filtering(tmp_path: str) -> None:
    """Verify that get_tree_structure leaves empty directories out.

    Creates ``src`` and ``tests`` (one file each) plus an empty ``empty_dir``
    under the normalized ``tmp_path``, generates the tree for that root and
    checks that the populated entries appear while no non-root line mentions
    ``empty_dir``.
    """
    base_path = os.path.normpath(tmp_path)

    # Directory layout: two populated directories and one empty one
    src_path, empty_dir_path, tests_path = (
        os.path.join(base_path, name) for name in ("src", "empty_dir", "tests")
    )
    for directory in (src_path, empty_dir_path, tests_path):
        os.makedirs(directory)

    # One file in each populated directory
    sample_files = (
        (os.path.join(src_path, "main.py"), "print('test')"),
        (os.path.join(tests_path, "test_main.py"), "def test(): pass"),
    )
    for file_path, text in sample_files:
        with open(file_path, "w", encoding='utf-8') as handle:
            handle.write(text)

    # Generate the tree through the function under test
    tree_output = get_tree_structure(base_path)

    # Debug aid: shown by pytest when the test fails
    print("\nTree output:")
    print(tree_output)

    # Populated directories and their files must be listed
    for expected in ("src", "tests", "main.py", "test_main.py"):
        assert expected in tree_output

    # No line other than the root line may mention the empty directory
    for line in tree_output.splitlines():
        if base_path not in line:
            assert "empty_dir" not in line, f"Found empty_dir in line: {line}"
# Entry point for running this test module directly as a script:
# passes this file's path to pytest's runner.
if __name__ == "__main__":
pytest.main([__file__])