mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-05 19:12:24 -08:00
remove_empty_dirs new logic and linter cleanup
This commit is contained in:
parent
5f283feefd
commit
a364328e60
4 changed files with 111 additions and 85 deletions
|
|
@ -4,14 +4,14 @@ Core functionality for repo-to-text
|
|||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Tuple, Optional, List, Dict, Any
|
||||
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||
from datetime import datetime, timezone
|
||||
import logging
|
||||
import yaml
|
||||
import pathspec
|
||||
from pathspec import PathSpec
|
||||
|
||||
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
from ..utils.utils import check_tree_command, is_ignored_path
|
||||
|
||||
def get_tree_structure(
|
||||
path: str = '.',
|
||||
|
|
@ -26,7 +26,7 @@ def get_tree_structure(
|
|||
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
||||
|
||||
Returns:
|
||||
str: Generated tree structure
|
||||
str: Generated tree structure with empty directories and ignored files removed
|
||||
"""
|
||||
if not check_tree_command():
|
||||
return ""
|
||||
|
|
@ -47,9 +47,14 @@ def get_tree_structure(
|
|||
logging.debug(
|
||||
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
|
||||
)
|
||||
filtered_lines: List[str] = []
|
||||
lines: List[str] = tree_output.splitlines()
|
||||
non_empty_dirs: Set[str] = set()
|
||||
current_path: List[str] = []
|
||||
|
||||
for line in lines:
|
||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||
current_path = current_path[:indent_level]
|
||||
|
||||
for line in tree_output.splitlines():
|
||||
idx = line.find('./')
|
||||
if idx == -1:
|
||||
idx = line.find(path)
|
||||
|
|
@ -63,24 +68,66 @@ def get_tree_structure(
|
|||
|
||||
relative_path = os.path.relpath(full_path, path)
|
||||
relative_path = relative_path.replace(os.sep, '/')
|
||||
if os.path.isdir(full_path):
|
||||
relative_path += '/'
|
||||
|
||||
if not should_ignore_file(
|
||||
# Skip if file should be ignored
|
||||
if should_ignore_file(
|
||||
full_path,
|
||||
relative_path,
|
||||
gitignore_spec,
|
||||
None,
|
||||
tree_and_content_ignore_spec
|
||||
):
|
||||
logging.debug('Ignored: %s', relative_path)
|
||||
continue
|
||||
|
||||
# If this is a file, mark all parent directories as non-empty
|
||||
if not os.path.isdir(full_path):
|
||||
dir_path = os.path.dirname(relative_path)
|
||||
while dir_path:
|
||||
non_empty_dirs.add(dir_path)
|
||||
dir_path = os.path.dirname(dir_path)
|
||||
|
||||
# Second pass: build filtered output
|
||||
filtered_lines: List[str] = []
|
||||
current_path = []
|
||||
|
||||
for line in lines:
|
||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||
current_path = current_path[:indent_level]
|
||||
|
||||
# Always include root path
|
||||
if indent_level == 0:
|
||||
filtered_lines.append(line)
|
||||
continue
|
||||
|
||||
idx = line.find('./')
|
||||
if idx == -1:
|
||||
idx = line.find(path)
|
||||
if idx != -1:
|
||||
full_path = line[idx:].strip()
|
||||
else:
|
||||
continue
|
||||
|
||||
relative_path = os.path.relpath(full_path, path)
|
||||
relative_path = relative_path.replace(os.sep, '/')
|
||||
|
||||
# Skip if file should be ignored
|
||||
if should_ignore_file(
|
||||
full_path,
|
||||
relative_path,
|
||||
gitignore_spec,
|
||||
None,
|
||||
tree_and_content_ignore_spec
|
||||
):
|
||||
continue
|
||||
|
||||
# Include line if it's a file or a non-empty directory
|
||||
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
||||
display_line = line.replace('./', '', 1)
|
||||
filtered_lines.append(display_line)
|
||||
else:
|
||||
logging.debug('Ignored: %s', relative_path)
|
||||
|
||||
filtered_tree_output = '\n'.join(filtered_lines)
|
||||
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
||||
logging.debug('Tree structure filtering complete')
|
||||
return filtered_tree_output
|
||||
|
||||
def load_ignore_specs(
|
||||
|
|
@ -204,7 +251,6 @@ def save_repo_to_text(
|
|||
tree_structure: str = get_tree_structure(
|
||||
path, gitignore_spec, tree_and_content_ignore_spec
|
||||
)
|
||||
tree_structure = remove_empty_dirs(tree_structure)
|
||||
logging.debug('Final tree structure to be written: %s', tree_structure)
|
||||
|
||||
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
|
||||
from .utils import setup_logging, check_tree_command, is_ignored_path
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
from typing import List, Set
|
||||
from typing import List
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
|
||||
"""Set up logging configuration.
|
||||
|
|
@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool:
|
|||
if result:
|
||||
logging.debug('Path ignored: %s', file_path)
|
||||
return result
|
||||
|
||||
def remove_empty_dirs(tree_output: str) -> str:
|
||||
"""Remove empty directories from tree output."""
|
||||
logging.debug('Removing empty directories from tree output')
|
||||
lines = tree_output.splitlines()
|
||||
filtered_lines: List[str] = []
|
||||
|
||||
# Track directories that have files or subdirectories
|
||||
non_empty_dirs: Set[str] = set()
|
||||
|
||||
# First pass: identify non-empty directories
|
||||
for line in reversed(lines):
|
||||
stripped_line = line.strip()
|
||||
if not stripped_line.endswith('/'):
|
||||
# This is a file, mark its parent directory as non-empty
|
||||
parent_dir: str = os.path.dirname(stripped_line)
|
||||
while parent_dir:
|
||||
non_empty_dirs.add(parent_dir)
|
||||
parent_dir = os.path.dirname(parent_dir)
|
||||
|
||||
# Second pass: filter out empty directories
|
||||
for line in lines:
|
||||
stripped_line = line.strip()
|
||||
if stripped_line.endswith('/'):
|
||||
# This is a directory
|
||||
dir_path = stripped_line[:-1] # Remove trailing slash
|
||||
if dir_path not in non_empty_dirs:
|
||||
logging.debug('Directory is empty and will be removed: %s', dir_path)
|
||||
continue
|
||||
filtered_lines.append(line)
|
||||
|
||||
logging.debug('Empty directory removal complete')
|
||||
return '\n'.join(filtered_lines)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from repo_to_text.core.core import (
|
|||
load_ignore_specs,
|
||||
should_ignore_file,
|
||||
is_ignored_path,
|
||||
remove_empty_dirs,
|
||||
save_repo_to_text
|
||||
)
|
||||
|
||||
|
|
@ -67,7 +66,9 @@ def test_is_ignored_path() -> None:
|
|||
|
||||
def test_load_ignore_specs(sample_repo: str) -> None:
|
||||
"""Test loading ignore specifications from files."""
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||
sample_repo
|
||||
)
|
||||
|
||||
assert gitignore_spec is not None
|
||||
assert content_ignore_spec is not None
|
||||
|
|
@ -86,7 +87,9 @@ def test_load_ignore_specs(sample_repo: str) -> None:
|
|||
|
||||
def test_should_ignore_file(sample_repo: str) -> None:
|
||||
"""Test file ignoring logic."""
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||
sample_repo
|
||||
)
|
||||
|
||||
# Test various file paths
|
||||
assert should_ignore_file(
|
||||
|
|
@ -117,38 +120,6 @@ def test_get_tree_structure(sample_repo: str) -> None:
|
|||
assert "test_main.py" in tree_output
|
||||
assert ".git" not in tree_output
|
||||
|
||||
def test_remove_empty_dirs(tmp_path: str) -> None:
|
||||
"""Test removal of empty directories from tree output."""
|
||||
# Create test directory structure
|
||||
os.makedirs(os.path.join(tmp_path, "src"))
|
||||
os.makedirs(os.path.join(tmp_path, "empty_dir"))
|
||||
os.makedirs(os.path.join(tmp_path, "tests"))
|
||||
|
||||
# Create some files
|
||||
with open(os.path.join(tmp_path, "src/main.py"), "w", encoding='utf-8') as f:
|
||||
f.write("print('test')")
|
||||
with open(os.path.join(tmp_path, "tests/test_main.py"), "w", encoding='utf-8') as f:
|
||||
f.write("def test(): pass")
|
||||
|
||||
# Create a mock tree output that matches the actual tree command format
|
||||
tree_output = (
|
||||
f"{tmp_path}\n"
|
||||
f"├── {os.path.join(tmp_path, 'src')}\n"
|
||||
f"│ └── {os.path.join(tmp_path, 'src/main.py')}\n"
|
||||
f"├── {os.path.join(tmp_path, 'empty_dir')}\n"
|
||||
f"└── {os.path.join(tmp_path, 'tests')}\n"
|
||||
f" └── {os.path.join(tmp_path, 'tests/test_main.py')}\n"
|
||||
)
|
||||
|
||||
filtered_output = remove_empty_dirs(tree_output)
|
||||
|
||||
# Check that empty_dir is removed but other directories remain
|
||||
assert "empty_dir" not in filtered_output
|
||||
assert os.path.join(tmp_path, "src") in filtered_output
|
||||
assert os.path.join(tmp_path, "tests") in filtered_output
|
||||
assert os.path.join(tmp_path, "src/main.py") in filtered_output
|
||||
assert os.path.join(tmp_path, "tests/test_main.py") in filtered_output
|
||||
|
||||
def test_save_repo_to_text(sample_repo: str) -> None:
|
||||
"""Test the main save_repo_to_text function."""
|
||||
# Create output directory
|
||||
|
|
@ -229,7 +200,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
|
|||
|
||||
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
|
||||
"""Test edge cases for should_ignore_file function."""
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||
sample_repo
|
||||
)
|
||||
|
||||
# Test with dot-prefixed paths
|
||||
assert should_ignore_file(
|
||||
|
|
@ -287,5 +260,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
|
|||
# Should only contain the directory itself
|
||||
assert tree_output.strip() == "" or tree_output.strip() == temp_dir
|
||||
|
||||
def test_empty_dirs_filtering(tmp_path: str) -> None:
|
||||
"""Test filtering of empty directories in tree structure generation."""
|
||||
# Create test directory structure with normalized paths
|
||||
base_path = os.path.normpath(tmp_path)
|
||||
src_path = os.path.join(base_path, "src")
|
||||
empty_dir_path = os.path.join(base_path, "empty_dir")
|
||||
tests_path = os.path.join(base_path, "tests")
|
||||
|
||||
os.makedirs(src_path)
|
||||
os.makedirs(empty_dir_path)
|
||||
os.makedirs(tests_path)
|
||||
|
||||
# Create some files
|
||||
with open(os.path.join(src_path, "main.py"), "w", encoding='utf-8') as f:
|
||||
f.write("print('test')")
|
||||
with open(os.path.join(tests_path, "test_main.py"), "w", encoding='utf-8') as f:
|
||||
f.write("def test(): pass")
|
||||
|
||||
# Get tree structure directly using the function
|
||||
tree_output = get_tree_structure(base_path)
|
||||
|
||||
# Print debug information
|
||||
print("\nTree output:")
|
||||
print(tree_output)
|
||||
|
||||
# Basic structure checks for directories with files
|
||||
assert "src" in tree_output
|
||||
assert "tests" in tree_output
|
||||
assert "main.py" in tree_output
|
||||
assert "test_main.py" in tree_output
|
||||
|
||||
# Check that empty directory is not included by checking each line
|
||||
for line in tree_output.splitlines():
|
||||
# Skip the root directory line
|
||||
if base_path in line:
|
||||
continue
|
||||
# Check that no line contains 'empty_dir'
|
||||
assert "empty_dir" not in line, f"Found empty_dir in line: {line}"
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue