mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-06 03:22:23 -08:00
remove_empty_dirs new logic and linter cleanup
This commit is contained in:
parent
5f283feefd
commit
a364328e60
4 changed files with 111 additions and 85 deletions
|
|
@ -4,14 +4,14 @@ Core functionality for repo-to-text
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Tuple, Optional, List, Dict, Any
|
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
import pathspec
|
import pathspec
|
||||||
from pathspec import PathSpec
|
from pathspec import PathSpec
|
||||||
|
|
||||||
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
|
from ..utils.utils import check_tree_command, is_ignored_path
|
||||||
|
|
||||||
def get_tree_structure(
|
def get_tree_structure(
|
||||||
path: str = '.',
|
path: str = '.',
|
||||||
|
|
@ -26,7 +26,7 @@ def get_tree_structure(
|
||||||
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Generated tree structure
|
str: Generated tree structure with empty directories and ignored files removed
|
||||||
"""
|
"""
|
||||||
if not check_tree_command():
|
if not check_tree_command():
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -47,9 +47,14 @@ def get_tree_structure(
|
||||||
logging.debug(
|
logging.debug(
|
||||||
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
|
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
|
||||||
)
|
)
|
||||||
filtered_lines: List[str] = []
|
lines: List[str] = tree_output.splitlines()
|
||||||
|
non_empty_dirs: Set[str] = set()
|
||||||
|
current_path: List[str] = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||||
|
current_path = current_path[:indent_level]
|
||||||
|
|
||||||
for line in tree_output.splitlines():
|
|
||||||
idx = line.find('./')
|
idx = line.find('./')
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
idx = line.find(path)
|
idx = line.find(path)
|
||||||
|
|
@ -63,24 +68,66 @@ def get_tree_structure(
|
||||||
|
|
||||||
relative_path = os.path.relpath(full_path, path)
|
relative_path = os.path.relpath(full_path, path)
|
||||||
relative_path = relative_path.replace(os.sep, '/')
|
relative_path = relative_path.replace(os.sep, '/')
|
||||||
if os.path.isdir(full_path):
|
|
||||||
relative_path += '/'
|
|
||||||
|
|
||||||
if not should_ignore_file(
|
# Skip if file should be ignored
|
||||||
|
if should_ignore_file(
|
||||||
full_path,
|
full_path,
|
||||||
relative_path,
|
relative_path,
|
||||||
gitignore_spec,
|
gitignore_spec,
|
||||||
None,
|
None,
|
||||||
tree_and_content_ignore_spec
|
tree_and_content_ignore_spec
|
||||||
):
|
):
|
||||||
|
logging.debug('Ignored: %s', relative_path)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If this is a file, mark all parent directories as non-empty
|
||||||
|
if not os.path.isdir(full_path):
|
||||||
|
dir_path = os.path.dirname(relative_path)
|
||||||
|
while dir_path:
|
||||||
|
non_empty_dirs.add(dir_path)
|
||||||
|
dir_path = os.path.dirname(dir_path)
|
||||||
|
|
||||||
|
# Second pass: build filtered output
|
||||||
|
filtered_lines: List[str] = []
|
||||||
|
current_path = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||||
|
current_path = current_path[:indent_level]
|
||||||
|
|
||||||
|
# Always include root path
|
||||||
|
if indent_level == 0:
|
||||||
|
filtered_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
idx = line.find('./')
|
||||||
|
if idx == -1:
|
||||||
|
idx = line.find(path)
|
||||||
|
if idx != -1:
|
||||||
|
full_path = line[idx:].strip()
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
relative_path = os.path.relpath(full_path, path)
|
||||||
|
relative_path = relative_path.replace(os.sep, '/')
|
||||||
|
|
||||||
|
# Skip if file should be ignored
|
||||||
|
if should_ignore_file(
|
||||||
|
full_path,
|
||||||
|
relative_path,
|
||||||
|
gitignore_spec,
|
||||||
|
None,
|
||||||
|
tree_and_content_ignore_spec
|
||||||
|
):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Include line if it's a file or a non-empty directory
|
||||||
|
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
||||||
display_line = line.replace('./', '', 1)
|
display_line = line.replace('./', '', 1)
|
||||||
filtered_lines.append(display_line)
|
filtered_lines.append(display_line)
|
||||||
else:
|
|
||||||
logging.debug('Ignored: %s', relative_path)
|
|
||||||
|
|
||||||
filtered_tree_output = '\n'.join(filtered_lines)
|
filtered_tree_output = '\n'.join(filtered_lines)
|
||||||
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
||||||
logging.debug('Tree structure filtering complete')
|
|
||||||
return filtered_tree_output
|
return filtered_tree_output
|
||||||
|
|
||||||
def load_ignore_specs(
|
def load_ignore_specs(
|
||||||
|
|
@ -204,7 +251,6 @@ def save_repo_to_text(
|
||||||
tree_structure: str = get_tree_structure(
|
tree_structure: str = get_tree_structure(
|
||||||
path, gitignore_spec, tree_and_content_ignore_spec
|
path, gitignore_spec, tree_and_content_ignore_spec
|
||||||
)
|
)
|
||||||
tree_structure = remove_empty_dirs(tree_structure)
|
|
||||||
logging.debug('Final tree structure to be written: %s', tree_structure)
|
logging.debug('Final tree structure to be written: %s', tree_structure)
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
|
"""This module contains utility functions for the repo_to_text package."""
|
||||||
|
|
||||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
|
from .utils import setup_logging, check_tree_command, is_ignored_path
|
||||||
|
|
||||||
|
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
"""This module contains utility functions for the repo_to_text package."""
|
"""This module contains utility functions for the repo_to_text package."""
|
||||||
|
|
||||||
import os
|
|
||||||
import shutil
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Set
|
from typing import List
|
||||||
|
|
||||||
def setup_logging(debug: bool = False) -> None:
|
def setup_logging(debug: bool = False) -> None:
|
||||||
"""Set up logging configuration.
|
"""Set up logging configuration.
|
||||||
|
|
@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool:
|
||||||
if result:
|
if result:
|
||||||
logging.debug('Path ignored: %s', file_path)
|
logging.debug('Path ignored: %s', file_path)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def remove_empty_dirs(tree_output: str) -> str:
|
|
||||||
"""Remove empty directories from tree output."""
|
|
||||||
logging.debug('Removing empty directories from tree output')
|
|
||||||
lines = tree_output.splitlines()
|
|
||||||
filtered_lines: List[str] = []
|
|
||||||
|
|
||||||
# Track directories that have files or subdirectories
|
|
||||||
non_empty_dirs: Set[str] = set()
|
|
||||||
|
|
||||||
# First pass: identify non-empty directories
|
|
||||||
for line in reversed(lines):
|
|
||||||
stripped_line = line.strip()
|
|
||||||
if not stripped_line.endswith('/'):
|
|
||||||
# This is a file, mark its parent directory as non-empty
|
|
||||||
parent_dir: str = os.path.dirname(stripped_line)
|
|
||||||
while parent_dir:
|
|
||||||
non_empty_dirs.add(parent_dir)
|
|
||||||
parent_dir = os.path.dirname(parent_dir)
|
|
||||||
|
|
||||||
# Second pass: filter out empty directories
|
|
||||||
for line in lines:
|
|
||||||
stripped_line = line.strip()
|
|
||||||
if stripped_line.endswith('/'):
|
|
||||||
# This is a directory
|
|
||||||
dir_path = stripped_line[:-1] # Remove trailing slash
|
|
||||||
if dir_path not in non_empty_dirs:
|
|
||||||
logging.debug('Directory is empty and will be removed: %s', dir_path)
|
|
||||||
continue
|
|
||||||
filtered_lines.append(line)
|
|
||||||
|
|
||||||
logging.debug('Empty directory removal complete')
|
|
||||||
return '\n'.join(filtered_lines)
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ from repo_to_text.core.core import (
|
||||||
load_ignore_specs,
|
load_ignore_specs,
|
||||||
should_ignore_file,
|
should_ignore_file,
|
||||||
is_ignored_path,
|
is_ignored_path,
|
||||||
remove_empty_dirs,
|
|
||||||
save_repo_to_text
|
save_repo_to_text
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -67,7 +66,9 @@ def test_is_ignored_path() -> None:
|
||||||
|
|
||||||
def test_load_ignore_specs(sample_repo: str) -> None:
|
def test_load_ignore_specs(sample_repo: str) -> None:
|
||||||
"""Test loading ignore specifications from files."""
|
"""Test loading ignore specifications from files."""
|
||||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||||
|
sample_repo
|
||||||
|
)
|
||||||
|
|
||||||
assert gitignore_spec is not None
|
assert gitignore_spec is not None
|
||||||
assert content_ignore_spec is not None
|
assert content_ignore_spec is not None
|
||||||
|
|
@ -86,7 +87,9 @@ def test_load_ignore_specs(sample_repo: str) -> None:
|
||||||
|
|
||||||
def test_should_ignore_file(sample_repo: str) -> None:
|
def test_should_ignore_file(sample_repo: str) -> None:
|
||||||
"""Test file ignoring logic."""
|
"""Test file ignoring logic."""
|
||||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||||
|
sample_repo
|
||||||
|
)
|
||||||
|
|
||||||
# Test various file paths
|
# Test various file paths
|
||||||
assert should_ignore_file(
|
assert should_ignore_file(
|
||||||
|
|
@ -117,38 +120,6 @@ def test_get_tree_structure(sample_repo: str) -> None:
|
||||||
assert "test_main.py" in tree_output
|
assert "test_main.py" in tree_output
|
||||||
assert ".git" not in tree_output
|
assert ".git" not in tree_output
|
||||||
|
|
||||||
def test_remove_empty_dirs(tmp_path: str) -> None:
|
|
||||||
"""Test removal of empty directories from tree output."""
|
|
||||||
# Create test directory structure
|
|
||||||
os.makedirs(os.path.join(tmp_path, "src"))
|
|
||||||
os.makedirs(os.path.join(tmp_path, "empty_dir"))
|
|
||||||
os.makedirs(os.path.join(tmp_path, "tests"))
|
|
||||||
|
|
||||||
# Create some files
|
|
||||||
with open(os.path.join(tmp_path, "src/main.py"), "w", encoding='utf-8') as f:
|
|
||||||
f.write("print('test')")
|
|
||||||
with open(os.path.join(tmp_path, "tests/test_main.py"), "w", encoding='utf-8') as f:
|
|
||||||
f.write("def test(): pass")
|
|
||||||
|
|
||||||
# Create a mock tree output that matches the actual tree command format
|
|
||||||
tree_output = (
|
|
||||||
f"{tmp_path}\n"
|
|
||||||
f"├── {os.path.join(tmp_path, 'src')}\n"
|
|
||||||
f"│ └── {os.path.join(tmp_path, 'src/main.py')}\n"
|
|
||||||
f"├── {os.path.join(tmp_path, 'empty_dir')}\n"
|
|
||||||
f"└── {os.path.join(tmp_path, 'tests')}\n"
|
|
||||||
f" └── {os.path.join(tmp_path, 'tests/test_main.py')}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
filtered_output = remove_empty_dirs(tree_output)
|
|
||||||
|
|
||||||
# Check that empty_dir is removed but other directories remain
|
|
||||||
assert "empty_dir" not in filtered_output
|
|
||||||
assert os.path.join(tmp_path, "src") in filtered_output
|
|
||||||
assert os.path.join(tmp_path, "tests") in filtered_output
|
|
||||||
assert os.path.join(tmp_path, "src/main.py") in filtered_output
|
|
||||||
assert os.path.join(tmp_path, "tests/test_main.py") in filtered_output
|
|
||||||
|
|
||||||
def test_save_repo_to_text(sample_repo: str) -> None:
|
def test_save_repo_to_text(sample_repo: str) -> None:
|
||||||
"""Test the main save_repo_to_text function."""
|
"""Test the main save_repo_to_text function."""
|
||||||
# Create output directory
|
# Create output directory
|
||||||
|
|
@ -229,7 +200,9 @@ def test_get_tree_structure_with_special_chars(temp_dir: str) -> None:
|
||||||
|
|
||||||
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
|
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
|
||||||
"""Test edge cases for should_ignore_file function."""
|
"""Test edge cases for should_ignore_file function."""
|
||||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(sample_repo)
|
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||||
|
sample_repo
|
||||||
|
)
|
||||||
|
|
||||||
# Test with dot-prefixed paths
|
# Test with dot-prefixed paths
|
||||||
assert should_ignore_file(
|
assert should_ignore_file(
|
||||||
|
|
@ -287,5 +260,44 @@ def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
|
||||||
# Should only contain the directory itself
|
# Should only contain the directory itself
|
||||||
assert tree_output.strip() == "" or tree_output.strip() == temp_dir
|
assert tree_output.strip() == "" or tree_output.strip() == temp_dir
|
||||||
|
|
||||||
|
def test_empty_dirs_filtering(tmp_path: str) -> None:
|
||||||
|
"""Test filtering of empty directories in tree structure generation."""
|
||||||
|
# Create test directory structure with normalized paths
|
||||||
|
base_path = os.path.normpath(tmp_path)
|
||||||
|
src_path = os.path.join(base_path, "src")
|
||||||
|
empty_dir_path = os.path.join(base_path, "empty_dir")
|
||||||
|
tests_path = os.path.join(base_path, "tests")
|
||||||
|
|
||||||
|
os.makedirs(src_path)
|
||||||
|
os.makedirs(empty_dir_path)
|
||||||
|
os.makedirs(tests_path)
|
||||||
|
|
||||||
|
# Create some files
|
||||||
|
with open(os.path.join(src_path, "main.py"), "w", encoding='utf-8') as f:
|
||||||
|
f.write("print('test')")
|
||||||
|
with open(os.path.join(tests_path, "test_main.py"), "w", encoding='utf-8') as f:
|
||||||
|
f.write("def test(): pass")
|
||||||
|
|
||||||
|
# Get tree structure directly using the function
|
||||||
|
tree_output = get_tree_structure(base_path)
|
||||||
|
|
||||||
|
# Print debug information
|
||||||
|
print("\nTree output:")
|
||||||
|
print(tree_output)
|
||||||
|
|
||||||
|
# Basic structure checks for directories with files
|
||||||
|
assert "src" in tree_output
|
||||||
|
assert "tests" in tree_output
|
||||||
|
assert "main.py" in tree_output
|
||||||
|
assert "test_main.py" in tree_output
|
||||||
|
|
||||||
|
# Check that empty directory is not included by checking each line
|
||||||
|
for line in tree_output.splitlines():
|
||||||
|
# Skip the root directory line
|
||||||
|
if base_path in line:
|
||||||
|
continue
|
||||||
|
# Check that no line contains 'empty_dir'
|
||||||
|
assert "empty_dir" not in line, f"Found empty_dir in line: {line}"
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pytest.main([__file__])
|
pytest.main([__file__])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue