remove_empty_dirs new logic and linter cleanup

This commit is contained in:
Kirill Markin 2024-12-17 15:06:59 +01:00
parent 5f283feefd
commit a364328e60
No known key found for this signature in database
GPG key ID: 03AB9530E15B9C1C
4 changed files with 111 additions and 85 deletions

View file

@ -4,14 +4,14 @@ Core functionality for repo-to-text
import os
import subprocess
from typing import Tuple, Optional, List, Dict, Any
from typing import Tuple, Optional, List, Dict, Any, Set
from datetime import datetime, timezone
import logging
import yaml
import pathspec
from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
from ..utils.utils import check_tree_command, is_ignored_path
def get_tree_structure(
path: str = '.',
@ -26,7 +26,7 @@ def get_tree_structure(
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
Returns:
str: Generated tree structure
str: Generated tree structure with empty directories and ignored files removed
"""
if not check_tree_command():
return ""
@ -47,9 +47,14 @@ def get_tree_structure(
logging.debug(
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
)
filtered_lines: List[str] = []
lines: List[str] = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
current_path: List[str] = []
for line in lines:
indent_level = len(line) - len(line.lstrip('│ ├└'))
current_path = current_path[:indent_level]
for line in tree_output.splitlines():
idx = line.find('./')
if idx == -1:
idx = line.find(path)
@ -63,24 +68,66 @@ def get_tree_structure(
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
if os.path.isdir(full_path):
relative_path += '/'
if not should_ignore_file(
# Skip if file should be ignored
if should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
logging.debug('Ignored: %s', relative_path)
continue
# If this is a file, mark all parent directories as non-empty
if not os.path.isdir(full_path):
dir_path = os.path.dirname(relative_path)
while dir_path:
non_empty_dirs.add(dir_path)
dir_path = os.path.dirname(dir_path)
# Second pass: build filtered output
filtered_lines: List[str] = []
current_path = []
for line in lines:
indent_level = len(line) - len(line.lstrip('│ ├└'))
current_path = current_path[:indent_level]
# Always include root path
if indent_level == 0:
filtered_lines.append(line)
continue
idx = line.find('./')
if idx == -1:
idx = line.find(path)
if idx != -1:
full_path = line[idx:].strip()
else:
continue
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
# Skip if file should be ignored
if should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
continue
# Include line if it's a file or a non-empty directory
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
display_line = line.replace('./', '', 1)
filtered_lines.append(display_line)
else:
logging.debug('Ignored: %s', relative_path)
filtered_tree_output = '\n'.join(filtered_lines)
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
logging.debug('Tree structure filtering complete')
return filtered_tree_output
def load_ignore_specs(
@ -204,7 +251,6 @@ def save_repo_to_text(
tree_structure: str = get_tree_structure(
path, gitignore_spec, tree_and_content_ignore_spec
)
tree_structure = remove_empty_dirs(tree_structure)
logging.debug('Final tree structure to be written: %s', tree_structure)
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')

View file

@ -1,3 +1,5 @@
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
"""This module contains utility functions for the repo_to_text package."""
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
from .utils import setup_logging, check_tree_command, is_ignored_path
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']

View file

@ -1,9 +1,8 @@
"""This module contains utility functions for the repo_to_text package."""
import os
import shutil
import logging
from typing import List, Set
from typing import List
def setup_logging(debug: bool = False) -> None:
"""Set up logging configuration.
@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool:
if result:
logging.debug('Path ignored: %s', file_path)
return result
def remove_empty_dirs(tree_output: str) -> str:
"""Remove empty directories from tree output."""
logging.debug('Removing empty directories from tree output')
lines = tree_output.splitlines()
filtered_lines: List[str] = []
# Track directories that have files or subdirectories
non_empty_dirs: Set[str] = set()
# First pass: identify non-empty directories
for line in reversed(lines):
stripped_line = line.strip()
if not stripped_line.endswith('/'):
# This is a file, mark its parent directory as non-empty
parent_dir: str = os.path.dirname(stripped_line)
while parent_dir:
non_empty_dirs.add(parent_dir)
parent_dir = os.path.dirname(parent_dir)
# Second pass: filter out empty directories
for line in lines:
stripped_line = line.strip()
if stripped_line.endswith('/'):
# This is a directory
dir_path = stripped_line[:-1] # Remove trailing slash
if dir_path not in non_empty_dirs:
logging.debug('Directory is empty and will be removed: %s', dir_path)
continue
filtered_lines.append(line)
logging.debug('Empty directory removal complete')
return '\n'.join(filtered_lines)