mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-05 19:12:24 -08:00
remove_empty_dirs new logic and linter cleanup
This commit is contained in:
parent
5f283feefd
commit
a364328e60
4 changed files with 111 additions and 85 deletions
|
|
@@ -4,14 +4,14 @@ Core functionality for repo-to-text
|
|||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Tuple, Optional, List, Dict, Any
|
||||
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||
from datetime import datetime, timezone
|
||||
import logging
|
||||
import yaml
|
||||
import pathspec
|
||||
from pathspec import PathSpec
|
||||
|
||||
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
from ..utils.utils import check_tree_command, is_ignored_path
|
||||
|
||||
def get_tree_structure(
|
||||
path: str = '.',
|
||||
|
|
@@ -26,7 +26,7 @@ def get_tree_structure(
|
|||
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
||||
|
||||
Returns:
|
||||
str: Generated tree structure
|
||||
str: Generated tree structure with empty directories and ignored files removed
|
||||
"""
|
||||
if not check_tree_command():
|
||||
return ""
|
||||
|
|
@@ -47,9 +47,14 @@ def get_tree_structure(
|
|||
logging.debug(
|
||||
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
|
||||
)
|
||||
filtered_lines: List[str] = []
|
||||
lines: List[str] = tree_output.splitlines()
|
||||
non_empty_dirs: Set[str] = set()
|
||||
current_path: List[str] = []
|
||||
|
||||
for line in lines:
|
||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||
current_path = current_path[:indent_level]
|
||||
|
||||
for line in tree_output.splitlines():
|
||||
idx = line.find('./')
|
||||
if idx == -1:
|
||||
idx = line.find(path)
|
||||
|
|
@@ -63,24 +68,66 @@ def get_tree_structure(
|
|||
|
||||
relative_path = os.path.relpath(full_path, path)
|
||||
relative_path = relative_path.replace(os.sep, '/')
|
||||
if os.path.isdir(full_path):
|
||||
relative_path += '/'
|
||||
|
||||
if not should_ignore_file(
|
||||
# Skip if file should be ignored
|
||||
if should_ignore_file(
|
||||
full_path,
|
||||
relative_path,
|
||||
gitignore_spec,
|
||||
None,
|
||||
tree_and_content_ignore_spec
|
||||
):
|
||||
logging.debug('Ignored: %s', relative_path)
|
||||
continue
|
||||
|
||||
# If this is a file, mark all parent directories as non-empty
|
||||
if not os.path.isdir(full_path):
|
||||
dir_path = os.path.dirname(relative_path)
|
||||
while dir_path:
|
||||
non_empty_dirs.add(dir_path)
|
||||
dir_path = os.path.dirname(dir_path)
|
||||
|
||||
# Second pass: build filtered output
|
||||
filtered_lines: List[str] = []
|
||||
current_path = []
|
||||
|
||||
for line in lines:
|
||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
||||
current_path = current_path[:indent_level]
|
||||
|
||||
# Always include root path
|
||||
if indent_level == 0:
|
||||
filtered_lines.append(line)
|
||||
continue
|
||||
|
||||
idx = line.find('./')
|
||||
if idx == -1:
|
||||
idx = line.find(path)
|
||||
if idx != -1:
|
||||
full_path = line[idx:].strip()
|
||||
else:
|
||||
continue
|
||||
|
||||
relative_path = os.path.relpath(full_path, path)
|
||||
relative_path = relative_path.replace(os.sep, '/')
|
||||
|
||||
# Skip if file should be ignored
|
||||
if should_ignore_file(
|
||||
full_path,
|
||||
relative_path,
|
||||
gitignore_spec,
|
||||
None,
|
||||
tree_and_content_ignore_spec
|
||||
):
|
||||
continue
|
||||
|
||||
# Include line if it's a file or a non-empty directory
|
||||
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
||||
display_line = line.replace('./', '', 1)
|
||||
filtered_lines.append(display_line)
|
||||
else:
|
||||
logging.debug('Ignored: %s', relative_path)
|
||||
|
||||
filtered_tree_output = '\n'.join(filtered_lines)
|
||||
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
||||
logging.debug('Tree structure filtering complete')
|
||||
return filtered_tree_output
|
||||
|
||||
def load_ignore_specs(
|
||||
|
|
@@ -204,7 +251,6 @@ def save_repo_to_text(
|
|||
tree_structure: str = get_tree_structure(
|
||||
path, gitignore_spec, tree_and_content_ignore_spec
|
||||
)
|
||||
tree_structure = remove_empty_dirs(tree_structure)
|
||||
logging.debug('Final tree structure to be written: %s', tree_structure)
|
||||
|
||||
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||
|
|
|
|||
|
|
@@ -1,3 +1,5 @@
|
|||
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
|
||||
from .utils import setup_logging, check_tree_command, is_ignored_path
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']
|
||||
|
|
|
|||
|
|
@@ -1,9 +1,8 @@
|
|||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
from typing import List, Set
|
||||
from typing import List
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
|
||||
"""Set up logging configuration.
|
||||
|
|
@@ -47,36 +46,3 @@ def is_ignored_path(file_path: str) -> bool:
|
|||
if result:
|
||||
logging.debug('Path ignored: %s', file_path)
|
||||
return result
|
||||
|
||||
def remove_empty_dirs(tree_output: str) -> str:
|
||||
"""Remove empty directories from tree output."""
|
||||
logging.debug('Removing empty directories from tree output')
|
||||
lines = tree_output.splitlines()
|
||||
filtered_lines: List[str] = []
|
||||
|
||||
# Track directories that have files or subdirectories
|
||||
non_empty_dirs: Set[str] = set()
|
||||
|
||||
# First pass: identify non-empty directories
|
||||
for line in reversed(lines):
|
||||
stripped_line = line.strip()
|
||||
if not stripped_line.endswith('/'):
|
||||
# This is a file, mark its parent directory as non-empty
|
||||
parent_dir: str = os.path.dirname(stripped_line)
|
||||
while parent_dir:
|
||||
non_empty_dirs.add(parent_dir)
|
||||
parent_dir = os.path.dirname(parent_dir)
|
||||
|
||||
# Second pass: filter out empty directories
|
||||
for line in lines:
|
||||
stripped_line = line.strip()
|
||||
if stripped_line.endswith('/'):
|
||||
# This is a directory
|
||||
dir_path = stripped_line[:-1] # Remove trailing slash
|
||||
if dir_path not in non_empty_dirs:
|
||||
logging.debug('Directory is empty and will be removed: %s', dir_path)
|
||||
continue
|
||||
filtered_lines.append(line)
|
||||
|
||||
logging.debug('Empty directory removal complete')
|
||||
return '\n'.join(filtered_lines)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue