Refactor tree structure generation and filtering logic

- Simplified the `get_tree_structure` function by extracting the tree command execution and filtering into separate functions: `run_tree_command` and `filter_tree_output`.
- Introduced `process_line`, `extract_full_path`, and `mark_non_empty_dirs` to enhance readability and maintainability of the filtering process.
- Updated `load_ignore_specs` to improve loading of ignore specifications from settings and .gitignore files.
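- Extracted the existing clipboard copying into a dedicated `copy_to_clipboard` function, called after the output file is written; output assembly and file writing were likewise moved into `generate_output_content` and `write_output_to_file`.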
- Cleaned up and clarified docstrings for better understanding of function purposes.
This commit is contained in:
Kirill Markin 2024-12-17 17:16:39 +01:00
parent ecfbed98ac
commit d124fa24cc
No known key found for this signature in database
GPG key ID: 03AB9530E15B9C1C

View file

@ -6,6 +6,7 @@ import os
import subprocess import subprocess
from typing import Tuple, Optional, List, Dict, Any, Set from typing import Tuple, Optional, List, Dict, Any, Set
from datetime import datetime, timezone from datetime import datetime, timezone
from importlib.machinery import ModuleSpec
import logging import logging
import yaml import yaml
import pathspec import pathspec
@ -18,58 +19,63 @@ def get_tree_structure(
gitignore_spec: Optional[PathSpec] = None, gitignore_spec: Optional[PathSpec] = None,
tree_and_content_ignore_spec: Optional[PathSpec] = None tree_and_content_ignore_spec: Optional[PathSpec] = None
) -> str: ) -> str:
"""Generate tree structure of the directory. """Generate tree structure of the directory."""
Args:
path: Directory path to generate tree for
gitignore_spec: PathSpec object for gitignore patterns
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
Returns:
str: Generated tree structure with empty directories and ignored files removed
"""
if not check_tree_command(): if not check_tree_command():
return "" return ""
logging.debug('Generating tree structure for path: %s', path) logging.debug('Generating tree structure for path: %s', path)
result = subprocess.run( tree_output = run_tree_command(path)
['tree', '-a', '-f', '--noreport', path],
stdout=subprocess.PIPE,
check=True
)
tree_output = result.stdout.decode('utf-8')
logging.debug('Tree output generated:\n%s', tree_output) logging.debug('Tree output generated:\n%s', tree_output)
if not gitignore_spec and not tree_and_content_ignore_spec: if not gitignore_spec and not tree_and_content_ignore_spec:
logging.debug('No .gitignore or ignore-tree-and-content specification found') logging.debug('No .gitignore or ignore-tree-and-content specification found')
return tree_output return tree_output
logging.debug( logging.debug('Filtering tree output based on ignore specifications')
'Filtering tree output based on .gitignore and ignore-tree-and-content specification' return filter_tree_output(tree_output, path, gitignore_spec, tree_and_content_ignore_spec)
def run_tree_command(path: str) -> str:
"""Run the tree command and return its output."""
result = subprocess.run(
['tree', '-a', '-f', '--noreport', path],
stdout=subprocess.PIPE,
check=True
) )
return result.stdout.decode('utf-8')
def filter_tree_output(
tree_output: str,
path: str,
gitignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
"""Filter the tree output based on ignore specifications."""
lines: List[str] = tree_output.splitlines() lines: List[str] = tree_output.splitlines()
non_empty_dirs: Set[str] = set() non_empty_dirs: Set[str] = set()
current_path: List[str] = []
for line in lines: filtered_lines = [
indent_level = len(line) - len(line.lstrip('│ ├└')) process_line(line, path, gitignore_spec, tree_and_content_ignore_spec, non_empty_dirs)
current_path = current_path[:indent_level] for line in lines
]
idx = line.find('./') filtered_tree_output = '\n'.join(filter(None, filtered_lines))
if idx == -1: logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
idx = line.find(path) return filtered_tree_output
if idx != -1:
full_path = line[idx:].strip()
else:
continue
if full_path == '.': def process_line(
continue line: str,
path: str,
gitignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec],
non_empty_dirs: Set[str]
) -> Optional[str]:
"""Process a single line of the tree output."""
full_path = extract_full_path(line, path)
if not full_path or full_path == '.':
return None
relative_path = os.path.relpath(full_path, path) relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
relative_path = relative_path.replace(os.sep, '/')
# Skip if file should be ignored
if should_ignore_file( if should_ignore_file(
full_path, full_path,
relative_path, relative_path,
@ -78,109 +84,76 @@ def get_tree_structure(
tree_and_content_ignore_spec tree_and_content_ignore_spec
): ):
logging.debug('Ignored: %s', relative_path) logging.debug('Ignored: %s', relative_path)
continue return None
# If this is a file, mark all parent directories as non-empty
if not os.path.isdir(full_path): if not os.path.isdir(full_path):
mark_non_empty_dirs(relative_path, non_empty_dirs)
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
return line.replace('./', '', 1)
return None
def extract_full_path(line: str, path: str) -> Optional[str]:
"""Extract the full path from a line of tree output."""
idx = line.find('./')
if idx == -1:
idx = line.find(path)
return line[idx:].strip() if idx != -1 else None
def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None:
"""Mark all parent directories of a file as non-empty."""
dir_path = os.path.dirname(relative_path) dir_path = os.path.dirname(relative_path)
while dir_path: while dir_path:
non_empty_dirs.add(dir_path) non_empty_dirs.add(dir_path)
dir_path = os.path.dirname(dir_path) dir_path = os.path.dirname(dir_path)
# Second pass: build filtered output
filtered_lines: List[str] = []
current_path = []
for line in lines:
indent_level = len(line) - len(line.lstrip('│ ├└'))
current_path = current_path[:indent_level]
# Always include root path
if indent_level == 0:
filtered_lines.append(line)
continue
idx = line.find('./')
if idx == -1:
idx = line.find(path)
if idx != -1:
full_path = line[idx:].strip()
else:
continue
relative_path = os.path.relpath(full_path, path)
relative_path = relative_path.replace(os.sep, '/')
# Skip if file should be ignored
if should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
continue
# Include line if it's a file or a non-empty directory
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
display_line = line.replace('./', '', 1)
filtered_lines.append(display_line)
filtered_tree_output = '\n'.join(filtered_lines)
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
return filtered_tree_output
def load_ignore_specs( def load_ignore_specs(
path: str = '.', path: str = '.',
cli_ignore_patterns: Optional[List[str]] = None cli_ignore_patterns: Optional[List[str]] = None
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: ) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
"""Load ignore specifications from various sources.""" """Load ignore specifications from various sources.
Args:
path: Base directory path
cli_ignore_patterns: List of patterns from command line
Returns:
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
content_ignore_spec, and tree_and_content_ignore_spec
"""
gitignore_spec = None gitignore_spec = None
content_ignore_spec = None content_ignore_spec = None
tree_and_content_ignore_list: List[str] = [] tree_and_content_ignore_list: List[str] = []
use_gitignore = True use_gitignore = True
settings = load_settings_from_file(path) repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
if settings: if os.path.exists(repo_settings_path):
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
with open(repo_settings_path, 'r', encoding='utf-8') as f:
settings: Dict[str, Any] = yaml.safe_load(f)
use_gitignore = settings.get('gitignore-import-and-ignore', True) use_gitignore = settings.get('gitignore-import-and-ignore', True)
content_ignore_spec = create_content_ignore_spec(settings) if 'ignore-content' in settings:
content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
'gitwildmatch', settings['ignore-content']
)
if 'ignore-tree-and-content' in settings:
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', [])) tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
if cli_ignore_patterns: if cli_ignore_patterns:
tree_and_content_ignore_list.extend(cli_ignore_patterns) tree_and_content_ignore_list.extend(cli_ignore_patterns)
if use_gitignore: if use_gitignore:
gitignore_spec = load_gitignore_spec(path) gitignore_path = os.path.join(path, '.gitignore')
if os.path.exists(gitignore_path):
logging.debug('Loading .gitignore from path: %s', gitignore_path)
with open(gitignore_path, 'r', encoding='utf-8') as f:
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines( tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
'gitwildmatch', tree_and_content_ignore_list 'gitwildmatch', tree_and_content_ignore_list
) )
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
def load_settings_from_file(path: str) -> Optional[Dict[str, Any]]:
"""Load settings from the .repo-to-text-settings.yaml file."""
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
if os.path.exists(repo_settings_path):
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
with open(repo_settings_path, 'r', encoding='utf-8') as f:
return yaml.safe_load(f)
return None
def create_content_ignore_spec(settings: Dict[str, Any]) -> Optional[PathSpec]:
"""Create content ignore spec from settings."""
if 'ignore-content' in settings:
return pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'])
return None
def load_gitignore_spec(path: str) -> Optional[PathSpec]:
"""Load gitignore spec from the .gitignore file."""
gitignore_path = os.path.join(path, '.gitignore')
if os.path.exists(gitignore_path):
logging.debug('Loading .gitignore from path: %s', gitignore_path)
with open(gitignore_path, 'r', encoding='utf-8') as f:
return pathspec.PathSpec.from_lines('gitwildmatch', f)
return None
def should_ignore_file( def should_ignore_file(
file_path: str, file_path: str,
relative_path: str, relative_path: str,
@ -237,17 +210,7 @@ def save_repo_to_text(
to_stdout: bool = False, to_stdout: bool = False,
cli_ignore_patterns: Optional[List[str]] = None cli_ignore_patterns: Optional[List[str]] = None
) -> str: ) -> str:
"""Save repository structure and contents to a text file. """Save repository structure and contents to a text file."""
Args:
path: Repository path
output_dir: Directory to save output file
to_stdout: Whether to output to stdout instead of file
cli_ignore_patterns: List of patterns from command line
Returns:
str: Path to the output file or the output text if to_stdout is True
"""
logging.debug('Starting to save repo structure to text for path: %s', path) logging.debug('Starting to save repo structure to text for path: %s', path)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs( gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
path, cli_ignore_patterns path, cli_ignore_patterns
@ -257,14 +220,36 @@ def save_repo_to_text(
) )
logging.debug('Final tree structure to be written: %s', tree_structure) logging.debug('Final tree structure to be written: %s', tree_structure)
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC') output_content = generate_output_content(
output_file = f'repo-to-text_{timestamp}.txt' path,
tree_structure,
gitignore_spec,
content_ignore_spec,
tree_and_content_ignore_spec
)
if output_dir: if to_stdout:
if not os.path.exists(output_dir): print(output_content)
os.makedirs(output_dir) return output_content
output_file = os.path.join(output_dir, output_file)
output_file = write_output_to_file(output_content, output_dir)
copy_to_clipboard(output_content)
print(
"[SUCCESS] Repository structure and contents successfully saved to "
f"file: \"./{output_file}\""
)
return output_file
def generate_output_content(
path: str,
tree_structure: str,
gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
"""Generate the output content for the repository."""
output_content: List[str] = [] output_content: List[str] = []
project_name = os.path.basename(os.path.abspath(path)) project_name = os.path.basename(os.path.abspath(path))
output_content.append(f'Directory: {project_name}\n\n') output_content.append(f'Directory: {project_name}\n\n')
@ -306,34 +291,38 @@ def save_repo_to_text(
output_content.append('\n') output_content.append('\n')
logging.debug('Repository contents written to output content') logging.debug('Repository contents written to output content')
output_text = ''.join(output_content) return ''.join(output_content)
if to_stdout: def write_output_to_file(output_content: str, output_dir: Optional[str]) -> str:
print(output_text) """Write the output content to a file."""
return output_text timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
output_file = f'repo-to-text_{timestamp}.txt'
if output_dir:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_file = os.path.join(output_dir, output_file)
with open(output_file, 'w', encoding='utf-8') as file: with open(output_file, 'w', encoding='utf-8') as file:
file.write(output_text) file.write(output_content)
return output_file
def copy_to_clipboard(output_content: str) -> None:
"""Copy the output content to the clipboard if possible."""
try: try:
import importlib.util # pylint: disable=import-outside-toplevel import importlib.util # pylint: disable=import-outside-toplevel
if importlib.util.find_spec("pyperclip"): spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip")
if spec:
import pyperclip # pylint: disable=import-outside-toplevel # type: ignore import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
pyperclip.copy(output_text) # type: ignore pyperclip.copy(output_content) # type: ignore
logging.debug('Repository structure and contents copied to clipboard') logging.debug('Repository structure and contents copied to clipboard')
else: else:
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:") print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
print(" pip install pyperclip") print(" pip install pyperclip")
except (ImportError) as e: except ImportError as e:
logging.warning( logging.warning(
'Could not copy to clipboard. You might be running this ' 'Could not copy to clipboard. You might be running this '
'script over SSH or without clipboard support.' 'script over SSH or without clipboard support.'
) )
logging.debug('Clipboard copy error: %s', e) logging.debug('Clipboard copy error: %s', e)
print(
"[SUCCESS] Repository structure and contents successfully saved to "
f"file: \"./{output_file}\""
)
return output_file