mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-06 03:22:23 -08:00
Refactor tree structure generation and filtering logic
- Simplified the `get_tree_structure` function by extracting the tree command execution and filtering into separate functions: `run_tree_command` and `filter_tree_output`.
- Introduced `process_line`, `extract_full_path`, and `mark_non_empty_dirs` to enhance readability and maintainability of the filtering process.
- Updated `load_ignore_specs` to improve loading of ignore specifications from settings and .gitignore files.
- Added clipboard functionality to copy output content after saving.
- Cleaned up and clarified docstrings for better understanding of function purposes.
This commit is contained in:
parent
ecfbed98ac
commit
d124fa24cc
1 changed file with 148 additions and 159 deletions
|
|
@@ -6,6 +6,7 @@ import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Tuple, Optional, List, Dict, Any, Set
|
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from importlib.machinery import ModuleSpec
|
||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
import pathspec
|
import pathspec
|
||||||
|
|
@@ -18,58 +19,63 @@ def get_tree_structure(
|
||||||
gitignore_spec: Optional[PathSpec] = None,
|
gitignore_spec: Optional[PathSpec] = None,
|
||||||
tree_and_content_ignore_spec: Optional[PathSpec] = None
|
tree_and_content_ignore_spec: Optional[PathSpec] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generate tree structure of the directory.
|
"""Generate tree structure of the directory."""
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Directory path to generate tree for
|
|
||||||
gitignore_spec: PathSpec object for gitignore patterns
|
|
||||||
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Generated tree structure with empty directories and ignored files removed
|
|
||||||
"""
|
|
||||||
if not check_tree_command():
|
if not check_tree_command():
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
logging.debug('Generating tree structure for path: %s', path)
|
logging.debug('Generating tree structure for path: %s', path)
|
||||||
result = subprocess.run(
|
tree_output = run_tree_command(path)
|
||||||
['tree', '-a', '-f', '--noreport', path],
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
check=True
|
|
||||||
)
|
|
||||||
tree_output = result.stdout.decode('utf-8')
|
|
||||||
logging.debug('Tree output generated:\n%s', tree_output)
|
logging.debug('Tree output generated:\n%s', tree_output)
|
||||||
|
|
||||||
if not gitignore_spec and not tree_and_content_ignore_spec:
|
if not gitignore_spec and not tree_and_content_ignore_spec:
|
||||||
logging.debug('No .gitignore or ignore-tree-and-content specification found')
|
logging.debug('No .gitignore or ignore-tree-and-content specification found')
|
||||||
return tree_output
|
return tree_output
|
||||||
|
|
||||||
logging.debug(
|
logging.debug('Filtering tree output based on ignore specifications')
|
||||||
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
|
return filter_tree_output(tree_output, path, gitignore_spec, tree_and_content_ignore_spec)
|
||||||
|
|
||||||
|
def run_tree_command(path: str) -> str:
|
||||||
|
"""Run the tree command and return its output."""
|
||||||
|
result = subprocess.run(
|
||||||
|
['tree', '-a', '-f', '--noreport', path],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
check=True
|
||||||
)
|
)
|
||||||
|
return result.stdout.decode('utf-8')
|
||||||
|
|
||||||
|
def filter_tree_output(
|
||||||
|
tree_output: str,
|
||||||
|
path: str,
|
||||||
|
gitignore_spec: Optional[PathSpec],
|
||||||
|
tree_and_content_ignore_spec: Optional[PathSpec]
|
||||||
|
) -> str:
|
||||||
|
"""Filter the tree output based on ignore specifications."""
|
||||||
lines: List[str] = tree_output.splitlines()
|
lines: List[str] = tree_output.splitlines()
|
||||||
non_empty_dirs: Set[str] = set()
|
non_empty_dirs: Set[str] = set()
|
||||||
current_path: List[str] = []
|
|
||||||
|
|
||||||
for line in lines:
|
filtered_lines = [
|
||||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
process_line(line, path, gitignore_spec, tree_and_content_ignore_spec, non_empty_dirs)
|
||||||
current_path = current_path[:indent_level]
|
for line in lines
|
||||||
|
]
|
||||||
|
|
||||||
idx = line.find('./')
|
filtered_tree_output = '\n'.join(filter(None, filtered_lines))
|
||||||
if idx == -1:
|
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
||||||
idx = line.find(path)
|
return filtered_tree_output
|
||||||
if idx != -1:
|
|
||||||
full_path = line[idx:].strip()
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if full_path == '.':
|
def process_line(
|
||||||
continue
|
line: str,
|
||||||
|
path: str,
|
||||||
|
gitignore_spec: Optional[PathSpec],
|
||||||
|
tree_and_content_ignore_spec: Optional[PathSpec],
|
||||||
|
non_empty_dirs: Set[str]
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""Process a single line of the tree output."""
|
||||||
|
full_path = extract_full_path(line, path)
|
||||||
|
if not full_path or full_path == '.':
|
||||||
|
return None
|
||||||
|
|
||||||
relative_path = os.path.relpath(full_path, path)
|
relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
|
||||||
relative_path = relative_path.replace(os.sep, '/')
|
|
||||||
|
|
||||||
# Skip if file should be ignored
|
|
||||||
if should_ignore_file(
|
if should_ignore_file(
|
||||||
full_path,
|
full_path,
|
||||||
relative_path,
|
relative_path,
|
||||||
|
|
@@ -78,109 +84,76 @@ def get_tree_structure(
|
||||||
tree_and_content_ignore_spec
|
tree_and_content_ignore_spec
|
||||||
):
|
):
|
||||||
logging.debug('Ignored: %s', relative_path)
|
logging.debug('Ignored: %s', relative_path)
|
||||||
continue
|
return None
|
||||||
|
|
||||||
# If this is a file, mark all parent directories as non-empty
|
|
||||||
if not os.path.isdir(full_path):
|
if not os.path.isdir(full_path):
|
||||||
|
mark_non_empty_dirs(relative_path, non_empty_dirs)
|
||||||
|
|
||||||
|
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
||||||
|
return line.replace('./', '', 1)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_full_path(line: str, path: str) -> Optional[str]:
|
||||||
|
"""Extract the full path from a line of tree output."""
|
||||||
|
idx = line.find('./')
|
||||||
|
if idx == -1:
|
||||||
|
idx = line.find(path)
|
||||||
|
return line[idx:].strip() if idx != -1 else None
|
||||||
|
|
||||||
|
def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None:
|
||||||
|
"""Mark all parent directories of a file as non-empty."""
|
||||||
dir_path = os.path.dirname(relative_path)
|
dir_path = os.path.dirname(relative_path)
|
||||||
while dir_path:
|
while dir_path:
|
||||||
non_empty_dirs.add(dir_path)
|
non_empty_dirs.add(dir_path)
|
||||||
dir_path = os.path.dirname(dir_path)
|
dir_path = os.path.dirname(dir_path)
|
||||||
|
|
||||||
# Second pass: build filtered output
|
|
||||||
filtered_lines: List[str] = []
|
|
||||||
current_path = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
indent_level = len(line) - len(line.lstrip('│ ├└'))
|
|
||||||
current_path = current_path[:indent_level]
|
|
||||||
|
|
||||||
# Always include root path
|
|
||||||
if indent_level == 0:
|
|
||||||
filtered_lines.append(line)
|
|
||||||
continue
|
|
||||||
|
|
||||||
idx = line.find('./')
|
|
||||||
if idx == -1:
|
|
||||||
idx = line.find(path)
|
|
||||||
if idx != -1:
|
|
||||||
full_path = line[idx:].strip()
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
relative_path = os.path.relpath(full_path, path)
|
|
||||||
relative_path = relative_path.replace(os.sep, '/')
|
|
||||||
|
|
||||||
# Skip if file should be ignored
|
|
||||||
if should_ignore_file(
|
|
||||||
full_path,
|
|
||||||
relative_path,
|
|
||||||
gitignore_spec,
|
|
||||||
None,
|
|
||||||
tree_and_content_ignore_spec
|
|
||||||
):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Include line if it's a file or a non-empty directory
|
|
||||||
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
|
||||||
display_line = line.replace('./', '', 1)
|
|
||||||
filtered_lines.append(display_line)
|
|
||||||
|
|
||||||
filtered_tree_output = '\n'.join(filtered_lines)
|
|
||||||
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
|
||||||
return filtered_tree_output
|
|
||||||
|
|
||||||
def load_ignore_specs(
|
def load_ignore_specs(
|
||||||
path: str = '.',
|
path: str = '.',
|
||||||
cli_ignore_patterns: Optional[List[str]] = None
|
cli_ignore_patterns: Optional[List[str]] = None
|
||||||
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
|
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
|
||||||
"""Load ignore specifications from various sources."""
|
"""Load ignore specifications from various sources.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: Base directory path
|
||||||
|
cli_ignore_patterns: List of patterns from command line
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
|
||||||
|
content_ignore_spec, and tree_and_content_ignore_spec
|
||||||
|
"""
|
||||||
gitignore_spec = None
|
gitignore_spec = None
|
||||||
content_ignore_spec = None
|
content_ignore_spec = None
|
||||||
tree_and_content_ignore_list: List[str] = []
|
tree_and_content_ignore_list: List[str] = []
|
||||||
use_gitignore = True
|
use_gitignore = True
|
||||||
|
|
||||||
settings = load_settings_from_file(path)
|
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
|
||||||
if settings:
|
if os.path.exists(repo_settings_path):
|
||||||
|
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
|
||||||
|
with open(repo_settings_path, 'r', encoding='utf-8') as f:
|
||||||
|
settings: Dict[str, Any] = yaml.safe_load(f)
|
||||||
use_gitignore = settings.get('gitignore-import-and-ignore', True)
|
use_gitignore = settings.get('gitignore-import-and-ignore', True)
|
||||||
content_ignore_spec = create_content_ignore_spec(settings)
|
if 'ignore-content' in settings:
|
||||||
|
content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
|
||||||
|
'gitwildmatch', settings['ignore-content']
|
||||||
|
)
|
||||||
|
if 'ignore-tree-and-content' in settings:
|
||||||
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
|
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
|
||||||
|
|
||||||
if cli_ignore_patterns:
|
if cli_ignore_patterns:
|
||||||
tree_and_content_ignore_list.extend(cli_ignore_patterns)
|
tree_and_content_ignore_list.extend(cli_ignore_patterns)
|
||||||
|
|
||||||
if use_gitignore:
|
if use_gitignore:
|
||||||
gitignore_spec = load_gitignore_spec(path)
|
gitignore_path = os.path.join(path, '.gitignore')
|
||||||
|
if os.path.exists(gitignore_path):
|
||||||
|
logging.debug('Loading .gitignore from path: %s', gitignore_path)
|
||||||
|
with open(gitignore_path, 'r', encoding='utf-8') as f:
|
||||||
|
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||||
|
|
||||||
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
|
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
|
||||||
'gitwildmatch', tree_and_content_ignore_list
|
'gitwildmatch', tree_and_content_ignore_list
|
||||||
)
|
)
|
||||||
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||||
|
|
||||||
def load_settings_from_file(path: str) -> Optional[Dict[str, Any]]:
|
|
||||||
"""Load settings from the .repo-to-text-settings.yaml file."""
|
|
||||||
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
|
|
||||||
if os.path.exists(repo_settings_path):
|
|
||||||
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
|
|
||||||
with open(repo_settings_path, 'r', encoding='utf-8') as f:
|
|
||||||
return yaml.safe_load(f)
|
|
||||||
return None
|
|
||||||
|
|
||||||
def create_content_ignore_spec(settings: Dict[str, Any]) -> Optional[PathSpec]:
|
|
||||||
"""Create content ignore spec from settings."""
|
|
||||||
if 'ignore-content' in settings:
|
|
||||||
return pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'])
|
|
||||||
return None
|
|
||||||
|
|
||||||
def load_gitignore_spec(path: str) -> Optional[PathSpec]:
|
|
||||||
"""Load gitignore spec from the .gitignore file."""
|
|
||||||
gitignore_path = os.path.join(path, '.gitignore')
|
|
||||||
if os.path.exists(gitignore_path):
|
|
||||||
logging.debug('Loading .gitignore from path: %s', gitignore_path)
|
|
||||||
with open(gitignore_path, 'r', encoding='utf-8') as f:
|
|
||||||
return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
|
||||||
return None
|
|
||||||
|
|
||||||
def should_ignore_file(
|
def should_ignore_file(
|
||||||
file_path: str,
|
file_path: str,
|
||||||
relative_path: str,
|
relative_path: str,
|
||||||
|
|
@@ -237,17 +210,7 @@ def save_repo_to_text(
|
||||||
to_stdout: bool = False,
|
to_stdout: bool = False,
|
||||||
cli_ignore_patterns: Optional[List[str]] = None
|
cli_ignore_patterns: Optional[List[str]] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Save repository structure and contents to a text file.
|
"""Save repository structure and contents to a text file."""
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Repository path
|
|
||||||
output_dir: Directory to save output file
|
|
||||||
to_stdout: Whether to output to stdout instead of file
|
|
||||||
cli_ignore_patterns: List of patterns from command line
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Path to the output file or the output text if to_stdout is True
|
|
||||||
"""
|
|
||||||
logging.debug('Starting to save repo structure to text for path: %s', path)
|
logging.debug('Starting to save repo structure to text for path: %s', path)
|
||||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
|
||||||
path, cli_ignore_patterns
|
path, cli_ignore_patterns
|
||||||
|
|
@@ -257,14 +220,36 @@ def save_repo_to_text(
|
||||||
)
|
)
|
||||||
logging.debug('Final tree structure to be written: %s', tree_structure)
|
logging.debug('Final tree structure to be written: %s', tree_structure)
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
output_content = generate_output_content(
|
||||||
output_file = f'repo-to-text_{timestamp}.txt'
|
path,
|
||||||
|
tree_structure,
|
||||||
|
gitignore_spec,
|
||||||
|
content_ignore_spec,
|
||||||
|
tree_and_content_ignore_spec
|
||||||
|
)
|
||||||
|
|
||||||
if output_dir:
|
if to_stdout:
|
||||||
if not os.path.exists(output_dir):
|
print(output_content)
|
||||||
os.makedirs(output_dir)
|
return output_content
|
||||||
output_file = os.path.join(output_dir, output_file)
|
|
||||||
|
|
||||||
|
output_file = write_output_to_file(output_content, output_dir)
|
||||||
|
copy_to_clipboard(output_content)
|
||||||
|
|
||||||
|
print(
|
||||||
|
"[SUCCESS] Repository structure and contents successfully saved to "
|
||||||
|
f"file: \"./{output_file}\""
|
||||||
|
)
|
||||||
|
|
||||||
|
return output_file
|
||||||
|
|
||||||
|
def generate_output_content(
|
||||||
|
path: str,
|
||||||
|
tree_structure: str,
|
||||||
|
gitignore_spec: Optional[PathSpec],
|
||||||
|
content_ignore_spec: Optional[PathSpec],
|
||||||
|
tree_and_content_ignore_spec: Optional[PathSpec]
|
||||||
|
) -> str:
|
||||||
|
"""Generate the output content for the repository."""
|
||||||
output_content: List[str] = []
|
output_content: List[str] = []
|
||||||
project_name = os.path.basename(os.path.abspath(path))
|
project_name = os.path.basename(os.path.abspath(path))
|
||||||
output_content.append(f'Directory: {project_name}\n\n')
|
output_content.append(f'Directory: {project_name}\n\n')
|
||||||
|
|
@@ -306,34 +291,38 @@ def save_repo_to_text(
|
||||||
output_content.append('\n')
|
output_content.append('\n')
|
||||||
logging.debug('Repository contents written to output content')
|
logging.debug('Repository contents written to output content')
|
||||||
|
|
||||||
output_text = ''.join(output_content)
|
return ''.join(output_content)
|
||||||
|
|
||||||
if to_stdout:
|
def write_output_to_file(output_content: str, output_dir: Optional[str]) -> str:
|
||||||
print(output_text)
|
"""Write the output content to a file."""
|
||||||
return output_text
|
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||||
|
output_file = f'repo-to-text_{timestamp}.txt'
|
||||||
|
|
||||||
|
if output_dir:
|
||||||
|
if not os.path.exists(output_dir):
|
||||||
|
os.makedirs(output_dir)
|
||||||
|
output_file = os.path.join(output_dir, output_file)
|
||||||
|
|
||||||
with open(output_file, 'w', encoding='utf-8') as file:
|
with open(output_file, 'w', encoding='utf-8') as file:
|
||||||
file.write(output_text)
|
file.write(output_content)
|
||||||
|
|
||||||
|
return output_file
|
||||||
|
|
||||||
|
def copy_to_clipboard(output_content: str) -> None:
|
||||||
|
"""Copy the output content to the clipboard if possible."""
|
||||||
try:
|
try:
|
||||||
import importlib.util # pylint: disable=import-outside-toplevel
|
import importlib.util # pylint: disable=import-outside-toplevel
|
||||||
if importlib.util.find_spec("pyperclip"):
|
spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip")
|
||||||
|
if spec:
|
||||||
import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
|
import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
|
||||||
pyperclip.copy(output_text) # type: ignore
|
pyperclip.copy(output_content) # type: ignore
|
||||||
logging.debug('Repository structure and contents copied to clipboard')
|
logging.debug('Repository structure and contents copied to clipboard')
|
||||||
else:
|
else:
|
||||||
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
|
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
|
||||||
print(" pip install pyperclip")
|
print(" pip install pyperclip")
|
||||||
except (ImportError) as e:
|
except ImportError as e:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
'Could not copy to clipboard. You might be running this '
|
'Could not copy to clipboard. You might be running this '
|
||||||
'script over SSH or without clipboard support.'
|
'script over SSH or without clipboard support.'
|
||||||
)
|
)
|
||||||
logging.debug('Clipboard copy error: %s', e)
|
logging.debug('Clipboard copy error: %s', e)
|
||||||
|
|
||||||
print(
|
|
||||||
"[SUCCESS] Repository structure and contents successfully saved to "
|
|
||||||
f"file: \"./{output_file}\""
|
|
||||||
)
|
|
||||||
|
|
||||||
return output_file
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue