linter cleanup

This commit is contained in:
Kirill Markin 2024-12-17 14:41:42 +01:00
parent 0cba3592f2
commit 5f283feefd
No known key found for this signature in database
GPG key ID: 03AB9530E15B9C1C
9 changed files with 295 additions and 191 deletions

View file

@@ -1,2 +1,4 @@
"""This is the main package for the repo_to_text package."""
__author__ = 'Kirill Markin'
__email__ = 'markinkirill@gmail.com'

View file

@@ -1,3 +1,5 @@
"""This module contains the CLI interface for the repo_to_text package."""
from .cli import create_default_settings_file, parse_args, main
__all__ = ['create_default_settings_file', 'parse_args', 'main']
__all__ = ['create_default_settings_file', 'parse_args', 'main']

View file

@@ -1,3 +1,7 @@
"""
CLI for repo-to-text
"""
import argparse
import textwrap
import os
@@ -12,8 +16,11 @@ def create_default_settings_file() -> None:
"""Create a default .repo-to-text-settings.yaml file."""
settings_file = '.repo-to-text-settings.yaml'
if os.path.exists(settings_file):
raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.")
raise FileExistsError(
f"The settings file '{settings_file}' already exists. "
"Please remove it or rename it if you want to create a new default settings file."
)
default_settings = textwrap.dedent("""\
# Details: https://github.com/kirill-markin/repo-to-text
# Syntax: gitignore rules
@@ -32,7 +39,7 @@ def create_default_settings_file() -> None:
- "README.md"
- "LICENSE"
""")
with open('.repo-to-text-settings.yaml', 'w') as f:
with open('.repo-to-text-settings.yaml', 'w', encoding='utf-8') as f:
f.write(default_settings)
print("Default .repo-to-text-settings.yaml created.")
@@ -42,13 +49,25 @@ def parse_args() -> argparse.Namespace:
Returns:
argparse.Namespace: Parsed command line arguments
"""
parser = argparse.ArgumentParser(description='Convert repository structure and contents to text')
parser = argparse.ArgumentParser(
description='Convert repository structure and contents to text'
)
parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process')
parser.add_argument('--debug', action='store_true', help='Enable debug logging')
parser.add_argument('--output-dir', type=str, help='Directory to save the output file')
parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file')
parser.add_argument(
'--create-settings',
'--init',
action='store_true',
help='Create default .repo-to-text-settings.yaml file'
)
parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file')
parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').")
parser.add_argument(
'--ignore-patterns',
nargs='*',
help="List of files or directories to ignore in both tree and content sections. "
"Supports wildcards (e.g., '*')."
)
return parser.parse_args()
def main() -> NoReturn:
@@ -60,7 +79,7 @@ def main() -> NoReturn:
args = parse_args()
setup_logging(debug=args.debug)
logging.debug('repo-to-text script started')
try:
if args.create_settings:
create_default_settings_file()
@@ -72,9 +91,9 @@ def main() -> NoReturn:
to_stdout=args.stdout,
cli_ignore_patterns=args.ignore_patterns
)
logging.debug('repo-to-text script finished')
sys.exit(0)
except Exception as e:
logging.error(f'Error occurred: {str(e)}')
sys.exit(1)
except (FileNotFoundError, FileExistsError, PermissionError, OSError) as e:
logging.error('Error occurred: %s', str(e))
sys.exit(1)

View file

@@ -1,15 +1,23 @@
"""
Core functionality for repo-to-text
"""
import os
import subprocess
from typing import Tuple, Optional, List, Dict, Any
from datetime import datetime, timezone
import logging
import yaml
from datetime import datetime, timezone
from typing import Tuple, Optional, List
import pathspec
from pathspec import PathSpec
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str:
def get_tree_structure(
path: str = '.',
gitignore_spec: Optional[PathSpec] = None,
tree_and_content_ignore_spec: Optional[PathSpec] = None
) -> str:
"""Generate tree structure of the directory.
Args:
@@ -22,17 +30,23 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
"""
if not check_tree_command():
return ""
logging.debug(f'Generating tree structure for path: {path}')
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
logging.debug('Generating tree structure for path: %s', path)
result = subprocess.run(
['tree', '-a', '-f', '--noreport', path],
stdout=subprocess.PIPE,
check=True
)
tree_output = result.stdout.decode('utf-8')
logging.debug(f'Tree output generated:\n{tree_output}')
logging.debug('Tree output generated:\n%s', tree_output)
if not gitignore_spec and not tree_and_content_ignore_spec:
logging.debug('No .gitignore or ignore-tree-and-content specification found')
return tree_output
logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
logging.debug(
'Filtering tree output based on .gitignore and ignore-tree-and-content specification'
)
filtered_lines: List[str] = []
for line in tree_output.splitlines():
@@ -43,7 +57,7 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
full_path = line[idx:].strip()
else:
continue
if full_path == '.':
continue
@@ -52,18 +66,27 @@ def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = Non
if os.path.isdir(full_path):
relative_path += '/'
if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
if not should_ignore_file(
full_path,
relative_path,
gitignore_spec,
None,
tree_and_content_ignore_spec
):
display_line = line.replace('./', '', 1)
filtered_lines.append(display_line)
else:
logging.debug(f'Ignored: {relative_path}')
logging.debug('Ignored: %s', relative_path)
filtered_tree_output = '\n'.join(filtered_lines)
logging.debug(f'Filtered tree structure:\n{filtered_tree_output}')
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
logging.debug('Tree structure filtering complete')
return filtered_tree_output
def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
def load_ignore_specs(
path: str = '.',
cli_ignore_patterns: Optional[List[str]] = None
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
"""Load ignore specifications from various sources.
Args:
@@ -71,7 +94,8 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
cli_ignore_patterns: List of patterns from command line
Returns:
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
content_ignore_spec, and tree_and_content_ignore_spec
"""
gitignore_spec = None
content_ignore_spec = None
@@ -80,14 +104,16 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
if os.path.exists(repo_settings_path):
logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
with open(repo_settings_path, 'r') as f:
settings = yaml.safe_load(f)
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
with open(repo_settings_path, 'r', encoding='utf-8') as f:
settings: Dict[str, Any] = yaml.safe_load(f)
use_gitignore = settings.get('gitignore-import-and-ignore', True)
if 'ignore-content' in settings:
content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'])
content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
'gitwildmatch', settings['ignore-content']
)
if 'ignore-tree-and-content' in settings:
tree_and_content_ignore_list.extend(settings['ignore-tree-and-content'])
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
if cli_ignore_patterns:
tree_and_content_ignore_list.extend(cli_ignore_patterns)
@@ -95,15 +121,22 @@ def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[List[str]]
if use_gitignore:
gitignore_path = os.path.join(path, '.gitignore')
if os.path.exists(gitignore_path):
logging.debug(f'Loading .gitignore from path: {gitignore_path}')
with open(gitignore_path, 'r') as f:
logging.debug('Loading .gitignore from path: %s', gitignore_path)
with open(gitignore_path, 'r', encoding='utf-8') as f:
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list)
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
'gitwildmatch', tree_and_content_ignore_list
)
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool:
def should_ignore_file(
file_path: str,
relative_path: str,
gitignore_spec: Optional[PathSpec],
content_ignore_spec: Optional[PathSpec],
tree_and_content_ignore_spec: Optional[PathSpec]
) -> bool:
"""Check if a file should be ignored based on various ignore specifications.
Args:
@@ -126,19 +159,33 @@ def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optio
result = (
is_ignored_path(file_path) or
bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or
bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
bool(
gitignore_spec and
gitignore_spec.match_file(relative_path)
) or
bool(
content_ignore_spec and
content_ignore_spec.match_file(relative_path)
) or
bool(
tree_and_content_ignore_spec and
tree_and_content_ignore_spec.match_file(relative_path)
) or
os.path.basename(file_path).startswith('repo-to-text_')
)
logging.debug(f'Checking if file should be ignored:')
logging.debug(f' file_path: {file_path}')
logging.debug(f' relative_path: {relative_path}')
logging.debug(f' Result: {result}')
logging.debug('Checking if file should be ignored:')
logging.debug(' file_path: %s', file_path)
logging.debug(' relative_path: %s', relative_path)
logging.debug(' Result: %s', result)
return result
def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[List[str]] = None) -> str:
def save_repo_to_text(
path: str = '.',
output_dir: Optional[str] = None,
to_stdout: bool = False,
cli_ignore_patterns: Optional[List[str]] = None
) -> str:
"""Save repository structure and contents to a text file.
Args:
@@ -150,20 +197,24 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
Returns:
str: Path to the output file or the output text if to_stdout is True
"""
logging.debug(f'Starting to save repo structure to text for path: {path}')
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns)
tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
tree_structure = remove_empty_dirs(tree_structure, path)
logging.debug(f'Final tree structure to be written: {tree_structure}')
logging.debug('Starting to save repo structure to text for path: %s', path)
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(
path, cli_ignore_patterns
)
tree_structure: str = get_tree_structure(
path, gitignore_spec, tree_and_content_ignore_spec
)
tree_structure = remove_empty_dirs(tree_structure)
logging.debug('Final tree structure to be written: %s', tree_structure)
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
output_file = f'repo-to-text_{timestamp}.txt'
if output_dir:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_file = os.path.join(output_dir, output_file)
output_content: List[str] = []
project_name = os.path.basename(os.path.abspath(path))
output_content.append(f'Directory: {project_name}\n\n')
@@ -172,7 +223,7 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
if os.path.exists(os.path.join(path, '.gitignore')):
output_content.append('├── .gitignore\n')
output_content.append(tree_structure + '\n' + '```\n')
logging.debug('Tree structure written to output content')
@@ -180,47 +231,59 @@ def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdo
for filename in files:
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, path)
if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
if should_ignore_file(
file_path,
relative_path,
gitignore_spec,
content_ignore_spec,
tree_and_content_ignore_spec
):
continue
relative_path = relative_path.replace('./', '', 1)
output_content.append(f'\nContents of {relative_path}:\n')
output_content.append('```\n')
try:
with open(file_path, 'r', encoding='utf-8') as f:
output_content.append(f.read())
except UnicodeDecodeError:
logging.debug(f'Could not decode file contents: {file_path}')
logging.debug('Could not decode file contents: %s', file_path)
output_content.append('[Could not decode file contents]\n')
output_content.append('\n```\n')
output_content.append('\n')
logging.debug('Repository contents written to output content')
output_text = ''.join(output_content)
if to_stdout:
print(output_text)
return output_text
with open(output_file, 'w') as file:
with open(output_file, 'w', encoding='utf-8') as file:
file.write(output_text)
try:
import importlib.util
import importlib.util # pylint: disable=import-outside-toplevel
if importlib.util.find_spec("pyperclip"):
import pyperclip # type: ignore
import pyperclip # pylint: disable=import-outside-toplevel # type: ignore
pyperclip.copy(output_text) # type: ignore
logging.debug('Repository structure and contents copied to clipboard')
else:
print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
print(" pip install pyperclip")
except Exception as e:
logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.')
logging.debug(f'Clipboard copy error: {e}')
print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"")
return output_file
except (ImportError) as e:
logging.warning(
'Could not copy to clipboard. You might be running this '
'script over SSH or without clipboard support.'
)
logging.debug('Clipboard copy error: %s', e)
print(
"[SUCCESS] Repository structure and contents successfully saved to "
f"file: \"./{output_file}\""
)
return output_file

View file

@@ -1,3 +1,5 @@
"""This is the main entry point for the repo_to_text package."""
from repo_to_text.cli.cli import main
if __name__ == '__main__':

View file

@@ -1,3 +1,5 @@
"""This module contains utility functions for the repo_to_text package."""
import os
import shutil
import logging
@@ -19,7 +21,10 @@ def check_tree_command() -> bool:
bool: True if tree command is available, False otherwise
"""
if shutil.which('tree') is None:
print("The 'tree' command is not found. Please install it using one of the following commands:")
print(
"The 'tree' command is not found. "
+ "Please install it using one of the following commands:"
)
print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree")
print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree")
return False
@@ -40,43 +45,38 @@ def is_ignored_path(file_path: str) -> bool:
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
result = is_ignored_dir or is_ignored_file
if result:
logging.debug(f'Path ignored: {file_path}')
logging.debug('Path ignored: %s', file_path)
return result
def remove_empty_dirs(tree_output: str, path: str = '.') -> str:
"""Remove empty directories from tree output.
Args:
tree_output: Output from tree command
path: Base path for the tree
Returns:
str: Tree output with empty directories removed
"""
def remove_empty_dirs(tree_output: str) -> str:
"""Remove empty directories from tree output."""
logging.debug('Removing empty directories from tree output')
lines = tree_output.splitlines()
non_empty_dirs: Set[str] = set()
filtered_lines: List[str] = []
# Track directories that have files or subdirectories
non_empty_dirs: Set[str] = set()
# First pass: identify non-empty directories
for line in reversed(lines):
stripped_line = line.strip()
if not stripped_line.endswith('/'):
# This is a file, mark its parent directory as non-empty
parent_dir: str = os.path.dirname(stripped_line)
while parent_dir:
non_empty_dirs.add(parent_dir)
parent_dir = os.path.dirname(parent_dir)
# Second pass: filter out empty directories
for line in lines:
parts = line.strip().split()
if parts:
full_path = parts[-1]
if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)):
logging.debug(f'Directory is empty and will be removed: {full_path}')
stripped_line = line.strip()
if stripped_line.endswith('/'):
# This is a directory
dir_path = stripped_line[:-1] # Remove trailing slash
if dir_path not in non_empty_dirs:
logging.debug('Directory is empty and will be removed: %s', dir_path)
continue
non_empty_dirs.add(os.path.dirname(full_path))
filtered_lines.append(line)
final_lines: List[str] = []
for line in filtered_lines:
parts = line.strip().split()
if parts:
full_path = parts[-1]
if os.path.isdir(full_path) and full_path not in non_empty_dirs:
logging.debug(f'Directory is empty and will be removed: {full_path}')
continue
final_lines.append(line)
filtered_lines.append(line)
logging.debug('Empty directory removal complete')
return '\n'.join(filtered_lines)
return '\n'.join(filtered_lines)