mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-05 19:12:24 -08:00
Refactor: divide logic into separate files and add more tests
This commit is contained in:
parent
6a434e5174
commit
dbfa602cd3
9 changed files with 922 additions and 0 deletions
3
repo_to_text/cli/__init__.py
Normal file
3
repo_to_text/cli/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .cli import create_default_settings_file, parse_args, main
|
||||
|
||||
__all__ = ['create_default_settings_file', 'parse_args', 'main']
|
||||
71
repo_to_text/cli/cli.py
Normal file
71
repo_to_text/cli/cli.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import argparse
|
||||
import textwrap
|
||||
import os
|
||||
import logging
|
||||
from typing import NoReturn
|
||||
|
||||
from ..utils.utils import setup_logging
|
||||
from ..core.core import save_repo_to_text
|
||||
|
||||
def create_default_settings_file() -> None:
    """Create a default ``.repo-to-text-settings.yaml`` in the current directory.

    The file is opened in exclusive-creation mode (``'x'``), so an existing
    settings file is never overwritten and there is no gap between an
    existence check and the write.

    Raises:
        FileExistsError: If the settings file already exists.
    """
    settings_file = '.repo-to-text-settings.yaml'

    default_settings = textwrap.dedent("""\
        # Details: https://github.com/kirill-markin/repo-to-text
        # Syntax: gitignore rules

        # Ignore files and directories for all sections from gitignore file
        # Default: True
        gitignore-import-and-ignore: True

        # Ignore files and directories for tree
        # and "Contents of ..." sections
        ignore-tree-and-content:
          - ".repo-to-text-settings.yaml"

        # Ignore files and directories for "Contents of ..." section
        ignore-content:
          - "README.md"
          - "LICENSE"
    """)

    try:
        # 'x' fails if the path already exists, which makes check + create atomic.
        with open(settings_file, 'x', encoding='utf-8') as f:
            f.write(default_settings)
    except FileExistsError:
        # Re-raise with the user-facing explanation instead of the bare OS error.
        raise FileExistsError(f"The settings file '{settings_file}' already exists. Please remove it or rename it if you want to create a new default settings file.") from None

    print("Default .repo-to-text-settings.yaml created.")
|
||||
|
||||
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads them from ``sys.argv[1:]``, so existing callers
            that pass nothing behave exactly as before. Passing an explicit
            list allows programmatic use and testing.

    Returns:
        argparse.Namespace: Parsed command line arguments with the attributes
        ``input_dir``, ``debug``, ``output_dir``, ``create_settings``,
        ``stdout`` and ``ignore_patterns``.
    """
    parser = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    parser.add_argument('input_dir', nargs='?', default='.', help='Directory to process')
    parser.add_argument('--debug', action='store_true', help='Enable debug logging')
    parser.add_argument('--output-dir', type=str, help='Directory to save the output file')
    # '--init' is an alias; both spellings are stored as `create_settings`.
    parser.add_argument('--create-settings', '--init', action='store_true', help='Create default .repo-to-text-settings.yaml file')
    parser.add_argument('--stdout', action='store_true', help='Output to stdout instead of a file')
    parser.add_argument('--ignore-patterns', nargs='*', help="List of files or directories to ignore in both tree and content sections. Supports wildcards (e.g., '*').")
    return parser.parse_args(argv)
|
||||
|
||||
def main() -> NoReturn:
    """Main entry point for the CLI.

    Parses the command line, configures logging, then either writes the
    default settings file (``--create-settings`` / ``--init``) or dumps the
    repository to text. Exceptions raised by those steps are not caught here
    and propagate to the caller.

    Raises:
        SystemExit: Always, with exit code 0, once the selected action
            completes.
    """
    args = parse_args()
    setup_logging(debug=args.debug)
    logging.debug('repo-to-text script started')

    if args.create_settings:
        create_default_settings_file()
        logging.debug('.repo-to-text-settings.yaml file created')
    else:
        save_repo_to_text(
            path=args.input_dir,
            output_dir=args.output_dir,
            to_stdout=args.stdout,
            cli_ignore_patterns=args.ignore_patterns,
        )

    logging.debug('repo-to-text script finished')
    # The bare `exit()` builtin is injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S`, frozen apps); raise directly.
    raise SystemExit(0)
|
||||
3
repo_to_text/core/__init__.py
Normal file
3
repo_to_text/core/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
|
||||
|
||||
__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text']
|
||||
226
repo_to_text/core/core.py
Normal file
226
repo_to_text/core/core.py
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
import yaml
|
||||
from datetime import datetime, timezone
|
||||
from typing import Tuple, Optional
|
||||
import pathspec
|
||||
from pathspec import PathSpec
|
||||
|
||||
from ..utils.utils import check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
|
||||
def get_tree_structure(path: str = '.', gitignore_spec: Optional[PathSpec] = None, tree_and_content_ignore_spec: Optional[PathSpec] = None) -> str:
    """Generate tree structure of the directory.

    Runs ``tree -a -f --noreport`` on ``path`` and, when at least one ignore
    specification is supplied, drops every line whose path is rejected by
    ``should_ignore_file``.

    Args:
        path: Directory path to generate tree for
        gitignore_spec: PathSpec object for gitignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        str: Generated tree structure. An empty string when the ``tree``
        command is unavailable; the raw ``tree`` output when both
        specifications are falsy.
    """
    if not check_tree_command():
        return ""

    logging.debug(f'Generating tree structure for path: {path}')
    result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    # File names are arbitrary bytes; substitute undecodable ones instead of
    # aborting the whole run with UnicodeDecodeError.
    tree_output = result.stdout.decode('utf-8', errors='replace')
    logging.debug(f'Tree output generated:\n{tree_output}')

    if not gitignore_spec and not tree_and_content_ignore_spec:
        logging.debug('No .gitignore or ignore-tree-and-content specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
    filtered_lines = []

    for line in tree_output.splitlines():
        # `tree -f` prints each entry with its path prefix; locate where the
        # path starts after the drawing characters.
        idx = line.find('./')
        if idx == -1:
            idx = line.find(path)
        if idx == -1:
            continue
        full_path = line[idx:].strip()

        # The root line of `tree .` is just '.', which is not an entry.
        if full_path == '.':
            continue

        relative_path = os.path.relpath(full_path, path)
        relative_path = relative_path.replace(os.sep, '/')
        if os.path.isdir(full_path):
            # Trailing slash lets directory-only patterns ("build/") match.
            relative_path += '/'

        if should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
            logging.debug(f'Ignored: {relative_path}')
            continue

        # Show the entry without the leading './' that `-f` adds.
        display_line = line.replace('./', '', 1)
        filtered_lines.append(display_line)

    filtered_tree_output = '\n'.join(filtered_lines)
    logging.debug(f'Filtered tree structure:\n{filtered_tree_output}')
    logging.debug('Tree structure filtering complete')
    return filtered_tree_output
|
||||
|
||||
def load_ignore_specs(path: str = '.', cli_ignore_patterns: Optional[list] = None) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
    """Load ignore specifications from various sources.

    Sources, all optional: ``.repo-to-text-settings.yaml`` in ``path``,
    patterns passed on the command line, and ``.gitignore`` in ``path``
    (only read when the settings do not disable it via
    ``gitignore-import-and-ignore``).

    Args:
        path: Base directory path
        cli_ignore_patterns: List of patterns from command line

    Returns:
        Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of
        gitignore_spec, content_ignore_spec, and tree_and_content_ignore_spec.
        The first two are ``None`` when their source is absent; the last is
        always a PathSpec (possibly built from no patterns).
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_list = []
    use_gitignore = True

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
        with open(repo_settings_path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)

        # An empty file yields None and a malformed one may yield a list or
        # scalar; fall back to "no settings" rather than failing on `.get`.
        if isinstance(loaded, dict):
            settings = loaded
        else:
            if loaded is not None:
                logging.warning(f'Ignoring {repo_settings_path}: expected a mapping at the top level')
            settings = {}

        use_gitignore = settings.get('gitignore-import-and-ignore', True)
        if 'ignore-content' in settings:
            # A key with no items parses as None; treat it as an empty list.
            content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'] or [])
        if 'ignore-tree-and-content' in settings:
            tree_and_content_ignore_list.extend(settings['ignore-tree-and-content'] or [])

    if cli_ignore_patterns:
        tree_and_content_ignore_list.extend(cli_ignore_patterns)

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_ignore_list)
    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||
|
||||
def should_ignore_file(file_path: str, relative_path: str, gitignore_spec: Optional[PathSpec],
                       content_ignore_spec: Optional[PathSpec], tree_and_content_ignore_spec: Optional[PathSpec]) -> bool:
    """Check if a file should be ignored based on various ignore specifications.

    A path is ignored when any of the following holds: ``is_ignored_path``
    rejects it, one of the supplied specs matches ``relative_path``, or the
    file's base name starts with ``repo-to-text_``.

    Args:
        file_path: Full path to the file
        relative_path: Path relative to the repository root
        gitignore_spec: PathSpec object for gitignore patterns
        content_ignore_spec: PathSpec object for content ignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        bool: True if file should be ignored, False otherwise
    """
    relative_path = relative_path.replace(os.sep, '/')

    if relative_path.startswith('./'):
        relative_path = relative_path[2:]

    # Directories are matched with a trailing slash. Callers such as
    # get_tree_structure may already have appended one, so avoid "dir//".
    if os.path.isdir(file_path) and not relative_path.endswith('/'):
        relative_path += '/'

    result = (
        is_ignored_path(file_path) or
        bool(gitignore_spec and gitignore_spec.match_file(relative_path)) or
        bool(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
        bool(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
        os.path.basename(file_path).startswith('repo-to-text_')
    )

    logging.debug('Checking if file should be ignored:')
    logging.debug(f' file_path: {file_path}')
    logging.debug(f' relative_path: {relative_path}')
    logging.debug(f' Result: {result}')
    return result
|
||||
|
||||
def save_repo_to_text(path: str = '.', output_dir: Optional[str] = None, to_stdout: bool = False, cli_ignore_patterns: Optional[list] = None) -> str:
    """Save repository structure and contents to a text file.

    The output consists of a header, the filtered directory tree, and one
    fenced "Contents of ..." section per file that is not ignored. Unless
    ``to_stdout`` is set, the text is written to a timestamped
    ``repo-to-text_*.txt`` file and, if ``pyperclip`` is importable, copied
    to the clipboard on a best-effort basis.

    Args:
        path: Repository path
        output_dir: Directory to save output file (created if missing)
        to_stdout: Whether to output to stdout instead of file
        cli_ignore_patterns: List of patterns from command line

    Returns:
        str: Path to the output file or the output text if to_stdout is True
    """
    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path, cli_ignore_patterns)
    tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)
    logging.debug(f'Final tree structure to be written: {tree_structure}')

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo-to-text_{timestamp}.txt'

    if output_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    output_content = []
    project_name = os.path.basename(os.path.abspath(path))
    output_content.append(f'Directory: {project_name}\n\n')
    output_content.append('Directory Structure:\n')
    output_content.append('```\n.\n')

    # .gitignore is added by hand when present in `path`.
    if os.path.exists(os.path.join(path, '.gitignore')):
        output_content.append('├── .gitignore\n')

    output_content.append(tree_structure + '\n' + '```\n')
    logging.debug('Tree structure written to output content')

    for root, _, files in os.walk(path):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, path)

            if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
                continue

            # Strip only a leading './'; replacing the first './' anywhere
            # would corrupt names such as 'dir./file'.
            if relative_path.startswith('./'):
                relative_path = relative_path[2:]

            output_content.append(f'\nContents of {relative_path}:\n')
            output_content.append('```\n')
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    output_content.append(f.read())
            except UnicodeDecodeError:
                logging.debug(f'Could not decode file contents: {file_path}')
                output_content.append('[Could not decode file contents]\n')
            output_content.append('\n```\n')

    output_content.append('\n')
    logging.debug('Repository contents written to output content')

    output_text = ''.join(output_content)

    if to_stdout:
        print(output_text)
        return output_text

    # Contents were read as UTF-8; write them back as UTF-8 so the platform
    # default encoding cannot cause UnicodeEncodeError.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(output_text)

    # Clipboard copy is deliberately best-effort: any failure is logged and
    # must not prevent the file result from being reported.
    try:
        import importlib.util
        if importlib.util.find_spec("pyperclip"):
            import pyperclip  # type: ignore
            pyperclip.copy(output_text)
            logging.debug('Repository structure and contents copied to clipboard')
        else:
            print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
            print(" pip install pyperclip")
    except Exception as e:
        logging.warning('Could not copy to clipboard. You might be running this script over SSH or without clipboard support.')
        logging.debug(f'Clipboard copy error: {e}')

    print(f"[SUCCESS] Repository structure and contents successfully saved to file: \"./{output_file}\"")

    return output_file
|
||||
3
repo_to_text/utils/__init__.py
Normal file
3
repo_to_text/utils/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .utils import setup_logging, check_tree_command, is_ignored_path, remove_empty_dirs
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path', 'remove_empty_dirs']
|
||||
82
repo_to_text/utils/utils.py
Normal file
82
repo_to_text/utils/utils.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import os
|
||||
import shutil
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger through ``logging.basicConfig``.

    Args:
        debug: Selects the DEBUG level when true; INFO is used otherwise.
    """
    level = logging.INFO
    if debug:
        level = logging.DEBUG
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=level,
    )
|
||||
|
||||
def check_tree_command() -> bool:
    """Report whether the ``tree`` executable can be found on PATH.

    When it is missing, installation hints are printed to stdout.

    Returns:
        bool: True when ``shutil.which('tree')`` finds the command,
        False otherwise.
    """
    tree_found = shutil.which('tree') is not None
    if not tree_found:
        install_hints = (
            "The 'tree' command is not found. Please install it using one of the following commands:",
            "For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree",
            "For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree",
        )
        for hint in install_hints:
            print(hint)
    return tree_found
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Apply the built-in ignore rules to a path.

    A path is ignored when it contains the text ``.git`` anywhere, or when
    the path string itself begins with ``repo-to-text_``.

    NOTE: the ``.git`` rule is a plain substring test, so names such as
    ``.gitignore`` or ``.github`` are matched by it as well.

    Args:
        file_path: Path to check

    Returns:
        bool: True if path should be ignored, False otherwise
    """
    dir_markers: List[str] = ['.git']
    file_prefixes: List[str] = ['repo-to-text_']

    contains_marker = False
    for marker in dir_markers:
        if file_path.find(marker) != -1:
            contains_marker = True
            break

    # str.startswith accepts a tuple of candidate prefixes.
    has_prefix = file_path.startswith(tuple(file_prefixes))

    if not (contains_marker or has_prefix):
        return False

    logging.debug(f'Path ignored: {file_path}')
    return True
|
||||
|
||||
def remove_empty_dirs(tree_output: str, path: str = '.') -> str:
    """Remove empty directories from tree output.

    The last whitespace-separated token of each line is taken as the entry's
    path and checked against the file system (relative paths resolve against
    the current working directory). A directory line is dropped when

    1. the directory holds no regular file directly inside it on disk, or
    2. no non-directory line of ``tree_output`` lies beneath it, i.e. every
       file it contains was filtered out earlier.

    Previously the second check was computed but its result was discarded.

    Args:
        tree_output: Output from tree command
        path: Base path for the tree. Accepted for interface compatibility;
            not used by the current implementation.

    Returns:
        str: Tree output with empty directories removed
    """
    logging.debug('Removing empty directories from tree output')

    kept = []  # (line, entry_path or None, is_dir) surviving the first pass
    dirs_with_visible_files = set()

    for line in tree_output.splitlines():
        parts = line.strip().split()
        if not parts:
            # Blank lines carry no path; pass them through untouched.
            kept.append((line, None, False))
            continue

        full_path = parts[-1]
        is_dir = os.path.isdir(full_path)
        if is_dir:
            has_direct_file = any(
                os.path.isfile(os.path.join(full_path, name))
                for name in os.listdir(full_path)
            )
            if not has_direct_file:
                logging.debug(f'Directory is empty and will be removed: {full_path}')
                continue
        else:
            # Record every ancestor of a visible file. The walk stops at ''
            # (relative paths) or once an ancestor is already recorded, which
            # also covers the '/' fixed point of os.path.dirname.
            ancestor = os.path.dirname(full_path)
            while ancestor and ancestor not in dirs_with_visible_files:
                dirs_with_visible_files.add(ancestor)
                ancestor = os.path.dirname(ancestor)

        kept.append((line, full_path, is_dir))

    final_lines = []
    for line, full_path, is_dir in kept:
        if is_dir and full_path not in dirs_with_visible_files:
            logging.debug(f'Directory is empty and will be removed: {full_path}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
||||
Loading…
Add table
Add a link
Reference in a new issue