From 7921839f089e0185d62df5beb822d07cf1ba2fb6 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Sun, 9 Jun 2024 09:38:27 +0200 Subject: [PATCH 1/4] ignore-content setting --- .repo-to-text-settings.yaml | 12 ++++++++++++ repo_to_text/main.py | 31 +++++++++++++++++++++++++------ requirements.txt | 1 + 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 .repo-to-text-settings.yaml diff --git a/.repo-to-text-settings.yaml b/.repo-to-text-settings.yaml new file mode 100644 index 0000000..0b1d0e1 --- /dev/null +++ b/.repo-to-text-settings.yaml @@ -0,0 +1,12 @@ +# Details: https://github.com/kirill-markin/repo-to-text + +# Ignore files and directories for "Contents of ..." section +# Syntax: gitignore +ignore-content: + - ".repo-to-text-settings.yaml" + - "README.md" + - "LICENSE" + - "examples/" + - "tests/" + - "MANIFEST.in" + - "setup.py" diff --git a/repo_to_text/main.py b/repo_to_text/main.py index a275276..82295ef 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -3,6 +3,7 @@ import subprocess import pathspec import logging import argparse +import yaml from datetime import datetime import pyperclip @@ -33,14 +34,32 @@ def get_tree_structure(path='.', gitignore_spec=None) -> str: logging.debug('Tree structure filtering complete') return '\n'.join(filtered_lines) -def load_gitignore(path='.'): +def load_ignore_specs(path='.'): + gitignore_spec = None + content_ignore_spec = None + gitignore_path = os.path.join(path, '.gitignore') if os.path.exists(gitignore_path): logging.debug(f'Loading .gitignore from path: {gitignore_path}') with open(gitignore_path, 'r') as f: - return pathspec.PathSpec.from_lines('gitwildmatch', f) - logging.debug('.gitignore not found') - return None + gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) + + repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') + if os.path.exists(repo_settings_path): + logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') + with open(repo_settings_path, 'r') as f: + ignore_data = yaml.safe_load(f) + if 'ignore-content' in ignore_data: + content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_data['ignore-content']) + + return gitignore_spec, content_ignore_spec + +def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec): + return ( + is_ignored_path(file_path) or + (gitignore_spec and gitignore_spec.match_file(relative_path)) or + (content_ignore_spec and content_ignore_spec.match_file(relative_path)) + ) def is_ignored_path(file_path: str) -> bool: ignored_dirs = ['.git'] @@ -83,7 +102,7 @@ def remove_empty_dirs(tree_output: str, path='.') -> str: def save_repo_to_text(path='.', output_dir=None) -> str: logging.debug(f'Starting to save repo structure to text for path: {path}') - gitignore_spec = load_gitignore(path) + gitignore_spec, content_ignore_spec = load_ignore_specs(path) tree_structure = get_tree_structure(path, gitignore_spec) tree_structure = remove_empty_dirs(tree_structure, path) @@ -115,7 +134,7 @@ def save_repo_to_text(path='.', output_dir=None) -> str: file_path = os.path.join(root, filename) relative_path = os.path.relpath(file_path, path) - if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)): + if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec): continue relative_path = relative_path.replace('./', '', 1) diff --git a/requirements.txt b/requirements.txt index fcd830c..1d9e36e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pathspec==0.12.1 pytest==8.2.2 argparse==1.4.0 pyperclip==1.8.2 +PyYAML==6.0.1 From b6bcdeca03538f432273dde17abf623617640695 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Sun, 9 Jun 2024 09:46:54 +0200 Subject: [PATCH 2/4] ignore-tree-and-content setting --- .repo-to-text-settings.yaml | 16 ++++++++++------ repo_to_text/main.py | 27 ++++++++++++++++----------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/.repo-to-text-settings.yaml b/.repo-to-text-settings.yaml index 0b1d0e1..8b7789f 100644 --- a/.repo-to-text-settings.yaml +++ b/.repo-to-text-settings.yaml @@ -1,12 +1,16 @@ # Details: https://github.com/kirill-markin/repo-to-text +# Syntax: gitignore rules -# Ignore files and directories for "Contents of ..." section -# Syntax: gitignore -ignore-content: +# Ignore files and directories for tree +# and "Contents of ..." sections +ignore-tree-and-content: - ".repo-to-text-settings.yaml" - - "README.md" - - "LICENSE" - "examples/" - - "tests/" - "MANIFEST.in" - "setup.py" + +# Ignore files and directories for "Contents of ..." section +ignore-content: + - "README.md" + - "LICENSE" + - "tests/" diff --git a/repo_to_text/main.py b/repo_to_text/main.py index 82295ef..68211b8 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -11,24 +11,24 @@ def setup_logging(debug=False): logging_level = logging.DEBUG if debug else logging.INFO logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s') -def get_tree_structure(path='.', gitignore_spec=None) -> str: +def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str: logging.debug(f'Generating tree structure for path: {path}') result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) tree_output = result.stdout.decode('utf-8') logging.debug(f'Tree output generated: {tree_output}') - if not gitignore_spec: - logging.debug('No .gitignore specification found') + if not gitignore_spec and not tree_and_content_ignore_spec: + logging.debug('No .gitignore or ignore-tree-and-content specification found') return tree_output - logging.debug('Filtering tree output based on .gitignore specification') + logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') filtered_lines = [] for line in tree_output.splitlines(): parts = line.strip().split() if parts: full_path = parts[-1] relative_path = os.path.relpath(full_path, path) - if not gitignore_spec.match_file(relative_path) and not is_ignored_path(relative_path): + if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): filtered_lines.append(line.replace('./', '', 1)) logging.debug('Tree structure filtering complete') @@ -37,6 +37,7 @@ def get_tree_structure(path='.', gitignore_spec=None) -> str: def load_ignore_specs(path='.'): gitignore_spec = None content_ignore_spec = None + tree_and_content_ignore_spec = None gitignore_path = os.path.join(path, '.gitignore') if os.path.exists(gitignore_path): @@ -51,14 +52,18 @@ def load_ignore_specs(path='.'): ignore_data = yaml.safe_load(f) if 'ignore-content' in ignore_data: content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_data['ignore-content']) + if 'ignore-tree-and-content' in ignore_data: + tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_data['ignore-tree-and-content']) - return gitignore_spec, content_ignore_spec + return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec -def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec): + +def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): return ( is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)) or - (content_ignore_spec and content_ignore_spec.match_file(relative_path)) + (content_ignore_spec and content_ignore_spec.match_file(relative_path)) or + (tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) ) def is_ignored_path(file_path: str) -> bool: @@ -102,8 +107,8 @@ def remove_empty_dirs(tree_output: str, path='.') -> str: def save_repo_to_text(path='.', output_dir=None) -> str: logging.debug(f'Starting to save repo structure to text for path: {path}') - gitignore_spec, content_ignore_spec = load_ignore_specs(path) - tree_structure = get_tree_structure(path, gitignore_spec) + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path) + tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) tree_structure = remove_empty_dirs(tree_structure, path) # Add timestamp to the output file name with a descriptive name @@ -134,7 +139,7 @@ def save_repo_to_text(path='.', output_dir=None) -> str: file_path = os.path.join(root, filename) relative_path = os.path.relpath(file_path, path) - if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec): + if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): continue relative_path = relative_path.replace('./', '', 1) From 72ac64ceb6f55da74c1e3f42782cadbd7b4de877 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Sun, 9 Jun 2024 09:53:27 +0200 Subject: [PATCH 3/4] gitignore-import-and-ignore setting --- .repo-to-text-settings.yaml | 4 ++++ repo_to_text/main.py | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/.repo-to-text-settings.yaml b/.repo-to-text-settings.yaml index 8b7789f..889bb03 100644 --- a/.repo-to-text-settings.yaml +++ b/.repo-to-text-settings.yaml @@ -1,6 +1,10 @@ # Details: https://github.com/kirill-markin/repo-to-text # Syntax: gitignore rules +# Ignore files and directories for all sections from gitignore file +# Default: True +gitignore-import-and-ignore: True + # Ignore files and directories for tree # and "Contents of ..." sections ignore-tree-and-content: diff --git a/repo_to_text/main.py b/repo_to_text/main.py index 68211b8..faeb128 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -38,26 +38,28 @@ def load_ignore_specs(path='.'): gitignore_spec = None content_ignore_spec = None tree_and_content_ignore_spec = None - - gitignore_path = os.path.join(path, '.gitignore') - if os.path.exists(gitignore_path): - logging.debug(f'Loading .gitignore from path: {gitignore_path}') - with open(gitignore_path, 'r') as f: - gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) + use_gitignore = True repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') if os.path.exists(repo_settings_path): logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') with open(repo_settings_path, 'r') as f: - ignore_data = yaml.safe_load(f) - if 'ignore-content' in ignore_data: - content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_data['ignore-content']) - if 'ignore-tree-and-content' in ignore_data: - tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_data['ignore-tree-and-content']) + settings = yaml.safe_load(f) + use_gitignore = settings.get('gitignore-import-and-ignore', True) + if 'ignore-content' in settings: + content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) + if 'ignore-tree-and-content' in settings: + tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-tree-and-content']) + + if use_gitignore: + gitignore_path = os.path.join(path, '.gitignore') + if os.path.exists(gitignore_path): + logging.debug(f'Loading .gitignore from path: {gitignore_path}') + with open(gitignore_path, 'r') as f: + gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec - def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): return ( is_ignored_path(file_path) or From bdc2f2be42220876bd5e0eea3938d01f52dfb921 Mon Sep 17 00:00:00 2001 From: Kirill Markin Date: Sun, 9 Jun 2024 10:09:49 +0200 Subject: [PATCH 4/4] filenames cleanup --- .gitignore | 4 +- README.md | 4 +- ...e_repo-to-text_2024-06-09-08-06-31-UTC.txt | 225 ++++++++++ ..._repo_snapshot_2024-06-08-11-35-28-UTC.txt | 420 ------------------ repo_to_text/main.py | 7 +- tests/test_main.py | 4 +- 6 files changed, 235 insertions(+), 429 deletions(-) create mode 100644 examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt delete mode 100644 examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt diff --git a/.gitignore b/.gitignore index f7150e2..a57110f 100644 --- a/.gitignore +++ b/.gitignore @@ -168,5 +168,5 @@ cython_debug/ # Ignore egg-info directory repo_to_text.egg-info/ -# Ignore generated repo_snapshot_*.txt files -repo_snapshot_*.txt +# Ignore generated repo-to-text_*.txt files +repo-to-text_*.txt diff --git a/README.md b/README.md index cd76e68..c548b08 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ![Example Output](https://raw.githubusercontent.com/kirill-markin/repo-to-text/main/examples/screenshot-demo.jpg) -The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt). +The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt). The same text will appear in your clipboard. You can paste it into a dialog with the LLM and start communicating. @@ -36,7 +36,7 @@ After installation, you can use the `repo-to-text` command in your terminal. Nav repo-to-text ``` -This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing. +This will create a file named `repo-to-text_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing. ### Options diff --git a/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt b/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt new file mode 100644 index 0000000..eb74a03 --- /dev/null +++ b/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt @@ -0,0 +1,225 @@ +Directory: repo-to-text + +Directory Structure: +``` +. +├── .gitignore +├── LICENSE +├── README.md +├── repo_to_text +│   ├── repo_to_text/__init__.py +│   └── repo_to_text/main.py +├── requirements.txt +└── tests + ├── tests/__init__.py + └── tests/test_main.py +``` + +Contents of requirements.txt: +``` +setuptools==70.0.0 +pathspec==0.12.1 +pytest==8.2.2 +argparse==1.4.0 +pyperclip==1.8.2 +PyYAML==6.0.1 + +``` + +Contents of repo_to_text/__init__.py: +``` +__author__ = 'Kirill Markin' +__email__ = 'markinkirill@gmail.com' + +``` + +Contents of repo_to_text/main.py: +``` +import os +import subprocess +import pathspec +import logging +import argparse +import yaml +from datetime import datetime +import pyperclip + +def setup_logging(debug=False): + logging_level = logging.DEBUG if debug else logging.INFO + logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s') + +def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str: + logging.debug(f'Generating tree structure for path: {path}') + result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) + tree_output = result.stdout.decode('utf-8') + logging.debug(f'Tree output generated: {tree_output}') + + if not gitignore_spec and not tree_and_content_ignore_spec: + logging.debug('No .gitignore or ignore-tree-and-content specification found') + return tree_output + + logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification') + filtered_lines = [] + for line in tree_output.splitlines(): + parts = line.strip().split() + if parts: + full_path = parts[-1] + relative_path = os.path.relpath(full_path, path) + if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec): + filtered_lines.append(line.replace('./', '', 1)) + + logging.debug('Tree structure filtering complete') + return '\n'.join(filtered_lines) + +def load_ignore_specs(path='.'): + gitignore_spec = None + content_ignore_spec = None + tree_and_content_ignore_spec = None + use_gitignore = True + + repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml') + if os.path.exists(repo_settings_path): + logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}') + with open(repo_settings_path, 'r') as f: + settings = yaml.safe_load(f) + use_gitignore = settings.get('gitignore-import-and-ignore', True) + if 'ignore-content' in settings: + content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content']) + if 'ignore-tree-and-content' in settings: + tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-tree-and-content']) + + if use_gitignore: + gitignore_path = os.path.join(path, '.gitignore') + if os.path.exists(gitignore_path): + logging.debug(f'Loading .gitignore from path: {gitignore_path}') + with open(gitignore_path, 'r') as f: + gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f) + + return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec + +def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): + return ( + is_ignored_path(file_path) or + (gitignore_spec and gitignore_spec.match_file(relative_path)) or + (content_ignore_spec and content_ignore_spec.match_file(relative_path)) or + (tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or + os.path.basename(file_path).startswith('repo-to-text_') + ) + +def is_ignored_path(file_path: str) -> bool: + ignored_dirs = ['.git'] + ignored_files_prefix = ['repo-to-text_'] + is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs) + is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) + result = is_ignored_dir or is_ignored_file + if result: + logging.debug(f'Path ignored: {file_path}') + return result + +def remove_empty_dirs(tree_output: str, path='.') -> str: + logging.debug('Removing empty directories from tree output') + lines = tree_output.splitlines() + non_empty_dirs = set() + filtered_lines = [] + + for line in lines: + parts = line.strip().split() + if parts: + full_path = parts[-1] + if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): + logging.debug(f'Directory is empty and will be removed: {full_path}') + continue + non_empty_dirs.add(os.path.dirname(full_path)) + filtered_lines.append(line) + + final_lines = [] + for line in filtered_lines: + parts = line.strip().split() + if parts: + full_path = parts[-1] + if os.path.isdir(full_path) and full_path not in non_empty_dirs: + logging.debug(f'Directory is empty and will be removed: {full_path}') + continue + final_lines.append(line) + + logging.debug('Empty directory removal complete') + return '\n'.join(final_lines) + +def save_repo_to_text(path='.', output_dir=None) -> str: + logging.debug(f'Starting to save repo structure to text for path: {path}') + gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path) + tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec) + tree_structure = remove_empty_dirs(tree_structure, path) + + # Add timestamp to the output file name with a descriptive name + timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC') + output_file = f'repo-to-text_{timestamp}.txt' + + # Determine the full path to the output file + if output_dir: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + output_file = os.path.join(output_dir, output_file) + + with open(output_file, 'w') as file: + project_name = os.path.basename(os.path.abspath(path)) + file.write(f'Directory: {project_name}\n\n') + file.write('Directory Structure:\n') + file.write('```\n.\n') + + # Insert .gitignore if it exists + if os.path.exists(os.path.join(path, '.gitignore')): + file.write('├── .gitignore\n') + + file.write(tree_structure + '\n' + '```\n') + logging.debug('Tree structure written to file') + + for root, _, files in os.walk(path): + for filename in files: + file_path = os.path.join(root, filename) + relative_path = os.path.relpath(file_path, path) + + if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec): + continue + + relative_path = relative_path.replace('./', '', 1) + + file.write(f'\nContents of {relative_path}:\n') + file.write('```\n') + try: + with open(file_path, 'r', encoding='utf-8') as f: + file.write(f.read()) + except UnicodeDecodeError: + logging.debug(f'Could not decode file contents: {file_path}') + file.write('[Could not decode file contents]\n') + file.write('\n```\n') + + file.write('\n') + logging.debug('Repository contents written to file') + + # Read the contents of the generated file + with open(output_file, 'r') as file: + repo_text = file.read() + + # Copy the contents to the clipboard + pyperclip.copy(repo_text) + logging.debug('Repository structure and contents copied to clipboard') + + return output_file + +def main(): + parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') + parser.add_argument('--debug', action='store_true', help='Enable debug logging') + parser.add_argument('--output-dir', type=str, help='Directory to save the output file') + args = parser.parse_args() + + setup_logging(debug=args.debug) + logging.debug('repo-to-text script started') + save_repo_to_text(output_dir=args.output_dir) + logging.debug('repo-to-text script finished') + +if __name__ == '__main__': + main() + +``` + diff --git a/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt b/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt deleted file mode 100644 index 5871c29..0000000 --- a/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt +++ /dev/null @@ -1,420 +0,0 @@ -Directory: repo-to-text - -Directory Structure: -``` -. -├── .gitignore -├── LICENSE -├── MANIFEST.in -├── README.md -├── repo_to_text -│   ├── repo_to_text/__init__.py -│   └── repo_to_text/main.py -├── requirements.txt -├── setup.py -└── tests - ├── tests/__init__.py - └── tests/test_main.py -``` - -Contents of LICENSE: -``` -MIT License - -Copyright (c) 2024 Kirill Markin - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -``` - -Contents of requirements.txt: -``` -setuptools==70.0.0 -pathspec==0.12.1 -pytest==8.2.2 -argparse==1.4.0 -pyperclip==1.8.2 - -``` - -Contents of MANIFEST.in: -``` -include README.md -include LICENSE -include requirements.txt - -``` - -Contents of README.md: -``` -# repo-to-text - -`repo-to-text` is an open-source project that converts the structure and contents of a directory (repository) into a single text file. By executing a simple command in the terminal, this tool generates a text representation of the directory, including the output of the `tree` command and the contents of each file, formatted for easy reading and sharing. - -## Features - -- Generates a text representation of a directory's structure. -- Includes the output of the `tree` command. -- Saves the contents of each file, encapsulated in markdown code blocks. -- Copies the generated text representation to the clipboard for easy sharing. -- Easy to install and use via `pip` and Homebrew. - -## Installation - -### Using pip - -To install `repo-to-text` via pip, run the following command: - -```bash -pip install repo-to-text -``` - -## Usage - -After installation, you can use the `repo-to-text` command in your terminal. Navigate to the directory you want to convert and run: - -```bash -repo-to-text -``` - -This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing. - -### Options - -You can customize the behavior of `repo-to-text` with the following options: - -- `--output-dir `: Specify an output directory where the generated text file will be saved. For example: - - ```bash - repo-to-text --output-dir /path/to/output - ``` - - This will save the file in the specified output directory instead of the current directory. - -- `--debug`: Enable DEBUG logging. By default, `repo-to-text` runs with INFO logging level. To enable DEBUG logging, use the `--debug` flag: - - ```bash - repo-to-text --debug - ``` - -## Example Output - -The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/db89dbfc9cfa3a8eb29dd14763bc477619a3cea4/examples/example_repo_snapshot_2024-06-08-10-30-33-UTC.txt). - -## Install Locally - -To install `repo-to-text` locally for development, follow these steps: - -1. Clone the repository: - - ```bash - git clone https://github.com/kirill-markin/repo-to-text - cd repo-to-text - ``` - -2. Install the package locally: - - ```bash - pip install -e . - ``` - -### Installing Dependencies - -To install all the required dependencies, run the following command: - -```bash -pip install -r requirements.txt -``` - -### Running Tests - -To run the tests, use the following command: - -```bash -pytest -``` - -## Uninstall - -To uninstall the package, run the following command from the directory where the repository is located: - -```bash -pip uninstall repo-to-text -``` - -## Contributing - -Contributions are welcome! If you have any suggestions or find a bug, please open an issue or submit a pull request. - -## License - -This project is licensed under the MIT License - see the [LICENSE](https://github.com/kirill-markin/repo-to-text/blob/main/LICENSE) file for details. - -## Contact - -This project is maintained by [Kirill Markin](https://github.com/kirill-markin). For any inquiries or feedback, please contact [markinkirill@gmail.com](mailto:markinkirill@gmail.com). - -``` - -Contents of setup.py: -``` -from setuptools import setup, find_packages - -with open('requirements.txt') as f: - required = f.read().splitlines() - -setup( - name='repo-to-text', - version='0.1.1', - author='Kirill Markin', - author_email='markinkirill@gmail.com', - description='Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks.', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - url='https://github.com/kirill-markin/repo-to-text', - license='MIT', - packages=find_packages(), - install_requires=required, - include_package_data=True, - entry_points={ - 'console_scripts': [ - 'repo-to-text=repo_to_text.main:main', - ], - }, - classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - ], - python_requires='>=3.6', -) - -``` - -Contents of tests/__init__.py: -``` - -``` - -Contents of tests/test_main.py: -``` -import os -import subprocess -import pytest -import time - -def test_repo_to_text(): - # Remove any existing snapshot files to avoid conflicts - for file in os.listdir('.'): - if file.startswith('repo_snapshot_') and file.endswith('.txt'): - os.remove(file) - - # Run the repo-to-text command - result = subprocess.run(['repo-to-text'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - # Assert that the command ran without errors - assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}" - - # Check for the existence of the new snapshot file - snapshot_files = [f for f in os.listdir('.') if f.startswith('repo_snapshot_') and f.endswith('.txt')] - assert len(snapshot_files) == 1, "No snapshot file created or multiple files created" - - # Verify that the snapshot file is not empty - with open(snapshot_files[0], 'r') as f: - content = f.read() - assert len(content) > 0, "Snapshot file is empty" - - # Clean up the generated snapshot file - os.remove(snapshot_files[0]) - -if __name__ == "__main__": - pytest.main() - -``` - -Contents of repo_to_text/__init__.py: -``` -__author__ = 'Kirill Markin' -__email__ = 'markinkirill@gmail.com' - -``` - -Contents of repo_to_text/main.py: -``` -import os -import subprocess -import pathspec -import logging -import argparse -from datetime import datetime -import pyperclip - -def setup_logging(debug=False): - logging_level = logging.DEBUG if debug else logging.INFO - logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s') - -def get_tree_structure(path='.', gitignore_spec=None) -> str: - logging.debug(f'Generating tree structure for path: {path}') - result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE) - tree_output = result.stdout.decode('utf-8') - logging.debug(f'Tree output generated: {tree_output}') - - if not gitignore_spec: - logging.debug('No .gitignore specification found') - return tree_output - - logging.debug('Filtering tree output based on .gitignore specification') - filtered_lines = [] - for line in tree_output.splitlines(): - parts = line.strip().split() - if parts: - full_path = parts[-1] - relative_path = os.path.relpath(full_path, path) - if not gitignore_spec.match_file(relative_path) and not is_ignored_path(relative_path): - filtered_lines.append(line.replace('./', '', 1)) - - logging.debug('Tree structure filtering complete') - return '\n'.join(filtered_lines) - -def load_gitignore(path='.'): - gitignore_path = os.path.join(path, '.gitignore') - if os.path.exists(gitignore_path): - logging.debug(f'Loading .gitignore from path: {gitignore_path}') - with open(gitignore_path, 'r') as f: - return pathspec.PathSpec.from_lines('gitwildmatch', f) - logging.debug('.gitignore not found') - return None - -def is_ignored_path(file_path: str) -> bool: - ignored_dirs = ['.git'] - ignored_files_prefix = ['repo_snapshot_'] - is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs) - is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) - result = is_ignored_dir or is_ignored_file - if result: - logging.debug(f'Path ignored: {file_path}') - return result - -def remove_empty_dirs(tree_output: str, path='.') -> str: - logging.debug('Removing empty directories from tree output') - lines = tree_output.splitlines() - non_empty_dirs = set() - filtered_lines = [] - - for line in lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and not any(os.path.isfile(os.path.join(full_path, f)) for f in os.listdir(full_path)): - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - non_empty_dirs.add(os.path.dirname(full_path)) - filtered_lines.append(line) - - final_lines = [] - for line in filtered_lines: - parts = line.strip().split() - if parts: - full_path = parts[-1] - if os.path.isdir(full_path) and full_path not in non_empty_dirs: - logging.debug(f'Directory is empty and will be removed: {full_path}') - continue - final_lines.append(line) - - logging.debug('Empty directory removal complete') - return '\n'.join(final_lines) - -def save_repo_to_text(path='.', output_dir=None) -> str: - logging.debug(f'Starting to save repo structure to text for path: {path}') - gitignore_spec = load_gitignore(path) - tree_structure = get_tree_structure(path, gitignore_spec) - tree_structure = remove_empty_dirs(tree_structure, path) - - # Add timestamp to the output file name with a descriptive name - timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC') - output_file = f'repo_snapshot_{timestamp}.txt' - - # Determine the full path to the output file - if output_dir: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - output_file = os.path.join(output_dir, output_file) - - with open(output_file, 'w') as file: - project_name = os.path.basename(os.path.abspath(path)) - file.write(f'Directory: {project_name}\n\n') - file.write('Directory Structure:\n') - file.write('```\n.\n') - - # Insert .gitignore if it exists - if os.path.exists(os.path.join(path, '.gitignore')): - file.write('├── .gitignore\n') - - file.write(tree_structure + '\n' + '```\n') - logging.debug('Tree structure written to file') - - for root, _, files in os.walk(path): - for filename in files: - file_path = os.path.join(root, filename) - relative_path = os.path.relpath(file_path, path) - - if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)): - continue - - relative_path = relative_path.replace('./', '', 1) - - file.write(f'\nContents of {relative_path}:\n') - file.write('```\n') - try: - with open(file_path, 'r', encoding='utf-8') as f: - file.write(f.read()) - except UnicodeDecodeError: - logging.error(f'Could not decode file contents: {file_path}') - file.write('[Could not decode file contents]\n') - file.write('\n```\n') - - file.write('\n') - logging.debug('Repository contents written to file') - - # Read the contents of the generated file - with open(output_file, 'r') as file: - repo_text = file.read() - - # Copy the contents to the clipboard - pyperclip.copy(repo_text) - logging.debug('Repository structure and contents copied to clipboard') - - return output_file - -def main(): - parser = argparse.ArgumentParser(description='Convert repository structure and contents to text') - parser.add_argument('--debug', action='store_true', help='Enable debug logging') - parser.add_argument('--output-dir', type=str, help='Directory to save the output file') - args = parser.parse_args() - - setup_logging(debug=args.debug) - logging.debug('repo-to-text script started') - save_repo_to_text(output_dir=args.output_dir) - logging.debug('repo-to-text script finished') - -if __name__ == '__main__': - main() - -``` - diff --git a/repo_to_text/main.py b/repo_to_text/main.py index faeb128..19b63d2 100644 --- a/repo_to_text/main.py +++ b/repo_to_text/main.py @@ -65,12 +65,13 @@ def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_ is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)) or (content_ignore_spec and content_ignore_spec.match_file(relative_path)) or - (tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) + (tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or + os.path.basename(file_path).startswith('repo-to-text_') ) def is_ignored_path(file_path: str) -> bool: ignored_dirs = ['.git'] - ignored_files_prefix = ['repo_snapshot_'] + ignored_files_prefix = ['repo-to-text_'] is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs) is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix) result = is_ignored_dir or is_ignored_file @@ -115,7 +116,7 @@ def save_repo_to_text(path='.', output_dir=None) -> str: # Add timestamp to the output file name with a descriptive name timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC') - output_file = f'repo_snapshot_{timestamp}.txt' + output_file = f'repo-to-text_{timestamp}.txt' # Determine the full path to the output file if output_dir: diff --git a/tests/test_main.py b/tests/test_main.py index b3f8274..b59da16 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -6,7 +6,7 @@ import time def test_repo_to_text(): # Remove any existing snapshot files to avoid conflicts for file in os.listdir('.'): - if file.startswith('repo_snapshot_') and file.endswith('.txt'): + if file.startswith('repo-to-text_') and file.endswith('.txt'): os.remove(file) # Run the repo-to-text command @@ -16,7 +16,7 @@ def test_repo_to_text(): assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}" # Check for the existence of the new snapshot file - snapshot_files = [f for f in os.listdir('.') if f.startswith('repo_snapshot_') and f.endswith('.txt')] + snapshot_files = [f for f in os.listdir('.') if f.startswith('repo-to-text_') and f.endswith('.txt')] assert len(snapshot_files) == 1, "No snapshot file created or multiple files created" # Verify that the snapshot file is not empty