mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-05 19:12:24 -08:00
Merge pull request #1 from kirill-markin:feature/repo-to-text-settings
Feature/repo-to-text-settings
This commit is contained in:
commit
cf25357505
8 changed files with 297 additions and 444 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -168,5 +168,5 @@ cython_debug/
|
|||
# Ignore egg-info directory
|
||||
repo_to_text.egg-info/
|
||||
|
||||
# Ignore generated repo_snapshot_*.txt files
|
||||
repo_snapshot_*.txt
|
||||
# Ignore generated repo-to-text_*.txt files
|
||||
repo-to-text_*.txt
|
||||
|
|
|
|||
20
.repo-to-text-settings.yaml
Normal file
20
.repo-to-text-settings.yaml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Details: https://github.com/kirill-markin/repo-to-text
|
||||
# Syntax: gitignore rules
|
||||
|
||||
# Apply the rules from the repository's .gitignore file to every section of the output
|
||||
# Default: True
|
||||
gitignore-import-and-ignore: True
|
||||
|
||||
# Ignore files and directories for tree
|
||||
# and "Contents of ..." sections
|
||||
ignore-tree-and-content:
|
||||
- ".repo-to-text-settings.yaml"
|
||||
- "examples/"
|
||||
- "MANIFEST.in"
|
||||
- "setup.py"
|
||||
|
||||
# Ignore files and directories for "Contents of ..." section
|
||||
ignore-content:
|
||||
- "README.md"
|
||||
- "LICENSE"
|
||||
- "tests/"
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||

|
||||
|
||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt).
|
||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt).
|
||||
|
||||
The same text will appear in your clipboard. You can paste it into a dialog with the LLM and start communicating.
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ After installation, you can use the `repo-to-text` command in your terminal. Nav
|
|||
repo-to-text
|
||||
```
|
||||
|
||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
This will create a file named `repo-to-text_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
|
||||
### Options
|
||||
|
||||
|
|
|
|||
225
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
225
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
Directory: repo-to-text
|
||||
|
||||
Directory Structure:
|
||||
```
|
||||
.
|
||||
├── .gitignore
|
||||
├── LICENSE
|
||||
├── README.md
|
||||
├── repo_to_text
|
||||
│ ├── repo_to_text/__init__.py
|
||||
│ └── repo_to_text/main.py
|
||||
├── requirements.txt
|
||||
└── tests
|
||||
├── tests/__init__.py
|
||||
└── tests/test_main.py
|
||||
```
|
||||
|
||||
Contents of requirements.txt:
|
||||
```
|
||||
setuptools==70.0.0
|
||||
pathspec==0.12.1
|
||||
pytest==8.2.2
|
||||
argparse==1.4.0
|
||||
pyperclip==1.8.2
|
||||
PyYAML==6.0.1
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/__init__.py:
|
||||
```
|
||||
__author__ = 'Kirill Markin'
|
||||
__email__ = 'markinkirill@gmail.com'
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/main.py:
|
||||
```
|
||||
import os
|
||||
import subprocess
|
||||
import pathspec
|
||||
import logging
|
||||
import argparse
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
import pyperclip
|
||||
|
||||
def setup_logging(debug=False):
    """Configure the root logger used by the command-line tool.

    Args:
        debug: When true, log at DEBUG level; otherwise log at INFO level.
    """
    if debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging_level, format=log_format)
|
||||
|
||||
def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str:
    """Return the ``tree`` listing of *path* with ignored entries removed.

    Runs ``tree -a -f --noreport`` and drops every line whose entry is
    rejected by ``should_ignore_file``. A leading ``./`` is stripped from the
    entries that are kept.

    Args:
        path: Directory to describe.
        gitignore_spec: Optional spec built from ``.gitignore``.
        tree_and_content_ignore_spec: Optional spec built from the
            ``ignore-tree-and-content`` setting.

    Returns:
        The filtered listing, one entry per line, joined with newlines.
    """
    logging.debug(f'Generating tree structure for path: {path}')
    result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    tree_output = result.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec and not tree_and_content_ignore_spec:
        # Do not return early: the built-in exclusions applied by
        # should_ignore_file must still be honoured when no spec is configured.
        logging.debug('No .gitignore or ignore-tree-and-content specification found')
    else:
        logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')

    filtered_lines = []
    for line in tree_output.splitlines():
        tokens = line.split()
        if not tokens:
            continue

        # Split the line into the drawing prefix and the entry itself so that
        # names containing spaces stay in one piece.
        prefix, entry = '', None
        for connector in ('── ', '-- '):
            head, found, tail = line.partition(connector)
            if found:
                prefix, entry = head + found, tail
                break

        if entry is None:
            # Root line or an unrecognised layout: fall back to the last token.
            full_path = tokens[-1]
        else:
            # Symbolic links are printed as "link -> target"; the link's own
            # path is the one the ignore rules apply to.
            full_path = entry.split(' -> ', 1)[0].strip()

        relative_path = os.path.relpath(full_path, path)
        if should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
            continue

        if entry is None:
            filtered_lines.append(line.replace('./', '', 1))
        elif entry.startswith('./'):
            # Strip only a leading "./", never one inside the path.
            filtered_lines.append(prefix + entry[2:])
        else:
            filtered_lines.append(line)

    logging.debug('Tree structure filtering complete')
    return '\n'.join(filtered_lines)
|
||||
|
||||
def load_ignore_specs(path='.'):
    """Load the ignore specifications that apply to the directory *path*.

    Reads ``.repo-to-text-settings.yaml`` (when present) for the
    ``gitignore-import-and-ignore``, ``ignore-content`` and
    ``ignore-tree-and-content`` keys, then reads ``.gitignore`` unless the
    settings switch it off.

    Args:
        path: Directory that may contain the settings file and ``.gitignore``.

    Returns:
        A ``(gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)``
        tuple; each element is a ``pathspec.PathSpec`` or ``None``.
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_spec = None
    use_gitignore = True

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
        with open(repo_settings_path, 'r', encoding='utf-8') as f:
            settings = yaml.safe_load(f)
        if not isinstance(settings, dict):
            # An empty file loads as None; treat any non-mapping document as
            # "no settings" instead of failing on .get().
            settings = {}

        use_gitignore = settings.get('gitignore-import-and-ignore', True)

        # A key written without a value loads as None; skip it rather than
        # handing None to PathSpec.from_lines.
        content_patterns = settings.get('ignore-content')
        if content_patterns is not None:
            content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', content_patterns)

        tree_and_content_patterns = settings.get('ignore-tree-and-content')
        if tree_and_content_patterns is not None:
            tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_patterns)

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||
|
||||
def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
    """Tell whether an entry must be left out of the generated text.

    Args:
        file_path: Path of the entry as it was discovered.
        relative_path: The same entry relative to the directory being processed;
            this is what the specs are matched against.
        gitignore_spec: Optional spec built from ``.gitignore``.
        content_ignore_spec: Optional spec built from ``ignore-content``.
        tree_and_content_ignore_spec: Optional spec built from
            ``ignore-tree-and-content``.

    Returns:
        True when the built-in rules or any supplied spec rejects the entry,
        or when its file name starts with ``repo-to-text_``.
    """
    if is_ignored_path(file_path):
        return True

    # Specs are consulted in a fixed order; missing or empty specs are skipped.
    for spec in (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
        if spec and spec.match_file(relative_path):
            return True

    file_name = os.path.basename(file_path)
    return file_name.startswith('repo-to-text_')
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Tell whether *file_path* is excluded by the tool's built-in rules.

    A path is ignored when it contains ``.git`` anywhere, or when the path or
    its final component starts with the ``repo-to-text_`` prefix used for
    generated output files.

    Args:
        file_path: Path to test; may carry a directory part such as ``./``.

    Returns:
        True when the path must be ignored.
    """
    ignored_dirs = ['.git']
    ignored_files_prefix = ('repo-to-text_',)

    # NOTE(review): this is a substring test, so it also matches names such as
    # ".gitignore" or ".github" -- confirm whether that is intended.
    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)

    # Test the file name as well as the whole path: callers pass paths such as
    # "./repo-to-text_<timestamp>.txt", which a whole-path prefix test misses.
    file_name = os.path.basename(file_path)
    is_ignored_file = (
        file_path.startswith(ignored_files_prefix)
        or file_name.startswith(ignored_files_prefix)
    )

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug(f'Path ignored: {file_path}')
    return result
|
||||
|
||||
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop directory entries that look empty from a tree listing.

    The last whitespace-separated token of each line is taken as the entry's
    path and checked against the file system exactly as written. ``path`` is
    accepted but not used.

    Two passes are made:

    1. A directory with no regular file directly inside it is dropped; for
       every entry that is kept, its parent directory is recorded.
    2. A directory that was not recorded as the parent of a kept entry is
       dropped.

    Args:
        tree_output: Listing produced for the directory, one entry per line.
        path: Unused.

    Returns:
        The remaining lines joined with newlines.
    """
    logging.debug('Removing empty directories from tree output')

    def contains_regular_file(directory):
        for name in os.listdir(directory):
            if os.path.isfile(os.path.join(directory, name)):
                return True
        return False

    parents_of_kept = set()
    survivors = []  # (line, entry path) pairs that passed the first check
    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        entry = tokens[-1]
        # NOTE(review): a directory holding only sub-directories is dropped
        # here even when entries below it are kept.
        if os.path.isdir(entry) and not contains_regular_file(entry):
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        parents_of_kept.add(os.path.dirname(entry))
        survivors.append((line, entry))

    final_lines = []
    for line, entry in survivors:
        if os.path.isdir(entry) and entry not in parents_of_kept:
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
||||
|
||||
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the tree listing and file contents of *path* to a text file.

    The file is named ``repo-to-text_<UTC timestamp>.txt``. Its text is also
    copied to the clipboard; a clipboard failure is logged and does not abort
    the run, because the file has already been written by then.

    Args:
        path: Directory to convert.
        output_dir: Optional directory for the output file; created when
            missing. Without it the file is written to the current directory.

    Returns:
        The path of the file that was written.
    """
    # Local import: only the timestamp below needs it.
    from datetime import timezone

    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path)
    tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)

    # Add timestamp to the output file name with a descriptive name
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo-to-text_{timestamp}.txt'

    # Determine the full path to the output file
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    # The listing contains box-drawing characters, so the encoding is fixed
    # instead of being left to the platform default.
    with open(output_file, 'w', encoding='utf-8') as file:
        project_name = os.path.basename(os.path.abspath(path))
        file.write(f'Directory: {project_name}\n\n')
        file.write('Directory Structure:\n')
        file.write('```\n.\n')

        # Insert .gitignore if it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            file.write('├── .gitignore\n')

        file.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                # os.path.relpath never yields a leading "./", so the result
                # is used as is.
                relative_path = os.path.relpath(file_path, path)

                if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
                    continue

                file.write(f'\nContents of {relative_path}:\n')
                file.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        file.write(f.read())
                except UnicodeDecodeError:
                    logging.debug(f'Could not decode file contents: {file_path}')
                    file.write('[Could not decode file contents]\n')
                file.write('\n```\n')

        file.write('\n')
        logging.debug('Repository contents written to file')

    # Read the contents of the generated file
    with open(output_file, 'r', encoding='utf-8') as file:
        repo_text = file.read()

    # Copy the contents to the clipboard
    try:
        pyperclip.copy(repo_text)
    except pyperclip.PyperclipException as error:
        logging.warning(f'Could not copy repository text to clipboard: {error}')
    else:
        logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
||||
|
||||
def main():
    """Command-line entry point: parse the options, set up logging, write the snapshot."""
    cli = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    cli.add_argument('--debug', action='store_true', help='Enable debug logging')
    cli.add_argument('--output-dir', type=str, help='Directory to save the output file')
    options = cli.parse_args()

    setup_logging(debug=options.debug)

    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=options.output_dir)
    logging.debug('repo-to-text script finished')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
```
|
||||
|
||||
|
|
@ -1,420 +0,0 @@
|
|||
Directory: repo-to-text
|
||||
|
||||
Directory Structure:
|
||||
```
|
||||
.
|
||||
├── .gitignore
|
||||
├── LICENSE
|
||||
├── MANIFEST.in
|
||||
├── README.md
|
||||
├── repo_to_text
|
||||
│ ├── repo_to_text/__init__.py
|
||||
│ └── repo_to_text/main.py
|
||||
├── requirements.txt
|
||||
├── setup.py
|
||||
└── tests
|
||||
├── tests/__init__.py
|
||||
└── tests/test_main.py
|
||||
```
|
||||
|
||||
Contents of LICENSE:
|
||||
```
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Kirill Markin
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
```
|
||||
|
||||
Contents of requirements.txt:
|
||||
```
|
||||
setuptools==70.0.0
|
||||
pathspec==0.12.1
|
||||
pytest==8.2.2
|
||||
argparse==1.4.0
|
||||
pyperclip==1.8.2
|
||||
|
||||
```
|
||||
|
||||
Contents of MANIFEST.in:
|
||||
```
|
||||
include README.md
|
||||
include LICENSE
|
||||
include requirements.txt
|
||||
|
||||
```
|
||||
|
||||
Contents of README.md:
|
||||
```
|
||||
# repo-to-text
|
||||
|
||||
`repo-to-text` is an open-source project that converts the structure and contents of a directory (repository) into a single text file. By executing a simple command in the terminal, this tool generates a text representation of the directory, including the output of the `tree` command and the contents of each file, formatted for easy reading and sharing.
|
||||
|
||||
## Features
|
||||
|
||||
- Generates a text representation of a directory's structure.
|
||||
- Includes the output of the `tree` command.
|
||||
- Saves the contents of each file, encapsulated in markdown code blocks.
|
||||
- Copies the generated text representation to the clipboard for easy sharing.
|
||||
- Easy to install and use via `pip` and Homebrew.
|
||||
|
||||
## Installation
|
||||
|
||||
### Using pip
|
||||
|
||||
To install `repo-to-text` via pip, run the following command:
|
||||
|
||||
```bash
|
||||
pip install repo-to-text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
After installation, you can use the `repo-to-text` command in your terminal. Navigate to the directory you want to convert and run:
|
||||
|
||||
```bash
|
||||
repo-to-text
|
||||
```
|
||||
|
||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
|
||||
### Options
|
||||
|
||||
You can customize the behavior of `repo-to-text` with the following options:
|
||||
|
||||
- `--output-dir <path>`: Specify an output directory where the generated text file will be saved. For example:
|
||||
|
||||
```bash
|
||||
repo-to-text --output-dir /path/to/output
|
||||
```
|
||||
|
||||
This will save the file in the specified output directory instead of the current directory.
|
||||
|
||||
- `--debug`: Enable DEBUG logging. By default, `repo-to-text` runs with INFO logging level. To enable DEBUG logging, use the `--debug` flag:
|
||||
|
||||
```bash
|
||||
repo-to-text --debug
|
||||
```
|
||||
|
||||
## Example Output
|
||||
|
||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/db89dbfc9cfa3a8eb29dd14763bc477619a3cea4/examples/example_repo_snapshot_2024-06-08-10-30-33-UTC.txt).
|
||||
|
||||
## Install Locally
|
||||
|
||||
To install `repo-to-text` locally for development, follow these steps:
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/kirill-markin/repo-to-text
|
||||
cd repo-to-text
|
||||
```
|
||||
|
||||
2. Install the package locally:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Installing Dependencies
|
||||
|
||||
To install all the required dependencies, run the following command:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
To run the tests, use the following command:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
## Uninstall
|
||||
|
||||
To uninstall the package, run the following command from the directory where the repository is located:
|
||||
|
||||
```bash
|
||||
pip uninstall repo-to-text
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! If you have any suggestions or find a bug, please open an issue or submit a pull request.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](https://github.com/kirill-markin/repo-to-text/blob/main/LICENSE) file for details.
|
||||
|
||||
## Contact
|
||||
|
||||
This project is maintained by [Kirill Markin](https://github.com/kirill-markin). For any inquiries or feedback, please contact [markinkirill@gmail.com](mailto:markinkirill@gmail.com).
|
||||
|
||||
```
|
||||
|
||||
Contents of setup.py:
|
||||
```
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Read the pinned dependencies; blank lines and comments are skipped so that
# setuptools never receives an empty requirement string.
with open('requirements.txt', encoding='utf-8') as f:
    required = [
        line.strip()
        for line in f
        if line.strip() and not line.strip().startswith('#')
    ]

# Read the long description inside a context manager so the handle is closed.
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='repo-to-text',
    version='0.1.1',
    author='Kirill Markin',
    author_email='markinkirill@gmail.com',
    description='Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/kirill-markin/repo-to-text',
    license='MIT',
    packages=find_packages(),
    install_requires=required,
    include_package_data=True,
    entry_points={
        'console_scripts': [
            'repo-to-text=repo_to_text.main:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
|
||||
|
||||
```
|
||||
|
||||
Contents of tests/__init__.py:
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
Contents of tests/test_main.py:
|
||||
```
|
||||
import os
|
||||
import subprocess
|
||||
import pytest
|
||||
import time
|
||||
|
||||
def test_repo_to_text():
    """Run the ``repo-to-text`` command and check that it leaves one non-empty snapshot."""

    def is_snapshot(name):
        return name.startswith('repo_snapshot_') and name.endswith('.txt')

    # Remove any existing snapshot files to avoid conflicts
    for leftover in os.listdir('.'):
        if is_snapshot(leftover):
            os.remove(leftover)

    # Run the repo-to-text command
    result = subprocess.run(['repo-to-text'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Assert that the command ran without errors
    assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}"

    # Check for the existence of the new snapshot file
    snapshot_files = [name for name in os.listdir('.') if is_snapshot(name)]
    assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"

    # Verify that the snapshot file is not empty
    snapshot = snapshot_files[0]
    with open(snapshot, 'r') as f:
        content = f.read()
    assert len(content) > 0, "Snapshot file is empty"

    # Clean up the generated snapshot file
    os.remove(snapshot)
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main()
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/__init__.py:
|
||||
```
|
||||
__author__ = 'Kirill Markin'
|
||||
__email__ = 'markinkirill@gmail.com'
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/main.py:
|
||||
```
|
||||
import os
|
||||
import subprocess
|
||||
import pathspec
|
||||
import logging
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
import pyperclip
|
||||
|
||||
def setup_logging(debug=False):
    """Configure the root logger used by the command-line tool.

    Args:
        debug: When true, log at DEBUG level; otherwise log at INFO level.
    """
    if debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging_level, format=log_format)
|
||||
|
||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
    """Return the ``tree`` listing of *path* with ignored entries removed.

    Runs ``tree -a -f --noreport`` and drops every line whose entry matches
    *gitignore_spec* or is rejected by ``is_ignored_path``. A leading ``./``
    is stripped from the entries that are kept.

    Args:
        path: Directory to describe.
        gitignore_spec: Optional spec built from ``.gitignore``.

    Returns:
        The filtered listing, one entry per line, joined with newlines.
    """
    logging.debug(f'Generating tree structure for path: {path}')
    result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    tree_output = result.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec:
        # Do not return early: the built-in exclusions of is_ignored_path must
        # still be honoured when there is no .gitignore.
        logging.debug('No .gitignore specification found')
    else:
        logging.debug('Filtering tree output based on .gitignore specification')

    filtered_lines = []
    for line in tree_output.splitlines():
        tokens = line.split()
        if not tokens:
            continue

        # Split the line into the drawing prefix and the entry itself so that
        # names containing spaces stay in one piece.
        prefix, entry = '', None
        for connector in ('── ', '-- '):
            head, found, tail = line.partition(connector)
            if found:
                prefix, entry = head + found, tail
                break

        if entry is None:
            # Root line or an unrecognised layout: fall back to the last token.
            full_path = tokens[-1]
        else:
            # Symbolic links are printed as "link -> target"; the link's own
            # path is the one the ignore rules apply to.
            full_path = entry.split(' -> ', 1)[0].strip()

        relative_path = os.path.relpath(full_path, path)
        if gitignore_spec and gitignore_spec.match_file(relative_path):
            continue
        if is_ignored_path(relative_path):
            continue

        if entry is None:
            filtered_lines.append(line.replace('./', '', 1))
        elif entry.startswith('./'):
            # Strip only a leading "./", never one inside the path.
            filtered_lines.append(prefix + entry[2:])
        else:
            filtered_lines.append(line)

    logging.debug('Tree structure filtering complete')
    return '\n'.join(filtered_lines)
|
||||
|
||||
def load_gitignore(path='.'):
    """Build a path spec from the ``.gitignore`` file in *path*.

    Args:
        path: Directory that may contain a ``.gitignore`` file.

    Returns:
        A ``pathspec.PathSpec`` using gitwildmatch rules, or ``None`` when the
        directory has no ``.gitignore``.
    """
    gitignore_path = os.path.join(path, '.gitignore')
    if not os.path.exists(gitignore_path):
        logging.debug('.gitignore not found')
        return None

    logging.debug(f'Loading .gitignore from path: {gitignore_path}')
    # Decode as UTF-8 explicitly so the result does not depend on the locale.
    with open(gitignore_path, 'r', encoding='utf-8') as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Tell whether *file_path* is excluded by the tool's built-in rules.

    A path is ignored when it contains ``.git`` anywhere, or when the path or
    its final component starts with the ``repo_snapshot_`` prefix used for
    generated output files.

    Args:
        file_path: Path to test; may carry a directory part such as ``./``.

    Returns:
        True when the path must be ignored.
    """
    ignored_dirs = ['.git']
    ignored_files_prefix = ('repo_snapshot_',)

    # NOTE(review): this is a substring test, so it also matches names such as
    # ".gitignore" or ".github" -- confirm whether that is intended.
    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)

    # Test the file name as well as the whole path: callers pass paths such as
    # "./repo_snapshot_<timestamp>.txt", which a whole-path prefix test misses.
    file_name = os.path.basename(file_path)
    is_ignored_file = (
        file_path.startswith(ignored_files_prefix)
        or file_name.startswith(ignored_files_prefix)
    )

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug(f'Path ignored: {file_path}')
    return result
|
||||
|
||||
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop directory entries that look empty from a tree listing.

    The last whitespace-separated token of each line is taken as the entry's
    path and checked against the file system exactly as written. ``path`` is
    accepted but not used.

    Two passes are made:

    1. A directory with no regular file directly inside it is dropped; for
       every entry that is kept, its parent directory is recorded.
    2. A directory that was not recorded as the parent of a kept entry is
       dropped.

    Args:
        tree_output: Listing produced for the directory, one entry per line.
        path: Unused.

    Returns:
        The remaining lines joined with newlines.
    """
    logging.debug('Removing empty directories from tree output')

    def contains_regular_file(directory):
        for name in os.listdir(directory):
            if os.path.isfile(os.path.join(directory, name)):
                return True
        return False

    parents_of_kept = set()
    survivors = []  # (line, entry path) pairs that passed the first check
    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        entry = tokens[-1]
        # NOTE(review): a directory holding only sub-directories is dropped
        # here even when entries below it are kept.
        if os.path.isdir(entry) and not contains_regular_file(entry):
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        parents_of_kept.add(os.path.dirname(entry))
        survivors.append((line, entry))

    final_lines = []
    for line, entry in survivors:
        if os.path.isdir(entry) and entry not in parents_of_kept:
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
||||
|
||||
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the tree listing and file contents of *path* to a text file.

    The file is named ``repo_snapshot_<UTC timestamp>.txt``. Its text is also
    copied to the clipboard; a clipboard failure is logged and does not abort
    the run, because the file has already been written by then.

    Args:
        path: Directory to convert.
        output_dir: Optional directory for the output file; created when
            missing. Without it the file is written to the current directory.

    Returns:
        The path of the file that was written.
    """
    # Local import: only the timestamp below needs it.
    from datetime import timezone

    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec = load_gitignore(path)
    tree_structure = get_tree_structure(path, gitignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)

    # Add timestamp to the output file name with a descriptive name
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo_snapshot_{timestamp}.txt'

    # Determine the full path to the output file
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    # The listing contains box-drawing characters, so the encoding is fixed
    # instead of being left to the platform default.
    with open(output_file, 'w', encoding='utf-8') as file:
        project_name = os.path.basename(os.path.abspath(path))
        file.write(f'Directory: {project_name}\n\n')
        file.write('Directory Structure:\n')
        file.write('```\n.\n')

        # Insert .gitignore if it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            file.write('├── .gitignore\n')

        file.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                # os.path.relpath never yields a leading "./", so the result
                # is used as is.
                relative_path = os.path.relpath(file_path, path)

                # Generated snapshots are skipped by file name so that the
                # file being written is never read back into itself.
                if filename.startswith('repo_snapshot_'):
                    continue
                if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)):
                    continue

                file.write(f'\nContents of {relative_path}:\n')
                file.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        file.write(f.read())
                except UnicodeDecodeError:
                    logging.error(f'Could not decode file contents: {file_path}')
                    file.write('[Could not decode file contents]\n')
                file.write('\n```\n')

        file.write('\n')
        logging.debug('Repository contents written to file')

    # Read the contents of the generated file
    with open(output_file, 'r', encoding='utf-8') as file:
        repo_text = file.read()

    # Copy the contents to the clipboard
    try:
        pyperclip.copy(repo_text)
    except pyperclip.PyperclipException as error:
        logging.warning(f'Could not copy repository text to clipboard: {error}')
    else:
        logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
||||
|
||||
def main():
    """Command-line entry point: parse the options, set up logging, write the snapshot."""
    cli = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    cli.add_argument('--debug', action='store_true', help='Enable debug logging')
    cli.add_argument('--output-dir', type=str, help='Directory to save the output file')
    options = cli.parse_args()

    setup_logging(debug=options.debug)

    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=options.output_dir)
    logging.debug('repo-to-text script finished')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
```
|
||||
|
||||
|
|
@ -3,6 +3,7 @@ import subprocess
|
|||
import pathspec
|
||||
import logging
|
||||
import argparse
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
import pyperclip
|
||||
|
||||
|
|
@ -10,41 +11,67 @@ def setup_logging(debug=False):
|
|||
logging_level = logging.DEBUG if debug else logging.INFO
|
||||
logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
|
||||
def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str:
|
||||
logging.debug(f'Generating tree structure for path: {path}')
|
||||
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
|
||||
tree_output = result.stdout.decode('utf-8')
|
||||
logging.debug(f'Tree output generated: {tree_output}')
|
||||
|
||||
if not gitignore_spec:
|
||||
logging.debug('No .gitignore specification found')
|
||||
if not gitignore_spec and not tree_and_content_ignore_spec:
|
||||
logging.debug('No .gitignore or ignore-tree-and-content specification found')
|
||||
return tree_output
|
||||
|
||||
logging.debug('Filtering tree output based on .gitignore specification')
|
||||
logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
|
||||
filtered_lines = []
|
||||
for line in tree_output.splitlines():
|
||||
parts = line.strip().split()
|
||||
if parts:
|
||||
full_path = parts[-1]
|
||||
relative_path = os.path.relpath(full_path, path)
|
||||
if not gitignore_spec.match_file(relative_path) and not is_ignored_path(relative_path):
|
||||
if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
|
||||
filtered_lines.append(line.replace('./', '', 1))
|
||||
|
||||
logging.debug('Tree structure filtering complete')
|
||||
return '\n'.join(filtered_lines)
|
||||
|
||||
def load_gitignore(path='.'):
|
||||
gitignore_path = os.path.join(path, '.gitignore')
|
||||
if os.path.exists(gitignore_path):
|
||||
logging.debug(f'Loading .gitignore from path: {gitignore_path}')
|
||||
with open(gitignore_path, 'r') as f:
|
||||
return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
logging.debug('.gitignore not found')
|
||||
return None
|
||||
def load_ignore_specs(path='.'):
    """Load the ignore specifications that apply to the directory *path*.

    Reads ``.repo-to-text-settings.yaml`` (when present) for the
    ``gitignore-import-and-ignore``, ``ignore-content`` and
    ``ignore-tree-and-content`` keys, then reads ``.gitignore`` unless the
    settings switch it off.

    Args:
        path: Directory that may contain the settings file and ``.gitignore``.

    Returns:
        A ``(gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)``
        tuple; each element is a ``pathspec.PathSpec`` or ``None``.
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_spec = None
    use_gitignore = True

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
        with open(repo_settings_path, 'r', encoding='utf-8') as f:
            settings = yaml.safe_load(f)
        if not isinstance(settings, dict):
            # An empty file loads as None; treat any non-mapping document as
            # "no settings" instead of failing on .get().
            settings = {}

        use_gitignore = settings.get('gitignore-import-and-ignore', True)

        # A key written without a value loads as None; skip it rather than
        # handing None to PathSpec.from_lines.
        content_patterns = settings.get('ignore-content')
        if content_patterns is not None:
            content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', content_patterns)

        tree_and_content_patterns = settings.get('ignore-tree-and-content')
        if tree_and_content_patterns is not None:
            tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', tree_and_content_patterns)

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||
|
||||
def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
    """Tell whether an entry must be left out of the generated text.

    Args:
        file_path: Path of the entry as it was discovered.
        relative_path: The same entry relative to the directory being processed;
            this is what the specs are matched against.
        gitignore_spec: Optional spec built from ``.gitignore``.
        content_ignore_spec: Optional spec built from ``ignore-content``.
        tree_and_content_ignore_spec: Optional spec built from
            ``ignore-tree-and-content``.

    Returns:
        True when the built-in rules or any supplied spec rejects the entry,
        or when its file name starts with ``repo-to-text_``.
    """
    if is_ignored_path(file_path):
        return True

    # Specs are consulted in a fixed order; missing or empty specs are skipped.
    for spec in (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
        if spec and spec.match_file(relative_path):
            return True

    file_name = os.path.basename(file_path)
    return file_name.startswith('repo-to-text_')
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
|
||||
ignored_dirs = ['.git']
|
||||
ignored_files_prefix = ['repo_snapshot_']
|
||||
ignored_files_prefix = ['repo-to-text_']
|
||||
is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)
|
||||
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
|
||||
result = is_ignored_dir or is_ignored_file
|
||||
|
|
@ -83,13 +110,13 @@ def remove_empty_dirs(tree_output: str, path='.') -> str:
|
|||
|
||||
def save_repo_to_text(path='.', output_dir=None) -> str:
|
||||
logging.debug(f'Starting to save repo structure to text for path: {path}')
|
||||
gitignore_spec = load_gitignore(path)
|
||||
tree_structure = get_tree_structure(path, gitignore_spec)
|
||||
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path)
|
||||
tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
|
||||
tree_structure = remove_empty_dirs(tree_structure, path)
|
||||
|
||||
# Add timestamp to the output file name with a descriptive name
|
||||
timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||
output_file = f'repo_snapshot_{timestamp}.txt'
|
||||
output_file = f'repo-to-text_{timestamp}.txt'
|
||||
|
||||
# Determine the full path to the output file
|
||||
if output_dir:
|
||||
|
|
@ -115,7 +142,7 @@ def save_repo_to_text(path='.', output_dir=None) -> str:
|
|||
file_path = os.path.join(root, filename)
|
||||
relative_path = os.path.relpath(file_path, path)
|
||||
|
||||
if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)):
|
||||
if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
|
||||
continue
|
||||
|
||||
relative_path = relative_path.replace('./', '', 1)
|
||||
|
|
|
|||
|
|
@ -3,3 +3,4 @@ pathspec==0.12.1
|
|||
pytest==8.2.2
|
||||
argparse==1.4.0
|
||||
pyperclip==1.8.2
|
||||
PyYAML==6.0.1
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import time
|
|||
def test_repo_to_text():
|
||||
# Remove any existing snapshot files to avoid conflicts
|
||||
for file in os.listdir('.'):
|
||||
if file.startswith('repo_snapshot_') and file.endswith('.txt'):
|
||||
if file.startswith('repo-to-text_') and file.endswith('.txt'):
|
||||
os.remove(file)
|
||||
|
||||
# Run the repo-to-text command
|
||||
|
|
@ -16,7 +16,7 @@ def test_repo_to_text():
|
|||
assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}"
|
||||
|
||||
# Check for the existence of the new snapshot file
|
||||
snapshot_files = [f for f in os.listdir('.') if f.startswith('repo_snapshot_') and f.endswith('.txt')]
|
||||
snapshot_files = [f for f in os.listdir('.') if f.startswith('repo-to-text_') and f.endswith('.txt')]
|
||||
assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"
|
||||
|
||||
# Verify that the snapshot file is not empty
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue