mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-06 03:22:23 -08:00
Merge pull request #1 from kirill-markin:feature/repo-to-text-settings
Feature/repo-to-text-settings
This commit is contained in:
commit
cf25357505
8 changed files with 297 additions and 444 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -168,5 +168,5 @@ cython_debug/
|
||||||
# Ignore egg-info directory
|
# Ignore egg-info directory
|
||||||
repo_to_text.egg-info/
|
repo_to_text.egg-info/
|
||||||
|
|
||||||
# Ignore generated repo_snapshot_*.txt files
|
# Ignore generated repo-to-text_*.txt files
|
||||||
repo_snapshot_*.txt
|
repo-to-text_*.txt
|
||||||
|
|
|
||||||
20
.repo-to-text-settings.yaml
Normal file
20
.repo-to-text-settings.yaml
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Details: https://github.com/kirill-markin/repo-to-text
|
||||||
|
# Syntax: gitignore rules
|
||||||
|
|
||||||
|
# Import the rules from the repository's .gitignore file and apply them to all sections
|
||||||
|
# Default: True
|
||||||
|
gitignore-import-and-ignore: True
|
||||||
|
|
||||||
|
# Ignore files and directories for tree
|
||||||
|
# and "Contents of ..." sections
|
||||||
|
ignore-tree-and-content:
|
||||||
|
- ".repo-to-text-settings.yaml"
|
||||||
|
- "examples/"
|
||||||
|
- "MANIFEST.in"
|
||||||
|
- "setup.py"
|
||||||
|
|
||||||
|
# Ignore files and directories for "Contents of ..." section
|
||||||
|
ignore-content:
|
||||||
|
- "README.md"
|
||||||
|
- "LICENSE"
|
||||||
|
- "tests/"
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt).
|
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt).
|
||||||
|
|
||||||
The same text will appear in your clipboard. You can paste it into a dialog with the LLM and start communicating.
|
The same text will appear in your clipboard. You can paste it into a dialog with the LLM and start communicating.
|
||||||
|
|
||||||
|
|
@ -36,7 +36,7 @@ After installation, you can use the `repo-to-text` command in your terminal. Nav
|
||||||
repo-to-text
|
repo-to-text
|
||||||
```
|
```
|
||||||
|
|
||||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
This will create a file named `repo-to-text_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||||
|
|
||||||
### Options
|
### Options
|
||||||
|
|
||||||
|
|
|
||||||
225
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
225
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
|
|
@ -0,0 +1,225 @@
|
||||||
|
Directory: repo-to-text
|
||||||
|
|
||||||
|
Directory Structure:
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── .gitignore
|
||||||
|
├── LICENSE
|
||||||
|
├── README.md
|
||||||
|
├── repo_to_text
|
||||||
|
│ ├── repo_to_text/__init__.py
|
||||||
|
│ └── repo_to_text/main.py
|
||||||
|
├── requirements.txt
|
||||||
|
└── tests
|
||||||
|
├── tests/__init__.py
|
||||||
|
└── tests/test_main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Contents of requirements.txt:
|
||||||
|
```
|
||||||
|
setuptools==70.0.0
|
||||||
|
pathspec==0.12.1
|
||||||
|
pytest==8.2.2
|
||||||
|
argparse==1.4.0
|
||||||
|
pyperclip==1.8.2
|
||||||
|
PyYAML==6.0.1
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Contents of repo_to_text/__init__.py:
|
||||||
|
```
|
||||||
|
__author__ = 'Kirill Markin'
|
||||||
|
__email__ = 'markinkirill@gmail.com'
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Contents of repo_to_text/main.py:
|
||||||
|
```
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import pathspec
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import yaml
|
||||||
|
from datetime import datetime
|
||||||
|
import pyperclip
|
||||||
|
|
||||||
|
def setup_logging(debug=False):
    """Configure the root logger for the command-line tool.

    Args:
        debug: When true, log at DEBUG level; otherwise log at INFO level.
    """
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
|
||||||
|
|
||||||
|
def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str:
    """Return the ``tree`` listing of ``path`` with ignored entries removed.

    Runs ``tree -a -f --noreport`` and, when at least one spec is supplied,
    drops every line whose entry is rejected by ``should_ignore_file``.

    Args:
        path: Directory to list.
        gitignore_spec: Optional spec built from ``.gitignore``.
        tree_and_content_ignore_spec: Optional spec built from the
            ``ignore-tree-and-content`` setting.

    Returns:
        The raw ``tree`` output when both specs are empty, otherwise the
        retained lines joined with newlines.

    Raises:
        FileNotFoundError: If the ``tree`` executable is not installed.
    """
    import re  # local import: only needed to parse tree lines

    logging.debug(f'Generating tree structure for path: {path}')
    try:
        result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    except FileNotFoundError:
        logging.error("The 'tree' command was not found; it is required to build the directory structure")
        raise
    if result.returncode != 0:
        logging.warning(f"'tree' exited with status {result.returncode} for path: {path}")
    tree_output = result.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec and not tree_and_content_ignore_spec:
        logging.debug('No .gitignore or ignore-tree-and-content specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
    # The entry path is everything after the first branch connector, so names
    # that contain spaces are kept whole instead of being cut at the last space.
    connector = re.compile(r'(?:[├└]──|[|`]--) ')
    filtered_lines = []
    for line in tree_output.splitlines():
        if not line.strip():
            continue
        match = connector.search(line)
        # The root line has no connector; it is the path itself.
        full_path = line[match.end():] if match else line.strip()
        relative_path = os.path.relpath(full_path, path)
        if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
            filtered_lines.append(line.replace('./', '', 1))

    logging.debug('Tree structure filtering complete')
    return '\n'.join(filtered_lines)
|
||||||
|
|
||||||
|
def load_ignore_specs(path='.'):
    """Load the ignore rules that apply to the repository at ``path``.

    Reads ``.repo-to-text-settings.yaml`` (if present) for the
    ``gitignore-import-and-ignore``, ``ignore-content`` and
    ``ignore-tree-and-content`` keys, then reads ``.gitignore`` unless the
    settings file disabled it.

    Args:
        path: Repository root directory.

    Returns:
        A tuple ``(gitignore_spec, content_ignore_spec,
        tree_and_content_ignore_spec)``; each element is a ``PathSpec`` or
        ``None`` when the corresponding rules are absent.
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_spec = None
    use_gitignore = True

    def spec_from(patterns):
        # A missing, null or empty value yields no spec; a single string is
        # treated as a one-pattern list rather than rejected by pathspec.
        if not patterns:
            return None
        if isinstance(patterns, str):
            patterns = [patterns]
        return pathspec.PathSpec.from_lines('gitwildmatch', patterns)

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
        with open(repo_settings_path, 'r', encoding='utf-8') as f:
            settings = yaml.safe_load(f)
        # safe_load returns None for an empty file and may return a scalar or
        # list; only a mapping carries settings.
        if not isinstance(settings, dict):
            if settings is not None:
                logging.warning(f'Ignoring settings file without a top-level mapping: {repo_settings_path}')
            settings = {}
        use_gitignore = settings.get('gitignore-import-and-ignore', True)
        content_ignore_spec = spec_from(settings.get('ignore-content'))
        tree_and_content_ignore_spec = spec_from(settings.get('ignore-tree-and-content'))

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug(f'Loading .gitignore from path: {gitignore_path}')
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||||
|
|
||||||
|
def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
    """Decide whether an entry must be left out of the generated text.

    Args:
        file_path: Path of the entry as it was discovered.
        relative_path: The same entry relative to the repository root; this
            is what the specs are matched against.
        gitignore_spec: Spec built from ``.gitignore``, or ``None``.
        content_ignore_spec: Spec built from ``ignore-content``, or ``None``.
        tree_and_content_ignore_spec: Spec built from
            ``ignore-tree-and-content``, or ``None``.

    Returns:
        True when the built-in rules or any supplied spec reject the entry,
        or when its file name starts with ``repo-to-text_``.
    """
    # Built-in rules are consulted first.
    if is_ignored_path(file_path):
        return True

    # Then each configured spec, in the same order as before.
    for spec in (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
        if spec and spec.match_file(relative_path):
            return True

    # Finally, previously generated output files are skipped by name.
    file_name = os.path.basename(file_path)
    return file_name.startswith('repo-to-text_')
|
||||||
|
|
||||||
|
def is_ignored_path(file_path: str) -> bool:
    """Return True for paths that are always excluded from the output.

    A path is excluded when it contains ``.git`` or when its file name
    starts with ``repo-to-text_`` (a previously generated output file).

    Args:
        file_path: Path to test; may include leading directories.

    Returns:
        True if the path must be ignored, False otherwise.
    """
    ignored_dirs = ['.git']
    ignored_files_prefix = ('repo-to-text_',)

    # NOTE: this is a substring test, so it also matches names such as
    # '.gitignore'; save_repo_to_text writes the '.gitignore' tree line itself.
    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)

    # Compare the file name, not the whole path: a path such as
    # './repo-to-text_x.txt' or 'out/repo-to-text_x.txt' never starts with the prefix.
    is_ignored_file = os.path.basename(file_path).startswith(ignored_files_prefix)

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug(f'Path ignored: {file_path}')
    return result
|
||||||
|
|
||||||
|
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop directory lines that have no listed file beneath them.

    A directory line is kept only when some non-directory entry in
    ``tree_output`` lies inside it, directly or in a nested directory.
    Entry paths are checked with ``os.path.isdir`` exactly as they appear
    in the listing.

    Args:
        tree_output: Output of ``tree -f``, possibly already filtered.
        path: Repository root; accepted for interface compatibility and not
            used by the computation.

    Returns:
        The retained lines joined with newlines; blank lines are dropped.
    """
    import re  # local import: only needed to parse tree lines

    logging.debug('Removing empty directories from tree output')
    # The entry path is everything after the first branch connector, so names
    # containing spaces survive intact.
    connector = re.compile(r'(?:[├└]──|[|`]--) ')

    entries = []
    for line in tree_output.splitlines():
        if not line.strip():
            continue
        match = connector.search(line)
        entries.append((line, line[match.end():] if match else line.strip()))

    # Every ancestor of a listed file counts as non-empty, so a directory
    # that only holds sub-directories is kept when files exist further down.
    dirs_with_files = set()
    for _, entry_path in entries:
        if os.path.isdir(entry_path):
            continue
        parent = os.path.dirname(entry_path)
        while parent and parent not in dirs_with_files:
            dirs_with_files.add(parent)
            parent = os.path.dirname(parent)

    final_lines = []
    for line, entry_path in entries:
        if os.path.isdir(entry_path) and entry_path not in dirs_with_files:
            logging.debug(f'Directory is empty and will be removed: {entry_path}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
||||||
|
|
||||||
|
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the directory tree and file contents of ``path`` to a text file.

    The file is named ``repo-to-text_<UTC timestamp>.txt`` and is created in
    ``output_dir`` when given, otherwise in the current directory. The
    finished text is also copied to the clipboard when one is available.

    Args:
        path: Repository root directory.
        output_dir: Optional directory for the output file; created if missing.

    Returns:
        The path of the generated file.
    """
    from datetime import timezone  # local import: the module only imports ``datetime``

    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path)
    tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged.
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo-to-text_{timestamp}.txt'

    # Determine the full path to the output file
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    # The text contains box-drawing characters and UTF-8 file contents, so the
    # encoding is pinned instead of depending on the platform default.
    with open(output_file, 'w', encoding='utf-8') as file:
        project_name = os.path.basename(os.path.abspath(path))
        file.write(f'Directory: {project_name}\n\n')
        file.write('Directory Structure:\n')
        file.write('```\n.\n')

        # Insert .gitignore if it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            file.write('├── .gitignore\n')

        file.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                # relpath already yields a path without a leading './'.
                relative_path = os.path.relpath(file_path, path)

                if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
                    continue

                file.write(f'\nContents of {relative_path}:\n')
                file.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        contents = f.read()
                except UnicodeDecodeError:
                    logging.debug(f'Could not decode file contents: {file_path}')
                    file.write('[Could not decode file contents]\n')
                else:
                    file.write(contents)
                file.write('\n```\n')

        file.write('\n')
        logging.debug('Repository contents written to file')

    # Read the contents of the generated file
    with open(output_file, 'r', encoding='utf-8') as file:
        repo_text = file.read()

    # Copy the contents to the clipboard; a machine without a clipboard must
    # not lose the file that was just written.
    try:
        pyperclip.copy(repo_text)
    except pyperclip.PyperclipException as exc:
        logging.warning(f'Could not copy repository text to clipboard: {exc}')
    else:
        logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
||||||
|
|
||||||
|
def main():
    """Parse the command-line options and generate the repository text."""
    cli = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    option_table = (
        ('--debug', {'action': 'store_true', 'help': 'Enable debug logging'}),
        ('--output-dir', {'type': str, 'help': 'Directory to save the output file'}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    setup_logging(debug=options.debug)
    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=options.output_dir)
    logging.debug('repo-to-text script finished')


if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
@ -1,420 +0,0 @@
|
||||||
Directory: repo-to-text
|
|
||||||
|
|
||||||
Directory Structure:
|
|
||||||
```
|
|
||||||
.
|
|
||||||
├── .gitignore
|
|
||||||
├── LICENSE
|
|
||||||
├── MANIFEST.in
|
|
||||||
├── README.md
|
|
||||||
├── repo_to_text
|
|
||||||
│ ├── repo_to_text/__init__.py
|
|
||||||
│ └── repo_to_text/main.py
|
|
||||||
├── requirements.txt
|
|
||||||
├── setup.py
|
|
||||||
└── tests
|
|
||||||
├── tests/__init__.py
|
|
||||||
└── tests/test_main.py
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of LICENSE:
|
|
||||||
```
|
|
||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2024 Kirill Markin
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of requirements.txt:
|
|
||||||
```
|
|
||||||
setuptools==70.0.0
|
|
||||||
pathspec==0.12.1
|
|
||||||
pytest==8.2.2
|
|
||||||
argparse==1.4.0
|
|
||||||
pyperclip==1.8.2
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of MANIFEST.in:
|
|
||||||
```
|
|
||||||
include README.md
|
|
||||||
include LICENSE
|
|
||||||
include requirements.txt
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of README.md:
|
|
||||||
```
|
|
||||||
# repo-to-text
|
|
||||||
|
|
||||||
`repo-to-text` is an open-source project that converts the structure and contents of a directory (repository) into a single text file. By executing a simple command in the terminal, this tool generates a text representation of the directory, including the output of the `tree` command and the contents of each file, formatted for easy reading and sharing.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
- Generates a text representation of a directory's structure.
|
|
||||||
- Includes the output of the `tree` command.
|
|
||||||
- Saves the contents of each file, encapsulated in markdown code blocks.
|
|
||||||
- Copies the generated text representation to the clipboard for easy sharing.
|
|
||||||
- Easy to install and use via `pip` and Homebrew.
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
### Using pip
|
|
||||||
|
|
||||||
To install `repo-to-text` via pip, run the following command:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install repo-to-text
|
|
||||||
```
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
After installation, you can use the `repo-to-text` command in your terminal. Navigate to the directory you want to convert and run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
repo-to-text
|
|
||||||
```
|
|
||||||
|
|
||||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
|
||||||
|
|
||||||
### Options
|
|
||||||
|
|
||||||
You can customize the behavior of `repo-to-text` with the following options:
|
|
||||||
|
|
||||||
- `--output-dir <path>`: Specify an output directory where the generated text file will be saved. For example:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
repo-to-text --output-dir /path/to/output
|
|
||||||
```
|
|
||||||
|
|
||||||
This will save the file in the specified output directory instead of the current directory.
|
|
||||||
|
|
||||||
- `--debug`: Enable DEBUG logging. By default, `repo-to-text` runs with INFO logging level. To enable DEBUG logging, use the `--debug` flag:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
repo-to-text --debug
|
|
||||||
```
|
|
||||||
|
|
||||||
## Example Output
|
|
||||||
|
|
||||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/db89dbfc9cfa3a8eb29dd14763bc477619a3cea4/examples/example_repo_snapshot_2024-06-08-10-30-33-UTC.txt).
|
|
||||||
|
|
||||||
## Install Locally
|
|
||||||
|
|
||||||
To install `repo-to-text` locally for development, follow these steps:
|
|
||||||
|
|
||||||
1. Clone the repository:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/kirill-markin/repo-to-text
|
|
||||||
cd repo-to-text
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Install the package locally:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -e .
|
|
||||||
```
|
|
||||||
|
|
||||||
### Installing Dependencies
|
|
||||||
|
|
||||||
To install all the required dependencies, run the following command:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
### Running Tests
|
|
||||||
|
|
||||||
To run the tests, use the following command:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest
|
|
||||||
```
|
|
||||||
|
|
||||||
## Uninstall
|
|
||||||
|
|
||||||
To uninstall the package, run the following command from the directory where the repository is located:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip uninstall repo-to-text
|
|
||||||
```
|
|
||||||
|
|
||||||
## Contributing
|
|
||||||
|
|
||||||
Contributions are welcome! If you have any suggestions or find a bug, please open an issue or submit a pull request.
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
This project is licensed under the MIT License - see the [LICENSE](https://github.com/kirill-markin/repo-to-text/blob/main/LICENSE) file for details.
|
|
||||||
|
|
||||||
## Contact
|
|
||||||
|
|
||||||
This project is maintained by [Kirill Markin](https://github.com/kirill-markin). For any inquiries or feedback, please contact [markinkirill@gmail.com](mailto:markinkirill@gmail.com).
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of setup.py:
|
|
||||||
```
|
|
||||||
from setuptools import setup, find_packages
|
|
||||||
|
|
||||||
# Runtime dependencies come from requirements.txt; blank lines and comments
# are skipped so they never reach install_requires.
with open('requirements.txt', encoding='utf-8') as f:
    required = [
        line.strip()
        for line in f
        if line.strip() and not line.lstrip().startswith('#')
    ]

# Read the long description inside a context manager so the handle is closed.
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='repo-to-text',
    version='0.1.1',
    author='Kirill Markin',
    author_email='markinkirill@gmail.com',
    description='Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/kirill-markin/repo-to-text',
    license='MIT',
    packages=find_packages(),
    install_requires=required,
    include_package_data=True,
    entry_points={
        'console_scripts': [
            'repo-to-text=repo_to_text.main:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of tests/__init__.py:
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of tests/test_main.py:
|
|
||||||
```
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import pytest
|
|
||||||
import time
|
|
||||||
|
|
||||||
def test_repo_to_text():
    """Run the ``repo-to-text`` command and expect exactly one non-empty snapshot.

    Snapshot files are looked up in the current directory by the
    ``repo_snapshot_`` prefix and the ``.txt`` suffix.
    """
    def find_snapshots():
        return [f for f in os.listdir('.') if f.startswith('repo_snapshot_') and f.endswith('.txt')]

    # Remove any existing snapshot files to avoid conflicts
    for stale in find_snapshots():
        os.remove(stale)

    try:
        # Run the repo-to-text command
        result = subprocess.run(['repo-to-text'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Assert that the command ran without errors; undecodable stderr bytes
        # must not hide the real failure.
        assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8', errors='replace')}"

        # Check for the existence of the new snapshot file
        snapshot_files = find_snapshots()
        assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"

        # Verify that the snapshot file is not empty
        assert os.path.getsize(snapshot_files[0]) > 0, "Snapshot file is empty"
    finally:
        # Clean up even when an assertion fails, so no snapshot is left behind.
        for leftover in find_snapshots():
            os.remove(leftover)


if __name__ == "__main__":
    pytest.main()
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of repo_to_text/__init__.py:
|
|
||||||
```
|
|
||||||
__author__ = 'Kirill Markin'
|
|
||||||
__email__ = 'markinkirill@gmail.com'
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Contents of repo_to_text/main.py:
|
|
||||||
```
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import pathspec
|
|
||||||
import logging
|
|
||||||
import argparse
|
|
||||||
from datetime import datetime
|
|
||||||
import pyperclip
|
|
||||||
|
|
||||||
def setup_logging(debug=False):
    """Configure the root logger for the command-line tool.

    Args:
        debug: When true, log at DEBUG level; otherwise log at INFO level.
    """
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
|
|
||||||
|
|
||||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
    """Return the ``tree`` listing of ``path`` with ignored entries removed.

    Runs ``tree -a -f --noreport`` and, when a spec is supplied, drops every
    line whose entry matches the spec or is rejected by ``is_ignored_path``.

    Args:
        path: Directory to list.
        gitignore_spec: Optional spec built from ``.gitignore``.

    Returns:
        The raw ``tree`` output when no spec is given, otherwise the
        retained lines joined with newlines.

    Raises:
        FileNotFoundError: If the ``tree`` executable is not installed.
    """
    import re  # local import: only needed to parse tree lines

    logging.debug(f'Generating tree structure for path: {path}')
    try:
        result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    except FileNotFoundError:
        logging.error("The 'tree' command was not found; it is required to build the directory structure")
        raise
    if result.returncode != 0:
        logging.warning(f"'tree' exited with status {result.returncode} for path: {path}")
    tree_output = result.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec:
        logging.debug('No .gitignore specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore specification')
    # The entry path is everything after the first branch connector, so names
    # that contain spaces are kept whole instead of being cut at the last space.
    connector = re.compile(r'(?:[├└]──|[|`]--) ')
    filtered_lines = []
    for line in tree_output.splitlines():
        if not line.strip():
            continue
        match = connector.search(line)
        # The root line has no connector; it is the path itself.
        full_path = line[match.end():] if match else line.strip()
        relative_path = os.path.relpath(full_path, path)
        if not gitignore_spec.match_file(relative_path) and not is_ignored_path(relative_path):
            filtered_lines.append(line.replace('./', '', 1))

    logging.debug('Tree structure filtering complete')
    return '\n'.join(filtered_lines)
|
|
||||||
|
|
||||||
def load_gitignore(path='.'):
    """Build a ``PathSpec`` from the ``.gitignore`` file in ``path``.

    Args:
        path: Directory expected to contain ``.gitignore``.

    Returns:
        The ``PathSpec`` built from the file's lines, or ``None`` when no
        ``.gitignore`` file exists there.
    """
    gitignore_path = os.path.join(path, '.gitignore')
    # isfile (not exists) so a directory named '.gitignore' is treated as absent
    # instead of making open() fail.
    if os.path.isfile(gitignore_path):
        logging.debug(f'Loading .gitignore from path: {gitignore_path}')
        # Pin the encoding so parsing does not depend on the platform default.
        with open(gitignore_path, 'r', encoding='utf-8') as f:
            return pathspec.PathSpec.from_lines('gitwildmatch', f)
    logging.debug('.gitignore not found')
    return None
|
|
||||||
|
|
||||||
def is_ignored_path(file_path: str) -> bool:
    """Return True for paths that are always excluded from the output.

    A path is excluded when it contains ``.git`` or when its file name
    starts with ``repo_snapshot_`` (a previously generated output file).

    Args:
        file_path: Path to test; may include leading directories.

    Returns:
        True if the path must be ignored, False otherwise.
    """
    ignored_dirs = ['.git']
    ignored_files_prefix = ('repo_snapshot_',)

    # NOTE: this is a substring test, so it also matches names such as
    # '.gitignore'; save_repo_to_text writes the '.gitignore' tree line itself.
    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)

    # Compare the file name, not the whole path: a path such as
    # './repo_snapshot_x.txt' or 'out/repo_snapshot_x.txt' never starts with the prefix.
    is_ignored_file = os.path.basename(file_path).startswith(ignored_files_prefix)

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug(f'Path ignored: {file_path}')
    return result
|
|
||||||
|
|
||||||
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop directory lines that have no listed file beneath them.

    A directory line is kept only when some non-directory entry in
    ``tree_output`` lies inside it, directly or in a nested directory.
    Entry paths are checked with ``os.path.isdir`` exactly as they appear
    in the listing.

    Args:
        tree_output: Output of ``tree -f``, possibly already filtered.
        path: Repository root; accepted for interface compatibility and not
            used by the computation.

    Returns:
        The retained lines joined with newlines; blank lines are dropped.
    """
    import re  # local import: only needed to parse tree lines

    logging.debug('Removing empty directories from tree output')
    # The entry path is everything after the first branch connector, so names
    # containing spaces survive intact.
    connector = re.compile(r'(?:[├└]──|[|`]--) ')

    entries = []
    for line in tree_output.splitlines():
        if not line.strip():
            continue
        match = connector.search(line)
        entries.append((line, line[match.end():] if match else line.strip()))

    # Every ancestor of a listed file counts as non-empty, so a directory
    # that only holds sub-directories is kept when files exist further down.
    dirs_with_files = set()
    for _, entry_path in entries:
        if os.path.isdir(entry_path):
            continue
        parent = os.path.dirname(entry_path)
        while parent and parent not in dirs_with_files:
            dirs_with_files.add(parent)
            parent = os.path.dirname(parent)

    final_lines = []
    for line, entry_path in entries:
        if os.path.isdir(entry_path) and entry_path not in dirs_with_files:
            logging.debug(f'Directory is empty and will be removed: {entry_path}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
|
||||||
|
|
||||||
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the directory tree and file contents of ``path`` to a text file.

    The file is named ``repo_snapshot_<UTC timestamp>.txt`` and is created in
    ``output_dir`` when given, otherwise in the current directory. The
    finished text is also copied to the clipboard when one is available.

    Args:
        path: Repository root directory.
        output_dir: Optional directory for the output file; created if missing.

    Returns:
        The path of the generated file.
    """
    from datetime import timezone  # local import: the module only imports ``datetime``

    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec = load_gitignore(path)
    tree_structure = get_tree_structure(path, gitignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged.
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo_snapshot_{timestamp}.txt'

    # Determine the full path to the output file
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)
    output_abspath = os.path.abspath(output_file)

    # The text contains box-drawing characters and UTF-8 file contents, so the
    # encoding is pinned instead of depending on the platform default.
    with open(output_file, 'w', encoding='utf-8') as file:
        project_name = os.path.basename(os.path.abspath(path))
        file.write(f'Directory: {project_name}\n\n')
        file.write('Directory Structure:\n')
        file.write('```\n.\n')

        # Insert .gitignore if it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            file.write('├── .gitignore\n')

        file.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                # relpath already yields a path without a leading './'.
                relative_path = os.path.relpath(file_path, path)

                # Never read the file that is currently being written.
                if os.path.abspath(file_path) == output_abspath:
                    continue

                if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)):
                    continue

                file.write(f'\nContents of {relative_path}:\n')
                file.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        contents = f.read()
                except UnicodeDecodeError:
                    logging.error(f'Could not decode file contents: {file_path}')
                    file.write('[Could not decode file contents]\n')
                else:
                    file.write(contents)
                file.write('\n```\n')

        file.write('\n')
        logging.debug('Repository contents written to file')

    # Read the contents of the generated file
    with open(output_file, 'r', encoding='utf-8') as file:
        repo_text = file.read()

    # Copy the contents to the clipboard; a machine without a clipboard must
    # not lose the file that was just written.
    try:
        pyperclip.copy(repo_text)
    except pyperclip.PyperclipException as exc:
        logging.warning(f'Could not copy repository text to clipboard: {exc}')
    else:
        logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
|
||||||
|
|
||||||
def main():
    """Parse the command-line options and generate the repository text."""
    cli = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    option_table = (
        ('--debug', {'action': 'store_true', 'help': 'Enable debug logging'}),
        ('--output-dir', {'type': str, 'help': 'Directory to save the output file'}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    setup_logging(debug=options.debug)
    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=options.output_dir)
    logging.debug('repo-to-text script finished')


if __name__ == '__main__':
    main()
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
@ -3,6 +3,7 @@ import subprocess
|
||||||
import pathspec
|
import pathspec
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
|
import yaml
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import pyperclip
|
import pyperclip
|
||||||
|
|
||||||
|
|
@ -10,41 +11,67 @@ def setup_logging(debug=False):
|
||||||
logging_level = logging.DEBUG if debug else logging.INFO
|
logging_level = logging.DEBUG if debug else logging.INFO
|
||||||
logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
|
logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
|
def get_tree_structure(path='.', gitignore_spec=None, tree_and_content_ignore_spec=None) -> str:
|
||||||
logging.debug(f'Generating tree structure for path: {path}')
|
logging.debug(f'Generating tree structure for path: {path}')
|
||||||
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
|
result = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
|
||||||
tree_output = result.stdout.decode('utf-8')
|
tree_output = result.stdout.decode('utf-8')
|
||||||
logging.debug(f'Tree output generated: {tree_output}')
|
logging.debug(f'Tree output generated: {tree_output}')
|
||||||
|
|
||||||
if not gitignore_spec:
|
if not gitignore_spec and not tree_and_content_ignore_spec:
|
||||||
logging.debug('No .gitignore specification found')
|
logging.debug('No .gitignore or ignore-tree-and-content specification found')
|
||||||
return tree_output
|
return tree_output
|
||||||
|
|
||||||
logging.debug('Filtering tree output based on .gitignore specification')
|
logging.debug('Filtering tree output based on .gitignore and ignore-tree-and-content specification')
|
||||||
filtered_lines = []
|
filtered_lines = []
|
||||||
for line in tree_output.splitlines():
|
for line in tree_output.splitlines():
|
||||||
parts = line.strip().split()
|
parts = line.strip().split()
|
||||||
if parts:
|
if parts:
|
||||||
full_path = parts[-1]
|
full_path = parts[-1]
|
||||||
relative_path = os.path.relpath(full_path, path)
|
relative_path = os.path.relpath(full_path, path)
|
||||||
if not gitignore_spec.match_file(relative_path) and not is_ignored_path(relative_path):
|
if not should_ignore_file(full_path, relative_path, gitignore_spec, None, tree_and_content_ignore_spec):
|
||||||
filtered_lines.append(line.replace('./', '', 1))
|
filtered_lines.append(line.replace('./', '', 1))
|
||||||
|
|
||||||
logging.debug('Tree structure filtering complete')
|
logging.debug('Tree structure filtering complete')
|
||||||
return '\n'.join(filtered_lines)
|
return '\n'.join(filtered_lines)
|
||||||
|
|
||||||
def load_gitignore(path='.'):
|
def load_ignore_specs(path='.'):
|
||||||
gitignore_path = os.path.join(path, '.gitignore')
|
gitignore_spec = None
|
||||||
if os.path.exists(gitignore_path):
|
content_ignore_spec = None
|
||||||
logging.debug(f'Loading .gitignore from path: {gitignore_path}')
|
tree_and_content_ignore_spec = None
|
||||||
with open(gitignore_path, 'r') as f:
|
use_gitignore = True
|
||||||
return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
|
||||||
logging.debug('.gitignore not found')
|
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
|
||||||
return None
|
if os.path.exists(repo_settings_path):
|
||||||
|
logging.debug(f'Loading .repo-to-text-settings.yaml from path: {repo_settings_path}')
|
||||||
|
with open(repo_settings_path, 'r') as f:
|
||||||
|
settings = yaml.safe_load(f)
|
||||||
|
use_gitignore = settings.get('gitignore-import-and-ignore', True)
|
||||||
|
if 'ignore-content' in settings:
|
||||||
|
content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-content'])
|
||||||
|
if 'ignore-tree-and-content' in settings:
|
||||||
|
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', settings['ignore-tree-and-content'])
|
||||||
|
|
||||||
|
if use_gitignore:
|
||||||
|
gitignore_path = os.path.join(path, '.gitignore')
|
||||||
|
if os.path.exists(gitignore_path):
|
||||||
|
logging.debug(f'Loading .gitignore from path: {gitignore_path}')
|
||||||
|
with open(gitignore_path, 'r') as f:
|
||||||
|
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||||
|
|
||||||
|
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||||
|
|
||||||
|
def should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
|
||||||
|
return (
|
||||||
|
is_ignored_path(file_path) or
|
||||||
|
(gitignore_spec and gitignore_spec.match_file(relative_path)) or
|
||||||
|
(content_ignore_spec and content_ignore_spec.match_file(relative_path)) or
|
||||||
|
(tree_and_content_ignore_spec and tree_and_content_ignore_spec.match_file(relative_path)) or
|
||||||
|
os.path.basename(file_path).startswith('repo-to-text_')
|
||||||
|
)
|
||||||
|
|
||||||
def is_ignored_path(file_path: str) -> bool:
|
def is_ignored_path(file_path: str) -> bool:
|
||||||
ignored_dirs = ['.git']
|
ignored_dirs = ['.git']
|
||||||
ignored_files_prefix = ['repo_snapshot_']
|
ignored_files_prefix = ['repo-to-text_']
|
||||||
is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)
|
is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)
|
||||||
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
|
is_ignored_file = any(file_path.startswith(prefix) for prefix in ignored_files_prefix)
|
||||||
result = is_ignored_dir or is_ignored_file
|
result = is_ignored_dir or is_ignored_file
|
||||||
|
|
@ -83,13 +110,13 @@ def remove_empty_dirs(tree_output: str, path='.') -> str:
|
||||||
|
|
||||||
def save_repo_to_text(path='.', output_dir=None) -> str:
|
def save_repo_to_text(path='.', output_dir=None) -> str:
|
||||||
logging.debug(f'Starting to save repo structure to text for path: {path}')
|
logging.debug(f'Starting to save repo structure to text for path: {path}')
|
||||||
gitignore_spec = load_gitignore(path)
|
gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = load_ignore_specs(path)
|
||||||
tree_structure = get_tree_structure(path, gitignore_spec)
|
tree_structure = get_tree_structure(path, gitignore_spec, tree_and_content_ignore_spec)
|
||||||
tree_structure = remove_empty_dirs(tree_structure, path)
|
tree_structure = remove_empty_dirs(tree_structure, path)
|
||||||
|
|
||||||
# Add timestamp to the output file name with a descriptive name
|
# Add timestamp to the output file name with a descriptive name
|
||||||
timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC')
|
||||||
output_file = f'repo_snapshot_{timestamp}.txt'
|
output_file = f'repo-to-text_{timestamp}.txt'
|
||||||
|
|
||||||
# Determine the full path to the output file
|
# Determine the full path to the output file
|
||||||
if output_dir:
|
if output_dir:
|
||||||
|
|
@ -115,7 +142,7 @@ def save_repo_to_text(path='.', output_dir=None) -> str:
|
||||||
file_path = os.path.join(root, filename)
|
file_path = os.path.join(root, filename)
|
||||||
relative_path = os.path.relpath(file_path, path)
|
relative_path = os.path.relpath(file_path, path)
|
||||||
|
|
||||||
if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)):
|
if should_ignore_file(file_path, relative_path, gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
relative_path = relative_path.replace('./', '', 1)
|
relative_path = relative_path.replace('./', '', 1)
|
||||||
|
|
|
||||||
|
|
@ -3,3 +3,4 @@ pathspec==0.12.1
|
||||||
pytest==8.2.2
|
pytest==8.2.2
|
||||||
argparse==1.4.0
|
argparse==1.4.0
|
||||||
pyperclip==1.8.2
|
pyperclip==1.8.2
|
||||||
|
PyYAML==6.0.1
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import time
|
||||||
def test_repo_to_text():
|
def test_repo_to_text():
|
||||||
# Remove any existing snapshot files to avoid conflicts
|
# Remove any existing snapshot files to avoid conflicts
|
||||||
for file in os.listdir('.'):
|
for file in os.listdir('.'):
|
||||||
if file.startswith('repo_snapshot_') and file.endswith('.txt'):
|
if file.startswith('repo-to-text_') and file.endswith('.txt'):
|
||||||
os.remove(file)
|
os.remove(file)
|
||||||
|
|
||||||
# Run the repo-to-text command
|
# Run the repo-to-text command
|
||||||
|
|
@ -16,7 +16,7 @@ def test_repo_to_text():
|
||||||
assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}"
|
assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}"
|
||||||
|
|
||||||
# Check for the existence of the new snapshot file
|
# Check for the existence of the new snapshot file
|
||||||
snapshot_files = [f for f in os.listdir('.') if f.startswith('repo_snapshot_') and f.endswith('.txt')]
|
snapshot_files = [f for f in os.listdir('.') if f.startswith('repo-to-text_') and f.endswith('.txt')]
|
||||||
assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"
|
assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"
|
||||||
|
|
||||||
# Verify that the snapshot file is not empty
|
# Verify that the snapshot file is not empty
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue