mirror of
https://github.com/kirill-markin/repo-to-text.git
synced 2025-12-06 03:22:23 -08:00
Compare commits
100 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77209f30aa | ||
|
|
8a94182b3d | ||
|
|
bcb0d82191 | ||
|
|
2807344752 | ||
|
|
3721ed45f0 | ||
|
|
de1c84eca3 | ||
|
|
0ace858645 | ||
|
|
44153cde98 | ||
|
|
b04dd8df63 | ||
|
|
14d2b3b36e | ||
|
|
689dd362ec | ||
|
|
57026bd52e | ||
|
|
241ce0ef70 | ||
|
|
d7badce9ae | ||
|
|
3731c01a20 | ||
|
|
7a60741471 | ||
|
|
5c5b0ab941 | ||
|
|
34aa48c0a1 | ||
|
|
e066b481af | ||
|
|
9431ff9d07 | ||
|
|
58b4b23eae | ||
|
|
d8977b8cf4 | ||
|
|
cde732c57d | ||
|
|
7c32b7a565 | ||
|
|
a9d54aa0ca | ||
|
|
1817078e46 | ||
|
|
4d99a1aa59 | ||
|
|
d124fa24cc | ||
|
|
ecfbed98ac | ||
|
|
2a08a70cf4 | ||
|
|
17c7bb76e8 | ||
|
|
62e6daf19c | ||
|
|
4eac47029f | ||
|
|
a364328e60 | ||
|
|
5f283feefd | ||
|
|
0cba3592f2 | ||
|
|
2a8f31cf18 | ||
|
|
6bcd21f40f | ||
|
|
fae0657b04 | ||
|
|
9567b8bb6d | ||
|
|
b2148b4b5b | ||
|
|
d89052115a | ||
|
|
6198b4f410 | ||
|
|
e39e7a8896 | ||
|
|
5e1ae59375 | ||
|
|
8ac3d0b727 | ||
|
|
577d42746d | ||
|
|
a7e906c294 | ||
|
|
dbfa602cd3 | ||
|
|
9b03465644 | ||
|
|
6a434e5174 | ||
|
|
38dac27984 | ||
|
|
4e86177cfd | ||
|
|
fd70f24eed | ||
|
|
46941115f1 | ||
|
|
d93a1d1fb0 | ||
|
|
bd0f242c08 | ||
|
|
2759fd3640 | ||
|
|
4642e3fd09 | ||
|
|
ce996c5db7 | ||
|
|
07521f06ca | ||
|
|
fd84b62395 | ||
|
|
6b63d5309f | ||
|
|
0c08bf54d7 | ||
|
|
602bd99e89 | ||
|
|
3e236d7508 | ||
|
|
d3998786c0 | ||
|
|
9847e1ff46 | ||
|
|
ad36a75a7a | ||
|
|
42326ae797 | ||
|
|
6e33556c4a | ||
|
|
1cf311bcf9 | ||
|
|
18b4b0bcd3 | ||
|
|
c8fa443421 | ||
|
|
0ff9f97d8a | ||
|
|
3513ae7165 | ||
|
|
6dd163f17e | ||
|
|
5f65e98a72 | ||
|
|
e1e6819fb9 | ||
|
|
8bde83f9f1 | ||
|
|
d75e2f9f72 | ||
|
|
d5cc239e64 | ||
|
|
57f2e65a68 | ||
|
|
3b5a3d8cd2 | ||
|
|
cfb04dfcd1 | ||
|
|
43beff4666 | ||
|
|
2d4349ed45 | ||
|
|
5f1061a493 | ||
|
|
ea765924ea | ||
|
|
03372e09df | ||
|
|
ca76a73454 | ||
|
|
cf25357505 | ||
|
|
bdc2f2be42 | ||
|
|
72ac64ceb6 | ||
|
|
b6bcdeca03 | ||
|
|
7921839f08 | ||
|
|
1d715cd18a | ||
|
|
34362b6b66 | ||
|
|
d220534dc7 | ||
|
|
61a7fb4188 |
30 changed files with 4104 additions and 662 deletions
49
.cursor/index.mdc
Normal file
49
.cursor/index.mdc
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
---
|
||||
alwaysApply: true
|
||||
---
|
||||
|
||||
# repo-to-text
|
||||
|
||||
## Project Overview
|
||||
`repo-to-text` is a command-line tool that converts a directory's structure and contents into a single text file.
|
||||
It generates a formatted XML representation that includes the directory tree and file contents, making it easy to share code with LLMs for development and debugging.
|
||||
|
||||
## Usage
|
||||
- Install: `pip install repo-to-text`
|
||||
- Run: `cd <your-repo-dir> && repo-to-text`
|
||||
- The result will be saved in the current directory as `repo-to-text_YYYY-MM-DD-HH-MM-SS-UTC.txt`
|
||||
|
||||
## Common Commands
|
||||
- `repo-to-text` - Process current directory
|
||||
- `repo-to-text /path/to/dir` - Process specific directory
|
||||
- `repo-to-text --output-dir /path/to/output` - Specify output directory
|
||||
- `repo-to-text --stdout > myfile.txt` - Output to stdout and redirect to file
|
||||
- `repo-to-text --create-settings` - Create a default settings file
|
||||
|
||||
## Output Format
|
||||
The tool generates an XML-structured output with:
|
||||
- Root `<repo-to-text>` tag
|
||||
- Directory structure in `<directory_structure>` tags
|
||||
- File contents in `<content full_path="...">` tags
|
||||
|
||||
## Configuration
|
||||
- Create `.repo-to-text-settings.yaml` at the root of your project
|
||||
- Use gitignore-style rules to specify what files to ignore
|
||||
- Configure what files to include in the tree and content sections
|
||||
|
||||
## Development
|
||||
- Python >= 3.6
|
||||
- Install dev dependencies: `pip install -e ".[dev]"`
|
||||
- Run tests: `pytest`
|
||||
|
||||
## Testing
|
||||
- Tests are located in the `tests/` directory
|
||||
- Main test files:
|
||||
- `tests/test_core.py` - Tests for core functionality
|
||||
- `tests/test_cli.py` - Tests for command-line interface
|
||||
- `tests/test_utils.py` - Tests for utility functions
|
||||
- Run all tests: `pytest`
|
||||
- Run specific test file: `pytest tests/test_core.py`
|
||||
- Run with coverage: `pytest --cov=repo_to_text`
|
||||
- Test temporary directories are created and cleaned up automatically
|
||||
- Binary file handling is tested with mock binary data
|
||||
1
.cursorignore
Normal file
1
.cursorignore
Normal file
|
|
@ -0,0 +1 @@
|
|||
examples/*
|
||||
36
.github/workflows/tests.yml
vendored
Normal file
36
.github/workflows/tests.yml
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
name: Run Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.11", "3.13"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install tree command
|
||||
run: sudo apt-get install -y tree
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install .[dev]
|
||||
- name: Run pylint
|
||||
run: |
|
||||
pylint repo_to_text
|
||||
- name: Run tests
|
||||
run: |
|
||||
pytest tests/
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -168,5 +168,5 @@ cython_debug/
|
|||
# Ignore egg-info directory
|
||||
repo_to_text.egg-info/
|
||||
|
||||
# Ignore generated repo_snapshot_*.txt files
|
||||
repo_snapshot_*.txt
|
||||
# Ignore generated repo-to-text_*.txt files
|
||||
repo-to-text_*.txt
|
||||
|
|
|
|||
25
.repo-to-text-settings.yaml
Normal file
25
.repo-to-text-settings.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Details: https://github.com/kirill-markin/repo-to-text
|
||||
# Syntax: gitignore rules
|
||||
|
||||
# Ignore files and directories for all sections from gitignore file
|
||||
# Default: True
|
||||
gitignore-import-and-ignore: True
|
||||
|
||||
# Ignore files and directories for tree
|
||||
# and contents sections (<content full_path="...">...</content>)
|
||||
ignore-tree-and-content:
|
||||
- ".repo-to-text-settings.yaml"
|
||||
- "examples/"
|
||||
- "MANIFEST.in"
|
||||
- "setup.py"
|
||||
|
||||
# Ignore files and directories for contents sections
|
||||
ignore-content:
|
||||
- "README.md"
|
||||
- "LICENSE"
|
||||
- "tests/"
|
||||
|
||||
# Optional: Maximum number of words per output file before splitting.
|
||||
# If not specified or null, no splitting based on word count will occur.
|
||||
# Must be a positive integer if set.
|
||||
# maximum_word_count_per_file: 10000
|
||||
1
AGENTS.md
Symbolic link
1
AGENTS.md
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
.cursor/index.mdc
|
||||
1
CLAUDE.md
Symbolic link
1
CLAUDE.md
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
.cursor/index.mdc
|
||||
33
Dockerfile
Normal file
33
Dockerfile
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
FROM python:3.12-slim
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -s /bin/bash user
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tree \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy all necessary files for package installation
|
||||
COPY pyproject.toml README.md ./
|
||||
|
||||
# Copy the package source
|
||||
COPY repo_to_text ./repo_to_text
|
||||
|
||||
# Install the package
|
||||
RUN pip install --no-cache-dir -e .
|
||||
|
||||
# Copy remaining files
|
||||
COPY . .
|
||||
|
||||
# Set default user
|
||||
USER user
|
||||
|
||||
ENTRYPOINT ["repo-to-text"]
|
||||
|
|
@ -1,3 +1,2 @@
|
|||
include README.md
|
||||
include LICENSE
|
||||
include requirements.txt
|
||||
include LICENSE
|
||||
233
README.md
233
README.md
|
|
@ -1,14 +1,48 @@
|
|||
# repo-to-text
|
||||
# Repository to Text Conversion: repo-to-text command
|
||||
|
||||
`repo-to-text` is an open-source project that converts the structure and contents of a directory (repository) into a single text file. By executing a simple command in the terminal, this tool generates a text representation of the directory, including the output of the `tree` command and the contents of each file, formatted for easy reading and sharing.
|
||||
`repo-to-text` converts a directory's structure and contents into a single text file. Run it from the terminal to generate a formatted text representation that includes the directory tree and file contents. This makes it easy to share code with LLMs for development and debugging.
|
||||
|
||||
## Features
|
||||
## Quick Start
|
||||
|
||||
- Generates a text representation of a directory's structure.
|
||||
- Includes the output of the `tree` command.
|
||||
- Saves the contents of each file, encapsulated in markdown code blocks.
|
||||
- Copies the generated text representation to the clipboard for easy sharing.
|
||||
- Easy to install and use via `pip` and Homebrew.
|
||||
1. `pip install repo-to-text` — install the package
|
||||
2. `cd <your-repo-dir>` — navigate to the repository directory
|
||||
3. `repo-to-text` — run the command; the result will be saved in the current directory
|
||||
|
||||
## Example of Repository to Text Conversion
|
||||
|
||||

|
||||
|
||||
The generated text file will include the directory structure and contents of each file, using XML tags for better structure:
|
||||
|
||||
```xml
|
||||
<repo-to-text>
|
||||
Directory: myproject
|
||||
|
||||
Directory Structure:
|
||||
<directory_structure>
|
||||
.
|
||||
├── .gitignore
|
||||
├── README.md
|
||||
└── src
|
||||
└── main.py
|
||||
</directory_structure>
|
||||
|
||||
<content full_path="README.md">
|
||||
# My Project
|
||||
This is a simple project.
|
||||
</content>
|
||||
|
||||
<content full_path="src/main.py">
|
||||
def main():
|
||||
print("Hello, World!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
</content>
|
||||
|
||||
</repo-to-text>
```
|
||||
|
||||
For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt).
|
||||
|
||||
## Installation
|
||||
|
||||
|
|
@ -20,6 +54,12 @@ To install `repo-to-text` via pip, run the following command:
|
|||
pip install repo-to-text
|
||||
```
|
||||
|
||||
To upgrade to the latest version, use the following command:
|
||||
|
||||
```bash
|
||||
pip install --upgrade repo-to-text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
After installation, you can use the `repo-to-text` command in your terminal. Navigate to the directory you want to convert and run:
|
||||
|
|
@ -28,7 +68,13 @@ After installation, you can use the `repo-to-text` command in your terminal. Nav
|
|||
repo-to-text
|
||||
```
|
||||
|
||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
or
|
||||
|
||||
```bash
|
||||
flatten
|
||||
```
|
||||
|
||||
This will create a file named `repo-to-text_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
|
||||
### Options
|
||||
|
||||
|
|
@ -42,15 +88,154 @@ You can customize the behavior of `repo-to-text` with the following options:
|
|||
|
||||
This will save the file in the specified output directory instead of the current directory.
|
||||
|
||||
- `--create-settings` or `--init`: Create a default `.repo-to-text-settings.yaml` file with predefined settings. This is useful if you want to start with a template settings file and customize it according to your needs. To create the default settings file, run the following command in your terminal:
|
||||
|
||||
```bash
|
||||
repo-to-text --create-settings
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
repo-to-text --init
|
||||
```
|
||||
|
||||
This will create a file named `.repo-to-text-settings.yaml` in the current directory. If the file already exists, an error will be raised to prevent overwriting.
|
||||
|
||||
- `--debug`: Enable DEBUG logging. By default, `repo-to-text` runs with INFO logging level. To enable DEBUG logging, use the `--debug` flag:
|
||||
|
||||
```bash
|
||||
repo-to-text --debug
|
||||
```
|
||||
|
||||
## Example Output
|
||||
or to save the debug log to a file:
|
||||
|
||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/main/examples/example_repo_snapshot_2024-06-08-11-35-28-UTC.txt).
|
||||
```bash
|
||||
repo-to-text --debug > debug_log.txt 2>&1
|
||||
```
|
||||
|
||||
- `input_dir`: Specify the directory to process. If not provided, the current directory (`.`) will be used. For example:
|
||||
|
||||
```bash
|
||||
repo-to-text /path/to/input_dir
|
||||
```
|
||||
|
||||
- `--stdout`: Output the generated text to stdout instead of a file. This is useful for piping the output to another command or saving it to a file using shell redirection. For example:
|
||||
|
||||
```bash
|
||||
repo-to-text --stdout > myfile.txt
|
||||
```
|
||||
|
||||
This will write the output directly to `myfile.txt` instead of creating a timestamped file.
|
||||
|
||||
## Docker Usage
|
||||
|
||||
### Building and Running
|
||||
|
||||
1. Build the container:
|
||||
|
||||
```bash
|
||||
docker compose build
|
||||
```
|
||||
|
||||
2. Start a shell session:
|
||||
|
||||
```bash
|
||||
docker compose run --rm repo-to-text
|
||||
```
|
||||
|
||||
Once in the shell, you can run `repo-to-text`:
|
||||
|
||||
- Process current directory:
|
||||
|
||||
```bash
|
||||
repo-to-text
|
||||
```
|
||||
|
||||
- Process specific directory:
|
||||
|
||||
```bash
|
||||
repo-to-text /home/user/myproject
|
||||
```
|
||||
|
||||
- Use with options:
|
||||
|
||||
```bash
|
||||
repo-to-text --output-dir /home/user/output
|
||||
```
|
||||
|
||||
The container mounts your home directory at `/home/user`, allowing access to all your projects.
|
||||
|
||||
## Settings
|
||||
|
||||
`repo-to-text` also supports configuration via a `.repo-to-text-settings.yaml` file. By default, the tool works without this file, but you can use it to customize what gets included in the final text file.
|
||||
|
||||
### Creating the Settings File
|
||||
|
||||
To create a settings file, add a file named `.repo-to-text-settings.yaml` at the root of your project with the following content:
|
||||
|
||||
```yaml
|
||||
# Syntax: gitignore rules
|
||||
|
||||
# Ignore files and directories for all sections from gitignore file
|
||||
# Default: True
|
||||
gitignore-import-and-ignore: True
|
||||
|
||||
# Ignore files and directories for tree
|
||||
# and contents sections (<content full_path="...">...</content>)
|
||||
ignore-tree-and-content:
|
||||
- ".repo-to-text-settings.yaml"
|
||||
- "examples/"
|
||||
- "MANIFEST.in"
|
||||
- "setup.py"
|
||||
|
||||
# Ignore files and directories for contents sections
|
||||
ignore-content:
|
||||
- "README.md"
|
||||
- "LICENSE"
|
||||
- "tests/"
|
||||
```
|
||||
|
||||
You can copy this file from the [existing example in the project](https://github.com/kirill-markin/repo-to-text/blob/main/.repo-to-text-settings.yaml) and adjust it to your needs. This file allows you to specify rules for what should be ignored when creating the text representation of the repository.
|
||||
|
||||
### Configuration Options
|
||||
|
||||
- **gitignore-import-and-ignore**: Ignore files and directories specified in `.gitignore` for all sections.
|
||||
- **ignore-tree-and-content**: Ignore files and directories for the tree and contents sections.
|
||||
- **ignore-content**: Ignore files and directories only for the contents sections.
|
||||
|
||||
Using these settings, you can control which files and directories are included or excluded from the final text file.
|
||||
- **maximum_word_count_per_file**: Optional integer. Sets a maximum word count for each output file. If the total content exceeds this limit, the output will be split into multiple files. The split files will be named using the convention `output_filename_part_N.txt`, where `N` is the part number.
|
||||
Example:
|
||||
```yaml
|
||||
# Optional: Maximum word count per output file.
|
||||
# If set, the output will be split into multiple files if the total word count exceeds this.
|
||||
# maximum_word_count_per_file: 10000
|
||||
```
|
||||
|
||||
### Wildcards and Inclusions
|
||||
|
||||
Using Wildcard Patterns
|
||||
|
||||
- `*.ext`: Matches any file ending with .ext in any directory.
|
||||
- `dir/*.ext`: Matches files ending with .ext in the specified directory dir/.
|
||||
- `**/*.ext`: Matches files ending with .ext in any subdirectory (recursive).
|
||||
|
||||
If you want to include certain files that would otherwise be ignored, prefix the pattern with `!` (gitignore-style negation):
|
||||
|
||||
```yaml
|
||||
ignore-tree-and-content:
|
||||
- "*.txt"
|
||||
- "!README.txt"
|
||||
```
|
||||
|
||||
## gitignore Rule to Ignore Generated Files
|
||||
|
||||
To ignore the generated text files, add the following lines to your `.gitignore` file:
|
||||
|
||||
```gitignore
|
||||
repo-to-text_*.txt
|
||||
```
|
||||
|
||||
## Install Locally
|
||||
|
||||
|
|
@ -63,19 +248,31 @@ To install `repo-to-text` locally for development, follow these steps:
|
|||
cd repo-to-text
|
||||
```
|
||||
|
||||
2. Install the package locally:
|
||||
2. Install the package with development dependencies:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
### Installing Dependencies
|
||||
### Requirements
|
||||
|
||||
To install all the required dependencies, run the following command:
|
||||
- Python >= 3.6
|
||||
- Core dependencies:
|
||||
- setuptools >= 70.0.0
|
||||
- pathspec >= 0.12.1
|
||||
- argparse >= 1.4.0
|
||||
- PyYAML >= 6.0.1
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
### Development Dependencies
|
||||
|
||||
For development, additional packages are required:
|
||||
|
||||
- pytest >= 8.2.2
|
||||
- black
|
||||
- mypy
|
||||
- isort
|
||||
- build
|
||||
- twine
|
||||
|
||||
### Running Tests
|
||||
|
||||
|
|
|
|||
13
docker-compose.yaml
Normal file
13
docker-compose.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
services:
|
||||
repo-to-text:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
volumes:
|
||||
- ${HOME:-/home/user}:/home/user
|
||||
working_dir: /home/user
|
||||
environment:
|
||||
- HOME=/home/user
|
||||
user: "${UID:-1000}:${GID:-1000}"
|
||||
init: true
|
||||
entrypoint: ["/bin/bash"]
|
||||
689
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
689
examples/example_repo-to-text_2024-06-09-08-06-31-UTC.txt
Normal file
|
|
@ -0,0 +1,689 @@
|
|||
<repo-to-text>
|
||||
Directory: repo-to-text
|
||||
|
||||
Directory Structure:
|
||||
<directory_structure>
|
||||
.
|
||||
├── .gitignore
|
||||
├── .cursorignore
|
||||
├── Dockerfile
|
||||
├── LICENSE
|
||||
├── README.md
|
||||
├── docker-compose.yaml
|
||||
├── pyproject.toml
|
||||
│ ├── repo_to_text/__init__.py
|
||||
│ ├── repo_to_text/cli
|
||||
│ │ ├── repo_to_text/cli/__init__.py
|
||||
│ │ └── repo_to_text/cli/cli.py
|
||||
│ ├── repo_to_text/core
|
||||
│ │ ├── repo_to_text/core/__init__.py
|
||||
│ │ └── repo_to_text/core/core.py
|
||||
│ ├── repo_to_text/main.py
|
||||
│ └── repo_to_text/utils
|
||||
│ ├── repo_to_text/utils/__init__.py
|
||||
│ └── repo_to_text/utils/utils.py
|
||||
├── tests/__init__.py
|
||||
├── tests/test_cli.py
|
||||
├── tests/test_core.py
|
||||
└── tests/test_utils.py
|
||||
</directory_structure>
|
||||
|
||||
<content full_path=".cursorignore">
|
||||
examples/*
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="docker-compose.yaml">
|
||||
services:
|
||||
repo-to-text:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
volumes:
|
||||
- ${HOME:-/home/user}:/home/user
|
||||
working_dir: /home/user
|
||||
environment:
|
||||
- HOME=/home/user
|
||||
user: "${UID:-1000}:${GID:-1000}"
|
||||
init: true
|
||||
entrypoint: ["/bin/bash"]
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="Dockerfile">
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -s /bin/bash user
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tree \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy all necessary files for package installation
|
||||
COPY pyproject.toml README.md ./
|
||||
|
||||
# Copy the package source
|
||||
COPY repo_to_text ./repo_to_text
|
||||
|
||||
# Install the package
|
||||
RUN pip install --no-cache-dir -e .
|
||||
|
||||
# Copy remaining files
|
||||
COPY . .
|
||||
|
||||
# Set default user
|
||||
USER user
|
||||
|
||||
ENTRYPOINT ["repo-to-text"]
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="pyproject.toml">
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "repo-to-text"
|
||||
version = "0.5.4"
|
||||
authors = [
|
||||
{ name = "Kirill Markin", email = "markinkirill@gmail.com" },
|
||||
]
|
||||
description = "Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks. It may be useful to chat with LLM about your code."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.6"
|
||||
license = { text = "MIT" }
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 4 - Beta",
|
||||
]
|
||||
dependencies = [
|
||||
"setuptools>=70.0.0",
|
||||
"pathspec>=0.12.1",
|
||||
"argparse>=1.4.0",
|
||||
"PyYAML>=6.0.1",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/kirill-markin/repo-to-text"
|
||||
Repository = "https://github.com/kirill-markin/repo-to-text"
|
||||
|
||||
[project.scripts]
|
||||
repo-to-text = "repo_to_text.main:main"
|
||||
flatten = "repo_to_text.main:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=8.2.2",
|
||||
"black",
|
||||
"mypy",
|
||||
"isort",
|
||||
"build",
|
||||
"twine",
|
||||
"pylint",
|
||||
]
|
||||
|
||||
[tool.pylint]
|
||||
disable = [
|
||||
"C0303",
|
||||
]
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/__init__.py">
|
||||
"""This is the main package for the repo_to_text package."""
|
||||
|
||||
__author__ = 'Kirill Markin'
|
||||
__email__ = 'markinkirill@gmail.com'
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/main.py">
|
||||
"""This is the main entry point for the repo_to_text package."""
|
||||
|
||||
from repo_to_text.cli.cli import main
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/core/__init__.py">
|
||||
"""This module contains the core functionality of the repo_to_text package."""
|
||||
|
||||
from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
|
||||
|
||||
__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text']
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/core/core.py">
|
||||
"""
|
||||
Core functionality for repo-to-text
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||
from datetime import datetime, timezone
|
||||
from importlib.machinery import ModuleSpec
|
||||
import logging
|
||||
import yaml
|
||||
import pathspec
|
||||
from pathspec import PathSpec
|
||||
|
||||
from ..utils.utils import check_tree_command, is_ignored_path
|
||||
|
||||
def get_tree_structure(
|
||||
path: str = '.',
|
||||
gitignore_spec: Optional[PathSpec] = None,
|
||||
tree_and_content_ignore_spec: Optional[PathSpec] = None
|
||||
) -> str:
|
||||
"""Generate tree structure of the directory."""
|
||||
if not check_tree_command():
|
||||
return ""
|
||||
|
||||
logging.debug('Generating tree structure for path: %s', path)
|
||||
tree_output = run_tree_command(path)
|
||||
logging.debug('Tree output generated:\n%s', tree_output)
|
||||
|
||||
if not gitignore_spec and not tree_and_content_ignore_spec:
|
||||
logging.debug('No .gitignore or ignore-tree-and-content specification found')
|
||||
return tree_output
|
||||
|
||||
logging.debug('Filtering tree output based on ignore specifications')
|
||||
return filter_tree_output(tree_output, path, gitignore_spec, tree_and_content_ignore_spec)
|
||||
|
||||
def run_tree_command(path: str) -> str:
|
||||
"""Run the tree command and return its output."""
|
||||
result = subprocess.run(
|
||||
['tree', '-a', '-f', '--noreport', path],
|
||||
stdout=subprocess.PIPE,
|
||||
check=True
|
||||
)
|
||||
return result.stdout.decode('utf-8')
|
||||
|
||||
def filter_tree_output(
|
||||
tree_output: str,
|
||||
path: str,
|
||||
gitignore_spec: Optional[PathSpec],
|
||||
tree_and_content_ignore_spec: Optional[PathSpec]
|
||||
) -> str:
|
||||
"""Filter the tree output based on ignore specifications."""
|
||||
lines: List[str] = tree_output.splitlines()
|
||||
non_empty_dirs: Set[str] = set()
|
||||
|
||||
filtered_lines = [
|
||||
process_line(line, path, gitignore_spec, tree_and_content_ignore_spec, non_empty_dirs)
|
||||
for line in lines
|
||||
]
|
||||
|
||||
filtered_tree_output = '\n'.join(filter(None, filtered_lines))
|
||||
logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
|
||||
return filtered_tree_output
|
||||
|
||||
def process_line(
|
||||
line: str,
|
||||
path: str,
|
||||
gitignore_spec: Optional[PathSpec],
|
||||
tree_and_content_ignore_spec: Optional[PathSpec],
|
||||
non_empty_dirs: Set[str]
|
||||
) -> Optional[str]:
|
||||
"""Process a single line of the tree output."""
|
||||
full_path = extract_full_path(line, path)
|
||||
if not full_path or full_path == '.':
|
||||
return None
|
||||
|
||||
relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
|
||||
|
||||
if should_ignore_file(
|
||||
full_path,
|
||||
relative_path,
|
||||
gitignore_spec,
|
||||
None,
|
||||
tree_and_content_ignore_spec
|
||||
):
|
||||
logging.debug('Ignored: %s', relative_path)
|
||||
return None
|
||||
|
||||
if not os.path.isdir(full_path):
|
||||
mark_non_empty_dirs(relative_path, non_empty_dirs)
|
||||
|
||||
if not os.path.isdir(full_path) or os.path.dirname(relative_path) in non_empty_dirs:
|
||||
return line.replace('./', '', 1)
|
||||
return None
|
||||
|
||||
def extract_full_path(line: str, path: str) -> Optional[str]:
|
||||
"""Extract the full path from a line of tree output."""
|
||||
idx = line.find('./')
|
||||
if idx == -1:
|
||||
idx = line.find(path)
|
||||
return line[idx:].strip() if idx != -1 else None
|
||||
|
||||
def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None:
|
||||
"""Mark all parent directories of a file as non-empty."""
|
||||
dir_path = os.path.dirname(relative_path)
|
||||
while dir_path:
|
||||
non_empty_dirs.add(dir_path)
|
||||
dir_path = os.path.dirname(dir_path)
|
||||
|
||||
def load_ignore_specs(
|
||||
path: str = '.',
|
||||
cli_ignore_patterns: Optional[List[str]] = None
|
||||
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
|
||||
"""Load ignore specifications from various sources.
|
||||
|
||||
Args:
|
||||
path: Base directory path
|
||||
cli_ignore_patterns: List of patterns from command line
|
||||
|
||||
Returns:
|
||||
Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
|
||||
content_ignore_spec, and tree_and_content_ignore_spec
|
||||
"""
|
||||
gitignore_spec = None
|
||||
content_ignore_spec = None
|
||||
tree_and_content_ignore_list: List[str] = []
|
||||
use_gitignore = True
|
||||
|
||||
repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
|
||||
if os.path.exists(repo_settings_path):
|
||||
logging.debug('Loading .repo-to-text-settings.yaml from path: %s', repo_settings_path)
|
||||
with open(repo_settings_path, 'r', encoding='utf-8') as f:
|
||||
settings: Dict[str, Any] = yaml.safe_load(f)
|
||||
use_gitignore = settings.get('gitignore-import-and-ignore', True)
|
||||
if 'ignore-content' in settings:
|
||||
content_ignore_spec: Optional[PathSpec] = pathspec.PathSpec.from_lines(
|
||||
'gitwildmatch', settings['ignore-content']
|
||||
)
|
||||
if 'ignore-tree-and-content' in settings:
|
||||
tree_and_content_ignore_list.extend(settings.get('ignore-tree-and-content', []))
|
||||
|
||||
if cli_ignore_patterns:
|
||||
tree_and_content_ignore_list.extend(cli_ignore_patterns)
|
||||
|
||||
if use_gitignore:
|
||||
gitignore_path = os.path.join(path, '.gitignore')
|
||||
if os.path.exists(gitignore_path):
|
||||
logging.debug('Loading .gitignore from path: %s', gitignore_path)
|
||||
with open(gitignore_path, 'r', encoding='utf-8') as f:
|
||||
gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
|
||||
tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
|
||||
'gitwildmatch', tree_and_content_ignore_list
|
||||
)
|
||||
return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||
|
||||
def should_ignore_file(
|
||||
file_path: str,
|
||||
relative_path: str,
|
||||
gitignore_spec: Optional[PathSpec],
|
||||
content_ignore_spec: Optional[PathSpec],
|
||||
tree_and_content_ignore_spec: Optional[PathSpec]
|
||||
) -> bool:
|
||||
"""Check if a file should be ignored based on various ignore specifications.
|
||||
|
||||
Args:
|
||||
file_path: Full path to the file
|
||||
relative_path: Path relative to the repository root
|
||||
gitignore_spec: PathSpec object for gitignore patterns
|
||||
content_ignore_spec: PathSpec object for content ignore patterns
|
||||
tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns
|
||||
|
||||
Returns:
|
||||
bool: True if file should be ignored, False otherwise
|
||||
"""
|
||||
relative_path = relative_path.replace(os.sep, '/')
|
||||
|
||||
if relative_path.startswith('./'):
|
||||
relative_path = relative_path[2:]
|
||||
|
||||
if os.path.isdir(file_path):
|
||||
relative_path += '/'
|
||||
|
||||
result = (
|
||||
is_ignored_path(file_path) or
|
||||
bool(
|
||||
gitignore_spec and
|
||||
gitignore_spec.match_file(relative_path)
|
||||
) or
|
||||
bool(
|
||||
content_ignore_spec and
|
||||
content_ignore_spec.match_file(relative_path)
|
||||
) or
|
||||
bool(
|
||||
tree_and_content_ignore_spec and
|
||||
tree_and_content_ignore_spec.match_file(relative_path)
|
||||
) or
|
||||
os.path.basename(file_path).startswith('repo-to-text_')
|
||||
)
|
||||
|
||||
logging.debug('Checking if file should be ignored:')
|
||||
logging.debug(' file_path: %s', file_path)
|
||||
logging.debug(' relative_path: %s', relative_path)
|
||||
logging.debug(' Result: %s', result)
|
||||
return result
|
||||
|
||||
def save_repo_to_text(
    path: str = '.',
    output_dir: Optional[str] = None,
    to_stdout: bool = False,
    cli_ignore_patterns: Optional[List[str]] = None
) -> str:
    """Render a repository as text and either print it or save it to a file.

    Args:
        path: Directory to process
        output_dir: Directory for the output file; unused when to_stdout is True
        to_stdout: If True, print the output instead of writing a file
        cli_ignore_patterns: Extra ignore patterns forwarded to load_ignore_specs

    Returns:
        str: The generated content when to_stdout is True, otherwise the path
            of the written output file
    """
    logging.debug('Starting to save repo structure to text for path: %s', path)

    specs = load_ignore_specs(path, cli_ignore_patterns)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = specs

    tree_structure: str = get_tree_structure(
        path, gitignore_spec, tree_and_content_ignore_spec
    )
    logging.debug('Final tree structure to be written: %s', tree_structure)

    output_content = generate_output_content(
        path,
        tree_structure,
        gitignore_spec,
        content_ignore_spec,
        tree_and_content_ignore_spec
    )

    if not to_stdout:
        output_file = write_output_to_file(output_content, output_dir)
        copy_to_clipboard(output_content)
        print(
            '[SUCCESS] Repository structure and contents successfully saved to '
            f'file: "./{output_file}"'
        )
        return output_file

    # stdout mode: nothing is written to disk or copied to the clipboard.
    print(output_content)
    return output_content
|
||||
|
||||
def _read_file_as_text(file_path: str) -> str:
    """Read a file as UTF-8 text, falling back to a latin-1 decode of its raw bytes."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte to a code point, so this decode cannot fail.
        logging.debug('Handling binary file contents: %s', file_path)
        with open(file_path, 'rb') as f:
            return f.read().decode('latin1')


def generate_output_content(
    path: str,
    tree_structure: str,
    gitignore_spec: Optional[PathSpec],
    content_ignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
    """Generate the output content for the repository.

    Args:
        path: Root directory that is walked for file contents
        tree_structure: Pre-rendered directory tree placed in the header
        gitignore_spec: PathSpec object for gitignore patterns
        content_ignore_spec: PathSpec object for content ignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        str: The full document: a <repo-to-text> wrapper holding the directory
            structure followed by one <content> section per included file
    """
    output_content: List[str] = []
    project_name = os.path.basename(os.path.abspath(path))

    # Add XML opening tag
    output_content.append('<repo-to-text>\n')

    output_content.append(f'Directory: {project_name}\n\n')
    output_content.append('Directory Structure:\n')
    output_content.append('<directory_structure>\n.\n')

    # .gitignore is listed by hand; presumably the tree filtering drops it
    # (NOTE(review): confirm against get_tree_structure).
    if os.path.exists(os.path.join(path, '.gitignore')):
        output_content.append('├── .gitignore\n')

    output_content.append(tree_structure + '\n' + '</directory_structure>\n')
    logging.debug('Tree structure written to output content')

    for root, _, files in os.walk(path):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, path)

            if should_ignore_file(
                file_path,
                relative_path,
                gitignore_spec,
                content_ignore_spec,
                tree_and_content_ignore_spec
            ):
                continue

            # Strip only a leading './'. Replacing the first './' anywhere
            # would corrupt paths such as 'a./b' (directory names ending in a dot).
            if relative_path.startswith('./'):
                relative_path = relative_path[2:]

            file_content = _read_file_as_text(file_path)
            output_content.append(f'\n<content full_path="{relative_path}">\n')
            output_content.append(file_content)
            output_content.append('\n</content>\n')

    # Add XML closing tag
    output_content.append('\n</repo-to-text>\n')

    logging.debug('Repository contents written to output content')

    return ''.join(output_content)
|
||||
|
||||
def write_output_to_file(output_content: str, output_dir: Optional[str]) -> str:
    """Write the output content to a timestamped UTF-8 text file.

    Args:
        output_content: Text to write
        output_dir: Directory for the file; created if missing. When falsy the
            file is written relative to the current working directory

    Returns:
        str: Path of the written file, named repo-to-text_<UTC timestamp>.txt
    """
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo-to-text_{timestamp}.txt'

    if output_dir:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(output_content)

    return output_file
|
||||
|
||||
def copy_to_clipboard(output_content: str) -> None:
    """Copy the output content to the clipboard if possible.

    This is a best-effort step: when pyperclip is not installed a tip is
    printed, and when copying fails a warning is logged instead of raising.

    Args:
        output_content: Text to place on the clipboard
    """
    try:
        import importlib.util  # pylint: disable=import-outside-toplevel
        spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip")  # type: ignore
        if spec:
            import pyperclip  # pylint: disable=import-outside-toplevel # type: ignore
            pyperclip.copy(output_content)  # type: ignore
            logging.debug('Repository structure and contents copied to clipboard')
        else:
            print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
            print("  pip install pyperclip")
    except (ImportError, RuntimeError) as e:
        # pyperclip reports a missing clipboard backend (e.g. headless or SSH
        # sessions) with PyperclipException, a RuntimeError subclass, so
        # catching ImportError alone let that failure abort the run.
        logging.warning(
            'Could not copy to clipboard. You might be running this '
            'script over SSH or without clipboard support.'
        )
        logging.debug('Clipboard copy error: %s', e)
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/utils/__init__.py">
|
||||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
from .utils import setup_logging, check_tree_command, is_ignored_path
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/utils/utils.py">
|
||||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
import shutil
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger through logging.basicConfig.

    Args:
        debug: If True, sets logging level to DEBUG, otherwise INFO
    """
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=level,
    )
|
||||
|
||||
def check_tree_command() -> bool:
    """Report whether the `tree` executable is on PATH, printing install hints if not.

    Returns:
        bool: True if tree command is available, False otherwise
    """
    if shutil.which('tree') is not None:
        return True

    print(
        "The 'tree' command is not found. "
        "Please install it using one of the following commands:"
    )
    print("For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree")
    print("For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree")
    return False
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Check if a file path should be ignored based on predefined rules.

    A path is ignored when it contains '.git' anywhere (a plain substring test,
    so '.gitignore' and '.github' match too) or when the path or its final
    component starts with the 'repo-to-text_' output-file prefix.

    Args:
        file_path: Path to check

    Returns:
        bool: True if path should be ignored, False otherwise
    """
    import os  # pylint: disable=import-outside-toplevel

    ignored_dirs: List[str] = ['.git']
    ignored_files_prefix = ('repo-to-text_',)

    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)
    # Callers pass paths such as './repo-to-text_x.txt' or 'out/repo-to-text_x.txt',
    # so the prefix must also be tested on the file name, not only the whole path.
    is_ignored_file = (
        file_path.startswith(ignored_files_prefix)
        or os.path.basename(file_path).startswith(ignored_files_prefix)
    )

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug('Path ignored: %s', file_path)
    return result
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/cli/__init__.py">
|
||||
"""This module contains the CLI interface for the repo_to_text package."""
|
||||
|
||||
from .cli import create_default_settings_file, parse_args, main
|
||||
|
||||
__all__ = ['create_default_settings_file', 'parse_args', 'main']
|
||||
|
||||
</content>
|
||||
|
||||
<content full_path="repo_to_text/cli/cli.py">
|
||||
"""
|
||||
CLI for repo-to-text
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import textwrap
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
from typing import NoReturn
|
||||
|
||||
from ..utils.utils import setup_logging
|
||||
from ..core.core import save_repo_to_text
|
||||
|
||||
def create_default_settings_file() -> None:
    """Create a default .repo-to-text-settings.yaml file in the current directory.

    Raises:
        FileExistsError: If the settings file already exists
    """
    settings_file = '.repo-to-text-settings.yaml'

    default_settings = textwrap.dedent("""\
        # Details: https://github.com/kirill-markin/repo-to-text
        # Syntax: gitignore rules

        # Ignore files and directories for all sections from gitignore file
        # Default: True
        gitignore-import-and-ignore: True

        # Ignore files and directories for tree
        # and contents sections (<content full_path="...">...</content>)
        ignore-tree-and-content:
          - ".repo-to-text-settings.yaml"

        # Ignore files and directories for contents sections
        ignore-content:
          - "README.md"
          - "LICENSE"
          - "package-lock.json"
    """)

    try:
        # Mode 'x' creates the file exclusively, so an existing file is detected
        # and left untouched without a separate, race-prone exists() check.
        with open(settings_file, 'x', encoding='utf-8') as f:
            f.write(default_settings)
    except FileExistsError as exc:
        raise FileExistsError(
            f"The settings file '{settings_file}' already exists. "
            "Please remove it or rename it if you want to create a new default settings file."
        ) from exc

    print("Default .repo-to-text-settings.yaml created.")
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build the command line parser and parse sys.argv.

    Returns:
        argparse.Namespace: Parsed command line arguments
    """
    cli = argparse.ArgumentParser(
        description='Convert repository structure and contents to text'
    )
    add = cli.add_argument

    add('input_dir', nargs='?', default='.', help='Directory to process')
    add('--debug', action='store_true', help='Enable debug logging')
    add('--output-dir', type=str, help='Directory to save the output file')
    add(
        '--create-settings', '--init',
        action='store_true',
        help='Create default .repo-to-text-settings.yaml file',
    )
    add('--stdout', action='store_true', help='Output to stdout instead of a file')

    ignore_help = (
        "List of files or directories to ignore in both tree and content sections. "
        "Supports wildcards (e.g., '*')."
    )
    add('--ignore-patterns', nargs='*', help=ignore_help)

    namespace = cli.parse_args()
    return namespace
|
||||
|
||||
def main() -> NoReturn:
    """Main entry point for the CLI.

    Parses the arguments, configures logging, then either creates the default
    settings file or converts the requested directory.

    Raises:
        SystemExit: With code 0 on success, or code 1 when an OSError
            (including FileNotFoundError, FileExistsError and PermissionError)
            is caught
    """
    args = parse_args()
    setup_logging(debug=args.debug)
    logging.debug('repo-to-text script started')

    try:
        if not args.create_settings:
            save_repo_to_text(
                path=args.input_dir,
                output_dir=args.output_dir,
                to_stdout=args.stdout,
                cli_ignore_patterns=args.ignore_patterns
            )
        else:
            create_default_settings_file()
            logging.debug('.repo-to-text-settings.yaml file created')

        logging.debug('repo-to-text script finished')
        sys.exit(0)
    # FileNotFoundError, FileExistsError and PermissionError are all OSError
    # subclasses, so catching the base class covers the same set.
    except OSError as e:
        logging.error('Error occurred: %s', str(e))
        sys.exit(1)
|
||||
|
||||
</content>
|
||||
|
||||
</repo-to-text>
|
||||
|
|
@ -1,420 +0,0 @@
|
|||
Directory: repo-to-text
|
||||
|
||||
Directory Structure:
|
||||
```
|
||||
.
|
||||
├── .gitignore
|
||||
├── LICENSE
|
||||
├── MANIFEST.in
|
||||
├── README.md
|
||||
├── repo_to_text
|
||||
│ ├── repo_to_text/__init__.py
|
||||
│ └── repo_to_text/main.py
|
||||
├── requirements.txt
|
||||
├── setup.py
|
||||
└── tests
|
||||
├── tests/__init__.py
|
||||
└── tests/test_main.py
|
||||
```
|
||||
|
||||
Contents of LICENSE:
|
||||
```
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Kirill Markin
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
```
|
||||
|
||||
Contents of requirements.txt:
|
||||
```
|
||||
setuptools==70.0.0
|
||||
pathspec==0.12.1
|
||||
pytest==8.2.2
|
||||
argparse==1.4.0
|
||||
pyperclip==1.8.2
|
||||
|
||||
```
|
||||
|
||||
Contents of MANIFEST.in:
|
||||
```
|
||||
include README.md
|
||||
include LICENSE
|
||||
include requirements.txt
|
||||
|
||||
```
|
||||
|
||||
Contents of README.md:
|
||||
```
|
||||
# repo-to-text
|
||||
|
||||
`repo-to-text` is an open-source project that converts the structure and contents of a directory (repository) into a single text file. By executing a simple command in the terminal, this tool generates a text representation of the directory, including the output of the `tree` command and the contents of each file, formatted for easy reading and sharing.
|
||||
|
||||
## Features
|
||||
|
||||
- Generates a text representation of a directory's structure.
|
||||
- Includes the output of the `tree` command.
|
||||
- Saves the contents of each file, encapsulated in markdown code blocks.
|
||||
- Copies the generated text representation to the clipboard for easy sharing.
|
||||
- Easy to install and use via `pip` and Homebrew.
|
||||
|
||||
## Installation
|
||||
|
||||
### Using pip
|
||||
|
||||
To install `repo-to-text` via pip, run the following command:
|
||||
|
||||
```bash
|
||||
pip install repo-to-text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
After installation, you can use the `repo-to-text` command in your terminal. Navigate to the directory you want to convert and run:
|
||||
|
||||
```bash
|
||||
repo-to-text
|
||||
```
|
||||
|
||||
This will create a file named `repo_snapshot_YYYY-MM-DD-HH-MM-SS-UTC.txt` in the current directory with the text representation of the repository. The contents of this file will also be copied to your clipboard for easy sharing.
|
||||
|
||||
### Options
|
||||
|
||||
You can customize the behavior of `repo-to-text` with the following options:
|
||||
|
||||
- `--output-dir <path>`: Specify an output directory where the generated text file will be saved. For example:
|
||||
|
||||
```bash
|
||||
repo-to-text --output-dir /path/to/output
|
||||
```
|
||||
|
||||
This will save the file in the specified output directory instead of the current directory.
|
||||
|
||||
- `--debug`: Enable DEBUG logging. By default, `repo-to-text` runs with INFO logging level. To enable DEBUG logging, use the `--debug` flag:
|
||||
|
||||
```bash
|
||||
repo-to-text --debug
|
||||
```
|
||||
|
||||
## Example Output
|
||||
|
||||
The generated text file will include the directory structure and contents of each file. For a full example, see the [example output for this repository](https://github.com/kirill-markin/repo-to-text/blob/db89dbfc9cfa3a8eb29dd14763bc477619a3cea4/examples/example_repo_snapshot_2024-06-08-10-30-33-UTC.txt).
|
||||
|
||||
## Install Locally
|
||||
|
||||
To install `repo-to-text` locally for development, follow these steps:
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/kirill-markin/repo-to-text
|
||||
cd repo-to-text
|
||||
```
|
||||
|
||||
2. Install the package locally:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Installing Dependencies
|
||||
|
||||
To install all the required dependencies, run the following command:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
To run the tests, use the following command:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
## Uninstall
|
||||
|
||||
To uninstall the package, run the following command from the directory where the repository is located:
|
||||
|
||||
```bash
|
||||
pip uninstall repo-to-text
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! If you have any suggestions or find a bug, please open an issue or submit a pull request.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](https://github.com/kirill-markin/repo-to-text/blob/main/LICENSE) file for details.
|
||||
|
||||
## Contact
|
||||
|
||||
This project is maintained by [Kirill Markin](https://github.com/kirill-markin). For any inquiries or feedback, please contact [markinkirill@gmail.com](mailto:markinkirill@gmail.com).
|
||||
|
||||
```
|
||||
|
||||
Contents of setup.py:
|
||||
```
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Runtime requirements are taken verbatim from requirements.txt, one per line.
with open('requirements.txt', encoding='utf-8') as f:
    required = f.read().splitlines()

# Read the README through a context manager so the handle is closed, and pin
# the encoding so the build does not depend on the platform default.
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='repo-to-text',
    version='0.1.1',
    author='Kirill Markin',
    author_email='markinkirill@gmail.com',
    description='Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/kirill-markin/repo-to-text',
    license='MIT',
    packages=find_packages(),
    install_requires=required,
    include_package_data=True,
    entry_points={
        'console_scripts': [
            'repo-to-text=repo_to_text.main:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
|
||||
|
||||
```
|
||||
|
||||
Contents of tests/__init__.py:
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
Contents of tests/test_main.py:
|
||||
```
|
||||
import os
|
||||
import subprocess
|
||||
import pytest
|
||||
import time
|
||||
|
||||
def test_repo_to_text():
    """Run the installed `repo-to-text` command and check it writes one non-empty snapshot."""

    def find_snapshots():
        """List snapshot files in the current working directory."""
        return [
            name for name in os.listdir('.')
            if name.startswith('repo_snapshot_') and name.endswith('.txt')
        ]

    # Remove any existing snapshot files to avoid conflicts
    for stale in find_snapshots():
        os.remove(stale)

    # Run the repo-to-text command
    result = subprocess.run(['repo-to-text'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Assert that the command ran without errors
    assert result.returncode == 0, f"Command failed with error: {result.stderr.decode('utf-8')}"

    # Check for the existence of the new snapshot file
    snapshot_files = find_snapshots()
    assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"

    # Verify that the snapshot file is not empty
    snapshot = snapshot_files[0]
    with open(snapshot, 'r') as f:
        content = f.read()
    assert len(content) > 0, "Snapshot file is empty"

    # Clean up the generated snapshot file
    os.remove(snapshot)


if __name__ == "__main__":
    pytest.main()
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/__init__.py:
|
||||
```
|
||||
__author__ = 'Kirill Markin'
|
||||
__email__ = 'markinkirill@gmail.com'
|
||||
|
||||
```
|
||||
|
||||
Contents of repo_to_text/main.py:
|
||||
```
|
||||
import os
|
||||
import subprocess
|
||||
import pathspec
|
||||
import logging
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
import pyperclip
|
||||
|
||||
def setup_logging(debug=False):
    """Configure the root logger: DEBUG level when `debug` is true, INFO otherwise."""
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=level,
    )
|
||||
|
||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
    """Return the output of `tree -a -f --noreport` for `path`, optionally filtered.

    Without a gitignore spec the raw output is returned. Otherwise each line's
    last whitespace-separated token is treated as a path, and lines whose path
    matches the spec or `is_ignored_path` are dropped.
    """
    logging.debug(f'Generating tree structure for path: {path}')
    completed = subprocess.run(['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE)
    tree_output = completed.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec:
        logging.debug('No .gitignore specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore specification')
    kept_lines = []
    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        relative_path = os.path.relpath(tokens[-1], path)
        if gitignore_spec.match_file(relative_path) or is_ignored_path(relative_path):
            continue
        kept_lines.append(line.replace('./', '', 1))

    logging.debug('Tree structure filtering complete')
    return '\n'.join(kept_lines)
|
||||
|
||||
def load_gitignore(path='.'):
    """Load `<path>/.gitignore` as a gitwildmatch PathSpec.

    Returns:
        The PathSpec built from the file's lines, or None when the file does
        not exist.
    """
    gitignore_path = os.path.join(path, '.gitignore')
    if not os.path.exists(gitignore_path):
        logging.debug('.gitignore not found')
        return None

    logging.debug(f'Loading .gitignore from path: {gitignore_path}')
    # Pin the encoding: the platform default can fail on non-ASCII patterns.
    with open(gitignore_path, 'r', encoding='utf-8') as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Return True for paths that must never appear in a snapshot.

    A path is ignored when it contains '.git' anywhere (a plain substring
    test) or when the path or its final component starts with the
    'repo_snapshot_' output-file prefix.
    """
    ignored_dirs = ['.git']
    ignored_files_prefix = ('repo_snapshot_',)

    is_ignored_dir = any(ignored in file_path for ignored in ignored_dirs)
    # Walked paths look like './repo_snapshot_x.txt', so the prefix must also be
    # tested on the file name; testing only the whole path never matched them.
    is_ignored_file = (
        file_path.startswith(ignored_files_prefix)
        or os.path.basename(file_path).startswith(ignored_files_prefix)
    )

    result = is_ignored_dir or is_ignored_file
    if result:
        logging.debug(f'Path ignored: {file_path}')
    return result
|
||||
|
||||
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop tree lines that refer to directories without files.

    Each line's last whitespace-separated token is treated as a path. The
    first pass drops directories that directly contain no regular file and
    records the parent of every surviving entry; the second pass drops
    directories that are not the parent of any surviving entry. `path` is
    accepted but not used.
    """
    logging.debug('Removing empty directories from tree output')
    non_empty_dirs = set()
    survivors = []

    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        full_path = tokens[-1]
        if os.path.isdir(full_path):
            holds_file = any(
                os.path.isfile(os.path.join(full_path, entry))
                for entry in os.listdir(full_path)
            )
            if not holds_file:
                logging.debug(f'Directory is empty and will be removed: {full_path}')
                continue
        non_empty_dirs.add(os.path.dirname(full_path))
        survivors.append(line)

    final_lines = []
    for line in survivors:
        tokens = line.strip().split()
        if not tokens:
            continue
        full_path = tokens[-1]
        if os.path.isdir(full_path) and full_path not in non_empty_dirs:
            logging.debug(f'Directory is empty and will be removed: {full_path}')
            continue
        final_lines.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(final_lines)
|
||||
|
||||
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the tree and file contents of `path` to a timestamped snapshot file.

    The snapshot is written as UTF-8, then read back and copied to the
    clipboard with pyperclip.

    Args:
        path: Directory to snapshot
        output_dir: Directory for the snapshot file; created if missing. When
            falsy the file is written relative to the current working directory

    Returns:
        str: Path of the written snapshot file
    """
    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec = load_gitignore(path)
    tree_structure = get_tree_structure(path, gitignore_spec)
    tree_structure = remove_empty_dirs(tree_structure, path)

    # Add timestamp to the output file name with a descriptive name
    timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo_snapshot_{timestamp}.txt'

    # Determine the full path to the output file
    if output_dir:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)

    # Sources are read as UTF-8, so the snapshot must be written as UTF-8 too;
    # the platform default encoding may be unable to represent their content.
    with open(output_file, 'w', encoding='utf-8') as file:
        project_name = os.path.basename(os.path.abspath(path))
        file.write(f'Directory: {project_name}\n\n')
        file.write('Directory Structure:\n')
        file.write('```\n.\n')

        # Insert .gitignore if it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            file.write('├── .gitignore\n')

        file.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                relative_path = os.path.relpath(file_path, path)

                # Snapshot files, including the one being written now, must not
                # be embedded; walked paths carry a directory prefix, so the
                # check is made on the bare file name.
                if filename.startswith('repo_snapshot_'):
                    continue
                if is_ignored_path(file_path) or (gitignore_spec and gitignore_spec.match_file(relative_path)):
                    continue

                # Strip only a leading './'; replacing the first './' anywhere
                # would corrupt paths such as 'a./b'.
                if relative_path.startswith('./'):
                    relative_path = relative_path[2:]

                file.write(f'\nContents of {relative_path}:\n')
                file.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        file.write(f.read())
                except UnicodeDecodeError:
                    logging.error(f'Could not decode file contents: {file_path}')
                    file.write('[Could not decode file contents]\n')
                file.write('\n```\n')

        file.write('\n')
        logging.debug('Repository contents written to file')

    # Read the contents of the generated file
    with open(output_file, 'r', encoding='utf-8') as file:
        repo_text = file.read()

    # Copy the contents to the clipboard
    pyperclip.copy(repo_text)
    logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
||||
|
||||
def main():
    """Parse the command line, configure logging and write a snapshot of the current directory."""
    arg_parser = argparse.ArgumentParser(description='Convert repository structure and contents to text')
    option_table = (
        ('--debug', {'action': 'store_true', 'help': 'Enable debug logging'}),
        ('--output-dir', {'type': str, 'help': 'Directory to save the output file'}),
    )
    for flag, options in option_table:
        arg_parser.add_argument(flag, **options)
    args = arg_parser.parse_args()

    setup_logging(debug=args.debug)
    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=args.output_dir)
    logging.debug('repo-to-text script finished')


if __name__ == '__main__':
    main()
|
||||
|
||||
```
|
||||
|
||||
BIN
examples/screenshot-demo.jpg
Normal file
BIN
examples/screenshot-demo.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 175 KiB |
1296
poetry.lock
generated
Normal file
1296
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
54
pyproject.toml
Normal file
54
pyproject.toml
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "repo-to-text"
|
||||
version = "0.8.0"
|
||||
authors = [
|
||||
{ name = "Kirill Markin", email = "markinkirill@gmail.com" },
|
||||
]
|
||||
description = "Convert a directory structure and its contents into a single text file, including the tree output and file contents in structured XML format. It may be useful to chat with LLM about your code."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
license = { text = "MIT" }
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 4 - Beta",
|
||||
]
|
||||
dependencies = [
|
||||
"setuptools>=70.0.0",
|
||||
"pathspec>=0.12.1",
|
||||
"argparse>=1.4.0",
|
||||
"PyYAML>=6.0.1",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/kirill-markin/repo-to-text"
|
||||
Repository = "https://github.com/kirill-markin/repo-to-text"
|
||||
|
||||
[project.scripts]
|
||||
repo-to-text = "repo_to_text.main:main"
|
||||
flatten = "repo_to_text.main:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=8.2.2",
|
||||
"black",
|
||||
"mypy",
|
||||
"isort",
|
||||
"build",
|
||||
"twine",
|
||||
"pylint",
|
||||
]
|
||||
|
||||
[tool.pylint]
|
||||
disable = [
|
||||
"C0303",
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,2 +1,4 @@
|
|||
"""This is the main package for the repo_to_text package."""
|
||||
|
||||
__author__ = 'Kirill Markin'
|
||||
__email__ = 'markinkirill@gmail.com'
|
||||
|
|
|
|||
5
repo_to_text/cli/__init__.py
Normal file
5
repo_to_text/cli/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
"""This module contains the CLI interface for the repo_to_text package."""
|
||||
|
||||
from .cli import create_default_settings_file, parse_args, main
|
||||
|
||||
__all__ = ['create_default_settings_file', 'parse_args', 'main']
|
||||
105
repo_to_text/cli/cli.py
Normal file
105
repo_to_text/cli/cli.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""
|
||||
CLI for repo-to-text
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import textwrap
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
from typing import NoReturn
|
||||
|
||||
from ..utils.utils import setup_logging
|
||||
from ..core.core import save_repo_to_text
|
||||
|
||||
def create_default_settings_file() -> None:
    """Create a default .repo-to-text-settings.yaml file in the current directory.

    Raises:
        FileExistsError: If the settings file already exists
    """
    settings_file = '.repo-to-text-settings.yaml'

    default_settings = textwrap.dedent("""\
        # Details: https://github.com/kirill-markin/repo-to-text
        # Syntax: gitignore rules

        # Ignore files and directories for all sections from gitignore file
        # Default: True
        gitignore-import-and-ignore: True

        # Ignore files and directories for tree
        # and contents sections (<content full_path="...">...</content>)
        ignore-tree-and-content:
          - ".repo-to-text-settings.yaml"

        # Ignore files and directories for contents sections
        ignore-content:
          - "README.md"
          - "LICENSE"
          - "package-lock.json"

        # Optional: Maximum number of words per output file before splitting.
        # If not specified or null, no splitting based on word count will occur.
        # Must be a positive integer if set.
        # maximum_word_count_per_file: 10000
    """)

    try:
        # Mode 'x' creates the file exclusively, so an existing file is detected
        # and left untouched without a separate, race-prone exists() check.
        with open(settings_file, 'x', encoding='utf-8') as f:
            f.write(default_settings)
    except FileExistsError as exc:
        raise FileExistsError(
            f"The settings file '{settings_file}' already exists. "
            "Please remove it or rename it if you want to create a new default settings file."
        ) from exc

    print("Default .repo-to-text-settings.yaml created.")
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build the command line parser and parse sys.argv.

    Returns:
        argparse.Namespace: Parsed command line arguments
    """
    cli = argparse.ArgumentParser(
        description='Convert repository structure and contents to text'
    )
    add = cli.add_argument

    add('input_dir', nargs='?', default='.', help='Directory to process')
    add('--debug', action='store_true', help='Enable debug logging')
    add('--output-dir', type=str, help='Directory to save the output file')
    add(
        '--create-settings', '--init',
        action='store_true',
        help='Create default .repo-to-text-settings.yaml file',
    )
    add('--stdout', action='store_true', help='Output to stdout instead of a file')

    ignore_help = (
        "List of files or directories to ignore in both tree and content sections. "
        "Supports wildcards (e.g., '*')."
    )
    add('--ignore-patterns', nargs='*', help=ignore_help)

    namespace = cli.parse_args()
    return namespace
|
||||
|
||||
def main() -> NoReturn:
    """Run the command line interface and terminate the process.

    Parses the arguments, configures logging, then either creates the default
    settings file (``--create-settings`` / ``--init``) or converts the
    repository to text.

    Raises:
        SystemExit: With code 0 on success, or code 1 when an ``OSError``
            is raised while running.
    """
    cli_args = parse_args()
    setup_logging(debug=cli_args.debug)
    logging.debug('repo-to-text script started')

    try:
        if not cli_args.create_settings:
            save_repo_to_text(
                path=cli_args.input_dir,
                output_dir=cli_args.output_dir,
                to_stdout=cli_args.stdout,
                cli_ignore_patterns=cli_args.ignore_patterns,
            )
        else:
            create_default_settings_file()
            logging.debug('.repo-to-text-settings.yaml file created')

        logging.debug('repo-to-text script finished')
        sys.exit(0)
    except OSError as err:
        # FileNotFoundError, FileExistsError and PermissionError are all
        # subclasses of OSError, so this single clause covers them.
        logging.error('Error occurred: %s', str(err))
        sys.exit(1)
|
||||
5
repo_to_text/core/__init__.py
Normal file
5
repo_to_text/core/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
"""This module contains the core functionality of the repo_to_text package."""
|
||||
|
||||
from .core import get_tree_structure, load_ignore_specs, should_ignore_file, save_repo_to_text
|
||||
|
||||
__all__ = ['get_tree_structure', 'load_ignore_specs', 'should_ignore_file', 'save_repo_to_text']
|
||||
507
repo_to_text/core/core.py
Normal file
507
repo_to_text/core/core.py
Normal file
|
|
@ -0,0 +1,507 @@
|
|||
"""
|
||||
Core functionality for repo-to-text
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import platform
|
||||
from typing import Tuple, Optional, List, Dict, Any, Set
|
||||
from datetime import datetime, timezone
|
||||
from importlib.machinery import ModuleSpec
|
||||
import logging
|
||||
import yaml # type: ignore
|
||||
import pathspec
|
||||
from pathspec import PathSpec
|
||||
|
||||
from ..utils.utils import check_tree_command, is_ignored_path
|
||||
|
||||
def get_tree_structure(
    path: str = '.',
    gitignore_spec: Optional[PathSpec] = None,
    tree_and_content_ignore_spec: Optional[PathSpec] = None
) -> str:
    """Return the directory tree for ``path``, filtered by the ignore specs.

    Args:
        path: Directory whose tree is generated.
        gitignore_spec: Patterns loaded from ``.gitignore``, if any.
        tree_and_content_ignore_spec: Extra patterns hidden from the tree.

    Returns:
        str: Empty string when the ``tree`` command is unavailable; the raw
        tree output when both specs are falsy; otherwise the filtered output.
    """
    if not check_tree_command():
        return ""

    logging.debug('Generating tree structure for path: %s', path)
    raw_tree = run_tree_command(path)
    logging.debug('Tree output generated:\n%s', raw_tree)

    if gitignore_spec or tree_and_content_ignore_spec:
        logging.debug('Filtering tree output based on ignore specifications')
        return filter_tree_output(
            raw_tree, path, gitignore_spec, tree_and_content_ignore_spec
        )

    # Nothing to filter with: hand back the command output untouched.
    logging.debug('No .gitignore or ignore-tree-and-content specification found')
    return raw_tree
|
||||
|
||||
def run_tree_command(path: str) -> str:
    """Execute the platform's ``tree`` command for ``path``.

    Args:
        path: Directory passed to the command.

    Returns:
        str: Standard output of the command, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    on_windows = platform.system() == "Windows"
    tree_cmd = (
        ["cmd", "/c", "tree", "/a", "/f", path]
        if on_windows
        else ["tree", "-a", "-f", "--noreport", path]
    )

    completed = subprocess.run(
        tree_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding='utf-8',
        check=True,
    )
    return completed.stdout
|
||||
|
||||
def filter_tree_output(
    tree_output: str,
    path: str,
    gitignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec]
) -> str:
    """Drop ignored entries from raw ``tree`` output.

    Each line is passed to ``process_line`` in order; lines for which it
    returns a falsy value are discarded.

    Args:
        tree_output: Raw output of the tree command.
        path: Base directory the tree was generated for.
        gitignore_spec: Patterns loaded from ``.gitignore``, if any.
        tree_and_content_ignore_spec: Extra patterns hidden from the tree.

    Returns:
        str: The surviving lines joined with newlines.
    """
    # Shared across all lines so files can mark their parent directories.
    seen_non_empty_dirs: Set[str] = set()
    kept_lines: List[str] = []

    for raw_line in tree_output.splitlines():
        rendered = process_line(
            raw_line, path, gitignore_spec, tree_and_content_ignore_spec,
            seen_non_empty_dirs
        )
        if rendered:
            kept_lines.append(rendered)

    filtered_tree_output = '\n'.join(kept_lines)
    logging.debug('Filtered tree structure:\n%s', filtered_tree_output)
    return filtered_tree_output
|
||||
|
||||
def process_line(
    line: str,
    path: str,
    gitignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec],
    non_empty_dirs: Set[str]
) -> Optional[str]:
    """Decide whether one line of tree output is kept.

    Args:
        line: A single line of tree output.
        path: Base directory the tree was generated for.
        gitignore_spec: Patterns loaded from ``.gitignore``, if any.
        tree_and_content_ignore_spec: Extra patterns hidden from the tree.
        non_empty_dirs: Set updated in place with the parent directories of
            every kept file.

    Returns:
        Optional[str]: The line with its first ``./`` removed, or ``None``
        when the line carries no path, is ignored, or is a directory whose
        parent has not been recorded in ``non_empty_dirs``.
    """
    full_path = extract_full_path(line, path)
    if full_path in (None, '', '.'):
        return None

    try:
        relative_path = os.path.relpath(full_path, path).replace(os.sep, '/')
    except (ValueError, OSError) as e:
        # relpath can fail (e.g., in CI when cwd is unavailable).
        logging.debug('os.path.relpath failed for %s, using fallback: %s', full_path, e)
        # Last resort unless the manual calculation below succeeds.
        relative_path = os.path.basename(full_path)
        if os.path.isabs(full_path) and os.path.isabs(path):
            try:
                common = os.path.commonpath([full_path, path])
                relative_path = os.path.relpath(full_path, common).replace(os.sep, '/')
            except (ValueError, OSError):
                # Keep the basename assigned above.
                pass

    ignored = should_ignore_file(
        full_path,
        relative_path,
        gitignore_spec,
        None,
        tree_and_content_ignore_spec
    )
    if ignored:
        logging.debug('Ignored: %s', relative_path)
        return None

    if os.path.isdir(full_path):
        # Directories survive only when their parent is already recorded.
        if os.path.dirname(relative_path) not in non_empty_dirs:
            return None
    else:
        mark_non_empty_dirs(relative_path, non_empty_dirs)

    return line.replace('./', '', 1)
|
||||
|
||||
def extract_full_path(line: str, path: str) -> Optional[str]:
    """Pull the path portion out of one line of tree output.

    The path is taken to start at the first ``./`` in the line or, failing
    that, at the first occurrence of ``path``.

    Args:
        line: A single line of tree output.
        path: Base directory the tree was generated for.

    Returns:
        Optional[str]: The stripped remainder of the line from that position,
        or ``None`` when neither marker occurs.
    """
    start = line.find('./')
    if start < 0:
        start = line.find(path)
    if start < 0:
        return None
    return line[start:].strip()
|
||||
|
||||
def mark_non_empty_dirs(relative_path: str, non_empty_dirs: Set[str]) -> None:
    """Record every parent directory of a file as non-empty.

    Args:
        relative_path: Path of the file whose ancestors are recorded.
        non_empty_dirs: Set updated in place with each ancestor directory.
    """
    current = os.path.dirname(relative_path)
    while current:
        non_empty_dirs.add(current)
        parent = os.path.dirname(current)
        # dirname() of a filesystem root returns the root itself, so an
        # absolute path would otherwise never reach the empty string.
        if parent == current:
            break
        current = parent
|
||||
|
||||
def load_ignore_specs(
    path: str = '.',
    cli_ignore_patterns: Optional[List[str]] = None
) -> Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]:
    """Load ignore specifications from various sources.

    Patterns come from ``.repo-to-text-settings.yaml`` in ``path``, from the
    command line, and (unless disabled in the settings) from ``.gitignore``.

    Args:
        path: Base directory path
        cli_ignore_patterns: List of patterns from command line

    Returns:
        Tuple[Optional[PathSpec], Optional[PathSpec], PathSpec]: Tuple of gitignore_spec,
        content_ignore_spec, and tree_and_content_ignore_spec
    """
    gitignore_spec = None
    content_ignore_spec = None
    tree_and_content_ignore_list: List[str] = []
    use_gitignore = True

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if os.path.exists(repo_settings_path):
        logging.debug(
            'Loading .repo-to-text-settings.yaml for ignore specs from path: %s',
            repo_settings_path
        )
        with open(repo_settings_path, 'r', encoding='utf-8') as f:
            loaded: Any = yaml.safe_load(f)

        # An empty (or comment-only) settings file is parsed as None; treat
        # it, and any non-mapping document, as "no settings" instead of
        # failing on attribute access.
        settings: Dict[str, Any] = {}
        if isinstance(loaded, dict):
            settings = loaded
        elif loaded is not None:
            logging.warning(
                'Ignoring %s: expected a mapping at the top level.',
                repo_settings_path
            )

        use_gitignore = settings.get('gitignore-import-and-ignore', True)
        if 'ignore-content' in settings:
            # A key with no items is parsed as None; fall back to no patterns.
            content_ignore_spec = pathspec.PathSpec.from_lines(
                'gitwildmatch', settings['ignore-content'] or []
            )
        tree_and_content_ignore_list.extend(
            settings.get('ignore-tree-and-content') or []
        )

    if cli_ignore_patterns:
        tree_and_content_ignore_list.extend(cli_ignore_patterns)

    if use_gitignore:
        gitignore_path = os.path.join(path, '.gitignore')
        if os.path.exists(gitignore_path):
            logging.debug('Loading .gitignore from path: %s', gitignore_path)
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)

    tree_and_content_ignore_spec = pathspec.PathSpec.from_lines(
        'gitwildmatch', tree_and_content_ignore_list
    )
    return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec
|
||||
|
||||
def load_additional_specs(path: str = '.') -> Dict[str, Any]:
    """Load additional specifications from the settings file.

    Args:
        path: Directory that may contain ``.repo-to-text-settings.yaml``.

    Returns:
        Dict[str, Any]: Mapping with the key ``maximum_word_count_per_file``,
        set to a positive integer when the settings file provides a valid
        value and to ``None`` otherwise.
    """
    additional_specs: Dict[str, Any] = {
        'maximum_word_count_per_file': None
    }

    repo_settings_path = os.path.join(path, '.repo-to-text-settings.yaml')
    if not os.path.exists(repo_settings_path):
        return additional_specs

    logging.debug(
        'Loading .repo-to-text-settings.yaml for additional specs from path: %s',
        repo_settings_path
    )
    with open(repo_settings_path, 'r', encoding='utf-8') as f:
        loaded: Any = yaml.safe_load(f)

    # An empty settings file is parsed as None; only a mapping can carry the
    # option, so anything else means "nothing configured".
    if not isinstance(loaded, dict):
        return additional_specs

    if 'maximum_word_count_per_file' in loaded:
        max_words = loaded['maximum_word_count_per_file']
        # bool is a subclass of int, so exclude it explicitly: `true` is not
        # a meaningful word count.
        is_positive_int = (
            isinstance(max_words, int)
            and not isinstance(max_words, bool)
            and max_words > 0
        )
        if is_positive_int:
            additional_specs['maximum_word_count_per_file'] = max_words
        elif max_words is not None:  # Allow null/None to mean "not set"
            logging.warning(
                "Invalid value for 'maximum_word_count_per_file': %s. "
                "It must be a positive integer or null. Ignoring.", max_words
            )
    return additional_specs
|
||||
|
||||
def should_ignore_file(
    file_path: str,
    relative_path: str,
    gitignore_spec: Optional[PathSpec],
    content_ignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec]
) -> bool:
    """Tell whether a path is excluded by any of the ignore rules.

    Args:
        file_path: Full path to the file
        relative_path: Path relative to the repository root
        gitignore_spec: PathSpec object for gitignore patterns
        content_ignore_spec: PathSpec object for content ignore patterns
        tree_and_content_ignore_spec: PathSpec object for tree and content ignore patterns

    Returns:
        bool: True if file should be ignored, False otherwise
    """
    # Normalise to the forward-slash form without a leading "./"; directories
    # get a trailing slash before matching.
    normalized = relative_path.replace(os.sep, '/')
    if normalized.startswith('./'):
        normalized = normalized[2:]
    if os.path.isdir(file_path):
        normalized += '/'

    specs = (gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec)
    result = (
        is_ignored_path(file_path)
        or any(bool(spec) and spec.match_file(normalized) for spec in specs)
        # Previously generated output files are never included.
        or os.path.basename(file_path).startswith('repo-to-text_')
    )

    logging.debug('Checking if file should be ignored:')
    logging.debug(' file_path: %s', file_path)
    logging.debug(' relative_path: %s', normalized)
    logging.debug(' Result: %s', result)
    return result
|
||||
|
||||
def save_repo_to_text(
    path: str = '.',
    output_dir: Optional[str] = None,
    to_stdout: bool = False,
    cli_ignore_patterns: Optional[List[str]] = None
) -> str:
    """Write the repository structure and contents to stdout or to file(s).

    Args:
        path: Directory to convert.
        output_dir: Directory for the output file(s); created when missing.
            When not given, files are written relative to the current directory.
        to_stdout: Print the content instead of writing files.
        cli_ignore_patterns: Extra ignore patterns from the command line.

    Returns:
        str: The joined content when ``to_stdout`` is true; otherwise the path
        of the first file written, or an empty string when nothing was
        generated.
    """
    # pylint: disable=too-many-locals
    logging.debug('Starting to save repo structure to text for path: %s', path)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = (
        load_ignore_specs(path, cli_ignore_patterns)
    )
    maximum_word_count_per_file = load_additional_specs(path).get(
        'maximum_word_count_per_file'
    )

    tree_structure: str = get_tree_structure(
        path, gitignore_spec, tree_and_content_ignore_spec
    )
    logging.debug('Final tree structure to be written: %s', tree_structure)

    segments = generate_output_content(
        path,
        tree_structure,
        gitignore_spec,
        content_ignore_spec,
        tree_and_content_ignore_spec,
        maximum_word_count_per_file
    )

    if to_stdout:
        for segment in segments:
            print(segment, end='')  # Segments carry their own newlines
        return "".join(segments)

    if not segments:
        logging.warning(
            "generate_output_content returned no segments. No output file will be created."
        )
        return ""

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-UTC')
    stem = f'repo-to-text_{timestamp}'

    # Both the single-file and the multi-part case need the directory.
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    def _target(filename: str) -> str:
        return os.path.join(output_dir, filename) if output_dir else filename

    if len(segments) == 1:
        only_path = _target(f"{stem}.txt")
        with open(only_path, 'w', encoding='utf-8') as f:
            f.write(segments[0])
        # Clipboard copy happens only for single-file output.
        copy_to_clipboard(segments[0])
        print(
            "[SUCCESS] Repository structure and contents successfully saved to "
            f"file: \"{os.path.basename(only_path)}\""
        )
        return only_path

    written: List[str] = []
    for part_number, content in enumerate(segments, start=1):
        part_path = _target(f"{stem}_part_{part_number}.txt")
        with open(part_path, 'w', encoding='utf-8') as f:
            f.write(content)
        written.append(part_path)

    print(
        f"[SUCCESS] Repository structure and contents successfully saved to "
        f"{len(written)} files:"
    )
    for part_path in written:
        print(f" - \"{os.path.basename(part_path)}\"")

    # Return the actual file path for existence checks
    return written[0]
|
||||
|
||||
def _read_file_content(file_path: str) -> str:
    """Return the text of a file, with fallbacks for special cases.

    Args:
        file_path: Path to the file to read

    Returns:
        str: The UTF-8 text; the bytes decoded as latin1 when the file is not
        valid UTF-8; or ``[symlink] -> <target>`` for a broken symlink.

    Raises:
        FileNotFoundError: If the file is missing and is not a broken symlink.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as text_file:
            return text_file.read()
    except UnicodeDecodeError:
        logging.debug('Handling binary file contents: %s', file_path)
        with open(file_path, 'rb') as raw_file:
            raw_bytes: bytes = raw_file.read()
        # latin1 maps every byte to a character, so this cannot fail.
        return raw_bytes.decode('latin1')
    except FileNotFoundError as e:
        broken_link = os.path.islink(file_path) and not os.path.exists(file_path)
        if not broken_link:
            raise e
        try:
            link_target = os.readlink(file_path)
        except OSError:
            link_target = ''
        return f"[symlink] -> {link_target}"
|
||||
|
||||
|
||||
def generate_output_content(
    path: str,
    tree_structure: str,
    gitignore_spec: Optional[PathSpec],
    content_ignore_spec: Optional[PathSpec],
    tree_and_content_ignore_spec: Optional[PathSpec],
    maximum_word_count_per_file: Optional[int] = None
) -> List[str]:
    """Generate the output content for the repository, potentially split into segments.

    Args:
        path: Directory being converted.
        tree_structure: Pre-rendered directory tree to embed.
        gitignore_spec: Patterns loaded from ``.gitignore``, if any.
        content_ignore_spec: Patterns excluded from the contents section.
        tree_and_content_ignore_spec: Patterns excluded from tree and contents.
        maximum_word_count_per_file: Word limit per segment; ``None`` disables
            splitting.

    Returns:
        List[str]: One string per output segment, in order.
    """
    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-positional-arguments
    output_segments: List[str] = []
    current_segment_builder: List[str] = []
    current_segment_word_count: int = 0
    project_name = os.path.basename(os.path.abspath(path))

    def count_words(text: str) -> int:
        return len(text.split())

    def _finalize_current_segment() -> None:
        nonlocal current_segment_word_count
        if current_segment_builder:
            output_segments.append("".join(current_segment_builder))
            current_segment_builder.clear()
            current_segment_word_count = 0

    def _add_chunk_to_output(chunk: str) -> None:
        nonlocal current_segment_word_count
        chunk_wc = count_words(chunk)

        # Close the current segment first when this chunk would push a
        # non-empty segment over the limit. A chunk larger than the limit
        # therefore ends up in a segment of its own.
        if (maximum_word_count_per_file is not None
                and current_segment_builder
                and current_segment_word_count + chunk_wc > maximum_word_count_per_file):
            _finalize_current_segment()

        current_segment_builder.append(chunk)
        current_segment_word_count += chunk_wc

    _add_chunk_to_output('<repo-to-text>\n')
    _add_chunk_to_output(f'Directory: {project_name}\n\n')
    _add_chunk_to_output('Directory Structure:\n')
    _add_chunk_to_output('<directory_structure>\n.\n')

    if os.path.exists(os.path.join(path, '.gitignore')):
        _add_chunk_to_output('├── .gitignore\n')

    _add_chunk_to_output(tree_structure + '\n' + '</directory_structure>\n')
    logging.debug('Tree structure added to output content segment builder')

    for root, _, files in os.walk(path):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, path)

            if should_ignore_file(
                file_path,
                relative_path,
                gitignore_spec,
                content_ignore_spec,
                tree_and_content_ignore_spec
            ):
                continue

            # Strip only a leading "./". Replacing the first "./" anywhere
            # would corrupt names such as "v1./file" (directory ending in a dot).
            cleaned_relative_path = (
                relative_path[2:] if relative_path.startswith('./') else relative_path
            )

            _add_chunk_to_output(f'\n<content full_path="{cleaned_relative_path}">\n')
            _add_chunk_to_output(_read_file_content(file_path))
            _add_chunk_to_output('\n</content>\n')

    _add_chunk_to_output('\n</repo-to-text>\n')

    _finalize_current_segment()  # Commit whatever is still in the builder

    logging.debug(
        'Repository contents generated into %s segment(s)', len(output_segments)
    )

    # Defensive fallback: the wrapper tags above are always added, so this is
    # not expected to trigger, but callers are guaranteed at least one segment.
    if not output_segments and not current_segment_builder:
        logging.warning(
            "No output segments were generated. Returning a single empty segment."
        )
        return ["<repo-to-text>\n</repo-to-text>\n"]

    return output_segments
|
||||
|
||||
|
||||
# The original write_output_to_file function is no longer needed as its logic
|
||||
# is incorporated into save_repo_to_text for handling single/multiple files.
|
||||
|
||||
def copy_to_clipboard(output_content: str) -> None:
    """Copy the output content to the clipboard if possible.

    This is a best-effort convenience: when ``pyperclip`` is not installed a
    tip is printed, and when copying fails a warning is logged instead of
    aborting the run.

    Args:
        output_content: Text to place on the clipboard.
    """
    try:
        import importlib.util  # pylint: disable=import-outside-toplevel
        spec: Optional[ModuleSpec] = importlib.util.find_spec("pyperclip")  # type: ignore
        if spec:
            import pyperclip  # pylint: disable=import-outside-toplevel # type: ignore
            pyperclip.copy(output_content)  # type: ignore
            logging.debug('Repository structure and contents copied to clipboard')
        else:
            print("Tip: Install 'pyperclip' package to enable automatic clipboard copying:")
            print(" pip install pyperclip")
    except (ImportError, RuntimeError) as e:
        # pyperclip reports a missing copy/paste mechanism (headless or SSH
        # sessions) with PyperclipException, a RuntimeError subclass; catching
        # only ImportError let that failure escape.
        logging.warning(
            'Could not copy to clipboard. You might be running this '
            'script over SSH or without clipboard support.'
        )
        logging.debug('Clipboard copy error: %s', e)
|
||||
|
|
@ -1,158 +1,6 @@
|
|||
import os
|
||||
import subprocess
|
||||
import pathspec
|
||||
import logging
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
import pyperclip
|
||||
"""This is the main entry point for the repo_to_text package."""
|
||||
|
||||
def setup_logging(debug=False):
    """Configure root logging at DEBUG when ``debug`` is true, else INFO."""
    level = logging.INFO
    if debug:
        level = logging.DEBUG
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
|
||||
|
||||
def get_tree_structure(path='.', gitignore_spec=None) -> str:
    """Run ``tree`` on ``path`` and drop the entries matched by the ignore rules.

    The path of each entry is taken to be the last whitespace-separated token
    of its line. Without a gitignore spec the raw output is returned.
    """
    logging.debug(f'Generating tree structure for path: {path}')
    completed = subprocess.run(
        ['tree', '-a', '-f', '--noreport', path], stdout=subprocess.PIPE
    )
    tree_output = completed.stdout.decode('utf-8')
    logging.debug(f'Tree output generated: {tree_output}')

    if not gitignore_spec:
        logging.debug('No .gitignore specification found')
        return tree_output

    logging.debug('Filtering tree output based on .gitignore specification')
    kept = []
    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        relative_path = os.path.relpath(tokens[-1], path)
        if gitignore_spec.match_file(relative_path) or is_ignored_path(relative_path):
            continue
        kept.append(line.replace('./', '', 1))

    logging.debug('Tree structure filtering complete')
    return '\n'.join(kept)
|
||||
|
||||
def load_gitignore(path='.'):
    """Load ``.gitignore`` from ``path`` as a gitwildmatch ``PathSpec``.

    Returns:
        The parsed spec, or ``None`` when ``path`` has no ``.gitignore``.
    """
    gitignore_path = os.path.join(path, '.gitignore')
    if not os.path.exists(gitignore_path):
        logging.debug('.gitignore not found')
        return None

    logging.debug(f'Loading .gitignore from path: {gitignore_path}')
    # Explicit UTF-8 so parsing does not depend on the platform's default
    # encoding.
    with open(gitignore_path, 'r', encoding='utf-8') as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Apply the built-in ignore rules to a path.

    A path is ignored when it contains ``.git`` anywhere (substring match) or
    starts with the ``repo_snapshot_`` prefix.
    """
    contains_ignored_dir = '.git' in file_path
    has_ignored_prefix = file_path.startswith('repo_snapshot_')
    if contains_ignored_dir or has_ignored_prefix:
        logging.debug(f'Path ignored: {file_path}')
        return True
    return False
|
||||
|
||||
def remove_empty_dirs(tree_output: str, path='.') -> str:
    """Drop directory entries that hold no files from tree output.

    The path of each entry is taken to be the last whitespace-separated token
    of its line. ``path`` is accepted but not used.
    """
    logging.debug('Removing empty directories from tree output')
    parents_seen = set()
    first_pass = []

    # Pass 1: remove directories with no regular file directly inside and
    # remember the parent of every surviving entry.
    for line in tree_output.splitlines():
        tokens = line.strip().split()
        if not tokens:
            continue
        entry = tokens[-1]
        if os.path.isdir(entry):
            has_file = any(
                os.path.isfile(os.path.join(entry, child))
                for child in os.listdir(entry)
            )
            if not has_file:
                logging.debug(f'Directory is empty and will be removed: {entry}')
                continue
        parents_seen.add(os.path.dirname(entry))
        first_pass.append(line)

    # Pass 2: remove directories that are not the parent of any survivor.
    second_pass = []
    for line in first_pass:
        tokens = line.strip().split()
        if not tokens:
            continue
        entry = tokens[-1]
        if os.path.isdir(entry) and entry not in parents_seen:
            logging.debug(f'Directory is empty and will be removed: {entry}')
            continue
        second_pass.append(line)

    logging.debug('Empty directory removal complete')
    return '\n'.join(second_pass)
|
||||
|
||||
def save_repo_to_text(path='.', output_dir=None) -> str:
    """Write the tree and file contents of ``path`` to a timestamped snapshot.

    The finished snapshot is also copied to the clipboard.

    Returns:
        str: Path of the snapshot file that was written.
    """
    logging.debug(f'Starting to save repo structure to text for path: {path}')
    gitignore_spec = load_gitignore(path)
    tree_structure = remove_empty_dirs(get_tree_structure(path, gitignore_spec), path)

    # Timestamped, descriptive output file name
    timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-UTC')
    output_file = f'repo_snapshot_{timestamp}.txt'

    # Place the file inside output_dir when one is requested
    if output_dir:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        output_file = os.path.join(output_dir, output_file)

    with open(output_file, 'w') as out:
        project_name = os.path.basename(os.path.abspath(path))
        out.write(f'Directory: {project_name}\n\n')
        out.write('Directory Structure:\n')
        out.write('```\n.\n')

        # List .gitignore explicitly when it exists
        if os.path.exists(os.path.join(path, '.gitignore')):
            out.write('├── .gitignore\n')

        out.write(tree_structure + '\n' + '```\n')
        logging.debug('Tree structure written to file')

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                relative_path = os.path.relpath(file_path, path)

                if is_ignored_path(file_path):
                    continue
                if gitignore_spec and gitignore_spec.match_file(relative_path):
                    continue

                relative_path = relative_path.replace('./', '', 1)

                out.write(f'\nContents of {relative_path}:\n')
                out.write('```\n')
                try:
                    with open(file_path, 'r', encoding='utf-8') as source:
                        out.write(source.read())
                except UnicodeDecodeError:
                    logging.debug(f'Could not decode file contents: {file_path}')
                    out.write('[Could not decode file contents]\n')
                out.write('\n```\n')

        out.write('\n')
        logging.debug('Repository contents written to file')

    # Read the snapshot back so it can be copied to the clipboard
    with open(output_file, 'r') as snapshot:
        repo_text = snapshot.read()

    pyperclip.copy(repo_text)
    logging.debug('Repository structure and contents copied to clipboard')

    return output_file
|
||||
|
||||
def main():
    """Parse ``--debug`` / ``--output-dir`` and snapshot the current directory."""
    cli = argparse.ArgumentParser(
        description='Convert repository structure and contents to text'
    )
    cli.add_argument('--debug', action='store_true', help='Enable debug logging')
    cli.add_argument('--output-dir', type=str, help='Directory to save the output file')
    options = cli.parse_args()

    setup_logging(debug=options.debug)
    logging.debug('repo-to-text script started')
    save_repo_to_text(output_dir=options.output_dir)
    logging.debug('repo-to-text script finished')
|
||||
from repo_to_text.cli.cli import main
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
|||
5
repo_to_text/utils/__init__.py
Normal file
5
repo_to_text/utils/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
from .utils import setup_logging, check_tree_command, is_ignored_path
|
||||
|
||||
__all__ = ['setup_logging', 'check_tree_command', 'is_ignored_path']
|
||||
48
repo_to_text/utils/utils.py
Normal file
48
repo_to_text/utils/utils.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
"""This module contains utility functions for the repo_to_text package."""
|
||||
|
||||
import shutil
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger.

    Args:
        debug: Use the DEBUG level when true, INFO otherwise.
    """
    level = logging.INFO
    if debug:
        level = logging.DEBUG
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
|
||||
|
||||
def check_tree_command() -> bool:
    """Report whether the ``tree`` executable can be found.

    When it cannot, installation hints are printed to stdout.

    Returns:
        bool: True if tree command is available, False otherwise
    """
    if shutil.which('tree') is not None:
        return True

    hints = (
        "The 'tree' command is not found. "
        "Please install it using one of the following commands:",
        "For Debian-based systems (e.g., Ubuntu): sudo apt-get install tree",
        "For Red Hat-based systems (e.g., Fedora, CentOS): sudo yum install tree",
    )
    for hint in hints:
        print(hint)
    return False
|
||||
|
||||
def is_ignored_path(file_path: str) -> bool:
    """Apply the built-in ignore rules to a path.

    A path is ignored when it contains ``.git`` anywhere (substring match) or
    starts with the ``repo-to-text_`` prefix.

    Args:
        file_path: Path to check

    Returns:
        bool: True if path should be ignored, False otherwise
    """
    ignored_fragments: List[str] = ['.git']
    ignored_prefixes: List[str] = ['repo-to-text_']

    contains_fragment = any(fragment in file_path for fragment in ignored_fragments)
    has_prefix = any(file_path.startswith(prefix) for prefix in ignored_prefixes)

    if contains_fragment or has_prefix:
        logging.debug('Path ignored: %s', file_path)
        return True
    return False
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
setuptools==70.0.0
|
||||
pathspec==0.12.1
|
||||
pytest==8.2.2
|
||||
argparse==1.4.0
|
||||
pyperclip==1.8.2
|
||||
30
setup.py
30
setup.py
|
|
@ -1,30 +0,0 @@
|
|||
from setuptools import setup, find_packages

# Read the pinned dependencies, skipping blank lines and comments so they are
# not passed to setuptools as requirement strings.
with open('requirements.txt', encoding='utf-8') as req_file:
    required = [
        line.strip()
        for line in req_file
        if line.strip() and not line.lstrip().startswith('#')
    ]

# Read the README inside a context manager so the file handle is closed.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='repo-to-text',
    version='0.1.3',
    author='Kirill Markin',
    author_email='markinkirill@gmail.com',
    description='Convert a directory structure and its contents into a single text file, including the tree output and file contents in markdown code blocks.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/kirill-markin/repo-to-text',
    license='MIT',
    packages=find_packages(),
    install_requires=required,
    include_package_data=True,
    entry_points={
        'console_scripts': [
            'repo-to-text=repo_to_text.main:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
|
||||
114
tests/test_cli.py
Normal file
114
tests/test_cli.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""Test the CLI module."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
from typing import Generator
|
||||
from unittest.mock import patch, MagicMock
|
||||
import pytest
|
||||
from repo_to_text.cli.cli import (
|
||||
create_default_settings_file,
|
||||
parse_args,
|
||||
main
|
||||
)
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
    """Create a temporary directory for testing.

    Tests using this fixture change into the directory, so the previous
    working directory is restored before the directory is removed; otherwise
    later tests would run with a deleted current directory.

    Yields:
        str: Path of the temporary directory.
    """
    try:
        original_cwd = os.getcwd()
    except OSError:
        # The current directory may already be gone; nothing to restore.
        original_cwd = None

    temp_path = tempfile.mkdtemp()
    try:
        yield temp_path
    finally:
        if original_cwd is not None:
            os.chdir(original_cwd)
        shutil.rmtree(temp_path)
|
||||
|
||||
def test_parse_args_defaults() -> None:
    """Check what parse_args() returns when only the program name is given."""
    bare_argv = ['repo-to-text']
    with patch('sys.argv', bare_argv):
        parsed = parse_args()

        assert parsed.input_dir == '.'
        assert not parsed.debug
        assert parsed.output_dir is None
        assert not parsed.create_settings
        assert not parsed.stdout
        assert parsed.ignore_patterns is None
|
||||
|
||||
def test_parse_args_with_values() -> None:
    """Check that explicitly supplied options show up in the parsed namespace."""
    argv = ['repo-to-text', 'input/path', '--debug']
    argv += ['--output-dir', 'output/path']
    argv += ['--ignore-patterns', '*.log', 'temp/']

    with patch('sys.argv', argv):
        parsed = parse_args()

        assert parsed.input_dir == 'input/path'
        assert parsed.debug
        assert parsed.output_dir == 'output/path'
        assert parsed.ignore_patterns == ['*.log', 'temp/']
|
||||
|
||||
def test_create_default_settings_file(temp_dir: str) -> None:
    """Test creation of the default settings file.

    The function under test is called without arguments and the file is then
    looked up by a relative name, so the test switches the process into
    ``temp_dir`` first. The previous working directory is restored afterwards:
    without that, the ``temp_dir`` fixture would delete the directory the
    process is still in and every later test would inherit a broken cwd.
    """
    try:
        previous_cwd = os.getcwd()
    except OSError:
        # The current directory may already be gone (e.g. removed by another
        # test's cleanup); fall back to a directory that certainly exists.
        previous_cwd = tempfile.gettempdir()

    os.chdir(temp_dir)
    try:
        create_default_settings_file()

        settings_file = '.repo-to-text-settings.yaml'
        assert os.path.exists(settings_file)

        with open(settings_file, 'r', encoding='utf-8') as f:
            content = f.read()

        # Spot-check the keys the generated template is expected to contain.
        assert 'gitignore-import-and-ignore: True' in content
        assert 'ignore-tree-and-content:' in content
        assert 'ignore-content:' in content
    finally:
        # Leave temp_dir before the fixture removes it.
        os.chdir(previous_cwd)
|
||||
|
||||
def test_create_default_settings_file_already_exists(temp_dir: str) -> None:
    """Test that creating the settings file twice raises FileExistsError.

    The process is moved into ``temp_dir`` for the duration of the test and
    moved back afterwards, so that the fixture's cleanup does not remove the
    current working directory out from under later tests.
    """
    try:
        previous_cwd = os.getcwd()
    except OSError:
        # The current directory may already be gone (e.g. removed by another
        # test's cleanup); fall back to a directory that certainly exists.
        previous_cwd = tempfile.gettempdir()

    os.chdir(temp_dir)
    try:
        # Create the file first
        create_default_settings_file()

        # Try to create it again
        with pytest.raises(FileExistsError) as exc_info:
            create_default_settings_file()
        assert "already exists" in str(exc_info.value)
    finally:
        # Leave temp_dir before the fixture removes it.
        os.chdir(previous_cwd)
|
||||
|
||||
@patch('repo_to_text.cli.cli.save_repo_to_text')
def test_main_normal_execution(mock_save_repo: MagicMock) -> None:
    """Run main() with --stdout and check exit code and the forwarded arguments."""
    expected_kwargs = {
        'path': '.',
        'output_dir': None,
        'to_stdout': True,
        'cli_ignore_patterns': None,
    }

    with patch('sys.argv', ['repo-to-text', '--stdout']):
        # main() is expected to finish by raising SystemExit.
        with pytest.raises(SystemExit) as exc_info:
            main()

        assert exc_info.value.code == 0
        mock_save_repo.assert_called_once_with(**expected_kwargs)
|
||||
|
||||
@patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_create_settings(mock_create_settings: MagicMock) -> None:
    """Run main() with --create-settings and check the settings helper is invoked."""
    argv = ['repo-to-text', '--create-settings']

    with patch('sys.argv', argv):
        # main() is expected to finish by raising SystemExit.
        with pytest.raises(SystemExit) as exc_info:
            main()

        assert exc_info.value.code == 0
        mock_create_settings.assert_called_once()
|
||||
|
||||
@patch('repo_to_text.cli.cli.setup_logging')
@patch('repo_to_text.cli.cli.create_default_settings_file')
def test_main_with_debug_logging(
    mock_create_settings: MagicMock,
    mock_setup_logging: MagicMock
) -> None:
    """Run main() with --debug and check logging is configured in debug mode."""
    # Decorators apply bottom-up, so the settings mock is the first argument.
    argv = ['repo-to-text', '--debug', '--create-settings']

    with patch('sys.argv', argv):
        with pytest.raises(SystemExit) as exc_info:
            main()

        assert exc_info.value.code == 0
        mock_setup_logging.assert_called_once_with(debug=True)
        mock_create_settings.assert_called_once()
|
||||
|
||||
# Allow running this test module directly as a script: hands this file to
# pytest.main() instead of requiring the `pytest` command line.
if __name__ == "__main__":
    pytest.main([__file__])
|
||||
748
tests/test_core.py
Normal file
748
tests/test_core.py
Normal file
|
|
@ -0,0 +1,748 @@
|
|||
"""Test the core module."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
from typing import Generator, IO
|
||||
import pytest
|
||||
|
||||
from unittest.mock import patch, mock_open, MagicMock
|
||||
import yaml # For creating mock settings files easily
|
||||
|
||||
from repo_to_text.core.core import (
|
||||
get_tree_structure,
|
||||
load_ignore_specs,
|
||||
should_ignore_file,
|
||||
is_ignored_path,
|
||||
save_repo_to_text,
|
||||
load_additional_specs,
|
||||
generate_output_content
|
||||
)
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
    """Provide a brand-new temporary directory and clean it up after the test."""
    workspace = tempfile.mkdtemp()
    yield workspace
    # Everything after the yield is the fixture's teardown.
    shutil.rmtree(workspace)
|
||||
|
||||
# Mock tree outputs
# Raw output similar to `tree -a -f --noreport`
# These literals are used below as the return value of the patched
# `run_tree_command`, so the tests do not depend on the `tree` binary.
# NOTE(review): the entries are flat "./path" lines with no tree-drawing
# characters - confirm this matches what the real command returns.

# Listing for the repository built by the `sample_repo` fixture.
MOCK_RAW_TREE_FOR_SAMPLE_REPO = """./
./.gitignore
./.repo-to-text-settings.yaml
./README.md
./src
./src/main.py
./tests
./tests/test_main.py
"""

# Listing with spaces in both a directory name and a file name.
MOCK_RAW_TREE_SPECIAL_CHARS = """./
./special chars
./special chars/file with spaces.txt
"""

# Listing used by the empty-directory filtering test.
MOCK_RAW_TREE_EMPTY_FILTERING = """./
./src
./src/main.py
./tests
./tests/test_main.py
"""
# Note: ./empty_dir is deliberately absent from this listing, on the assumption
# that tree or filter_tree_output would already have dropped it.
# That keeps the test focused on the remaining logic when the tree output is as expected.
|
||||
|
||||
# Expected output from get_tree_structure (filtered)
# These literals are used below as the return value of the patched
# `get_tree_structure` and are also passed directly to `generate_output_content`.
# NOTE(review): nested entries appear here without the usual tree indentation -
# confirm the whitespace inside these literals against the real function output.

# Tree for the `sample_repo` fixture; the settings file is not listed.
MOCK_GTS_OUTPUT_FOR_SAMPLE_REPO = """.
├── .gitignore
├── README.md
├── src
│ └── main.py
└── tests
└── test_main.py"""

# Tree for the `simple_word_count_repo` fixture.
MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO = """.
├── file1.txt
├── file2.txt
└── subdir
└── file3.txt"""
|
||||
|
||||
@pytest.fixture
def sample_repo(tmp_path: str) -> str:
    """Build a small repository layout under ``tmp_path`` and return its path as a string.

    The layout has ``src/`` and ``tests/`` directories, a README, a
    ``.gitignore`` and a ``.repo-to-text-settings.yaml`` file.
    """
    repo_root = str(tmp_path)

    # Directories first; the file loop below tolerates them already existing.
    for directory in ("src", "tests"):
        os.makedirs(os.path.join(repo_root, directory))

    gitignore_text = """
*.pyc
__pycache__/
.git/
"""
    settings_text = """
gitignore-import-and-ignore: True
ignore-tree-and-content:
- ".git/"
- ".repo-to-text-settings.yaml"
ignore-content:
- "README.md"
- "package-lock.json"
"""
    # Relative path -> text written to that path.
    files = {
        "README.md": "# Test Project",
        ".gitignore": gitignore_text,
        "src/main.py": "print('Hello World')",
        "tests/test_main.py": "def test_sample(): pass",
        ".repo-to-text-settings.yaml": settings_text,
    }

    for relative_name, text in files.items():
        destination = os.path.join(repo_root, relative_name)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        with open(destination, "w", encoding='utf-8') as handle:
            handle.write(text)

    return repo_root
|
||||
|
||||
@pytest.fixture
def simple_word_count_repo(tmp_path: str) -> str:
    """Build a three-file repository used by the word-count splitting tests.

    Returns the repository path as a string. Word counts quoted below are
    whitespace-split counts (``len(text.split())``).
    """
    root = str(tmp_path)
    texts_by_path = {
        # 8 words
        "file1.txt": "This is file one. It has eight words.",
        # 8 words when split on whitespace, despite what the sentence says
        "file2.txt": "File two is here. This makes six words.",
        # 9 words when split on whitespace, despite what the sentence says
        "subdir/file3.txt": "Another file in a subdirectory, with ten words exactly.",
    }

    for relative_name, text in texts_by_path.items():
        target = os.path.join(root, relative_name)
        # Creates "subdir" on demand; a no-op for files directly under root.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(text)

    return root
|
||||
|
||||
def count_words_for_test(text: str) -> int:
    """Return the number of whitespace-separated tokens in ``text``.

    Used by the tests so word counts are computed the same way everywhere.
    """
    tokens = text.split()
    return len(tokens)
|
||||
|
||||
def test_is_ignored_path() -> None:
    """Check is_ignored_path() against paths that should and should not be ignored."""
    expectations = (
        (".git/config", True),
        ("repo-to-text_output.txt", True),
        ("src/main.py", False),
        ("normal_file.txt", False),
    )
    for candidate, expected in expectations:
        assert is_ignored_path(candidate) is expected
|
||||
|
||||
def test_load_ignore_specs(sample_repo: str) -> None:
    """Check that all three ignore specs are loaded from the sample repository."""
    specs = load_ignore_specs(sample_repo)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = specs

    # The sample repo has both a .gitignore and a settings file, so nothing is missing.
    for spec in specs:
        assert spec is not None

    # Patterns that come from .gitignore
    for ignored in ("test.pyc", "__pycache__/cache.py", ".git/config"):
        assert gitignore_spec.match_file(ignored) is True

    # Pattern from the settings file's ignore-content list
    assert content_ignore_spec.match_file("README.md") is True

    # Pattern from the settings file's ignore-tree-and-content list
    assert tree_and_content_ignore_spec.match_file(".git/config") is True
|
||||
|
||||
def test_should_ignore_file(sample_repo: str) -> None:
    """Check should_ignore_file() for one ignored and one regular path."""
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(sample_repo)

    # (path passed as both the file path and the relative path, expected verdict)
    cases = (
        (".git/config", True),
        ("src/main.py", False),
    )
    for path, expected in cases:
        assert should_ignore_file(path, path, *specs) is expected
|
||||
|
||||
@patch('repo_to_text.core.core.run_tree_command', return_value=MOCK_RAW_TREE_FOR_SAMPLE_REPO)
@patch('repo_to_text.core.core.check_tree_command', return_value=True)
def test_get_tree_structure(mock_check_tree: MagicMock, mock_run_tree: MagicMock, sample_repo: str) -> None:
    """Check the filtered tree for the sample repo, with the tree command mocked out."""
    specs = load_ignore_specs(sample_repo)
    # Only the gitignore spec and the tree-and-content spec are needed here.
    tree_output = get_tree_structure(sample_repo, specs[0], specs[2])

    for expected_entry in ("src", "tests", "main.py", "test_main.py"):
        assert expected_entry in tree_output

    # The settings file lists itself under ignore-tree-and-content, so it must
    # be filtered out together with .git.
    for filtered_entry in (".git", ".repo-to-text-settings.yaml"):
        assert filtered_entry not in tree_output
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SAMPLE_REPO)
@patch('repo_to_text.core.core.check_tree_command', return_value=True)  # In case any internal call still checks
def test_save_repo_to_text(mock_check_tree: MagicMock, mock_get_tree: MagicMock, sample_repo: str) -> None:
    """Write the sample repo to a file and check what is and is not in it."""
    target_dir = os.path.join(sample_repo, "output")
    os.makedirs(target_dir, exist_ok=True)

    # Add a .git directory with a file in it so the ignore rules have something to hide.
    os.makedirs(os.path.join(sample_repo, ".git"))
    with open(os.path.join(sample_repo, ".git/config"), "w", encoding='utf-8') as git_config:
        git_config.write("[core]\n\trepositoryformatversion = 0\n")

    written_path = save_repo_to_text(sample_repo, output_dir=target_dir)
    assert os.path.exists(written_path)
    assert os.path.abspath(os.path.dirname(written_path)) == os.path.abspath(target_dir)

    with open(written_path, 'r', encoding='utf-8') as result_file:
        content = result_file.read()

    # Header, file names and file bodies that must be present.
    must_contain = (
        "Directory Structure:",
        "src/main.py",
        "tests/test_main.py",
        "print('Hello World')",
        "def test_sample(): pass",
    )
    for fragment in must_contain:
        assert fragment in content

    # Ignored paths, and the body of .gitignore, must be absent.
    must_not_contain = (
        ".git/config",
        "repo-to-text_",
        ".repo-to-text-settings.yaml",
        "*.pyc",
        "__pycache__",
    )
    for fragment in must_not_contain:
        assert fragment not in content
|
||||
|
||||
def test_save_repo_to_text_stdout(sample_repo: str) -> None:
    """Check that to_stdout=True makes save_repo_to_text return the text itself."""
    rendered = save_repo_to_text(sample_repo, to_stdout=True)

    assert isinstance(rendered, str)
    for fragment in ("Directory Structure:", "src/main.py", "tests/test_main.py"):
        assert fragment in rendered
|
||||
|
||||
def test_load_ignore_specs_with_cli_patterns(sample_repo: str) -> None:
    """Check that patterns given on the command line end up in the tree-and-content spec."""
    specs = load_ignore_specs(sample_repo, ["*.log", "temp/"])
    tree_and_content_ignore_spec = specs[2]

    verdicts = (
        ("test.log", True),
        ("temp/file.txt", True),
        ("normal.txt", False),
    )
    for candidate, expected in verdicts:
        assert tree_and_content_ignore_spec.match_file(candidate) is expected
|
||||
|
||||
def test_load_ignore_specs_without_gitignore(temp_dir: str) -> None:
    """Check the specs returned for a directory with no .gitignore or settings file."""
    specs = load_ignore_specs(temp_dir)
    gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec = specs

    assert gitignore_spec is None
    assert content_ignore_spec is None
    # The tree-and-content spec is expected even when nothing is configured.
    assert tree_and_content_ignore_spec is not None
|
||||
|
||||
@patch('repo_to_text.core.core.run_tree_command', return_value=MOCK_RAW_TREE_SPECIAL_CHARS)
@patch('repo_to_text.core.core.check_tree_command', return_value=True)
def test_get_tree_structure_with_special_chars(mock_check_tree: MagicMock, mock_run_tree: MagicMock, temp_dir: str) -> None:
    """Check that names containing spaces survive tree generation."""
    # Mirror on disk what MOCK_RAW_TREE_SPECIAL_CHARS describes.
    spaced_dir = os.path.join(temp_dir, "special chars")
    os.makedirs(spaced_dir)
    spaced_file = os.path.join(spaced_dir, "file with spaces.txt")
    with open(spaced_file, "w", encoding='utf-8') as handle:
        handle.write("test")

    # temp_dir has no ignore files of its own.
    specs = load_ignore_specs(temp_dir)
    tree_output = get_tree_structure(temp_dir, specs[0], specs[2])

    for expected_name in ("special chars", "file with spaces.txt"):
        assert expected_name in tree_output
|
||||
|
||||
def test_should_ignore_file_edge_cases(sample_repo: str) -> None:
    """Check should_ignore_file() with a "./"-prefixed path and with an absolute path."""
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(sample_repo)

    # Dot-prefixed path used for both arguments.
    dotted = "./src/main.py"
    assert should_ignore_file(dotted, dotted, *specs) is False

    # Absolute file path paired with its repository-relative form.
    absolute = os.path.join(sample_repo, "src/main.py")
    relative = "src/main.py"
    assert should_ignore_file(absolute, relative, *specs) is False
|
||||
|
||||
def test_save_repo_to_text_with_binary_files(temp_dir: str) -> None:
    """Check how a file with non-text bytes is rendered in the output."""
    binary_content = b'\x00\x01\x02\x03'
    with open(os.path.join(temp_dir, "binary.bin"), "wb") as blob:
        blob.write(binary_content)

    output = save_repo_to_text(temp_dir, to_stdout=True)

    # The file name must be present in the output...
    assert "binary.bin" in output
    # ...and its content block must hold the bytes decoded as latin1.
    expected_content = f"<content full_path=\"binary.bin\">\n{binary_content.decode('latin1')}\n</content>"
    assert expected_content in output
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)  # Using simple repo tree for generic content
@patch('repo_to_text.core.core.check_tree_command', return_value=True)
def test_save_repo_to_text_custom_output_dir(mock_check_tree: MagicMock, mock_get_tree: MagicMock, temp_dir: str) -> None:
    """Check that the output file lands in an output directory that does not exist yet."""
    with open(os.path.join(temp_dir, "test.txt"), "w", encoding='utf-8') as handle:
        handle.write("test content")

    # Not created here: save_repo_to_text is expected to create it.
    output_dir = os.path.join(temp_dir, "custom_output")
    output_file = save_repo_to_text(temp_dir, output_dir=output_dir)

    expected_parent = os.path.abspath(output_dir)
    assert os.path.exists(output_file)
    assert os.path.abspath(os.path.dirname(output_file)) == expected_parent
    # Compare absolute forms so a relative return value is handled as well.
    assert os.path.abspath(output_file).startswith(expected_parent)
|
||||
|
||||
def test_get_tree_structure_empty_directory(temp_dir: str) -> None:
    """Check the tree produced for a directory that contains nothing."""
    stripped = get_tree_structure(temp_dir).strip()
    # Either nothing at all, or just the directory itself.
    assert stripped in ("", temp_dir)
|
||||
|
||||
@patch('repo_to_text.core.core.run_tree_command', return_value=MOCK_RAW_TREE_EMPTY_FILTERING)
@patch('repo_to_text.core.core.check_tree_command', return_value=True)
def test_empty_dirs_filtering(mock_check_tree: MagicMock, mock_run_tree: MagicMock, tmp_path: str) -> None:
    """Check that an empty directory does not show up in the generated tree."""
    base_path = os.path.normpath(tmp_path)

    # Two directories that will receive a file, plus one that stays empty.
    populated = {
        "src": ("main.py", "print('test')"),
        "empty_dir": None,
        "tests": ("test_main.py", "def test(): pass"),
    }
    for directory in populated:
        os.makedirs(os.path.join(base_path, directory))
    for directory, file_spec in populated.items():
        if file_spec is None:
            continue
        file_name, text = file_spec
        with open(os.path.join(base_path, directory, file_name), "w", encoding='utf-8') as handle:
            handle.write(text)

    tree_output = get_tree_structure(base_path)

    # Shown by pytest only when the test fails or output capture is disabled.
    print("\nTree output:")
    print(tree_output)

    for expected_entry in ("src", "tests", "main.py", "test_main.py"):
        assert expected_entry in tree_output

    # Inspect line by line, leaving out any line that names the root directory.
    for line in tree_output.splitlines():
        if base_path not in line:
            assert "empty_dir" not in line, f"Found empty_dir in line: {line}"
|
||||
|
||||
# Tests for maximum_word_count_per_file functionality
|
||||
|
||||
def test_load_additional_specs_valid_max_words(tmp_path: str) -> None:
    """Check that a positive integer maximum_word_count_per_file is returned as-is."""
    settings_file = os.path.join(tmp_path, ".repo-to-text-settings.yaml")
    with open(settings_file, "w", encoding="utf-8") as handle:
        yaml.dump({"maximum_word_count_per_file": 1000}, handle)

    loaded = load_additional_specs(tmp_path)

    assert loaded["maximum_word_count_per_file"] == 1000
|
||||
|
||||
def test_load_additional_specs_invalid_max_words_string(tmp_path: str, caplog: pytest.LogCaptureFixture) -> None:
    """Check that a non-numeric maximum_word_count_per_file is rejected and logged."""
    settings_file = os.path.join(tmp_path, ".repo-to-text-settings.yaml")
    with open(settings_file, "w", encoding="utf-8") as handle:
        yaml.dump({"maximum_word_count_per_file": "not-an-integer"}, handle)

    loaded = load_additional_specs(tmp_path)

    # The bad value is replaced by None and the log names the offending value.
    assert loaded["maximum_word_count_per_file"] is None
    assert "Invalid value for 'maximum_word_count_per_file': not-an-integer" in caplog.text
|
||||
|
||||
def test_load_additional_specs_invalid_max_words_negative(tmp_path: str, caplog: pytest.LogCaptureFixture) -> None:
    """Check that a negative maximum_word_count_per_file is rejected and logged."""
    settings_file = os.path.join(tmp_path, ".repo-to-text-settings.yaml")
    with open(settings_file, "w", encoding="utf-8") as handle:
        yaml.dump({"maximum_word_count_per_file": -100}, handle)

    loaded = load_additional_specs(tmp_path)

    # The bad value is replaced by None and the log names the offending value.
    assert loaded["maximum_word_count_per_file"] is None
    assert "Invalid value for 'maximum_word_count_per_file': -100" in caplog.text
|
||||
|
||||
def test_load_additional_specs_max_words_is_none_in_yaml(tmp_path: str, caplog: pytest.LogCaptureFixture) -> None:
    """Check that an explicit null maximum_word_count_per_file is accepted silently."""
    settings_file = os.path.join(tmp_path, ".repo-to-text-settings.yaml")
    with open(settings_file, "w", encoding="utf-8") as handle:
        # Python None is serialised as YAML 'null'.
        yaml.dump({"maximum_word_count_per_file": None}, handle)

    loaded = load_additional_specs(tmp_path)

    assert loaded["maximum_word_count_per_file"] is None
    # Unlike the invalid-value cases, nothing should be logged about it.
    assert "Invalid value for 'maximum_word_count_per_file'" not in caplog.text
|
||||
|
||||
def test_load_additional_specs_max_words_not_present(tmp_path: str) -> None:
    """Check the result when the settings file lacks maximum_word_count_per_file."""
    settings_file = os.path.join(tmp_path, ".repo-to-text-settings.yaml")
    with open(settings_file, "w", encoding="utf-8") as handle:
        yaml.dump({"other_setting": "value"}, handle)

    loaded = load_additional_specs(tmp_path)

    assert loaded["maximum_word_count_per_file"] is None
|
||||
|
||||
def test_load_additional_specs_no_settings_file(tmp_path: str) -> None:
    """Check the result when the directory has no settings file at all."""
    loaded = load_additional_specs(tmp_path)
    assert loaded["maximum_word_count_per_file"] is None
|
||||
|
||||
# Tests for generate_output_content related to splitting
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)
def test_generate_output_content_no_splitting_max_words_not_set(mock_get_tree: MagicMock, simple_word_count_repo: str) -> None:
    """Check that a single segment is produced when no word limit is configured."""
    repo = simple_word_count_repo
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(repo)

    segments = generate_output_content(
        repo, MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO, *specs,
        maximum_word_count_per_file=None
    )

    # The tree text is handed in directly, so the patched function must stay unused.
    mock_get_tree.assert_not_called()
    assert len(segments) == 1
    only_segment = segments[0]
    assert "file1.txt" in only_segment
    assert "This is file one." in only_segment
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)
def test_generate_output_content_no_splitting_content_less_than_limit(mock_get_tree: MagicMock, simple_word_count_repo: str) -> None:
    """Check that a single segment is produced when the word limit is not reached."""
    repo = simple_word_count_repo
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(repo)
    generous_limit = 500

    segments = generate_output_content(
        repo, MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO, *specs,
        maximum_word_count_per_file=generous_limit
    )

    mock_get_tree.assert_not_called()
    assert len(segments) == 1
    assert "file1.txt" in segments[0]
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)
def test_generate_output_content_splitting_occurs(mock_get_tree: MagicMock, simple_word_count_repo: str) -> None:
    """Check that the output is split into several segments under a 30-word limit."""
    repo = simple_word_count_repo
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(repo)
    max_words = 30

    segments = generate_output_content(
        repo, MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO, *specs,
        maximum_word_count_per_file=max_words
    )

    mock_get_tree.assert_not_called()
    assert len(segments) > 1

    combined = "".join(segments)
    assert "file1.txt" in combined
    assert "This is file one." in combined

    *leading_segments, final_segment = segments
    for segment in leading_segments:
        words_in_segment = count_words_for_test(segment)
        # A segment may exceed the limit only if its second-to-last line is
        # itself longer than the limit (a single oversized chunk).
        assert words_in_segment <= max_words or \
            count_words_for_test(segment.splitlines()[-2]) > max_words

    # The last segment may be short, but never empty.
    assert count_words_for_test(final_segment) > 0
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)
def test_generate_output_content_splitting_very_small_limit(mock_get_tree: MagicMock, simple_word_count_repo: str) -> None:
    """Check segment boundaries around file1's content under a 10-word limit."""
    repo = simple_word_count_repo
    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(repo)
    max_words = 10

    segments = generate_output_content(
        repo, MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO, *specs,
        maximum_word_count_per_file=max_words
    )

    mock_get_tree.assert_not_called()
    # Several chunks and a tiny limit: more than three segments are expected.
    assert len(segments) > 3
    assert "file1.txt" in "".join(segments)

    raw_file1_content = "This is file one. It has eight words."  # 8 words
    closing_tag_content = "</content>"  # 1 word
    opening_tag_word_count = 2  # <content and full_path="file1.txt">

    # Diagnostic dump, visible when the test fails or capture is disabled.
    print(f"\nDEBUG: Generated {len(segments)} segments:")
    for i, segment in enumerate(segments):
        print(f"Segment {i+1} ({count_words_for_test(segment)} words):")
        print(f"'{segment}'")
        print("---")

    # Only the first segment holding file1's raw text is examined.
    holder = next((seg for seg in segments if raw_file1_content in seg), None)
    found_raw_content_segment = holder is not None
    if found_raw_content_segment:
        raw_word_count = count_words_for_test(raw_file1_content)
        if closing_tag_content in holder:
            # Raw text together with its closing tag: 8 + 1 = 9 words.
            expected_word_count = raw_word_count + count_words_for_test(closing_tag_content)
        else:
            # Raw text together with its opening tag: 2 + 8 = 10 words.
            expected_word_count = opening_tag_word_count + raw_word_count
        assert count_words_for_test(holder) == expected_word_count

    assert found_raw_content_segment, "Segment with raw file1 content not found or not matching expected structure"
|
||||
|
||||
@patch('repo_to_text.core.core.get_tree_structure')  # Will use a specific mock inside
def test_generate_output_content_file_header_content_together(mock_get_tree: MagicMock, tmp_path: str) -> None:
    """Check when a file's tags and body stay together and when they are split apart."""
    repo_path = str(tmp_path)
    file_content_str = "word " * 15  # 15 words

    # One file whose body is the 15 words without the trailing space.
    single_file_path = os.path.join(repo_path, "single_file.txt")
    os.makedirs(os.path.dirname(single_file_path), exist_ok=True)
    with open(single_file_path, "w", encoding="utf-8") as handle:
        handle.write(file_content_str.strip())

    # Order of the tuple: gitignore, content-ignore, tree-and-content-ignore.
    specs = load_ignore_specs(repo_path)
    mock_tree_for_single_file = ".\n└── single_file.txt"
    # Only relevant if the code under test calls get_tree_structure itself.
    mock_get_tree.return_value = mock_tree_for_single_file

    # Part 1: a limit large enough to hold the header plus the whole file block.
    max_words_sufficient = 35
    segments = generate_output_content(
        repo_path, mock_tree_for_single_file, *specs,
        maximum_word_count_per_file=max_words_sufficient
    )
    assert len(segments) == 1
    expected_file_block = f'<content full_path="single_file.txt">\n{file_content_str.strip()}\n</content>'
    assert expected_file_block in segments[0]

    # Part 2: a limit smaller than the 15-word body forces a split.
    max_words_small = 10
    segments_small_limit = generate_output_content(
        repo_path, mock_tree_for_single_file, *specs,
        maximum_word_count_per_file=max_words_small
    )
    assert len(segments_small_limit) >= 2

    raw_content_single_file = "word " * 15  # 15 words
    # Expectation (NOTE(review): confirm against the splitter): the body ends
    # up in a segment of its own, separate from both of its tags.
    body_only_segment = next(
        (
            seg for seg in segments_small_limit
            if raw_content_single_file.strip() in seg.strip()
            and '<content full_path="single_file.txt">' not in seg
            and '</content>' not in seg
        ),
        None,
    )
    found_raw_content_in_own_segment = body_only_segment is not None
    if found_raw_content_in_own_segment:
        # Nothing but the 15 raw words may be in that segment.
        assert count_words_for_test(body_only_segment.strip()) == 15

    assert found_raw_content_in_own_segment, "Raw content of single_file.txt not found in its own segment"
|
||||
|
||||
# Tests for save_repo_to_text related to splitting
|
||||
@patch('repo_to_text.core.core.load_additional_specs')
@patch('repo_to_text.core.core.generate_output_content')
@patch('repo_to_text.core.core.os.makedirs')
@patch('builtins.open', new_callable=mock_open)
@patch('repo_to_text.core.core.copy_to_clipboard')
def test_save_repo_to_text_no_splitting_mocked(
    mock_copy_to_clipboard: MagicMock,
    mock_file_open: MagicMock,
    mock_makedirs: MagicMock,
    mock_generate_output: MagicMock,
    mock_load_specs: MagicMock,
    simple_word_count_repo: str,
    tmp_path: str
) -> None:
    """Check the single-file path of save_repo_to_text.

    With no word limit configured and exactly one segment produced, the
    function is expected to create the output directory, write that segment
    to one timestamped file and copy the same text to the clipboard.
    """
    combined_text = "Single combined content\nfile1.txt\ncontent1"
    output_dir = os.path.join(str(tmp_path), "output")
    expected_filename = os.path.join(output_dir, "repo-to-text_mock_timestamp.txt")

    mock_load_specs.return_value = {'maximum_word_count_per_file': None}
    mock_generate_output.return_value = [combined_text]

    # Freeze the timestamp so the generated file name is predictable.
    with patch('repo_to_text.core.core.datetime') as mock_datetime:
        mock_datetime.now.return_value.strftime.return_value = "mock_timestamp"
        returned_path = save_repo_to_text(simple_word_count_repo, output_dir=output_dir)

    mock_load_specs.assert_called_once_with(simple_word_count_repo)
    mock_generate_output.assert_called_once()
    assert os.path.basename(returned_path) == os.path.basename(expected_filename)
    mock_makedirs.assert_called_once_with(output_dir)
    mock_file_open.assert_called_once_with(expected_filename, 'w', encoding='utf-8')

    # mock_open hands back the same handle for every call; reach it through
    # return_value so the check itself does not register another open().
    written_handle = mock_file_open.return_value
    written_handle.write.assert_called_once_with(combined_text)
    mock_copy_to_clipboard.assert_called_once_with(combined_text)
|
||||
|
||||
@patch('repo_to_text.core.core.load_additional_specs')
@patch('repo_to_text.core.core.generate_output_content')
@patch('repo_to_text.core.core.os.makedirs')
@patch('builtins.open')
@patch('repo_to_text.core.core.copy_to_clipboard')
def test_save_repo_to_text_splitting_occurs_mocked(
    mock_copy_to_clipboard: MagicMock,
    mock_open_function: MagicMock,
    mock_makedirs: MagicMock,
    mock_generate_output: MagicMock,
    mock_load_specs: MagicMock,
    simple_word_count_repo: str,
    tmp_path: str
) -> None:
    """Check the multi-file path of save_repo_to_text.

    Two segments are produced, so two ``_part_N`` files are expected: each
    segment written to its own handle, the first part's path returned, and
    nothing copied to the clipboard.
    """
    segments_content = ["Segment 1 content data", "Segment 2 content data"]
    output_dir = os.path.join(str(tmp_path), "output_split_adv")

    mock_load_specs.return_value = {'maximum_word_count_per_file': 50}
    mock_generate_output.return_value = segments_content

    # One distinct handle per open() call so writes can be attributed to a part.
    part_handles = [MagicMock(spec=IO), MagicMock(spec=IO)]
    mock_open_function.side_effect = part_handles

    # Freeze the timestamp so the generated file names are predictable.
    with patch('repo_to_text.core.core.datetime') as mock_datetime:
        mock_datetime.now.return_value.strftime.return_value = "mock_ts_split_adv"
        returned_path = save_repo_to_text(simple_word_count_repo, output_dir=output_dir)

    expected_paths = (
        os.path.join(output_dir, "repo-to-text_mock_ts_split_adv_part_1.txt"),
        os.path.join(output_dir, "repo-to-text_mock_ts_split_adv_part_2.txt"),
    )

    assert os.path.basename(returned_path) == os.path.basename(expected_paths[0])
    mock_makedirs.assert_called_once_with(output_dir)

    for part_path in expected_paths:
        mock_open_function.assert_any_call(part_path, 'w', encoding='utf-8')
    assert mock_open_function.call_count == 2

    # The object bound by ``with open(...) as f`` is __enter__'s return value.
    for handle, segment in zip(part_handles, segments_content):
        handle.__enter__.return_value.write.assert_called_once_with(segment)

    mock_copy_to_clipboard.assert_not_called()
|
||||
|
||||
@patch('repo_to_text.core.core.copy_to_clipboard')
@patch('builtins.open', new_callable=mock_open)
@patch('repo_to_text.core.core.os.makedirs')
@patch('repo_to_text.core.core.generate_output_content')
@patch('repo_to_text.core.core.load_additional_specs')
@patch('repo_to_text.core.core.get_tree_structure', return_value=MOCK_GTS_OUTPUT_FOR_SIMPLE_REPO)
def test_save_repo_to_text_stdout_with_splitting(
    mock_get_tree: MagicMock,  # mocks arrive bottom-up relative to the decorators
    mock_load_specs: MagicMock,
    mock_generate_output: MagicMock,
    mock_os_makedirs: MagicMock,
    mock_file_open: MagicMock,
    mock_copy_to_clipboard: MagicMock,
    simple_word_count_repo: str,
    capsys: pytest.CaptureFixture[str]
) -> None:
    """Check save_repo_to_text with ``to_stdout=True`` when several segments exist.

    The segments are expected to be concatenated, printed and returned as one
    string, with no directory creation, file writing or clipboard copy.
    """
    stdout_segments = ["Segment 1 for stdout.", "Segment 2 for stdout."]
    joined_output = "Segment 1 for stdout.Segment 2 for stdout."

    mock_load_specs.return_value = {'maximum_word_count_per_file': 10}
    mock_generate_output.return_value = stdout_segments

    result_string = save_repo_to_text(simple_word_count_repo, to_stdout=True)

    mock_load_specs.assert_called_once_with(simple_word_count_repo)
    mock_get_tree.assert_called_once()
    mock_generate_output.assert_called_once()

    # Nothing should touch the filesystem or the clipboard in stdout mode.
    for untouched in (mock_os_makedirs, mock_file_open, mock_copy_to_clipboard):
        untouched.assert_not_called()

    captured = capsys.readouterr()
    # strip() tolerates surrounding newlines in the captured output.
    assert joined_output == captured.out.strip()
    assert result_string == joined_output
|
||||
|
||||
@patch('repo_to_text.core.core.load_additional_specs')
@patch('repo_to_text.core.core.generate_output_content')
@patch('repo_to_text.core.core.os.makedirs')
@patch('builtins.open', new_callable=mock_open)
@patch('repo_to_text.core.core.copy_to_clipboard')
def test_save_repo_to_text_empty_segments(
    mock_copy_to_clipboard: MagicMock,
    mock_file_open: MagicMock,
    mock_makedirs: MagicMock,
    mock_generate_output: MagicMock,
    mock_load_specs: MagicMock,
    simple_word_count_repo: str,
    tmp_path: str,
    caplog: pytest.LogCaptureFixture
) -> None:
    """Check save_repo_to_text when generate_output_content yields nothing.

    An empty segment list is expected to produce an empty return path, no
    filesystem or clipboard activity, and a log entry naming the condition.
    """
    output_dir = os.path.join(str(tmp_path), "output_empty")
    mock_generate_output.return_value = []
    mock_load_specs.return_value = {'maximum_word_count_per_file': None}

    returned_path = save_repo_to_text(simple_word_count_repo, output_dir=output_dir)

    assert returned_path == ""
    for untouched in (mock_makedirs, mock_file_open, mock_copy_to_clipboard):
        untouched.assert_not_called()
    assert "generate_output_content returned no segments" in caplog.text
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly; only this file is collected.
    pytest.main([__file__])
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
import os
|
||||
import subprocess
|
||||
import pytest
|
||||
import time
|
||||
|
||||
def test_repo_to_text():
    """Run the ``repo-to-text`` command end to end in the current directory.

    The test removes stale ``repo_snapshot_*.txt`` files, invokes the command,
    and then asserts that it exited with status 0 and left exactly one
    non-empty snapshot file behind. Generated snapshots are removed again
    even when an assertion fails, so a failed run does not pollute the
    working directory.
    """
    def _snapshot_files():
        """Return the snapshot file names currently present in the cwd."""
        return [
            name for name in os.listdir('.')
            if name.startswith('repo_snapshot_') and name.endswith('.txt')
        ]

    # Remove any existing snapshot files to avoid conflicts
    for stale in _snapshot_files():
        os.remove(stale)

    try:
        # Run the repo-to-text command; the exit status is asserted below,
        # so do not let subprocess raise on a non-zero return code.
        result = subprocess.run(
            ['repo-to-text'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )

        # errors='replace' keeps the failure message printable even if the
        # command emitted bytes that are not valid UTF-8.
        stderr_text = result.stderr.decode('utf-8', errors='replace')
        assert result.returncode == 0, f"Command failed with error: {stderr_text}"

        # Check for the existence of the new snapshot file
        snapshot_files = _snapshot_files()
        assert len(snapshot_files) == 1, "No snapshot file created or multiple files created"

        # Verify that the snapshot file is not empty; read with an explicit
        # encoding so the result does not depend on the platform default.
        with open(snapshot_files[0], 'r', encoding='utf-8') as f:
            content = f.read()
        assert len(content) > 0, "Snapshot file is empty"
    finally:
        # Clean up whatever the command generated, pass or fail.
        for leftover in _snapshot_files():
            os.remove(leftover)
|
||||
|
||||
if __name__ == "__main__":
    # Allow running the tests directly; pytest uses its default collection.
    pytest.main()
|
||||
147
tests/test_utils.py
Normal file
147
tests/test_utils.py
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
"""Test the utils module."""
|
||||
|
||||
import logging
|
||||
from typing import Generator
|
||||
import io
|
||||
import pytest
|
||||
|
||||
from repo_to_text.utils.utils import setup_logging
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_logger() -> Generator[None, None, None]:
    """Strip the root logger's handlers and restore WARNING around each test."""
    root = logging.getLogger()

    def _restore_defaults() -> None:
        # Iterate over a copy: removeHandler mutates root.handlers.
        for attached in list(root.handlers):
            root.removeHandler(attached)
        root.setLevel(logging.WARNING)

    _restore_defaults()  # clean state before the test body runs
    yield
    _restore_defaults()  # undo whatever the test configured
|
||||
|
||||
def test_setup_logging_debug() -> None:
    """setup_logging(debug=True) installs a handler and sets the root level to DEBUG."""
    root = logging.getLogger()
    # Start from a bare root logger at the WARNING level.
    root.setLevel(logging.WARNING)
    root.handlers.clear()

    setup_logging(debug=True)

    handler_count = len(root.handlers)
    assert handler_count > 0
    assert root.level == logging.DEBUG
|
||||
|
||||
def test_setup_logging_info() -> None:
    """setup_logging(debug=False) installs a handler and sets the root level to INFO."""
    root = logging.getLogger()
    # Start from a bare root logger at the WARNING level.
    root.setLevel(logging.WARNING)
    root.handlers.clear()

    setup_logging(debug=False)

    handler_count = len(root.handlers)
    assert handler_count > 0
    assert root.level == logging.INFO
|
||||
|
||||
def test_setup_logging_formatter() -> None:
    """The first root handler has a formatter that renders message and level name."""
    setup_logging(debug=True)
    installed_handlers = logging.getLogger().handlers

    # At least one handler must have been installed.
    assert len(installed_handlers) > 0

    formatter = installed_handlers[0].formatter
    assert formatter is not None

    # Format a hand-built record and look for its pieces in the output.
    record_fields = {
        'name': 'test',
        'level': logging.DEBUG,
        'pathname': 'test.py',
        'lineno': 1,
        'msg': 'Test message',
        'args': (),
        'exc_info': None,
    }
    test_record = logging.LogRecord(**record_fields)
    rendered = formatter.format(test_record)

    assert 'Test message' in rendered
    assert test_record.levelname in rendered
|
||||
|
||||
def test_setup_logging_multiple_calls() -> None:
    """Calling setup_logging twice leaves the root handler count unchanged."""
    root = logging.getLogger()
    root.handlers.clear()

    setup_logging(debug=True)
    count_after_first = len(root.handlers)

    # Second call must not stack another handler on top of the first.
    setup_logging(debug=True)
    count_after_second = len(root.handlers)

    assert count_after_second == count_after_first, "Should not create duplicate handlers"
|
||||
|
||||
def test_setup_logging_level_change() -> None:
    """The root level follows the debug flag across successive setup_logging calls."""
    root = logging.getLogger()

    # Walk through debug first, then info, clearing handlers before each setup.
    for debug_flag, expected_level in ((True, logging.DEBUG), (False, logging.INFO)):
        root.handlers.clear()
        setup_logging(debug=debug_flag)
        assert root.level == expected_level
|
||||
|
||||
def test_setup_logging_message_format() -> None:
    """A debug record logged via the root logger carries message, level and source file."""
    setup_logging(debug=True)
    root = logging.getLogger()

    # Capture output through an extra in-memory handler whose format
    # includes the pathname of the logging call.
    buffer = io.StringIO()
    capture_format = logging.Formatter(
        '%(levelname)s %(name)s:%(pathname)s:%(lineno)d %(message)s'
    )
    capture_handler = logging.StreamHandler(buffer)
    capture_handler.setFormatter(capture_format)
    root.addHandler(capture_handler)

    # Make sure neither the logger nor the handler filters DEBUG records out.
    root.setLevel(logging.DEBUG)
    capture_handler.setLevel(logging.DEBUG)

    test_message = "Test log message"
    root.debug(test_message)
    log_output = buffer.getvalue()

    for expected_fragment in (test_message, "DEBUG", "test_utils.py"):
        assert expected_fragment in log_output
|
||||
|
||||
def test_setup_logging_error_messages() -> None:
    """An ERROR record is emitted with its level name when debug is off."""
    setup_logging(debug=False)
    root = logging.getLogger()

    # Mirror the first installed handler's formatter on an in-memory handler.
    buffer = io.StringIO()
    capture_handler = logging.StreamHandler(buffer)
    installed_formatter = root.handlers[0].formatter
    capture_handler.setFormatter(installed_formatter)
    root.addHandler(capture_handler)

    error_message = "Test error message"
    root.error(error_message)
    log_output = buffer.getvalue()

    # Errors must get through even though debug logging is disabled.
    for expected_fragment in (error_message, "ERROR"):
        assert expected_fragment in log_output
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly; only this file is collected.
    pytest.main([__file__])
|
||||
Loading…
Add table
Add a link
Reference in a new issue