Cybrkyd's Git Repositories

GitGen - commit: c89ddb8

commit c89ddb8a2170ab59b93eb65b084f60efec81ca8a68bf5668b30476ff112a0c10
author cybrkyd <git@cybrkyd.com> 2026-03-05 10:33:13 +0000
committer cybrkyd <git@cybrkyd.com> 2026-03-05 10:33:13 +0000
v2.0

Commit Message

Version 2 - Site generation cache

- Cached builds workflow
- Cache tracking in gitgen-cache.json with file path + HEAD as reference
- README updated

📊 Diffstat

README.md 23
gitgen.py 97
2 files changed, 108 insertions(+), 12 deletions(-)

Diff

diff --git a/README.md b/README.md
index 00fba2d..f6aa7fd 100644
--- a/README.md
+++ b/README.md
@@ -13,15 +13,16 @@ GitGen generates a static HTML site with the following capabilities:
- Repository overview pages with rendered README files
- Markdown rendering for README.md files
- File tree listings based on the current `HEAD`
- - Commit history (last 50 commits per repository)
+ - Commit history (last 50 commits per repository, configurable)
- Detailed commit pages with:
- Full commit metadata
- Tags pointing at relevant commits
- Diffstat summary
- - Unified diff view (truncated to 500 lines)
+ - Unified diff view (truncated to 500 lines, configurable)
- Safe HTML escaping throughout
- SEO meta
- Parallel repository processing for faster generation
+ - Version 2 introduced site generation caching for even faster builds and deployment.
All output is static HTML and CSS, suitable for hosting on any static web server.
@@ -60,6 +61,19 @@ python3 gitgen.py
Open `git-website/index.html` in a browser or deploy the directory contents to a static web host.
+ ## Caching
+
+ Version 2 introduced site generation caching. A log `gitgen-cache.json` is created at first run which keeps track of the state of each repository by file path and HEAD hash. At subsequent runs, only repositories with new HEAD hashes (commits) are regenerated in full. Any new repositories are also added to the cache log and tracked moving forward.
+
+ - The log file `gitgen-cache.json` is output to the same directory as `gitgen.py`.
+ - Deleted repositories **need to be removed manually** from `gitgen-cache.json`.
+
+ The advantage is two-fold:
+ 1. The caching removes the need to regenerate the entire site (all repos) at each run, ensuring faster build times.
+ 2. When deploying to the remote web host, only changes are uploaded. This is assuming a solution like `rsync` or `ftp` is used, both of which are capable of transferring only newer local files to the remote web host.
+
+ The bandwidth savings on the remote deployment are significant, especially with a large number of repositories.
+
## Directory Structure
The generated output has the following structure:
@@ -88,14 +102,17 @@ GitGen looks for README files in the following order:
- README.txt
- README
- readme.md
+ - readme.txt
Markdown READMEs are rendered using Python-Markdown with these extensions enabled:
- tables
- sane_lists
- fenced_code
+ - toc
+ - attr_list
- Non-Markdown READMEs are rendered as escaped pre-formatted text.
+ Non-Markdown READMEs are rendered as escaped, pre-formatted text.
## Licence
diff --git a/gitgen.py b/gitgen.py
index e357fbc..ab6d11b 100644
--- a/gitgen.py
+++ b/gitgen.py
@@ -8,6 +8,7 @@ import sys
import subprocess
import datetime
import html
+ import json
from pathlib import Path
from typing import List, Dict, Optional, DefaultDict
from collections import defaultdict
@@ -19,6 +20,7 @@ from concurrent.futures import ThreadPoolExecutor
BASE_NAME = "Cybrkyd's Git Repositories"
BASE_URL = "https://git.cybrkyd.com"
BASE_DESC = "Cybrkyd's code, commits and diffs playground."
+ CACHE_FILE = "gitgen-cache.json" # Cache file stored in script directory
class GitRepoScanner:
"""Scans and processes Git repositories"""
@@ -43,6 +45,10 @@ class GitRepoScanner:
except Exception:
return None
+ def get_current_head(self, repo_path: Path) -> Optional[str]:
+ """Get the current HEAD commit hash for a repository"""
+ return self.run_git_command(repo_path, ['rev-parse', 'HEAD'])
+
def find_git_repos(self) -> List[Dict]:
self.repos = []
for item in self.base_path.iterdir():
@@ -123,7 +129,7 @@ class GitRepoScanner:
return None
def get_all_contributions(self) -> Dict[str, int]:
- """Collect all commits from all repos for the past year"""
+ """Collect commits from all repos for the past year"""
contributions = defaultdict(int)
for item in self.base_path.iterdir():
@@ -942,6 +948,38 @@ class HTMLGenerator:
return "".join(html_fragments)
+ def load_cache(cache_path: Path) -> Dict:
+ """Load the generation cache from JSON file"""
+ if cache_path.exists():
+ try:
+ with open(cache_path, 'r') as f:
+ return json.load(f)
+ except (json.JSONDecodeError, IOError) as e:
+ print(f"Warning: Could not load cache file: {e}")
+ return {}
+ return {}
+
+ def save_cache(cache_path: Path, cache_data: Dict):
+ """Save the generation cache to JSON file"""
+ try:
+ with open(cache_path, 'w') as f:
+ json.dump(cache_data, f, indent=2)
+ except IOError as e:
+ print(f"Warning: Could not save cache file: {e}")
+
+ def needs_update(repo_info: Dict, cached_head: Optional[str], scanner: GitRepoScanner) -> bool:
+ """Check if a repository needs to be regenerated"""
+ current_head = scanner.get_current_head(repo_info['path'])
+ if not current_head:
+ print(f"Warning: Could not get HEAD for {repo_info['name']}, will regenerate")
+ return True
+
+ # Store current head in repo info for later use
+ repo_info['current_head'] = current_head.strip()
+
+ # If no cached head or heads differ, needs update
+ return not cached_head or cached_head != repo_info['current_head']
+
def process_repo(repo, scanner, output_dir):
repo_dir = output_dir / repo['name']
repo_dir.mkdir(exist_ok=True)
@@ -973,18 +1011,59 @@ def process_repo(repo, scanner, output_dir):
def main():
scanner = GitRepoScanner()
- generator = HTMLGenerator(scanner)
- repos = scanner.find_git_repos()
- if not repos:
+
+ # Find all repositories
+ all_repos = scanner.find_git_repos()
+ if not all_repos:
+ print("No git repositories found.")
sys.exit(1)
- out = generator.output_dir
- (out / "index.html").write_text(generator.generate_index(repos), encoding='utf-8')
+ # Setup output directory and cache
+ generator = HTMLGenerator(scanner)
+ cache_path = Path.cwd() / CACHE_FILE
+
+ # Load cache
+ cache = load_cache(cache_path)
+ repos_cache = cache.get('repositories', {})
+
+ # Determine which repos need processing
+ repos_to_process = []
+ skipped_count = 0
- with ThreadPoolExecutor() as executor:
- executor.map(lambda r: process_repo(r, scanner, out), repos)
+ for repo in all_repos:
+ cached_info = repos_cache.get(repo['name'], {})
+ cached_head = cached_info.get('head')
+
+ if needs_update(repo, cached_head, scanner):
+ repos_to_process.append(repo)
+ else:
+ skipped_count += 1
+
+ if repos_to_process:
+ # Always generate main index
+ (generator.output_dir / "index.html").write_text(
+ generator.generate_index(all_repos), encoding='utf-8'
+ )
- print("\n Website generation completed.")
+ # Process repos that need updates
+ with ThreadPoolExecutor() as executor:
+ executor.map(lambda r: process_repo(r, scanner, generator.output_dir), repos_to_process)
+
+ # Update cache with new HEADs for processed repos
+ for repo in repos_to_process:
+ if 'current_head' in repo:
+ repos_cache[repo['name']] = {
+ 'path': str(repo['path']),
+ 'head': repo['current_head']
+ }
+
+ # Save updated cache
+ cache['repositories'] = repos_cache
+ save_cache(cache_path, cache)
+
+ print("\nWebsite generation completed.")
+ else:
+ print("\nNo repositories needed updating.")
if __name__ == "__main__":
main()