microsoft/hve-core
Public · mirrored from https://github.com/microsoft/hve-core · Available
.github/skills/experimental/powerpoint/scripts/generate_themes.py
332 lines · mode: code
| 1 | #!/usr/bin/env python3 |
| 2 | # Copyright (c) Microsoft Corporation. |
| 3 | # SPDX-License-Identifier: MIT |
| 4 | """Generate themed content directory variants from a base deck's content. |
| 5 | |
| 6 | Reads a themes.yaml color mapping file and produces a complete content |
| 7 | directory copy for each theme with all hex colors remapped in YAML and |
| 8 | Python files while copying images as-is. |
| 9 | |
| 10 | Usage:: |
| 11 | |
| 12 | python generate_themes.py --content-dir content/ \ |
| 13 | --themes themes.yaml --output-dir ../ |
| 14 | """ |
| 15 | |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | import argparse |
| 19 | import logging |
| 20 | import re |
| 21 | import shutil |
| 22 | import sys |
| 23 | from pathlib import Path |
| 24 | from typing import Any |
| 25 | |
| 26 | from pptx_utils import ( |
| 27 | EXIT_ERROR, |
| 28 | EXIT_FAILURE, |
| 29 | EXIT_SUCCESS, |
| 30 | configure_logging, |
| 31 | ) |
| 32 | |
| 33 | # ruamel.yaml is used intentionally for round-trip fidelity in |
| 34 | # update_style_metadata: preserves comments, key ordering, and quoting |
| 35 | # style when patching style.yaml files. pyyaml cannot preserve these. |
| 36 | from ruamel.yaml import YAML |
| 37 | |
# Module-level logger named after this module; the functions below use it
# for progress and error reporting.
logger = logging.getLogger(__name__)
| 39 | |
| 40 | |
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the theme generator.

    The parser exposes three required path options (``--content-dir``,
    ``--themes`` and ``--output-dir``) and an optional ``-v``/``--verbose``
    switch.
    """
    cli = argparse.ArgumentParser(
        description="Generate themed content directory variants from a base deck."
    )

    # Every required option is a filesystem path; only flag and help differ.
    required_paths = (
        ("--content-dir", "Path to the base theme's content directory."),
        ("--themes", "Path to a YAML file defining theme color mappings."),
        (
            "--output-dir",
            "Parent directory where themed content directories are created.",
        ),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, type=Path, required=True, help=help_text)

    cli.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )
    return cli
| 68 | |
| 69 | |
def load_themes(themes_path: Path) -> dict[str, Any]:
    """Load and validate the themes YAML file.

    Args:
        themes_path: Path to the UTF-8 encoded YAML file describing the themes.

    Returns:
        The ``themes`` mapping keyed by theme-id. Every theme config in it is
        a mapping that carries a ``colors`` mapping whose keys and values are
        6-character hex strings (with an optional leading ``#``).

    Raises:
        ValueError: If the document has no top-level ``themes`` mapping, a
            theme id is not a string, a theme is not a mapping with a
            ``colors`` mapping, or a color entry is not a 6-character hex
            string.
    """
    # fullmatch is used below so a trailing newline cannot slip through.
    hex6_re = re.compile(r"#?[0-9A-Fa-f]{6}")
    ryaml = YAML(typ="safe")
    data = ryaml.load(themes_path.read_text(encoding="utf-8"))
    if not isinstance(data, dict) or "themes" not in data:
        raise ValueError("themes YAML must contain a top-level 'themes' key")

    themes = data["themes"]
    if not isinstance(themes, dict):
        raise ValueError(
            "'themes' must be a mapping of theme-id to theme configuration"
        )

    for theme_id, cfg in themes.items():
        # Theme ids become directory names later on, so they must be strings
        # (YAML would happily parse a bare ``2024:`` key as an integer).
        if not isinstance(theme_id, str):
            raise ValueError(f"Theme id {theme_id!r} must be a string")
        # An empty theme body parses as None; report it as a missing colors
        # mapping instead of failing on the membership test.
        if not isinstance(cfg, dict) or not isinstance(cfg.get("colors"), dict):
            raise ValueError(f"Theme '{theme_id}' must contain a 'colors' mapping")
        for k, v in cfg["colors"].items():
            if not isinstance(k, str) or not isinstance(v, str):
                raise ValueError(
                    f"Theme '{theme_id}' color map keys and values must be "
                    f"strings; got {k!r}: {v!r}"
                )
            if not hex6_re.fullmatch(k) or not hex6_re.fullmatch(v):
                raise ValueError(
                    f"Theme '{theme_id}' color entry {k!r}: {v!r} "
                    "must be 6-character hex strings (with optional # prefix)"
                )
    return themes
| 96 | |
| 97 | |
def remap_hex_in_text(text: str, color_map: dict[str, str]) -> str:
    """Substitute ``#RRGGBB`` color literals in *text* according to *color_map*.

    All replacements happen in one regex pass, so the result of one
    substitution is never fed into another (a map containing both A→B and
    B→C turns A into B, not C).

    Keys and values of *color_map* may carry a leading ``#``; it is stripped
    before use. Source colors are matched case-insensitively; replacement
    values are inserted with the casing given in the map.

    Raises:
        ValueError: If any stripped key or value is not exactly 6 characters.
    """
    normalized: dict[str, str] = {}
    for source, target in color_map.items():
        normalized[source.lstrip("#").lower()] = target.lstrip("#")

    rejected = {
        old: new
        for old, new in normalized.items()
        if not (len(old) == 6 and len(new) == 6)
    }
    if rejected:
        raise ValueError(
            f"Color map entries must be 6-character hex strings; invalid: {rejected}"
        )
    if not normalized:
        return text

    alternatives = "|".join(map(re.escape, normalized))
    matcher = re.compile(f"#({alternatives})", re.IGNORECASE)

    def _swap(match: re.Match) -> str:
        # Keys are stored lower-case, so fold the matched text before lookup.
        return "#" + normalized[match.group(1).lower()]

    return matcher.sub(_swap, text)
| 121 | |
| 122 | |
def remap_rgb_in_python(text: str, color_map: dict[str, str]) -> str:
    """Remap colors written as ``RGBColor(0xRR, 0xGG, 0xBB)``, ``"#RRGGBB"``
    or ``'#RRGGBB'`` in Python source text.

    One combined regex handles all three spellings in a single pass, so a
    replacement is never itself remapped by a later rule.

    Keys and values of *color_map* may carry a leading ``#``; it is stripped
    before use. Matching ignores case.

    Note: replacements are always emitted as uppercase hex (e.g. ``#1B1B1F``)
    and ``RGBColor`` calls are rewritten with canonical ``", "`` spacing,
    whatever the source looked like.

    Raises:
        ValueError: If any stripped key or value is not exactly 6 characters.
    """
    upper_map = {
        old.lstrip("#").upper(): new.lstrip("#").upper()
        for old, new in color_map.items()
    }

    bad_entries = {
        old: new for old, new in upper_map.items() if len(old) != 6 or len(new) != 6
    }
    if bad_entries:
        raise ValueError(
            f"Color map entries must be 6-character hex strings; invalid: {bad_entries}"
        )

    if not upper_map:
        return text

    def _channels(hex6: str) -> tuple[int, ...]:
        # Split RRGGBB into its three integer channel values.
        return tuple(int(hex6[pos : pos + 2], 16) for pos in (0, 2, 4))

    sources = list(upper_map)
    count = len(sources)

    # Group layout: 1..count are RGBColor(...) forms, count+1..2*count the
    # double-quoted forms, 2*count+1..3*count the single-quoted forms.
    alternatives: list[str] = []
    for hex6 in sources:
        r, g, b = _channels(hex6)
        alternatives.append(
            rf"(RGBColor\(\s*0x{r:02X}\s*,\s*0x{g:02X}\s*,\s*0x{b:02X}\s*\))"
        )
    alternatives.extend(f'("#{re.escape(hex6)}")' for hex6 in sources)
    alternatives.extend(f"('#{re.escape(hex6)}')" for hex6 in sources)
    combined = re.compile("|".join(alternatives), re.IGNORECASE)

    def _substitute(match: re.Match) -> str:
        # Each alternative is exactly one group, so lastindex identifies both
        # the spelling (quotient) and the source color (remainder).
        form, position = divmod(match.lastindex - 1, count)
        target = upper_map[sources[position]]
        if form == 0:
            r, g, b = _channels(target)
            return f"RGBColor(0x{r:02X}, 0x{g:02X}, 0x{b:02X})"
        quote = '"' if form == 1 else "'"
        return f"{quote}#{target}{quote}"

    return combined.sub(_substitute, text)
| 189 | |
| 190 | |
def process_file(src: Path, dest: Path, color_map: dict[str, str]) -> None:
    """Copy *src* to *dest*, remapping colors for YAML and Python files.

    The file type is decided by suffix, compared case-insensitively:

    * ``.yaml`` / ``.yml`` — ``#RRGGBB`` values are remapped.
    * ``.py`` — ``RGBColor(...)`` calls and quoted ``#RRGGBB`` strings are
      remapped.
    * anything else — copied unchanged with ``shutil.copy2``.

    Args:
        src: Existing source file.
        dest: Destination path; its parent directory must already exist.
        color_map: Mapping of old hex colors to new hex colors.
    """
    suffix = src.suffix.lower()
    if suffix in (".yaml", ".yml"):
        remapped = remap_hex_in_text(src.read_text(encoding="utf-8"), color_map)
    elif suffix == ".py":
        # remap_rgb_in_python handles both RGBColor(...) and "#RRGGBB" quoted
        # forms in a single pass; skip remap_hex_in_text to avoid chain remap
        remapped = remap_rgb_in_python(src.read_text(encoding="utf-8"), color_map)
    else:
        shutil.copy2(src, dest)
        return

    dest.write_text(remapped, encoding="utf-8")
    # write_text creates the file with default permissions; carry over the
    # source mode bits (e.g. the executable bit on scripts) like copy2 does.
    shutil.copymode(src, dest)
| 205 | |
| 206 | |
def process_directory(src_dir: Path, dest_dir: Path, color_map: dict[str, str]) -> None:
    """Mirror *src_dir* into *dest_dir*, remapping colors along the way.

    *dest_dir* (and any missing parents) is created first. Entries are then
    visited in sorted order: sub-directories are handled recursively, regular
    files are handed to ``process_file``, and anything else is skipped.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)

    children = list(src_dir.iterdir())
    children.sort()
    for child in children:
        target = dest_dir / child.name
        if child.is_dir():
            process_directory(child, target, color_map)
            continue
        if child.is_file():
            process_file(child, target, color_map)
| 216 | |
| 217 | |
def update_style_metadata(style_path: Path, theme_id: str, label: str) -> None:
    """Patch theme name and append label to title in style.yaml.

    Uses ruamel.yaml for round-trip fidelity: preserves comments,
    key ordering, and quoting style from the original file.

    Args:
        style_path: Path to a ``style.yaml`` file. A missing file, or one
            whose top level is not a mapping, is left untouched.
        theme_id: Written to ``name`` of the first entry of the top-level
            ``themes`` list, when that entry is a mapping.
        label: Appended to ``metadata.title`` as ``"<title> (<label>)"``
            unless the title already contains it; becomes the whole title
            when the existing title is empty or null.
    """
    if not style_path.exists():
        return
    ryaml = YAML()  # RoundTripLoader: preserves comments, ordering, and quoting
    ryaml.preserve_quotes = True
    data = ryaml.load(style_path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        return

    # Update theme name in the themes list
    themes = data.get("themes", [])
    if isinstance(themes, list) and themes:
        first = themes[0]
        if isinstance(first, dict):
            first["name"] = theme_id

    # Append theme label to metadata title
    metadata = data.get("metadata", {})
    if isinstance(metadata, dict):
        # YAML may yield None (``title:``) or a non-string scalar (``title:
        # 2024``); normalize both sides to text so the containment test below
        # cannot raise TypeError.
        raw_title = metadata.get("title")
        title = "" if raw_title is None else str(raw_title)
        label_text = str(label)
        if label_text not in title:
            metadata["title"] = f"{title} ({label_text})" if title else label_text

    with style_path.open("w", encoding="utf-8") as f:
        ryaml.dump(data, f)
| 248 | |
| 249 | |
def generate_theme(
    content_dir: Path,
    output_dir: Path,
    deck_name: str,
    theme_id: str,
    theme_config: dict,
) -> Path:
    """Produce one themed variant of *content_dir* and return its root folder.

    The variant lives at ``<output_dir>/<deck_name>-<theme_id>`` (with unsafe
    characters in the theme id replaced by ``_``) and contains a recolored
    ``content`` tree plus a ``slide-deck`` folder holding a ``.gitkeep``.
    Any existing ``content`` tree at that location is removed first.
    """
    palette = theme_config["colors"]
    display_label = theme_config.get("label", theme_id)

    # Only letters, digits, "_" and "-" survive, so a hostile theme id from
    # the YAML cannot escape output_dir via path separators or "..".
    folder_suffix = re.sub(r"[^a-zA-Z0-9_\-]", "_", theme_id)
    variant_root = output_dir / f"{deck_name}-{folder_suffix}"
    themed_content = variant_root / "content"

    # Start from a clean slate so files deleted from the base do not linger.
    if themed_content.exists():
        shutil.rmtree(themed_content)
    process_directory(content_dir, themed_content, palette)

    deck_dir = variant_root / "slide-deck"
    deck_dir.mkdir(parents=True, exist_ok=True)
    deck_dir.joinpath(".gitkeep").touch()

    # style.yaml may sit under global/ or at the content root; patch whichever
    # exists (update_style_metadata ignores missing files).
    for relative in (("global", "style.yaml"), ("style.yaml",)):
        update_style_metadata(
            themed_content.joinpath(*relative), theme_id, display_label
        )

    logger.info("Generated: %s/", variant_root.name)
    return variant_root
| 285 | |
| 286 | |
def run(args: argparse.Namespace) -> int:
    """Execute theme generation.

    Args:
        args: Parsed command-line arguments providing ``content_dir``,
            ``themes`` and ``output_dir`` paths.

    Returns:
        ``EXIT_ERROR`` when an input path is missing or the output directory
        is unusable, ``EXIT_SUCCESS`` once every theme has been generated.

    Raises:
        ValueError: Propagated from ``load_themes`` / the remap helpers when
            the themes file is malformed.
    """
    content_dir = args.content_dir.resolve()
    themes_path = args.themes.resolve()
    output_dir = args.output_dir.resolve()

    if not content_dir.is_dir():
        logger.error("Content directory does not exist: %s", content_dir)
        return EXIT_ERROR
    if not themes_path.is_file():
        logger.error("Themes file does not exist: %s", themes_path)
        return EXIT_ERROR
    # Writing the themed copies inside the tree being copied would make the
    # recursive walk descend into its own output without end.
    if output_dir == content_dir or content_dir in output_dir.parents:
        logger.error(
            "Output directory must not be inside the content directory: %s",
            output_dir,
        )
        return EXIT_ERROR

    themes = load_themes(themes_path)
    # The deck is named after the folder that contains the content directory.
    deck_name = content_dir.parent.name
    output_dir.mkdir(parents=True, exist_ok=True)

    logger.info("Generating %d themed variant(s) for '%s' ...", len(themes), deck_name)

    for theme_id, theme_config in themes.items():
        generate_theme(content_dir, output_dir, deck_name, theme_id, theme_config)

    logger.info("All themes generated successfully.")
    return EXIT_SUCCESS
| 311 | |
| 312 | |
def main() -> int:
    """Parse the command line, run the generator and return an exit status.

    Ctrl-C yields status 130; a broken pipe or any other exception yields
    ``EXIT_FAILURE`` (the latter after logging the error message).
    """
    args = create_parser().parse_args()
    configure_logging(args.verbose)
    try:
        status = run(args)
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        status = 130
    except BrokenPipeError:
        # The reader went away; close stderr so nothing more is written to it.
        sys.stderr.close()
        status = EXIT_FAILURE
    except Exception as exc:
        logger.error("%s", exc)
        status = EXIT_FAILURE
    return status
| 329 | |
| 330 | |
# Script entry point: exit the interpreter with the status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
| 333 | |