microsoft/hve-core
Public · mirrored from https://github.com/microsoft/hve-core · Available
.github/skills/experimental/powerpoint/scripts/embed_audio.py
233 lines · mode: code
| 1 | #!/usr/bin/env python3 |
| 2 | # Copyright (c) Microsoft Corporation. |
| 3 | # SPDX-License-Identifier: MIT |
| 4 | """Embed WAV audio files into a PowerPoint deck, one per slide. |
| 5 | |
| 6 | Matches audio files to slides by naming convention (slide-001.wav → slide 1) |
| 7 | and embeds each as an audio shape using python-pptx's add_movie API. |
| 8 | |
| 9 | Usage:: |
| 10 | |
| 11 | python embed_audio.py --input deck.pptx \ |
| 12 | --audio-dir voice-over/ --output out.pptx |
| 13 | python embed_audio.py --input deck.pptx \ |
| 14 | --audio-dir voice-over/ --output out.pptx \ |
| 15 | --slides "1,3,5" |
| 16 | python embed_audio.py --input deck.pptx \ |
| 17 | --audio-dir voice-over/ --output out.pptx -v |
| 18 | """ |
| 19 | |
| 20 | from __future__ import annotations |
| 21 | |
| 22 | import argparse |
| 23 | import io |
| 24 | import logging |
| 25 | import re |
| 26 | import sys |
| 27 | import tempfile |
| 28 | from pathlib import Path |
| 29 | |
| 30 | from PIL import Image |
| 31 | from pptx import Presentation |
| 32 | from pptx.util import Inches |
| 33 | from pptx_utils import ( |
| 34 | EXIT_ERROR, |
| 35 | EXIT_FAILURE, |
| 36 | EXIT_SUCCESS, |
| 37 | configure_logging, |
| 38 | parse_slide_filter, |
| 39 | ) |
| 40 | |
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

# Matches file names such as ``slide-001.wav`` (case-insensitive); capture
# group 1 holds the digits that are converted to the slide number.
AUDIO_PATTERN = re.compile(r"^slide-(\d+)\.wav$", re.IGNORECASE)

# Geometry of the embedded audio shape. The shape's top edge is computed in
# embed_audio() as slide height + AUDIO_OFFSCREEN_OFFSET, which places it
# below the bottom edge of the slide.
AUDIO_LEFT = Inches(0.1)
AUDIO_WIDTH = Inches(0.3)
AUDIO_HEIGHT = Inches(0.3)
AUDIO_OFFSCREEN_OFFSET = Inches(0.5)
| 49 | |
| 50 | |
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line interface for the audio embedding script.

    Returns:
        Parser that accepts the required ``--input``, ``--audio-dir`` and
        ``--output`` paths, the optional ``--slides`` filter string, and the
        ``-v``/``--verbose`` flag.
    """
    cli = argparse.ArgumentParser(
        description="Embed WAV audio files into a PowerPoint deck"
    )

    # The three path options share the same shape (required, parsed to Path),
    # so register them from a table instead of repeating the call.
    required_paths = (
        ("--input", "Source PPTX file path"),
        ("--audio-dir", "Directory containing WAV files"),
        ("--output", "Output PPTX file path"),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, required=True, type=Path, help=help_text)

    cli.add_argument(
        "--slides",
        help="Comma-separated slide numbers to embed audio on (1-based, default: all)",
    )
    cli.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )
    return cli
| 73 | |
| 74 | |
def discover_audio_files(audio_dir: Path) -> dict[int, Path]:
    """Map slide numbers to WAV file paths found in the audio directory.

    Scans the direct children of ``audio_dir`` (no recursion) for regular
    files whose name matches the ``slide-NNN.wav`` convention, compared
    case-insensitively.

    Different names can resolve to the same slide number (for example
    ``slide-1.wav`` and ``slide-001.wav``). In that case the entry that comes
    last in sorted path order is kept and a warning is logged, so the
    collision is visible instead of silently dropping a file.

    Args:
        audio_dir: Directory to scan for WAV files.

    Returns:
        Dictionary mapping 1-based slide numbers to their WAV file paths.
    """
    mapping: dict[int, Path] = {}
    # Sorting makes both the debug output and the duplicate resolution
    # deterministic regardless of filesystem enumeration order.
    for entry in sorted(audio_dir.iterdir()):
        if not entry.is_file():
            continue
        match = AUDIO_PATTERN.match(entry.name)
        if match is None:
            continue
        slide_num = int(match.group(1))
        previous = mapping.get(slide_num)
        if previous is not None:
            logger.warning(
                "Slide %d: %s overrides %s (duplicate slide number)",
                slide_num,
                entry.name,
                previous.name,
            )
        mapping[slide_num] = entry
        logger.debug("Found audio for slide %d: %s", slide_num, entry.name)
    return mapping
| 96 | |
| 97 | |
def create_poster_frame() -> Path:
    """Create a minimal 1x1 transparent PNG for the audio poster frame.

    python-pptx's ``add_movie`` requires a poster frame image. This creates
    a temporary transparent PNG so the audio shape has no visible thumbnail.
    The file is created with ``delete=False``; the caller is responsible for
    removing it once it is no longer needed.

    Returns:
        Path to the temporary PNG file.

    Raises:
        Exception: Whatever error the PNG write raised, re-raised after the
            partially written temporary file has been removed.
    """
    img = Image.new("RGBA", (1, 1), (0, 0, 0, 0))
    tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    poster_path = Path(tmp.name)
    try:
        # The context manager closes the handle even if encoding fails;
        # writing straight to the file avoids an intermediate in-memory copy.
        with tmp:
            img.save(tmp, format="PNG")
    except Exception:
        # delete=False means nothing else would clean up a half-written file.
        poster_path.unlink(missing_ok=True)
        raise
    return poster_path
| 115 | |
| 116 | |
def embed_audio(
    prs: Presentation,
    audio_map: dict[int, Path],
    slide_filter: set[int] | None,
    poster_frame: Path,
) -> int:
    """Attach each slide's WAV file to it as a media shape.

    Slides are numbered from 1 in presentation order. A slide is processed
    only when it passes ``slide_filter`` (a ``None`` or empty filter selects
    every slide) and ``audio_map`` has an entry for its number.

    Args:
        prs: Loaded Presentation object (modified in place).
        audio_map: Mapping of 1-based slide numbers to WAV file paths.
        slide_filter: Optional set of slide numbers to restrict embedding.
        poster_frame: Path to the poster frame image for add_movie.

    Returns:
        Count of slides that received embedded audio.
    """
    # Everything except the media file is identical for every slide. The top
    # coordinate is slide height plus an offset, i.e. below the slide's
    # bottom edge.
    placement = {
        "left": AUDIO_LEFT,
        "top": prs.slide_height + AUDIO_OFFSCREEN_OFFSET,
        "width": AUDIO_WIDTH,
        "height": AUDIO_HEIGHT,
        "poster_frame_image": str(poster_frame),
        "mime_type": "audio/wav",
    }

    placed = 0
    for number, slide in enumerate(prs.slides, start=1):
        selected = not slide_filter or number in slide_filter
        if not selected:
            continue

        source = audio_map.get(number)
        if not source:
            logger.debug("Slide %d: no audio file found, skipping", number)
            continue

        # python-pptx does not expose a public audio-embedding API, so we use
        # add_movie which creates a video relationship type. PowerPoint Desktop
        # handles WAV media embedded this way correctly for narration timing and
        # video export via "Use Recorded Timings and Narrations". Other viewers
        # (LibreOffice, Google Slides) may display a video icon instead.
        slide.shapes.add_movie(movie_file=str(source), **placement)
        placed += 1
        logger.info("Slide %d: embedded %s", number, source.name)

    return placed
| 162 | |
| 163 | |
def run(args: argparse.Namespace) -> int:
    """Run the embedding workflow end to end.

    Validates the input paths, discovers the WAV files, embeds them into the
    deck and writes the result. Nothing is written when no audio was embedded.

    Args:
        args: Parsed command-line arguments (``input``, ``audio_dir``,
            ``output``, ``slides``).

    Returns:
        ``EXIT_ERROR`` when the input deck or audio directory is missing,
        ``EXIT_FAILURE`` when no audio files were found or none matched a
        target slide, ``EXIT_SUCCESS`` after the output deck has been saved.
    """
    source_deck: Path = args.input
    wav_dir: Path = args.audio_dir
    destination: Path = args.output

    # Guard clauses: fail fast on bad paths before loading anything.
    if not source_deck.is_file():
        logger.error("Input file not found: %s", source_deck)
        return EXIT_ERROR
    if not wav_dir.is_dir():
        logger.error("Audio directory not found: %s", wav_dir)
        return EXIT_ERROR

    selected_slides = parse_slide_filter(args.slides)

    tracks = discover_audio_files(wav_dir)
    if not tracks:
        logger.warning("No slide-NNN.wav files found in %s", wav_dir)
        return EXIT_FAILURE
    logger.info("Discovered %d audio file(s) in %s", len(tracks), wav_dir)

    deck = Presentation(str(source_deck))
    logger.info("Opened %s (%d slides)", source_deck.name, len(deck.slides))

    # The poster image is only needed while shapes are being added, so it is
    # removed as soon as embedding finishes, whether or not it succeeded.
    poster = create_poster_frame()
    try:
        track_count = embed_audio(deck, tracks, selected_slides, poster)
    finally:
        poster.unlink(missing_ok=True)

    if not track_count:
        logger.warning("No audio files matched any target slides")
        return EXIT_FAILURE

    destination.parent.mkdir(parents=True, exist_ok=True)
    deck.save(str(destination))
    logger.info(
        "Saved %s with %d embedded audio track(s)", destination, track_count
    )
    return EXIT_SUCCESS
| 212 | |
| 213 | |
def main() -> int:
    """Main entry point for the script.

    Parses the command line, configures logging and delegates to ``run``.
    This is the top-level boundary, so every exception is converted into an
    exit code here instead of propagating.

    Returns:
        The exit code from ``run``; 130 when interrupted with Ctrl-C;
        ``EXIT_FAILURE`` on a broken pipe or any unexpected exception.
    """
    parser = create_parser()
    args = parser.parse_args()
    configure_logging(args.verbose)
    try:
        return run(args)
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        return 130
    except BrokenPipeError:
        sys.stderr.close()
        return EXIT_FAILURE
    except Exception as e:
        # Keep the one-line message for normal runs, but attach the traceback
        # when --verbose was requested so failures can be diagnosed.
        logger.error("Unexpected error: %s", e, exc_info=args.verbose)
        return EXIT_FAILURE
| 230 | |
| 231 | |
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
| 234 | |