microsoft/hve-core

Public

mirrored from https://github.com/microsoft/hve-core Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
docs/transparency-note

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

.github/skills/experimental/powerpoint/scripts/embed_audio.py

233 lines · mode code

#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: MIT
"""Embed WAV audio files into a PowerPoint deck, one per slide.

Matches audio files to slides by naming convention (slide-001.wav → slide 1)
and embeds each as an audio shape using python-pptx's add_movie API.

Usage::

    python embed_audio.py --input deck.pptx \
        --audio-dir voice-over/ --output out.pptx
    python embed_audio.py --input deck.pptx \
        --audio-dir voice-over/ --output out.pptx \
        --slides "1,3,5"
    python embed_audio.py --input deck.pptx \
        --audio-dir voice-over/ --output out.pptx -v
"""

from __future__ import annotations

import argparse
import io
import logging
import re
import sys
import tempfile
from pathlib import Path

from PIL import Image
from pptx import Presentation
from pptx.util import Inches

from pptx_utils import (
    EXIT_ERROR,
    EXIT_FAILURE,
    EXIT_SUCCESS,
    configure_logging,
    parse_slide_filter,
)

logger = logging.getLogger(__name__)

# Matches the ``slide-NNN.wav`` naming convention (case-insensitive);
# group 1 captures the slide number digits.
AUDIO_PATTERN = re.compile(r"^slide-(\d+)\.wav$", re.IGNORECASE)

# Position and size of the audio shape passed to ``add_movie``.
AUDIO_LEFT = Inches(0.1)
AUDIO_WIDTH = Inches(0.3)
AUDIO_HEIGHT = Inches(0.3)
# Added to the slide height to compute the shape's top edge, which places the
# audio shape below the bottom edge of the slide.
AUDIO_OFFSCREEN_OFFSET = Inches(0.5)


def create_parser() -> argparse.ArgumentParser:
    """Create and configure the command-line argument parser.

    Returns:
        Parser accepting the required ``--input``, ``--audio-dir`` and
        ``--output`` paths (parsed as ``Path``), the optional ``--slides``
        filter string, and the ``-v``/``--verbose`` flag.
    """
    parser = argparse.ArgumentParser(
        description="Embed WAV audio files into a PowerPoint deck"
    )
    parser.add_argument(
        "--input", required=True, type=Path, help="Source PPTX file path"
    )
    parser.add_argument(
        "--audio-dir", required=True, type=Path, help="Directory containing WAV files"
    )
    parser.add_argument(
        "--output", required=True, type=Path, help="Output PPTX file path"
    )
    parser.add_argument(
        "--slides",
        help="Comma-separated slide numbers to embed audio on (1-based, default: all)",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )
    return parser


def discover_audio_files(audio_dir: Path) -> dict[int, Path]:
    """Map slide numbers to WAV file paths found in the audio directory.

    Scans for regular files matching the ``slide-NNN.wav`` naming convention
    (case-insensitive). Entries are visited in sorted order; when several
    files resolve to the same slide number (for example ``slide-1.wav`` and
    ``slide-001.wav``) the last one in sorted order is kept and a warning is
    logged so the collision is not silent.

    Args:
        audio_dir: Directory to scan for WAV files.

    Returns:
        Dictionary mapping 1-based slide numbers to their WAV file paths.
    """
    mapping: dict[int, Path] = {}
    for entry in sorted(audio_dir.iterdir()):
        if not entry.is_file():
            continue
        match = AUDIO_PATTERN.match(entry.name)
        if not match:
            continue
        slide_num = int(match.group(1))
        previous = mapping.get(slide_num)
        if previous is not None:
            logger.warning(
                "Multiple audio files for slide %d: using %s instead of %s",
                slide_num,
                entry.name,
                previous.name,
            )
        mapping[slide_num] = entry
        logger.debug("Found audio for slide %d: %s", slide_num, entry.name)
    return mapping


def create_poster_frame() -> Path:
    """Create a minimal 1x1 transparent PNG for the audio poster frame.

    python-pptx's ``add_movie`` requires a poster frame image. This creates
    a temporary transparent PNG so the audio shape has no visible thumbnail.
    The file is created with ``delete=False``; the caller is responsible for
    removing it once it is no longer needed.

    Returns:
        Path to the temporary PNG file.
    """
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        poster_path = Path(tmp.name)
        try:
            # Write the PNG straight into the temp file; no intermediate
            # in-memory buffer is needed.
            Image.new("RGBA", (1, 1), (0, 0, 0, 0)).save(tmp, format="PNG")
        except Exception:
            # Do not leave a stray temp file behind when encoding fails.
            # Close first so the unlink also works on Windows.
            tmp.close()
            poster_path.unlink(missing_ok=True)
            raise
    return poster_path


def embed_audio(
    prs: Presentation,
    audio_map: dict[int, Path],
    slide_filter: set[int] | None,
    poster_frame: Path,
) -> int:
    """Embed WAV files into matching slides.

    Slides are numbered from 1 in presentation order. A slide receives audio
    when it passes ``slide_filter`` (a ``None`` or empty filter means all
    slides) and ``audio_map`` has an entry for its number. Audio files whose
    number does not correspond to any slide in the deck are reported with a
    warning and otherwise ignored.

    Args:
        prs: Loaded Presentation object (modified in place).
        audio_map: Mapping of 1-based slide numbers to WAV file paths.
        slide_filter: Optional set of slide numbers to restrict embedding.
        poster_frame: Path to the poster frame image for add_movie.

    Returns:
        Count of slides that received embedded audio.
    """
    slide_count = len(prs.slides)
    orphaned = sorted(num for num in audio_map if not 1 <= num <= slide_count)
    if orphaned:
        logger.warning(
            "Ignoring audio for slide number(s) not in the deck (%d slides): %s",
            slide_count,
            ", ".join(str(num) for num in orphaned),
        )

    embedded_count = 0
    # Top edge sits below the slide's bottom edge so the shape is off-slide.
    audio_top = prs.slide_height + AUDIO_OFFSCREEN_OFFSET
    for slide_num, slide in enumerate(prs.slides, start=1):
        if slide_filter and slide_num not in slide_filter:
            continue
        wav_path = audio_map.get(slide_num)
        if wav_path is None:
            logger.debug("Slide %d: no audio file found, skipping", slide_num)
            continue

        # python-pptx does not expose a public audio-embedding API, so we use
        # add_movie which creates a video relationship type. PowerPoint Desktop
        # handles WAV media embedded this way correctly for narration timing and
        # video export via "Use Recorded Timings and Narrations". Other viewers
        # (LibreOffice, Google Slides) may display a video icon instead.
        slide.shapes.add_movie(
            movie_file=str(wav_path),
            left=AUDIO_LEFT,
            top=audio_top,
            width=AUDIO_WIDTH,
            height=AUDIO_HEIGHT,
            poster_frame_image=str(poster_frame),
            mime_type="audio/wav",
        )
        embedded_count += 1
        logger.info("Slide %d: embedded %s", slide_num, wav_path.name)

    return embedded_count


def run(args: argparse.Namespace) -> int:
    """Execute the audio embedding workflow.

    Validates the input paths, discovers ``slide-NNN.wav`` files, embeds them
    into the matching slides and saves the result. The output file is only
    written when at least one slide received audio.

    Args:
        args: Parsed command-line arguments (``input``, ``audio_dir``,
            ``output``, ``slides``).

    Returns:
        ``EXIT_ERROR`` when the input file or audio directory is missing,
        ``EXIT_FAILURE`` when no audio was found or none matched a target
        slide, ``EXIT_SUCCESS`` otherwise.
    """
    input_path: Path = args.input
    audio_dir: Path = args.audio_dir
    output_path: Path = args.output

    if not input_path.is_file():
        logger.error("Input file not found: %s", input_path)
        return EXIT_ERROR

    if not audio_dir.is_dir():
        logger.error("Audio directory not found: %s", audio_dir)
        return EXIT_ERROR

    slide_filter = parse_slide_filter(args.slides)

    audio_map = discover_audio_files(audio_dir)
    if not audio_map:
        logger.warning("No slide-NNN.wav files found in %s", audio_dir)
        return EXIT_FAILURE

    logger.info("Discovered %d audio file(s) in %s", len(audio_map), audio_dir)

    prs = Presentation(str(input_path))
    total_slides = len(prs.slides)
    logger.info("Opened %s (%d slides)", input_path.name, total_slides)

    # The poster frame is a temp file created with delete=False, so remove it
    # even when embedding raises.
    poster_frame = create_poster_frame()
    try:
        embedded = embed_audio(prs, audio_map, slide_filter, poster_frame)
    finally:
        poster_frame.unlink(missing_ok=True)

    if embedded == 0:
        logger.warning("No audio files matched any target slides")
        return EXIT_FAILURE

    output_path.parent.mkdir(parents=True, exist_ok=True)
    prs.save(str(output_path))
    logger.info("Saved %s with %d embedded audio track(s)", output_path, embedded)
    return EXIT_SUCCESS


def main() -> int:
    """Main entry point for the script.

    Parses the command line, configures logging and runs the workflow,
    translating exceptions into exit codes.

    Returns:
        The exit code from ``run``; 130 when interrupted by the user;
        ``EXIT_FAILURE`` on a broken pipe or any other unexpected exception.
    """
    parser = create_parser()
    args = parser.parse_args()
    configure_logging(args.verbose)
    try:
        return run(args)
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        return 130
    except BrokenPipeError:
        sys.stderr.close()
        return EXIT_FAILURE
    except Exception as e:
        logger.error("Unexpected error: %s", e)
        # Keep the traceback available for diagnosis without cluttering normal
        # output; presumably shown only when --verbose enables DEBUG logging
        # (verify against configure_logging).
        logger.debug("Unhandled exception traceback", exc_info=True)
        return EXIT_FAILURE


# Run the CLI and propagate its exit code when executed as a script.
if __name__ == "__main__":
    sys.exit(main())