openai/openai-python

Public

mirrored fromhttps://github.com/openai/openai-pythonAvailable

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

dev/codex/package-manager-safety-dry-run

Find a branch or tag

Branches

dev/codex/package-manager-safety-dry-run

Clone

HTTPS

Download ZIP

openai-python/examples/realtime

examples/realtime/push_to_talk_app.py

291lines · modecode

Raw Download

Latest commit unavailable.

unknown

1	`#!/usr/bin/env uv run`
2	`####################################################################`
3	`# Sample TUI app with a push to talk interface to the Realtime API #`
4	# If you have `uv` installed and the `OPENAI_API_KEY` #
5	`# environment variable set, you can run this example with just #`
6	`# #`
7	# `./examples/realtime/push_to_talk_app.py` #
8	`# #`
9	# On Mac, you'll also need `brew install portaudio ffmpeg` #
10	`####################################################################`
11	`#`
12	`# /// script`
13	`# requires-python = ">=3.9"`
14	`# dependencies = [`
15	`# "textual",`
16	`# "numpy",`
17	`# "pyaudio",`
18	`# "pydub",`
19	`# "sounddevice",`
20	`# "openai[realtime]",`
21	`# ]`
22	`#`
23	`# [tool.uv.sources]`
24	`# openai = { path = "../../", editable = true }`
25	`# ///`
26	`from __future__ import annotations`
27
28	`import base64`
29	`import asyncio`
30	`from typing import Any, cast`
31	`from typing_extensions import override`
32
33	`from textual import events`
34	`from audio_util import CHANNELS, SAMPLE_RATE, AudioPlayerAsync`
35	`from textual.app import App, ComposeResult`
36	`from textual.widgets import Button, Static, RichLog`
37	`from textual.reactive import reactive`
38	`from textual.containers import Container`
39
40	`from openai import AsyncOpenAI`
41	`from openai.types.realtime.session import Session`
42	`from openai.resources.realtime.realtime import AsyncRealtimeConnection`
43
44
class SessionDisplay(Static):
    """Status line that shows the realtime session ID once one has been assigned."""

    # Empty string means no session ID has been stored on the widget yet.
    session_id = reactive("")

    @override
    def render(self) -> str:
        """Return the session ID label, or a placeholder while the ID is still empty."""
        if not self.session_id:
            return "Connecting..."
        return f"Session ID: {self.session_id}"
53
54
class AudioStatusIndicator(Static):
    """Status line telling the user whether microphone audio is being recorded."""

    # Toggled by the app; selects which prompt `render()` returns.
    is_recording = reactive(False)

    @override
    def render(self) -> str:
        """Return the key-binding hint that matches the current recording state."""
        if self.is_recording:
            return "🔴 Recording... (Press K to stop)"
        return "⚪ Press K to start recording (Q to quit)"
66
67
class RealtimeApp(App[None]):
    """Push-to-talk TUI that streams microphone audio to the Realtime API.

    On mount the app starts two workers: one opens the realtime connection and
    consumes server events (session updates, audio deltas, transcript deltas),
    the other forwards microphone audio while recording is toggled on with the
    ``K`` key.
    """

    CSS = """
        Screen {
            background: #1a1b26; /* Dark blue-grey background */
        }

        Container {
            border: double rgb(91, 164, 91);
        }

        Horizontal {
            width: 100%;
        }

        #input-container {
            height: 5; /* Explicit height for input container */
            margin: 1 1;
            padding: 1 2;
        }

        Input {
            width: 80%;
            height: 3; /* Explicit height for input */
        }

        Button {
            width: 20%;
            height: 3; /* Explicit height for button */
        }

        #bottom-pane {
            width: 100%;
            height: 82%; /* Reduced to make room for session display */
            border: round rgb(205, 133, 63);
            content-align: center middle;
        }

        #status-indicator {
            height: 3;
            content-align: center middle;
            background: #2a2b36;
            border: solid rgb(91, 164, 91);
            margin: 1 1;
        }

        #session-display {
            height: 3;
            content-align: center middle;
            background: #2a2b36;
            border: solid rgb(91, 164, 91);
            margin: 1 1;
        }

        Static {
            color: white;
        }
    """

    client: AsyncOpenAI
    # Set while the user wants microphone audio forwarded (toggled in `on_key`).
    should_send_audio: asyncio.Event
    audio_player: AudioPlayerAsync
    # Item ID of the most recent audio delta; a change resets the player's frame count.
    last_audio_item_id: str | None
    connection: AsyncRealtimeConnection | None
    # Latest session object received via `session.created` / `session.updated`.
    session: Session | None
    # Set once `connection` has been assigned.
    connected: asyncio.Event

    def __init__(self) -> None:
        """Create the API client, the audio player and the coordination events."""
        super().__init__()
        self.connection = None
        self.session = None
        self.client = AsyncOpenAI()
        self.audio_player = AudioPlayerAsync()
        self.last_audio_item_id = None
        self.should_send_audio = asyncio.Event()
        self.connected = asyncio.Event()

    @override
    def compose(self) -> ComposeResult:
        """Create child widgets for the app."""
        with Container():
            yield SessionDisplay(id="session-display")
            yield AudioStatusIndicator(id="status-indicator")
            yield RichLog(id="bottom-pane", wrap=True, highlight=True, markup=True)

    async def on_mount(self) -> None:
        """Start the connection worker and the microphone worker."""
        self.run_worker(self.handle_realtime_connection())
        self.run_worker(self.send_mic_audio())

    async def handle_realtime_connection(self) -> None:
        """Open the realtime connection and react to server events until the stream ends.

        Handled events:

        * ``session.created`` / ``session.updated`` -- cache the session (and
          show its ID on creation).
        * ``response.output_audio.delta`` -- decode and queue audio for playback.
        * ``response.output_audio_transcript.delta`` -- accumulate the transcript
          per item and redraw it.

        Every other event type is ignored.
        """
        async with self.client.realtime.connect(model="gpt-realtime") as conn:
            self.connection = conn
            # Unblock anything waiting in `_get_connection()`.
            self.connected.set()

            # note: this is the default and can be omitted
            # if you want to manually handle VAD yourself, then set `'turn_detection': None`
            await conn.session.update(
                session={
                    "audio": {
                        "input": {"turn_detection": {"type": "server_vad"}},
                    },
                    "model": "gpt-realtime",
                    "type": "realtime",
                }
            )

            # Transcript text received so far, keyed by item ID.
            transcripts: dict[str, str] = {}

            async for event in conn:
                if event.type == "session.created":
                    self.session = event.session
                    session_display = self.query_one(SessionDisplay)
                    assert event.session.id is not None
                    session_display.session_id = event.session.id
                elif event.type == "session.updated":
                    self.session = event.session
                elif event.type == "response.output_audio.delta":
                    # A new item ID means a new audio item has started.
                    if event.item_id != self.last_audio_item_id:
                        self.audio_player.reset_frame_count()
                        self.last_audio_item_id = event.item_id

                    self.audio_player.add_data(base64.b64decode(event.delta))
                elif event.type == "response.output_audio_transcript.delta":
                    transcripts[event.item_id] = transcripts.get(event.item_id, "") + event.delta

                    # Clear and update the entire content because RichLog otherwise treats each delta as a new line
                    bottom_pane = self.query_one("#bottom-pane", RichLog)
                    bottom_pane.clear()
                    bottom_pane.write(transcripts[event.item_id])

    async def _get_connection(self) -> AsyncRealtimeConnection:
        """Wait until the realtime connection has been established, then return it."""
        await self.connected.wait()
        assert self.connection is not None
        return self.connection

    async def send_mic_audio(self) -> None:
        """Read microphone audio and append it to the input audio buffer while recording.

        Runs until cancelled; the input stream is always stopped and closed on exit.
        """
        import sounddevice as sd  # type: ignore

        sent_audio = False

        device_info = sd.query_devices()
        print(device_info)

        # Number of frames per chunk: 0.02 s worth at SAMPLE_RATE.
        read_size = int(SAMPLE_RATE * 0.02)

        stream = sd.InputStream(
            channels=CHANNELS,
            samplerate=SAMPLE_RATE,
            dtype="int16",
        )
        stream.start()

        status_indicator = self.query_one(AudioStatusIndicator)

        try:
            while True:
                # Yield to the event loop until a full chunk is available.
                if stream.read_available < read_size:
                    await asyncio.sleep(0)
                    continue

                await self.should_send_audio.wait()
                status_indicator.is_recording = True

                data, _ = stream.read(read_size)

                connection = await self._get_connection()
                if not sent_audio:
                    # Awaited directly rather than via a fire-and-forget
                    # `asyncio.create_task`: an unreferenced task can be
                    # garbage-collected before it runs, and awaiting also
                    # guarantees the cancel is sent before the first audio chunk.
                    await connection.send({"type": "response.cancel"})
                    sent_audio = True

                await connection.input_audio_buffer.append(audio=base64.b64encode(cast(Any, data)).decode("utf-8"))

                await asyncio.sleep(0)
        except KeyboardInterrupt:
            pass
        finally:
            stream.stop()
            stream.close()

    async def on_key(self, event: events.Key) -> None:
        """Handle key press events.

        ``enter`` presses the first mounted ``Button`` (if any), ``q`` exits the
        app and ``k`` toggles recording.
        """
        if event.key == "enter":
            # `compose()` mounts no Button, so `query_one(Button)` would raise
            # NoMatches here; only press a button when one actually exists.
            buttons = self.query(Button)
            if buttons:
                buttons.first().press()
            return

        if event.key == "q":
            self.exit()
            return

        if event.key != "k":
            return

        status_indicator = self.query_one(AudioStatusIndicator)
        if not status_indicator.is_recording:
            self.should_send_audio.set()
            status_indicator.is_recording = True
            return

        self.should_send_audio.clear()
        status_indicator.is_recording = False

        if self.session and self.session.turn_detection is None:
            # The default in the API is that the model will automatically detect when the user has
            # stopped talking and then start responding itself.
            #
            # However if we're in manual `turn_detection` mode then we need to
            # manually tell the model to commit the audio buffer and start responding.
            conn = await self._get_connection()
            await conn.input_audio_buffer.commit()
            await conn.response.create()
287
288
if __name__ == "__main__":
    # Script entry point: build the TUI and hand control to its run loop.
    app = RealtimeApp()
    app.run()

openai/openai-python

Branches

Tags

Clone