openai/openai-python

Public

mirrored from https://github.com/openai/openai-python Available

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

v1.61.0

Find a branch or tag

Branches

v1.61.0

Clone

HTTPS

Download ZIP

openai-python/examples/realtime

examples/realtime/audio_util.py

142lines · modecode

Raw Download

docs: add examples + guidance on Realtime API support 488ec04

1 year ago

1	`from __future__ import annotations`
2
3	`import io`
4	`import base64`
5	`import asyncio`
6	`import threading`
7	`from typing import Callable, Awaitable`
8
9	`import numpy as np`
10	`import pyaudio`
11	`import sounddevice as sd`
12	`from pydub import AudioSegment`
13
14	`from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection`
15
16	`CHUNK_LENGTH_S = 0.05 # 50ms`
17	`SAMPLE_RATE = 24000`
18	`FORMAT = pyaudio.paInt16`
19	`CHANNELS = 1`
20
21	`# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false`
22
23
def audio_to_pcm16_base64(audio_bytes: bytes) -> bytes:
    """Decode an encoded audio payload into raw mono 16-bit PCM bytes.

    The payload is parsed by pydub from an in-memory stream, a summary of the
    decoded audio is printed, and the audio is then converted to
    ``SAMPLE_RATE`` Hz, ``CHANNELS`` channel(s) and 2 bytes per sample.

    NOTE(review): despite the ``_base64`` suffix, the returned bytes are the
    raw PCM samples; nothing in this function base64-encodes them.

    Args:
        audio_bytes: Contents of an audio file in a container/codec pydub can read.

    Returns:
        The converted audio as raw PCM16 sample bytes.
    """
    # pydub wants a file-like object, so wrap the payload in a byte stream.
    source = io.BytesIO(audio_bytes)
    # The local must stay named `audio`: the f-string below echoes the expression text.
    audio = AudioSegment.from_file(source)
    print(f"Loaded audio: {audio.frame_rate=} {audio.channels=} {audio.sample_width=} {audio.frame_width=}")

    # Convert step by step: sample rate, then channel count, then sample width.
    resampled = audio.set_frame_rate(SAMPLE_RATE)
    downmixed = resampled.set_channels(CHANNELS)
    pcm16 = downmixed.set_sample_width(2)
    return pcm16.raw_data
31
32
class AudioPlayerAsync:
    """Play queued PCM16 audio through a sounddevice output stream.

    Chunks passed to ``add_data`` are buffered in ``queue`` and drained by
    ``callback``, which is registered as the output stream's callback and is
    asked to supply ``frames`` samples per invocation. ``lock`` guards both
    ``queue`` and the played-frame counter, since they are touched by the
    callback as well as by the public methods.
    """

    def __init__(self):
        # Pending int16 sample arrays, oldest first.
        self.queue = []
        self.lock = threading.Lock()
        self.stream = sd.OutputStream(
            callback=self.callback,
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype=np.int16,
            # Number of frames requested from `callback` per invocation.
            blocksize=int(CHUNK_LENGTH_S * SAMPLE_RATE),
        )
        self.playing = False
        # Count of real (non-padding) frames handed to the stream so far.
        self._frame_count = 0

    def callback(self, outdata, frames, time, status):  # noqa
        """Fill ``outdata`` with the next ``frames`` samples from the queue.

        Queued chunks are consumed oldest first; a chunk that is only partly
        consumed keeps its remainder at the head of the queue. If the queue
        runs dry the rest of the block is silence (zeros). Only frames that
        came from the queue are added to the frame counter.
        """
        # Start from silence so any unfilled tail is already zero-padded.
        block = np.zeros(frames, dtype=np.int16)
        filled = 0

        with self.lock:
            while filled < frames and self.queue:
                head = self.queue[0]
                take = min(len(head), frames - filled)
                block[filled : filled + take] = head[:take]
                filled += take
                if take == len(head):
                    # Chunk fully consumed (this also drops empty chunks).
                    self.queue.pop(0)
                else:
                    # Keep the unplayed remainder at the front for the next call.
                    self.queue[0] = head[take:]

            self._frame_count += filled

        # One column of samples; broadcasting covers the channel axis.
        outdata[:] = block.reshape(-1, 1)

    def reset_frame_count(self):
        """Reset the played-frame counter to zero."""
        # Take the lock so a reset cannot be lost inside the callback's `+=`.
        with self.lock:
            self._frame_count = 0

    def get_frame_count(self):
        """Return the number of queued frames played since the last reset."""
        with self.lock:
            return self._frame_count

    def add_data(self, data: bytes):
        """Queue PCM16 single-channel audio bytes and start playback if idle.

        Args:
            data: Raw int16 samples; interpreted with ``np.frombuffer``.
        """
        # bytes is pcm16 single channel audio data, convert to numpy array
        samples = np.frombuffer(data, dtype=np.int16)
        with self.lock:
            self.queue.append(samples)
        # Start outside the lock so the callback is never blocked on stream start-up.
        if not self.playing:
            self.start()

    def start(self):
        """Mark the player as playing and start the output stream."""
        self.playing = True
        self.stream.start()

    def stop(self):
        """Stop the output stream and discard any audio still queued."""
        self.playing = False
        # Stop before taking the lock: the callback needs the lock to finish.
        self.stream.stop()
        with self.lock:
            self.queue = []

    def terminate(self):
        """Close the output stream."""
        self.stream.close()
93
94
async def send_audio_worker_sounddevice(
    connection: AsyncRealtimeConnection,
    should_send: Callable[[], bool] | None = None,
    start_send: Callable[[], Awaitable[None]] | None = None,
):
    """Stream microphone audio to a realtime connection until interrupted.

    Opens a sounddevice input stream and reads it in chunks of
    ``SAMPLE_RATE * 0.02`` frames. While ``should_send`` returns true (or is
    not supplied), each chunk is base64-encoded and sent as an
    ``input_audio_buffer.append`` event. When ``should_send`` turns false
    after audio has been sent, the buffer is committed and a response is
    requested.

    Args:
        connection: Realtime connection the events are sent on.
        should_send: Optional predicate polled once per chunk; when omitted,
            every chunk is sent.
        start_send: Optional coroutine function awaited once before the first
            chunk of each sending run.
    """
    print(sd.query_devices())

    frames_per_read = int(SAMPLE_RATE * 0.02)

    mic = sd.InputStream(
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
        dtype="int16",
    )
    mic.start()

    # True while a run of chunks has been appended but not yet committed.
    streaming = False

    try:
        while True:
            # NOTE(review): zero-delay polling; this yields to other tasks but
            # does not actually wait for audio to arrive.
            if mic.read_available < frames_per_read:
                await asyncio.sleep(0)
                continue

            chunk, _overflowed = mic.read(frames_per_read)

            wants_audio = should_send() if should_send else True

            if wants_audio:
                if start_send and not streaming:
                    await start_send()
                encoded = base64.b64encode(chunk).decode("utf-8")
                await connection.send({"type": "input_audio_buffer.append", "audio": encoded})
                streaming = True
            elif streaming:
                print("Done, triggering inference")
                await connection.send({"type": "input_audio_buffer.commit"})
                await connection.send({"type": "response.create", "response": {}})
                streaming = False

            # Give other tasks on the event loop a turn between chunks.
            await asyncio.sleep(0)

    except KeyboardInterrupt:
        # NOTE(review): interrupt is swallowed on purpose so cleanup below still runs quietly.
        pass
    finally:
        mic.stop()
        mic.close()

openai/openai-python

Branches

Tags

Clone