openai/openai-python

Public

mirrored from https://github.com/openai/openai-pythonAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
v2.38.0

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

examples/realtime/audio_util.py

142lines · modeblame

488ec04bRobert Craigie1 years ago1from __future__ import annotations
2
3import io
4import base64
5import asyncio
6import threading
7from typing import Callable, Awaitable
8
9import numpy as np
10import pyaudio
11import sounddevice as sd
12from pydub import AudioSegment
13
3d3d16abstainless-app[bot]9 months ago14from openai.resources.realtime.realtime import AsyncRealtimeConnection
488ec04bRobert Craigie1 years ago15
16CHUNK_LENGTH_S = 0.05 # 50ms
17SAMPLE_RATE = 24000
18FORMAT = pyaudio.paInt16
19CHANNELS = 1
20
21# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
22
23
def audio_to_pcm16_base64(audio_bytes: bytes) -> bytes:
    """Decode an encoded audio file and return it as raw PCM16 bytes.

    The audio is converted to ``SAMPLE_RATE`` Hz, ``CHANNELS`` channel(s) and
    a 2-byte sample width.

    Note: despite the ``_base64`` suffix in the name, this function returns
    the raw sample bytes; no base64 encoding is applied here.

    Args:
        audio_bytes: Contents of an audio file, in a format pydub can load.

    Returns:
        The converted audio as raw PCM bytes.
    """
    # Wrap the bytes in a file-like object so pydub can read them.
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
    print(f"Loaded audio: {audio.frame_rate=} {audio.channels=} {audio.sample_width=} {audio.frame_width=}")

    # Convert step by step: sample rate, then channel count, then sample width.
    resampled = audio.set_frame_rate(SAMPLE_RATE)
    rechanneled = resampled.set_channels(CHANNELS)
    pcm16 = rechanneled.set_sample_width(2)
    return pcm16.raw_data
31
32
class AudioPlayerAsync:
    """Queue PCM16 audio chunks and play them through a sounddevice output stream.

    Chunks passed to :meth:`add_data` are appended to ``self.queue``; the
    stream callback drains the queue into the output buffer and pads with
    zeros (silence) when there is not enough queued audio. ``self.lock``
    guards the queue, which is mutated by both :meth:`add_data` and
    :meth:`callback`.
    """

    def __init__(self):
        # Local import: the queue is consumed from the left on every
        # callback, and deque gives O(1) popleft/appendleft (list.pop(0) and
        # list.insert(0, ...) are O(n)).
        from collections import deque

        self.queue = deque()
        self.lock = threading.Lock()
        self.stream = sd.OutputStream(
            callback=self.callback,
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype=np.int16,
            blocksize=int(CHUNK_LENGTH_S * SAMPLE_RATE),
        )
        self.playing = False
        # Number of real (non-padding) frames written by the callback since
        # the last reset.
        self._frame_count = 0

    def callback(self, outdata, frames, time, status):  # noqa
        """Fill ``outdata`` with up to ``frames`` queued samples, then zeros.

        Args:
            outdata: Output buffer to fill; written as a column of samples.
            frames: Number of frames requested for this call.
            time: Unused.
            status: Unused.
        """
        filled = 0
        with self.lock:
            # Copy queued chunks straight into the output buffer until it is
            # full or the queue is empty.
            while filled < frames and self.queue:
                chunk = self.queue.popleft()
                take = min(len(chunk), frames - filled)
                outdata[filled : filled + take] = chunk[:take].reshape(-1, 1)
                if take < len(chunk):
                    # Keep the unconsumed tail at the head of the queue for
                    # the next callback.
                    self.queue.appendleft(chunk[take:])
                filled += take

            self._frame_count += filled

        # Pad whatever is left of the buffer with silence.
        outdata[filled:] = 0

    def reset_frame_count(self):
        """Reset the played-frame counter to zero."""
        self._frame_count = 0

    def get_frame_count(self):
        """Return the number of queued frames written to the output since the last reset."""
        return self._frame_count

    def add_data(self, data: bytes):
        """Queue a chunk of audio and start playback if it is not running.

        Args:
            data: PCM16 single-channel audio bytes.
        """
        # Interpret the bytes as int16 samples (a view, not a copy).
        samples = np.frombuffer(data, dtype=np.int16)
        with self.lock:
            self.queue.append(samples)
            if not self.playing:
                self.start()

    def start(self):
        """Mark the player as playing and start the output stream."""
        self.playing = True
        self.stream.start()

    def stop(self):
        """Stop the output stream and discard any audio still queued."""
        self.playing = False
        self.stream.stop()
        with self.lock:
            self.queue.clear()

    def terminate(self):
        """Close the output stream."""
        self.stream.close()
93
94
async def send_audio_worker_sounddevice(
    connection: AsyncRealtimeConnection,
    should_send: Callable[[], bool] | None = None,
    start_send: Callable[[], Awaitable[None]] | None = None,
):
    """Read audio from a sounddevice input stream and send it over ``connection``.

    Runs until interrupted. While sending is enabled, each chunk read from
    the stream is base64-encoded and sent as an ``input_audio_buffer.append``
    event. When sending becomes disabled after audio was sent, the buffer is
    committed and a response is requested.

    Args:
        connection: Realtime connection used to send events.
        should_send: Optional predicate polled once per chunk; when omitted,
            every chunk is sent.
        start_send: Optional coroutine function awaited once before the first
            chunk of each sending run.
    """
    sent_audio = False

    device_info = sd.query_devices()
    print(device_info)

    # Number of frames per read: 0.02 s worth at SAMPLE_RATE.
    read_size = int(SAMPLE_RATE * 0.02)

    stream = sd.InputStream(
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
        dtype="int16",
    )

    try:
        # Started inside the try so the stream is still closed by the
        # ``finally`` below if starting it fails.
        stream.start()

        while True:
            if stream.read_available < read_size:
                # Not enough audio captured yet; yield to the event loop and
                # poll again.
                await asyncio.sleep(0)
                continue

            data, _ = stream.read(read_size)

            if should_send() if should_send else True:
                if not sent_audio and start_send:
                    await start_send()
                await connection.send(
                    {"type": "input_audio_buffer.append", "audio": base64.b64encode(data).decode("utf-8")}
                )
                sent_audio = True

            elif sent_audio:
                # Sending was just switched off: finish the turn.
                print("Done, triggering inference")
                await connection.send({"type": "input_audio_buffer.commit"})
                await connection.send({"type": "response.create", "response": {}})
                sent_audio = False

            await asyncio.sleep(0)

    except KeyboardInterrupt:
        pass
    finally:
        stream.stop()
        stream.close()