microsoft/TypeAgent

Public

mirrored from https://github.com/microsoft/TypeAgent · Available

Watch 0 · Fork 0 · Star 0

Code Commits Issues Pull requests Actions Insights Security

17c6532b70d881167dfd56361fc65f2fc047f037

Find a branch or tag

Branches

17c6532b70d881167dfd56361fc65f2fc047f037

Clone

HTTPS

Download ZIP

TypeAgent/python/whisperService

python/whisperService/faster-whisper.py

68 lines · mode: code

Raw Download

Latest commit unavailable.

unknown

1	`# Copyright (c) Microsoft Corporation.`
2	`# Licensed under the MIT License.`
3
4	`from io import BytesIO`
5	`from pydub import AudioSegment`
6	`import uvicorn`
7	`import numpy as np`
8	`import torch`
9	`from fastapi import FastAPI, UploadFile, File`
10	`from fastapi.middleware.cors import CORSMiddleware`
11	`from fastapi.responses import JSONResponse`
12	`import logging`
13	`from faster_whisper import WhisperModel`
14
15	`import os`
# NOTE(review): KMP_DUPLICATE_LIB_OK is read by the OpenMP runtime; presumably
# it is set here so the process keeps running when more than one copy of that
# runtime gets loaded by the native dependencies -- confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Configure logging: only ERROR and above, prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
)
20
# Initialize the app
app = FastAPI()

# Allow all CORS: every origin, method and header is accepted, and
# credentialed requests are enabled.
# NOTE(review): the FastAPI CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm whether any client
# actually needs credentials before tightening this.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
32
# Load the Whisper model once at import time so every request reuses it.
print("Loading model...")

# Use CUDA with float16 when torch reports it as available; otherwise run on
# the CPU with the library's default compute type.
_cuda_available = torch.cuda.is_available()
if _cuda_available:
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "default"

model = WhisperModel("medium.en", device=device, compute_type=compute_type)
print("Model loaded!")
39
40
def _decode_to_float32(audio_bytes: bytes) -> "np.ndarray":
    """Decode an encoded audio payload into a 1-D float32 sample array.

    pydub is used so that different input formats are accepted; the audio is
    then converted to mono, 16 kHz, 16-bit samples before being rescaled.
    """
    audio = AudioSegment.from_file(BytesIO(audio_bytes))
    audio = audio.set_channels(1).set_frame_rate(16000).set_sample_width(2)

    # The samples are signed 16-bit integers; dividing by 32768 (2**15)
    # rescales their amplitude to floating point values in [-1.0, 1.0).
    return np.frombuffer(audio.raw_data, dtype=np.int16).astype(np.float32) / 32768.0


def _transcribe_bytes(audio_bytes: bytes) -> str:
    """Decode *audio_bytes* and return the transcribed text (blocking call)."""
    samples = _decode_to_float32(audio_bytes)

    # faster_whisper returns a generator of segments; iterate through it to
    # get the transcription, one line per segment.
    segments, _ = model.transcribe(samples)
    return "\n".join(segment.text for segment in segments)


@app.post("/transcribe/")
async def transcription(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio file, in any format pydub can decode.

    Returns:
        A JSONResponse with ``{"transcription": <text>}`` and status 200 on
        success, ``{"error": ...}`` with status 400 when the upload is empty,
        or ``{"error": ...}`` with status 500 when decoding or transcription
        fails (the traceback is logged, never returned to the client).
    """
    # Imported locally so this handler does not depend on edits to the
    # file-level import block.
    from fastapi.concurrency import run_in_threadpool

    try:
        audio_bytes = await file.read()
        if not audio_bytes:
            return JSONResponse(content={"error": "Uploaded file is empty."}, status_code=400)

        # Decoding and inference are blocking; run them in a worker thread so
        # the event loop stays free to serve other requests.
        text = await run_in_threadpool(_transcribe_bytes, audio_bytes)

        return JSONResponse(content={"transcription": text}, status_code=200)
    except Exception:
        # Top-level request boundary: log the full traceback, return a generic message.
        logging.exception("An error occurred during transcription")
        return JSONResponse(content={"error": "An internal error has occurred!"}, status_code=500)
65
66
if __name__ == "__main__":
    # When executed as a script, serve the API with uvicorn on all network
    # interfaces ("0.0.0.0"), port 8001.
    _server_options = {"host": "0.0.0.0", "port": 8001}
    uvicorn.run(app, **_server_options)
69

microsoft/TypeAgent

Branches

Tags

Clone