microsoft/TypeAgent

Public

mirrored fromhttps://github.com/microsoft/TypeAgentAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
17c6532b70d881167dfd56361fc65f2fc047f037

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

python/whisperService/faster-whisper.py

68lines · modecode

1# Copyright (c) Microsoft Corporation.
2# Licensed under the MIT License.
3
4from io import BytesIO
5from pydub import AudioSegment
6import uvicorn
7import numpy as np
8import torch
9from fastapi import FastAPI, UploadFile, File
10from fastapi.middleware.cors import CORSMiddleware
11from fastapi.responses import JSONResponse
12import logging
13from faster_whisper import WhisperModel
14
15import os
# Set KMP_DUPLICATE_LIB_OK for this process.
# NOTE(review): presumably this is the workaround for the "duplicate OpenMP
# runtime" abort; the assignment runs after torch, numpy and faster_whisper
# have already been imported above -- confirm it still takes effect there.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Root logger: emit ERROR and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
)
20
# Create the FastAPI application that the route decorators below attach to.
app = FastAPI()

# Fully permissive CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# FastAPI's CORS documentation advises against that combination -- confirm
# whether credentialed cross-origin requests are actually required.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
32
# Load the Whisper model once at import time; the /transcribe/ handler
# reuses this module-level instance for every request.
print("Loading model...")
# CUDA available -> run on the GPU with float16; otherwise run on the CPU
# with the "default" compute type.
device, compute_type = (
    ("cuda", "float16") if torch.cuda.is_available() else ("cpu", "default")
)
model = WhisperModel("medium.en", device=device, compute_type=compute_type)
print("Model loaded!")
39
40
@app.post("/transcribe/")
async def transcription(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is decoded with pydub, converted to mono 16 kHz 16-bit PCM,
    scaled to float32 and handed to ``model.transcribe``.

    Args:
        file: The audio file uploaded in the multipart request body.

    Returns:
        A ``JSONResponse``:

        * 200 with ``{"transcription": <text>}``, where the text is the
          segment texts joined by newlines;
        * 400 with an ``"error"`` message when the upload contains no bytes;
        * 500 with a generic ``"error"`` message when any other step fails
          (the traceback is logged, not returned to the client).
    """
    # NOTE(review): decoding and model.transcribe are plain synchronous calls
    # inside this coroutine, so they run on the event loop. Consider
    # offloading them if concurrent requests matter -- confirm the model can
    # be called from several threads before doing so.
    try:
        audio_bytes = await file.read()

        # An empty upload is a client mistake, not an internal failure.
        if not audio_bytes:
            return JSONResponse(
                content={"error": "Uploaded file is empty."},
                status_code=400,
            )

        # Let pydub detect the container/codec, then normalise the layout to
        # mono, 16 kHz, 2 bytes (16 bits) per sample.
        audio = AudioSegment.from_file(BytesIO(audio_bytes))
        audio = audio.set_channels(1).set_frame_rate(16000).set_sample_width(2)

        # Reinterpret the raw PCM bytes as int16 samples and scale them to
        # float32. int16 spans [-32768, 32767], so dividing by 32768 maps the
        # samples into [-1.0, 1.0).
        samples = np.frombuffer(audio.raw_data, dtype=np.int16)
        audio_np = samples.astype(np.float32) / 32768.0

        # faster_whisper returns a generator of segments; iterating through
        # it is what yields the transcription, so consume it in the join.
        segments, _ = model.transcribe(audio_np)
        text = "\n".join(segment.text for segment in segments)

        return JSONResponse(content={"transcription": text}, status_code=200)
    except Exception:
        # Boundary handler: log the full traceback, keep details from the client.
        logging.exception("An error occurred during transcription")
        return JSONResponse(
            content={"error": "An internal error has occurred!"},
            status_code=500,
        )
65
66
# Start the HTTP server only when this file is executed directly as a script.
if __name__ == "__main__":
    # Host "0.0.0.0" binds every network interface; the service listens on port 8001.
    uvicorn.run(app, port=8001, host="0.0.0.0")
69