microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions Available

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

copilot/document-operator-contracts

Find a branch or tag

Branches

copilot/document-operator-contracts

Clone

HTTPS

Download ZIP

onnxruntime-extensions/include

include/ortx_extractor.h

131lines · modecode

Raw Download

Latest commit unavailable.

unknown

1	`// C ABI header file for the onnxruntime-extensions audio feature extraction module`
2
3	`#pragma once`
4
5	`#include "ortx_utils.h"`
6
7	`typedef OrtxObject OrtxFeatureExtractor;`
8	`typedef OrtxObject OrtxRawAudios;`
9
10	`#ifdef __cplusplus`
11	`extern "C" {`
12	`#endif`
13
14	`/**`
15	`* @brief Creates a feature extractor object.`
16	`*`
17	`* This function creates a feature extractor object based on the provided feature definition.`
18	`*`
19	`* @param[out] extractor Pointer to a pointer to the created feature extractor object.`
20	`* @param[in] fe_def The feature definition used to create the feature extractor.`
21	`*`
22	`* @return An error code indicating the result of the operation.`
23	`*/`
24	`extError_t ORTX_API_CALL OrtxCreateSpeechFeatureExtractor(OrtxFeatureExtractor** extractor, const char* fe_def);`
25
26	`/**`
27	`* @brief Loads a collection of audio files into memory.`
28	`*`
29	* This function loads a collection of audio files specified by the `audio_paths` array
30	* into memory and returns a pointer to the loaded audio data in the `audios` parameter.
31	`*`
32	`* @param audios A pointer to a pointer that will be updated with the loaded audio data.`
33	`* The caller is responsible for freeing the memory allocated for the audio data.`
34	`* @param audio_paths An array of strings representing the paths to the audio files to be loaded.`
35	`* @param num_audios The number of audio files to be loaded.`
36	`*`
37	* @return An `extError_t` value indicating the success or failure of the operation.
38	`*/`
39	`extError_t ORTX_API_CALL OrtxLoadAudios(OrtxRawAudios** audios, const char* const* audio_paths, size_t num_audios);`
40
41	`/**`
42	`* @brief Creates an array of raw audio objects, which refers to the audio data and sizes provided.`
43	`*`
44	`* This function creates an array of raw audio objects based on the provided data and sizes. The created objects are`
45	* returned through the `audios` parameter.
46	`*`
47	`* @param audios Pointer to the variable that will hold the created raw audio objects.`
48	`* @param data Array of pointers to the audio data.`
49	`* @param sizes Array of the sizes of the corresponding audio data buffers.`
50	`* @param num_audios Number of audio objects to create.`
51	`*`
52	`* @return extError_t Error code indicating the success or failure of the operation.`
53	`*/`
54	`extError_t ORTX_API_CALL OrtxCreateRawAudios(OrtxRawAudios** audios, const void* data[], const int64_t sizes[],`
55	`size_t num_audios);`
56
57	`/**`
58	`* @brief Calculates the log mel spectrogram for a given audio using the specified feature extractor.`
59	`*`
60	`* This function takes an instance of the OrtxFeatureExtractor struct, an instance of the OrtxRawAudios struct,`
61	`* and a pointer to an OrtxTensorResult pointer. It calculates the log mel spectrogram for the given audio using`
62	`* the specified feature extractor and stores the result in the provided log_mel pointer.`
63	`*`
64	`* @param extractor The feature extractor to use for calculating the log mel spectrogram.`
65	`* @param audio The raw audio data to process.`
66	`* @param log_mel A pointer to an OrtxTensorResult pointer where the result will be stored.`
67	`* @return An extError_t value indicating the success or failure of the operation.`
68	`*/`
69	`extError_t ORTX_API_CALL OrtxSpeechLogMel(OrtxFeatureExtractor* extractor, OrtxRawAudios* audio,`
70	`OrtxTensorResult** log_mel);`
71
72	`/**`
73	`* @brief Splits an input audio signal and outputs the areas of high vs low energy based on the STFT analysis.`
74	`*`
75	`* This function takes an input waveform tensor and associated parameters such as sample rate,`
76	`* frame length, hop length, and energy threshold (in dB), and identifies contiguous segments`
77	`* of speech or sound activity. It writes the resulting segment start and end indices into`
78	`* the provided output tensor.`
79	`*`
80	`* @param input The input waveform tensor (1D or 2D) containing audio samples.`
81	`* @param sr_tensor A tensor containing the sample rate of the input audio (in Hz).`
82	`* @param frame_ms_tensor A tensor containing the frame size in milliseconds.`
83	`* @param hop_ms_tensor A tensor containing the hop length in milliseconds.`
84	`* @param energy_threshold_db_tensor A tensor specifying the energy threshold in decibels (dB)`
85	`* used to decide which frames are considered active.`
86	`* @param output0 A pointer to an output tensor where the resulting segments will be written.`
87	`* Each row contains two integers: [start_sample, end_sample] for a detected segment.`
88	`* @return An extError_t value indicating the success or failure of the operation.`
89	`*/`
90	`extError_t ORTX_API_CALL OrtxSplitSignalSegments(const OrtxTensor* input, const OrtxTensor* sr_tensor,`
91	`const OrtxTensor* frame_ms_tensor, const OrtxTensor* hop_ms_tensor,`
92	`const OrtxTensor* energy_threshold_db_tensor, OrtxTensor* output0);`
93
94	`/**`
95	`* @brief Merges adjacent signal segments that are separated by short gaps.`
96	`*`
97	`* This function takes a tensor of detected segments (each row containing [start, end] indices)`
98	`* and merges any consecutive segments whose gap is smaller than the specified threshold (in milliseconds).`
99	`*`
100	`* @param segments_tensor The input tensor of detected segments, of shape [N, 2].`
101	`* @param merge_gap_ms_tensor A tensor containing a single integer value representing`
102	`* the maximum allowed gap (in milliseconds) between consecutive segments to be merged.`
103	`* @param output0 A pointer to an output tensor where the merged segments will be stored.`
104	`* Each row contains two integers: [merged_start_sample, merged_end_sample].`
105	`* @return An extError_t value indicating the success or failure of the operation.`
106	`*/`
107	`extError_t ORTX_API_CALL OrtxMergeSignalSegments(const OrtxTensor* segments_tensor,`
108	`const OrtxTensor* merge_gap_ms_tensor, OrtxTensor* output0);`
109
110	`/**`
111	`* @brief Extracts log-mel features from raw audio data using a feature extractor.`
112	`*`
113	`* This function processes the input audio buffers through the provided feature extractor,`
114	`* producing log-mel spectrogram outputs suitable for inference or further signal analysis.`
115	`*`
116	`* @param extractor A pointer to an OrtxFeatureExtractor object that defines the feature`
117	`* extraction pipeline and processing parameters.`
118	`* @param audio A pointer to an OrtxRawAudios structure containing raw audio data buffers`
119	`* and associated metadata (e.g., sampling rate, channels).`
120	`* @param result A pointer to an OrtxTensorResult pointer that will be allocated and set to`
121	`* hold the resulting log-mel spectrogram data and other outputs based on the JSON configuration.`
122	`*`
123	`* @return An extError_t value indicating success or error status. Returns`
124	`* EXT_SUCCESS on success, or an appropriate error code if extraction fails.`
125	`*/`
126	`extError_t ORTX_API_CALL OrtxFeatureExtraction(OrtxFeatureExtractor* extractor, OrtxRawAudios* audio,`
127	`OrtxTensorResult** result);`
128
129	`#ifdef __cplusplus`
130	`}`
131	`#endif`

microsoft/onnxruntime-extensions

Branches

Tags

Clone