microsoft/onnxruntime-extensions
Publicmirrored fromhttps://github.com/microsoft/onnxruntime-extensionsAvailable
operators/vision/encode_image.cc
82lines · modecode
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "encode_image.hpp"

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <vector>

// #include <opencv2/imgcodecs.hpp>
#include "png.h"
#include "jpeglib.h"

#include "vision/impl/png_encoder_decoder.hpp"

| 12 | namespace ort_extensions { |
| 13 | |
| 14 | namespace { |
| 15 | void EncodeJpg(uint8_t*& buffer, unsigned long& num_bytes) { |
| 16 | struct jpeg_compress_struct cinfo; |
| 17 | struct jpeg_error_mgr jerr; |
| 18 | JSAMPROW row_pointer[1]; /* pointer to JSAMPLE row[s] */ |
| 19 | int row_stride; /* physical row width in image buffer */ |
| 20 | |
| 21 | cinfo.err = jpeg_std_error(&jerr); |
| 22 | jpeg_create_compress(&cinfo); |
| 23 | |
| 24 | jpeg_mem_dest(&cinfo, &buffer, &num_bytes); |
| 25 | |
| 26 | cinfo.image_width = 600; /* image width and height, in pixels */ |
| 27 | cinfo.image_height = 600; |
| 28 | cinfo.input_components = 3; /* # of color components per pixel */ |
| 29 | cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ |
| 30 | jpeg_set_defaults(&cinfo); |
| 31 | jpeg_set_quality(&cinfo, /*quality*/ 90, TRUE /* limit to baseline-JPEG values */); |
| 32 | |
| 33 | jpeg_start_compress(&cinfo, TRUE); |
| 34 | |
| 35 | row_stride = cinfo.image_width * 3; /* JSAMPLEs per row in image_buffer */ |
| 36 | JSAMPLE* image_buffer = nullptr; // allocated externally |
| 37 | |
| 38 | while (cinfo.next_scanline < cinfo.image_height) { |
| 39 | /* jpeg_write_scanlines expects an array of pointers to scanlines. |
| 40 | * Here the array is only one element long, but you could pass |
| 41 | * more than one scanline at a time if that's more convenient. |
| 42 | */ |
| 43 | row_pointer[0] = &image_buffer[cinfo.next_scanline * row_stride]; |
| 44 | (void)jpeg_write_scanlines(&cinfo, row_pointer, 1); |
| 45 | } |
| 46 | |
| 47 | jpeg_finish_compress(&cinfo); |
| 48 | jpeg_destroy_compress(&cinfo); |
| 49 | } |
| 50 | } // namespace |
| 51 | |
| 52 | void KernelEncodeImage ::Compute(OrtKernelContext* context) { |
| 53 | // Setup inputs |
| 54 | // TODO: RGB is probably better if we're not using opencv |
| 55 | const OrtValue* input_bgr = ort_.KernelContext_GetInput(context, 0ULL); |
| 56 | const OrtTensorDimensions dimensions_bgr(ort_, input_bgr); |
| 57 | |
| 58 | if (dimensions_bgr.size() != 3 || dimensions_bgr[2] != 3) { |
| 59 | // expect {H, W, C} as that's the inverse of what decode_image produces. |
| 60 | // we have no way to check if it's BGR or RGB though |
| 61 | ORT_CXX_API_THROW("[EncodeImage] requires rank 3 BGR input in channels last format.", ORT_INVALID_ARGUMENT); |
| 62 | } |
| 63 | |
| 64 | if (extension_ == ".png") { |
| 65 | PngEncoder encoder(ort_.GetTensorData<uint8_t>(input_bgr), dimensions_bgr); |
| 66 | const auto& encoded_image = encoder.Encode(); |
| 67 | |
| 68 | std::vector<int64_t> output_dimensions{static_cast<int64_t>(encoded_image.size())}; |
| 69 | OrtValue* output_value = ort_.KernelContext_GetOutput(context, 0, |
| 70 | output_dimensions.data(), |
| 71 | output_dimensions.size()); |
| 72 | |
| 73 | uint8_t* data = ort_.GetTensorMutableData<uint8_t>(output_value); |
| 74 | memcpy(data, encoded_image.data(), encoded_image.size()); |
| 75 | |
| 76 | } else { |
| 77 | uint8_t* missing_buffer = nullptr; |
| 78 | unsigned long num_encoded_bytes = 0; |
| 79 | EncodeJpg(missing_buffer, num_encoded_bytes); |
| 80 | } |
| 81 | } |
| 82 | } // namespace ort_extensions |
| 83 | |