microsoft/onnxruntime-extensions
Public mirrored from https://github.com/microsoft/onnxruntime-extensions Available
cmake/ext_cuda.cmake
40lines · modecode
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# CUDA build configuration.
#
# Enables the CUDA language, sets directory-wide CUDA compile flags and
# definitions, and declares the attention-kernel feature switches:
#   OCOS_USE_FLASH_ATTENTION            - flash attention kernel
#   OCOS_USE_MEMORY_EFFICIENT_ATTENTION - memory efficient attention kernel

# cmake_dependent_option() is provided by this module. Include it here so the
# file does not rely on a parent script having loaded it already.
include(CMakeDependentOption)

# Not REQUIRED: enable_language(CUDA) below is the call that stops the
# configure step when no usable CUDA compiler is available.
find_package(CUDAToolkit)
enable_language(CUDA)

set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
set(CMAKE_CUDA_STANDARD 17)

# Flash attention defaults to ON where the "NOT WIN32" condition holds and is
# forced OFF otherwise.
cmake_dependent_option(OCOS_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32" OFF)
option(OCOS_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
  message(STATUS "Turn off flash attention and memory efficient attention since CUDA compiler version < 11.6")
  # Normal variables take precedence over the cache entries of the same name
  # for the remainder of this configure run.
  set(OCOS_USE_FLASH_ATTENTION OFF)
  set(OCOS_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
  string(APPEND CMAKE_CUDA_FLAGS " --Werror default-stream-launch")
endif()

if(NOT WIN32)
  # CUDA_NVCC_FLAGS is read by the legacy FindCUDA module only; it is kept for
  # any consumer that still goes through that module.
  list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
  # The CUDA language enabled above takes its flags from CMAKE_CUDA_FLAGS, so
  # the host-compiler -fPIC option has to be added there to take effect.
  string(APPEND CMAKE_CUDA_FLAGS " --compiler-options -fPIC")
endif()

# Options passed to cudafe
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=bad_friend_decl\"")
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=unsigned_compare_with_zero\"")
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=expr_has_no_effect\"")

add_compile_definitions(USE_CUDA)

# Mirror each enabled feature switch as a preprocessor definition of the same
# name.
if(OCOS_USE_FLASH_ATTENTION)
  message(STATUS "Enable flash attention")
  add_compile_definitions(OCOS_USE_FLASH_ATTENTION)
endif()
if(OCOS_USE_MEMORY_EFFICIENT_ATTENTION)
  message(STATUS "Enable memory efficient attention")
  add_compile_definitions(OCOS_USE_MEMORY_EFFICIENT_ATTENTION)
endif()