microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
176c1d013864044bcc0747b908bdd32048669401

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

cmake/ext_cuda.cmake

40lines · modecode

1# Copyright (c) Microsoft Corporation. All rights reserved.
2# Licensed under the MIT License.
3
# CUDA toolchain setup and attention-kernel build options.

# cmake_dependent_option() is provided by this module. Including it here keeps
# this file usable even when the including script has not loaded it yet; a
# repeated include is harmless.
include(CMakeDependentOption)

# Enable the CUDA language before locating the toolkit: once the language is
# enabled, find_package(CUDAToolkit) searches the selected compiler's directory
# first, so the toolkit it reports matches the compiler that builds the kernels.
enable_language(CUDA)
find_package(CUDAToolkit)

set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
set(CMAKE_CUDA_STANDARD 17)

# Flash attention defaults to ON where "NOT WIN32" holds and is forced OFF
# otherwise; memory efficient attention defaults to ON everywhere.
cmake_dependent_option(OCOS_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32" OFF)
option(OCOS_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)

# Both attention kernels are switched off for CUDA compilers older than 11.6.
# These are normal variables, so they override the cache values for the rest of
# this scope without rewriting the user's cache entries.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
  message(STATUS "Turn off flash attention and memory efficient attention since CUDA compiler version < 11.6")
  set(OCOS_USE_FLASH_ATTENTION OFF)
  set(OCOS_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
16
# nvcc flags applied to every CUDA source compiled after this point.
string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
  string(APPEND CMAKE_CUDA_FLAGS " --Werror default-stream-launch")
endif()

if(NOT WIN32)
  # CUDA_NVCC_FLAGS is only consumed by the legacy FindCUDA module
  # (cuda_add_library and friends). It is still appended to so any such
  # consumer keeps working, but it has no effect on targets built through
  # enable_language(CUDA).
  list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
  # CMAKE_CUDA_FLAGS is what first-class CUDA targets actually read, so the
  # host-compiler -fPIC option has to be added here to reach nvcc.
  string(APPEND CMAKE_CUDA_FLAGS " --compiler-options -fPIC")
endif()

# Options passed to cudafe
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=bad_friend_decl\"")
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=unsigned_compare_with_zero\"")
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=expr_has_no_effect\"")
30
# Preprocessor definitions for sources built with CUDA support.
# USE_CUDA is always defined; each attention macro is defined only when its
# option of the same name is enabled.
add_compile_definitions(USE_CUDA)

if(OCOS_USE_FLASH_ATTENTION)
  add_compile_definitions(OCOS_USE_FLASH_ATTENTION)
  message(STATUS "Enable flash attention")
endif()

if(OCOS_USE_MEMORY_EFFICIENT_ATTENTION)
  add_compile_definitions(OCOS_USE_MEMORY_EFFICIENT_ATTENTION)
  message(STATUS "Enable memory efficient attention")
endif()