microsoft/onnxruntime-extensions

Public

mirrored from https://github.com/microsoft/onnxruntime-extensions · Available

Watch0 Fork0 Star0

Code Commits Issues Pull requests Actions Insights Security

wechi/ort_test

Find a branch or tag

Branches

wechi/ort_test

Clone

HTTPS

Download ZIP

onnxruntime-extensions/cmake

cmake/ext_cuda.cmake

40 lines · mode: code

Raw Download

Latest commit unavailable.

unknown

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# CUDA build configuration.
#
# Enables the CUDA language, declares the attention-kernel options, extends the
# global nvcc command line (CMAKE_CUDA_FLAGS) and adds the directory-scoped
# compile definitions that tell the sources which CUDA features are enabled.

find_package(CUDAToolkit)
enable_language(CUDA)

# Defaults picked up by every CUDA target created after this point.
set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
set(CMAKE_CUDA_STANDARD 17)

include(CMakeDependentOption)

# USE_FLASH_ATTENTION: user-visible option, default ON; forced OFF on Windows.
cmake_dependent_option(USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32" OFF)
# USE_MEMORY_EFFICIENT_ATTENTION: user-visible option, default ON.
option(USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)

# Both attention kernels are switched off for CUDA compilers older than 11.6.
# Plain set() is used so the override applies to this configure run without
# rewriting the cached option values.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
  message(STATUS "Turn off flash attention and memory efficient attention since CUDA compiler version < 11.6")
  set(USE_FLASH_ATTENTION OFF)
  set(USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
  string(APPEND CMAKE_CUDA_FLAGS " --Werror default-stream-launch")
endif()

if(NOT WIN32)
  # CUDA_NVCC_FLAGS is only consumed by the legacy FindCUDA module
  # (cuda_add_library & co.). It is kept for anything that may still read it,
  # but the option must also go into CMAKE_CUDA_FLAGS, which is what the
  # enable_language(CUDA) toolchain actually passes to nvcc.
  list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
  string(APPEND CMAKE_CUDA_FLAGS " --compiler-options -fPIC")
endif()

# Options passed to cudafe: silence diagnostics by name.
foreach(ocos_cudafe_diag IN ITEMS
    bad_friend_decl
    unsigned_compare_with_zero
    expr_has_no_effect)
  string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe \"--diag_suppress=${ocos_cudafe_diag}\"")
endforeach()

# Preprocessor switches seen by the sources in this directory scope.
add_compile_definitions(USE_CUDA)
if(USE_FLASH_ATTENTION)
  message(STATUS "Enable flash attention")
  add_compile_definitions(USE_FLASH_ATTENTION)
endif()
if(USE_MEMORY_EFFICIENT_ATTENTION)
  message(STATUS "Enable memory efficient attention")
  add_compile_definitions(USE_MEMORY_EFFICIENT_ATTENTION)
endif()

microsoft/onnxruntime-extensions

Branches

Tags

Clone