microsoft/qdk
Publicmirrored fromhttps://github.com/microsoft/qdkAvailable
allocator/mimalloc-sys/mimalloc/include/mimalloc/internal.h
979lines · modecode
| 1 | /* ---------------------------------------------------------------------------- |
| 2 | Copyright (c) 2018-2023, Microsoft Research, Daan Leijen |
| 3 | This is free software; you can redistribute it and/or modify it under the |
| 4 | terms of the MIT license. A copy of the license can be found in the file |
| 5 | "LICENSE" at the root of this distribution. |
| 6 | -----------------------------------------------------------------------------*/ |
| 7 | #pragma once |
| 8 | #ifndef MIMALLOC_INTERNAL_H |
| 9 | #define MIMALLOC_INTERNAL_H |
| 10 | |
| 11 | |
| 12 | // -------------------------------------------------------------------------- |
| 13 | // This file contains the interal API's of mimalloc and various utility |
| 14 | // functions and macros. |
| 15 | // -------------------------------------------------------------------------- |
| 16 | |
| 17 | #include "mimalloc/types.h" |
| 18 | #include "mimalloc/track.h" |
| 19 | |
| 20 | #if (MI_DEBUG>0) |
| 21 | #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) |
| 22 | #else |
| 23 | #define mi_trace_message(...) |
| 24 | #endif |
| 25 | |
| 26 | #define MI_CACHE_LINE 64 |
| 27 | #if defined(_MSC_VER) |
| 28 | #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) |
| 29 | #pragma warning(disable:26812) // unscoped enum warning |
| 30 | #define mi_decl_noinline __declspec(noinline) |
| 31 | #define mi_decl_thread __declspec(thread) |
| 32 | #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) |
| 33 | #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc |
| 34 | #define mi_decl_noinline __attribute__((noinline)) |
| 35 | #define mi_decl_thread __thread |
| 36 | #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) |
| 37 | #else |
| 38 | #define mi_decl_noinline |
| 39 | #define mi_decl_thread __thread // hope for the best :-) |
| 40 | #define mi_decl_cache_align |
| 41 | #endif |
| 42 | |
| 43 | #if defined(__EMSCRIPTEN__) && !defined(__wasi__) |
| 44 | #define __wasi__ |
| 45 | #endif |
| 46 | |
| 47 | #if defined(__cplusplus) |
| 48 | #define mi_decl_externc extern "C" |
| 49 | #else |
| 50 | #define mi_decl_externc |
| 51 | #endif |
| 52 | |
| 53 | // pthreads |
| 54 | #if !defined(_WIN32) && !defined(__wasi__) |
| 55 | #define MI_USE_PTHREADS |
| 56 | #include <pthread.h> |
| 57 | #endif |
| 58 | |
| 59 | // "options.c" |
| 60 | void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); |
| 61 | void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); |
| 62 | void _mi_warning_message(const char* fmt, ...); |
| 63 | void _mi_verbose_message(const char* fmt, ...); |
| 64 | void _mi_trace_message(const char* fmt, ...); |
| 65 | void _mi_options_init(void); |
| 66 | void _mi_error_message(int err, const char* fmt, ...); |
| 67 | |
| 68 | // random.c |
| 69 | void _mi_random_init(mi_random_ctx_t* ctx); |
| 70 | void _mi_random_init_weak(mi_random_ctx_t* ctx); |
| 71 | void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); |
| 72 | void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); |
| 73 | uintptr_t _mi_random_next(mi_random_ctx_t* ctx); |
| 74 | uintptr_t _mi_heap_random_next(mi_heap_t* heap); |
| 75 | uintptr_t _mi_os_random_weak(uintptr_t extra_seed); |
| 76 | static inline uintptr_t _mi_random_shuffle(uintptr_t x); |
| 77 | |
| 78 | // init.c |
| 79 | extern mi_decl_cache_align mi_stats_t _mi_stats_main; |
| 80 | extern mi_decl_cache_align const mi_page_t _mi_page_empty; |
| 81 | bool _mi_is_main_thread(void); |
| 82 | size_t _mi_current_thread_count(void); |
| 83 | bool _mi_preloading(void); // true while the C runtime is not initialized yet |
| 84 | mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; |
| 85 | mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap |
| 86 | void _mi_thread_done(mi_heap_t* heap); |
| 87 | void _mi_thread_data_collect(void); |
| 88 | |
| 89 | // os.c |
| 90 | void _mi_os_init(void); // called from process init |
| 91 | void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); |
| 92 | void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); |
| 93 | void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); |
| 94 | |
| 95 | size_t _mi_os_page_size(void); |
| 96 | size_t _mi_os_good_alloc_size(size_t size); |
| 97 | bool _mi_os_has_overcommit(void); |
| 98 | bool _mi_os_has_virtual_reserve(void); |
| 99 | |
| 100 | bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); |
| 101 | bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); |
| 102 | bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); |
| 103 | bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); |
| 104 | bool _mi_os_protect(void* addr, size_t size); |
| 105 | bool _mi_os_unprotect(void* addr, size_t size); |
| 106 | bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); |
| 107 | bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); |
| 108 | |
| 109 | void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); |
| 110 | void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); |
| 111 | |
| 112 | void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); |
| 113 | bool _mi_os_use_large_page(size_t size, size_t alignment); |
| 114 | size_t _mi_os_large_page_size(void); |
| 115 | |
| 116 | void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); |
| 117 | |
| 118 | // arena.c |
| 119 | mi_arena_id_t _mi_arena_id_none(void); |
| 120 | void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); |
| 121 | void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); |
| 122 | void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); |
| 123 | bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); |
| 124 | bool _mi_arena_contains(const void* p); |
| 125 | void _mi_arena_collect(bool force_purge, mi_stats_t* stats); |
| 126 | void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); |
| 127 | |
| 128 | // "segment-map.c" |
| 129 | void _mi_segment_map_allocated_at(const mi_segment_t* segment); |
| 130 | void _mi_segment_map_freed_at(const mi_segment_t* segment); |
| 131 | |
| 132 | // "segment.c" |
| 133 | mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); |
| 134 | void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); |
| 135 | void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); |
| 136 | bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); |
| 137 | void _mi_segment_thread_collect(mi_segments_tld_t* tld); |
| 138 | |
| 139 | #if MI_HUGE_PAGE_ABANDON |
| 140 | void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); |
| 141 | #else |
| 142 | void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); |
| 143 | #endif |
| 144 | |
| 145 | uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page |
| 146 | void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); |
| 147 | void _mi_abandoned_await_readers(void); |
| 148 | void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); |
| 149 | |
| 150 | // "page.c" |
| 151 | void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; |
| 152 | |
| 153 | void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks |
| 154 | void _mi_page_unfull(mi_page_t* page); |
| 155 | void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page |
| 156 | void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... |
| 157 | void _mi_heap_delayed_free_all(mi_heap_t* heap); |
| 158 | bool _mi_heap_delayed_free_partial(mi_heap_t* heap); |
| 159 | void _mi_heap_collect_retired(mi_heap_t* heap, bool force); |
| 160 | |
| 161 | void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); |
| 162 | bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); |
| 163 | size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); |
| 164 | void _mi_deferred_free(mi_heap_t* heap, bool force); |
| 165 | |
| 166 | void _mi_page_free_collect(mi_page_t* page,bool force); |
| 167 | void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments |
| 168 | |
| 169 | size_t _mi_bin_size(uint8_t bin); // for stats |
| 170 | uint8_t _mi_bin(size_t size); // for stats |
| 171 | |
| 172 | // "heap.c" |
| 173 | void _mi_heap_destroy_pages(mi_heap_t* heap); |
| 174 | void _mi_heap_collect_abandon(mi_heap_t* heap); |
| 175 | void _mi_heap_set_default_direct(mi_heap_t* heap); |
| 176 | bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); |
| 177 | void _mi_heap_unsafe_destroy_all(void); |
| 178 | |
| 179 | // "stats.c" |
| 180 | void _mi_stats_done(mi_stats_t* stats); |
| 181 | mi_msecs_t _mi_clock_now(void); |
| 182 | mi_msecs_t _mi_clock_end(mi_msecs_t start); |
| 183 | mi_msecs_t _mi_clock_start(void); |
| 184 | |
| 185 | // "alloc.c" |
| 186 | void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` |
| 187 | void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; |
| 188 | void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` |
| 189 | void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; |
| 190 | mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); |
| 191 | bool _mi_free_delayed_block(mi_block_t* block); |
| 192 | void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration |
| 193 | void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); |
| 194 | |
| 195 | // option.c, c primitives |
| 196 | char _mi_toupper(char c); |
| 197 | int _mi_strnicmp(const char* s, const char* t, size_t n); |
| 198 | void _mi_strlcpy(char* dest, const char* src, size_t dest_size); |
| 199 | void _mi_strlcat(char* dest, const char* src, size_t dest_size); |
| 200 | size_t _mi_strlen(const char* s); |
| 201 | size_t _mi_strnlen(const char* s, size_t max_len); |
| 202 | |
| 203 | |
| 204 | #if MI_DEBUG>1 |
| 205 | bool _mi_page_is_valid(mi_page_t* page); |
| 206 | #endif |
| 207 | |
| 208 | |
| 209 | // ------------------------------------------------------ |
| 210 | // Branches |
| 211 | // ------------------------------------------------------ |
| 212 | |
| 213 | #if defined(__GNUC__) || defined(__clang__) |
| 214 | #define mi_unlikely(x) (__builtin_expect(!!(x),false)) |
| 215 | #define mi_likely(x) (__builtin_expect(!!(x),true)) |
| 216 | #elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) |
| 217 | #define mi_unlikely(x) (x) [[unlikely]] |
| 218 | #define mi_likely(x) (x) [[likely]] |
| 219 | #else |
| 220 | #define mi_unlikely(x) (x) |
| 221 | #define mi_likely(x) (x) |
| 222 | #endif |
| 223 | |
| 224 | #ifndef __has_builtin |
| 225 | #define __has_builtin(x) 0 |
| 226 | #endif |
| 227 | |
| 228 | |
| 229 | /* ----------------------------------------------------------- |
| 230 | Error codes passed to `_mi_fatal_error` |
| 231 | All are recoverable but EFAULT is a serious error and aborts by default in secure mode. |
| 232 | For portability define undefined error codes using common Unix codes: |
| 233 | <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html> |
| 234 | ----------------------------------------------------------- */ |
| 235 | #include <errno.h> |
| 236 | #ifndef EAGAIN // double free |
| 237 | #define EAGAIN (11) |
| 238 | #endif |
| 239 | #ifndef ENOMEM // out of memory |
| 240 | #define ENOMEM (12) |
| 241 | #endif |
| 242 | #ifndef EFAULT // corrupted free-list or meta-data |
| 243 | #define EFAULT (14) |
| 244 | #endif |
| 245 | #ifndef EINVAL // trying to free an invalid pointer |
| 246 | #define EINVAL (22) |
| 247 | #endif |
| 248 | #ifndef EOVERFLOW // count*size overflow |
| 249 | #define EOVERFLOW (75) |
| 250 | #endif |
| 251 | |
| 252 | |
| 253 | /* ----------------------------------------------------------- |
| 254 | Inlined definitions |
| 255 | ----------------------------------------------------------- */ |
| 256 | #define MI_UNUSED(x) (void)(x) |
| 257 | #if (MI_DEBUG>0) |
| 258 | #define MI_UNUSED_RELEASE(x) |
| 259 | #else |
| 260 | #define MI_UNUSED_RELEASE(x) MI_UNUSED(x) |
| 261 | #endif |
| 262 | |
| 263 | #define MI_INIT4(x) x(),x(),x(),x() |
| 264 | #define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) |
| 265 | #define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) |
| 266 | #define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) |
| 267 | #define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) |
| 268 | #define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) |
| 269 | #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) |
| 270 | |
| 271 | |
| 272 | #include <string.h> |
| 273 | // initialize a local variable to zero; use memset as compilers optimize constant sized memset's |
| 274 | #define _mi_memzero_var(x) memset(&x,0,sizeof(x)) |
| 275 | |
| 276 | // Is `x` a power of two? (0 is considered a power of two) |
| 277 | static inline bool _mi_is_power_of_two(uintptr_t x) { |
| 278 | return ((x & (x - 1)) == 0); |
| 279 | } |
| 280 | |
| 281 | // Is a pointer aligned? |
| 282 | static inline bool _mi_is_aligned(void* p, size_t alignment) { |
| 283 | mi_assert_internal(alignment != 0); |
| 284 | return (((uintptr_t)p % alignment) == 0); |
| 285 | } |
| 286 | |
| 287 | // Align upwards |
| 288 | static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { |
| 289 | mi_assert_internal(alignment != 0); |
| 290 | uintptr_t mask = alignment - 1; |
| 291 | if ((alignment & mask) == 0) { // power of two? |
| 292 | return ((sz + mask) & ~mask); |
| 293 | } |
| 294 | else { |
| 295 | return (((sz + mask)/alignment)*alignment); |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | // Align downwards |
| 300 | static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { |
| 301 | mi_assert_internal(alignment != 0); |
| 302 | uintptr_t mask = alignment - 1; |
| 303 | if ((alignment & mask) == 0) { // power of two? |
| 304 | return (sz & ~mask); |
| 305 | } |
| 306 | else { |
| 307 | return ((sz / alignment) * alignment); |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. |
| 312 | static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { |
| 313 | mi_assert_internal(divider != 0); |
| 314 | return (divider == 0 ? size : ((size + divider - 1) / divider)); |
| 315 | } |
| 316 | |
| 317 | // Is memory zero initialized? |
| 318 | static inline bool mi_mem_is_zero(const void* p, size_t size) { |
| 319 | for (size_t i = 0; i < size; i++) { |
| 320 | if (((uint8_t*)p)[i] != 0) return false; |
| 321 | } |
| 322 | return true; |
| 323 | } |
| 324 | |
| 325 | |
| 326 | // Align a byte size to a size in _machine words_, |
| 327 | // i.e. byte size == `wsize*sizeof(void*)`. |
| 328 | static inline size_t _mi_wsize_from_size(size_t size) { |
| 329 | mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); |
| 330 | return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); |
| 331 | } |
| 332 | |
| 333 | // Overflow detecting multiply |
| 334 | #if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) |
| 335 | #include <limits.h> // UINT_MAX, ULONG_MAX |
| 336 | #if defined(_CLOCK_T) // for Illumos |
| 337 | #undef _CLOCK_T |
| 338 | #endif |
| 339 | static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { |
| 340 | #if (SIZE_MAX == ULONG_MAX) |
| 341 | return __builtin_umull_overflow(count, size, (unsigned long *)total); |
| 342 | #elif (SIZE_MAX == UINT_MAX) |
| 343 | return __builtin_umul_overflow(count, size, (unsigned int *)total); |
| 344 | #else |
| 345 | return __builtin_umulll_overflow(count, size, (unsigned long long *)total); |
| 346 | #endif |
| 347 | } |
| 348 | #else /* __builtin_umul_overflow is unavailable */ |
| 349 | static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { |
| 350 | #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) |
| 351 | *total = count * size; |
| 352 | // note: gcc/clang optimize this to directly check the overflow flag |
| 353 | return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); |
| 354 | } |
| 355 | #endif |
| 356 | |
| 357 | // Safe multiply `count*size` into `total`; return `true` on overflow. |
| 358 | static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { |
| 359 | if (count==1) { // quick check for the case where count is one (common for C++ allocators) |
| 360 | *total = size; |
| 361 | return false; |
| 362 | } |
| 363 | else if mi_unlikely(mi_mul_overflow(count, size, total)) { |
| 364 | #if MI_DEBUG > 0 |
| 365 | _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); |
| 366 | #endif |
| 367 | *total = SIZE_MAX; |
| 368 | return true; |
| 369 | } |
| 370 | else return false; |
| 371 | } |
| 372 | |
| 373 | |
| 374 | /*---------------------------------------------------------------------------------------- |
| 375 | Heap functions |
| 376 | ------------------------------------------------------------------------------------------- */ |
| 377 | |
| 378 | extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap |
| 379 | |
| 380 | static inline bool mi_heap_is_backing(const mi_heap_t* heap) { |
| 381 | return (heap->tld->heap_backing == heap); |
| 382 | } |
| 383 | |
| 384 | static inline bool mi_heap_is_initialized(mi_heap_t* heap) { |
| 385 | mi_assert_internal(heap != NULL); |
| 386 | return (heap != &_mi_heap_empty); |
| 387 | } |
| 388 | |
| 389 | static inline uintptr_t _mi_ptr_cookie(const void* p) { |
| 390 | extern mi_heap_t _mi_heap_main; |
| 391 | mi_assert_internal(_mi_heap_main.cookie != 0); |
| 392 | return ((uintptr_t)p ^ _mi_heap_main.cookie); |
| 393 | } |
| 394 | |
| 395 | /* ----------------------------------------------------------- |
| 396 | Pages |
| 397 | ----------------------------------------------------------- */ |
| 398 | |
| 399 | static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { |
| 400 | mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); |
| 401 | const size_t idx = _mi_wsize_from_size(size); |
| 402 | mi_assert_internal(idx < MI_PAGES_DIRECT); |
| 403 | return heap->pages_free_direct[idx]; |
| 404 | } |
| 405 | |
| 406 | // Segment that contains the pointer |
| 407 | // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), |
| 408 | // and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; |
| 409 | // therefore we align one byte before `p`. |
| 410 | static inline mi_segment_t* _mi_ptr_segment(const void* p) { |
| 411 | mi_assert_internal(p != NULL); |
| 412 | return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); |
| 413 | } |
| 414 | |
| 415 | static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { |
| 416 | mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); |
| 417 | return (mi_page_t*)(s); |
| 418 | } |
| 419 | |
| 420 | static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { |
| 421 | mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); |
| 422 | return (mi_slice_t*)(p); |
| 423 | } |
| 424 | |
| 425 | // Segment belonging to a page |
| 426 | static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { |
| 427 | mi_segment_t* segment = _mi_ptr_segment(page); |
| 428 | mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); |
| 429 | return segment; |
| 430 | } |
| 431 | |
| 432 | static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { |
| 433 | mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); |
| 434 | mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); |
| 435 | mi_assert_internal(start->slice_offset == 0); |
| 436 | mi_assert_internal(start + start->slice_count > slice); |
| 437 | return start; |
| 438 | } |
| 439 | |
| 440 | // Get the page containing the pointer (performance critical as it is called in mi_free) |
| 441 | static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { |
| 442 | mi_assert_internal(p > (void*)segment); |
| 443 | ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; |
| 444 | mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); |
| 445 | size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; |
| 446 | mi_assert_internal(idx <= segment->slice_entries); |
| 447 | mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; |
| 448 | mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data |
| 449 | mi_assert_internal(slice->slice_offset == 0); |
| 450 | mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); |
| 451 | return mi_slice_to_page(slice); |
| 452 | } |
| 453 | |
| 454 | // Quick page start for initialized pages |
| 455 | static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { |
| 456 | return _mi_segment_page_start(segment, page, page_size); |
| 457 | } |
| 458 | |
| 459 | // Get the page containing the pointer |
| 460 | static inline mi_page_t* _mi_ptr_page(void* p) { |
| 461 | return _mi_segment_page_of(_mi_ptr_segment(p), p); |
| 462 | } |
| 463 | |
| 464 | // Get the block size of a page (special case for huge objects) |
| 465 | static inline size_t mi_page_block_size(const mi_page_t* page) { |
| 466 | const size_t bsize = page->xblock_size; |
| 467 | mi_assert_internal(bsize > 0); |
| 468 | if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) { |
| 469 | return bsize; |
| 470 | } |
| 471 | else { |
| 472 | size_t psize; |
| 473 | _mi_segment_page_start(_mi_page_segment(page), page, &psize); |
| 474 | return psize; |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | static inline bool mi_page_is_huge(const mi_page_t* page) { |
| 479 | return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); |
| 480 | } |
| 481 | |
| 482 | // Get the usable block size of a page without fixed padding. |
| 483 | // This may still include internal padding due to alignment and rounding up size classes. |
| 484 | static inline size_t mi_page_usable_block_size(const mi_page_t* page) { |
| 485 | return mi_page_block_size(page) - MI_PADDING_SIZE; |
| 486 | } |
| 487 | |
| 488 | // size of a segment |
| 489 | static inline size_t mi_segment_size(mi_segment_t* segment) { |
| 490 | return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; |
| 491 | } |
| 492 | |
| 493 | static inline uint8_t* mi_segment_end(mi_segment_t* segment) { |
| 494 | return (uint8_t*)segment + mi_segment_size(segment); |
| 495 | } |
| 496 | |
| 497 | // Thread free access |
| 498 | static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { |
| 499 | return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); |
| 500 | } |
| 501 | |
| 502 | static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { |
| 503 | return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); |
| 504 | } |
| 505 | |
| 506 | // Heap access |
| 507 | static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { |
| 508 | return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); |
| 509 | } |
| 510 | |
| 511 | static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { |
| 512 | mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); |
| 513 | mi_atomic_store_release(&page->xheap,(uintptr_t)heap); |
| 514 | } |
| 515 | |
| 516 | // Thread free flag helpers |
| 517 | static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { |
| 518 | return (mi_block_t*)(tf & ~0x03); |
| 519 | } |
| 520 | static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { |
| 521 | return (mi_delayed_t)(tf & 0x03); |
| 522 | } |
| 523 | static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { |
| 524 | return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); |
| 525 | } |
| 526 | static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { |
| 527 | return mi_tf_make(mi_tf_block(tf),delayed); |
| 528 | } |
| 529 | static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { |
| 530 | return mi_tf_make(block, mi_tf_delayed(tf)); |
| 531 | } |
| 532 | |
| 533 | // are all blocks in a page freed? |
| 534 | // note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. |
| 535 | static inline bool mi_page_all_free(const mi_page_t* page) { |
| 536 | mi_assert_internal(page != NULL); |
| 537 | return (page->used == 0); |
| 538 | } |
| 539 | |
| 540 | // are there any available blocks? |
| 541 | static inline bool mi_page_has_any_available(const mi_page_t* page) { |
| 542 | mi_assert_internal(page != NULL && page->reserved > 0); |
| 543 | return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); |
| 544 | } |
| 545 | |
| 546 | // are there immediately available blocks, i.e. blocks available on the free list. |
| 547 | static inline bool mi_page_immediate_available(const mi_page_t* page) { |
| 548 | mi_assert_internal(page != NULL); |
| 549 | return (page->free != NULL); |
| 550 | } |
| 551 | |
| 552 | // is more than 7/8th of a page in use? |
| 553 | static inline bool mi_page_mostly_used(const mi_page_t* page) { |
| 554 | if (page==NULL) return true; |
| 555 | uint16_t frac = page->reserved / 8U; |
| 556 | return (page->reserved - page->used <= frac); |
| 557 | } |
| 558 | |
| 559 | static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { |
| 560 | return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; |
| 561 | } |
| 562 | |
| 563 | |
| 564 | |
| 565 | //----------------------------------------------------------- |
| 566 | // Page flags |
| 567 | //----------------------------------------------------------- |
| 568 | static inline bool mi_page_is_in_full(const mi_page_t* page) { |
| 569 | return page->flags.x.in_full; |
| 570 | } |
| 571 | |
| 572 | static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { |
| 573 | page->flags.x.in_full = in_full; |
| 574 | } |
| 575 | |
| 576 | static inline bool mi_page_has_aligned(const mi_page_t* page) { |
| 577 | return page->flags.x.has_aligned; |
| 578 | } |
| 579 | |
| 580 | static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { |
| 581 | page->flags.x.has_aligned = has_aligned; |
| 582 | } |
| 583 | |
| 584 | |
| 585 | /* ------------------------------------------------------------------- |
| 586 | Encoding/Decoding the free list next pointers |
| 587 | |
| 588 | This is to protect against buffer overflow exploits where the |
| 589 | free list is mutated. Many hardened allocators xor the next pointer `p` |
| 590 | with a secret key `k1`, as `p^k1`. This prevents overwriting with known |
| 591 | values but might be still too weak: if the attacker can guess |
| 592 | the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). |
| 593 | Moreover, if multiple blocks can be read as well, the attacker can |
| 594 | xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot |
| 595 | about the pointers (and subsequently `k1`). |
| 596 | |
| 597 | Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`. |
| 598 | Since these operations are not associative, the above approaches do not |
| 599 | work so well any more even if the `p` can be guesstimated. For example, |
| 600 | for the read case we can subtract two entries to discard the `+k1` term, |
| 601 | but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best. |
| 602 | We include the left-rotation since xor and addition are otherwise linear |
| 603 | in the lowest bit. Finally, both keys are unique per page which reduces |
| 604 | the re-use of keys by a large factor. |
| 605 | |
| 606 | We also pass a separate `null` value to be used as `NULL` or otherwise |
| 607 | `(k2<<<k1)+k1` would appear (too) often as a sentinel value. |
| 608 | ------------------------------------------------------------------- */ |
| 609 | |
| 610 | static inline bool mi_is_in_same_segment(const void* p, const void* q) { |
| 611 | return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); |
| 612 | } |
| 613 | |
| 614 | static inline bool mi_is_in_same_page(const void* p, const void* q) { |
| 615 | mi_segment_t* segment = _mi_ptr_segment(p); |
| 616 | if (_mi_ptr_segment(q) != segment) return false; |
| 617 | // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); |
| 618 | mi_page_t* page = _mi_segment_page_of(segment, p); |
| 619 | size_t psize; |
| 620 | uint8_t* start = _mi_segment_page_start(segment, page, &psize); |
| 621 | return (start <= (uint8_t*)q && (uint8_t*)q < start + psize); |
| 622 | } |
| 623 | |
| 624 | static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { |
| 625 | shift %= MI_INTPTR_BITS; |
| 626 | return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift)))); |
| 627 | } |
| 628 | static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { |
| 629 | shift %= MI_INTPTR_BITS; |
| 630 | return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); |
| 631 | } |
| 632 | |
| 633 | static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { |
| 634 | void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); |
| 635 | return (p==null ? NULL : p); |
| 636 | } |
| 637 | |
| 638 | static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { |
| 639 | uintptr_t x = (uintptr_t)(p==NULL ? null : p); |
| 640 | return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; |
| 641 | } |
| 642 | |
| 643 | static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { |
| 644 | mi_track_mem_defined(block,sizeof(mi_block_t)); |
| 645 | mi_block_t* next; |
| 646 | #ifdef MI_ENCODE_FREELIST |
| 647 | next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); |
| 648 | #else |
| 649 | MI_UNUSED(keys); MI_UNUSED(null); |
| 650 | next = (mi_block_t*)block->next; |
| 651 | #endif |
| 652 | mi_track_mem_noaccess(block,sizeof(mi_block_t)); |
| 653 | return next; |
| 654 | } |
| 655 | |
| 656 | static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { |
| 657 | mi_track_mem_undefined(block,sizeof(mi_block_t)); |
| 658 | #ifdef MI_ENCODE_FREELIST |
| 659 | block->next = mi_ptr_encode(null, next, keys); |
| 660 | #else |
| 661 | MI_UNUSED(keys); MI_UNUSED(null); |
| 662 | block->next = (mi_encoded_t)next; |
| 663 | #endif |
| 664 | mi_track_mem_noaccess(block,sizeof(mi_block_t)); |
| 665 | } |
| 666 | |
| 667 | static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { |
| 668 | #ifdef MI_ENCODE_FREELIST |
| 669 | mi_block_t* next = mi_block_nextx(page,block,page->keys); |
| 670 | // check for free list corruption: is `next` at least in the same page? |
| 671 | // TODO: check if `next` is `page->block_size` aligned? |
| 672 | if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { |
| 673 | _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); |
| 674 | next = NULL; |
| 675 | } |
| 676 | return next; |
| 677 | #else |
| 678 | MI_UNUSED(page); |
| 679 | return mi_block_nextx(page,block,NULL); |
| 680 | #endif |
| 681 | } |
| 682 | |
| 683 | static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { |
| 684 | #ifdef MI_ENCODE_FREELIST |
| 685 | mi_block_set_nextx(page,block,next, page->keys); |
| 686 | #else |
| 687 | MI_UNUSED(page); |
| 688 | mi_block_set_nextx(page,block,next,NULL); |
| 689 | #endif |
| 690 | } |
| 691 | |
| 692 | |
| 693 | // ------------------------------------------------------------------- |
| 694 | // commit mask |
| 695 | // ------------------------------------------------------------------- |
| 696 | |
| 697 | static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { |
| 698 | for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { |
| 699 | cm->mask[i] = 0; |
| 700 | } |
| 701 | } |
| 702 | |
| 703 | static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { |
| 704 | for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { |
| 705 | cm->mask[i] = ~((size_t)0); |
| 706 | } |
| 707 | } |
| 708 | |
| 709 | static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { |
| 710 | for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { |
| 711 | if (cm->mask[i] != 0) return false; |
| 712 | } |
| 713 | return true; |
| 714 | } |
| 715 | |
| 716 | static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { |
| 717 | for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { |
| 718 | if (cm->mask[i] != ~((size_t)0)) return false; |
| 719 | } |
| 720 | return true; |
| 721 | } |
| 722 | |
| 723 | // defined in `segment.c`: |
| 724 | size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); |
| 725 | size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); |
| 726 | |
| 727 | #define mi_commit_mask_foreach(cm,idx,count) \ |
| 728 | idx = 0; \ |
| 729 | while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { |
| 730 | |
| 731 | #define mi_commit_mask_foreach_end() \ |
| 732 | idx += count; \ |
| 733 | } |
| 734 | |
| 735 | |
| 736 | |
| 737 | /* ----------------------------------------------------------- |
| 738 | memory id's |
| 739 | ----------------------------------------------------------- */ |
| 740 | |
| 741 | static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { |
| 742 | mi_memid_t memid; |
| 743 | _mi_memzero_var(memid); |
| 744 | memid.memkind = memkind; |
| 745 | return memid; |
| 746 | } |
| 747 | |
| 748 | static inline mi_memid_t _mi_memid_none(void) { |
| 749 | return _mi_memid_create(MI_MEM_NONE); |
| 750 | } |
| 751 | |
| 752 | static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { |
| 753 | mi_memid_t memid = _mi_memid_create(MI_MEM_OS); |
| 754 | memid.initially_committed = committed; |
| 755 | memid.initially_zero = is_zero; |
| 756 | memid.is_pinned = is_large; |
| 757 | return memid; |
| 758 | } |
| 759 | |
| 760 | |
| 761 | // ------------------------------------------------------------------- |
| 762 | // Fast "random" shuffle |
| 763 | // ------------------------------------------------------------------- |
| 764 | |
| 765 | static inline uintptr_t _mi_random_shuffle(uintptr_t x) { |
| 766 | if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros |
| 767 | #if (MI_INTPTR_SIZE==8) |
| 768 | // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c> |
| 769 | x ^= x >> 30; |
| 770 | x *= 0xbf58476d1ce4e5b9UL; |
| 771 | x ^= x >> 27; |
| 772 | x *= 0x94d049bb133111ebUL; |
| 773 | x ^= x >> 31; |
| 774 | #elif (MI_INTPTR_SIZE==4) |
| 775 | // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/> |
| 776 | x ^= x >> 16; |
| 777 | x *= 0x7feb352dUL; |
| 778 | x ^= x >> 15; |
| 779 | x *= 0x846ca68bUL; |
| 780 | x ^= x >> 16; |
| 781 | #endif |
| 782 | return x; |
| 783 | } |
| 784 | |
| 785 | // ------------------------------------------------------------------- |
| 786 | // Optimize numa node access for the common case (= one node) |
| 787 | // ------------------------------------------------------------------- |
| 788 | |
| 789 | int _mi_os_numa_node_get(mi_os_tld_t* tld); |
| 790 | size_t _mi_os_numa_node_count_get(void); |
| 791 | |
| 792 | extern _Atomic(size_t) _mi_numa_node_count; |
| 793 | static inline int _mi_os_numa_node(mi_os_tld_t* tld) { |
| 794 | if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } |
| 795 | else return _mi_os_numa_node_get(tld); |
| 796 | } |
| 797 | static inline size_t _mi_os_numa_node_count(void) { |
| 798 | const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); |
| 799 | if mi_likely(count > 0) { return count; } |
| 800 | else return _mi_os_numa_node_count_get(); |
| 801 | } |
| 802 | |
| 803 | |
| 804 | |
| 805 | // ----------------------------------------------------------------------- |
| 806 | // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) |
| 807 | // ----------------------------------------------------------------------- |
| 808 | |
| 809 | #if defined(__GNUC__) |
| 810 | |
| 811 | #include <limits.h> // LONG_MAX |
| 812 | #define MI_HAVE_FAST_BITSCAN |
| 813 | static inline size_t mi_clz(uintptr_t x) { |
| 814 | if (x==0) return MI_INTPTR_BITS; |
| 815 | #if (INTPTR_MAX == LONG_MAX) |
| 816 | return __builtin_clzl(x); |
| 817 | #else |
| 818 | return __builtin_clzll(x); |
| 819 | #endif |
| 820 | } |
| 821 | static inline size_t mi_ctz(uintptr_t x) { |
| 822 | if (x==0) return MI_INTPTR_BITS; |
| 823 | #if (INTPTR_MAX == LONG_MAX) |
| 824 | return __builtin_ctzl(x); |
| 825 | #else |
| 826 | return __builtin_ctzll(x); |
| 827 | #endif |
| 828 | } |
| 829 | |
| 830 | #elif defined(_MSC_VER) |
| 831 | |
| 832 | #include <limits.h> // LONG_MAX |
| 833 | #include <intrin.h> // BitScanReverse64 |
| 834 | #define MI_HAVE_FAST_BITSCAN |
| 835 | static inline size_t mi_clz(uintptr_t x) { |
| 836 | if (x==0) return MI_INTPTR_BITS; |
| 837 | unsigned long idx; |
| 838 | #if (INTPTR_MAX == LONG_MAX) |
| 839 | _BitScanReverse(&idx, x); |
| 840 | #else |
| 841 | _BitScanReverse64(&idx, x); |
| 842 | #endif |
| 843 | return ((MI_INTPTR_BITS - 1) - idx); |
| 844 | } |
| 845 | static inline size_t mi_ctz(uintptr_t x) { |
| 846 | if (x==0) return MI_INTPTR_BITS; |
| 847 | unsigned long idx; |
| 848 | #if (INTPTR_MAX == LONG_MAX) |
| 849 | _BitScanForward(&idx, x); |
| 850 | #else |
| 851 | _BitScanForward64(&idx, x); |
| 852 | #endif |
| 853 | return idx; |
| 854 | } |
| 855 | |
| 856 | #else |
| 857 | static inline size_t mi_ctz32(uint32_t x) { |
| 858 | // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> |
| 859 | static const unsigned char debruijn[32] = { |
| 860 | 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, |
| 861 | 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 |
| 862 | }; |
| 863 | if (x==0) return 32; |
| 864 | return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; |
| 865 | } |
| 866 | static inline size_t mi_clz32(uint32_t x) { |
| 867 | // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> |
| 868 | static const uint8_t debruijn[32] = { |
| 869 | 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, |
| 870 | 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 |
| 871 | }; |
| 872 | if (x==0) return 32; |
| 873 | x |= x >> 1; |
| 874 | x |= x >> 2; |
| 875 | x |= x >> 4; |
| 876 | x |= x >> 8; |
| 877 | x |= x >> 16; |
| 878 | return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; |
| 879 | } |
| 880 | |
| 881 | static inline size_t mi_clz(uintptr_t x) { |
| 882 | if (x==0) return MI_INTPTR_BITS; |
| 883 | #if (MI_INTPTR_BITS <= 32) |
| 884 | return mi_clz32((uint32_t)x); |
| 885 | #else |
| 886 | size_t count = mi_clz32((uint32_t)(x >> 32)); |
| 887 | if (count < 32) return count; |
| 888 | return (32 + mi_clz32((uint32_t)x)); |
| 889 | #endif |
| 890 | } |
| 891 | static inline size_t mi_ctz(uintptr_t x) { |
| 892 | if (x==0) return MI_INTPTR_BITS; |
| 893 | #if (MI_INTPTR_BITS <= 32) |
| 894 | return mi_ctz32((uint32_t)x); |
| 895 | #else |
| 896 | size_t count = mi_ctz32((uint32_t)x); |
| 897 | if (count < 32) return count; |
| 898 | return (32 + mi_ctz32((uint32_t)(x>>32))); |
| 899 | #endif |
| 900 | } |
| 901 | |
| 902 | #endif |
| 903 | |
| 904 | // "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) |
| 905 | static inline size_t mi_bsr(uintptr_t x) { |
| 906 | return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); |
| 907 | } |
| 908 | |
| 909 | |
| 910 | // --------------------------------------------------------------------------------- |
| 911 | // Provide our own `_mi_memcpy` for potential performance optimizations. |
| 912 | // |
| 913 | // For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if |
| 914 | // we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support |
| 915 | // (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. |
| 916 | // --------------------------------------------------------------------------------- |
| 917 | |
| 918 | #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) |
| 919 | #include <intrin.h> |
| 920 | extern bool _mi_cpu_has_fsrm; |
| 921 | static inline void _mi_memcpy(void* dst, const void* src, size_t n) { |
| 922 | if (_mi_cpu_has_fsrm) { |
| 923 | __movsb((unsigned char*)dst, (const unsigned char*)src, n); |
| 924 | } |
| 925 | else { |
| 926 | memcpy(dst, src, n); |
| 927 | } |
| 928 | } |
| 929 | static inline void _mi_memzero(void* dst, size_t n) { |
| 930 | if (_mi_cpu_has_fsrm) { |
| 931 | __stosb((unsigned char*)dst, 0, n); |
| 932 | } |
| 933 | else { |
| 934 | memset(dst, 0, n); |
| 935 | } |
| 936 | } |
| 937 | #else |
| 938 | static inline void _mi_memcpy(void* dst, const void* src, size_t n) { |
| 939 | memcpy(dst, src, n); |
| 940 | } |
| 941 | static inline void _mi_memzero(void* dst, size_t n) { |
| 942 | memset(dst, 0, n); |
| 943 | } |
| 944 | #endif |
| 945 | |
| 946 | // ------------------------------------------------------------------------------- |
| 947 | // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned |
| 948 | // This is used for example in `mi_realloc`. |
| 949 | // ------------------------------------------------------------------------------- |
| 950 | |
| 951 | #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) |
| 952 | // On GCC/CLang we provide a hint that the pointers are word aligned. |
| 953 | static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { |
| 954 | mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); |
| 955 | void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); |
| 956 | const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); |
| 957 | _mi_memcpy(adst, asrc, n); |
| 958 | } |
| 959 | |
| 960 | static inline void _mi_memzero_aligned(void* dst, size_t n) { |
| 961 | mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); |
| 962 | void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); |
| 963 | _mi_memzero(adst, n); |
| 964 | } |
| 965 | #else |
| 966 | // Default fallback on `_mi_memcpy` |
| 967 | static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { |
| 968 | mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); |
| 969 | _mi_memcpy(dst, src, n); |
| 970 | } |
| 971 | |
| 972 | static inline void _mi_memzero_aligned(void* dst, size_t n) { |
| 973 | mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); |
| 974 | _mi_memzero(dst, n); |
| 975 | } |
| 976 | #endif |
| 977 | |
| 978 | |
| 979 | #endif |
| 980 | |