microsoft/qdk
Publicmirrored fromhttps://github.com/microsoft/qdkAvailable
allocator/mimalloc-sys/mimalloc/src/init.c
709lines · modecode
| 1 | /* ---------------------------------------------------------------------------- |
| 2 | Copyright (c) 2018-2022, Microsoft Research, Daan Leijen |
| 3 | This is free software; you can redistribute it and/or modify it under the |
| 4 | terms of the MIT license. A copy of the license can be found in the file |
| 5 | "LICENSE" at the root of this distribution. |
| 6 | -----------------------------------------------------------------------------*/ |
| 7 | #include "mimalloc.h" |
| 8 | #include "mimalloc/internal.h" |
| 9 | #include "mimalloc/prim.h" |
| 10 | |
| 11 | #include <string.h> // memcpy, memset |
| 12 | #include <stdlib.h> // atexit |
| 13 | |
| 14 | |
| 15 | // Empty page used to initialize the small free pages array |
// Shared, read-only page descriptor whose every member is zero, false or NULL.
// The initializer is positional, so the order below must follow the member order
// of `mi_page_t`; two members are only present under the preprocessor conditions shown.
| 16 | const mi_page_t _mi_page_empty = { |
| 17 | 0, false, false, false, |
| 18 | 0, // capacity |
| 19 | 0, // reserved capacity |
| 20 | { 0 }, // flags |
| 21 | false, // is_zero |
| 22 | 0, // retire_expire |
| 23 | NULL, // free |
| 24 | 0, // used |
| 25 | 0, // xblock_size |
| 26 | NULL, // local_free |
| 27 | #if (MI_PADDING || MI_ENCODE_FREELIST) |
| 28 | { 0, 0 }, |
| 29 | #endif |
| 30 | MI_ATOMIC_VAR_INIT(0), // xthread_free |
| 31 | MI_ATOMIC_VAR_INIT(0), // xheap |
| 32 | NULL, NULL |
| 33 | #if MI_INTPTR_SIZE==8 |
| 34 | , { 0 } // padding |
| 35 | #endif |
| 36 | }; |
| 37 | |
// MI_PAGE_EMPTY() yields a non-const pointer to the shared `_mi_page_empty`
// (the cast drops `const`; NOTE(review): presumably nothing writes through it -- confirm).
// MI_SMALL_PAGES_EMPTY initializes every slot to that pointer: 128 entries through
// MI_INIT128 plus 2, 3 or 1 extra entries depending on MI_PADDING and MI_INTPTR_SIZE.
// Any MI_SMALL_WSIZE_MAX other than 128 is rejected at compile time.
| 38 | #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) |
| 39 | |
| 40 | #if (MI_SMALL_WSIZE_MAX==128) |
| 41 | #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) |
| 42 | #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } |
| 43 | #elif (MI_PADDING>0) |
| 44 | #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } |
| 45 | #else |
| 46 | #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } |
| 47 | #endif |
| 48 | #else |
| 49 | #error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" |
| 50 | #endif |
| 51 | |
| 52 | // Empty page queues for every bin |
// QNULL(sz) builds one empty queue entry: two NULL pointers followed by a size,
// where `sz` is given in machine words and converted to bytes (sz * sizeof(uintptr_t)).
// MI_PAGE_QUEUES_EMPTY lists a leading QNULL(1), then 72 size classes (the trailing
// /* N */ markers give the running count), then the huge queue and the full queue.
| 53 | #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } |
| 54 | #define MI_PAGE_QUEUES_EMPTY \ |
| 55 | { QNULL(1), \ |
| 56 | QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ |
| 57 | QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ |
| 58 | QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ |
| 59 | QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ |
| 60 | QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ |
| 61 | QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ |
| 62 | QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ |
| 63 | QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ |
| 64 | QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ |
| 65 | QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ |
| 66 | QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ } |
| 67 | |
// MI_STAT_COUNT_NULL() is the zero initializer for one four-field counter.
// MI_STAT_COUNT_END_NULL() appends an extra brace group (one counter plus MI_INIT32 of
// them) only when MI_STAT>1; otherwise it expands to nothing.
// MI_STATS_NULL is the positional zero initializer for a whole statistics record:
// 15 four-field counters, 10 two-field entries, then the optional MI_STAT>1 tail.
// It is meant to be wrapped in braces by the user, e.g. `{ MI_STATS_NULL }`.
| 68 | #define MI_STAT_COUNT_NULL() {0,0,0,0} |
| 69 | |
| 70 | // Empty statistics |
| 71 | #if MI_STAT>1 |
| 72 | #define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } |
| 73 | #else |
| 74 | #define MI_STAT_COUNT_END_NULL() |
| 75 | #endif |
| 76 | |
| 77 | #define MI_STATS_NULL \ |
| 78 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 79 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 80 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 81 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 82 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 83 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 84 | MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ |
| 85 | MI_STAT_COUNT_NULL(), \ |
| 86 | { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ |
| 87 | { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ |
| 88 | MI_STAT_COUNT_END_NULL() |
| 89 | |
| 90 | |
| 91 | // Empty slice span queues for every bin |
// SQNULL(sz) builds one empty span-queue entry: two NULL pointers and the value `sz`
// stored as-is (NOTE(review): presumably a count of slices -- confirm against the type).
// MI_SEGMENT_SPAN_QUEUES_EMPTY lists a leading SQNULL(1) followed by 35 entries;
// note that the sequence steps from 7 directly to 10.
| 92 | #define SQNULL(sz) { NULL, NULL, sz } |
| 93 | #define MI_SEGMENT_SPAN_QUEUES_EMPTY \ |
| 94 | { SQNULL(1), \ |
| 95 | SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ |
| 96 | SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ |
| 97 | SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ |
| 98 | SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ |
| 99 | SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } |
| 100 | |
| 101 | |
| 102 | // -------------------------------------------------------- |
| 103 | // Statically allocate an empty heap as the initial |
| 104 | // thread local value for the default heap, |
| 105 | // and statically allocate the backing heap for the main |
| 106 | // thread so it can function without doing any allocation |
| 107 | // itself (as accessing a thread local for the first time |
| 108 | // may lead to allocation itself on some platforms) |
| 109 | // -------------------------------------------------------- |
| 110 | |
// Read-only placeholder heap: NULL tld, empty small-page table and page queues,
// zero ids, cookie and keys. It is the initial value of the thread-local
// `_mi_heap_default` and the template that `_mi_heap_init` copies into each
// newly created thread heap. The initializer is positional.
| 111 | mi_decl_cache_align const mi_heap_t _mi_heap_empty = { |
| 112 | NULL, |
| 113 | MI_SMALL_PAGES_EMPTY, |
| 114 | MI_PAGE_QUEUES_EMPTY, |
| 115 | MI_ATOMIC_VAR_INIT(NULL), |
| 116 | 0, // tid |
| 117 | 0, // cookie |
| 118 | 0, // arena id |
| 119 | { 0, 0 }, // keys |
| 120 | { {0}, {0}, 0, true }, // random |
| 121 | 0, // page count |
| 122 | MI_BIN_FULL, 0, // page retired min/max |
| 123 | NULL, // next |
| 124 | false |
| 125 | }; |
| 126 | |
// `tld_empty_stats` / `tld_empty_os` compute the addresses of the `stats` and `os`
// members of `tld_empty` from its base address with `offsetof`. They are only
// expanded inside the initializer below, after `tld_empty` has been declared.
// (NOTE(review): presumably written this way instead of `&tld_empty.stats` to
// keep the self-referencing initializer a constant expression on all compilers -- confirm.)
//
// `tld_empty` is the read-only template for thread-local data that `_mi_heap_init`
// copies into a new thread's tld before patching the internal pointers.
| 127 | #define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) |
| 128 | #define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os))) |
| 129 | |
| 130 | mi_decl_cache_align static const mi_tld_t tld_empty = { |
| 131 | 0, |
| 132 | false, |
| 133 | NULL, NULL, |
| 134 | { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments |
| 135 | { 0, tld_empty_stats }, // os |
| 136 | { MI_STATS_NULL } // stats |
| 137 | }; |
| 138 | |
| 139 | mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { |
| 140 | return _mi_prim_thread_id(); |
| 141 | } |
| 142 | |
| 143 | // the thread-local default heap for allocation |
// Starts out pointing at the read-only `_mi_heap_empty` (the cast drops `const`);
// `_mi_heap_set_default_direct` replaces it once a thread gets a real heap.
| 144 | mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; |
| 145 | |
// Forward declaration: `tld_main` below takes the address of the main heap before
// the main heap itself is defined.
| 146 | extern mi_heap_t _mi_heap_main; |
| 147 | |
// Statically allocated thread-local data for the main thread. Both heap pointers
// refer to `_mi_heap_main`, and the nested `segments` / `os` records point back at
// this object's own `stats` and `os` members. The initializer is positional.
| 148 | static mi_tld_t tld_main = { |
| 149 | 0, false, |
| 150 | &_mi_heap_main, & _mi_heap_main, |
| 151 | { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments |
| 152 | { 0, &tld_main.stats }, // os |
| 153 | { MI_STATS_NULL } // stats |
| 154 | }; |
| 155 | |
// Statically allocated heap for the main thread, wired to `tld_main`.
// Thread id, cookie and keys start at 0 and are filled in by `mi_heap_main_init`,
// which uses `cookie == 0` as its "not yet initialized" test. The random state
// starts from a fixed constant and is re-seeded in `mi_heap_main_init`.
| 156 | mi_heap_t _mi_heap_main = { |
| 157 | &tld_main, |
| 158 | MI_SMALL_PAGES_EMPTY, |
| 159 | MI_PAGE_QUEUES_EMPTY, |
| 160 | MI_ATOMIC_VAR_INIT(NULL), |
| 161 | 0, // thread id |
| 162 | 0, // initial cookie |
| 163 | 0, // arena id |
| 164 | { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) |
| 165 | { {0x846ca68b}, {0}, 0, true }, // random |
| 166 | 0, // page count |
| 167 | MI_BIN_FULL, 0, // page retired min/max |
| 168 | NULL, // next heap |
| 169 | false // can reclaim |
| 170 | }; |
| 171 | |
// Process-wide state: the "initialized" flag checked by `mi_process_done`, and the
// global statistics record used for thread counts and thread-metadata allocation.
| 172 | bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. |
| 173 | |
| 174 | mi_stats_t _mi_stats_main = { MI_STATS_NULL }; |
| 175 | |
| 176 | |
| 177 | static void mi_heap_main_init(void) { |
| 178 | if (_mi_heap_main.cookie == 0) { |
| 179 | _mi_heap_main.thread_id = _mi_thread_id(); |
| 180 | _mi_heap_main.cookie = 1; |
| 181 | #if defined(_WIN32) && !defined(MI_SHARED_LIB) |
| 182 | _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking |
| 183 | #else |
| 184 | _mi_random_init(&_mi_heap_main.random); |
| 185 | #endif |
| 186 | _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); |
| 187 | _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); |
| 188 | _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | mi_heap_t* _mi_heap_main_get(void) { |
| 193 | mi_heap_main_init(); |
| 194 | return &_mi_heap_main; |
| 195 | } |
| 196 | |
| 197 | |
| 198 | /* ----------------------------------------------------------- |
| 199 | Initialization and freeing of the thread local heaps |
| 200 | ----------------------------------------------------------- */ |
| 201 | |
| 202 | // note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). |
// Per-thread metadata bundle allocated as a single OS block: the thread's backing
// heap, its thread-local data, and the memory id describing the OS allocation
// (needed again when the block is handed back to `_mi_os_free`).
| 203 | typedef struct mi_thread_data_s { |
| 204 | mi_heap_t heap; // must come first due to cast in `_mi_heap_done` |
| 205 | mi_tld_t tld; |
| 206 | mi_memid_t memid; |
| 207 | } mi_thread_data_t; |
| 208 | |
| 209 | |
| 210 | // Thread meta-data is allocated directly from the OS. For |
| 211 | // some programs that do not use thread pools and allocate and |
| 212 | // destroy many OS threads, this may cause too much overhead |
| 213 | // per thread, so we maintain a small cache of recently freed metadata. |
| 214 | |
// Fixed-size cache of freed thread metadata blocks. Each slot is an atomic pointer;
// NULL means the slot is empty (static storage starts out zeroed).
| 215 | #define TD_CACHE_SIZE (16) |
| 216 | static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; |
| 217 | |
| 218 | static mi_thread_data_t* mi_thread_data_zalloc(void) { |
| 219 | // try to find thread metadata in the cache |
| 220 | bool is_zero = false; |
| 221 | mi_thread_data_t* td = NULL; |
| 222 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
| 223 | td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
| 224 | if (td != NULL) { |
| 225 | // found cached allocation, try use it |
| 226 | td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); |
| 227 | if (td != NULL) { |
| 228 | break; |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | // if that fails, allocate as meta data |
| 234 | if (td == NULL) { |
| 235 | mi_memid_t memid; |
| 236 | td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); |
| 237 | if (td == NULL) { |
| 238 | // if this fails, try once more. (issue #257) |
| 239 | td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); |
| 240 | if (td == NULL) { |
| 241 | // really out of memory |
| 242 | _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); |
| 243 | } |
| 244 | } |
| 245 | if (td != NULL) { |
| 246 | td->memid = memid; |
| 247 | is_zero = memid.initially_zero; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | if (td != NULL && !is_zero) { |
| 252 | _mi_memzero_aligned(td, sizeof(*td)); |
| 253 | } |
| 254 | return td; |
| 255 | } |
| 256 | |
| 257 | static void mi_thread_data_free( mi_thread_data_t* tdfree ) { |
| 258 | // try to add the thread metadata to the cache |
| 259 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
| 260 | mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
| 261 | if (td == NULL) { |
| 262 | mi_thread_data_t* expected = NULL; |
| 263 | if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { |
| 264 | return; |
| 265 | } |
| 266 | } |
| 267 | } |
| 268 | // if that fails, just free it directly |
| 269 | _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); |
| 270 | } |
| 271 | |
| 272 | void _mi_thread_data_collect(void) { |
| 273 | // free all thread metadata from the cache |
| 274 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
| 275 | mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
| 276 | if (td != NULL) { |
| 277 | td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); |
| 278 | if (td != NULL) { |
| 279 | _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | // Initialize the thread local default heap, called from `mi_thread_init` |
| 286 | static bool _mi_heap_init(void) { |
| 287 | if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; |
| 288 | if (_mi_is_main_thread()) { |
| 289 | // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization |
| 290 | // the main heap is statically allocated |
| 291 | mi_heap_main_init(); |
| 292 | _mi_heap_set_default_direct(&_mi_heap_main); |
| 293 | //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); |
| 294 | } |
| 295 | else { |
| 296 | // use `_mi_os_alloc` to allocate directly from the OS |
| 297 | mi_thread_data_t* td = mi_thread_data_zalloc(); |
| 298 | if (td == NULL) return false; |
| 299 | |
| 300 | mi_tld_t* tld = &td->tld; |
| 301 | mi_heap_t* heap = &td->heap; |
| 302 | _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); |
| 303 | _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); |
| 304 | heap->thread_id = _mi_thread_id(); |
| 305 | _mi_random_init(&heap->random); |
| 306 | heap->cookie = _mi_heap_random_next(heap) | 1; |
| 307 | heap->keys[0] = _mi_heap_random_next(heap); |
| 308 | heap->keys[1] = _mi_heap_random_next(heap); |
| 309 | heap->tld = tld; |
| 310 | tld->heap_backing = heap; |
| 311 | tld->heaps = heap; |
| 312 | tld->segments.stats = &tld->stats; |
| 313 | tld->segments.os = &tld->os; |
| 314 | tld->os.stats = &tld->stats; |
| 315 | _mi_heap_set_default_direct(heap); |
| 316 | } |
| 317 | return false; |
| 318 | } |
| 319 | |
| 320 | // Free the thread local default heap (called from `mi_thread_done`) |
| 321 | static bool _mi_heap_done(mi_heap_t* heap) { |
| 322 | if (!mi_heap_is_initialized(heap)) return true; |
| 323 | |
| 324 | // reset default heap |
| 325 | _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); |
| 326 | |
| 327 | // switch to backing heap |
| 328 | heap = heap->tld->heap_backing; |
| 329 | if (!mi_heap_is_initialized(heap)) return false; |
| 330 | |
| 331 | // delete all non-backing heaps in this thread |
| 332 | mi_heap_t* curr = heap->tld->heaps; |
| 333 | while (curr != NULL) { |
| 334 | mi_heap_t* next = curr->next; // save `next` as `curr` will be freed |
| 335 | if (curr != heap) { |
| 336 | mi_assert_internal(!mi_heap_is_backing(curr)); |
| 337 | mi_heap_delete(curr); |
| 338 | } |
| 339 | curr = next; |
| 340 | } |
| 341 | mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL); |
| 342 | mi_assert_internal(mi_heap_is_backing(heap)); |
| 343 | |
| 344 | // collect if not the main thread |
| 345 | if (heap != &_mi_heap_main) { |
| 346 | _mi_heap_collect_abandon(heap); |
| 347 | } |
| 348 | |
| 349 | // merge stats |
| 350 | _mi_stats_done(&heap->tld->stats); |
| 351 | |
| 352 | // free if not the main thread |
| 353 | if (heap != &_mi_heap_main) { |
| 354 | // the following assertion does not always hold for huge segments as those are always treated |
| 355 | // as abondened: one may allocate it in one thread, but deallocate in another in which case |
| 356 | // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363 |
| 357 | // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); |
| 358 | mi_thread_data_free((mi_thread_data_t*)heap); |
| 359 | } |
| 360 | else { |
| 361 | #if 0 |
| 362 | // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, |
| 363 | // there may still be delete/free calls after the mi_fls_done is called. Issue #207 |
| 364 | _mi_heap_destroy_pages(heap); |
| 365 | mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); |
| 366 | #endif |
| 367 | } |
| 368 | return false; |
| 369 | } |
| 370 | |
| 371 | |
| 372 | |
| 373 | // -------------------------------------------------------- |
| 374 | // Try to run `mi_thread_done()` automatically so any memory |
| 375 | // owned by the thread but not yet released can be abandoned |
| 376 | // and re-owned by another thread. |
| 377 | // |
| 378 | // 1. windows dynamic library: |
| 379 | // call from DllMain on DLL_THREAD_DETACH |
| 380 | // 2. windows static library: |
| 381 | // use `FlsAlloc` to call a destructor when the thread is done |
| 382 | // 3. unix, pthreads: |
| 383 | // use a pthread key to call a destructor when a pthread is done |
| 384 | // |
| 385 | // In the last two cases we also need to call `mi_process_init` |
| 386 | // to set up the thread local keys. |
| 387 | // -------------------------------------------------------- |
| 388 | |
| 389 | // Set up handlers so `mi_thread_done` is called automatically |
| 390 | static void mi_process_setup_auto_thread_done(void) { |
| 391 | static bool tls_initialized = false; // fine if it races |
| 392 | if (tls_initialized) return; |
| 393 | tls_initialized = true; |
| 394 | _mi_prim_thread_init_auto_done(); |
| 395 | _mi_heap_set_default_direct(&_mi_heap_main); |
| 396 | } |
| 397 | |
| 398 | |
| 399 | bool _mi_is_main_thread(void) { |
| 400 | return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); |
| 401 | } |
| 402 | |
| 403 | static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); |
| 404 | |
| 405 | size_t _mi_current_thread_count(void) { |
| 406 | return mi_atomic_load_relaxed(&thread_count); |
| 407 | } |
| 408 | |
| 409 | // This is called from the `mi_malloc_generic` |
| 410 | void mi_thread_init(void) mi_attr_noexcept |
| 411 | { |
| 412 | // ensure our process has started already |
| 413 | mi_process_init(); |
| 414 | |
| 415 | // initialize the thread local default heap |
| 416 | // (this will call `_mi_heap_set_default_direct` and thus set the |
| 417 | // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) |
| 418 | if (_mi_heap_init()) return; // returns true if already initialized |
| 419 | |
| 420 | _mi_stat_increase(&_mi_stats_main.threads, 1); |
| 421 | mi_atomic_increment_relaxed(&thread_count); |
| 422 | //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); |
| 423 | } |
| 424 | |
| 425 | void mi_thread_done(void) mi_attr_noexcept { |
| 426 | _mi_thread_done(NULL); |
| 427 | } |
| 428 | |
| 429 | void _mi_thread_done(mi_heap_t* heap) |
| 430 | { |
| 431 | // calling with NULL implies using the default heap |
| 432 | if (heap == NULL) { |
| 433 | heap = mi_prim_get_default_heap(); |
| 434 | if (heap == NULL) return; |
| 435 | } |
| 436 | |
| 437 | // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699) |
| 438 | if (!mi_heap_is_initialized(heap)) { |
| 439 | return; |
| 440 | } |
| 441 | |
| 442 | // adjust stats |
| 443 | mi_atomic_decrement_relaxed(&thread_count); |
| 444 | _mi_stat_decrease(&_mi_stats_main.threads, 1); |
| 445 | |
| 446 | // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... |
| 447 | if (heap->thread_id != _mi_thread_id()) return; |
| 448 | |
| 449 | // abandon the thread local heap |
| 450 | if (_mi_heap_done(heap)) return; // returns true if already ran |
| 451 | } |
| 452 | |
// Make `heap` (must be non-NULL) the calling thread's default heap.
// Exactly one storage mechanism is compiled in: a raw TLS slot, a slot at an offset
// in the pthread structure, a pthread key, or the `_mi_heap_default` thread-local.
// In the MI_TLS_PTHREAD case nothing is stored here (NOTE(review): presumably the
// associate call below writes the key -- confirm in the prim layer).
// In every configuration the heap is then registered with the platform layer so it
// is passed to `_mi_thread_done` when the thread ends.
| 453 | void _mi_heap_set_default_direct(mi_heap_t* heap) { |
| 454 | mi_assert_internal(heap != NULL); |
| 455 | #if defined(MI_TLS_SLOT) |
| 456 | mi_prim_tls_slot_set(MI_TLS_SLOT,heap); |
| 457 | #elif defined(MI_TLS_PTHREAD_SLOT_OFS) |
| 458 | *mi_tls_pthread_heap_slot() = heap; |
| 459 | #elif defined(MI_TLS_PTHREAD) |
| 460 | // we use _mi_heap_default_key |
| 461 | #else |
| 462 | _mi_heap_default = heap; |
| 463 | #endif |
| 464 | |
| 465 | // ensure the default heap is passed to `_mi_thread_done` |
| 466 | // setting to a non-NULL value also ensures `mi_thread_done` is called. |
| 467 | _mi_prim_thread_associate_default_heap(heap); |
| 468 | } |
| 469 | |
| 470 | |
| 471 | // -------------------------------------------------------- |
| 472 | // Run functions on process init/done, and thread init/done |
| 473 | // -------------------------------------------------------- |
// Forward declaration: `mi_process_done` is registered with `atexit` in
// `mi_process_load` before its definition appears.
| 474 | static void mi_cdecl mi_process_done(void); |
| 475 | |
// Module state flags. `os_preloading` is cleared in `mi_process_load` and set again
// at the end of `mi_process_done`; `mi_redirected` is toggled by `_mi_redirect_entry`.
| 476 | static bool os_preloading = true; // true until this module is initialized |
| 477 | static bool mi_redirected = false; // true if malloc redirects to mi_malloc |
| 478 | |
| 479 | // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. |
| 480 | bool mi_decl_noinline _mi_preloading(void) { |
| 481 | return os_preloading; |
| 482 | } |
| 483 | |
| 484 | mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { |
| 485 | return mi_redirected; |
| 486 | } |
| 487 | |
| 488 | // Communicate with the redirection module on Windows |
#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
#ifdef __cplusplus
extern "C" {
#endif
// Entry point invoked by the redirection module with a DLL notification code.
// Careful: this may be called before DllMain.
mi_decl_export void _mi_redirect_entry(DWORD reason) {
  switch (reason) {
    case DLL_PROCESS_ATTACH:
      mi_redirected = true;
      break;
    case DLL_PROCESS_DETACH:
      mi_redirected = false;
      break;
    case DLL_THREAD_DETACH:
      mi_thread_done();
      break;
    default:
      break;  // other notifications are ignored
  }
}
// provided by the redirection module
__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message);
__declspec(dllimport) void mi_cdecl mi_allocator_done(void);
#ifdef __cplusplus
}
#endif
#else
// Stand-in used when there is no redirection module: reports success and no message.
static bool mi_allocator_init(const char** message) {
  if (message != NULL) {
    *message = NULL;
  }
  return true;
}
// Stand-in used when there is no redirection module.
static void mi_allocator_done(void) {
  // nothing to do
}
#endif
| 519 | |
| 520 | // Called once by the process loader |
// Process start-up hook, invoked by whichever platform mechanism is compiled in
// (DllMain, CRT initializer section, C++ static initializer or constructor attribute).
// Order matters: the main heap is set up and TLS is touched before `os_preloading`
// is cleared; only after that are options read, the thread-done hook installed and
// `mi_process_init` run. Finally any redirector message is shown and the main
// heap's random state is re-seeded if it was seeded weakly.
| 521 | static void mi_process_load(void) { |
| 522 | mi_heap_main_init(); |
| 523 | #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) |
| 524 | volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; |
| 525 | if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) |
| 526 | #endif |
| 527 | os_preloading = false; |
| 528 | mi_assert_internal(_mi_is_main_thread()); |
| 529 | #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) |
| 530 | atexit(&mi_process_done); |
| 531 | #endif |
| 532 | _mi_options_init(); |
| 533 | mi_process_setup_auto_thread_done(); |
| 534 | mi_process_init(); |
| 535 | if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); |
| 536 | |
| 537 | // show message from the redirector (if present) |
| 538 | const char* msg = NULL; |
| 539 | mi_allocator_init(&msg); |
| 540 | if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { |
| 541 | _mi_fputs(NULL,NULL,NULL,msg); |
| 542 | } |
| 543 | |
| 544 | // reseed random |
| 545 | _mi_random_reinit_if_weak(&_mi_heap_main.random); |
| 546 | } |
| 547 | |
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
// Set by `mi_detect_cpu_features` when the CPU reports fast short `rep movsb` (FSRM).
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;

// Query CPUID leaf 7 and record the FSRM feature bit.
static void mi_detect_cpu_features(void) {
  // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
  int32_t cpu_info[4];
  __cpuid(cpu_info, 7);
  // cpu_info[3] is EDX; FSRM is bit 4,
  // see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
  const int32_t edx = cpu_info[3];
  const int32_t fsrm_mask = (1 << 4);
  _mi_cpu_has_fsrm = ((edx & fsrm_mask) != 0);
}
#else
// No CPU feature detection on this platform.
static void mi_detect_cpu_features(void) {
  // nothing
}
#endif
| 563 | |
| 564 | // Initialize the process; called by thread_init or the process loader |
// One-time process initialization. `mi_atomic_once` lets only the first caller past
// the guard; later calls return early (after the unconditional main-heap init that
// is compiled in when `_MSC_VER < 1920` holds, which includes compilers that do not
// define `_MSC_VER`).
// The body then, in order: marks the process initialized, installs the automatic
// thread-done hook, detects CPU features, initializes the OS layer and the main heap,
// logs the build configuration, initializes the calling thread, resets statistics,
// and finally applies the huge-page / OS-memory reservation options.
| 565 | void mi_process_init(void) mi_attr_noexcept { |
| 566 | // ensure we are called once |
| 567 | static mi_atomic_once_t process_init; |
| 568 | #if _MSC_VER < 1920 |
| 569 | mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main |
| 570 | #endif |
| 571 | if (!mi_atomic_once(&process_init)) return; |
| 572 | _mi_process_is_initialized = true; |
| 573 | _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); |
| 574 | mi_process_setup_auto_thread_done(); |
| 575 | |
| 576 | mi_detect_cpu_features(); |
| 577 | _mi_os_init(); |
| 578 | mi_heap_main_init(); |
| 579 | #if MI_DEBUG |
| 580 | _mi_verbose_message("debug level : %d\n", MI_DEBUG); |
| 581 | #endif |
| 582 | _mi_verbose_message("secure level: %d\n", MI_SECURE); |
| 583 | _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); |
| 584 | #if MI_TSAN |
| 585 | _mi_verbose_message("thread santizer enabled\n"); |
| 586 | #endif |
| 587 | mi_thread_init(); |
| 588 | |
| 589 | #if defined(_WIN32) |
| 590 | // On windows, when building as a static lib the FLS cleanup happens too early for the main thread. |
| 591 | // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup |
| 592 | // will not call _mi_thread_done on the (still executing) main thread. See issue #508. |
| 593 | _mi_prim_thread_associate_default_heap(NULL); |
| 594 | #endif |
| 595 | |
| 596 | mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) |
| 597 | mi_track_init(); |
| 598 | |
| 599 | if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { |
| 600 | size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); |
| 601 | long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); |
| 602 | if (reserve_at != -1) { |
| 603 | mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); |
| 604 | } else { |
| 605 | mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); |
| 606 | } |
| 607 | } |
| 608 | if (mi_option_is_enabled(mi_option_reserve_os_memory)) { |
| 609 | long ksize = mi_option_get(mi_option_reserve_os_memory); |
| 610 | if (ksize > 0) { |
| 611 | mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); |
| 612 | } |
| 613 | } |
| 614 | } |
| 615 | |
| 616 | // Called when the process is done (through `at_exit`) |
// Process shutdown handler (registered with `atexit` in `mi_process_load`, or called
// from DllMain on process detach). Does nothing if the process was never initialized
// and runs its body at most once.
// Order: release the automatic thread-done hook, optionally collect, optionally
// destroy all memory when `mi_option_destroy_on_exit` is enabled, print statistics if
// requested, notify the redirector, and finally set `os_preloading` back to true so
// `_mi_preloading()` tells callers to avoid the C runtime from then on.
| 617 | static void mi_cdecl mi_process_done(void) { |
| 618 | // only shutdown if we were initialized |
| 619 | if (!_mi_process_is_initialized) return; |
| 620 | // ensure we are called once |
| 621 | static bool process_done = false; |
| 622 | if (process_done) return; |
| 623 | process_done = true; |
| 624 | |
| 625 | // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread |
| 626 | _mi_prim_thread_done_auto_done(); |
| 627 | |
| 628 | #ifndef MI_SKIP_COLLECT_ON_EXIT |
| 629 | #if (MI_DEBUG || !defined(MI_SHARED_LIB)) |
| 630 | // free all memory if possible on process exit. This is not needed for a stand-alone process |
| 631 | // but should be done if mimalloc is statically linked into another shared library which |
| 632 | // is repeatedly loaded/unloaded, see issue #281. |
| 633 | mi_collect(true /* force */ ); |
| 634 | #endif |
| 635 | #endif |
| 636 | |
| 637 | // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free |
| 638 | // since after process_done there might still be other code running that calls `free` (like at_exit routines, |
| 639 | // or C-runtime termination code. |
| 640 | if (mi_option_is_enabled(mi_option_destroy_on_exit)) { |
| 641 | mi_collect(true /* force */); |
| 642 | _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) |
| 643 | _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); |
| 644 | } |
| 645 | |
| 646 | if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { |
| 647 | mi_stats_print(NULL); |
| 648 | } |
| 649 | mi_allocator_done(); |
| 650 | _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); |
| 651 | os_preloading = true; // don't call the C runtime anymore |
| 652 | } |
| 653 | |
| 654 | |
| 655 | |
// Platform-specific ways of getting `mi_process_load` called at process start.
// Exactly one branch is compiled:
//   1. Windows DLL: DllMain dispatches attach/detach notifications; thread detach is
//      skipped when redirected, because `_mi_redirect_entry` handles that notification.
//   2. MSVC static library: a function pointer placed in the `.CRT$XIU` section.
//   3. C++: a file-scope static initializer.
//   4. GCC/Clang: a function with the constructor attribute.
// Any other toolchain only gets a compile-time message.
| 656 | #if defined(_WIN32) && defined(MI_SHARED_LIB) |
| 657 | // Windows DLL: easy to hook into process_init and thread_done |
| 658 | __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { |
| 659 | MI_UNUSED(reserved); |
| 660 | MI_UNUSED(inst); |
| 661 | if (reason==DLL_PROCESS_ATTACH) { |
| 662 | mi_process_load(); |
| 663 | } |
| 664 | else if (reason==DLL_PROCESS_DETACH) { |
| 665 | mi_process_done(); |
| 666 | } |
| 667 | else if (reason==DLL_THREAD_DETACH) { |
| 668 | if (!mi_is_redirected()) { |
| 669 | mi_thread_done(); |
| 670 | } |
| 671 | } |
| 672 | return TRUE; |
| 673 | } |
| 674 | |
| 675 | #elif defined(_MSC_VER) |
| 676 | // MSVC: use data section magic for static libraries |
| 677 | // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm> |
| 678 | static int _mi_process_init(void) { |
| 679 | mi_process_load(); |
| 680 | return 0; |
| 681 | } |
| 682 | typedef int(*_mi_crt_callback_t)(void); |
| 683 | #if defined(_M_X64) || defined(_M_ARM64) |
| 684 | __pragma(comment(linker, "/include:" "_mi_msvc_initu")) |
| 685 | #pragma section(".CRT$XIU", long, read) |
| 686 | #else |
| 687 | __pragma(comment(linker, "/include:" "__mi_msvc_initu")) |
| 688 | #endif |
| 689 | #pragma data_seg(".CRT$XIU") |
| 690 | mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; |
| 691 | #pragma data_seg() |
| 692 | |
| 693 | #elif defined(__cplusplus) |
| 694 | // C++: use static initialization to detect process start |
| 695 | static bool _mi_process_init(void) { |
| 696 | mi_process_load(); |
| 697 | return (_mi_heap_main.thread_id != 0); |
| 698 | } |
| 699 | static bool mi_initialized = _mi_process_init(); |
| 700 | |
| 701 | #elif defined(__GNUC__) || defined(__clang__) |
| 702 | // GCC,Clang: use the constructor attribute |
| 703 | static void __attribute__((constructor)) _mi_process_init(void) { |
| 704 | mi_process_load(); |
| 705 | } |
| 706 | |
| 707 | #else |
| 708 | #pragma message("define a way to call mi_process_load on your platform") |
| 709 | #endif |
| 710 | |