microsoft/qdk

Public

mirrored fromhttps://github.com/microsoft/qdkAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
billti/num2-sim

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

source/allocator/mimalloc-sys/mimalloc/src/prim/unix/prim.c

934lines · modecode

1/* ----------------------------------------------------------------------------
2Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
3This is free software; you can redistribute it and/or modify it under the
4terms of the MIT license. A copy of the license can be found in the file
5"LICENSE" at the root of this distribution.
6-----------------------------------------------------------------------------*/
7
8// This file is included in `src/prim/prim.c`
9
10#ifndef _DEFAULT_SOURCE
11#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined
12#endif
13
14#if defined(__sun)
15// illumos provides new mman.h api when any of these are defined
16// otherwise the old api based on caddr_t which predates the void pointers one.
17// Stock Solaris provides only the former, so we chose to discard those
18// flags only here rather than project-wide.
19#undef _XOPEN_SOURCE
20#undef _POSIX_C_SOURCE
21#endif
22
23#include "mimalloc.h"
24#include "mimalloc/internal.h"
25#include "mimalloc/prim.h"
26
27#include <sys/mman.h> // mmap
28#include <unistd.h> // sysconf
29#include <fcntl.h> // open, close, read, access
30#include <stdlib.h> // getenv, arc4random_buf
31
32#if defined(__linux__)
33 #include <features.h>
34 #include <sys/prctl.h> // THP disable, PR_SET_VMA
35 #if defined(__GLIBC__) && !defined(PR_SET_VMA)
36 #include <linux/prctl.h>
37 #endif
38 #if defined(__GLIBC__)
39 #include <linux/mman.h> // linux mmap flags
40 #else
41 #include <sys/mman.h>
42 #endif
43#elif defined(__APPLE__)
44 #include <AvailabilityMacros.h>
45 #include <TargetConditionals.h>
46 #if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR)
47 #include <mach/vm_statistics.h> // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc.
48 #endif
49 #if !defined(MAC_OS_X_VERSION_10_7)
50 #define MAC_OS_X_VERSION_10_7 1070
51 #endif
52#elif defined(__FreeBSD__) || defined(__DragonFly__)
53 #include <sys/param.h>
54 #if __FreeBSD_version >= 1200000
55 #include <sys/cpuset.h>
56 #include <sys/domainset.h>
57 #endif
58 #include <sys/sysctl.h>
59#endif
60
61#if (defined(__linux__) && !defined(__ANDROID__)) || defined(__FreeBSD__)
62 #define MI_HAS_SYSCALL_H
63 #include <sys/syscall.h>
64#endif
65
66#if !defined(MADV_DONTNEED) && defined(POSIX_MADV_DONTNEED) // QNX
67#define MADV_DONTNEED POSIX_MADV_DONTNEED
68#endif
69#if !defined(MADV_FREE) && defined(POSIX_MADV_FREE) // QNX
70#define MADV_FREE POSIX_MADV_FREE
71#endif
72
73#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this?
74
75//------------------------------------------------------------------------------------
76// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
77// and do allocation themselves; using syscalls prevents recursion when mimalloc is
78// still initializing (issue #713)
79// Declare inline to avoid unused function warnings.
80//------------------------------------------------------------------------------------
81
82#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
83
// Raw system-call versions of open/read/close/access. Each one forwards its
// arguments to `syscall` with the matching SYS_* number and narrows the `long`
// result to the return type of the corresponding libc function.

// Open `fpath` with `open_flags`; the mode argument passed to the kernel is 0.
static inline int mi_prim_open(const char* fpath, int open_flags) {
  const long rc = syscall(SYS_open, fpath, open_flags, 0);
  return (int)rc;
}

// Read up to `bufsize` bytes from `fd` into `buf`.
static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
  const long rc = syscall(SYS_read, fd, buf, bufsize);
  return (ssize_t)rc;
}

// Close the descriptor `fd`.
static inline int mi_prim_close(int fd) {
  const long rc = syscall(SYS_close, fd);
  return (int)rc;
}

// Check accessibility of `fpath` for `mode`.
static inline int mi_prim_access(const char *fpath, int mode) {
  const long rc = syscall(SYS_access, fpath, mode);
  return (int)rc;
}
96
97#else
98
// libc-backed versions of open/read/close/access; each is a direct pass-through.

// Open `fpath` with `open_flags` and return the descriptor (or the libc error result).
static inline int mi_prim_open(const char* fpath, int open_flags) {
  const int fd = open(fpath, open_flags);
  return fd;
}

// Read up to `bufsize` bytes from `fd` into `buf`; returns what `read` returns.
static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
  const ssize_t nread = read(fd, buf, bufsize);
  return nread;
}

// Close `fd`; returns what `close` returns.
static inline int mi_prim_close(int fd) {
  const int rc = close(fd);
  return rc;
}

// Check accessibility of `fpath` for `mode`; returns what `access` returns.
static inline int mi_prim_access(const char *fpath, int mode) {
  const int rc = access(fpath, mode);
  return rc;
}
111
112#endif
113
114
115
116//---------------------------------------------
117// init
118//---------------------------------------------
119
// Report whether the OS is configured to overcommit memory.
// Returns `true` whenever the setting cannot be determined.
static bool unix_detect_overcommit(void) {
#if defined(__linux__)
  // <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting>
  // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE)
  const int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY);
  if (fd < 0) return true;
  char mode[32];
  const ssize_t nread = mi_prim_read(fd, &mode, sizeof(mode));
  mi_prim_close(fd);
  if (nread < 1) return true;
  // only the first character of the file is inspected
  return (mode[0] == '0' || mode[0] == '1');
#elif defined(__FreeBSD__)
  int val = 0;
  size_t olen = sizeof(val);
  if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) != 0) return true;
  return (val != 0);
#else
  // default: overcommit is true
  return true;
#endif
}
145
146void _mi_prim_mem_init( mi_os_mem_config_t* config )
147{
148 long psize = sysconf(_SC_PAGESIZE);
149 if (psize > 0) {
150 config->page_size = (size_t)psize;
151 config->alloc_granularity = (size_t)psize;
152 #if defined(_SC_PHYS_PAGES)
153 long pphys = sysconf(_SC_PHYS_PAGES);
154 const size_t psize_in_kib = (size_t)psize / MI_KiB;
155 if (psize_in_kib > 0 && pphys > 0 && (size_t)pphys <= (SIZE_MAX/psize_in_kib)) {
156 config->physical_memory_in_kib = (size_t)pphys * psize_in_kib;
157 }
158 #endif
159 }
160 config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
161 config->has_overcommit = unix_detect_overcommit();
162 config->has_partial_free = true; // mmap can free in parts
163 config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)
164
165 // disable transparent huge pages for this process?
166 #if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE)
167 #if defined(MI_NO_THP)
168 if (true)
169 #else
170 if (!mi_option_is_enabled(mi_option_allow_large_os_pages)) // disable THP also if large OS pages are not allowed in the options
171 #endif
172 {
173 int val = 0;
174 if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) {
175 // Most likely since distros often come with always/madvise settings.
176 val = 1;
177 // Disabling only for mimalloc process rather than touching system wide settings
178 (void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0);
179 }
180 }
181 #endif
182}
183
184
185//---------------------------------------------
186// free
187//---------------------------------------------
188
// Unmap `size` bytes starting at `addr`.
// Returns 0 on success (or when `size` is 0), otherwise the `errno` left by `munmap`.
int _mi_prim_free(void* addr, size_t size ) {
  if (size == 0) {
    return 0;  // nothing to release
  }
  if (munmap(addr, size) != -1) {
    return 0;
  }
  return errno;
}
194
195
196//---------------------------------------------
197// mmap
198//---------------------------------------------
199
// Apply `advice` to the range [addr, addr+size) using the platform's madvise variant.
// Returns 0 on success, otherwise an error number.
static int unix_madvise(void* addr, size_t size, int advice) {
  #if defined(__sun)
  const int res = madvise((caddr_t)addr, size, advice);  // Solaris needs cast (issue #520)
  return (res==0 ? 0 : errno);
  #elif defined(__QNX__)
  // posix_madvise returns the error number directly (0 on success) and is not
  // specified to set `errno`, so its result is passed through as-is.
  return posix_madvise(addr, size, advice);
  #else
  const int res = madvise(addr, size, advice);
  return (res==0 ? 0 : errno);
  #endif
}
210
// Thin wrapper around `mmap` with a zero offset.
// On Linux with PR_SET_VMA available, a successful mapping is additionally
// labelled "mimalloc" through `prctl`. Returns exactly what `mmap` returned
// (so `MAP_FAILED` on failure).
static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) {
  void* const mapped = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */);
  #if defined(__linux__) && defined(PR_SET_VMA)
  if (mapped == MAP_FAILED || mapped == NULL) {
    return mapped;  // nothing to name
  }
  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, mapped, size, "mimalloc");
  #endif
  return mapped;
}
220
// Map memory while trying to honour `try_alignment`.
// Strategy, in order (each step only where the platform macros allow it):
//   1. BSD `MAP_ALIGNED(n)` / Solaris `MAP_ALIGN` when no address was requested;
//   2. on 64-bit systems without MAP_ALIGNED, an aligned address hint;
//   3. a plain `unix_mmap_prim` call.
// Returns the first mapping that did not fail (it may still be unaligned),
// or NULL if every attempt failed.
static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
  MI_UNUSED(try_alignment);
  void* mem = NULL;
  #if defined(MAP_ALIGNED)  // BSD
  if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
    size_t shift = mi_bsr(try_alignment);
    if (((size_t)1 << shift) == try_alignment && shift >= 12 && shift <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
      mem = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(shift), fd);
      if (mem==MAP_FAILED || !_mi_is_aligned(mem,try_alignment)) {
        int err = errno;
        _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
      }
      if (mem!=MAP_FAILED) return mem;
      // otherwise continue with the regular mmap below
    }
  }
  #elif defined(MAP_ALIGN)  // Solaris
  if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
    mem = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd);  // addr parameter is the required alignment
    if (mem!=MAP_FAILED) return mem;
    // otherwise continue with the regular mmap below
  }
  #endif
  #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
  // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
  if (addr == NULL) {
    void* hint = _mi_os_get_aligned_hint(try_alignment, size);
    if (hint != NULL) {
      mem = unix_mmap_prim(hint, size, protect_flags, flags, fd);
      if (mem==MAP_FAILED || !_mi_is_aligned(mem,try_alignment)) {
        #if MI_TRACK_ENABLED  // asan sometimes does not instrument errno correctly?
        int err = 0;
        #else
        int err = errno;
        #endif
        _mi_trace_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
      }
      if (mem!=MAP_FAILED) return mem;
      // otherwise continue with the regular mmap below
    }
  }
  #endif
  // regular mmap; translate MAP_FAILED into NULL for the caller
  mem = unix_mmap_prim(addr, size, protect_flags, flags, fd);
  return (mem != MAP_FAILED ? mem : NULL);
}
269
// File-descriptor argument to use for anonymous mappings.
// Where `VM_MAKE_TAG` exists (macOS) this encodes the tag from `mi_option_os_tag`,
// replaced by 254 when the option is outside 100..255; elsewhere it is -1.
static int unix_mmap_fd(void) {
  #if !defined(VM_MAKE_TAG)
  return -1;
  #else
  // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
  int tag = (int)mi_option_get(mi_option_os_tag);
  const int in_range = (tag >= 100 && tag <= 255);
  if (!in_range) { tag = 254; }
  return VM_MAKE_TAG(tag);
  #endif
}
280
281static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
282 #if !defined(MAP_ANONYMOUS)
283 #define MAP_ANONYMOUS MAP_ANON
284 #endif
285 #if !defined(MAP_NORESERVE)
286 #define MAP_NORESERVE 0
287 #endif
288 void* p = NULL;
289 const int fd = unix_mmap_fd();
290 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
291 if (_mi_os_has_overcommit()) {
292 flags |= MAP_NORESERVE;
293 }
294 #if defined(PROT_MAX)
295 protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
296 #endif
297 // huge page allocation
298 if (allow_large && (large_only || (_mi_os_use_large_page(size, try_alignment) && mi_option_get(mi_option_allow_large_os_pages) == 1))) {
299 static _Atomic(size_t) large_page_try_ok; // = 0;
300 size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
301 if (!large_only && try_ok > 0) {
302 // If the OS is not configured for large OS pages, or the user does not have
303 // enough permission, the `mmap` will always fail (but it might also fail for other reasons).
304 // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times
305 // to avoid too many failing calls to mmap.
306 mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
307 }
308 else {
309 int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
310 int lfd = fd;
311 #ifdef MAP_ALIGNED_SUPER
312 lflags |= MAP_ALIGNED_SUPER;
313 #endif
314 #ifdef MAP_HUGETLB
315 lflags |= MAP_HUGETLB;
316 #endif
317 #ifdef MAP_HUGE_1GB
318 static bool mi_huge_pages_available = true;
319 if (large_only && (size % MI_GiB) == 0 && mi_huge_pages_available) {
320 lflags |= MAP_HUGE_1GB;
321 }
322 else
323 #endif
324 {
325 #ifdef MAP_HUGE_2MB
326 lflags |= MAP_HUGE_2MB;
327 #endif
328 }
329 #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB
330 lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
331 #endif
332 if (large_only || lflags != flags) {
333 // try large OS page allocation
334 *is_large = true;
335 p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
336 #ifdef MAP_HUGE_1GB
337 if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
338 mi_huge_pages_available = false; // don't try huge 1GiB pages again
339 if (large_only) {
340 _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
341 }
342 lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
343 p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
344 }
345 #endif
346 if (large_only) return p;
347 if (p == NULL) {
348 mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations
349 }
350 }
351 }
352 }
353 // regular allocation
354 if (p == NULL) {
355 *is_large = false;
356 p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd);
357 if (p != NULL) {
358 #if defined(MADV_HUGEPAGE)
359 // Many Linux systems don't allow MAP_HUGETLB but they support instead
360 // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE
361 // though since properly aligned allocations will already use large pages if available
362 // in that case -- in particular for our large regions (in `memory.c`).
363 // However, some systems only allow THP if called with explicit `madvise`, so
364 // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
365 if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
366 if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) {
367 // *is_large = true; // possibly
368 };
369 }
370 #elif defined(__sun)
371 if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
372 struct memcntl_mha cmd = {0};
373 cmd.mha_pagesize = _mi_os_large_page_size();
374 cmd.mha_cmd = MHA_MAPSIZE_VA;
375 if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
376 // *is_large = true; // possibly
377 }
378 }
379 #endif
380 }
381 }
382 return p;
383}
384
385// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
386int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
387 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
388 mi_assert_internal(commit || !allow_large);
389 mi_assert_internal(try_alignment > 0);
390 if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
391 try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations
392 }
393
394 *is_zero = true;
395 int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
396 *addr = unix_mmap(hint_addr, size, try_alignment, protect_flags, false, allow_large, is_large);
397 return (*addr != NULL ? 0 : errno);
398}
399
400
401//---------------------------------------------
402// Commit/Reset
403//---------------------------------------------
404
// Emit an explanatory warning when an mprotect-style failure is ENOMEM.
// Only active on Linux builds with MI_SECURE >= 2; otherwise `err` is ignored.
static void unix_mprotect_hint(int err) {
  #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page
  if (err != ENOMEM) {
    return;
  }
  _mi_warning_message("The next warning may be caused by a low memory map limit.\n"
                      " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n"
                      " For example: sudo sysctl -w vm.max_map_count=262144\n");
  #else
  MI_UNUSED(err);
  #endif
}
416
// Commit a range: make [start, start+size) readable and writable via `mprotect`.
//   is_zero  out: always false. The range may still hold partially committed
//            memory, and `mprotect` does not zero it.
// Returns 0 on success, otherwise `errno` (after giving `unix_mprotect_hint` a chance to warn).
int _mi_prim_commit(void* start, size_t size, bool* is_zero) {
  *is_zero = false;
  if (mprotect(start, size, (PROT_READ | PROT_WRITE)) == 0) {
    return 0;
  }
  const int err = errno;
  unix_mprotect_hint(err);
  return err;
}
431
// Mark a range as being reused. On Apple platforms that define MADV_FREE_REUSE
// this issues that advice and returns its result; everywhere else it returns 0.
int _mi_prim_reuse(void* start, size_t size) {
  MI_UNUSED(start);
  MI_UNUSED(size);
  #if defined(__APPLE__) && defined(MADV_FREE_REUSE)
  return unix_madvise(start, size, MADV_FREE_REUSE);
  #else
  return 0;
  #endif
}
439
440int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
441 int err = 0;
442 #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
443 // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097)
444 err = unix_madvise(start, size, MADV_FREE_REUSABLE);
445 if (err) { err = unix_madvise(start, size, MADV_DONTNEED); }
446 #else
447 // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
448 err = unix_madvise(start, size, MADV_DONTNEED);
449 #endif
450 #if !MI_DEBUG && MI_SECURE<=2
451 *needs_recommit = false;
452 #else
453 *needs_recommit = true;
454 mprotect(start, size, PROT_NONE);
455 #endif
456 /*
457 // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss)
458 *needs_recommit = true;
459 const int fd = unix_mmap_fd();
460 void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
461 if (p != start) { err = errno; }
462 */
463 return err;
464}
465
466int _mi_prim_reset(void* start, size_t size) {
467 int err = 0;
468
469 // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower)
470 #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
471 err = unix_madvise(start, size, MADV_FREE_REUSABLE);
472 if (err==0) return 0;
473 // fall through
474 #endif
475
476 #if defined(MADV_FREE)
477 // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it
478 // will not reduce the `rss` stats in tools like `top` even though the memory is available
479 // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by
480 // default `MADV_DONTNEED` is used though.
481 static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
482 int oadvice = (int)mi_atomic_load_relaxed(&advice);
483 while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; };
484 if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
485 // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
486 mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
487 err = unix_madvise(start, size, MADV_DONTNEED);
488 }
489 #else
490 err = unix_madvise(start, size, MADV_DONTNEED);
491 #endif
492 return err;
493}
494
// Protect (PROT_NONE) or unprotect (read+write) the range [start, start+size).
// Returns 0 on success, otherwise `errno`; the result is always passed to
// `unix_mprotect_hint`.
int _mi_prim_protect(void* start, size_t size, bool protect) {
  const int prot = (protect ? PROT_NONE : (PROT_READ | PROT_WRITE));
  int err = 0;
  if (mprotect(start, size, prot) != 0) {
    err = errno;
  }
  unix_mprotect_hint(err);
  return err;
}
501
502
503
504//---------------------------------------------
505// Huge page allocation
506//---------------------------------------------
507
508#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__)
509
510#ifndef MPOL_PREFERRED
511#define MPOL_PREFERRED 1
512#endif
513
514#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind)
// Issue the `mbind` system call directly, forwarding all arguments unchanged.
static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
  const long rc = syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
  return rc;
}
518#else
// Stand-in used when the `mbind` system call is unavailable: ignores every
// argument and reports success.
static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
  MI_UNUSED(start);
  MI_UNUSED(len);
  MI_UNUSED(mode);
  MI_UNUSED(nmask);
  MI_UNUSED(maxnode);
  MI_UNUSED(flags);
  return 0L;
}
523#endif
524
525int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
526 bool is_large = true;
527 *is_zero = true;
528 *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
529 if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
530 unsigned long numa_mask = (1UL << numa_node);
531 // TODO: does `mbind` work correctly for huge OS pages? should we
532 // use `set_mempolicy` before calling mmap instead?
533 // see: <https://lkml.org/lkml/2017/2/9/875>
534 long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
535 if (err != 0) {
536 err = errno;
537 _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err);
538 }
539 }
540 return (*addr != NULL ? 0 : errno);
541}
542
543#else
544
// Huge OS pages are not supported in this configuration: report ENOMEM,
// with `*addr` set to NULL and `*is_zero` set to false.
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
  MI_UNUSED(hint_addr);
  MI_UNUSED(size);
  MI_UNUSED(numa_node);
  *addr = NULL;
  *is_zero = false;
  return ENOMEM;
}
551
552#endif
553
554//---------------------------------------------
555// NUMA nodes
556//---------------------------------------------
557
558#if defined(__linux__)
559
// Return the NUMA node the calling thread is currently running on (Linux).
// Falls back to 0 when the `getcpu` system call is unavailable or fails.
size_t _mi_prim_numa_node(void) {
  #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu)
  // the kernel stores both results as `unsigned int`, so the receiving
  // variables must have exactly that type
  unsigned int node = 0;
  unsigned int ncpu = 0;
  const long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
  if (err != 0) return 0;
  return (size_t)node;
  #else
  return 0;
  #endif
}
571
// Count NUMA nodes by probing /sys/devices/system/node/node<N> for N = 1, 2, ...
// until an entry is not readable (at most 256 probes). Node 0 is not probed.
size_t _mi_prim_numa_node_count(void) {
  char path[128];
  unsigned idx = 0;
  while (idx < 256) {
    // enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
    _mi_snprintf(path, 127, "/sys/devices/system/node/node%u", idx + 1);
    if (mi_prim_access(path, R_OK) != 0) {
      break;
    }
    idx++;
  }
  return (idx + 1);
}
582
583#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000
584
585size_t _mi_prim_numa_node(void) {
586 domainset_t dom;
587 size_t node;
588 int policy;
589 if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul;
590 for (node = 0; node < MAXMEMDOM; node++) {
591 if (DOMAINSET_ISSET(node, &dom)) return node;
592 }
593 return 0ul;
594}
595
// Return the value of the `vm.ndomains` sysctl (FreeBSD), or 0 if it cannot be read.
size_t _mi_prim_numa_node_count(void) {
  size_t ndomains = 0;
  size_t len = sizeof(ndomains);
  const int rc = sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0);
  return (rc == -1 ? 0ul : ndomains);
}
602
603#elif defined(__DragonFly__)
604
// DragonFly: the current node is always reported as 0.
size_t _mi_prim_numa_node(void) {
  // TODO: DragonFly does not seem to provide any userland means to get this information.
  const size_t node = 0ul;
  return node;
}

// DragonFly: returns the product of the `hw.ncpu` and `hw.cpu_topology_ht_ids`
// sysctl values, or 0 if either cannot be read.
size_t _mi_prim_numa_node_count(void) {
  size_t ncpus = 0;
  size_t nvirtcoresperphys = 0;
  size_t len = sizeof(size_t);
  const int rc_cpus = sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0);
  if (rc_cpus == -1) {
    return 0ul;
  }
  const int rc_ht = sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0);
  if (rc_ht == -1) {
    return 0ul;
  }
  return nvirtcoresperphys * ncpus;
}
617
618#else
619
// Generic fallback: there is a single node, numbered 0.
size_t _mi_prim_numa_node(void) {
  const size_t only_node = 0;
  return only_node;
}

// Generic fallback: report exactly one NUMA node.
size_t _mi_prim_numa_node_count(void) {
  const size_t node_count = 1;
  return node_count;
}
627
628#endif
629
630// ----------------------------------------------------------------
631// Clock
632// ----------------------------------------------------------------
633
634#include <time.h>
635
636#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)
637
638mi_msecs_t _mi_prim_clock_now(void) {
639 struct timespec t;
640 #ifdef CLOCK_MONOTONIC
641 clock_gettime(CLOCK_MONOTONIC, &t);
642 #else
643 clock_gettime(CLOCK_REALTIME, &t);
644 #endif
645 return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
646}
647
648#else
649
650// low resolution timer
651mi_msecs_t _mi_prim_clock_now(void) {
652 #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
653 return (mi_msecs_t)clock();
654 #elif (CLOCKS_PER_SEC < 1000)
655 return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
656 #else
657 return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
658 #endif
659}
660
661#endif
662
663
664
665
666//----------------------------------------------------------------
667// Process info
668//----------------------------------------------------------------
669
670#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)
671#include <stdio.h>
672#include <unistd.h>
673#include <sys/resource.h>
674
675#if defined(__APPLE__)
676#include <mach/mach.h>
677#endif
678
679#if defined(__HAIKU__)
680#include <kernel/OS.h>
681#endif
682
683static mi_msecs_t timeval_secs(const struct timeval* tv) {
684 return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
685}
686
687void _mi_prim_process_info(mi_process_info_t* pinfo)
688{
689 struct rusage rusage;
690 getrusage(RUSAGE_SELF, &rusage);
691 pinfo->utime = timeval_secs(&rusage.ru_utime);
692 pinfo->stime = timeval_secs(&rusage.ru_stime);
693#if !defined(__HAIKU__)
694 pinfo->page_faults = rusage.ru_majflt;
695#endif
696#if defined(__HAIKU__)
697 // Haiku does not have (yet?) a way to
698 // get these stats per process
699 thread_info tid;
700 area_info mem;
701 ssize_t c;
702 get_thread_info(find_thread(0), &tid);
703 while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
704 pinfo->peak_rss += mem.ram_size;
705 }
706 pinfo->page_faults = 0;
707#elif defined(__APPLE__)
708 pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes
709 #ifdef MACH_TASK_BASIC_INFO
710 struct mach_task_basic_info info;
711 mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
712 if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
713 pinfo->current_rss = (size_t)info.resident_size;
714 }
715 #else
716 struct task_basic_info info;
717 mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT;
718 if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
719 pinfo->current_rss = (size_t)info.resident_size;
720 }
721 #endif
722#else
723 pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB
724#endif
725 // use defaults for commit
726}
727
728#else
729
730#ifndef __wasi__
731// WebAssembly instances are not processes
732#pragma message("define a way to get process info")
733#endif
734
735void _mi_prim_process_info(mi_process_info_t* pinfo)
736{
737 // use defaults
738 MI_UNUSED(pinfo);
739}
740
741#endif
742
743
744//----------------------------------------------------------------
745// Output
746//----------------------------------------------------------------
747
// Write `msg` to the standard error stream.
void _mi_prim_out_stderr( const char* msg ) {
  FILE* const out = stderr;
  fputs(msg, out);
}
751
752
753//----------------------------------------------------------------
754// Environment
755//----------------------------------------------------------------
756
757#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0)
758// On POSIX systems use `environ` to access environment variables
759// even before the C runtime is initialized.
760#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>)
761#include <crt_externs.h>
// Return the environment array through `_NSGetEnviron` (Apple).
static char** mi_get_environ(void) {
  char*** const penv = _NSGetEnviron();
  return *penv;
}
765#else
extern char** environ;

// Return the process environment array (the global `environ`).
static char** mi_get_environ(void) {
  char** const env = environ;
  return env;
}
770#endif
// Look up `name` in the environment array (compared case-insensitively).
//   result / result_size  destination buffer for the value, filled with `_mi_strlcpy`
// Returns true when an entry "name=value" was found among the first 10000 entries;
// false for a NULL or empty name, a missing environment, or no match.
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
  if (name==NULL) return false;
  const size_t name_len = _mi_strlen(name);
  if (name_len == 0) return false;
  char** const envp = mi_get_environ();
  if (envp == NULL) return false;
  // compare up to 10000 entries
  int idx = 0;
  while (idx < 10000 && envp[idx] != NULL) {
    const char* entry = envp[idx];
    idx++;
    // the entry must start with `name` (case insensitive) directly followed by '='
    if (_mi_strnicmp(name, entry, name_len) != 0) continue;
    if (entry[name_len] != '=') continue;
    _mi_strlcpy(result, entry + name_len + 1, result_size);
    return true;
  }
  return false;
}
788#else
789// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime
// Look up `name` with the standard `getenv`, retrying with an upper-cased copy
// of the name (at most 64 characters) when the first lookup finds nothing.
// Returns false while preloading, when nothing is found, or when the value
// does not fit in `result_size`; otherwise copies the value into `result`.
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
  // cannot call getenv() when still initializing the C runtime.
  if (_mi_preloading()) return false;
  const char* value = getenv(name);
  if (value == NULL) {
    // we check the upper case name too.
    char upper[64+1];
    const size_t n = _mi_strnlen(name, sizeof(upper)-1);
    size_t i = 0;
    while (i < n) {
      upper[i] = _mi_toupper(name[i]);
      i++;
    }
    upper[n] = 0;
    value = getenv(upper);
  }
  if (value == NULL) return false;
  if (_mi_strnlen(value, result_size) >= result_size) return false;
  _mi_strlcpy(result, value, result_size);
  return true;
}
808#endif // !MI_USE_ENVIRON
809
810
811//----------------------------------------------------------------
812// Random
813//----------------------------------------------------------------
814
815#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15)
816#include <CommonCrypto/CommonCryptoError.h>
817#include <CommonCrypto/CommonRandom.h>
818
819bool _mi_prim_random_buf(void* buf, size_t buf_len) {
820 // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
821 // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
822 return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
823}
824
825#elif defined(__ANDROID__) || defined(__DragonFly__) || \
826 defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
827 defined(__sun) || \
828 (defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))
829
// Fill `buf` with `buf_len` random bytes via `arc4random_buf`; always reports success.
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
  const bool ok = true;  // arc4random_buf has no failure result to inspect
  arc4random_buf(buf, buf_len);
  return ok;
}
834
835#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829)
836
837#include <sys/types.h>
838#include <sys/stat.h>
839#include <errno.h>
840
// Fill `buf` with `buf_len` random bytes.
// Uses the `getrandom` system call when available and, if that reports ENOSYS,
// remembers this and reads from /dev/urandom instead.
// Returns true only when the whole buffer was filled.
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
  // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
  // and for the latter the actual `getrandom` call is not always defined.
  // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
  // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
  #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom)
    #ifndef GRND_NONBLOCK
    #define GRND_NONBLOCK (1)
    #endif
    static _Atomic(uintptr_t) no_getrandom; // = 0
    if (mi_atomic_load_acquire(&no_getrandom)==0) {
      ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
      if (ret >= 0) return (buf_len == (size_t)ret);
      if (errno != ENOSYS) return false;
      mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom
    }
  #endif
  int flags = O_RDONLY;
  #if defined(O_CLOEXEC)
  flags |= O_CLOEXEC;
  #endif
  int fd = mi_prim_open("/dev/urandom", flags);
  if (fd < 0) return false;
  size_t count = 0;
  while(count < buf_len) {
    const ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count);
    if (ret > 0) {
      count += (size_t)ret;
      continue;
    }
    // A zero-length read does not set `errno`, so it must end the loop on its
    // own; consulting a stale EAGAIN/EINTR here could otherwise spin forever.
    if (ret == 0) break;
    // ret < 0: retry only for transient errors
    if (errno!=EAGAIN && errno!=EINTR) break;
  }
  mi_prim_close(fd);
  return (count==buf_len);
}
877
878#else
879
// No OS random source is available on this platform: always reports failure
// and leaves `buf` untouched.
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
  // mark the parameters as intentionally unused, like the other fallback stubs in this file
  MI_UNUSED(buf);
  MI_UNUSED(buf_len);
  return false;
}
883
884#endif
885
886
887//----------------------------------------------------------------
888// Thread init/done
889//----------------------------------------------------------------
890
891#if defined(MI_USE_PTHREADS)
892
893// use pthread local storage keys to detect thread ending
894// (and used with MI_TLS_PTHREADS for the default heap)
895pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
896
897static void mi_pthread_done(void* value) {
898 if (value!=NULL) {
899 _mi_thread_done((mi_heap_t*)value);
900 }
901}
902
903void _mi_prim_thread_init_auto_done(void) {
904 mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
905 pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
906}
907
908void _mi_prim_thread_done_auto_done(void) {
909 if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
910 pthread_key_delete(_mi_heap_default_key);
911 }
912}
913
914void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
915 if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
916 pthread_setspecific(_mi_heap_default_key, heap);
917 }
918}
919
920#else
921
922void _mi_prim_thread_init_auto_done(void) {
923 // nothing
924}
925
926void _mi_prim_thread_done_auto_done(void) {
927 // nothing
928}
929
930void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
931 MI_UNUSED(heap);
932}
933
934#endif
935