microsoft/qdk

Public

mirrored fromhttps://github.com/microsoft/qdkAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
iadavis/openqasm-extern-compilation

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

source/allocator/mimalloc-sys/mimalloc/src/prim/windows/prim.c

878lines · modecode

1/* ----------------------------------------------------------------------------
2Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3This is free software; you can redistribute it and/or modify it under the
4terms of the MIT license. A copy of the license can be found in the file
5"LICENSE" at the root of this distribution.
6-----------------------------------------------------------------------------*/
7
8// This file is included in `src/prim/prim.c`
9
10#include "mimalloc.h"
11#include "mimalloc/internal.h"
12#include "mimalloc/prim.h"
13#include <stdio.h> // fputs, stderr
14
15// xbox has no console IO
16#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM)
17#define MI_HAS_CONSOLE_IO
18#endif
19
20//---------------------------------------------
21// Dynamically bind Windows API points for portability
22//---------------------------------------------
23
24// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
25// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
26// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB)
27// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's.
28typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E {
29 MiMemExtendedParameterInvalidType = 0,
30 MiMemExtendedParameterAddressRequirements,
31 MiMemExtendedParameterNumaNode,
32 MiMemExtendedParameterPartitionHandle,
33 MiMemExtendedParameterUserPhysicalHandle,
34 MiMemExtendedParameterAttributeFlags,
35 MiMemExtendedParameterMax
36} MI_MEM_EXTENDED_PARAMETER_TYPE;
37
38typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S {
39 struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type;
40 union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg;
41} MI_MEM_EXTENDED_PARAMETER;
42
43typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S {
44 PVOID LowestStartingAddress;
45 PVOID HighestEndingAddress;
46 SIZE_T Alignment;
47} MI_MEM_ADDRESS_REQUIREMENTS;
48
49#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010
50
51#include <winternl.h>
52typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
53typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084)
54static PVirtualAlloc2 pVirtualAlloc2 = NULL;
55static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
56
57// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox)
58typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER;
59
60typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
61typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber);
62typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
63typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber);
64typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask);
65typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node);
66static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
67static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
68static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
69static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;
70static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL;
71static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL;
72
73// Not available on xbox
74typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID);
75static PGetLargePageMinimum pGetLargePageMinimum = NULL;
76
77// Available after Windows XP
78typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes );
79
80//---------------------------------------------
81// Enable large page support dynamically (if possible)
82//---------------------------------------------
83
84static bool win_enable_large_os_pages(size_t* large_page_size)
85{
86 static bool large_initialized = false;
87 if (large_initialized) return (_mi_os_large_page_size() > 0);
88 large_initialized = true;
89 if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.)
90
91 // Try to see if large OS pages are supported
92 // To use large pages on Windows, we first need access permission
93 // Set "Lock pages in memory" permission in the group policy editor
94 // <https://devblogs.microsoft.com/oldnewthing/20110128-00/?p=11643>
95 unsigned long err = 0;
96 HANDLE token = NULL;
97 BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
98 if (ok) {
99 TOKEN_PRIVILEGES tp;
100 ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid);
101 if (ok) {
102 tp.PrivilegeCount = 1;
103 tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
104 ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
105 if (ok) {
106 err = GetLastError();
107 ok = (err == ERROR_SUCCESS);
108 if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) {
109 *large_page_size = (*pGetLargePageMinimum)();
110 }
111 }
112 }
113 CloseHandle(token);
114 }
115 if (!ok) {
116 if (err == 0) err = GetLastError();
117 _mi_warning_message("cannot enable large OS page support, error %lu\n", err);
118 }
119 return (ok!=0);
120}
121
122
123//---------------------------------------------
124// Initialize
125//---------------------------------------------
126
127void _mi_prim_mem_init( mi_os_mem_config_t* config )
128{
129 config->has_overcommit = false;
130 config->has_partial_free = false;
131 config->has_virtual_reserve = true;
132 // get the page size
133 SYSTEM_INFO si;
134 GetSystemInfo(&si);
135 if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
136 if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
137 // get virtual address bits
138 if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
139 const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
140 config->virtual_address_bits = vbits;
141 }
142
143 // get the VirtualAlloc2 function
144 HINSTANCE hDll;
145 hDll = LoadLibrary(TEXT("kernelbase.dll"));
146 if (hDll != NULL) {
147 // use VirtualAlloc2FromApp if possible as it is available to Windows store apps
148 pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
149 if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
150 FreeLibrary(hDll);
151 }
152 // NtAllocateVirtualMemoryEx is used for huge page allocation
153 hDll = LoadLibrary(TEXT("ntdll.dll"));
154 if (hDll != NULL) {
155 pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
156 FreeLibrary(hDll);
157 }
158 // Try to use Win7+ numa API
159 hDll = LoadLibrary(TEXT("kernel32.dll"));
160 if (hDll != NULL) {
161 pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
162 pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
163 pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
164 pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
165 pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask");
166 pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber");
167 pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum");
168 // Get physical memory (not available on XP, so check dynamically)
169 PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory");
170 if (pGetPhysicallyInstalledSystemMemory != NULL) {
171 ULONGLONG memInKiB = 0;
172 if ((*pGetPhysicallyInstalledSystemMemory)(&memInKiB)) {
173 if (memInKiB > 0 && memInKiB <= SIZE_MAX) {
174 config->physical_memory_in_kib = (size_t)memInKiB;
175 }
176 }
177 }
178 FreeLibrary(hDll);
179 }
180 // Enable large/huge OS page support?
181 if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
182 win_enable_large_os_pages(&config->large_page_size);
183 }
184}
185
186
187//---------------------------------------------
188// Free
189//---------------------------------------------
190
191int _mi_prim_free(void* addr, size_t size ) {
192 MI_UNUSED(size);
193 DWORD errcode = 0;
194 bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
195 if (err) { errcode = GetLastError(); }
196 if (errcode == ERROR_INVALID_ADDRESS) {
197 // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside
198 // the memory region returned by VirtualAlloc; in that case we need to free using
199 // the start of the region.
200 MEMORY_BASIC_INFORMATION info; _mi_memzero_var(info);
201 VirtualQuery(addr, &info, sizeof(info));
202 if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) {
203 errcode = 0;
204 err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
205 if (err) { errcode = GetLastError(); }
206 }
207 }
208 return (int)errcode;
209}
210
211
212//---------------------------------------------
213// VirtualAlloc
214//---------------------------------------------
215
216static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_alignment, DWORD flags) {
217 #if (MI_INTPTR_SIZE >= 8)
218 // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
219 if (addr == NULL) {
220 void* hint = _mi_os_get_aligned_hint(try_alignment,size);
221 if (hint != NULL) {
222 void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
223 if (p != NULL) return p;
224 _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
225 // fall through on error
226 }
227 }
228 #endif
229 // on modern Windows try use VirtualAlloc2 for aligned allocation
230 if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
231 MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
232 reqs.Alignment = try_alignment;
233 MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
234 param.Type.Type = MiMemExtendedParameterAddressRequirements;
235 param.Arg.Pointer = &reqs;
236 void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
237 if (p != NULL) return p;
238 _mi_warning_message("unable to allocate aligned OS memory (0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags);
239 // fall through on error
240 }
241 // last resort
242 return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
243}
244
245static bool win_is_out_of_memory_error(DWORD err) {
246 switch (err) {
247 case ERROR_COMMITMENT_MINIMUM:
248 case ERROR_COMMITMENT_LIMIT:
249 case ERROR_PAGEFILE_QUOTA:
250 case ERROR_NOT_ENOUGH_MEMORY:
251 return true;
252 default:
253 return false;
254 }
255}
256
257static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) {
258 long max_retry_msecs = mi_option_get_clamp(mi_option_retry_on_oom, 0, 2000); // at most 2 seconds
259 if (max_retry_msecs == 1) { max_retry_msecs = 100; } // if one sets the option to "true"
260 for (long tries = 1; tries <= 10; tries++) { // try at most 10 times (=2200ms)
261 void* p = win_virtual_alloc_prim_once(addr, size, try_alignment, flags);
262 if (p != NULL) {
263 // success, return the address
264 return p;
265 }
266 else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
267 (flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
268 win_is_out_of_memory_error(GetLastError())) {
269 // if committing regular memory and being out-of-memory,
270 // keep trying for a bit in case memory frees up after all. See issue #894
271 _mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags);
272 long sleep_msecs = tries*40; // increasing waits
273 if (sleep_msecs > max_retry_msecs) { sleep_msecs = max_retry_msecs; }
274 max_retry_msecs -= sleep_msecs;
275 Sleep(sleep_msecs);
276 }
277 else {
278 // otherwise return with an error
279 break;
280 }
281 }
282 return NULL;
283}
284
285static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
286 mi_assert_internal(!(large_only && !allow_large));
287 static _Atomic(size_t) large_page_try_ok; // = 0;
288 void* p = NULL;
289 // Try to allocate large OS pages (2MiB) if allowed or required.
290 if ((large_only || _mi_os_use_large_page(size, try_alignment))
291 && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
292 size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
293 if (!large_only && try_ok > 0) {
294 // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
295 // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
296 mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
297 }
298 else {
299 // large OS pages must always reserve and commit.
300 *is_large = true;
301 p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES);
302 if (large_only) return p;
303 // fall back to non-large page allocation on error (`p == NULL`).
304 if (p == NULL) {
305 mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations
306 }
307 }
308 }
309 // Fall back to regular page allocation
310 if (p == NULL) {
311 *is_large = ((flags&MEM_LARGE_PAGES) != 0);
312 p = win_virtual_alloc_prim(addr, size, try_alignment, flags);
313 }
314 //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); }
315 return p;
316}
317
318int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
319 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
320 mi_assert_internal(commit || !allow_large);
321 mi_assert_internal(try_alignment > 0);
322 *is_zero = true;
323 int flags = MEM_RESERVE;
324 if (commit) { flags |= MEM_COMMIT; }
325 *addr = win_virtual_alloc(hint_addr, size, try_alignment, flags, false, allow_large, is_large);
326 return (*addr != NULL ? 0 : (int)GetLastError());
327}
328
329
330//---------------------------------------------
331// Commit/Reset/Protect
332//---------------------------------------------
333#ifdef _MSC_VER
334#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit)
335#endif
336
337int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
338 *is_zero = false;
339 /*
340 // zero'ing only happens on an initial commit... but checking upfront seems expensive..
341 _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo);
342 if (VirtualQuery(addr, &meminfo, size) > 0) {
343 if ((meminfo.State & MEM_COMMIT) == 0) {
344 *is_zero = true;
345 }
346 }
347 */
348 // commit
349 void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
350 if (p == NULL) return (int)GetLastError();
351 return 0;
352}
353
354int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
355 BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
356 *needs_recommit = true; // for safety, assume always decommitted even in the case of an error.
357 return (ok ? 0 : (int)GetLastError());
358}
359
360int _mi_prim_reset(void* addr, size_t size) {
361 void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
362 mi_assert_internal(p == addr);
363 #if 0
364 if (p != NULL) {
365 VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set
366 }
367 #endif
368 return (p != NULL ? 0 : (int)GetLastError());
369}
370
// Nothing needs to be done on Windows before reusing memory; always returns 0.
int _mi_prim_reuse(void* addr, size_t size) {
  MI_UNUSED(addr);
  MI_UNUSED(size);
  return 0;
}
375
376int _mi_prim_protect(void* addr, size_t size, bool protect) {
377 DWORD oldprotect = 0;
378 BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect);
379 return (ok ? 0 : (int)GetLastError());
380}
381
382
383//---------------------------------------------
384// Huge page allocation
385//---------------------------------------------
386
387static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node)
388{
389 const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
390
391 win_enable_large_os_pages(NULL);
392
393 MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} };
394 // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
395 static bool mi_huge_pages_available = true;
396 if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
397 params[0].Type.Type = MiMemExtendedParameterAttributeFlags;
398 params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
399 ULONG param_count = 1;
400 if (numa_node >= 0) {
401 param_count++;
402 params[1].Type.Type = MiMemExtendedParameterNumaNode;
403 params[1].Arg.ULong = (unsigned)numa_node;
404 }
405 SIZE_T psize = size;
406 void* base = hint_addr;
407 LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
408 if (err == 0 && base != NULL) {
409 return base;
410 }
411 else {
412 // fall back to regular large pages
413 mi_huge_pages_available = false; // don't try further huge pages
414 _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
415 }
416 }
417 // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
418 if (pVirtualAlloc2 != NULL && numa_node >= 0) {
419 params[0].Type.Type = MiMemExtendedParameterNumaNode;
420 params[0].Arg.ULong = (unsigned)numa_node;
421 return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1);
422 }
423
424 // otherwise use regular virtual alloc on older windows
425 return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
426}
427
// Allocate huge OS pages; the result is stored in `*addr` and `*is_zero` is always set to true.
// Returns 0 on success, or the Windows error code otherwise.
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
  *is_zero = true;
  void* const pages = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
  *addr = pages;
  if (pages == NULL) {
    return (int)GetLastError();
  }
  return 0;
}
433
434
435//---------------------------------------------
436// Numa nodes
437//---------------------------------------------
438
439size_t _mi_prim_numa_node(void) {
440 USHORT numa_node = 0;
441 if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
442 // Extended API is supported
443 MI_PROCESSOR_NUMBER pnum;
444 (*pGetCurrentProcessorNumberEx)(&pnum);
445 USHORT nnode = 0;
446 BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
447 if (ok) { numa_node = nnode; }
448 }
449 else if (pGetNumaProcessorNode != NULL) {
450 // Vista or earlier, use older API that is limited to 64 processors. Issue #277
451 DWORD pnum = GetCurrentProcessorNumber();
452 UCHAR nnode = 0;
453 BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode);
454 if (ok) { numa_node = nnode; }
455 }
456 return numa_node;
457}
458
459size_t _mi_prim_numa_node_count(void) {
460 ULONG numa_max = 0;
461 if (pGetNumaHighestNodeNumber!=NULL) {
462 (*pGetNumaHighestNodeNumber)(&numa_max);
463 }
464 // find the highest node number that has actual processors assigned to it. Issue #282
465 while (numa_max > 0) {
466 if (pGetNumaNodeProcessorMaskEx != NULL) {
467 // Extended API is supported
468 GROUP_AFFINITY affinity;
469 if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) {
470 if (affinity.Mask != 0) break; // found the maximum non-empty node
471 }
472 }
473 else {
474 // Vista or earlier, use older API that is limited to 64 processors.
475 ULONGLONG mask;
476 if (pGetNumaNodeProcessorMask != NULL) {
477 if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) {
478 if (mask != 0) break; // found the maximum non-empty node
479 }
480 };
481 }
482 // max node was invalid or had no processor assigned, try again
483 numa_max--;
484 }
485 return ((size_t)numa_max + 1);
486}
487
488
489//----------------------------------------------------------------
490// Clock
491//----------------------------------------------------------------
492
493static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
494 static LARGE_INTEGER mfreq; // = 0
495 if (mfreq.QuadPart == 0LL) {
496 LARGE_INTEGER f;
497 QueryPerformanceFrequency(&f);
498 mfreq.QuadPart = f.QuadPart/1000LL;
499 if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
500 }
501 return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
502}
503
504mi_msecs_t _mi_prim_clock_now(void) {
505 LARGE_INTEGER t;
506 QueryPerformanceCounter(&t);
507 return mi_to_msecs(t);
508}
509
510
511//----------------------------------------------------------------
512// Process Info
513//----------------------------------------------------------------
514
515#include <psapi.h>
516
517static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
518 ULARGE_INTEGER i;
519 i.LowPart = ftime->dwLowDateTime;
520 i.HighPart = ftime->dwHighDateTime;
521 mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds
522 return msecs;
523}
524
525typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD);
526static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
527
528void _mi_prim_process_info(mi_process_info_t* pinfo)
529{
530 FILETIME ct;
531 FILETIME ut;
532 FILETIME st;
533 FILETIME et;
534 GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
535 pinfo->utime = filetime_msecs(&ut);
536 pinfo->stime = filetime_msecs(&st);
537
538 // load psapi on demand
539 if (pGetProcessMemoryInfo == NULL) {
540 HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
541 if (hDll != NULL) {
542 pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
543 }
544 }
545
546 // get process info
547 PROCESS_MEMORY_COUNTERS info; _mi_memzero_var(info);
548 if (pGetProcessMemoryInfo != NULL) {
549 pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
550 }
551 pinfo->current_rss = (size_t)info.WorkingSetSize;
552 pinfo->peak_rss = (size_t)info.PeakWorkingSetSize;
553 pinfo->current_commit = (size_t)info.PagefileUsage;
554 pinfo->peak_commit = (size_t)info.PeakPagefileUsage;
555 pinfo->page_faults = (size_t)info.PageFaultCount;
556}
557
558//----------------------------------------------------------------
559// Output
560//----------------------------------------------------------------
561
562void _mi_prim_out_stderr( const char* msg )
563{
564 // on windows with redirection, the C runtime cannot handle locale dependent output
565 // after the main thread closes so we use direct console output.
566 if (!_mi_preloading()) {
567 // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console
568 static HANDLE hcon = INVALID_HANDLE_VALUE;
569 static bool hconIsConsole = false;
570 if (hcon == INVALID_HANDLE_VALUE) {
571 hcon = GetStdHandle(STD_ERROR_HANDLE);
572 #ifdef MI_HAS_CONSOLE_IO
573 CONSOLE_SCREEN_BUFFER_INFO sbi;
574 hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi));
575 #endif
576 }
577 const size_t len = _mi_strlen(msg);
578 if (len > 0 && len < UINT32_MAX) {
579 DWORD written = 0;
580 if (hconIsConsole) {
581 #ifdef MI_HAS_CONSOLE_IO
582 WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
583 #endif
584 }
585 else if (hcon != INVALID_HANDLE_VALUE) {
586 // use direct write if stderr was redirected
587 WriteFile(hcon, msg, (DWORD)len, &written, NULL);
588 }
589 else {
590 // finally fall back to fputs after all
591 fputs(msg, stderr);
592 }
593 }
594 }
595}
596
597
598//----------------------------------------------------------------
599// Environment
600//----------------------------------------------------------------
601
602// On Windows use GetEnvironmentVariable instead of getenv to work
603// reliably even when this is invoked before the C runtime is initialized.
604// i.e. when `_mi_preloading() == true`.
605// Note: on windows, environment names are not case sensitive.
606bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
607 result[0] = 0;
608 size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
609 return (len > 0 && len < result_size);
610}
611
612
613//----------------------------------------------------------------
614// Random
615//----------------------------------------------------------------
616
617#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus)
618// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
619// dynamic overriding, we observed it can raise an exception when compiled with C++, and
620// sometimes deadlocks when also running under the VS debugger.
621// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom.
622// To be continued..
623#pragma comment (lib,"advapi32.lib")
624#define RtlGenRandom SystemFunction036
625mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
626
627bool _mi_prim_random_buf(void* buf, size_t buf_len) {
628 return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
629}
630
631#else
632
633#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG
634#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002
635#endif
636
637typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG);
638static PBCryptGenRandom pBCryptGenRandom = NULL;
639
640bool _mi_prim_random_buf(void* buf, size_t buf_len) {
641 if (pBCryptGenRandom == NULL) {
642 HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll"));
643 if (hDll != NULL) {
644 pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom");
645 }
646 if (pBCryptGenRandom == NULL) return false;
647 }
648 return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
649}
650
651#endif // MI_USE_RTLGENRANDOM
652
653
654
655//----------------------------------------------------------------
656// Process & Thread Init/Done
657//----------------------------------------------------------------
658
659#if MI_WIN_USE_FIXED_TLS==1
660mi_decl_cache_align size_t _mi_win_tls_offset = 0;
661#endif
662
663//static void mi_debug_out(const char* s) {
664// HANDLE h = GetStdHandle(STD_ERROR_HANDLE);
665// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL);
666//}
667
668static void mi_win_tls_init(DWORD reason) {
669 if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) {
670 #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically
671 if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) {
672 const DWORD tls_slot = TlsAlloc(); // usually returns slot 1
673 if (tls_slot == TLS_OUT_OF_INDEXES) {
674 _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n");
675 }
676 _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*);
677 }
678 #endif
679 #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation
680 if (mi_prim_get_default_heap() == NULL) {
681 _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty);
682 #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1
683 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*)));
684 mi_assert_internal(p == (void*)&_mi_heap_empty);
685 #endif
686 }
687 #endif
688 }
689}
690
691static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
692 MI_UNUSED(reserved);
693 MI_UNUSED(module);
694 mi_win_tls_init(reason);
695 if (reason==DLL_PROCESS_ATTACH) {
696 _mi_auto_process_init();
697 }
698 else if (reason==DLL_PROCESS_DETACH) {
699 _mi_auto_process_done();
700 }
701 else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) {
702 _mi_thread_done(NULL);
703 }
704}
705
706
707#if defined(MI_SHARED_LIB)
708 #define MI_PRIM_HAS_PROCESS_ATTACH 1
709
710 // Windows DLL: easy to hook into process_init and thread_done
711 BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
712 mi_win_main((PVOID)inst,reason,reserved);
713 return TRUE;
714 }
715
716 // nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event.
717 void _mi_prim_thread_init_auto_done(void) { }
718 void _mi_prim_thread_done_auto_done(void) { }
719 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
720 MI_UNUSED(heap);
721 }
722
723#elif !defined(MI_WIN_USE_FLS)
724 #define MI_PRIM_HAS_PROCESS_ATTACH 1
725
726 static void NTAPI mi_win_main_attach(PVOID module, DWORD reason, LPVOID reserved) {
727 if (reason == DLL_PROCESS_ATTACH || reason == DLL_THREAD_ATTACH) {
728 mi_win_main(module, reason, reserved);
729 }
730 }
731 static void NTAPI mi_win_main_detach(PVOID module, DWORD reason, LPVOID reserved) {
732 if (reason == DLL_PROCESS_DETACH || reason == DLL_THREAD_DETACH) {
733 mi_win_main(module, reason, reserved);
734 }
735 }
736
737 // Set up TLS callbacks in a statically linked library by using special data sections.
738 // See <https://stackoverflow.com/questions/14538159/tls-callback-in-windows>
739 // We use 2 entries to ensure we call attach events before constructors
740 // are called, and detach events after destructors are called.
741 #if defined(__cplusplus)
742 extern "C" {
743 #endif
744
745 #if defined(_WIN64)
746 #pragma comment(linker, "/INCLUDE:_tls_used")
747 #pragma comment(linker, "/INCLUDE:_mi_tls_callback_pre")
748 #pragma comment(linker, "/INCLUDE:_mi_tls_callback_post")
749 #pragma const_seg(".CRT$XLB")
750 extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[];
751 const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach };
752 #pragma const_seg()
753 #pragma const_seg(".CRT$XLY")
754 extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[];
755 const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
756 #pragma const_seg()
757 #else
758 #pragma comment(linker, "/INCLUDE:__tls_used")
759 #pragma comment(linker, "/INCLUDE:__mi_tls_callback_pre")
760 #pragma comment(linker, "/INCLUDE:__mi_tls_callback_post")
761 #pragma data_seg(".CRT$XLB")
762 PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach };
763 #pragma data_seg()
764 #pragma data_seg(".CRT$XLY")
765 PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
766 #pragma data_seg()
767 #endif
768
769 #if defined(__cplusplus)
770 }
771 #endif
772
773 // nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event.
774 void _mi_prim_thread_init_auto_done(void) { }
775 void _mi_prim_thread_done_auto_done(void) { }
776 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
777 MI_UNUSED(heap);
778 }
779
780#else // deprecated: statically linked, use fiber api
781
782 #if defined(_MSC_VER) // on clang/gcc use the constructor attribute (in `src/prim/prim.c`)
783 // MSVC: use data section magic for static libraries
784 // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
785 #define MI_PRIM_HAS_PROCESS_ATTACH 1
786
787 static int mi_process_attach(void) {
788 mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL);
789 atexit(&_mi_auto_process_done);
790 return 0;
791 }
792 typedef int(*mi_crt_callback_t)(void);
793 #if defined(_WIN64)
794 #pragma comment(linker, "/INCLUDE:_mi_tls_callback")
795 #pragma section(".CRT$XIU", long, read)
796 #else
797 #pragma comment(linker, "/INCLUDE:__mi_tls_callback")
798 #endif
799 #pragma data_seg(".CRT$XIU")
800 mi_decl_externc mi_crt_callback_t _mi_tls_callback[] = { &mi_process_attach };
801 #pragma data_seg()
802 #endif
803
804 // use the fiber api for calling `_mi_thread_done`.
805 #include <fibersapi.h>
806 #if (_WIN32_WINNT < 0x600) // before Windows Vista
807 WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
808 WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
809 WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
810 WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex);
811 #endif
812
813 static DWORD mi_fls_key = (DWORD)(-1);
814
815 static void NTAPI mi_fls_done(PVOID value) {
816 mi_heap_t* heap = (mi_heap_t*)value;
817 if (heap != NULL) {
818 _mi_thread_done(heap);
819 FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672
820 }
821 }
822
823 void _mi_prim_thread_init_auto_done(void) {
824 mi_fls_key = FlsAlloc(&mi_fls_done);
825 }
826
827 void _mi_prim_thread_done_auto_done(void) {
828 // call thread-done on all threads (except the main thread) to prevent
829 // dangling callback pointer if statically linked with a DLL; Issue #208
830 FlsFree(mi_fls_key);
831 }
832
833 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
834 mi_assert_internal(mi_fls_key != (DWORD)(-1));
835 FlsSetValue(mi_fls_key, heap);
836 }
837#endif
838
839// ----------------------------------------------------
840// Communicate with the redirection module on Windows
841// ----------------------------------------------------
842#if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
843 #define MI_PRIM_HAS_ALLOCATOR_INIT 1
844
845 static bool mi_redirected = false; // true if malloc redirects to mi_malloc
846
847 bool _mi_is_redirected(void) {
848 return mi_redirected;
849 }
850
851 #ifdef __cplusplus
852 extern "C" {
853 #endif
854 mi_decl_export void _mi_redirect_entry(DWORD reason) {
855 // called on redirection; careful as this may be called before DllMain
856 mi_win_tls_init(reason);
857 if (reason == DLL_PROCESS_ATTACH) {
858 mi_redirected = true;
859 }
860 else if (reason == DLL_PROCESS_DETACH) {
861 mi_redirected = false;
862 }
863 else if (reason == DLL_THREAD_DETACH) {
864 _mi_thread_done(NULL);
865 }
866 }
867 __declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message);
868 __declspec(dllimport) void mi_cdecl mi_allocator_done(void);
869 #ifdef __cplusplus
870 }
871 #endif
872 bool _mi_allocator_init(const char** message) {
873 return mi_allocator_init(message);
874 }
875 void _mi_allocator_done(void) {
876 mi_allocator_done();
877 }
878#endif
879