/*
 * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
 * Copyright (c) 2000-2009 by Hewlett-Packard Development Company.
 * All rights reserved.
 * Copyright (c) 2009-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * This file could be used for the following purposes:
 *   - get the complete collector as a single link object file (module);
 *   - enable more compiler optimizations.
 *
 * Tip: to get the highest level of compiler optimizations, the typical
 * compiler options to use (assuming gcc) are:
 * `-O3 -march=native -fprofile-generate`
 *
 * Warning: gcc for Linux (for C++ clients only): use `-fexceptions` both for
 * the collector library and the client as otherwise `GC_thread_exit_proc()`
 * is not guaranteed to be invoked (see the comments in `pthread_start.c`
 * file).
 */

/* Marker of the single-object (all-in-one) build described above. */
#define GC_SINGLE_OBJ_BUILD

#if !defined(__cplusplus)
/*
 * Plain C compilation only: both internal-linkage macros resolve to
 * `STATIC` (that macro is defined in `gcconfig.h` file), because `static`
 * is desirable here for more efficient linkage.
 * TODO: Enable this in case of the compilation as C++ code.
 */
#  define GC_EXTERN GC_INNER
#  define GC_INNER STATIC
#endif

/* Small files go first... */
/*
 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1997 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999 by Hewlett-Packard Company.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * This is mostly an internal header file.  Typical clients should
 * not use it.  Clients that define their own object kinds with
 * debugging allocators will probably want to include this, however.
 * No attempt is made to keep the namespace clean.  This should not be
 * included from header files that are frequently included by clients.
 */

#ifndef GC_DBG_MLC_H
#define GC_DBG_MLC_H

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

#ifndef GC_PRIVATE_H
#define GC_PRIVATE_H

#ifdef HAVE_CONFIG_H
/* include/config.h.  Generated from config.h.in by configure.  */
/* include/config.h.in.  Generated from configure.ac by autoheader.  */

/* Define to recognize all pointers to the interior of objects. */
#define ALL_INTERIOR_POINTERS 1

/* AO load, store and/or test-and-set primitives are implemented in
   `libatomic_ops` using locks. */
/* #undef BASE_ATOMIC_OPS_EMULATED */

/* Erroneously cleared dirty bits checking. Use only for debugging of the
   incremental collector. */
/* #undef CHECKSUMS */

/* Define to discover thread stack bounds on Darwin without trying to walk the
   frames on the stack. */
/* #undef DARWIN_DONT_PARSE_STACK */

/* Define to force debug headers on all objects. */
#define DBG_HDRS_ALL 1

/* Do not use user32.dll import library (Win32). */
/* #undef DONT_USE_USER32_DLL */

/* Define to support pointer mask/shift set at runtime. */
/* #undef DYNAMIC_POINTER_MASK */

/* Define to enable eCos target support. */
/* #undef ECOS */

/* Wine `getenv` may not return `NULL` for missing entry. */
/* #undef EMPTY_GETENV_RESULTS */

/* Define to enable alternative finalization interface. */
#define ENABLE_DISCLAIM 1

/* Define to enable internal debug assertions. */
/* #undef GC_ASSERTIONS */

/* Define to enable atomic uncollectible allocation. */
#define GC_ATOMIC_UNCOLLECTABLE 1

/* Use GCC atomic intrinsics instead of `libatomic_ops` primitives. */
#define GC_BUILTIN_ATOMIC 1

/* Define to build dynamic libraries with only API symbols exposed. */
/* #undef GC_DLL */

/* Skip the initial guess of data root sets. */
/* #undef GC_DONT_REGISTER_MAIN_STATIC_DATA */

/* Define to turn on `GC_suspend_thread` support (Linux only). */
#define GC_ENABLE_SUSPEND_THREAD 1

/* Define to include support for `gcj`. */
#define GC_GCJ_SUPPORT 1

/* Define if backtrace information is supported. */
/* #undef GC_HAVE_BUILTIN_BACKTRACE */

/* Define to use `pthread_sigmask` function if needed. */
/* #undef GC_HAVE_PTHREAD_SIGMASK */

/* Enable Win32 `DllMain`-based approach of threads registering. */
/* #undef GC_INSIDE_DLL */

/* Missing execinfo.h header. */
/* #undef GC_MISSING_EXECINFO_H */

/* Missing `sigsetjmp` function. */
/* #undef GC_NO_SIGSETJMP */

/* Disable threads discovery in the collector. */
/* #undef GC_NO_THREADS_DISCOVERY */

/* Read environment variables from the GC 'env' file. */
/* #undef GC_READ_ENV_FILE */

/* Define and export `GC_wcsdup` function. */
#define GC_REQUIRE_WCSDUP 1

/* Define to support platform-specific threads. */
#define GC_THREADS 1

/* Force the GC to use signals based on `SIGRTMIN+k`. */
/* #undef GC_USESIGRT_SIGNALS */

/* Define to cause the collector to redefine `malloc` and intercepted
   `pthreads` routines with their real names while using `dlsym` to refer to
   the original routines. */
/* #undef GC_USE_DLOPEN_WRAP */

/* The major version number of this GC release. */
#define GC_VERSION_MAJOR 8

/* The micro version number of this GC release. */
#define GC_VERSION_MICRO 0

/* The minor version number of this GC release. */
#define GC_VERSION_MINOR 3

/* Define to support pthreads-win32 or winpthreads. */
/* #undef GC_WIN32_PTHREADS */

/* Define to install `pthread_atfork` handlers by default. */
#define HANDLE_FORK 1

/* Define to use `dladdr` function. */
#define HAVE_DLADDR 1

/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1

/* Define to 1 if you have the 'dl_iterate_phdr' function. */
/* #undef HAVE_DL_ITERATE_PHDR */

/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1

/* `libatomic_ops` `AO_or` primitive implementation is lock-free. */
/* #undef HAVE_LOCKFREE_AO_OR */

/* Define to use `pthread_setname_np(const char *)` function. */
#define HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID 1

/* Define to use `pthread_setname_np(pthread_t, const char *)` function. */
/* #undef HAVE_PTHREAD_SETNAME_NP_WITH_TID */

/* Define to use `pthread_setname_np(pthread_t, const char *, void *)`
   function. */
/* #undef HAVE_PTHREAD_SETNAME_NP_WITH_TID_AND_ARG */

/* Define to use `pthread_set_name_np(pthread_t, const char *)` function. */
/* #undef HAVE_PTHREAD_SET_NAME_NP */

/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the <stdio.h> header file. */
#define HAVE_STDIO_H 1

/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1

/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1

/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1

/* Do not define `DYNAMIC_LOADING` even if supported (i.e., build the
   collector with disabled tracing of dynamic library data roots). */
/* #undef IGNORE_DYNAMIC_LOADING */

/* Define to make it somewhat safer by default to finalize objects out of
   order by specifying a nonstandard finalization mark procedure. */
#define JAVA_FINALIZATION 1

/* Define to save back-pointers in debugging headers. */
#define KEEP_BACK_PTRS 1

/* Define to optimize for large heaps or root sets. */
/* #undef LARGE_CONFIG */

/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"

/* Define to build the collector with the support of the functionality to
   print max length of chain through unreachable objects ending in a reachable
   one. */
/* #undef MAKE_BACK_GRAPH */

/* Number of sequential garbage collections during which a candidate block for
   unmapping should be marked as free. */
#define MUNMAP_THRESHOLD 7

/* Define to not use system clock (cross compiling). */
/* #undef NO_CLOCK */

/* Disable debugging, like `GC_dump` and its callees. */
/* #undef NO_DEBUGGING */

/* Define to make the collector not allocate executable memory by default. */
#define NO_EXECUTE_PERMISSION 1

/* Missing `getcontext` function. */
/* #undef NO_GETCONTEXT */

/* Prohibit installation of `pthread_atfork` handlers. */
/* #undef NO_HANDLE_FORK */

/* Name of package */
#define PACKAGE "gc"

/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "https://github.com/bdwgc/bdwgc/issues"

/* Define to the full name of this package. */
#define PACKAGE_NAME "gc"

/* Define to the full name and version of this package. */
#define PACKAGE_STRING "gc 8.3.0"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "gc"

/* Define to the home page for this package. */
#define PACKAGE_URL ""

/* Define to the version of this package. */
#define PACKAGE_VERSION "8.3.0"

/* Define to enable parallel marking. */
#define PARALLEL_MARK 1

/* If defined, redirect `free` to this function. */
/* #undef REDIRECT_FREE */

/* If defined, redirect `malloc` to this function. */
/* #undef REDIRECT_MALLOC */

/* If defined, redirect `realloc` to this function. */
/* #undef REDIRECT_REALLOC */

/* The number of caller frames saved when allocating with the debugging API.
   */
/* #undef SAVE_CALL_COUNT */

/* Shorten the headers to minimize object size at the expense of checking for
   writes past the end. */
/* #undef SHORT_DBG_HDRS */

/* Define to tune the collector for small heap sizes. */
/* #undef SMALL_CONFIG */

/* Define to 1 if all of the C89 standard headers exist (not just the ones
   required in a freestanding environment). This macro is provided for
   backward compatibility; new code need not use it. */
#define STDC_HEADERS 1

/* Define to work around a Solaris 5.3 bug (see dyn_load.c). */
/* #undef SUNOS53_SHARED_LIB */

/* Define to enable thread-local allocation optimization. */
/* #undef THREAD_LOCAL_ALLOC */

/* Use Unicode (W) variant of Win32 API instead of ASCII (A) one. */
/* #undef UNICODE */

/* Define to use compiler support for thread-local variables. */
/* #undef USE_COMPILER_TLS */

/* Define to use `mmap` instead of `sbrk` to expand the heap. */
#define USE_MMAP 1

/* Define to return memory to OS with `munmap` calls. */
#define USE_MUNMAP 1

/* Use `rwlock` for the allocator lock instead of mutex. */
/* #undef USE_RWLOCK */

/* Define to use Win32 `VirtualAlloc` (instead of `sbrk` or `mmap`) to expand
   the heap. */
/* #undef USE_WINALLOC */

/* Define to support tracking `GC_malloc` and friends for heap profiling
   tools. */
/* #undef VALGRIND_TRACKING */

/* Version number of package */
#define VERSION "8.3.0"

/* The POSIX feature macro. */
/* #undef _POSIX_C_SOURCE */

/* Indicates the use of `pthreads` (NetBSD). */
/* #undef _PTHREADS */

/* Required define if using POSIX threads. */
#define _REENTRANT 1

/* Define to '__inline__' or '__inline' if that's what the C compiler
   calls it, or to nothing if 'inline' is not supported under any name.  */
#ifndef __cplusplus
/* #undef inline */
#endif

#endif

/*
 * Define `GC_BUILD` unless it is defined already or `NOT_GCBUILD` is
 * defined.
 */
#ifndef GC_BUILD
#  ifndef NOT_GCBUILD
#    define GC_BUILD
#  endif
#endif

/*
 * Define `_GNU_SOURCE` (unless it is defined already) if the compiler
 * reports one of the listed platforms or if one of the listed configured
 * features (`dladdr`, `pthread_sigmask`, `pthread_setname_np`, or `munmap`
 * on Cosmopolitan) is requested.
 */
#if (defined(__linux__) || defined(__GLIBC__) || defined(__GNU__) \
     || defined(__CYGWIN__) || defined(HAVE_DLADDR)               \
     || (defined(__COSMOPOLITAN__) && defined(USE_MUNMAP))        \
     || defined(GC_HAVE_PTHREAD_SIGMASK)                          \
     || defined(HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID)              \
     || defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID_AND_ARG)         \
     || defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID))                \
    && !defined(_GNU_SOURCE)
/* Cannot test `LINUX`, since this must be defined before other includes. */
#  define _GNU_SOURCE 1
#endif

/* Interix: define `_ALL_SOURCE` unless it is defined already. */
#if defined(__INTERIX) && !defined(_ALL_SOURCE)
#  define _ALL_SOURCE 1
#endif

/*
 * Define `_USING_POSIX4A_DRAFT10` (unless it is defined already) for the
 * threaded DG/UX builds.  Note that `&&` binds tighter than `||` in the
 * first line, thus `GC_THREADS` is required only in combination with
 * `DGUX`, while either of `DGUX386_THREADS` and `GC_DGUX386_THREADS` is
 * sufficient by itself.
 */
#if (defined(DGUX) && defined(GC_THREADS) || defined(DGUX386_THREADS) \
     || defined(GC_DGUX386_THREADS))                                  \
    && !defined(_USING_POSIX4A_DRAFT10)
#  define _USING_POSIX4A_DRAFT10 1
#endif

/*
 * Applies only to MinGW targeting `__i386__`, and only if `GC_EXTERN` is
 * defined before this point.
 */
#if defined(__MINGW32__) && !defined(__MINGW_EXCPT_DEFINE_PSDK) \
    && defined(__i386__)                                        \
    && defined(GC_EXTERN) /*< defined in `extra/gc.c` file */
/* See the description in `mark.c` file. */
#  define __MINGW_EXCPT_DEFINE_PSDK 1
#endif

/*
 * `NDEBUG` is defined only if debugging is disabled (`NO_DEBUGGING`) and
 * the collector assertions (`GC_ASSERTIONS`) are not requested.
 */
#if defined(NO_DEBUGGING) && !defined(GC_ASSERTIONS) && !defined(NDEBUG)
/* To turn off assertion checking (in `atomic_ops.h` file). */
#  define NDEBUG 1
#endif

#ifndef GC_H
#  include "gc/gc.h"
#endif

#include <stdlib.h>
#if !defined(sony_news)
#  include <stddef.h>
#endif

#ifdef DGUX
#  include <sys/resource.h>
#  include <sys/time.h>
#endif

#ifdef BSD_TIME
#  include <sys/resource.h>
#  include <sys/time.h>
#endif

/*
 * If parallel marking is enabled, then define `AO_REQUIRE_CAS` and, for
 * the compilers that do not define `__GNUC__`, `AO_ASSUME_WINDOWS98`
 * (unless the latter is defined already).
 * NOTE(review): both look like `libatomic_ops` configuration macros that
 * should precede the inclusion of `atomic_ops.h` file - confirm.
 */
#ifdef PARALLEL_MARK
#  define AO_REQUIRE_CAS
#  if !defined(__GNUC__) && !defined(AO_ASSUME_WINDOWS98)
#    define AO_ASSUME_WINDOWS98
#  endif
#endif

#include "gc/gc_mark.h"
#include "gc/gc_tiny_fl.h"

/* A short internal alias of `GC_word` type. */
typedef GC_word word;

/*
 * `PTR_T_DEFINED` guards against a duplicate definition: the same
 * `typedef` is also present (under the same guard) in the `gcconfig.h`
 * part of this file.
 */
#ifndef PTR_T_DEFINED
/*
 * A generic pointer to which we can add byte displacements and which
 * can be used for address comparisons.
 */
typedef char *ptr_t;
#  define PTR_T_DEFINED
#endif

#if !defined(SIZE_MAX)
#  include <limits.h>
#endif
/*
 * `GC_SIZE_MAX` is a constant representing maximum value for `size_t`
 * type.
 */
#if !defined(SIZE_MAX) || defined(CPPCHECK)
/* No usable `SIZE_MAX`: compute the all-ones value directly. */
#  define GC_SIZE_MAX (~(size_t)0)
#else
/* An extra cast is used to workaround some buggy `SIZE_MAX` definitions. */
#  define GC_SIZE_MAX ((size_t)SIZE_MAX)
#endif

#if defined(LINT2) || !(GC_GNUC_PREREQ(3, 0) || defined(__clang__))
/* No prediction hint is passed to the compiler: just evaluate `e`. */
#  define LIKELY(e) (e)
#  define UNLIKELY(e) (e)
#else
/* Equivalent to `e`, but predict that usually `e` is true (false). */
#  define LIKELY(e) __builtin_expect(e, 1 /* `TRUE` */)
#  define UNLIKELY(e) __builtin_expect(e, 0 /* `FALSE` */)
#endif /* !LINT2 && (gcc-3.0+ || clang) */

/*
 * Add two `size_t` values with saturation: the result is `GC_SIZE_MAX`
 * instead of a wrapped-around value if the sum does not fit.  Each
 * argument is evaluated more than once, so it should have no side effects.
 */
#define SIZET_SAT_ADD(a, b) \
  (UNLIKELY((a) >= GC_SIZE_MAX - (b)) ? GC_SIZE_MAX : (a) + (b))

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2000-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * This header is private to the collector.  It is almost always
 * included from `gc_priv.h` file.  However it is possible to include
 * it by itself if just the configuration macros are needed.  In that
 * case, a few declarations relying on types declared in `gc_priv.h`
 * file will be omitted.
 */

#ifndef GCCONFIG_H
#define GCCONFIG_H

/*
 * NOTE(review): both conditionals below are empty in this file; presumably
 * the `#include` lines they guarded were dropped when the headers were
 * inlined - confirm.
 */
#ifndef GC_H
#  ifdef HAVE_CONFIG_H
#  endif


#endif

/*
 * For the static analysis by Cppcheck only: remove the listed macro
 * definitions (if any) that may be set by this point.
 */
#ifdef CPPCHECK
#  undef CLOCKS_PER_SEC
#  undef FIXUP_POINTER
#  undef POINTER_MASK
#  undef POINTER_SHIFT
#  undef REDIRECT_REALLOC
#  undef _MAX_PATH
#endif

/*
 * The same `typedef` (under the same guard) is present in the `gc_priv.h`
 * part; it is repeated here for the case this header is used by itself.
 */
#ifndef PTR_T_DEFINED
typedef char *ptr_t;
#  define PTR_T_DEFINED
#endif

#if !defined(sony_news)
#  include <stddef.h> /*< for `size_t`, etc. */
#endif

/*
 * `EXTERN_C_BEGIN` and `EXTERN_C_END` open and close an `extern "C"`
 * block in case of C++, and expand to nothing in case of plain C.
 * Note: only wrap our own declarations, and not the included headers;
 * in this case, wrap our entire file, but temporarily unwrap/rewrap
 * around `#include` entities; macros do not need such wrapping.
 */
#ifndef __cplusplus
#  define EXTERN_C_BEGIN
#  define EXTERN_C_END
#else
#  define EXTERN_C_BEGIN extern "C" {
#  define EXTERN_C_END } /* extern "C" */
#endif

EXTERN_C_BEGIN

/*
 * Convenient internal macros to test version of Clang.  Both of them
 * yield zero if the compiler does not report itself as Clang (with the
 * major version number available).
 */
#if !defined(__clang__) || !defined(__clang_major__)
#  define GC_CLANG_PREREQ(major, minor) 0 /*< `FALSE` */
#  define GC_CLANG_PREREQ_FULL(major, minor, patchlevel) 0
#else
/* True if the compiler version is at least `major`.`minor` one. */
#  define GC_CLANG_PREREQ(major, minor) \
    (((major) << 8) + (minor) <= (__clang_major__ << 8) + __clang_minor__)
/* Same as above but the patch level is taken into account as well. */
#  define GC_CLANG_PREREQ_FULL(major, minor, patchlevel)        \
    ((__clang_major__ == (major) && __clang_minor__ == (minor)  \
      && (patchlevel) <= __clang_patchlevel__)                  \
     || GC_CLANG_PREREQ(major, (minor) + 1))
#endif

/*
 * Machine-dependent parameters.  Some tuning parameters can be found
 * near the top of `gc_priv.h` file.
 */

/* Machine-specific parts contributed by various people.  See `README` file. */

/*
 * Operating system detection.  The compiler-predefined macros are mapped
 * to the collector's own OS macros, which are tested by the machine type
 * detection that follows.
 */

#if defined(__ANDROID__) && !defined(HOST_ANDROID)
/* A Linux-based OS.  `__ANDROID__` macro is defined by Android NDK gcc. */
#  define HOST_ANDROID 1
#endif

#if defined(TIZEN) && !defined(HOST_TIZEN)
/* A Linux-based OS. */
#  define HOST_TIZEN 1
#endif

/*
 * Symbian.  NOTE(review): `__WINS__` is presumably set by the emulator
 * toolchain, for which the data is placed to ".data2" segment - confirm.
 */
#if defined(__SYMBIAN32__) && !defined(SYMBIAN)
#  define SYMBIAN
#  ifdef __WINS__
#    pragma data_seg(".data2")
#  endif
#endif

/*
 * First a unified test for Linux.  Android and Tizen are treated as
 * Linux too; Native Client is excluded.
 */
#if (defined(linux) || defined(__linux__) || defined(HOST_ANDROID) \
     || defined(HOST_TIZEN))                                       \
    && !defined(LINUX) && !defined(__native_client__)
#  define LINUX
#endif

/* And one for NetBSD. */
#if defined(__NetBSD__)
#  define NETBSD
#endif

/* And one for OpenBSD. */
#if defined(__OpenBSD__)
#  define OPENBSD
#endif

/*
 * And one for FreeBSD (DragonFly and the targets with a FreeBSD kernel
 * are treated the same way).  The client may define `GC_NO_FREEBSD` to
 * suppress this.
 */
#if (defined(__FreeBSD__) || defined(__DragonFly__) \
     || defined(__FreeBSD_kernel__))                \
    && !defined(FREEBSD)                            \
    && !defined(GC_NO_FREEBSD) /*< Orbis compiler defines `__FreeBSD__` */
#  define FREEBSD
#endif

/* A common macro to test for any of the BSD variants detected above. */
#if defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD)
#  define ANY_BSD
#endif

/* Short aliases for the other compiler-predefined OS macros. */

#if defined(__COSMOPOLITAN__)
#  define COSMO
#endif

#if defined(__EMBOX__)
#  define EMBOX
#endif

#if defined(__KOS__)
#  define KOS
#endif

#if defined(__QNX__) && !defined(QNX)
#  define QNX
#endif

#if defined(__serenity__)
#  define SERENITY
#endif

/*
 * And one for Darwin.  The system header is included outside of the
 * `extern "C"` wrapping (which is closed temporarily for that).
 */
#if defined(macosx) || (defined(__APPLE__) && defined(__MACH__))
#  define DARWIN
EXTERN_C_END
#  include <TargetConditionals.h>
EXTERN_C_BEGIN
#endif

/*
 * Determine the machine type.  Each matching clause defines an
 * architecture macro, optionally an OS (or environment) macro, and
 * `mach_type_known` one; the latter is checked after all the clauses.
 */
#if defined(__native_client__)
#  define NACL
#  if !defined(__portable_native_client__) && !defined(__arm__)
#    define I386
#    define mach_type_known
#  else
/* Here we will rely upon arch-specific defines. */
#  endif
#endif
/*
 * AArch64 without any of the recognized operating systems is treated as
 * `NOSYS` environment.
 */
#if defined(__aarch64__) && !defined(ANY_BSD) && !defined(COSMO) \
    && !defined(DARWIN) && !defined(LINUX) && !defined(KOS)      \
    && !defined(NN_BUILD_TARGET_PLATFORM_NX) && !defined(QNX)    \
    && !defined(SERENITY) && !defined(_WIN32)
#  define AARCH64
#  define NOSYS
#  define mach_type_known
#endif
/*
 * 32-bit ARM.  `ARM32` is defined here regardless of the OS, but
 * `mach_type_known` is defined only for `NACL`, `SYMBIAN` and (unless
 * prohibited by `GC_NO_NOSYS`) for `NOSYS` environment; for the other
 * targets it is set by some clause below.
 */
#if defined(__arm) || defined(__arm__) || defined(__thumb__)
#  define ARM32
#  if defined(NACL) || defined(SYMBIAN)
#    define mach_type_known
#  elif !defined(ANY_BSD) && !defined(DARWIN) && !defined(LINUX)             \
      && !defined(QNX) && !defined(NN_PLATFORM_CTR)                          \
      && !defined(SN_TARGET_PSP2) && !defined(_WIN32) && !defined(__CEGCC__) \
      && !defined(GC_NO_NOSYS)
#    define NOSYS
#    define mach_type_known
#  endif
#endif
/* RISC-V on neither BSD nor Linux is treated as `NOSYS` environment. */
#if defined(__riscv) && !defined(ANY_BSD) && !defined(LINUX)
#  define RISCV
#  define NOSYS
#  define mach_type_known
#endif
/* VAX: the OS is `ULTRIX` if `ultrix` is predefined, otherwise `BSD`. */
#if defined(vax) || defined(__vax__)
#  define VAX
#  ifdef ultrix
#    define ULTRIX
#  else
#    define BSD
#  endif
#  define mach_type_known
#endif
#if defined(NETBSD) && defined(__vax__)
#  define VAX
#  define mach_type_known
#endif
/*
 * MIPS on neither Tandem, nor BSD, nor Linux.  The OS is chosen by the
 * nested tests; no OS macro (and no `mach_type_known`) is defined here
 * for Windows CE targets, which are handled by a clause below.
 */
#if (defined(mips) || defined(__mips) || defined(_mips)) \
    && !defined(__TANDEM) && !defined(ANY_BSD) && !defined(LINUX)
#  define MIPS
#  if defined(nec_ews) || defined(_nec_ews)
#    define EWS4800
#    define mach_type_known
#  elif defined(ultrix) || defined(__ultrix)
#    define ULTRIX
#    define mach_type_known
#  elif !defined(_WIN32_WCE) && !defined(__CEGCC__) && !defined(__MINGW32CE__)
#    define IRIX5 /*< or Irix 6.x */
#    define mach_type_known
#  endif /* !MSWINCE */
#endif
/* `DGUX` macro is expected to be defined by the client (or compiler). */
#if defined(DGUX) && (defined(i386) || defined(__i386__))
#  define I386
#  ifndef _USING_DGUX
#    define _USING_DGUX
#  endif
#  define mach_type_known
#endif
#if defined(sequent) && (defined(i386) || defined(__i386__))
#  define I386
#  define SEQUENT
#  define mach_type_known
#endif
/* Solaris on x86 (32-bit and 64-bit variants). */
#if (defined(sun) || defined(__sun)) && (defined(i386) || defined(__i386__))
#  define I386
#  define SOLARIS
#  define mach_type_known
#endif
#if (defined(sun) || defined(__sun)) && defined(__amd64)
#  define X86_64
#  define SOLARIS
#  define mach_type_known
#endif
#if (defined(__OS2__) || defined(__EMX__)) && defined(__32BIT__)
#  define I386
#  define OS2
#  define mach_type_known
#endif
/*
 * SPARC: either Solaris or, if `sun` is not predefined on a Unix other
 * than Linux and BSD, `DRSNX`.
 */
#if (defined(sun) || defined(__sun)) && (defined(sparc) || defined(__sparc))
/* SunOS 5.x */
EXTERN_C_END
#  include <errno.h>
EXTERN_C_BEGIN
#  define SPARC
#  define SOLARIS
#  define mach_type_known
#elif defined(sparc) && defined(unix) && !defined(sun) && !defined(linux) \
    && !defined(ANY_BSD)
#  define SPARC
#  define DRSNX
#  define mach_type_known
#endif
#if defined(_IBMR2) /* `&& defined(_AIX)` */
#  define POWERPC
#  define AIX
#  define mach_type_known
#endif
#if defined(_M_XENIX) && defined(_M_SYSV) && defined(_M_I386)
/* TODO: The above test may need refinement. */
#  define I386
#  if defined(_SCO_ELF)
#    define SCO_ELF
#  else
#    define SCO
#  endif
#  define mach_type_known
#endif
/* PA-RISC: the OS defaults to `HPUX` unless Linux or OpenBSD is detected. */
#if defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) \
    || defined(hppa) || defined(__hppa__)
#  define HP_PA
#  if !defined(LINUX) && !defined(HPUX) && !defined(OPENBSD)
#    define HPUX
#  endif
#  define mach_type_known
#endif
#if defined(__ia64) && (defined(_HPUX_SOURCE) || defined(__HP_aCC))
#  define IA64
#  ifndef HPUX
#    define HPUX
#  endif
#  define mach_type_known
#endif
/* BeOS is handled as `HAIKU`. */
#if (defined(__BEOS__) || defined(__HAIKU__)) && defined(_X86_)
#  define I386
#  define HAIKU
#  define mach_type_known
#endif
#if defined(__HAIKU__) && (defined(__amd64__) || defined(__x86_64__))
#  define X86_64
#  define HAIKU
#  define mach_type_known
#endif
/* Alpha: the OS defaults to `OSF1` unless BSD or Linux is detected. */
#if defined(__alpha) || defined(__alpha__)
#  define ALPHA
#  if !defined(ANY_BSD) && !defined(LINUX)
#    define OSF1 /*< Compaq Tru64 UNIX (Digital UNIX) */
#  endif
#  define mach_type_known
#endif
#if defined(__rtems__) && (defined(i386) || defined(__i386__))
#  define I386
#  define RTEMS
#  define mach_type_known
#endif
#if defined(NeXT) && defined(mc68000)
#  define M68K
#  define NEXT
#  define mach_type_known
#endif
#if defined(NeXT) && (defined(i386) || defined(__i386__))
#  define I386
#  define NEXT
#  define mach_type_known
#endif
#if defined(bsdi) && (defined(i386) || defined(__i386__))
#  define I386
#  define BSDI
#  define mach_type_known
#endif
/*
 * Note: this clause depends on the order of the tests, it is applied
 * only if no clause above has matched.
 */
#if defined(__386BSD__) && !defined(mach_type_known)
#  define I386
#  define THREE86BSD
#  define mach_type_known
#endif
#if defined(_CX_UX) && defined(_M88K)
#  define M88K
#  define CX_UX
#  define mach_type_known
#endif
#if defined(DGUX) && defined(m88k)
#  define M88K
/* `DGUX` macro is already defined. */
#  define mach_type_known
#endif
/*
 * Windows targets.  Windows CE is tested first; otherwise, the desktop
 * Windows (or Xbox One) clauses of `#else` branch are considered.
 */
#if defined(_WIN32_WCE) || defined(__CEGCC__) || defined(__MINGW32CE__)
/*
 * `SH3`, `SH4`, `MIPS` macros are already defined for the corresponding
 * architectures.
 */
#  if defined(SH3) || defined(SH4)
#    define SH
#  endif
#  if defined(x86) || defined(__i386__)
#    define I386
#  endif
#  if defined(_M_ARM) || defined(ARM) || defined(_ARM_)
#    define ARM32
#  endif
#  define MSWINCE
#  define mach_type_known
#else
/*
 * Note: if `_M_IX86` is not defined, then it is evaluated as zero by the
 * preprocessor, thus the first alternative is false in such a case.
 */
#  if ((defined(_MSDOS) || defined(_MSC_VER)) && (_M_IX86 >= 300))          \
      || (defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__) \
          && !defined(__INTERIX) && !defined(SYMBIAN))                      \
      || defined(__MINGW32__)
/* Select the architecture; `I386` is the fallback one. */
#    if defined(__LP64__) || defined(_M_X64)
#      define X86_64
#    elif defined(_M_ARM)
#      define ARM32
#    elif defined(_M_ARM64)
#      define AARCH64
#    else /* _M_IX86 */
#      define I386
#    endif
#    ifdef _XBOX_ONE
#      define MSWIN_XBOX1
#    else
#      ifndef MSWIN32
#        define MSWIN32 /*< or Win64 */
#      endif
#      if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)
#        define MSWINRT_FLAVOR
#      endif
#    endif
#    define mach_type_known
#  endif
/*
 * NOTE(review): `mach_type_known` is not defined by the clause below; and,
 * if `_WIN32` is predefined as well, the clause above has already chosen
 * `I386` by its final `#else` - confirm this combination is intended.
 */
#  if defined(_MSC_VER) && defined(_M_IA64)
#    define IA64
/*
 * Really Win64, but we do not treat 64-bit variants as a different
 * platform.
 */
#    define MSWIN32
#  endif
#endif /* !_WIN32_WCE && !__CEGCC__ && !__MINGW32CE__ */
#if defined(__DJGPP__)
#  define I386
#  ifndef DJGPP
/* MSDOS running the DJGPP port of gcc. */
#    define DJGPP
#  endif
#  define mach_type_known
#endif
/* Cygwin: 64-bit variant is distinguished by `__LP64__`. */
#if defined(__CYGWIN32__) || defined(__CYGWIN__)
#  if defined(__LP64__)
#    define X86_64
#  else
#    define I386
#  endif
#  define CYGWIN32
#  define mach_type_known
#endif /* __CYGWIN__ */
#if defined(__INTERIX)
#  define I386
#  define INTERIX
#  define mach_type_known
#endif
/*
 * Note: this clause depends on the order of the tests, it is applied
 * only if no clause above has matched.
 */
#if defined(_UTS) && !defined(mach_type_known)
#  define S370
#  define UTS4
#  define mach_type_known
#endif
#if defined(__embedded__) && defined(PPC)
#  define POWERPC
#  define NOSYS
#  define mach_type_known
#endif
/*
 * Watcom C for 386.  If none of `OS2`, `MSWIN32`, `DOS4GW` is defined
 * yet, then the OS is chosen by the compiler-predefined macros, with
 * `DOS4GW` as the fallback.
 */
#if defined(__WATCOMC__) && defined(__386__)
#  define I386
#  if !defined(OS2) && !defined(MSWIN32) && !defined(DOS4GW)
#    if defined(__OS2__)
#      define OS2
#    elif defined(__WINDOWS_386__) || defined(__NT__)
#      define MSWIN32
#    else
#      define DOS4GW
#    endif
#  endif
#  define mach_type_known
#endif /* __WATCOMC__ && __386__ */
#if defined(__GNU__) && defined(__i386__)
/* The Debian Hurd running on generic PC. */
#  define HURD
#  define I386
#  define mach_type_known
#endif
#if defined(__GNU__) && defined(__x86_64__)
#  define HURD
#  define X86_64
#  define mach_type_known
#endif
/*
 * Note: `__TANDEM` is excluded from the generic MIPS clause above, so
 * `MIPS` macro is defined for it only here.
 */
#if defined(__TANDEM)
/* Nonstop S-series. */
/* FIXME: Should recognize Integrity series? */
#  define MIPS
#  define NONSTOP
#  define mach_type_known
#endif
#if defined(__tile__) && defined(LINUX)
#  ifdef __tilegx__
#    define TILEGX
#  else
#    define TILEPRO
#  endif
#  define mach_type_known
#endif
/* This target is excluded from the AArch64 `NOSYS` clause above. */
#if defined(NN_BUILD_TARGET_PLATFORM_NX)
#  define AARCH64
#  define NINTENDO_SWITCH
#  define mach_type_known
#endif
/* WebAssembly targets (Emscripten and WASI). */
#if defined(__EMSCRIPTEN__) || defined(EMSCRIPTEN)
#  define WEBASSEMBLY
#  ifndef EMSCRIPTEN
#    define EMSCRIPTEN
#  endif
#  define mach_type_known
#endif
#if defined(__wasi__)
/* The WebAssembly System Interface (WASI). */
#  define WEBASSEMBLY
#  define WASI
#  define mach_type_known
#endif

#if defined(__aarch64__)                                      \
    && (defined(ANY_BSD) || defined(COSMO) || defined(DARWIN) \
        || defined(LINUX) || defined(KOS) || defined(QNX)     \
        || defined(SERENITY))
#  define AARCH64
#  define mach_type_known
#elif defined(__arc__) && defined(LINUX)
#  define ARC
#  define mach_type_known
#elif (defined(__arm) || defined(__arm__) || defined(__arm32__)               \
       || defined(__ARM__))                                                   \
    && (defined(ANY_BSD) || defined(DARWIN) || defined(LINUX) || defined(QNX) \
        || defined(NN_PLATFORM_CTR) || defined(SN_TARGET_PSP2))
#  define ARM32
#  define mach_type_known
#elif defined(__avr32__) && defined(LINUX)
#  define AVR32
#  define mach_type_known
#elif defined(__cris__) && defined(LINUX)
#  ifndef CRIS
#    define CRIS
#  endif
#  define mach_type_known
#elif defined(__e2k__) && defined(LINUX)
#  define E2K
#  define mach_type_known
#elif defined(__hexagon__) && defined(LINUX)
#  define HEXAGON
#  define mach_type_known
#elif (defined(__i386__) || defined(i386) || defined(__X86__)) \
    && (defined(ANY_BSD) || defined(DARWIN) || defined(EMBOX)  \
        || defined(LINUX) || defined(QNX) || defined(SERENITY))
#  define I386
#  define mach_type_known
#elif (defined(__ia64) || defined(__ia64__)) && defined(LINUX)
#  define IA64
#  define mach_type_known
#elif defined(__loongarch__) && defined(LINUX)
#  define LOONGARCH
#  define mach_type_known
#elif defined(__m32r__) && defined(LINUX)
#  define M32R
#  define mach_type_known
#elif ((defined(__m68k__) || defined(m68k))      \
       && (defined(NETBSD) || defined(OPENBSD))) \
    || (defined(__mc68000__) && defined(LINUX))
#  define M68K
#  define mach_type_known
#elif (defined(__mips) || defined(_mips) || defined(mips)) \
    && (defined(ANY_BSD) || defined(LINUX))
#  define MIPS
#  define mach_type_known
#elif (defined(__NIOS2__) || defined(__NIOS2) || defined(__nios2__)) \
    && defined(LINUX)
#  define NIOS2 /*< Altera NIOS2 */
#  define mach_type_known
#elif defined(__or1k__) && defined(LINUX)
#  define OR1K /*< OpenRISC (or1k) */
#  define mach_type_known
#elif (defined(__powerpc__) || defined(__powerpc64__) || defined(__ppc__) \
       || defined(__ppc64__) || defined(powerpc) || defined(powerpc64))   \
    && (defined(ANY_BSD) || defined(DARWIN) || defined(LINUX))
#  define POWERPC
#  define mach_type_known
#elif defined(__riscv) && (defined(ANY_BSD) || defined(LINUX))
#  define RISCV
#  define mach_type_known
#elif defined(__s390__) && defined(LINUX)
#  define S390
#  define mach_type_known
#elif defined(__sh__) \
    && (defined(LINUX) || defined(NETBSD) || defined(OPENBSD))
#  define SH
#  define mach_type_known
#elif (defined(__sparc) || defined(sparc)) \
    && (defined(ANY_BSD) || defined(LINUX))
#  define SPARC
#  define mach_type_known
#elif defined(__sw_64__) && defined(LINUX)
#  define SW_64
#  define mach_type_known
#elif (defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) \
       || defined(__X86_64__))                                        \
    && (defined(ANY_BSD) || defined(COSMO) || defined(DARWIN)         \
        || defined(LINUX) || defined(QNX) || defined(SERENITY))
#  define X86_64
#  define mach_type_known
#endif

/*
 * Feel free to add more clauses here.  Or manually define the machine
 * type here.  A machine type is characterized by the architecture.
 * Some machine types are further subdivided by OS.  Macros such as
 * `LINUX`, `FREEBSD`, etc. distinguish them.  The distinction in these
 * cases is usually the stack starting address.
 */

/*
 * Stop the compilation if none of the clauses above has recognized the
 * target (the check is skipped if `CPPCHECK` is defined).
 */
#if !(defined(mach_type_known) || defined(CPPCHECK))
#  error The collector has not been ported to this machine/OS combination
#endif

/*
 * The CPU architecture mapping is:
 *   - `AARCH64`: ARM AArch64 ILP32/64-bit (running `COSMO` environment,
 *     `DARWIN` (OS X or iOS), `KOS`, `LINUX`, `MSWIN32`, `NETBSD`,
 *     `NINTENDO_SWITCH`, `NOSYS` environment, `OPENBSD`, `QNX`, `SERENITY`);
 *   - `ALPHA`: DEC Alpha (running `FREEBSD`, `LINUX`, `NETBSD`, `OPENBSD`,
 *     `OSF1`);
 *   - `ARC`: Synopsys ARC (running `LINUX`);
 *   - `ARM32`: ARMv7 (running `DARWIN` (iOS), `FREEBSD`, `LINUX`, `MSWIN32`,
 *     `MSWINCE`, `NETBSD`, `NN_PLATFORM_CTR`, `NOSYS` environment,
 *     `OPENBSD`, `QNX`, `SN_TARGET_PSP2`, `SYMBIAN`);
 *   - `AVR32`: Atmel RISC (running `LINUX`);
 *   - `CRIS`: Axis Etrax (running `LINUX`);
 *   - `E2K`: Elbrus 2000 32/64-bit (running `LINUX`);
 *   - `HEXAGON`: Qualcomm Hexagon (running `LINUX`);
 *   - `HP_PA`: HP9000/700 and HP9000/800 32/64-bit (running `HPUX`, `LINUX`,
 *     `OPENBSD`);
 *   - `I386`: Intel 486/586/686 (running `BSDI`, `CYGWIN32` environment,
 *     `DARWIN` (macOS), `DGUX`, `DJGPP` environment, `DOS4GW` environment,
 *     `EMBOX`, `FREEBSD`, `HAIKU`, `HURD`, `INTERIX`, `LINUX`, `MSWIN32`,
 *     `MSWINCE`, `NACL` environment, `NETBSD`, `NEXT`, `OPENBSD`, `OS2`,
 *     `QNX`, `RTEMS`, `SCO`, `SCO_ELF`, `SEQUENT`, `SERENITY`, `SOLARIS`,
 *     `THREE86BSD`);
 *   - `IA64`: Intel IPF, e.g. Itanium 32/64-bit (running `HPUX`, `LINUX`,
 *     `MSWIN32`);
 *   - `LOONGARCH`: Loongson LoongArch 32/64-bit (running `LINUX`);
 *   - `M32R`: Renesas M32R (running `LINUX`);
 *   - `M68K`: Motorola 680x0 (running `LINUX`, `NETBSD`, `NEXT`, `OPENBSD`);
 *   - `M88K`: Motorola 88xx0 (running `CX_UX`, `DGUX`);
 *   - `MIPS`: R2000+ 32/64-bit (running `EWS4800`, `FREEBSD`, `IRIX5`,
 *     `LINUX`, `MSWINCE`, `NETBSD`, `NONSTOP`, `OPENBSD`, `ULTRIX`);
 *   - `NIOS2`: Altera NIOS2 (running `LINUX`);
 *   - `OR1K`: OpenRISC/or1k (running `LINUX`);
 *   - `POWERPC`: IBM/Apple PowerPC 32/64-bit (running `AIX`, `DARWIN`,
 *     `FREEBSD`, `LINUX`, `NETBSD`, `NOSYS` environment, `OPENBSD`,
 *     `SN_TARGET_PS3`);
 *   - `RISCV`: RISC-V 32/64-bit (running `FREEBSD`, `LINUX`, `NETBSD`,
 *     `NOSYS` environment, `OPENBSD`);
 *   - `S370`: A 370-like machine (running `UTS4`);
 *   - `S390`: A 390-like machine 32/64-bit (running `LINUX`);
 *   - `SH`: Hitachi SuperH (running `LINUX`, `MSWINCE`, `NETBSD`,
 *     `OPENBSD`);
 *   - `SH4`: Hitachi SH4 (running `MSWINCE`);
 *   - `SPARC`: SPARC v7/v8/v9 32/64-bit (running `DRSNX`, `FREEBSD`,
 *     `LINUX`, `NETBSD`, `OPENBSD`, `SOLARIS`);
 *   - `SW_64`: Sunway/Shenwei (running `LINUX`);
 *   - `TILEGX`: Tilera TILE-Gx 32/64-bit (running `LINUX`);
 *   - `TILEPRO`: Tilera TILEPro (running `LINUX`);
 *   - `VAX`: DEC VAX (running `BSD`, `ULTRIX`);
 *   - `WEBASSEMBLY`: WebAssembly/Wasm (running `EMSCRIPTEN` environment,
 *     `WASI` environment);
 *   - `X86_64`: AMD x86-64 ILP32/64-bit (running `COSMO` environment,
 *     `CYGWIN32` environment, `DARWIN` (macOS), `FREEBSD`, `HAIKU`,
 *     `HURD`, `LINUX`, `MSWIN32`, `MSWIN_XBOX1`, `NETBSD`, `OPENBSD`,
 *     `PLATFORM_GETMEM` environment, `QNX`, `SERENITY`, `SOLARIS`).
 */

/*
 * For each architecture and OS, the following need to be defined:
 *
 * `CPP_WORDSZ` is a simple integer constant representing the word size
 * in bits.  We assume byte addressability, where a byte has 8 bits.
 * We also assume `CPP_WORDSZ` is either 32 or 64.
 * (We care about the length of a pointer address, not hardware
 * bus widths.  Thus a 64-bit processor with a C compiler that uses
 * 32-bit pointers should use `CPP_WORDSZ` of 32, not 64.)
 *
 * `CPP_PTRSZ` is the pointer size in bits.  For most of the supported
 * targets, it is equal to `CPP_WORDSZ`.
 *
 * `MACH_TYPE` is a string representation of the machine type.
 * `OS_TYPE` is analogous for the OS.
 *
 * `ALIGNMENT` is the largest `n`, such that all pointers are guaranteed
 * to be aligned on an `n`-byte boundary.  Defining it to be 1 will always
 * work, but will perform poorly.  It should not be larger than the size
 * of a pointer.
 *
 * `DATASTART` is the beginning of the data segment.  On some platforms
 * `SEARCH_FOR_DATA_START` is defined.  The latter will cause
 * `GC_data_start` to be set to an address determined by accessing data
 * backwards from `_end` until an unmapped page is found.  `DATASTART` will
 * be defined to be `GC_data_start`.  On UNIX-like systems, the collector
 * will scan the area between `DATASTART` and `DATAEND` for root pointers.
 *
 * `DATAEND`, if not `end`, where `end` is defined as `extern int end[]`.
 * RTH suggests gaining access to linker script synth'd values with
 * this idiom instead of `&end`, where `end` is defined as `extern int end`.
 * Otherwise, "gcc will assume these are in .sdata/.sbss" and it will, e.g.,
 * cause failures on `alpha*-*-*` with `-msmall-data` or `-fpic` or
 * `mips-*-*` without any special options.
 *
 * `STACKBOTTOM` is the cold end of the stack, which is usually the
 * highest address in the stack.
 * Under OS/2, we have other ways of finding thread stacks.
 * For each machine, the following should be done:
 *   1. Define `STACK_GROWS_UP` if the stack grows toward higher addresses;
 *   2. Define exactly one of
 *      - `STACKBOTTOM` (should be defined to be an expression),
 *      - `HEURISTIC1`,
 *      - `SPECIFIC_MAIN_STACKBOTTOM`,
 *      - `HEURISTIC2`.
 *
 * If `STACKBOTTOM` is defined, then its value will be used directly (as
 * the stack bottom).  If `SPECIFIC_MAIN_STACKBOTTOM` is defined, then it
 * will be determined with a specific method appropriate for the operating
 * system.  Currently we look first for `__libc_stack_end` (currently only
 * if `USE_LIBC_PRIVATES` is defined), and if that fails, read it from
 * `/proc` pseudo-file.  (If `USE_LIBC_PRIVATES` is not defined and
 * `NO_PROC_STAT` is defined, we revert to `HEURISTIC2`.)
 * If either `HEURISTIC1` or `HEURISTIC2` is defined, then `STACKBOTTOM` is
 * computed during collector startup using the corresponding one of the
 * following two heuristics:
 *   - `HEURISTIC1`: Take an address inside `GC_init`'s frame, and round it
 *     up to the next multiple of `STACK_GRAN`;
 *   - `HEURISTIC2`: Take an address inside `GC_init`'s frame, increment it
 *     repeatedly in small steps (decrement if `STACK_GROWS_UP`), and read
 *     the value at each location, remember the value when the first
 *     Segmentation violation or Bus error is signaled, round that to the
 *     nearest plausible page boundary, and use that instead of
 *     `STACKBOTTOM`.
 *
 * Gustavo Rodriguez-Rivera points out that on most (all?) UNIX machines,
 * the value of `environ` is a pointer that can serve as `STACKBOTTOM`.
 * I expect that `HEURISTIC2` can be replaced by this approach, which
 * interferes far less with debugging.  However it has the disadvantage that
 * it is confused by a `putenv()` call before the collector is initialized.
 * This could be dealt with by intercepting `putenv()`...
 *
 * If no expression for `STACKBOTTOM` can be found, and neither of the above
 * heuristics is usable, the collector can still be used with all of the
 * above undefined, provided one of the following is done:
 *   1. `GC_mark_roots` can be changed to somehow mark from the correct
 *      stack(s) without reference to `STACKBOTTOM`.  This is appropriate for
 *      use in conjunction with thread packages, since there will be multiple
 *      stacks.  (Allocating thread stacks in the heap, and treating them as
 *      ordinary heap data objects is also possible as a last resort.
 *      However, this is likely to introduce significant amounts of excess
 *      storage retention unless the dead parts of the thread stacks are
 *      periodically cleared.)
 *   2. Client code may set `GC_stackbottom` before calling any `GC_`
 *      routines.  If the author of the client code owns the main program,
 *      this could be accomplished by introducing a new `main` function,
 *      calling `GC_call_with_gc_active()` which sets `GC_stackbottom` and
 *      then calls the original (real) `main` function.
 *
 * Each architecture may also define the style of virtual dirty bit
 * implementation to be used:
 *   - `GWW_VDB`: use Win32 `GetWriteWatch` primitive;
 *   - `MPROTECT_VDB`: write-protect the heap and catch faults;
 *   - `PROC_VDB`: use the SVR4 `/proc` primitives to read dirty bits;
 *   - `SOFT_VDB`: use the Linux `/proc` primitives to track dirty bits.
 *
 * The first and second one may be combined, in which case a runtime
 * selection will be made, based on `GetWriteWatch` availability.
 *
 * An architecture may define `DYNAMIC_LOADING` if `dyn_load.c` file
 * implements `GC_register_dynamic_libraries()` for the architecture.
 *
 * An architecture may define `PREFETCH(x)` to preload the cache with `*x`.
 * This defaults to gcc built-in operation (or a no-op for other compilers).
 *
 * `GC_PREFETCH_FOR_WRITE(x)` is used if `*x` is about to be written.
 *
 * An architecture may also define `CLEAR_DOUBLE(x)` to be a fast way to
 * clear 2 pointers at `GC_malloc`-aligned address `x`.  The default
 * implementation is just to store two `NULL` pointers.
 *
 * `HEAP_START` may be defined as the initial address hint for `mmap`-based
 * allocation.
 */

#if defined(LINUX)
/* TODO: FreeBSD too? */
EXTERN_C_END
/* This one is needed for `__GLIBC__` and `__GLIBC_MINOR__`, at least. */
#  include <features.h>
EXTERN_C_BEGIN
#endif /* LINUX */

/*
 * An internal helper to test the version of `glibc` the code is compiled
 * against.  Evaluates to non-zero if the version is `major.minor` or
 * a newer one, and to zero otherwise (including the case when the version
 * macros are not provided, i.e. not compiled against `glibc`).  Could be
 * used in `#if` directives.
 */
#if !defined(__GLIBC__) || !defined(__GLIBC_MINOR__)
#  define GC_GLIBC_PREREQ(major, minor) 0 /*< `FALSE` */
#else
#  define GC_GLIBC_PREREQ(major, minor) \
    (((major) << 8) + (minor) <= (__GLIBC__ << 8) + __GLIBC_MINOR__)
#endif

/*
 * Align a `ptr_t` pointer down/up to a given boundary.  The latter should
 * be a power of two.
 *
 * `PTR_ALIGN_DOWN(p, b)` yields the closest address not above `p` which is
 * a multiple of `b`; `PTR_ALIGN_UP(p, b)` yields the closest such address
 * not below `p`.  If `GC_CLANG_PREREQ(11, 0)` holds, then the compiler
 * alignment builtins are used; otherwise the result is computed by masking
 * the address converted to `GC_uintptr_t` and casting it back to `ptr_t`.
 */
#if GC_CLANG_PREREQ(11, 0)
#  define PTR_ALIGN_DOWN(p, b) __builtin_align_down(p, b)
#  if defined(DARWIN) && defined(GC_SINGLE_OBJ_BUILD) && GC_CLANG_PREREQ(17, 0)
/* Workaround a crash in Apple clang-17. */
/* TODO: Disable for later clang versions when the bug is fixed. */
/*
 * In this variant, the builtin is applied to the integer value of the
 * pointer, and the result is cast back to `ptr_t`.
 */
#    define PTR_ALIGN_UP(p, b) \
      ((ptr_t)__builtin_align_up((GC_uintptr_t)(p), b))
#  else
#    define PTR_ALIGN_UP(p, b) __builtin_align_up(p, b)
#  endif
#else
/* The portable variant: clear the low bits of the address. */
#  define PTR_ALIGN_DOWN(p, b) \
    ((ptr_t)((GC_uintptr_t)(p) & ~((GC_uintptr_t)(b) - (GC_uintptr_t)1)))
/*
 * The portable variant: add `b - 1` to the address and then clear the low
 * bits.  Note that `b` occurs twice in the expansion, thus it should have
 * no side effects.
 */
#  define PTR_ALIGN_UP(p, b)                                           \
    ((ptr_t)(((GC_uintptr_t)(p) + (GC_uintptr_t)(b) - (GC_uintptr_t)1) \
             & ~((GC_uintptr_t)(b) - (GC_uintptr_t)1)))
#endif

/*
 * If available, we can use `__builtin_unwind_init()` to push the relevant
 * registers onto the stack.
 *
 * `HAVE_BUILTIN_UNWIND_INIT` is defined only if `GC_GNUC_PREREQ(2, 8)`
 * holds and none of the exclusions listed in the condition applies; the
 * exclusions cover the compilers and targets where the builtin is noted
 * (see the inline remarks) to be missing, broken or a no-op.
 */
#if GC_GNUC_PREREQ(2, 8)                                                   \
    && !GC_GNUC_PREREQ(11, 0) /*< broken at least in 11.2.0 on cygwin64 */ \
    && !defined(__INTEL_COMPILER) && !defined(__PATHCC__)                  \
    && !defined(__FUJITSU)                    /*< for FX10 system */       \
    && !(defined(POWERPC) && defined(DARWIN)) /*< for MacOS X 10.3.9 */    \
    && !defined(E2K) && !defined(RTEMS)                                    \
    && !defined(__ARMCC_VERSION) /*< does not exist in armcc gnu emu */    \
    && !(defined(__clang__)                                                \
         && defined(__ARM_ARCH_5TE__) /* clang-19 emits `vpush`/`vpop` */) \
    && (!defined(__clang__)                                                \
        || GC_CLANG_PREREQ(8, 0) /* was no-op in clang-3 at least */)
#  define HAVE_BUILTIN_UNWIND_INIT
#endif

/*
 * Turn on `NO_UNDERSCORE_SETJMP` for the listed compilers and targets,
 * unless the macro is defined already (e.g. by the client).
 * NOTE(review): presumably `_setjmp()` is not available there - confirm.
 */
#ifndef NO_UNDERSCORE_SETJMP
#  if defined(__CC_ARM) || defined(CX_UX) || defined(DJGPP)        \
      || defined(EMBOX) || defined(EWS4800) || defined(LINUX)       \
      || defined(OS2) || defined(RTEMS) || defined(UTS4)            \
      || defined(MSWIN32) || defined(MSWINCE)                       \
      || (defined(NOSYS) && defined(RISCV))
#    define NO_UNDERSCORE_SETJMP
#  endif
#endif

/*
 * The common OS-specific definitions.  Should be applicable to all
 * (or most, at least) supported architectures.
 */

/* The definitions common to all architectures of Cygwin environment. */
#if defined(CYGWIN32)
#  define OS_TYPE "CYGWIN32"
#  if defined(USE_WINALLOC)
/* Use the `GetWriteWatch`-based virtual dirty bit implementation. */
#    define GWW_VDB
#  elif defined(USE_MMAP)
#    define USE_MMAP_ANON
#  endif
#  define RETRY_GET_THREAD_CONTEXT
#endif /* CYGWIN32 */

/*
 * The definitions common to all architectures of the `COSMO` environment.
 * The static data area to scan is delimited by `__data_start` (declared
 * as a weak symbol) and `_end`.
 */
#ifdef COSMO
#  define OS_TYPE "COSMO"
#  ifndef USE_GET_STACKBASE_FOR_MAIN
#    define USE_GET_STACKBASE_FOR_MAIN
#  endif
extern int __data_start[] __attribute__((__weak__));
#  define DATASTART ((ptr_t)__data_start)
extern int _end[];
#  define DATAEND ((ptr_t)_end)
#  define USE_MMAP_ANON
#  ifndef HAVE_CLOCK_GETTIME
#    define HAVE_CLOCK_GETTIME 1
#  endif
#  ifndef HAVE_PTHREAD_SETNAME_NP_WITH_TID
/* Normally should be defined by `configure`, etc. */
#    define HAVE_PTHREAD_SETNAME_NP_WITH_TID 1
#  endif
/*
 * The `mprotect`-based virtual dirty bit implementation is turned on only
 * if `GC_THREADS` is not defined, or either `NO_HANDLE_FORK` or
 * `GC_NO_CAN_CALL_ATFORK` is defined.
 */
#  if !defined(GC_THREADS) || defined(NO_HANDLE_FORK) \
      || defined(GC_NO_CAN_CALL_ATFORK)
#    define MPROTECT_VDB
/* FIXME: Otherwise `gctest` crashes in child process. */
#  endif
/* FIXME: A deadlock occurs in markers, thus disabled for now. */
#  undef PARALLEL_MARK
#endif /* COSMO */

/* The definitions common to all architectures of Darwin (macOS, iOS). */
#ifdef DARWIN
#  define OS_TYPE "DARWIN"
#  define DYNAMIC_LOADING
/*
 * TODO: See `get_end(3)`, `get_etext` and `get_end` should not be used.
 * These are not used when `dyld` support is enabled (it is the default).
 */
#  define DATASTART ((ptr_t)get_etext())
#  define DATAEND ((ptr_t)get_end())
#  define USE_MMAP_ANON
/* There seem to be some issues with try-lock hanging on Darwin. */
/* TODO: This should be looked into some more. */
#  define NO_PTHREAD_TRYLOCK
/*
 * Provide the zero default for the target conditionals below if they are
 * not defined yet.
 * NOTE(review): presumably these are missing in older SDK headers - confirm.
 */
#  ifndef TARGET_OS_XR
#    define TARGET_OS_XR 0
#  endif
#  ifndef TARGET_OS_VISION
#    define TARGET_OS_VISION 0
#  endif
#endif /* DARWIN */

/* The definitions common to all architectures Embox runs on. */
#if defined(EMBOX)
#  define OS_TYPE "EMBOX"
/*
 * The static data area is delimited by the symbols declared below.
 * Note: the designated area might be quite large (several dozens of MBs)
 * as it includes `.data` and `.bss` of all apps and modules of the built
 * binary image.
 */
extern int _modules_data_start[];
extern int _apps_bss_end[];
#  define DATAEND ((ptr_t)_apps_bss_end)
#  define DATASTART ((ptr_t)_modules_data_start)
#endif /* EMBOX */

/* The definitions common to all architectures FreeBSD runs on. */
#if defined(FREEBSD)
#  define OS_TYPE "FREEBSD"
#  if defined(__ELF__)
#    define DYNAMIC_LOADING
#  endif
#  if !defined(USE_MMAP)
/* `sbrk()` is not available. */
#    define USE_MMAP 1
#  endif
#  define SPECIFIC_MAIN_STACKBOTTOM
#  if !(defined(ALPHA) || defined(SPARC))
extern char etext[];
#    define DATASTART_USES_XGETDATASTART
#    define DATASTART GC_SysVGetDataStart(0x1000, (ptr_t)etext)
/*
 * `MPROTECT_VDB` is not turned on if `malloc` is redirected because,
 * similarly to Linux, `fread()` might use `malloc()`.
 */
#    if !defined(REDIRECT_MALLOC)
#      define MPROTECT_VDB
#    endif
#  endif
#endif /* FREEBSD */

/* The definitions common to all architectures Haiku runs on. */
#ifdef HAIKU
#  define OS_TYPE "HAIKU"
#  define DYNAMIC_LOADING
/* Note: `DATASTART` is not used really, see `GC_register_main_static_data`. */
extern int etext[];
/* The address of `etext` rounded up to a multiple of 0x1000. */
#  define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x1000)
#  ifndef USE_GET_STACKBASE_FOR_MAIN
#    define USE_GET_STACKBASE_FOR_MAIN
#  endif
#  define USE_MMAP_ANON
/*
 * On Haiku R1, at least, `pthreads` locks never spin but always call
 * into the kernel if the lock cannot be acquired with a simple atomic
 * operation.  (Up to 5x overall performance improvement of the
 * collector is observed by forcing use of spin locks.)
 */
#  ifndef USE_SPIN_LOCK
#    define USE_SPIN_LOCK
#  endif
/*
 * TODO: `MPROTECT_VDB` is not working correctly on anything other than
 * recent nightly Haiku OS builds (as of Nov 2024), and also it is
 * considerably slower than regular collecting, so do not enable it for now.
 */
EXTERN_C_END
/* This one is needed for `B_PAGE_SIZE` (used by `GETPAGESIZE()` below). */
#  include <OS.h>
EXTERN_C_BEGIN
#  define GETPAGESIZE() (unsigned)B_PAGE_SIZE
#  ifndef HAVE_CLOCK_GETTIME
#    define HAVE_CLOCK_GETTIME 1
#  endif
#endif /* HAIKU */

/* The definitions common to all architectures HP-UX runs on. */
#if defined(HPUX)
#  define OS_TYPE "HPUX"
#  define DYNAMIC_LOADING
/* The page size is obtained from the system at run time. */
#  define GETPAGESIZE() (unsigned)sysconf(_SC_PAGE_SIZE)
#  if defined(USE_MMAP)
#    define USE_MMAP_ANON
#  endif
extern int __data_start[];
#  define DATASTART ((ptr_t)__data_start)
#endif /* HPUX */

/* The definitions common to all architectures GNU Hurd runs on. */
#if defined(HURD)
#  define OS_TYPE "HURD"
#  define DYNAMIC_LOADING
#  define USE_MMAP_ANON
/* TODO: `MPROTECT_VDB` is not quite working yet? */
#  define HEURISTIC2
/* The data start is searched for, the data end is given by `_end`. */
extern int _end[];
#  define DATAEND ((ptr_t)_end)
#  define SEARCH_FOR_DATA_START
#endif /* HURD */

/* The definitions common to all architectures Linux runs on. */
#ifdef LINUX
#  define OS_TYPE "LINUX"
#  if defined(FORCE_MPROTECT_BEFORE_MADVISE) || defined(PREFER_MMAP_PROT_NONE)
#    define COUNT_UNMAPPED_REGIONS
#  endif
#  define RETRY_TKILL_ON_EAGAIN
/*
 * The OS-specific way of the main stack bottom detection is selected here
 * for all architectures except for MIPS and PowerPC (the PowerPC-specific
 * part makes the choice itself; MIPS presumably does likewise).
 */
#  if !defined(MIPS) && !defined(POWERPC)
#    define SPECIFIC_MAIN_STACKBOTTOM
#  endif
#  if defined(__ELF__) && !defined(IA64)
#    define DYNAMIC_LOADING
#  endif
/*
 * For ELF targets (except for the architectures listed in the condition),
 * `_end` designates the end of the static data area.
 */
#  if defined(__ELF__) && !defined(ARC) && !defined(RISCV) && !defined(S390) \
      && !defined(TILEGX) && !defined(TILEPRO)
extern int _end[];
#    define DATAEND ((ptr_t)_end)
#  endif
#  if !defined(REDIRECT_MALLOC) && !defined(E2K)
/* Requires Linux 2.3.47 or later. */
#    define MPROTECT_VDB
#  else
/*
 * We seem to get random errors in the incremental mode, possibly because
 * the Linux threads implementation itself is a `malloc` client and cannot
 * deal with the signals.  `fread()` uses `malloc()` too.
 * In case of e2k, unless `-fsemi-spec-ld` (or `-O0`) option is passed
 * to gcc (both when compiling the collector library and the client),
 * a semi-speculative optimization may lead to `SIGILL` (with `ILL_ILLOPN`
 * `si_code`) instead of `SIGSEGV`.
 */
#  endif
#endif /* LINUX */

/* The definitions common to all architectures of `KOS`. */
#if defined(KOS)
#  define OS_TYPE "KOS"
extern int __data_start[];
#  define DATASTART ((ptr_t)__data_start)
#  if !defined(USE_GET_STACKBASE_FOR_MAIN)
/* Note: this requires `-lpthread` option. */
#    define USE_GET_STACKBASE_FOR_MAIN
#  endif
#endif /* KOS */

/* The definitions common to all architectures of Win32 targets. */
#if defined(MSWIN32)
#  define OS_TYPE "MSWIN32"
/* `STACKBOTTOM` and `DATASTART` are handled specially in `os_dep.c` file. */
#  ifndef CPPCHECK
#    define DATAEND /*< not needed */
#  endif
/*
 * The `GetWriteWatch`-based virtual dirty bit implementation is not
 * turned on if `USE_GLOBAL_ALLOC` is defined (unless `MSWINRT_FLAVOR`
 * is defined too) because `MEM_WRITE_WATCH` cannot be passed to
 * `GlobalAlloc()`.
 */
#  if !defined(USE_GLOBAL_ALLOC) || defined(MSWINRT_FLAVOR)
#    define GWW_VDB
#  endif
#endif /* MSWIN32 */

/* The definitions common to all architectures of WinCE targets. */
#if defined(MSWINCE)
#  define OS_TYPE "MSWINCE"
#  ifndef CPPCHECK
#    define DATAEND /*< not needed */
#  endif
#endif /* MSWINCE */

/* The definitions common to all architectures of `NACL` environment. */
#if defined(NACL)
#  define OS_TYPE "NACL"
#  ifdef __GLIBC__
#    define DYNAMIC_LOADING
#  endif
/* The static data area starts at a fixed address and ends at `_end`. */
extern int _end[];
#  define DATASTART MAKE_CPTR(0x10020000)
#  define DATAEND ((ptr_t)_end)
/* The stack bottom is computed by rounding up to `STACK_GRAN` boundary. */
#  define HEURISTIC1
#  define STACK_GRAN 0x10000
#  define NO_PTHREAD_GETATTR_NP
#  define USE_MMAP_ANON
#  define MAX_NACL_GC_THREADS 1024
/* FIXME: Not real page size. */
#  define GETPAGESIZE() 65536
#endif /* NACL */

/* The definitions common to all architectures NetBSD runs on. */
#if defined(NETBSD)
#  define OS_TYPE "NETBSD"
#  define MPROTECT_VDB
#  define HEURISTIC2
#  if defined(__ELF__)
#    define DYNAMIC_LOADING
#    define SEARCH_FOR_DATA_START
#  elif !defined(MIPS)
/* TODO: Probably do not exclude it. */
extern char etext[];
#    define DATASTART ((ptr_t)etext)
#  endif
#endif /* NETBSD */

/* The definitions common to all architectures of `NEXT` targets. */
#if defined(NEXT)
#  define OS_TYPE "NEXT"
/* The memory unmapping feature is forcibly turned off. */
#  undef USE_MUNMAP
#  define DATAEND /*< not needed */
#  define DATASTART ((ptr_t)get_etext())
#endif /* NEXT */

/* The definitions common to all architectures OpenBSD runs on. */
#if defined(OPENBSD)
#  define OS_TYPE "OPENBSD"
#  define MPROTECT_VDB
#  if !defined(GC_THREADS)
#    define HEURISTIC2
#  endif
#  if !defined(__ELF__)
extern char etext[];
#    define DATASTART ((ptr_t)etext)
#  else
extern int __data_start[];
extern int _end[];
#    define DYNAMIC_LOADING
#    define DATASTART ((ptr_t)__data_start)
#    define DATAEND ((ptr_t)_end)
#  endif
#endif /* OPENBSD */

/* The definitions common to all architectures QNX runs on. */
#if defined(QNX)
#  define OS_TYPE "QNX"
/* NOTE(review): presumably the flag is not supported by the OS - confirm. */
#  define SA_RESTART 0
/* The static data area is the one between `etext` and `_end`. */
extern char etext[];
extern int _end[];
#  define DATASTART ((ptr_t)etext)
#  define DATAEND ((ptr_t)_end)
#  if !defined(SPECIFIC_MAIN_STACKBOTTOM)
/* TODO: This is not used by default. */
#    define HEURISTIC1
#    define STACK_GRAN 0x1000000
#  endif
#endif /* QNX */

/* The definitions common to all architectures SerenityOS runs on. */
#if defined(SERENITY)
#  define OS_TYPE "SERENITY"
/*
 * The static data area starts at the address of `etext` rounded up to
 * a multiple of 0x1000, and ends at `_end`.
 */
extern int etext[];
extern int _end[];
#  define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x1000)
#  define DATAEND ((ptr_t)_end)
#  define USE_MMAP_ANON
#  define DYNAMIC_LOADING
/* TODO: Enable `mprotect`-based VDB. */
#endif /* SERENITY */

/* The definitions common to all architectures Solaris runs on. */
#ifdef SOLARIS
#  define OS_TYPE "SOLARIS"
extern int _end[];
#  define DATAEND ((ptr_t)_end)
#  if !defined(USE_MMAP) && defined(REDIRECT_MALLOC)
#    define USE_MMAP 1
/*
 * Otherwise we now use `calloc()`.  `mmap()` may result in the heap
 * interleaved with thread stacks, which can result in excessive
 * black-listing.  `sbrk()` is unusable since it does not interact
 * correctly with the system `malloc()`.
 */
#  endif
/*
 * `HEAP_START` is a fixed address if `USE_MMAP` is defined, otherwise it
 * is the address of the static data area end.
 */
#  ifdef USE_MMAP
#    define HEAP_START ((word)0x40000000)
#  else
#    define HEAP_START ADDR(DATAEND)
#  endif
/* The `mprotect`-based VDB is turned on only for single-threaded builds. */
#  ifndef GC_THREADS
#    define MPROTECT_VDB
#  endif
#  define DYNAMIC_LOADING
/*
 * Defining `STACKBOTTOM` as `(ptr_t)_start` worked through 2.7, but
 * reportedly breaks under 2.8.  It appears that the stack base is
 * a property of the executable, so this should not break old executables.
 * `HEURISTIC1` reportedly no longer works under Solaris 2.7.
 * `HEURISTIC2` probably works, but this appears to be preferable.
 * Apparently `USRSTACK` is defined to be `USERLIMIT`, but in some
 * installations that is undefined.  We work around this with a gross hack.
 */
EXTERN_C_END
#  include <sys/vmparam.h>
EXTERN_C_BEGIN
/* `USRSTACK` is used only if `USERLIMIT` is really defined. */
#  ifdef USERLIMIT
/* This should work everywhere, but does not. */
#    define STACKBOTTOM ((ptr_t)USRSTACK)
#  else
#    define HEURISTIC2
#  endif
#endif /* SOLARIS */

/* The definitions common to all architectures of Symbian targets. */
#if defined(SYMBIAN)
#  define OS_TYPE "SYMBIAN"
#  if !defined(USE_MMAP)
/* `sbrk()` is not available. */
#    define USE_MMAP 1
#  endif
/* The start and the end are same, thus the designated area is empty. */
#  define DATAEND ((ptr_t)ALIGNMENT)
#  define DATASTART ((ptr_t)ALIGNMENT) /*< cannot be `NULL` */
#endif /* SYMBIAN */

/*
 * Below are the definitions specific to each supported architecture
 * and OS, grouped by the former.
 */

/* Motorola 680x0: 32-bit words, pointers are aligned on 2-byte boundary. */
#ifdef M68K
#  define MACH_TYPE "M68K"
#  define CPP_WORDSZ 32
#  define ALIGNMENT 2 /*< not 4 */
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef LINUX
#    ifdef __ELF__
/* The data start is searched for if compiled against `glibc` v2.0+. */
#      if GC_GLIBC_PREREQ(2, 0)
#        define SEARCH_FOR_DATA_START
#      else
/*
 * Hideous kludge: `__environ` is the first word in platform `crt0.o` file,
 * and delimits the start of the data segment, no matter which `ld` options
 * were passed through.  We could use `_etext` instead, but that would
 * include `.rodata`, which may contain large read-only data tables that
 * we would rather not scan.
 */
extern char **__environ;
#        define DATASTART ((ptr_t)(&__environ))
#      endif
#    else
/* Non-ELF: use the address of `etext` rounded up to a 0x1000 multiple. */
extern int etext[];
#      define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x1000)
#    endif
#  endif
#  ifdef NEXT
/* The stack bottom is at a fixed address. */
#    define STACKBOTTOM MAKE_CPTR(0x4000000)
#  endif
#endif

/*
 * IBM/Apple PowerPC.  The word size (32 or 64) is chosen in each of the
 * OS-specific parts below, mostly based on the compiler predefined macros.
 */
#ifdef POWERPC
#  define MACH_TYPE "POWERPC"
#  ifdef LINUX
#    if defined(__powerpc64__)
#      define CPP_WORDSZ 64
#      ifndef HBLKSIZE
#        define HBLKSIZE 4096
#      endif
#    else
#      define CPP_WORDSZ 32
#    endif
/*
 * `HEURISTIC1` has been reliably reported to fail for a 32-bit executable
 * on a 64-bit kernel.
 */
#    if defined(__bg__)
/*
 * The Linux Compute Node Kernel (used on BlueGene systems) does not
 * support the `SPECIFIC_MAIN_STACKBOTTOM` way.
 */
#      define HEURISTIC2
#      define NO_PTHREAD_GETATTR_NP
#    else
#      define SPECIFIC_MAIN_STACKBOTTOM
#    endif
#    define SEARCH_FOR_DATA_START
/* Turn on the Linux `/proc`-based dirty bits tracking (if not yet). */
#    ifndef SOFT_VDB
#      define SOFT_VDB
#    endif
#  endif
#  ifdef DARWIN
/* The stack bottom is at a fixed address (depending on the word size). */
#    if defined(__ppc64__)
#      define CPP_WORDSZ 64
#      define STACKBOTTOM MAKE_CPTR(0x7fff5fc00000)
#      define CACHE_LINE_SIZE 64
#      ifndef HBLKSIZE
#        define HBLKSIZE 4096
#      endif
#    else
#      define CPP_WORDSZ 32
#      define STACKBOTTOM MAKE_CPTR(0xc0000000)
#    endif
#    define MPROTECT_VDB
#    if defined(USE_PPC_PREFETCH) && defined(__GNUC__)
/* The performance impact of prefetches is untested. */
#      define PREFETCH(x) \
        __asm__ __volatile__("dcbt 0,%0" : : "r"((const void *)(x)))
#      define GC_PREFETCH_FOR_WRITE(x) \
        __asm__ __volatile__("dcbtst 0,%0" : : "r"((const void *)(x)))
#    endif
#  endif
#  ifdef OPENBSD
#    if defined(__powerpc64__)
#      define CPP_WORDSZ 64
#    else
#      define CPP_WORDSZ 32
#    endif
#  endif
#  ifdef FREEBSD
#    if defined(__powerpc64__)
#      define CPP_WORDSZ 64
#      ifndef HBLKSIZE
#        define HBLKSIZE 4096
#      endif
#    else
#      define CPP_WORDSZ 32
#    endif
#  endif
#  ifdef NETBSD
#    define CPP_WORDSZ 32
#  endif
#  ifdef SN_TARGET_PS3
#    define OS_TYPE "SN_TARGET_PS3"
#    define CPP_WORDSZ 32
#    define NO_GETENV
/*
 * NOTE(review): `__bss_start` is declared as a plain `int` (not as an
 * array), so `DATASTART` below is computed from the value stored at that
 * symbol, not from its address (unlike `_end`) - confirm this is intended.
 */
extern int _end[], __bss_start;
#    define DATASTART ((ptr_t)__bss_start)
#    define DATAEND ((ptr_t)_end)
#    define STACKBOTTOM ((ptr_t)ps3_get_stack_bottom())
/* The heap memory is obtained using the platform-specific allocator. */
void *ps3_get_mem(size_t lb);
#    define GET_MEM(lb) ps3_get_mem(lb)
/*
 * The current `LOCK()` implementation for PS3 explicitly uses
 * `pthread_mutex_lock()` for some reason.
 */
#    define NO_PTHREAD_TRYLOCK
#  endif
#  ifdef AIX
#    define OS_TYPE "AIX"
#    undef ALIGNMENT /*< in case it is defined */
#    undef IA64
/*
 * DOB: some AIX installs stupidly define `IA64` in platform
 * `sys/systemcfg.h` file.
 */
#    ifdef __64BIT__
#      define CPP_WORDSZ 64
#      define STACKBOTTOM MAKE_CPTR(0x1000000000000000)
#    else
#      define CPP_WORDSZ 32
/* In the 32-bit mode, the address of `errno` is used as the stack bottom. */
extern int errno;
#      define STACKBOTTOM ((ptr_t)(&errno))
#    endif
#    define USE_MMAP_ANON
/*
 * From AIX linker man page:
 *   - `_text` specifies the first location of the program;
 *   - `_etext` specifies the first location after the program;
 *   - `_data` specifies the first location of the data;
 *   - `_edata` specifies the first location after the initialized data;
 *   - `_end` (or `end`) specifies the first location after all data.
 */
extern int _data[], _end[];
#    define DATASTART ((ptr_t)_data)
#    define DATAEND ((ptr_t)_end)
#    define MPROTECT_VDB
#    define DYNAMIC_LOADING
/*
 * Note: for really old versions of AIX, `DYNAMIC_LOADING` may have to
 * be removed.
 */
#  endif
#  ifdef NOSYS
#    define OS_TYPE "NOSYS"
#    define CPP_WORDSZ 32
/*
 * NOTE(review): the symbols are declared as arrays of `void`, which is
 * not valid in the standard C - confirm the target compiler accepts it.
 */
extern void __end[], __dso_handle[];
#    define DATASTART ((ptr_t)__dso_handle) /*< OK, that is ugly */
#    define DATAEND ((ptr_t)__end)
/* Note: stack starts at 0xE0000000 for the simulator. */
#    define STACKBOTTOM PTR_ALIGN_UP(GC_approx_sp(), 0x10000000)
#  endif
#endif /* POWERPC */

/* DEC VAX: 32-bit words, the static data area starts at `etext`. */
#if defined(VAX)
#  define MACH_TYPE "VAX"
#  define CPP_WORDSZ 32
/* Pointers are `longword`-aligned by C compiler v4.2. */
extern char etext[];
#  define DATASTART ((ptr_t)etext)
#  if defined(BSD)
#    define OS_TYPE "BSD"
/* Note: `HEURISTIC2` may be OK, but it is hard to test. */
#    define HEURISTIC1
#    define STACK_GRAN 0x1000000
#  endif
#  if defined(ULTRIX)
#    define OS_TYPE "ULTRIX"
/* The stack bottom is at a fixed address. */
#    define STACKBOTTOM MAKE_CPTR(0x7fffc800)
#  endif
#endif /* VAX */

/*
 * SPARC: the word size is 64 if either `__arch64__` or `__sparcv9` is
 * defined by the compiler, and 32 otherwise.
 */
#ifdef SPARC
#  define MACH_TYPE "SPARC"
#  if defined(__arch64__) || defined(__sparcv9)
#    define CPP_WORDSZ 64
#    define ELF_CLASS ELFCLASS64
#  else
#    define CPP_WORDSZ 32
#    define ALIGNMENT 4 /*< required by hardware */
#  endif
#  ifdef SOLARIS
/*
 * The data start is computed at run time from the address of `_etext`.
 * NOTE(review): the first argument of `GC_SysVGetDataStart()` is presumably
 * the maximum page size - confirm.
 */
extern int _etext[];
#    define DATASTART GC_SysVGetDataStart(0x10000, (ptr_t)_etext)
#    define PROC_VDB
/*
 * `getpagesize()` appeared to be missing from at least one Solaris 5.4
 * installation.  Weird.
 */
#    define GETPAGESIZE() (unsigned)sysconf(_SC_PAGESIZE)
#  endif
#  ifdef DRSNX
#    define OS_TYPE "DRSNX"
extern int etext[];
#    define DATASTART GC_SysVGetDataStart(0x10000, (ptr_t)etext)
#    define MPROTECT_VDB
#    define STACKBOTTOM MAKE_CPTR(0xdfff0000)
#    define DYNAMIC_LOADING
#  endif
#  ifdef LINUX
/* A bigger value is passed as the first argument in the 64-bit mode. */
extern int _etext[];
#    ifdef __arch64__
#      define DATASTART GC_SysVGetDataStart(0x100000, (ptr_t)_etext)
#    else
#      define DATASTART GC_SysVGetDataStart(0x10000, (ptr_t)_etext)
#    endif
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef FREEBSD
/*
 * Two static data regions are designated: the first one starts at `etext`
 * and its end is determined at run time by `GC_find_limit()`; the second
 * one is between `edata` and `end`.
 */
extern char etext[];
#    define DATASTART ((ptr_t)(&etext))
#    define DATAEND ((ptr_t)GC_find_limit(DATASTART, TRUE))
#    define DATAEND_IS_FUNC
#    define GC_HAVE_DATAREGION2
extern char edata[], end[];
#    define DATASTART2 ((ptr_t)(&edata))
#    define DATAEND2 ((ptr_t)(&end))
#  endif
#endif /* SPARC */

/*
 * I386 (32-bit x86): 32-bit words.  Each OS variant below supplies its
 * own data segment bounds, stack bottom and `*_VDB` choice as needed;
 * the variants marked "Nothing specific" need no architecture-specific
 * settings here.  Compiling this section for a 64-bit target (`__LP64__`
 * or `_WIN64`) is rejected with `#error` (except under `CPPCHECK`).
 */
#ifdef I386
#  define MACH_TYPE "I386"
#  if (defined(__LP64__) || defined(_WIN64)) && !defined(CPPCHECK)
#    error This should be handled as X86_64
#  endif
#  define CPP_WORDSZ 32
/*
 * The 4-byte alignment appears to hold for all 32-bit compilers
 * except Borland and Watcom.  If using the Borland (bcc32) or
 * Watcom (wcc386) compiler, `-a4` or `-zp4` option, respectively,
 * should be passed to the compiler, both for building the library
 * and client code.  (The alternate solution is to define
 * `FORCE_ALIGNMENT_ONE` macro but this would have significant
 * negative performance implications.)
 */
#  if defined(FORCE_ALIGNMENT_ONE) \
      && (defined(__BORLANDC__) || defined(__WATCOMC__))
#    define ALIGNMENT 1
#  endif
#  ifdef SEQUENT
#    define OS_TYPE "SEQUENT"
extern int etext[];
#    define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x1000)
#    define STACKBOTTOM MAKE_CPTR(0x3ffff000)
#  endif
#  ifdef HAIKU
/* Nothing specific. */
#  endif
#  ifdef HURD
/* Nothing specific. */
#  endif
#  ifdef EMBOX
/* Nothing specific. */
#  endif
#  ifdef NACL
/* Nothing specific. */
#  endif
#  ifdef QNX
/* Nothing specific. */
#  endif
#  ifdef SERENITY
/* Nothing specific. */
#  endif
#  ifdef SOLARIS
extern int _etext[];
#    define DATASTART GC_SysVGetDataStart(0x1000, (ptr_t)_etext)
#    define PROC_VDB
#  endif
#  ifdef SCO
#    define OS_TYPE "SCO"
extern int etext[];
/*
 * `etext` rounded up to a 0x400000 boundary plus the low 12 bits of the
 * `etext` address.
 */
#    define DATASTART \
      (PTR_ALIGN_UP((ptr_t)etext, 0x400000) + (ADDR(etext) & 0xfff))
#    define STACKBOTTOM MAKE_CPTR(0x7ffffffc)
#  endif
#  ifdef SCO_ELF
#    define OS_TYPE "SCO_ELF"
extern int etext[];
#    define DATASTART ((ptr_t)etext)
#    define STACKBOTTOM MAKE_CPTR(0x8048000)
#    define DYNAMIC_LOADING
#    define ELF_CLASS ELFCLASS32
#  endif
#  ifdef DGUX
#    define OS_TYPE "DGUX"
extern int _etext, _end;
#    define DATASTART GC_SysVGetDataStart(0x1000, (ptr_t)(&_etext))
#    define DATASTART_USES_XGETDATASTART
#    define DATAEND ((ptr_t)(&_end))
#    define HEURISTIC2
#    define DYNAMIC_LOADING
/* `USE_MMAP` is turned on by default for this target. */
#    ifndef USE_MMAP
#      define USE_MMAP 1
#    endif
/* An all-ones pointer value. */
#    define MAP_FAILED ((void *)(~(GC_uintptr_t)0))
#    define HEAP_START ((word)0x40000000)
#  endif /* DGUX */
#  ifdef LINUX
/*
 * This encourages `mmap()` to give us low addresses, thus allowing the
 * heap to grow to ~3 GB.
 */
#    define HEAP_START ((word)0x1000)
#    ifdef __ELF__
#      if GC_GLIBC_PREREQ(2, 0) || defined(HOST_ANDROID)
#        define SEARCH_FOR_DATA_START
#      else
/* See the comment of the Linux/m68k case. */
extern char **__environ;
#        define DATASTART ((ptr_t)(&__environ))
#      endif
#      if !defined(GC_NO_SIGSETJMP)                                  \
          && (defined(HOST_TIZEN)                                    \
              || (defined(HOST_ANDROID)                              \
                  && !(GC_GNUC_PREREQ(4, 8) || GC_CLANG_PREREQ(3, 2) \
                       || __ANDROID_API__ >= 18)))
/*
 * Older Android NDK releases lack `sigsetjmp` in x86 `libc` (`setjmp` is
 * used instead to find `data_start`).  The bug is fixed in Android NDK r8e
 * (so, it is OK to use `sigsetjmp` if gcc-4.8+, clang-3.2+ or Android
 * API level 18+).
 */
#        define GC_NO_SIGSETJMP 1
#      endif
#    else
/* Non-ELF: the data start is `etext` rounded up to a 0x1000 boundary. */
extern int etext[];
#      define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x1000)
#    endif
#    ifdef USE_I686_PREFETCH
/*
 * Empirically `prefetcht0` is much more effective at reducing cache miss
 * stalls for the targeted load instructions.  But it seems to interfere
 * enough with other cache traffic that the net result is worse than
 * `prefetchnta`.
 */
#      define PREFETCH(x) \
        __asm__ __volatile__("prefetchnta %0" : : "m"(*(char *)(x)))
#      ifdef FORCE_WRITE_PREFETCH
/*
 * Using prefetches for write seems to have a slight negative impact on
 * performance, at least for a PIII/500.
 */
#        define GC_PREFETCH_FOR_WRITE(x) \
          __asm__ __volatile__("prefetcht0 %0" : : "m"(*(char *)(x)))
#      else
#        define GC_NO_PREFETCH_FOR_WRITE
#      endif
#    elif defined(USE_3DNOW_PREFETCH)
#      define PREFETCH(x) \
        __asm__ __volatile__("prefetch %0" : : "m"(*(char *)(x)))
#      define GC_PREFETCH_FOR_WRITE(x) \
        __asm__ __volatile__("prefetchw %0" : : "m"(*(char *)(x)))
#    endif
#    if defined(__GLIBC__) && !defined(__UCLIBC__) \
        && !defined(GLIBC_TSX_BUG_FIXED)
/* Work around lock elision implementation for some `glibc`. */
#      define GLIBC_2_19_TSX_BUG
EXTERN_C_END
#      include <gnu/libc-version.h> /*< for `gnu_get_libc_version()` */
EXTERN_C_BEGIN
#    endif
#    ifndef SOFT_VDB
#      define SOFT_VDB
#    endif
#  endif
#  ifdef CYGWIN32
#    define WOW64_THREAD_CONTEXT_WORKAROUND
#    define DATASTART ((ptr_t)GC_DATASTART) /*< defined in `gc.h` file */
#    define DATAEND ((ptr_t)GC_DATAEND)
#    ifndef USE_WINALLOC
/* `MPROTECT_VDB` does not work, it leads to a spurious exit. */
#    endif
#  endif
#  ifdef INTERIX
#    define OS_TYPE "INTERIX"
extern int _data_start__[], _bss_end__[];
#    define DATASTART ((ptr_t)_data_start__)
#    define DATAEND ((ptr_t)_bss_end__)
/*
 * A GNU statement expression which yields the pointer-sized value loaded
 * from `%fs:4` (via the `eax` register).
 */
#    define STACKBOTTOM                                        \
      ({                                                       \
        ptr_t rv;                                              \
        __asm__ __volatile__("movl %%fs:4, %%eax" : "=a"(rv)); \
        rv;                                                    \
      })
#    define USE_MMAP_ANON
#  endif
#  ifdef OS2
#    define OS_TYPE "OS2"
/*
 * `STACKBOTTOM` and `DATASTART` are handled specially in `os_dep.c`
 * file.  OS/2 actually has the right system call!
 */
#    define DATAEND /*< not needed */
#    undef USE_MUNMAP
#    define GETPAGESIZE() os2_getpagesize()
#  endif
#  ifdef MSWIN32
#    define WOW64_THREAD_CONTEXT_WORKAROUND
#    define RETRY_GET_THREAD_CONTEXT
#    if defined(__BORLANDC__)
/*
 * TODO: VDB based on `VirtualProtect` and `SetUnhandledExceptionFilter`
 * does not work correctly.
 */
#    else
#      define MPROTECT_VDB
#    endif
#  endif
#  ifdef MSWINCE
/* Nothing specific. */
#  endif
#  ifdef DJGPP
#    define OS_TYPE "DJGPP"
EXTERN_C_END
#    include "stubinfo.h"
EXTERN_C_BEGIN
extern int etext[];
#    define DATASTART PTR_ALIGN_UP((ptr_t)etext, 0x200)
extern int __djgpp_stack_limit, _stklen;
/* The stack bottom is the stack limit plus the stack length. */
#    define STACKBOTTOM (MAKE_CPTR(__djgpp_stack_limit) + _stklen)
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef FREEBSD
/* Only the `glibc`-based variant needs an explicit `DATAEND`. */
#    if defined(__GLIBC__)
extern int _end[];
#      define DATAEND ((ptr_t)_end)
#    endif
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef THREE86BSD
#    define OS_TYPE "THREE86BSD"
#    define HEURISTIC2
extern char etext[];
#    define DATASTART ((ptr_t)etext)
#  endif
#  ifdef BSDI
#    define OS_TYPE "BSDI"
#    define HEURISTIC2
extern char etext[];
#    define DATASTART ((ptr_t)etext)
#  endif
#  ifdef NEXT
#    define STACKBOTTOM MAKE_CPTR(0xc0000000)
#  endif
#  ifdef RTEMS
#    define OS_TYPE "RTEMS"
EXTERN_C_END
#    include <sys/unistd.h>
EXTERN_C_BEGIN
extern int etext[];
#    define DATASTART ((ptr_t)etext)
/* The stack bottom is obtained from a function declared here. */
void *rtems_get_stack_bottom(void);
#    define InitStackBottom rtems_get_stack_bottom()
#    define STACKBOTTOM ((ptr_t)InitStackBottom)
#    undef USE_MUNMAP
#  endif
#  ifdef DOS4GW
#    define OS_TYPE "DOS4GW"
extern long __nullarea;
extern char _end;
extern char *_STACKTOP;
/*
 * Depending on calling conventions Watcom C either precedes or does not
 * precede the names of the C variables with an underscore.
 * Make sure the startup code variables always have the same name.
 */
#    pragma aux __nullarea "*";
#    pragma aux _end "*";
#    define STACKBOTTOM ((ptr_t)_STACKTOP) /*< confused? me too */
#    define DATASTART ((ptr_t)(&__nullarea))
#    define DATAEND ((ptr_t)(&_end))
#    undef USE_MUNMAP
#    define GETPAGESIZE() 4096
#  endif
#  ifdef DARWIN
#    define DARWIN_DONT_PARSE_STACK 1
#    define STACKBOTTOM MAKE_CPTR(0xc0000000)
#    define MPROTECT_VDB
#  endif
#endif /* I386 */

/*
 * LoongArch: the word size follows the compiler-reported `size_t` width.
 * On Linux the data segment starts at the `__data_start` symbol, which
 * is declared weak.
 */
#ifdef LOONGARCH
#  define MACH_TYPE "LOONGARCH"
#  define CPP_WORDSZ (__SIZEOF_SIZE_T__ * 8)
#  ifdef LINUX
#    pragma weak __data_start
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* LOONGARCH */

/* SW_64: 64-bit words; no Linux-specific settings are needed here. */
#ifdef SW_64
#  define MACH_TYPE "SW_64"
#  define CPP_WORDSZ 64
#  ifdef LINUX
/* Nothing specific. */
#  endif
#endif /* SW_64 */

/*
 * MIPS: unlike most other architectures, the word size is chosen per OS
 * variant (from `_MIPS_SZPTR` where that macro is consulted, otherwise
 * a fixed value).
 */
#ifdef MIPS
#  define MACH_TYPE "MIPS"
#  ifdef LINUX
#    ifdef _MIPS_SZPTR
#      define CPP_WORDSZ _MIPS_SZPTR
#    else
#      define CPP_WORDSZ 32
#    endif
#    pragma weak __data_start
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#    ifndef HBLKSIZE
#      define HBLKSIZE 4096
#    endif
/* A hard-coded stack bottom is used only if `glibc` is older than 2.2. */
#    if GC_GLIBC_PREREQ(2, 2)
#      define SPECIFIC_MAIN_STACKBOTTOM
#    else
#      define STACKBOTTOM MAKE_CPTR(0x7fff8000)
#    endif
#  endif
#  ifdef EWS4800
#    define OS_TYPE "EWS4800"
#    define HEURISTIC2
#    if defined(_MIPS_SZPTR) && (_MIPS_SZPTR == 64)
#      define CPP_WORDSZ _MIPS_SZPTR
extern int _fdata[], _end[];
#      define DATASTART ((ptr_t)_fdata)
#      define DATAEND ((ptr_t)_end)
#    else
#      define CPP_WORDSZ 32
extern int etext[], edata[];
/*
 * `etext` rounded up to a 0x40000 boundary plus the low 16 bits of the
 * `etext` address.
 */
#      define DATASTART \
        (PTR_ALIGN_UP((ptr_t)etext, 0x40000) + (ADDR(etext) & 0xffff))
#      define DATAEND ((ptr_t)edata)
#      define GC_HAVE_DATAREGION2
extern int _DYNAMIC_LINKING[], _gp[];
/*
 * The second region starts at `edata` unless `_DYNAMIC_LINKING` is
 * non-null, in which case its start is derived from `_gp`.
 */
#      define DATASTART2                                               \
        (_DYNAMIC_LINKING ? PTR_ALIGN_UP((ptr_t)_gp + 0x8000, 0x40000) \
                          : (ptr_t)edata)
extern int end[];
#      define DATAEND2 ((ptr_t)end)
#    endif
#  endif
#  ifdef ULTRIX
#    define OS_TYPE "ULTRIX"
#    define CPP_WORDSZ 32
#    define HEURISTIC2
/*
 * Note: the actual beginning of the data segment could probably be
 * slightly higher since startup code allocates lots of stuff.
 */
#    define DATASTART MAKE_CPTR(0x10000000)
#  endif
#  ifdef IRIX5
#    define OS_TYPE "IRIX5"
#    ifdef _MIPS_SZPTR
#      define CPP_WORDSZ _MIPS_SZPTR
#    else
#      define CPP_WORDSZ 32
#    endif
#    define HEURISTIC2
extern int _fdata[];
#    define DATASTART ((ptr_t)_fdata)
/*
 * Lowest plausible heap address.  In the `USE_MMAP` case, we map there.
 * In either case it is used to identify heap sections so they are not
 * considered as roots.
 */
#    ifdef USE_MMAP
#      define HEAP_START ((word)0x30000000)
#    else
#      define HEAP_START ADDR(DATASTART)
#    endif
/* `MPROTECT_VDB` should work, but there is evidence of a breakage. */
#    define DYNAMIC_LOADING
#  endif
#  ifdef MSWINCE
#    define CPP_WORDSZ 32
#  endif
#  ifdef NETBSD
#    define CPP_WORDSZ 32
/* Fixed addresses are used only for the non-ELF case. */
#    ifndef __ELF__
#      define DATASTART MAKE_CPTR(0x10000000)
#      define STACKBOTTOM MAKE_CPTR(0x7ffff000)
#    endif
#  endif
#  ifdef OPENBSD
#    define CPP_WORDSZ 64 /*< all OpenBSD/mips platforms are 64-bit */
#  endif
#  ifdef FREEBSD
#    define CPP_WORDSZ 32
#  endif
#  ifdef NONSTOP
#    define OS_TYPE "NONSTOP"
#    define CPP_WORDSZ 32
#    define DATASTART MAKE_CPTR(0x8000000)
extern char **environ;
/* The data end is placed 0x10 pointer-sized elements below `environ`. */
#    define DATAEND ((ptr_t)(environ - 0x10))
#    define STACKBOTTOM MAKE_CPTR(0x4fffffff)
#    undef USE_MUNMAP
#  endif
#endif /* MIPS */

/*
 * Nios II: 32-bit words, `HBLKSIZE` defaults to 4096 unless already
 * defined.  On Linux the data segment starts at `__data_start`.
 */
#ifdef NIOS2
#  define MACH_TYPE "NIOS2"
#  define CPP_WORDSZ 32
#  ifndef HBLKSIZE
#    define HBLKSIZE 4096
#  endif
#  ifdef LINUX
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* NIOS2 */

/*
 * OpenRISC 1000: 32-bit words, `HBLKSIZE` defaults to 4096 unless already
 * defined.  On Linux the data segment starts at `__data_start`.
 */
#ifdef OR1K
#  define MACH_TYPE "OR1K"
#  define CPP_WORDSZ 32
#  ifndef HBLKSIZE
#    define HBLKSIZE 4096
#  endif
#  ifdef LINUX
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* OR1K */

#ifdef HP_PA
#  define MACH_TYPE "HP_PA"
#  ifdef __LP64__
#    define CPP_WORDSZ 64
#  else
#    define CPP_WORDSZ 32
#  endif
#  define STACK_GROWS_UP
#  ifdef HPUX
#    ifndef GC_THREADS
#      define MPROTECT_VDB
#    endif
#    ifdef USE_HPUX_FIXED_STACKBOTTOM
/*
 * The following appears to work for 7xx systems running HP/UX 9.xx.
 * Furthermore, it might result in much faster collections than `HEURISTIC2`,
 * which may involve scanning segments that directly precede the stack.
 * It is not the default, since it may not work on older machine/OS
 * combinations. (Thanks to Raymond X.T. Nijssen for uncovering this.)
 * This technique also does not work with HP/UX 11.xx.  The stack size is
 * settable using the kernel `maxssiz` variable, and the size can be set
 * dynamically in HP/UX 11.23 and later.  It also does not handle
 * `SHMEM_MAGIC` binaries that have stack and data in the first quadrant.
 * This is from platform `/etc/conf/h/param.h` file.
 */
#      define STACKBOTTOM MAKE_CPTR(0x7b033000)
#    elif defined(USE_ENVIRON_POINTER)
/*
 * Gustavo Rodriguez-Rivera suggested changing `HEURISTIC2` to this.
 * Note that the collector must be initialized before the first `putenv()`
 * call.  Unfortunately, some clients do not obey.
 */
extern char **environ;
#      define STACKBOTTOM ((ptr_t)environ)
#    elif !defined(HEURISTIC2)
/* This uses `pst_vm_status` support. */
#      define SPECIFIC_MAIN_STACKBOTTOM
#    endif
#    ifndef __GNUC__
#      define PREFETCH(x)                   \
        do {                                \
          register long addr = (long)(x);   \
          (void)_asm("LDW", 0, 0, addr, 0); \
        } while (0)
#    endif
#  endif /* HPUX */
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#endif /* HP_PA */

/*
 * Alpha: 64-bit words.  The OS variants below differ in how the data
 * segment bounds and the stack bottom are specified.
 */
#ifdef ALPHA
#  define MACH_TYPE "ALPHA"
#  define CPP_WORDSZ 64
#  ifdef NETBSD
/*
 * NOTE(review): the values 32 and 64 differ from the ones (1 and 2) that
 * the ELF specification assigns to `ELFCLASS32` and `ELFCLASS64`; confirm
 * these local definitions are intentional and do not clash with a system
 * ELF header.
 */
#    define ELFCLASS32 32
#    define ELFCLASS64 64
#    define ELF_CLASS ELFCLASS64
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef FREEBSD
extern char etext[];
#    define DATASTART ((ptr_t)(&etext))
#    define DATAEND ((ptr_t)GC_find_limit(DATASTART, TRUE))
#    define DATAEND_IS_FUNC
/*
 * Handle unmapped hole which `alpha*-*-freebsd[45]*` puts between
 * `etext` and `edata`.
 */
#    define GC_HAVE_DATAREGION2
extern char edata[], end[];
#    define DATASTART2 ((ptr_t)(&edata))
#    define DATAEND2 ((ptr_t)(&end))
/* `MPROTECT_VDB` is not yet supported at all on FreeBSD/alpha. */
#  endif
#  ifdef OSF1
#    define OS_TYPE "OSF1"
#    define DATASTART MAKE_CPTR(0x140000000)
extern int _end[];
#    define DATAEND ((ptr_t)(&_end))
extern char **environ;
/*
 * Round up from the value of `environ` to the nearest page boundary.
 * Probably this is broken if `putenv()` is called before the collector
 * initialization.
 */
#    define STACKBOTTOM PTR_ALIGN_UP((ptr_t)environ, getpagesize())
/*
 * Normally `HEURISTIC2` is too conservative, since the text segment
 * immediately follows the stack.  Hence we give an upper bound.
 * This is currently unused, since `HEURISTIC2` is not defined.
 */
extern int __start[];
#    define HEURISTIC2_LIMIT PTR_ALIGN_DOWN((ptr_t)__start, getpagesize())
#    ifndef GC_THREADS
/* FIXME: Unresolved signal issues with threads. */
#      define MPROTECT_VDB
#    endif
#    define DYNAMIC_LOADING
#  endif
#  ifdef LINUX
#    ifdef __ELF__
#      define SEARCH_FOR_DATA_START
#    else
/* Non-ELF: a fixed data start address, and `_end` as the data end. */
#      define DATASTART MAKE_CPTR(0x140000000)
extern int _end[];
#      define DATAEND ((ptr_t)_end)
#    endif
#  endif
#endif /* ALPHA */

/*
 * IA-64 (Itanium): the word size is set per OS variant.  HP/UX supports
 * both 32-bit (`_ILP32`) and 64-bit (`_LP64`) ABIs; Linux is 64-bit only;
 * the Windows entry is marked below as a partial guess.
 */
#ifdef IA64
#  define MACH_TYPE "IA64"
#  ifdef HPUX
#    ifdef _ILP32
#      define CPP_WORDSZ 32
/* Note: requires 8-byte alignment (granularity) for `malloc()`. */
#      define ALIGNMENT 4
#    else
#      if !defined(_LP64) && !defined(CPPCHECK)
#        error Unknown ABI
#      endif
#      define CPP_WORDSZ 64
/* Note: requires 16-byte alignment (granularity) for `malloc()`. */
#      define ALIGNMENT 8
#    endif
/*
 * Note that the collector must be initialized before the 1st `putenv`
 * call.
 */
extern char **environ;
#    define STACKBOTTOM ((ptr_t)environ)
/*
 * The following was empirically determined, and is probably not very
 * robust.  Note that the backing store base seems to be at a nice address
 * minus one page.
 */
#    define BACKING_STORE_DISPLACEMENT 0x1000000
#    define BACKING_STORE_ALIGNMENT 0x1000
/* Known to be wrong for recent HP/UX versions!!! */
#  endif
#  ifdef LINUX
#    define CPP_WORDSZ 64
/*
 * The following works on NUE and older kernels:
 * `define STACKBOTTOM MAKE_CPTR(0xa000000000000000l)`.
 */
/* TODO: `SPECIFIC_MAIN_STACKBOTTOM` does not work on NUE. */
/* We also need the base address of the register stack backing store. */
#    define SEARCH_FOR_DATA_START
#    ifdef __GNUC__
#      define DYNAMIC_LOADING
#    else
/*
 * In the Intel compiler environment, we seem to end up with statically
 * linked executables and an undefined reference to `_DYNAMIC`.
 */
#    endif
/*
 * Prefetch and `CLEAR_DOUBLE` primitives: inline assembly is used unless
 * `__INTEL_COMPILER` is defined, in which case the intrinsics declared in
 * `ia64intrin.h` file are used instead.
 */
#    ifdef __GNUC__
#      ifndef __INTEL_COMPILER
#        define PREFETCH(x) __asm__("        lfetch  [%0]" : : "r"(x))
#        define GC_PREFETCH_FOR_WRITE(x) \
          __asm__("        lfetch.excl     [%0]" : : "r"(x))
#        define CLEAR_DOUBLE(x) \
          __asm__("        stf.spill       [%0]=f0" : : "r"((void *)(x)))
#      else
EXTERN_C_END
#        include <ia64intrin.h>
EXTERN_C_BEGIN
#        define PREFETCH(x) __lfetch(__lfhint_none, (x))
#        define GC_PREFETCH_FOR_WRITE(x) __lfetch(__lfhint_nta, (x))
#        define CLEAR_DOUBLE(x) __stf_spill((void *)(x), 0)
#      endif /* __INTEL_COMPILER */
#    endif
#  endif
#  ifdef MSWIN32
/* FIXME: This is a very partial guess.  There is no port, yet. */
#    if defined(_WIN64)
#      define CPP_WORDSZ 64
#    else
/* TODO: Is this possible? */
#      define CPP_WORDSZ 32
#    endif
#  endif
#endif /* IA64 */

/*
 * E2K (Elbrus 2000): 64-bit words if `__LP64__` is defined, else 32-bit
 * ones; `HBLKSIZE` defaults to 4096 unless already defined.  On Linux the
 * data segment is taken to start at `__dso_handle`, and
 * `NO_PROC_FOR_LIBRARIES` is set if `malloc` is redirected.
 */
#ifdef E2K
#  define MACH_TYPE "E2K"
#  ifdef __LP64__
#    define CPP_WORDSZ 64
#  else
#    define CPP_WORDSZ 32
#  endif
#  ifndef HBLKSIZE
#    define HBLKSIZE 4096
#  endif
#  ifdef LINUX
extern int __dso_handle[];
#    define DATASTART ((ptr_t)__dso_handle)
#    ifdef REDIRECT_MALLOC
#      define NO_PROC_FOR_LIBRARIES
#    endif
#  endif
#endif /* E2K */

/*
 * Motorola 88000: 32-bit words and a fixed stack bottom shared by both
 * OS variants; each variant derives `DATASTART` from `etext` in its
 * own way.
 */
#ifdef M88K
#  define MACH_TYPE "M88K"
#  define CPP_WORDSZ 32
#  define STACKBOTTOM MAKE_CPTR(0xf0000000) /*< determined empirically */
extern int etext[];
#  ifdef CX_UX
#    define OS_TYPE "CX_UX"
/* `etext` rounded up to a 0x400000 boundary, plus 0x10000. */
#    define DATASTART (PTR_ALIGN_UP((ptr_t)etext, 0x400000) + 0x10000)
#  endif
#  ifdef DGUX
#    define OS_TYPE "DGUX"
#    define DATASTART GC_SysVGetDataStart(0x10000, (ptr_t)etext)
#    define DATASTART_USES_XGETDATASTART
#  endif
#endif /* M88K */

/*
 * IBM S/370: 32-bit words with 4-byte alignment.  The only OS variant
 * is UTS4, which computes `DATASTART` from `_etext` by
 * `GC_SysVGetDataStart()` and uses `_end` as the data end.
 */
#ifdef S370
/*
 * If this still works, and if anyone cares, this should probably be moved
 * to the `S390` category.
 */
#  define MACH_TYPE "S370"
#  define CPP_WORDSZ 32
#  define ALIGNMENT 4 /*< required by hardware */
#  ifdef UTS4
#    define OS_TYPE "UTS4"
extern int _etext[], _end[];
#    define DATASTART GC_SysVGetDataStart(0x10000, (ptr_t)_etext)
#    define DATAEND ((ptr_t)_end)
#    define HEURISTIC2
#  endif
#endif /* S370 */

/*
 * IBM S/390: 64-bit words if `__s390x__` is defined (with `HBLKSIZE`
 * defaulting to 4096), otherwise 32-bit ones.  On Linux the data segment
 * is delimited by the weak symbols `__data_start` and `_end`.
 */
#ifdef S390
#  define MACH_TYPE "S390"
#  ifndef __s390x__
#    define CPP_WORDSZ 32
#  else
#    define CPP_WORDSZ 64
#    ifndef HBLKSIZE
#      define HBLKSIZE 4096
#    endif
#  endif
#  ifdef LINUX
extern int __data_start[] __attribute__((__weak__));
extern int _end[] __attribute__((__weak__));
#    define DATASTART ((ptr_t)__data_start)
#    define DATAEND ((ptr_t)_end)
#    define CACHE_LINE_SIZE 256
#    define GETPAGESIZE() 4096
/* `SOFT_VDB` is turned on unless the client has already defined it. */
#    ifndef SOFT_VDB
#      define SOFT_VDB
#    endif
#  endif
#endif /* S390 */

/*
 * AArch64 (64-bit ARM): 32-bit words if `__ILP32__` is defined, else
 * 64-bit ones; `HBLKSIZE` defaults to 4096 unless already defined.
 * The OS variants marked "Nothing specific" or "Empty" need no
 * architecture-specific settings here.
 */
#ifdef AARCH64
#  define MACH_TYPE "AARCH64"
#  ifdef __ILP32__
#    define CPP_WORDSZ 32
#  else
#    define CPP_WORDSZ 64
#  endif
#  ifndef HBLKSIZE
#    define HBLKSIZE 4096
#  endif
#  ifdef LINUX
#    if defined(HOST_ANDROID)
#      define SEARCH_FOR_DATA_START
#    else
extern int __data_start[] __attribute__((__weak__));
#      define DATASTART ((ptr_t)__data_start)
#    endif
#  endif
#  ifdef COSMO
/* Empty. */
#  endif
#  ifdef DARWIN
/* OS X, iOS, visionOS */
#    define DARWIN_DONT_PARSE_STACK 1
#    define STACKBOTTOM MAKE_CPTR(0x16fdfffff)
#    if (TARGET_OS_IPHONE || TARGET_OS_XR || TARGET_OS_VISION)
/*
 * `MPROTECT_VDB` causes use of non-public API like `exc_server`, this
 * could be a reason for blocking the client application in the store.
 */
#    elif TARGET_OS_OSX
#      define MPROTECT_VDB
#    endif
#  endif
#  ifdef FREEBSD
/* Nothing specific. */
#  endif
#  ifdef NETBSD
#    define ELF_CLASS ELFCLASS64
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef NINTENDO_SWITCH
#    define OS_TYPE "NINTENDO_SWITCH"
#    define NO_HANDLE_FORK 1
extern int __bss_end[];
#    define DATASTART ((ptr_t)ALIGNMENT) /*< cannot be `NULL` */
#    define DATAEND ((ptr_t)(&__bss_end))
/* The stack bottom and heap memory come from the functions declared here. */
void *switch_get_stack_bottom(void);
#    define STACKBOTTOM ((ptr_t)switch_get_stack_bottom())
void *switch_get_mem(size_t lb);
#    define GET_MEM(lb) switch_get_mem(lb)
#    define GETPAGESIZE() 4096
#    undef USE_MMAP
#    undef USE_MUNMAP
#    ifndef HAVE_CLOCK_GETTIME
#      define HAVE_CLOCK_GETTIME 1
#    endif
#  endif
#  ifdef KOS
/* Nothing specific. */
#  endif
#  ifdef QNX
/* Nothing specific. */
#  endif
#  ifdef SERENITY
/* Nothing specific. */
#  endif
#  ifdef MSWIN32
/* UWP */
/* TODO: Enable `MPROTECT_VDB`. */
#  endif
#  ifdef NOSYS
#    define OS_TYPE "NOSYS"
/* `__data_start` is usually defined in the target linker script. */
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
/* The stack bottom is the value stored in `__stack_base__` variable. */
extern void *__stack_base__;
#    define STACKBOTTOM ((ptr_t)__stack_base__)
#  endif
#endif /* AARCH64 */

/*
 * ARM32 (32-bit ARM): 32-bit words.  The OS variants marked "Nothing
 * specific" need no architecture-specific settings here; the console
 * targets obtain the stack bottom (and, for PSP2, the heap memory) from
 * the functions declared here.
 */
#ifdef ARM32
#  define MACH_TYPE "ARM32"
#  define CPP_WORDSZ 32
#  ifdef LINUX
#    if GC_GLIBC_PREREQ(2, 0) || defined(HOST_ANDROID)
#      define SEARCH_FOR_DATA_START
#    else
/* See the comment of the Linux/m68k case. */
extern char **__environ;
#      define DATASTART ((ptr_t)(&__environ))
#    endif
#  endif
#  ifdef MSWINCE
/* Nothing specific. */
#  endif
#  ifdef FREEBSD
/* Nothing specific. */
#  endif
#  ifdef DARWIN
/* iOS */
#    define DARWIN_DONT_PARSE_STACK 1
#    define STACKBOTTOM MAKE_CPTR(0x30000000)
/* `MPROTECT_VDB` causes use of non-public API. */
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef QNX
/* Nothing specific. */
#  endif
#  ifdef SN_TARGET_PSP2
#    define OS_TYPE "SN_TARGET_PSP2"
#    define NO_HANDLE_FORK 1
#    ifndef HBLKSIZE
#      define HBLKSIZE 65536 /*< page size is 64 KB */
#    endif
/* `DATASTART` and `DATAEND` are equal, i.e. the data region is empty. */
#    define DATASTART ((ptr_t)ALIGNMENT)
#    define DATAEND ((ptr_t)ALIGNMENT)
void *psp2_get_stack_bottom(void);
#    define STACKBOTTOM ((ptr_t)psp2_get_stack_bottom())
void *psp2_get_mem(size_t lb);
#    define GET_MEM(lb) psp2_get_mem(lb)
#  endif
#  ifdef NN_PLATFORM_CTR
#    define OS_TYPE "NN_PLATFORM_CTR"
/* The data region is delimited by a pair of `Image$$ZI$$ZI$$` symbols. */
extern unsigned char Image$$ZI$$ZI$$Base[];
#    define DATASTART ((ptr_t)Image$$ZI$$ZI$$Base)
extern unsigned char Image$$ZI$$ZI$$Limit[];
#    define DATAEND ((ptr_t)Image$$ZI$$ZI$$Limit)
void *n3ds_get_stack_bottom(void);
#    define STACKBOTTOM ((ptr_t)n3ds_get_stack_bottom())
#  endif
#  ifdef MSWIN32
/* UWP */
/* TODO: Enable `MPROTECT_VDB`. */
#  endif
#  ifdef NOSYS
#    define OS_TYPE "NOSYS"
/* `__data_start` is usually defined in the target linker script. */
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
/* `__stack_base__` is set in platform `newlib/libc/sys/arm/crt0.S` file. */
extern void *__stack_base__;
#    define STACKBOTTOM ((ptr_t)__stack_base__)
#  endif
#  ifdef SYMBIAN
/* Nothing specific. */
#  endif
#endif /* ARM32 */

/*
 * CRIS: 32-bit words with 1-byte alignment.  On Linux,
 * `SEARCH_FOR_DATA_START` is defined instead of an explicit `DATASTART`.
 */
#ifdef CRIS
#  define MACH_TYPE "CRIS"
#  define CPP_WORDSZ 32
#  define ALIGNMENT 1
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#  endif
#endif /* CRIS */

/*
 * SuperH other than SH4 (the latter has its own section): 32-bit words.
 * On Linux, `SEARCH_FOR_DATA_START` is defined; the other OS variants
 * need no architecture-specific settings here.
 */
#if defined(SH) && !defined(SH4)
#  define MACH_TYPE "SH"
#  define CPP_WORDSZ 32
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef MSWINCE
/* Nothing specific. */
#  endif
#endif

/* SH4: 32-bit words; the only listed OS variant (Windows CE) is empty. */
#ifdef SH4
#  define MACH_TYPE "SH4"
#  define CPP_WORDSZ 32
#  ifdef MSWINCE
/* Nothing specific. */
#  endif
#endif /* SH4 */

/*
 * AVR32: 32-bit words.  On Linux, `SEARCH_FOR_DATA_START` is defined
 * instead of an explicit `DATASTART`.
 */
#ifdef AVR32
#  define MACH_TYPE "AVR32"
#  define CPP_WORDSZ 32
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#  endif
#endif /* AVR32 */

/*
 * M32R: 32-bit words.  On Linux, `SEARCH_FOR_DATA_START` is defined
 * instead of an explicit `DATASTART`.
 */
#ifdef M32R
#  define MACH_TYPE "M32R"
#  define CPP_WORDSZ 32
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#  endif
#endif /* M32R */

/*
 * X86_64: 32-bit words if `__ILP32__` is defined (the x32 ABI), else
 * 64-bit ones; `HBLKSIZE` and `CACHE_LINE_SIZE` default to 4096 and 64,
 * respectively, unless already defined.  The OS variants marked "Nothing
 * specific" or "Empty" need no architecture-specific settings here.
 */
#ifdef X86_64
#  define MACH_TYPE "X86_64"
#  ifdef __ILP32__
#    define CPP_WORDSZ 32
#  else
#    define CPP_WORDSZ 64
#  endif
#  ifndef HBLKSIZE
#    define HBLKSIZE 4096
#  endif
#  ifndef CACHE_LINE_SIZE
#    define CACHE_LINE_SIZE 64
#  endif
#  ifdef PLATFORM_GETMEM
#    define OS_TYPE "PLATFORM_GETMEM"
/* `DATASTART` and `DATAEND` are equal, i.e. the data region is empty. */
#    define DATASTART ((ptr_t)ALIGNMENT)
#    define DATAEND ((ptr_t)ALIGNMENT)
EXTERN_C_END
#    include <pthread.h>
EXTERN_C_BEGIN
/* The stack bottom and heap memory come from the functions declared here. */
void *platform_get_stack_bottom(void);
#    define STACKBOTTOM ((ptr_t)platform_get_stack_bottom())
void *platform_get_mem(size_t lb);
#    define GET_MEM(lb) platform_get_mem(lb)
#  endif
#  ifdef LINUX
#    define SEARCH_FOR_DATA_START
#    if defined(__GLIBC__) && !defined(__UCLIBC__)
/*
 * A workaround for GCF (Google Cloud Function) which does not support
 * `mmap()` for `/dev/zero` pseudo-file.  Should not cause any harm to
 * other targets.
 */
#      define USE_MMAP_ANON
#    endif
#    if defined(__GLIBC__) && !defined(__UCLIBC__) \
        && !defined(GETCONTEXT_FPU_BUG_FIXED)
/*
 * At present, there is a bug in `glibc` `getcontext()` on Linux/x86_64
 * (it clears FPU exception mask).  We define this macro to work around it.
 */
/* TODO: This seems to be fixed in `glibc` 2.14. */
#      define GETCONTEXT_FPU_EXCMASK_BUG
#    endif
#    if defined(__GLIBC__) && !defined(__UCLIBC__) \
        && !defined(GLIBC_TSX_BUG_FIXED)
/* Work around lock elision implementation for some `glibc`. */
#      define GLIBC_2_19_TSX_BUG
EXTERN_C_END
#      include <gnu/libc-version.h> /*< for `gnu_get_libc_version()` */
EXTERN_C_BEGIN
#    endif
#    ifndef SOFT_VDB
#      define SOFT_VDB
#    endif
#  endif
#  ifdef COSMO
/* Empty. */
#  endif
#  ifdef DARWIN
#    define DARWIN_DONT_PARSE_STACK 1
#    define STACKBOTTOM MAKE_CPTR(0x7fff5fc00000)
#    define MPROTECT_VDB
#  endif
#  ifdef FREEBSD
/* Only the `glibc`-based variant needs an explicit `DATAEND`. */
#    if defined(__GLIBC__)
extern int _end[];
#      define DATAEND ((ptr_t)_end)
#    endif
#    if defined(__DragonFly__)
/*
 * DragonFly BSD still has `vm.max_proc_mmap`, according to its
 * `mmap(2)` man page.
 */
#      define COUNT_UNMAPPED_REGIONS
#    endif
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef HAIKU
/* Nothing specific. */
#  endif
#  ifdef HURD
/* Nothing specific. */
#  endif
#  ifdef QNX
/* Nothing specific. */
#  endif
#  ifdef SERENITY
/* Nothing specific. */
#  endif
#  ifdef SOLARIS
#    define ELF_CLASS ELFCLASS64
extern int _etext[];
#    define DATASTART GC_SysVGetDataStart(0x1000, (ptr_t)_etext)
#    define PROC_VDB
#  endif
#  ifdef CYGWIN32
/* `MPROTECT_VDB` is considered only if `USE_WINALLOC` is not defined. */
#    ifndef USE_WINALLOC
#      if defined(THREAD_LOCAL_ALLOC)
/*
 * TODO: For an unknown reason, thread-local allocations lead to spurious
 * process exit after the fault handler is once invoked.
 */
#      else
#        define MPROTECT_VDB
#      endif
#    endif
#  endif
#  ifdef MSWIN_XBOX1
#    define OS_TYPE "MSWIN_XBOX1"
#    define NO_GETENV
/* `DATASTART` and `DATAEND` are equal, i.e. the data region is empty. */
#    define DATASTART ((ptr_t)ALIGNMENT)
#    define DATAEND ((ptr_t)ALIGNMENT)
LONG64 durango_get_stack_bottom(void);
#    define STACKBOTTOM ((ptr_t)durango_get_stack_bottom())
#    define GETPAGESIZE() 4096
#    ifndef USE_MMAP
#      define USE_MMAP 1
#    endif
/* The following is from platform `sys/mman.h` file. */
#    define PROT_NONE 0
#    define PROT_READ 1
#    define PROT_WRITE 2
#    define PROT_EXEC 4
#    define MAP_PRIVATE 2
#    define MAP_FIXED 0x10
#    define MAP_FAILED ((void *)(~(GC_uintptr_t)0))
#  endif
#  ifdef MSWIN32
#    define RETRY_GET_THREAD_CONTEXT
#    if !defined(__GNUC__) || defined(__INTEL_COMPILER) \
        || (GC_GNUC_PREREQ(4, 7) && !defined(__MINGW64__))
/*
 * Older gcc and Mingw-w64 (both gcc and clang) do not support
 * `SetUnhandledExceptionFilter()` properly on x86_64.
 */
#      define MPROTECT_VDB
#    endif
#  endif
#endif /* X86_64 */

/*
 * ARC: 32-bit words, 64-byte cache lines.  On Linux the data segment
 * starts at the weak `__data_start` symbol.
 */
#ifdef ARC
#  define MACH_TYPE "ARC"
#  define CPP_WORDSZ 32
#  define CACHE_LINE_SIZE 64
#  ifdef LINUX
extern int __data_start[] __attribute__((__weak__));
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* ARC */

/*
 * Hexagon: 32-bit words.  On Linux only the `glibc` configuration is
 * handled (by defining `SEARCH_FOR_DATA_START`); any other one is
 * rejected with `#error` (except under `CPPCHECK`).
 */
#ifdef HEXAGON
#  define MACH_TYPE "HEXAGON"
#  define CPP_WORDSZ 32
#  ifdef LINUX
#    if defined(__GLIBC__)
#      define SEARCH_FOR_DATA_START
#    elif !defined(CPPCHECK)
#      error Unknown Hexagon libc configuration
#    endif
#  endif
#endif /* HEXAGON */

/*
 * TILEPro: 32-bit words, 64-byte cache lines; `PREFETCH` maps to the
 * `__insn_prefetch` intrinsic.  On Linux the data segment starts at
 * `__data_start`.
 */
#ifdef TILEPRO
#  define MACH_TYPE "TILEPRO"
#  define CPP_WORDSZ 32
#  define PREFETCH(x) __insn_prefetch(x)
#  define CACHE_LINE_SIZE 64
#  ifdef LINUX
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* TILEPRO */

/*
 * TILE-Gx: the word size follows the compiler-reported `ptrdiff_t` width;
 * 64-byte cache lines; `PREFETCH` maps to the `__insn_prefetch_l1`
 * intrinsic.  On Linux the data segment starts at `__data_start`.
 */
#ifdef TILEGX
#  define MACH_TYPE "TILEGX"
#  define CPP_WORDSZ (__SIZEOF_PTRDIFF_T__ * 8)
/* In the 32-bit case, a double is cleared by a single `long long` store. */
#  if CPP_WORDSZ == 32
#    define CLEAR_DOUBLE(x) (void)(*(long long *)(x) = 0)
#  endif
#  define PREFETCH(x) __insn_prefetch_l1(x)
#  define CACHE_LINE_SIZE 64
#  ifdef LINUX
extern int __data_start[];
#    define DATASTART ((ptr_t)__data_start)
#  endif
#endif /* TILEGX */

/*
 * RISC-V: the word size follows the compiler-reported `size_t` width.
 * On Linux the data segment starts at the weak `__data_start` symbol;
 * the NOSYS variant uses `etext` and a stack bottom that is known to be
 * wrong (see the FIXME below).
 */
#ifdef RISCV
#  define MACH_TYPE "RISCV"
#  define CPP_WORDSZ (__SIZEOF_SIZE_T__ * 8) /*< 32 or 64 */
#  ifdef FREEBSD
/* Nothing specific. */
#  endif
#  ifdef LINUX
extern int __data_start[] __attribute__((__weak__));
#    define DATASTART ((ptr_t)__data_start)
#  endif
#  ifdef NETBSD
/* Nothing specific. */
#  endif
#  ifdef OPENBSD
/* Nothing specific. */
#  endif
#  ifdef NOSYS
#    define OS_TYPE "NOSYS"
extern char etext[];
#    define DATASTART ((ptr_t)etext)
/* FIXME: `STACKBOTTOM` is wrong! */
extern char **environ;
#    define STACKBOTTOM ((ptr_t)environ)
/* TODO: Support 64K page size. */
#    define GETPAGESIZE() 4096
#  endif
#endif /* RISCV */

/*
 * WebAssembly: 32-bit words only (a 64-bit target is rejected with
 * `#error` except under `CPPCHECK`); `USE_MMAP` and `USE_MUNMAP` are
 * forcibly undefined.  Neither the Emscripten nor the WASI variant
 * supports threads.
 */
#ifdef WEBASSEMBLY
#  define MACH_TYPE "WEBASSEMBLY"
#  if defined(__wasm64__) && !defined(CPPCHECK)
#    error 64-bit WebAssembly is not yet supported
#  endif
#  define CPP_WORDSZ 32
/*
 * Emscripten does emulate `mmap` and `munmap`, but those should not be
 * used in the collector, since WebAssembly lacks the native support of
 * memory mapping.  Use `sbrk()` instead (by default).
 */
#  undef USE_MMAP
#  undef USE_MUNMAP
/* With `EMSCRIPTEN_TINY`, memory is got by a page-aligned `emmalloc` call. */
#  ifdef EMSCRIPTEN_TINY
void *emmalloc_memalign(size_t align, size_t lb);
#    define GET_MEM(lb) emmalloc_memalign(GC_page_size, lb)
#  endif
#  ifdef EMSCRIPTEN
#    define OS_TYPE "EMSCRIPTEN"
/* `DATASTART` and `DATAEND` are equal, i.e. the data region is empty. */
#    define DATASTART ((ptr_t)ALIGNMENT)
#    define DATAEND ((ptr_t)ALIGNMENT)
#    if defined(GC_THREADS) && !defined(CPPCHECK)
#      error No thread support yet
#    endif
#  endif
#  ifdef WASI
#    define OS_TYPE "WASI"
/* The data region spans from `__global_base` to `__heap_base`. */
extern char __global_base, __heap_base;
#    define DATASTART ((ptr_t)(&__global_base))
#    define DATAEND ((ptr_t)(&__heap_base))
/* The stack bottom is set to the same address as the data start. */
#    define STACKBOTTOM DATASTART
#    ifndef GC_NO_SIGSETJMP
#      define GC_NO_SIGSETJMP 1 /*< no support of signals */
#    endif
#    ifndef NO_CLOCK
#      define NO_CLOCK 1 /*< no support of `clock()` */
#    endif
#    if defined(GC_THREADS) && !defined(CPPCHECK)
#      error No thread support yet
#    endif
#  endif
#endif /* WEBASSEMBLY */

/*
 * A common marker for the Windows-family targets (Cygwin, native Win32
 * and Windows CE).  Note: Xbox One is not included.
 */
#if defined(MSWINCE) || defined(MSWIN32) || defined(CYGWIN32)
#  define ANY_MSWIN
#endif

/*
 * `THREADS` is defined if either a pthreads-based or Win32-based threads
 * implementation is selected, or if `GC_THREADS` is requested on one of
 * the console targets listed below.
 */
#if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS)
#  define THREADS
#elif defined(GC_THREADS)                                     \
    && (defined(NN_PLATFORM_CTR) || defined(NINTENDO_SWITCH)  \
        || defined(SN_TARGET_PS3) || defined(SN_TARGET_PSP2))
#  define THREADS
#endif

/*
 * `SINGLE_THREADED_PROCESS` macro may be defined by the client only if
 * the application is known to never create threads (not even indirectly,
 * in the libraries it uses) and the collector itself is built without
 * the multi-threading support.  In practice, the macro should never be
 * defined.  It is silently dropped if `THREADS` is defined.
 */
#ifdef SINGLE_THREADED_PROCESS
#  ifdef THREADS
#    undef SINGLE_THREADED_PROCESS
#  endif
#endif

#if defined(__CHERI_PURE_CAPABILITY__)
#  define CHERI_PURECAP
#endif

#if defined(__GLIBC__) && !defined(DONT_USE_LIBC_PRIVATES)
/* Use the stack-end marker of `glibc`. */
#  define USE_LIBC_PRIVATES
#endif

#ifdef NO_RETRY_GET_THREAD_CONTEXT
#  undef RETRY_GET_THREAD_CONTEXT
#endif

#if defined(LINUX) && defined(SPECIFIC_MAIN_STACKBOTTOM)
#  if !defined(USE_LIBC_PRIVATES) && defined(NO_PROC_STAT)
/*
 * There is no way to get the stack bottom in this combination, so it
 * would fail.  Switch to `HEURISTIC2` instead.  (We tried...  It may
 * still fail on some architectures like `IA64`.)
 */
#    define HEURISTIC2
#    undef SPECIFIC_MAIN_STACKBOTTOM
#  endif
#endif

#ifdef USE_MMAP
#  if defined(OPENBSD) || defined(LINUX)
/*
 * With anonymous mappings, the OS kernel may do a somewhat better job
 * merging mappings.
 */
#    define USE_MMAP_ANON
#  endif
#elif defined(USE_MMAP_ANON)
/* Anonymous mapping is requested, which implies the use of `mmap`. */
#  define USE_MMAP 1
#endif

#if defined(USE_MMAP) && defined(CHERI_PURECAP)
/* TODO: Unmapping is off for now not to downgrade permissions on CHERI. */
#  undef USE_MUNMAP
#endif

#if !defined(NO_BLACK_LISTING) \
    && (defined(CHERI_PURECAP) || (defined(E2K) && defined(USE_PTR_HWTAG)))
/*
 * The black-listing mechanism is redundant on the platforms with
 * H/W-tagged pointers because an integer cannot be misinterpreted as
 * a pointer there.
 */
#  define NO_BLACK_LISTING
#endif

/*
 * Flag the configuration where `malloc()` is redirected in
 * a multi-threaded Linux (or NaCl) build.
 */
#if defined(REDIRECT_MALLOC) && defined(THREADS) \
    && (defined(LINUX) || defined(NACL))
/* TODO: Unclear if NaCl really needs this. */
#  define REDIR_MALLOC_AND_LINUXTHREADS
#endif

/*
 * In that configuration, `USE_PROC_FOR_LIBRARIES` is turned on by default
 * (unless `NO_PROC_FOR_LIBRARIES` is specified by the client).
 */
#if defined(REDIR_MALLOC_AND_LINUXTHREADS) && !defined(NO_PROC_FOR_LIBRARIES) \
    && !defined(USE_PROC_FOR_LIBRARIES)
/*
 * NPTL allocates thread stacks with `mmap`, which is fine.  But it
 * keeps a cache of thread stacks.  Each thread stack contains a thread
 * control block (TCB).  The latter, in turn, contains a pointer to
 * (`sizeof(void*)` from the beginning of) the `dtv` for thread-local
 * storage, which is `calloc`-allocated.  If we do not scan the cached
 * thread stacks, we appear to lose the `dtv`.  This tends to result in
 * something that looks like a bogus `dtv` count, which tends to result
 * in a `memset()` call on a block that is way too large.  Sometimes
 * we are lucky and the process just dies...  There seems to be
 * a similar issue with some other memory allocated by the dynamic
 * loader.  This should be avoidable by either:
 *   - Defining `USE_PROC_FOR_LIBRARIES` here (that performs very poorly,
 *     precisely because we end up scanning cached stacks);
 *   - Have `calloc()` look at its callers (in spite of the fact that it
 *     is gross and disgusting).
 *
 * In fact, neither seems to suffice, probably in part because even
 * with `USE_PROC_FOR_LIBRARIES`, we do not scan parts of stack
 * segments that appear to be out of bounds.  Thus we actually do both,
 * which seems to yield the best results.
 */
#  define USE_PROC_FOR_LIBRARIES
#endif

/* Use empty names if the platform-specific part has not provided them. */
#if !defined(OS_TYPE)
#  define OS_TYPE ""
#endif
#if !defined(MACH_TYPE)
#  define MACH_TYPE ""
#endif

/*
 * The default end of the data segment (if the platform-specific part has
 * not defined `DATAEND`) is the address of the external `end` array.
 */
#ifndef DATAEND
extern int end[];
#  define DATAEND ((ptr_t)end)
#endif

/*
 * Workaround for Android NDK clang-3.5+ (as of NDK r10e) which does
 * not provide correct `_end` symbol.  Unfortunately, alternate `__end__`
 * symbol is provided only by NDK `bfd` linker.
 */
#if defined(HOST_ANDROID) && defined(__clang__) && !defined(BROKEN_UUENDUU_SYM)
#  undef DATAEND
/* The symbol is declared weak, hence the comparison to zero below. */
#  pragma weak __end__
extern int __end__[];
/* Use `__end__` if its address is non-zero, otherwise use `_end`. */
#  define DATAEND (__end__ != 0 ? (ptr_t)__end__ : (ptr_t)_end)
#endif

/*
 * The platforms having SVR4 generic features (probably, there are others
 * which qualify too).
 */
#if (defined(SPARC) && defined(LINUX)) || defined(UTS4) || defined(DRSNX) \
    || defined(SOLARIS)
#  define DATASTART_USES_XGETDATASTART
#  define SVR4
#endif

/*
 * Include `sys/types.h` file if `HAVE_SYS_TYPES_H` is defined, or if the
 * target is not among the listed ones (for which the file is skipped).
 */
#if defined(HAVE_SYS_TYPES_H)                                      \
    || !(defined(__CC_ARM) || defined(GC_NO_TYPES) || defined(OS2) \
         || defined(MSWINCE) || defined(SN_TARGET_PSP2))
EXTERN_C_END
/* Note: `_GNU_SOURCE` has to be defined before the header is included. */
#  if defined(COSMO) && defined(MPROTECT_VDB) && !defined(_GNU_SOURCE)
#    define _GNU_SOURCE 1
#  endif
#  include <sys/types.h>
EXTERN_C_BEGIN
#endif /* HAVE_SYS_TYPES_H */

/*
 * Similarly, include `unistd.h` file if `HAVE_UNISTD_H` is defined, or if
 * the target is not among the listed ones.
 */
#if defined(HAVE_UNISTD_H)                                                \
    || !(defined(GC_NO_TYPES) || defined(MSWIN32) || defined(MSWINCE)     \
         || defined(MSWIN_XBOX1) || defined(NINTENDO_SWITCH)              \
         || defined(NN_PLATFORM_CTR) || defined(OS2) || defined(SERENITY) \
         || defined(SN_TARGET_PSP2) || defined(__CC_ARM))
EXTERN_C_END
#  include <unistd.h>
EXTERN_C_BEGIN
#endif /* HAVE_UNISTD_H */

/*
 * The default way to query the page size (not for Windows): `sysconf()`
 * is used on the listed platforms, `getpagesize()` is used elsewhere.
 */
#if !(defined(ANY_MSWIN) || defined(GETPAGESIZE))
#  if !(defined(DGUX) || defined(HOST_ANDROID) || defined(HOST_TIZEN) \
        || defined(KOS) || defined(SERENITY)                          \
        || (defined(SPARC) && defined(LINUX)))
#    define GETPAGESIZE() (unsigned)getpagesize()
#  else
#    define GETPAGESIZE() (unsigned)sysconf(_SC_PAGESIZE)
#  endif
#endif /* !ANY_MSWIN && !GETPAGESIZE */

#if (defined(I386) /* but not x32 */ || defined(ARM32) \
     || (defined(MIPS) && CPP_WORDSZ == 32))           \
    && defined(HOST_ANDROID) && __ANDROID_API__ < 23
/*
 * Only arm32, mips(32) and x86 have `tkill()`.  Starting from NDK r11,
 * `tkill()` is deprecated but it is kept for Mono clients.
 */
#  define USE_TKILL_ON_ANDROID
#endif

/* Reject `MPROTECT_VDB` if `glibc` is older than v2.2. */
#if defined(MPROTECT_VDB) && defined(__GLIBC__) && !GC_GLIBC_PREREQ(2, 2)
#  error glibc too old?
#endif

#if defined(DRSNX) || defined(SOLARIS)
/*
 * OS has both the Solaris-style semi-undocumented interface to dynamic
 * loader and the Solaris-style signal handlers.
 */
#  define SOLARISDL
#  define SUNOS5SIGS
#endif

/* The same style of signal handlers: HP/UX and the listed FreeBSD ones. */
#if defined(HPUX)                                        \
    || (defined(FREEBSD)                                 \
        && (__FreeBSD__ >= 4 || __FreeBSD_kernel__ >= 4  \
            || defined(__GLIBC__) || defined(__DragonFly__)))
#  define SUNOS5SIGS
#endif

/*
 * Select which of `USE_SEGV_SIGACT` and `USE_BUS_SIGACT` to define:
 * the first group of platforms gets both macros, the second one gets
 * `USE_SEGV_SIGACT` only.
 */
#if defined(COSMO) || defined(HPUX) || defined(HURD) || defined(NETBSD) \
    || defined(SERENITY) || (defined(FREEBSD) && defined(SUNOS5SIGS))   \
    || (defined(IRIX5) && defined(_sigargs)) /*< Irix 5.x, not 6.x */
#  define USE_SEGV_SIGACT
/* We may also get `SIGBUS`. */
#  define USE_BUS_SIGACT
#elif defined(ANY_BSD) || defined(HAIKU) || defined(IRIX5) || defined(OSF1) \
    || defined(SUNOS5SIGS)
#  define USE_SEGV_SIGACT
#endif

#ifdef GC_NO_PTHREAD_SIGMASK
#  ifndef NO_SIGNALS_UNBLOCK_IN_MAIN
#    define NO_SIGNALS_UNBLOCK_IN_MAIN
#  endif
#elif defined(SUNOS5SIGS) && !defined(GC_EXPLICIT_SIGNALS_UNBLOCK)
#  define GC_EXPLICIT_SIGNALS_UNBLOCK
#endif

#if defined(PARALLEL_MARK)
#  if !defined(__MINGW32__) && !defined(GC_PTHREADS_PARAMARK) \
      && defined(GC_PTHREADS)
/*
 * The parallel mark implementation based on `pthreads` is used.
 * MinGW 32/64 is excluded to workaround a deadlock in winpthreads-3.0b
 * internals.
 */
#    define GC_PTHREADS_PARAMARK
#  endif
#else
/* Just in case the macro is defined by client. */
#  undef GC_PTHREADS_PARAMARK
#endif

#if !defined(NO_MARKER_SPECIAL_SIGMASK)
#  if defined(GC_NO_PTHREAD_SIGMASK) || defined(GC_WIN32_PTHREADS) \
      || defined(NACL)                                             \
      || (defined(GC_WIN32_THREADS) && defined(GC_PTHREADS_PARAMARK))
/*
 * Either the GC marker thread cannot steal and drop user signal calls,
 * or `pthread_sigmask()` does not exist.
 */
#    define NO_MARKER_SPECIAL_SIGMASK
#  endif
#endif

/* NetBSD with threads: fix the real-time signals range to 33..63. */
#if defined(NETBSD) && defined(THREADS)
#  define SIGRTMIN 33
#  define SIGRTMAX 63
/*
 * It seems to be necessary to wait until threads have restarted.
 * But it is unclear why that is the case.
 */
#  define GC_NETBSD_THREADS_WORKAROUND
#endif

/* OpenBSD with threads: `sys/param.h` file is included additionally. */
#if defined(OPENBSD) && defined(THREADS)
EXTERN_C_END
#  include <sys/param.h>
EXTERN_C_BEGIN
#endif

/* The platforms where the basic UNIX-like system calls work. */
#if defined(SVR4) || defined(SERENITY) || defined(QNX) || defined(OSF1)  \
    || defined(LINUX) || defined(IRIX5) || defined(HURD) || defined(HPUX) \
    || defined(HAIKU) || defined(DGUX) || defined(DARWIN)                \
    || defined(COSMO) || defined(BSD) || defined(ANY_BSD) || defined(AIX)
#  define UNIX_LIKE
#endif

/*
 * The word size (in bits) should be either 32 or 64.  In case of cppcheck,
 * the value is recomputed from the size of `ptrdiff_t`.
 */
#if defined(CPPCHECK)
#  undef CPP_WORDSZ
#  define CPP_WORDSZ (__SIZEOF_PTRDIFF_T__ * 8)
#elif CPP_WORDSZ != 32 && CPP_WORDSZ != 64
#  error Bad word size
#endif

/*
 * The pointer size (in bits), unless already defined: it is computed from
 * the size of a pointer on CHERI pure-capability targets, and it equals
 * the word size otherwise.
 */
#ifndef CPP_PTRSZ
#  ifdef CHERI_PURECAP
#    define CPP_PTRSZ (__SIZEOF_POINTER__ * 8)
#  else
#    define CPP_PTRSZ CPP_WORDSZ
#  endif
#endif

/* `GC_SIZEOF_PTR` (in bytes) should agree with `CPP_PTRSZ` (in bits). */
#ifndef CPPCHECK
#  if GC_SIZEOF_PTR * 8 != CPP_PTRSZ
#    error Bad pointer size
#  endif
#endif /* !CPPCHECK */

/* The default alignment is the pointer size expressed in bytes. */
#ifndef ALIGNMENT
#  define ALIGNMENT (CPP_PTRSZ >> 3)
#endif

/* `STACKBOTTOM` should be provided for eCos and NOSYS targets. */
#if (defined(NOSYS) || defined(ECOS)) && !defined(CPPCHECK)
#  ifndef STACKBOTTOM
#    error Undefined STACKBOTTOM
#  endif
#endif

#if defined(IGNORE_DYNAMIC_LOADING)
#  undef DYNAMIC_LOADING
#endif

#if !defined(GC_DISABLE_INCREMENTAL) && defined(SMALL_CONFIG)
/* The incremental mode is presumably not worth the space it takes. */
#  define GC_DISABLE_INCREMENTAL
#endif

/*
 * `USE_WINALLOC` is optional for Cygwin only: it is always on for the
 * native Windows targets, and it is ignored elsewhere.
 */
#if !defined(CYGWIN32)
#  undef USE_WINALLOC
#endif
#if defined(MSWINCE) || defined(MSWIN32)
#  define USE_WINALLOC 1
#endif

/* `USE_MMAP` is dropped if `USE_WINALLOC` is in effect. */
#if defined(USE_WINALLOC)
#  undef USE_MMAP
#endif

#if ((defined(USE_MUNMAP) || defined(USE_MMAP) || defined(CYGWIN32)) \
     && !defined(USE_WINALLOC))                                      \
    || defined(SOLARIS) || defined(SERENITY) || defined(LINUX)       \
    || defined(IRIX5) || defined(DARWIN) || defined(ANY_BSD)
/* Both `sbrk` and `mmap` are tried (in that order). */
#  define MMAP_SUPPORTED
#endif

/*
 * The default threshold is likely too lax under heavy allocation
 * pressure on these platforms.  Xbox One (DURANGO) has no virtual paging
 * system, thus it lacks the large virtual address space of a standard
 * x86_64 platform; it may not need to be this aggressive, though.
 */
#if !defined(MUNMAP_THRESHOLD) && defined(USE_MUNMAP)
#  if defined(MSWIN_XBOX1) || defined(SN_TARGET_PSP2) \
      || defined(SN_TARGET_PS3)
#    define MUNMAP_THRESHOLD 3
#  endif
#endif

/*
 * No VDB implementation is selected if the incremental mode is disabled
 * or `DEFAULT_VDB` is requested.
 */
#if defined(DEFAULT_VDB) || defined(GC_DISABLE_INCREMENTAL)
#  undef SOFT_VDB
#  undef PROC_VDB
#  undef MPROTECT_VDB
#  undef GWW_VDB
#endif

/* The client may turn off the particular VDB implementations. */
#if defined(NO_GWW_VDB)
#  undef GWW_VDB
#endif
#if defined(NO_MPROTECT_VDB)
#  undef MPROTECT_VDB
#endif
#if defined(NO_SOFT_VDB)
#  undef SOFT_VDB
#endif

/*
 * If requested (by `SOFT_VDB_LINUX_VER_STATIC_CHECK`), check the Linux
 * kernel headers version at compile time and turn off `SOFT_VDB` if the
 * version is too old.
 */
#if defined(SOFT_VDB) && defined(SOFT_VDB_LINUX_VER_STATIC_CHECK)
EXTERN_C_END
#  include <linux/version.h> /*< for `LINUX_VERSION`, `LINUX_VERSION_CODE` */
EXTERN_C_BEGIN
#  if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
/* Not reliable in Linux kernels prior to v3.18. */
#    undef SOFT_VDB
#  endif
#endif /* SOFT_VDB */

#if defined(GC_DISABLE_INCREMENTAL)
#  undef CHECKSUMS
#endif

/*
 * `MPROTECT_VDB` is not usable in the following cases:
 *   - the atomic primitives are emulated using locks (this could lead
 *     to a deadlock in `GC_write_fault_handler()`);
 *   - `/proc` roots are used on Linux in the multi-threaded mode (it is
 *     incompatible with the incremental GC based on `mprotect`).
 */
#if defined(BASE_ATOMIC_OPS_EMULATED) \
    || (defined(THREADS) && defined(LINUX) && defined(USE_PROC_FOR_LIBRARIES))
#  undef MPROTECT_VDB
#endif

#if defined(GC_PREFER_MPROTECT_VDB) && defined(MPROTECT_VDB)
/*
 * `MPROTECT_VDB` is chosen manually out of the available strategies.
 * (`GWW_VDB` and `SOFT_VDB` are dealt with in `os_dep.c` file.)
 */
#  undef PROC_VDB
#endif

#if defined(PROC_VDB)
/* For now, these VDB implementations are mutually exclusive. */
#  undef MPROTECT_VDB
/* And this one is for a test purpose only. */
#  undef SOFT_VDB
#endif

/* `MPROTECT_VDB` on non-Windows targets needs `signal.h` file. */
#if defined(MPROTECT_VDB) && !defined(MSWIN32) && !defined(MSWINCE)
EXTERN_C_END
#  include <signal.h> /*< for `SA_SIGINFO`, `SIGBUS` */
EXTERN_C_BEGIN
#endif

/* Note: the checks below depend on what has been included so far. */
#if defined(SIGBUS) && !defined(HAVE_SIGBUS) && !defined(CPPCHECK)
#  define HAVE_SIGBUS
#endif

#ifndef SA_SIGINFO
#  define NO_SA_SIGACTION
#endif

/*
 * Turn off `MPROTECT_VDB` if `NO_SA_SIGACTION` or `GC_NO_SIGSETJMP` is
 * defined (Darwin and Windows targets are excluded from this rule).
 */
#if (defined(NO_SA_SIGACTION) || defined(GC_NO_SIGSETJMP))            \
    && defined(MPROTECT_VDB) && !defined(DARWIN) && !defined(MSWIN32) \
    && !defined(MSWINCE)
#  undef MPROTECT_VDB
#endif

/*
 * `DEFAULT_VDB` is the fallback if no other VDB implementation is
 * selected and the incremental mode is not disabled.
 */
#if !(defined(DEFAULT_VDB) || defined(GWW_VDB) || defined(MPROTECT_VDB) \
      || defined(PROC_VDB) || defined(SOFT_VDB)                         \
      || defined(GC_DISABLE_INCREMENTAL))
#  define DEFAULT_VDB
#endif

/*
 * `CHECK_SOFT_VDB` needs both `SOFT_VDB` and `MPROTECT_VDB`, and it is
 * not compatible with `GC_PREFER_MPROTECT_VDB`.
 */
#if defined(CHECK_SOFT_VDB) && !defined(CPPCHECK)
#  if !(defined(SOFT_VDB) && defined(MPROTECT_VDB)) \
      || defined(GC_PREFER_MPROTECT_VDB)
#    error Invalid config for CHECK_SOFT_VDB
#  endif
#endif

#ifndef NO_MANUAL_VDB
#  if defined(CHECKSUMS) || defined(REDIRECT_MALLOC_IN_HEADER)           \
      || defined(SMALL_CONFIG) || defined(REDIRECT_MALLOC)               \
      || defined(BASE_ATOMIC_OPS_EMULATED) || defined(GC_DISABLE_INCREMENTAL)
/* TODO: Manual VDB lacks the implementation of `CHECKSUMS`. */
#    define NO_MANUAL_VDB
#  endif
#endif

#if !(defined(NO_VDB_FOR_STATIC_ROOTS) || defined(SOFT_VDB) \
      || defined(PROC_VDB))
/* There is no way to find out if a page of static roots is dirty. */
#  define NO_VDB_FOR_STATIC_ROOTS
#endif

#if defined(MPROTECT_VDB) && (defined(__DragonFly__) || defined(LINUX)) \
    && !(defined(COUNT_PROTECTED_REGIONS)                              \
         || defined(DONT_COUNT_PROTECTED_REGIONS))
#  define COUNT_PROTECTED_REGIONS
#endif

#if !defined(GC_UNMAPPED_REGIONS_SOFT_LIMIT) \
    && (defined(COUNT_UNMAPPED_REGIONS) || defined(COUNT_PROTECTED_REGIONS))
/*
 * On Linux, `vm.max_map_count` is ~65530 by default.  Every protected
 * or unmapped region comes with approximately one mapped region.
 * So, to let the collector use up to half of `vm.max_map_count` (and
 * to leave the other half for the rest of the process), the number of
 * such regions should be limited to one quarter of `vm.max_map_count`.
 */
#  ifndef __DragonFly__
#    define GC_UNMAPPED_REGIONS_SOFT_LIMIT 16384
#  else
#    define GC_UNMAPPED_REGIONS_SOFT_LIMIT (1000000 / 4)
#  endif
#endif

/* The targets where `getcontext()` is not used by default. */
#if !defined(NO_GETCONTEXT)
#  if defined(HOST_ANDROID) || defined(SERENITY) || defined(RTEMS)      \
      || defined(QNX) || defined(OPENBSD) || defined(HURD)              \
      || defined(HAIKU) || defined(DARWIN)                              \
      || (defined(LINUX) && !defined(__gnu_linux__))                    \
      || (defined(UNIX_LIKE)                                            \
          && (defined(OR1K) || defined(NIOS2) || defined(MIPS)          \
              || defined(AVR32) || defined(ARM32)))
#    define NO_GETCONTEXT 1
#  endif
#endif

/*
 * MS Visual C++ debug builds (with CRT, and unless `CONSOLE_LOG` is
 * defined) include `crtdbg.h` file at this point.
 */
#if defined(MSWIN32) && !defined(CONSOLE_LOG) && defined(_MSC_VER) \
    && defined(_DEBUG) && !defined(NO_CRT)
/*
 * This should be included before the platform `intrin.h` file to
 * workaround some bug in Windows Kit (as of 10.0.17763) headers
 * causing redefinition of `_malloca` macro.
 */
EXTERN_C_END
#  include <crtdbg.h> /*< for `_CrtDbgReport` */
EXTERN_C_BEGIN
#endif

/*
 * `PREFETCH(x)` is defined (unless provided already) as the compiler
 * built-in for gcc-3.0+ and clang, as `_mm_prefetch()` for MS VC++ 2015+
 * targeting x86/x64, and as a no-op otherwise (including the case of
 * `NO_PREFETCH` defined).
 */
#ifndef PREFETCH
#  if (GC_GNUC_PREREQ(3, 0) || defined(__clang__)) && !defined(NO_PREFETCH)
#    define PREFETCH(x) __builtin_prefetch((x), 0, 0)
#  elif defined(_MSC_VER) && !defined(NO_PREFETCH)                      \
      && (defined(_M_IX86) || defined(_M_X64)) && !defined(_CHPE_ONLY_) \
      && (_MSC_VER >= 1900 /* VS 2015+ */)
EXTERN_C_END
#    include <intrin.h>
EXTERN_C_BEGIN
#    define PREFETCH(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
/* TODO: Support also `_M_ARM` and `_M_ARM64` (`__prefetch`). */
#  else
#    define PREFETCH(x) (void)0
#  endif
#endif /* !PREFETCH */

/* Note: this conditional is intentionally empty (see the comment inside). */
#ifndef GC_PREFETCH_FOR_WRITE
/*
 * The default `GC_PREFETCH_FOR_WRITE(x)` is defined in `gc_inline.h` file,
 * the latter one is included from `gc_priv.h` file.
 */
#endif

#if !defined(CACHE_LINE_SIZE)
/* Nothing better than a wild guess is available. */
#  define CACHE_LINE_SIZE 32
#endif

/*
 * `STATIC` expands to `static`, except for the builds with assertions
 * where it is empty (this aids debugging and profiling).
 */
#if !defined(STATIC)
#  if !defined(GC_ASSERTIONS)
#    define STATIC static
#  else
#    define STATIC /*< ignored */
#  endif
#endif

/*
 * Whether the `GC_find_limit` machinery is required for finding the end
 * of a data segment (or the backing store base).
 */
#if (defined(NETBSD) && defined(__ELF__))                                   \
    || (defined(CYGWIN32) && defined(I386) && defined(USE_MMAP)             \
        && !defined(USE_WINALLOC))                                          \
    || (defined(HPUX) && defined(SPECIFIC_MAIN_STACKBOTTOM))                \
    || defined(SVR4) || defined(OPENBSD) || defined(FREEBSD)                \
    || defined(DGUX) || defined(IA64) || defined(SEARCH_FOR_DATA_START)     \
    || defined(HEURISTIC2)
#  define NEED_FIND_LIMIT
#endif

#ifdef LINUX
#  if !defined(SMALL_CONFIG) || defined(IA64) \
      || defined(USE_PROC_FOR_LIBRARIES)
#    define NEED_PROC_MAPS
#  endif
#endif

#if defined(LINUX) || defined(HURD) || defined(__GLIBC__)
#  define REGISTER_LIBRARIES_EARLY
/*
 * We sometimes use `dl_iterate_phdr`, which may acquire an internal lock.
 * This is not safe after the world has stopped.  So we must call
 * `GC_register_dynamic_libraries` before stopping the world.
 * For performance reasons, this may be beneficial on other platforms
 * as well, though it should be avoided on Windows.
 */
#endif /* LINUX */

#ifdef SEARCH_FOR_DATA_START
/* The data start is taken from the `GC_data_start` variable. */
extern ptr_t GC_data_start;
#  define DATASTART GC_data_start
#endif

#if !defined(HEAP_START)
#  define HEAP_START 0
#endif

#if !defined(CLEAR_DOUBLE)
/* Store `NULL` to the two consecutive pointer slots starting at `x`. */
#  define CLEAR_DOUBLE(x) \
    (void)(*(ptr_t *)(x) = NULL, *((ptr_t *)(x) + 1) = NULL)
#endif

/*
 * Some `libc` implementations like `bionic`, `musl` and `glibc` 2.34+
 * do not have `libpthread.so` file because the `pthreads`-related code
 * is located in `libc.so` file, thus potential `calloc()` calls from
 * such code are forwarded to real (`libc`) `calloc()` without any
 * special handling on the collector side.  Checking `glibc` version at
 * compile time for the purpose seems to be fine.
 */
/* Thus, `HAVE_LIBPTHREAD_SO` is set only for `glibc` older than 2.34. */
#if defined(REDIR_MALLOC_AND_LINUXTHREADS) && !defined(HAVE_LIBPTHREAD_SO) \
    && defined(__GLIBC__) && !GC_GLIBC_PREREQ(2, 34)
#  define HAVE_LIBPTHREAD_SO
#endif

/* `INCLUDE_LINUX_THREAD_DESCR` is forced in the same configuration. */
#if defined(REDIR_MALLOC_AND_LINUXTHREADS) \
    && !defined(INCLUDE_LINUX_THREAD_DESCR)
/*
 * Will not work, since `libc` and the dynamic loader use thread locals,
 * sometimes as the only reference.
 */
#  define INCLUDE_LINUX_THREAD_DESCR
#endif

/*
 * Compile-time sanity checks of the resulting configuration.  All of
 * them are skipped if the code is processed by cppcheck.
 */
#ifndef CPPCHECK
/* Each `GC_xxx_THREADS` macro should match the corresponding OS macro. */
#  if (defined(GC_AIX_THREADS) && !defined(AIX))                          \
      || (defined(GC_DARWIN_THREADS) && !defined(DARWIN))                 \
      || (defined(GC_DGUX386_THREADS) && !defined(DGUX))                  \
      || (defined(GC_FREEBSD_THREADS) && !defined(FREEBSD))               \
      || (defined(GC_HAIKU_THREADS) && !defined(HAIKU))                   \
      || (defined(GC_HPUX_THREADS) && !defined(HPUX))                     \
      || (defined(GC_IRIX_THREADS) && !defined(IRIX5))                    \
      || (defined(GC_LINUX_THREADS) && !defined(LINUX) && !defined(NACL)) \
      || (defined(GC_NETBSD_THREADS) && !defined(NETBSD))                 \
      || (defined(GC_OPENBSD_THREADS) && !defined(OPENBSD))               \
      || (defined(GC_OSF1_THREADS) && !defined(OSF1))                     \
      || (defined(GC_RTEMS_PTHREADS) && !defined(RTEMS))                  \
      || (defined(GC_SOLARIS_THREADS) && !defined(SOLARIS))               \
      || (defined(GC_WIN32_THREADS) && !defined(ANY_MSWIN)                \
          && !defined(MSWIN_XBOX1))
#    error Inconsistent configuration
#  elif defined(GC_WIN32_PTHREADS) && defined(CYGWIN32)
#    error Inconsistent configuration (GC_PTHREADS)
#  endif
#  if defined(PARALLEL_MARK) && !defined(THREADS)
#    error Invalid config: PARALLEL_MARK requires GC_THREADS
#  endif
#  if defined(GWW_VDB) && !defined(USE_WINALLOC)
#    error Invalid config: GWW_VDB requires USE_WINALLOC
#  endif
/*
 * The two leak-detection conflicts are reported with distinct messages,
 * so that the diagnostic names the macros which actually clash.
 */
#  if defined(GC_FINDLEAK_DELAY_FREE) && defined(SHORT_DBG_HDRS)
#    error Invalid config: GC_FINDLEAK_DELAY_FREE is incompatible with SHORT_DBG_HDRS
#  elif (defined(FIND_LEAK) || defined(GC_FINDLEAK_DELAY_FREE)) \
      && defined(NO_FIND_LEAK)
#    error Invalid config: FIND_LEAK and NO_FIND_LEAK are mutually exclusive
#  endif
#endif /* !CPPCHECK */

#if !defined(DONT_USE_ATEXIT) && defined(NO_FIND_LEAK)
#  define DONT_USE_ATEXIT
#endif

/*
 * Whether `GC_page_size` should hold a value other than the page size:
 *   - Cygwin (if `MPROTECT_VDB` or `USE_MUNMAP`): the allocation
 *     granularity is used instead;
 *   - other than WASI and Windows, unless `mmap()` is used (if the
 *     incremental mode is disabled or `DEFAULT_VDB` is defined):
 *     `HBLKSIZE` is used instead.
 */
#if (defined(CYGWIN32) && (defined(USE_MUNMAP) || defined(MPROTECT_VDB))) \
    || (!(defined(ANY_MSWIN) || defined(WASI) || defined(USE_MMAP))       \
        && (defined(DEFAULT_VDB) || defined(GC_DISABLE_INCREMENTAL)))
#  define ALT_PAGESIZE_USED
#  if !defined(GC_NO_VALLOC)
/* Nonetheless, the real page size is needed in some extra functions. */
#    define REAL_PAGESIZE_NEEDED
#  endif
#endif

/* The generic `pthreads`-based implementation of the world stop. */
#if defined(GC_PTHREADS)                                          \
    && !(defined(DARWIN) || defined(GC_WIN32_THREADS)             \
         || defined(PLATFORM_STOP_WORLD) || defined(SN_TARGET_PSP2))
#  define PTHREAD_STOP_WORLD_IMPL
#endif

/* It is based on signals everywhere except for NaCl. */
#if !defined(NACL) && defined(PTHREAD_STOP_WORLD_IMPL)
#  define SIGNAL_BASED_STOP_WORLD
#endif

#ifdef SIGNAL_BASED_STOP_WORLD
#  if defined(NO_SA_SIGACTION) || defined(M68K) || defined(IA64) \
      || defined(HP_PA) || defined(E2K)
#    define SUSPEND_HANDLER_NO_CONTEXT
#  endif
#endif

#if !defined(NO_WRAP_MARK_SOME) && !defined(NO_CRT)
#  if (defined(THREADS) && defined(USE_PROC_FOR_LIBRARIES)) \
      || defined(MSWINCE) || defined(MSWIN32)
/*
 * Marking from nonexistent memory may happen under rare conditions.
 * To be able to recover, `GC_mark_some` is run with a suitable handler
 * in place.
 */
/* TODO: Should it be defined for Cygwin as well? */
#    define WRAP_MARK_SOME
#  endif
#endif

#if defined(NO_CRT) || defined(__GNUC__) \
    || !(defined(MSWIN32) || defined(MSWINCE))
#  define NO_SEH_AVAILABLE
#endif

#if defined(GC_WIN32_THREADS)
/* How many registers are copied in `copy_ptr_regs()`. */
#  if defined(I386)
#    ifndef WOW64_THREAD_CONTEXT_WORKAROUND
#      define PUSHED_REGS_COUNT 7
#    else
#      define PUSHED_REGS_COUNT 9
#    endif
#  elif defined(X86_64)
#    ifndef XMM_CANT_STORE_PTRS
/*
 * Pointers may be stored into SIMD registers by gcc-13 if certain
 * compiler optimizations are turned on.
 */
#      define PUSHED_REGS_COUNT (15 + 32)
#    else
/* The case when Xmm registers cannot hold pointers. */
#      define PUSHED_REGS_COUNT 15
#    endif
#  elif defined(SHx)
#    define PUSHED_REGS_COUNT 15
#  elif defined(ARM32)
#    define PUSHED_REGS_COUNT 13
#  elif defined(AARCH64)
#    define PUSHED_REGS_COUNT 30
#  elif defined(ALPHA) || defined(MIPS)
#    define PUSHED_REGS_COUNT 28
#  elif defined(PPC)
#    define PUSHED_REGS_COUNT 29
#  endif
#endif /* GC_WIN32_THREADS */

/* Forget about `pthread_setname_np` variants if these are not needed. */
#if !(defined(GC_PTHREADS) || defined(GC_PTHREADS_PARAMARK))
#  undef HAVE_PTHREAD_SET_NAME_NP
#  undef HAVE_PTHREAD_SETNAME_NP_WITH_TID_AND_ARG
#  undef HAVE_PTHREAD_SETNAME_NP_WITH_TID
#  if !(defined(DARWIN) && defined(MPROTECT_VDB))
#    undef HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID
#  endif
#endif

#if defined(GC_DISABLE_SUSPEND_THREAD) || defined(USE_RWLOCK)
/*
 * The `rwlock` primitives are not atomic in respect to signals (at least,
 * in the Linux threads implementation), thus a deadlock is possible if
 * a thread is suspended externally while it is running inside
 * `pthread_rwlock_rdlock()`.
 */
/* TODO: `GC_suspend_thread()` API is disabled as a workaround. */
#  undef GC_ENABLE_SUSPEND_THREAD
#endif

#if !defined(GC_NO_THREADS_DISCOVERY)
#  if defined(THREADS) && defined(DARWIN)
/* The stack-frame-walking code is needed to register threads by task. */
#    ifdef DARWIN_DONT_PARSE_STACK
#      define GC_NO_THREADS_DISCOVERY
#    endif
#  elif !defined(GC_WIN32_THREADS)
#    define GC_NO_THREADS_DISCOVERY
#  elif defined(THREAD_LOCAL_ALLOC) || defined(NO_CRT) || defined(MSWINCE) \
      || defined(GC_PTHREADS)                                              \
      || !(defined(GC_DLL) || defined(GC_INSIDE_DLL))
/*
 * Currently, the thread registration based on `DllMain` is incompatible
 * with `pthreads`, thread-local allocation and WinCE.
 */
#    define GC_NO_THREADS_DISCOVERY
#  endif
#endif /* !GC_NO_THREADS_DISCOVERY */

#if defined(GC_NO_THREADS_DISCOVERY) && !defined(CPPCHECK)
#  ifdef GC_DISCOVER_TASK_THREADS
#    error Defined both GC_DISCOVER_TASK_THREADS and GC_NO_THREADS_DISCOVERY
#  endif
#endif

#if defined(PARALLEL_MARK)
#  if !defined(DEFAULT_STACK_MAYBE_SMALL)                      \
      && (defined(NO_GETCONTEXT) /* e.g. musl */ || defined(HPUX) \
          || defined(DGUX))
/* TODO: The default stack size should be tested in configure. */
#    define DEFAULT_STACK_MAYBE_SMALL
#  endif
/* A marker thread needs a stack of at least this size. */
#  define MIN_STACK_SIZE (8 * HBLKSIZE * sizeof(ptr_t))
#endif

/* Single-threaded Android builds: force `USE_GET_STACKBASE_FOR_MAIN`. */
#if defined(HOST_ANDROID) && !defined(THREADS) \
    && !defined(USE_GET_STACKBASE_FOR_MAIN)
/*
 * Always use `pthread_attr_getstack` on Android (`-lpthread` option
 * is not needed to be specified manually) since Linux-specific
 * `os_main_stackbottom()` causes application crash if invoked inside
 * Dalvik VM.
 */
#  define USE_GET_STACKBASE_FOR_MAIN
#endif

/*
 * Outline `pthreads` primitives to use in `GC_get_stack_base()` and
 * `GC_get_main_stack_base()`.
 */
/*
 * The first group of platforms gets `HAVE_PTHREAD_GETATTR_NP`; FreeBSD
 * without `glibc` gets `HAVE_PTHREAD_ATTR_GET_NP`.  Each choice could be
 * turned off by the relevant `NO_xxx` macro.
 */
#if ((defined(FREEBSD) && defined(__GLIBC__) /* kFreeBSD */)               \
     || defined(COSMO) || defined(HAIKU) || defined(LINUX) || defined(KOS) \
     || defined(NETBSD))                                                   \
    && !defined(NO_PTHREAD_GETATTR_NP)
#  define HAVE_PTHREAD_GETATTR_NP 1
#elif defined(FREEBSD) && !defined(__GLIBC__) \
    && !defined(NO_PTHREAD_ATTR_GET_NP)
#  define HAVE_PTHREAD_NP_H 1 /*< requires include `pthread_np.h` file */
#  define HAVE_PTHREAD_ATTR_GET_NP 1
#endif

/*
 * If the main stack base is to be obtained using one of the primitives
 * above and no other method of finding the stack bottom is configured,
 * then `HEURISTIC1` and `STACK_GRAN` are provided as placeholders.
 */
#if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
    && defined(USE_GET_STACKBASE_FOR_MAIN) && !defined(STACKBOTTOM)         \
    && !defined(HEURISTIC1) && !defined(HEURISTIC2) && !defined(STACK_GRAN) \
    && !defined(SPECIFIC_MAIN_STACKBOTTOM)
/* Dummy definitions; rely on `pthread_attr_getstack` actually. */
#  define HEURISTIC1
#  define STACK_GRAN 0x1000000
#endif

#ifndef HAVE_CLOCK_GETTIME
#  if defined(_POSIX_TIMERS)                                \
      && ((defined(__USE_POSIX199309) && defined(LINUX))    \
          || defined(CYGWIN32))
#    define HAVE_CLOCK_GETTIME 1
#  endif
#endif

#if defined(GC_PTHREADS)                                        \
    && !(defined(E2K) || defined(IA64) || defined(SN_TARGET_PSP2) \
         || defined(REDIRECT_MALLOC))                            \
    && (defined(DARWIN_DONT_PARSE_STACK) || !defined(DARWIN))
/*
 * Note: it is not implemented if `malloc()` is redirected, as some
 * `pthreads` primitive might be called by the client-provided function
 * and such a primitive, in turn, may use `malloc()` internally.
 */
#  define STACKPTR_CORRECTOR_AVAILABLE
#endif

#if !defined(NO_CANCEL_SAFE) && !defined(HOST_ANDROID) && defined(THREADS) \
    && defined(UNIX_LIKE)
/*
 * The code is made cancellation-safe, which basically means that
 * cancellation requests are ignored while execution is in the
 * collector.  Only POSIX deferred cancellation is covered; POSIX
 * asynchronous cancellation is not handled.  Note that this works only
 * if `pthread_setcancelstate` is async-signal-safe (at least, in the
 * absence of asynchronous cancellation).  It appears to be the case for
 * `glibc`, though not documented.  If that assumption does not hold,
 * then there seems to be no way to wait safely in a signal handler, and
 * this is needed for thread suspension.  Also note that there is little
 * other code which appears to be cancellation-safe, hence turning this
 * off for performance may make sense.
 */
#  define CANCEL_SAFE
#endif

/* `IF_CANCEL(x)` is `x` if `CANCEL_SAFE`, and is nothing otherwise. */
#ifndef CANCEL_SAFE
#  define IF_CANCEL(x) /*< empty */
#else
#  define IF_CANCEL(x) x
#endif

/*
 * Darwin with `MPROTECT_VDB`, clang-17+: `NO_DESC_CATCH_EXCEPTION_RAISE`
 * is turned on by default (unless `FORCE_DESC_CATCH_EXCEPTION_RAISE` is
 * defined by the client).
 */
#if defined(DARWIN) && defined(MPROTECT_VDB)   \
    && !defined(NO_DESC_CATCH_EXCEPTION_RAISE) \
    && !defined(FORCE_DESC_CATCH_EXCEPTION_RAISE) && GC_CLANG_PREREQ(17, 0)
/*
 * Workaround "REFERENCED_DYNAMICALLY flag on _catch_exception_raise"
 * linker deprecation warnings on macOS 15.4.
 */
#  define NO_DESC_CATCH_EXCEPTION_RAISE
#endif

/*
 * Decide whether `fork()` handling is compiled in.  Note: the order of
 * the conditionals below matters since the later ones test the macros
 * defined by the earlier ones.
 */
#if !defined(CAN_HANDLE_FORK) && !defined(NO_HANDLE_FORK)                 \
    && !defined(HAVE_NO_FORK)                                             \
    && ((defined(GC_PTHREADS) && !defined(NACL)                           \
         && !defined(GC_WIN32_PTHREADS) && !defined(USE_WINALLOC))        \
        || (defined(DARWIN) && defined(MPROTECT_VDB) /* `&& !THREADS` */) \
        || (defined(HANDLE_FORK) && defined(GC_PTHREADS)))
/*
 * Attempts (where supported and requested) to make `GC_malloc` work in
 * a child process forked from a multi-threaded parent process.
 */
#  define CAN_HANDLE_FORK
#endif

/*
 * Workaround "failed to create new win32 semaphore" Cygwin fatal error
 * during semaphores fixup-after-fork.
 */
#if defined(CYGWIN32) && defined(THREADS) && defined(CAN_HANDLE_FORK) \
    && !defined(CYGWIN_SEM_FIXUP_AFTER_FORK_BUG_FIXED)                \
    && !defined(EMULATE_PTHREAD_SEMAPHORE)
#  define EMULATE_PTHREAD_SEMAPHORE
#endif

/*
 * `CAN_CALL_ATFORK` is not set for Tizen, Hurd and Android with API
 * level below 21 (and if `GC_NO_CAN_CALL_ATFORK` is defined).
 */
#if defined(CAN_HANDLE_FORK) && !defined(CAN_CALL_ATFORK)      \
    && !defined(GC_NO_CAN_CALL_ATFORK) && !defined(HOST_TIZEN) \
    && !defined(HURD) && (!defined(HOST_ANDROID) || __ANDROID_API__ >= 21)
/* Have working `pthread_atfork()`. */
#  define CAN_CALL_ATFORK
#endif

/*
 * If `fork()` is not handled and the target is none of Cygwin, Solaris
 * and a UNIX-like one, then `HAVE_NO_FORK` is set.
 */
#if !defined(CAN_HANDLE_FORK) && !defined(HAVE_NO_FORK) \
    && !(defined(CYGWIN32) || defined(SOLARIS) || defined(UNIX_LIKE))
#  define HAVE_NO_FORK
#endif

#if defined(PARALLEL_MARK) \
    && !(defined(USE_MARK_BYTES) || defined(USE_MARK_BITS))
/* This minimizes the usage of compare-and-swap. */
#  define USE_MARK_BYTES
#endif

#if !defined(NO_GETENV) \
    && (defined(MSWINRT_FLAVOR) || (defined(MSWINCE) && !defined(__CEGCC__)))
#  define NO_GETENV
#endif

#if !defined(NO_GETENV_WIN32) && (defined(MSWINCE) || defined(NO_GETENV))
#  define NO_GETENV_WIN32
#endif

#if defined(MSWIN32)                                               \
    && !(defined(MSGBOX_ON_ERROR) || defined(NO_MSGBOX_ON_ERROR)   \
         || defined(MSWINRT_FLAVOR) || defined(MSWIN_XBOX1)        \
         || defined(SMALL_CONFIG))
/*
 * On a GC fatal error, a Windows message box (with "OK" button) is
 * shown.  Once the button is clicked by the user, the client
 * application is terminated.
 */
#  define MSGBOX_ON_ERROR
#endif

/* The function to parse a string as a word-sized unsigned value. */
#if !defined(STRTOULL)
#  if !(defined(_WIN64) || defined(__LLP64__) || defined(_LLP64))
/* `sizeof(long)` is not less than `sizeof(word)`, so `strtoul()` fits. */
#    define STRTOULL strtoul
#  elif defined(__GNUC__) || !defined(_WIN64)
#    define STRTOULL strtoull
#  else
#    define STRTOULL _strtoui64
#  endif
#endif /* !STRTOULL */

/* The macro to form a word-sized unsigned integer constant. */
#if !defined(GC_WORD_C)
#  if !(defined(_WIN64) || defined(__LLP64__) || defined(_LLP64))
#    define GC_WORD_C(val) ((word)val##UL)
#  elif defined(__GNUC__) || !defined(_WIN64)
#    define GC_WORD_C(val) val##ULL
#  else
#    define GC_WORD_C(val) val##ui64
#  endif
#endif /* !GC_WORD_C */

#ifndef __has_feature
/* No `__has_feature()`: rely on the macros predefined by gcc. */
#  if defined(__SANITIZE_ADDRESS__)
/* gcc v4.8+ */
#    define ADDRESS_SANITIZER
#  endif
#  if defined(THREADS) && defined(__SANITIZE_THREAD__)
/* gcc v7.1+ */
#    define THREAD_SANITIZER
#  endif
#else
/* The compiler supports `__has_feature()`. */
#  if __has_feature(address_sanitizer)
#    define ADDRESS_SANITIZER
#  endif
#  if __has_feature(memory_sanitizer)
#    define MEMORY_SANITIZER
#  endif
#  if __has_feature(thread_sanitizer) && defined(THREADS)
#    define THREAD_SANITIZER
#  endif
#endif /* __has_feature */

#if defined(SPARC)
/* Stack clearing is crucial, and we include assembly code to do it well. */
#  define ASM_CLEAR_CODE
#endif

/*
 * Can we save call chain in objects for debugging?  Set `NFRAMES`
 * (number of saved frames) and `NARGS` (number of arguments for each
 * frame) to reasonable values for the platform.
 * Define `SAVE_CALL_CHAIN` if we can.  `SAVE_CALL_COUNT` can be
 * specified at build time, though we feel free to adjust it slightly.
 * Define `NEED_CALLINFO` if we either save the call stack or
 * `GC_ADD_CALLER` is defined.  Note: `GC_CAN_SAVE_CALL_STACKS` is
 * defined (for certain platforms) in `gc_config_macros.h` file.
 */
#if ((defined(LINUX) || defined(__GLIBC__))    \
     && (defined(X86_64) || defined(I386)))    \
    || defined(SPARC)
/*
 * On Linux/x86, `SAVE_CALL_CHAIN` is supported only if the code is
 * compiled to save frame pointers by default, i.e. if no
 * `-fomit-frame-pointer` flag is given.
 */
#  define CAN_SAVE_CALL_ARGS
#endif

/* Saving of the call chain is enabled by giving `SAVE_CALL_COUNT`. */
#if defined(GC_CAN_SAVE_CALL_STACKS) && defined(SAVE_CALL_COUNT) \
    && !defined(GC_ADD_CALLER)
#  define SAVE_CALL_CHAIN
#endif

#if defined(SAVE_CALL_CHAIN)
#  define NEED_CALLINFO
/* Number of frames to save.  Even for alignment reasons. */
#  if defined(CPPCHECK) || !defined(SAVE_CALL_COUNT)
#    define NFRAMES 6
#  else
#    define NFRAMES ((SAVE_CALL_COUNT + 1) & ~1)
#  endif
/* Number of arguments to save for each call. */
#  if !defined(SAVE_CALL_NARGS) || !defined(CAN_SAVE_CALL_ARGS)
#    define NARGS 0
#  else
#    define NARGS SAVE_CALL_NARGS
#  endif
#elif defined(GC_ADD_CALLER)
/* Only the immediate caller is recorded, without its arguments. */
#  define NEED_CALLINFO
#  define NFRAMES 1
#  define NARGS 0
#endif

/* Assume `dladdr()` is available on the platforms listed below. */
#if !defined(HAVE_DLADDR)                                       \
    && (defined(LINUX) || defined(FREEBSD)                      \
        || (defined(DARWIN) && !defined(_POSIX_C_SOURCE))       \
        || (defined(SOLARIS)                                    \
            && (defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE))))
#  define HAVE_DLADDR 1
#endif

/* `MAKE_BACK_GRAPH` implies `DBG_HDRS_ALL`. */
#if !defined(DBG_HDRS_ALL) && defined(MAKE_BACK_GRAPH)
#  define DBG_HDRS_ALL 1
#endif

/*
 * If only one of `POINTER_MASK` and `POINTER_SHIFT` is given, then
 * supply a neutral default for the other one.
 */
#if defined(POINTER_MASK)
#  if !defined(POINTER_SHIFT)
#    define POINTER_SHIFT 0
#  endif
#elif defined(POINTER_SHIFT)
#  define POINTER_MASK GC_WORD_MAX
#endif

/*
 * `FIXUP_POINTER(p)` adjusts `p` in place (masks it and then shifts it
 * left); `NEED_FIXUP_POINTER` is defined unless the fix-up is a no-op.
 */
#if !defined(FIXUP_POINTER) && !defined(DYNAMIC_POINTER_MASK) \
    && !defined(POINTER_MASK)
#  define FIXUP_POINTER(p) (void)(p)
#else
#  define NEED_FIXUP_POINTER
#  if defined(FIXUP_POINTER)
/* Custom `FIXUP_POINTER(p)`. */
#  elif defined(DYNAMIC_POINTER_MASK)
/* The mask and the shift are taken from variables. */
#    define FIXUP_POINTER(p) \
      (p = (ptr_t)((((word)(p)) & GC_pointer_mask) << GC_pointer_shift))
#    undef POINTER_MASK
#    undef POINTER_SHIFT
#  else
/*
 * Note: extra parentheses around custom-defined `POINTER_MASK` and
 * `POINTER_SHIFT` are intentional.
 */
#    define FIXUP_POINTER(p) \
      (p = (ptr_t)(((word)(p) & (POINTER_MASK)) << (POINTER_SHIFT)))
#  endif
#endif

/*
 * `COVERT_DATAFLOW(w)` yields the argument converted to `word` type.
 * (Actually, `GC_word` is used instead of `word` type as the latter
 * might be undefined at the place of use.)
 */
#ifndef LINT2
#  define COVERT_DATAFLOW(w) ((GC_word)(w))
#else
/*
 * The variant based on a tricky expression: it prevents false warnings
 * like "Array compared to 0", "Comparison of identical expressions",
 * "Untrusted loop bound" output by some static code analysis tools.
 * The argument should not be a literal value.
 */
#  define COVERT_DATAFLOW(w) (~(GC_word)(w) ^ (~(GC_word)0))
#endif

/* Same as `COVERT_DATAFLOW()` but the result is of `ptr_t` type. */
#if CPP_PTRSZ <= CPP_WORDSZ
#  define COVERT_DATAFLOW_P(p) ((ptr_t)COVERT_DATAFLOW(p))
#else
/* TODO: Cannot use tricky operations on a pointer. */
#  define COVERT_DATAFLOW_P(p) ((ptr_t)(p))
#endif

#if defined(REDIRECT_MALLOC) && defined(THREADS) && !defined(LINUX) \
    && !defined(REDIRECT_MALLOC_IN_HEADER)
/*
 * May work on other platforms (e.g. Darwin) provided the client
 * ensures all the client threads are registered with the collector,
 * e.g. by using the preprocessor-based interception of the thread
 * primitives (i.e., define `GC_THREADS` and include `gc.h` file from
 * all the client files that use `pthread_create` and friends).
 */
#endif

EXTERN_C_END

#endif /* GCCONFIG_H */


/* The Boolean type; its underlying type depends on the compiler. */
#if !defined(__cplusplus)
#  if defined(__WATCOMC__) || defined(__BORLANDC__)
typedef int GC_bool;
#  else
typedef char GC_bool;
#  endif
#else
typedef bool GC_bool;
#endif

/* The values of `GC_bool` type. */
#if !defined(__cplusplus) || defined(ANY_MSWIN)
/* Plain integers; this also avoids macro redefinition on Windows. */
#  define TRUE 1
#  define FALSE 0
#else
#  define TRUE true
#  define FALSE false
#endif

#if defined(ATOMIC_UNCOLLECTABLE) && !defined(GC_ATOMIC_UNCOLLECTABLE)
/* Accept the old-style macro name for compatibility. */
#  define GC_ATOMIC_UNCOLLECTABLE
#endif

#if !defined(GC_INNER)
/*
 * `GC_INNER` is a tagging macro: it must start every definition of
 * a variable that is declared with `GC_EXTERN`, and it should also be
 * used for the GC-scope function definitions and prototypes.  It must
 * not be used in `gcconfig.h` file, and it should not be used for the
 * debugging-only functions.
 */
#  if !defined(GC_DLL) || !defined(__GNUC__) || defined(ANY_MSWIN)
#    define GC_INNER /*< empty */
#  else
#    if !defined(GC_NO_VISIBILITY) && GC_GNUC_PREREQ(4, 0)
/* See the corresponding `GC_API` definition. */
#      define GC_INNER __attribute__((__visibility__("hidden")))
#    else
/* The attribute is unsupported. */
#      define GC_INNER /*< empty */
#    endif
#  endif

/*
 * `GC_EXTERN` is used only for the GC-scope variables (prefixed with
 * `GC_`) declared in the private header files.  It must not be used
 * for thread-local variables or in `gcconfig.h` file.  The matching
 * variable definition must start with `GC_INNER`.  It should not be
 * used for the debugging- or profiling-only variables.  As of now,
 * there are some other exceptions, e.g. for the variables that are
 * known to be used by some popular clients.
 */
#  define GC_EXTERN extern GC_INNER
#endif /* !GC_INNER */

#if !defined(__cplusplus)
/*
 * Meant only for several local variables in the performance-critical
 * functions.  New code should not use it.
 */
#  define REGISTER register
#else
/* `register` storage specifier is deprecated in C++11. */
#  define REGISTER /*< empty */
#endif

/*
 * The opening and closing parts of a block statement for use in the
 * multi-statement macros, and an optional initializer of a local
 * variable.
 */
#ifndef CPPCHECK
#  define MACRO_BLKSTMT_BEGIN do {
#  define MACRO_BLKSTMT_END } while (0)
#  define LOCAL_VAR_INIT_OK /*< empty */
#else
#  define MACRO_BLKSTMT_BEGIN {
#  define MACRO_BLKSTMT_END }
#  define LOCAL_VAR_INIT_OK = 0 /*< to avoid "uninit var" false positive */
#endif

#if !defined(M68K) || !defined(__GNUC__)
#  define GC_ATTR_PTRT_ALIGNED /*< empty */
#else
/*
 * On m68k architecture, `__alignof__(void *)` is 2 by default.  This
 * attribute gives the machine-word alignment (i.e. 4-byte one on the
 * given 32-bit architecture).
 */
#  define GC_ATTR_PTRT_ALIGNED __attribute__((__aligned__(sizeof(ptr_t))))
#endif

#ifdef CHERI_PURECAP
#  include <cheriintrin.h>
#endif

EXTERN_C_BEGIN

/*
 * An alias of `GC_uintptr_t`.
 * NOTE(review): presumably the integer type for holding a function
 * pointer value, and `FUNCPTR_IS_DATAPTR` presumably states that
 * function and data pointers share a representation - confirm at the
 * places of use.
 */
typedef GC_uintptr_t GC_funcptr_uint;
#define FUNCPTR_IS_DATAPTR

/* NOTE(review): the name implies a 32-bit type; it is `unsigned int`. */
typedef unsigned int unsigned32;

/*
 * Every `hblk` token after this point is replaced by `GC_hblk_s`, so
 * `struct hblk` actually names `struct GC_hblk_s`.  Only a forward
 * declaration of the structure is given here.
 */
#define hblk GC_hblk_s
struct hblk;

/* A short name for the block header type (declared elsewhere). */
typedef struct hblkhdr hdr;

EXTERN_C_END

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

#ifndef GC_HEADERS_H
#define GC_HEADERS_H

/*
 * Refuse to compile if `GC_PRIVATE_H` is not defined at this point
 * (the check is skipped under `CPPCHECK`).
 */
#if !defined(GC_PRIVATE_H) && !defined(CPPCHECK)
#  error gc_hdrs.h should be included from gc_priv.h
#endif

/*
 * Only a word size of 32 bits, or of at least 36 bits, is accepted
 * (the check is skipped under `CPPCHECK`).
 */
#if CPP_WORDSZ != 32 && CPP_WORDSZ < 36 && !defined(CPPCHECK)
#  error Get a real machine
#endif

EXTERN_C_BEGIN

/*
 * The 2-level tree data structure that is used to find block headers.
 * If there are more than 32 bits in a pointer, the top level is a hash
 * table.
 *
 * This defines `HDR()`, `GET_HDR()`, and `SET_HDR()`, the main macros
 * used to retrieve and set object headers.
 *
 * We take advantage of a header lookup cache.  This is a locally declared
 * direct mapped cache, used inside the marker.  The `HC_GET_HDR()` macro
 * uses and maintains this cache.  Assuming we get reasonable hit rates,
 * this saves a few memory references from each pointer validation.
 */

/* With words wider than 32 bits, the top level is a hash table. */
#if CPP_WORDSZ > 32
#  define HASH_TL
#endif

/* Define appropriate out-degrees for each of the two tree levels. */
#if defined(SMALL_CONFIG) && !defined(LARGE_CONFIG)
/* Keep top index size reasonable with smaller blocks. */
#  define LOG_BOTTOM_SZ 11
#else
#  define LOG_BOTTOM_SZ 10
#endif
#define BOTTOM_SZ (1 << LOG_BOTTOM_SZ)

#ifdef HASH_TL
/* The number of the hash table buckets is fixed. */
#  define LOG_TOP_SZ 11
#else
/* The top level covers all the address bits not handled below it. */
#  define LOG_TOP_SZ (CPP_WORDSZ - LOG_BOTTOM_SZ - LOG_HBLKSIZE)
#endif
#define TOP_SZ (1 << LOG_TOP_SZ)

/* Optional counting of the header cache hits and misses. */
#ifndef COUNT_HDR_CACHE_HITS
#  define HC_HIT() (void)0
#  define HC_MISS() (void)0
#else
extern word GC_hdr_cache_hits; /*< used for debugging/profiling */
extern word GC_hdr_cache_misses;
#  define HC_HIT() (void)(++GC_hdr_cache_hits)
#  define HC_MISS() (void)(++GC_hdr_cache_misses)
#endif

/*
 * An entry of the header lookup cache: a block address paired with
 * a header pointer (`HCE_VALID_FOR()` compares `block_addr` with the
 * shifted address being looked up).
 */
typedef struct hce {
  word block_addr; /*< right-shifted by `LOG_HBLKSIZE` */
  hdr *hce_hdr;    /*< the header value yielded on a cache hit */
} hdr_cache_entry;

/*
 * The number of entries in the cache.  It is a power of two because
 * `HCE()` computes the entry index by masking with
 * `HDR_CACHE_SIZE - 1`.
 */
#define HDR_CACHE_SIZE 8 /*< a power of two */

/* Declare the cache as an array named `hdr_cache`. */
#define DECLARE_HDR_CACHE hdr_cache_entry hdr_cache[HDR_CACHE_SIZE]

/* Clear the whole `hdr_cache` array by `BZERO()`. */
#define INIT_HDR_CACHE BZERO(hdr_cache, sizeof(hdr_cache))

/*
 * Get a pointer to the entry of `hdr_cache` (which must be in scope)
 * selected by the low-order bits of the address of `h` shifted right
 * by `LOG_HBLKSIZE`.
 */
#define HCE(h) (hdr_cache + ((ADDR(h) >> LOG_HBLKSIZE) & (HDR_CACHE_SIZE - 1)))

/* Does the cache entry `hce` hold the data for the address `h`? */
#define HCE_VALID_FOR(hce, h) ((hce)->block_addr == (ADDR(h) >> LOG_HBLKSIZE))

/*
 * Note: the argument `h` is not used by the expansion; the macro reads
 * the `hce_hdr` field of a variable named `hce` which must be in scope
 * at the place of use.
 */
#define HCE_HDR(h) ((hce)->hce_hdr)

/*
 * Handle a header cache miss.  Returns a pointer to the header
 * corresponding to `p`, if the latter can possibly be a valid object
 * pointer, and `NULL` otherwise.  Guaranteed to return `NULL` for
 * a pointer past the first page of an object unless both
 * `GC_all_interior_pointers` is set and `p` is in fact a valid object
 * pointer.  Never returns a pointer to a free `hblk`.
 * The function has the additional `source` argument only if
 * `PRINT_BLACK_LIST` is defined; `HEADER_CACHE_MISS()` hides this
 * difference (it drops its `source` argument otherwise).
 */
#ifndef PRINT_BLACK_LIST
GC_INNER hdr *GC_header_cache_miss(ptr_t p, hdr_cache_entry *hce);
#  define HEADER_CACHE_MISS(p, hce, source) GC_header_cache_miss(p, hce)
#else
GC_INNER hdr *GC_header_cache_miss(ptr_t p, hdr_cache_entry *hce,
                                   ptr_t source);
#  define HEADER_CACHE_MISS(p, hce, source) \
    GC_header_cache_miss(p, hce, source)
#endif

/*
 * Set `hhdr` to the header for `p`.  Analogous to `GET_HDR()` below,
 * except that in the case of large objects, it gets the header for the
 * object beginning if `GC_all_interior_pointers` is true.  Sets `hhdr`
 * to `NULL` if `p` points to somewhere other than the first page of an
 * object, and it is not a valid pointer to the object.
 *
 * Usage notes: the expansion is a plain block (not a `do`-`while` one)
 * which executes `break` if the resulting `hhdr` is `NULL`, thus the
 * macro should be placed directly in the body of a loop that is to be
 * left in such a case.  The array `hdr_cache` should be in scope (it
 * is accessed by `HCE()`), and the block declares its own local
 * variable named `hce`.  On a cache miss, the result is obtained by
 * `HEADER_CACHE_MISS()`.
 */
#define HC_GET_HDR(p, hhdr, source)                \
  { /*< cannot use `do ... while (0)` here */      \
    hdr_cache_entry *hce = HCE(p);                 \
    if (LIKELY(HCE_VALID_FOR(hce, p))) {           \
      HC_HIT();                                    \
      hhdr = hce->hce_hdr;                         \
    } else {                                       \
      hhdr = HEADER_CACHE_MISS(p, hce, source);    \
      if (NULL == hhdr)                            \
        break; /*< go to the enclosing loop end */ \
    }                                              \
  }

/*
 * A bottom-level node of the header tree.  It holds the entries for
 * `BOTTOM_SZ` consecutive blocks; the entry for an address is selected
 * by `HDR_FROM_BI()`.
 */
typedef struct bi {
  /*
   * The bottom-level index contains one of three kinds of values:
   *   - 0 means we are not responsible for this block, or this is
   *     a block other than the first one in a free block;
   *   - 0 < (long)`x` <= `MAX_JUMP` means the block starts at least
   *     `x * HBLKSIZE` bytes before the current address;
   *   - a valid pointer points to a `hdr` structure (the above cannot
   *     be valid pointers due to the `GET_MEM()` return convention).
   */
  hdr *index[BOTTOM_SZ];

  /*
   * All indices are linked in the ascending and descending orders,
   * respectively.
   */
  struct bi *asc_link;
  struct bi *desc_link;

  word key; /*< high-order address bits */
#ifdef HASH_TL
  /* Present only if the top level of the tree is a hash table. */
  struct bi *hash_link; /*< hash chain link */
#endif
} bottom_index;

/*
 * The largest value of an index entry that is not treated as a pointer
 * to a header (see `IS_FORWARDING_ADDR_OR_NIL()`).
 */
#define MAX_JUMP (HBLKSIZE - 1)

/*
 * The entry (an lvalue) of the bottom index `bi` that corresponds to
 * the address `p`.
 */
#define HDR_FROM_BI(bi, p) \
  (bi)->index[(ADDR(p) >> LOG_HBLKSIZE) & (BOTTOM_SZ - 1)]
#ifndef HASH_TL
/*
 * The top level is a plain array: `GC_top_index` is indexed directly
 * by the high-order bits of the address.
 */
#  define BI(p) GC_top_index[ADDR(p) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE)]
#  define HDR_INNER(p) HDR_FROM_BI(BI(p), p)
/* `HDR(p)` is a function call in the `SMALL_CONFIG` case. */
#  ifdef SMALL_CONFIG
#    define HDR(p) GC_find_header(p)
#  else
#    define HDR(p) HDR_INNER(p)
#  endif
/* Set `bottom_indx` to the bottom index for address `p`. */
#  define GET_BI(p, bottom_indx) (void)((bottom_indx) = BI(p))
/* Set `hhdr` to the index entry for address `p`. */
#  define GET_HDR(p, hhdr) (void)((hhdr) = HDR(p))
/* Store `hhdr` to the index entry for address `p`. */
#  define SET_HDR(p, hhdr) (void)(HDR_INNER(p) = (hhdr))
/* Set `ha` to the address of the index entry for address `p`. */
#  define GET_HDR_ADDR(p, ha) (void)((ha) = &HDR_INNER(p))
#else
/* A hash function for the tree top level. */
#  define TL_HASH(hi) ((hi) & (TOP_SZ - 1))
/*
 * Set `bottom_indx` to point to the bottom index for address `p`.
 * The hash chain is followed until an element with the matching key
 * is found or `GC_all_nils` is reached (the latter is the result if
 * there is no match).  Note: the expansion declares local variables
 * named `hi` and `_bi`.
 */
#  define GET_BI(p, bottom_indx)                                    \
    do {                                                            \
      REGISTER word hi = ADDR(p) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE); \
      REGISTER bottom_index *_bi = GC_top_index[TL_HASH(hi)];       \
      while (_bi->key != hi && _bi != GC_all_nils)                  \
        _bi = _bi->hash_link;                                       \
      (bottom_indx) = _bi;                                          \
    } while (0)
/*
 * Set `ha` to the address of the index entry for address `p`.  Note:
 * the expansion declares a local variable named `bi`.
 */
#  define GET_HDR_ADDR(p, ha)     \
    do {                          \
      REGISTER bottom_index *bi;  \
      GET_BI(p, bi);              \
      (ha) = &HDR_FROM_BI(bi, p); \
    } while (0)
/* Set `hhdr` to the index entry for address `p`. */
#  define GET_HDR(p, hhdr)  \
    do {                    \
      REGISTER hdr **_ha;   \
      GET_HDR_ADDR(p, _ha); \
      (hhdr) = *_ha;        \
    } while (0)
/*
 * Store `hhdr` to the index entry for address `p`.  The bottom index
 * for `p` is asserted to be other than `GC_all_nils`.
 */
#  define SET_HDR(p, hhdr)          \
    do {                            \
      REGISTER bottom_index *bi;    \
      GET_BI(p, bi);                \
      GC_ASSERT(bi != GC_all_nils); \
      HDR_FROM_BI(bi, p) = (hhdr);  \
    } while (0)
/* In this configuration, `HDR(p)` is always a function call. */
#  define HDR(p) GC_find_header(p)
#endif

/*
 * Is the result a forwarding address to someplace closer to the
 * beginning of the block or `NULL`?  (I.e., is the value of `hhdr`,
 * taken as an integer, not greater than `MAX_JUMP`?)
 */
#define IS_FORWARDING_ADDR_OR_NIL(hhdr) ((size_t)ADDR(hhdr) <= MAX_JUMP)

/*
 * Get an `HBLKSIZE`-aligned address closer to the beginning of the
 * block `h`.  Assumes that `hhdr` is equal to `HDR(h)`,
 * `IS_FORWARDING_ADDR_OR_NIL(hhdr)` is true and `hhdr` is not `NULL`.
 * `HDR(result)` is expected to be non-`NULL`.  The value of `hhdr` is
 * used as the number of blocks to step back from `h`.
 */
#define FORWARDED_ADDR(h, hhdr) \
  ((struct hblk *)(h) - (size_t)(GC_uintptr_t)(hhdr))

EXTERN_C_END

#endif /* GC_HEADERS_H */


/*
 * The function attributes to turn off the instrumentation by a given
 * sanitizer.  Each one is empty unless the corresponding sanitizer has
 * been detected; otherwise the `__no_sanitize__("...")` form is used
 * if `GC_CLANG_PREREQ(3, 8)` holds, and the sanitizer-specific
 * attribute is used if not.  A client-supplied definition is retained.
 */
#ifndef GC_ATTR_NO_SANITIZE_ADDR
#  ifndef ADDRESS_SANITIZER
#    define GC_ATTR_NO_SANITIZE_ADDR /*< empty */
#  elif GC_CLANG_PREREQ(3, 8)
#    define GC_ATTR_NO_SANITIZE_ADDR \
      __attribute__((__no_sanitize__("address")))
#  else
#    define GC_ATTR_NO_SANITIZE_ADDR __attribute__((__no_sanitize_address__))
#  endif
#endif /* !GC_ATTR_NO_SANITIZE_ADDR */

#ifndef GC_ATTR_NO_SANITIZE_MEMORY
#  ifndef MEMORY_SANITIZER
#    define GC_ATTR_NO_SANITIZE_MEMORY /*< empty */
#  elif GC_CLANG_PREREQ(3, 8)
#    define GC_ATTR_NO_SANITIZE_MEMORY \
      __attribute__((__no_sanitize__("memory")))
#  else
#    define GC_ATTR_NO_SANITIZE_MEMORY __attribute__((__no_sanitize_memory__))
#  endif
#endif /* !GC_ATTR_NO_SANITIZE_MEMORY */

#ifndef GC_ATTR_NO_SANITIZE_THREAD
#  ifndef THREAD_SANITIZER
#    define GC_ATTR_NO_SANITIZE_THREAD /*< empty */
#  elif GC_CLANG_PREREQ(3, 8)
#    define GC_ATTR_NO_SANITIZE_THREAD \
      __attribute__((__no_sanitize__("thread")))
#  else
/*
 * It seems that `no_sanitize_thread` attribute has no effect if the
 * function is inlined (as of gcc-11.1.0, at least).
 */
#    define GC_ATTR_NO_SANITIZE_THREAD \
      GC_ATTR_NOINLINE __attribute__((__no_sanitize_thread__))
#  endif
#endif /* !GC_ATTR_NO_SANITIZE_THREAD */

/* All three of the above attributes combined. */
#define GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD           \
  GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY \
      GC_ATTR_NO_SANITIZE_THREAD

#if !defined(UNUSED_ARG)
/* Evaluate `arg` and discard the result. */
#  define UNUSED_ARG(arg) ((void)(arg))
#endif

/*
 * `GC_INLINE` is the specifier for the functions defined in the header
 * files.  It always starts with `static`; the inline keyword that
 * follows (`inline`, `__inline` or none) depends on the compiler.
 */
#ifdef HAVE_CONFIG_H
/* The `inline` keyword is determined by `AC_C_INLINE` of `autoconf`. */
#  define GC_INLINE static inline
#elif defined(_MSC_VER) || defined(__INTEL_COMPILER) || defined(__DMC__) \
    || (GC_GNUC_PREREQ(3, 0) && defined(__STRICT_ANSI__))                \
    || defined(__BORLANDC__) || defined(__WATCOMC__)
#  define GC_INLINE static __inline
#elif GC_GNUC_PREREQ(3, 0) || defined(__sun)
#  define GC_INLINE static inline
#else
/* No inline keyword is known to be available. */
#  define GC_INLINE static
#endif

#if !defined(GC_ATTR_NOINLINE)
/* The attribute to prevent inlining of a function (if supported). */
#  if GC_GNUC_PREREQ(4, 0)
#    define GC_ATTR_NOINLINE __attribute__((__noinline__))
#  elif defined(_MSC_VER) && _MSC_VER >= 1400
#    define GC_ATTR_NOINLINE __declspec(noinline)
#  else
#    define GC_ATTR_NOINLINE /*< empty */
#  endif
#endif /* !GC_ATTR_NOINLINE */

#ifndef GC_API_OSCALL
/*
 * This is used to identify GC routines called by name from OS.
 * For a gcc-compatible compiler, this expands to `extern` (with the
 * default visibility attribute if the latter is supported and not
 * turned off by `GC_NO_VISIBILITY`); otherwise it is the same as
 * `GC_API`.
 */
#  if defined(__GNUC__)
#    if GC_GNUC_PREREQ(4, 0) && !defined(GC_NO_VISIBILITY)
/* Same as `GC_API` macro if `GC_DLL` one is defined. */
#      define GC_API_OSCALL extern __attribute__((__visibility__("default")))
#    else
/* The attribute is unsupported. */
#      define GC_API_OSCALL extern
#    endif
#  else
#    define GC_API_OSCALL GC_API
#  endif
#endif

/* Unless overridden, `GC_API_PRIV` is the same as `GC_API`. */
#if !defined(GC_API_PRIV)
#  define GC_API_PRIV GC_API
#endif

/* Unless overridden, `GC_API_PATCHABLE` is non-inlinable `GC_API`. */
#if !defined(GC_API_PATCHABLE)
#  define GC_API_PATCHABLE GC_ATTR_NOINLINE GC_API
#endif

#if defined(THREADS) && !defined(NN_PLATFORM_CTR)
/*
 * Copyright (c) 2017 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * This is a private collector header which provides an implementation of
 * `libatomic_ops` subset primitives sufficient for the collector assuming
 * that gcc atomic intrinsics are available (and have the correct
 * implementation).  This is enabled by defining `GC_BUILTIN_ATOMIC` macro.
 * Otherwise, `libatomic_ops` library is used to define the primitives.
 */

#ifndef GC_ATOMIC_OPS_H
#define GC_ATOMIC_OPS_H

#ifdef GC_BUILTIN_ATOMIC




#  ifdef __cplusplus
extern "C" {
#  endif

/* The integer type which the `AO_` primitives below operate on. */
typedef size_t AO_t;

/* The specifier for the functions defined in this header. */
#  ifndef GC_PRIVATE_H
#    define AO_INLINE static __inline
#  else
/* `GC_INLINE` is available. */
#    define AO_INLINE GC_INLINE
#  endif

#  if !defined(GC_PRIVATE_H) && !defined(THREAD_SANITIZER)
/* Similar to that in `gcconfig.h` file. */
#    if !defined(__has_feature)
#      ifdef __SANITIZE_THREAD__
#        define THREAD_SANITIZER
#      endif
#    else
#      if __has_feature(thread_sanitizer)
#        define THREAD_SANITIZER
#      endif
#    endif
#  endif /* !GC_PRIVATE_H && !THREAD_SANITIZER */

/* The type of a test-and-set location, and its clear and set states. */
typedef unsigned char AO_TS_t;
#  define AO_TS_CLEAR 0
#  define AO_TS_INITIALIZER ((AO_TS_t)AO_TS_CLEAR)
#  if defined(CPPCHECK) || !defined(__GCC_ATOMIC_TEST_AND_SET_TRUEVAL)
#    define AO_TS_SET (AO_TS_t)1 /*< true */
#  else
#    define AO_TS_SET __GCC_ATOMIC_TEST_AND_SET_TRUEVAL
#  endif

/* Reset the location to the clear state (with the release semantics). */
#  define AO_CLEAR(p) __atomic_clear(p, __ATOMIC_RELEASE)

/*
 * Set the location (with the acquire semantics); the result is
 * the previous state, either `AO_TS_SET` or `AO_TS_CLEAR`.
 */
#  define AO_test_and_set_acquire(p) \
    (!__atomic_test_and_set(p, __ATOMIC_ACQUIRE) ? AO_TS_CLEAR : AO_TS_SET)
#  define AO_HAVE_test_and_set_acquire

#  define AO_compiler_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST)

#  if !defined(THREAD_SANITIZER) || defined(AO_USE_ATOMIC_THREAD_FENCE)
/* The full memory barrier. */
#    define AO_nop_full() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#  else
/*
 * A replacement for the barrier which works around a compiler warning
 * (reported by gcc-11, at least) that `__atomic_thread_fence` is
 * unsupported with thread sanitizer: a sequentially-consistent
 * test-and-set operation is performed on a local variable instead.
 */
AO_INLINE void
AO_nop_full(void)
{
  volatile AO_TS_t scratch = AO_TS_INITIALIZER;

  (void)__atomic_test_and_set(&scratch, __ATOMIC_SEQ_CST);
}
#  endif
#  define AO_HAVE_nop_full

/*
 * The read-modify-write primitives (relaxed ordering).  The "fetch"
 * ones yield the value held before the modification.
 */
#  define AO_fetch_and_add(p, v) __atomic_fetch_add(p, v, __ATOMIC_RELAXED)
#  define AO_HAVE_fetch_and_add
#  define AO_fetch_and_add1(p) __atomic_fetch_add(p, 1, __ATOMIC_RELAXED)
#  define AO_HAVE_fetch_and_add1
#  define AO_fetch_and_sub1(p) \
    __atomic_fetch_add(p, ~(AO_t)0 /* -1 */, __ATOMIC_RELAXED)
#  define AO_HAVE_fetch_and_sub1

#  define AO_or(p, v) (void)__atomic_or_fetch(p, v, __ATOMIC_RELAXED)
#  define AO_HAVE_or

/* The loads. */
#  define AO_load(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#  define AO_HAVE_load
#  define AO_load_acquire(p) __atomic_load_n(p, __ATOMIC_ACQUIRE)
#  define AO_HAVE_load_acquire
/*
 * There is no definition of `AO_load_acquire_read(p)` as it is unused,
 * but its `AO_HAVE_` macro is needed.
 */
#  define AO_HAVE_load_acquire_read

/* The stores. */
#  define AO_store(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#  define AO_HAVE_store
#  define AO_store_release(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
#  define AO_HAVE_store_release
#  define AO_store_release_write(p, v) \
    __atomic_store_n(p, v, __ATOMIC_RELEASE)
#  define AO_HAVE_store_release_write

/* The primitives operating on the values of a character type. */
#  define AO_char_load(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#  define AO_HAVE_char_load
#  define AO_char_store(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#  define AO_HAVE_char_store
#  define AO_char_fetch_and_add1(p) __atomic_fetch_add(p, 1, __ATOMIC_RELAXED)
#  define AO_HAVE_char_fetch_and_add1

#  if defined(AO_REQUIRE_CAS)
/*
 * Atomically replace `*p` with `nv` if `*p` is equal to `ov`.
 * The result is non-zero if the replacement has been performed.
 * The release ordering is used on success, the relaxed one on failure.
 */
AO_INLINE int
AO_compare_and_swap_release(volatile AO_t *p, AO_t ov, AO_t nv)
{
  /* The intrinsic may store the observed value to this variable. */
  AO_t expected = ov;

  return (int)__atomic_compare_exchange_n(p, &expected, nv, 0 /* strong */,
                                          __ATOMIC_RELEASE,
                                          __ATOMIC_RELAXED /* on fail */);
}
#    define AO_HAVE_compare_and_swap_release
#  endif /* AO_REQUIRE_CAS */

#  ifdef __cplusplus
} /* extern "C" */
#  endif

#  if !defined(NO_LOCKFREE_AO_OR)
/* The assumption is that `__atomic_or_fetch()` is lock-free. */
#    define HAVE_LOCKFREE_AO_OR 1
#  endif

#else
/* Fall back to `libatomic_ops`. */
#  include "atomic_ops.h"

/*
 * `AO_compiler_barrier`, `AO_load` and `AO_store` should be defined
 * for all targets; the rest of the primitives are guaranteed to exist
 * only if `AO_REQUIRE_CAS` is defined (or if the corresponding
 * `AO_HAVE_` macro is defined).  i686 and x86_64 targets have
 * `AO_nop_full`, `AO_load_acquire`, `AO_store_release`, at least.
 */
#  if (!defined(AO_HAVE_load) || !defined(AO_HAVE_store)) && !defined(CPPCHECK)
#    error AO_load or AO_store is missing; probably old version of atomic_ops
#  endif

#endif /* !GC_BUILTIN_ATOMIC */

/*
 * The atomic primitives operating on a pointer value: `p` should be
 * a pointer to a `ptr_t` (`char *`) value.
 */
#if !defined(GC_BUILTIN_ATOMIC) && !defined(__CHERI_PURE_CAPABILITY__)
/*
 * Redirect to the `AO_` primitives.  Assume the size of `AO_t` matches
 * that of a pointer.
 */
#  define GC_cptr_load(p) (char *)AO_load((volatile AO_t *)(p))
#  define GC_cptr_load_acquire(p) (char *)AO_load_acquire((volatile AO_t *)(p))
#  define GC_cptr_load_acquire_read(p) \
    (char *)AO_load_acquire_read((volatile AO_t *)(p))
#  define GC_cptr_store(p, v) AO_store((volatile AO_t *)(p), (AO_t)(v))
#  define GC_cptr_store_release(p, v) \
    AO_store_release((volatile AO_t *)(p), (AO_t)(v))
#  define GC_cptr_store_release_write(p, v) \
    AO_store_release_write((volatile AO_t *)(p), (AO_t)(v))
#  if defined(AO_REQUIRE_CAS)
#    define GC_cptr_compare_and_swap(p, ov, nv) \
      AO_compare_and_swap((volatile AO_t *)(p), (AO_t)(ov), (AO_t)(nv))
#  endif
#else
/*
 * Assume that gcc atomic intrinsics are available (and have correct
 * implementation).
 */
#  define GC_cptr_load(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#  define GC_cptr_load_acquire(p) __atomic_load_n(p, __ATOMIC_ACQUIRE)
#  define GC_cptr_load_acquire_read(p) GC_cptr_load_acquire(p)
#  define GC_cptr_store(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#  define GC_cptr_store_release(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
#  define GC_cptr_store_release_write(p, v) GC_cptr_store_release(p, v)
#  if defined(AO_REQUIRE_CAS)
/*
 * Atomically replace `*p` with `nv` if `*p` is equal to `ov` (the
 * relaxed ordering is used); the result is non-zero on success.
 */
AO_INLINE int
GC_cptr_compare_and_swap(char *volatile *p, char *ov, char *nv)
{
  /* The intrinsic may store the observed value to this variable. */
  char *expected = ov;

  return (int)__atomic_compare_exchange_n(p, &expected, nv, 0 /* strong */,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
#  endif
#endif /* GC_BUILTIN_ATOMIC || __CHERI_PURE_CAPABILITY__ */

#endif /* GC_ATOMIC_OPS_H */

#  if !defined(AO_HAVE_compiler_barrier)
/* Make sure the macro is defined whichever implementation is in use. */
#    define AO_HAVE_compiler_barrier 1
#  endif
#endif

#ifdef ANY_MSWIN
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN 1
#  endif
#  define NOSERVICE
#  include <windows.h>

/* This is included strictly after the platform `windows.h` file. */
#  include <winbase.h>
#endif /* ANY_MSWIN */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999 by Hewlett-Packard Company. All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

#ifndef GC_LOCKS_H
#define GC_LOCKS_H

#if !defined(GC_PRIVATE_H) && !defined(CPPCHECK)
#  error gc_locks.h should be included from gc_priv.h
#endif

/*
 * Mutual exclusion between allocator/collector routines.  Needed if
 * there is more than one allocator thread.  Note that `I_HOLD_LOCK`,
 * `I_DONT_HOLD_LOCK` and `I_HOLD_READER_LOCK` macros are used only
 * positively in assertions, and may return `TRUE` in the "do not know"
 * case.
 */

#ifdef THREADS

EXTERN_C_BEGIN

/*
 * On these targets the allocator lock is acquired and released by
 * calling `GC_lock()` and `GC_unlock()` directly; both are only
 * declared here.
 */
#  if defined(NN_PLATFORM_CTR) || defined(NINTENDO_SWITCH)
extern void GC_lock(void);
extern void GC_unlock(void);
#    define UNCOND_LOCK() GC_lock()
#    define UNCOND_UNLOCK() GC_unlock()
#    ifdef GC_ASSERTIONS
/* The lock holder is not recorded by this macro on these targets. */
#      define SET_LOCK_HOLDER() (void)0
#    endif
#  endif

/*
 * Decide whether the allocator lock is built on the pthreads locking
 * primitives.  For a pthreads-based build (`GC_PTHREADS`), this is
 * chosen if the atomic test-and-set primitive is unavailable or if any
 * of the listed configuration macros is defined; `USE_SPIN_LOCK` is
 * discarded in such a case.
 */
#  if (!defined(AO_HAVE_test_and_set_acquire) || defined(GC_WIN32_THREADS) \
       || defined(LINT2) || defined(RTEMS) || defined(SN_TARGET_PS3)       \
       || defined(BASE_ATOMIC_OPS_EMULATED) || defined(USE_RWLOCK))        \
      && defined(GC_PTHREADS)
#    define USE_PTHREAD_LOCKS
#    undef USE_SPIN_LOCK
#    if (defined(GC_WIN32_THREADS) || defined(LINT2) || defined(USE_RWLOCK)) \
        && !defined(NO_PTHREAD_TRYLOCK)
/*
 * `pthread_mutex_trylock` may not win in `GC_lock` on Win32, due to
 * built-in support for spinning first?
 */
#      define NO_PTHREAD_TRYLOCK
#    endif
#  endif

/*
 * The lock holder bookkeeping shared by the Win32-native and the
 * pthreads-based lock implementations.
 */
#  if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS) \
      || defined(GC_PTHREADS)
/* A value which is not equal to `NUMERIC_THREAD_ID(id)` for any thread. */
#    define NO_THREAD ((unsigned long)(-1L))
#    ifdef GC_ASSERTIONS
/*
 * The numeric id of the thread recorded as the allocator lock holder,
 * or `NO_THREAD`.  Exists only if assertions are enabled.
 */
GC_EXTERN unsigned long GC_lock_holder;
/* Mark the allocator lock as not held by any thread. */
#      define UNSET_LOCK_HOLDER() (void)(GC_lock_holder = NO_THREAD)
#    endif
#  endif /* GC_WIN32_THREADS || GC_PTHREADS */

#  if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS)
/*
 * The allocator lock object for the Win32-native threads case: either
 * a slim reader/writer lock or a critical section.
 */
#    ifdef USE_RWLOCK
GC_EXTERN SRWLOCK GC_allocate_ml;
#    else
GC_EXTERN CRITICAL_SECTION GC_allocate_ml;
#    endif
#    ifdef GC_ASSERTIONS
/*
 * The assertion-checking variants.  The lock holder is recorded right
 * after the lock is acquired in the exclusive mode and is reset right
 * before the lock is released.
 */
#      define SET_LOCK_HOLDER() (void)(GC_lock_holder = GetCurrentThreadId())
#      define I_HOLD_LOCK() \
        (!GC_need_to_lock || GC_lock_holder == GetCurrentThreadId())
#      ifdef THREAD_SANITIZER
#        define I_DONT_HOLD_LOCK() TRUE /*< conservatively say yes */
#      else
#        define I_DONT_HOLD_LOCK() \
          (!GC_need_to_lock || GC_lock_holder != GetCurrentThreadId())
#      endif
#      ifdef USE_RWLOCK
/*
 * The reader (shared) mode macros do not touch `GC_lock_holder`; they
 * only check that the current thread is not recorded as the exclusive
 * holder.
 */
#        define UNCOND_READER_LOCK()               \
          {                                        \
            GC_ASSERT(I_DONT_HOLD_LOCK());         \
            AcquireSRWLockShared(&GC_allocate_ml); \
          }
#        define UNCOND_READER_UNLOCK()             \
          {                                        \
            GC_ASSERT(I_DONT_HOLD_LOCK());         \
            ReleaseSRWLockShared(&GC_allocate_ml); \
          }
/* Acquire/release the allocator lock in the exclusive mode. */
#        define UNCOND_LOCK()                         \
          {                                           \
            GC_ASSERT(I_DONT_HOLD_LOCK());            \
            AcquireSRWLockExclusive(&GC_allocate_ml); \
            SET_LOCK_HOLDER();                        \
          }
#        define UNCOND_UNLOCK()                       \
          {                                           \
            GC_ASSERT(I_HOLD_LOCK());                 \
            UNSET_LOCK_HOLDER();                      \
            ReleaseSRWLockExclusive(&GC_allocate_ml); \
          }
#      else
/* The critical-section-based variant (no reader mode is provided). */
#        define UNCOND_LOCK()                      \
          {                                        \
            GC_ASSERT(I_DONT_HOLD_LOCK());         \
            EnterCriticalSection(&GC_allocate_ml); \
            SET_LOCK_HOLDER();                     \
          }
#        define UNCOND_UNLOCK()                    \
          {                                        \
            GC_ASSERT(I_HOLD_LOCK());              \
            UNSET_LOCK_HOLDER();                   \
            LeaveCriticalSection(&GC_allocate_ml); \
          }
#      endif
#    else
/* No assertion checking: the macros map directly to the Win32 API calls. */
#      ifdef USE_RWLOCK
#        define UNCOND_READER_LOCK() AcquireSRWLockShared(&GC_allocate_ml)
#        define UNCOND_READER_UNLOCK() ReleaseSRWLockShared(&GC_allocate_ml)
#        define UNCOND_LOCK() AcquireSRWLockExclusive(&GC_allocate_ml)
#        define UNCOND_UNLOCK() ReleaseSRWLockExclusive(&GC_allocate_ml)
#      else
#        define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml)
#        define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml)
#      endif
#    endif /* !GC_ASSERTIONS */
#  elif defined(GC_PTHREADS)
/* The system header is included outside of the `extern "C"` region. */
EXTERN_C_END
#    include <pthread.h>
EXTERN_C_BEGIN
/*
 * POSIX allows `pthread_t` to be a structure type, though it rarely is.
 * Unfortunately, we need to use a `pthread_t` to index a data structure.
 * It also helps if comparisons do not involve a function call.
 * Hence we introduce platform-dependent macros to compare `pthread_t` ids
 * and to map them to integers (of `unsigned long` type).  This mapping
 * does not need to result in different values for each thread, though
 * that should be true as much as possible.
 * `NUMERIC_THREAD_ID_UNIQUE` is defined only if the mapping is known to
 * give distinct values for distinct threads.
 */
#    if !defined(GC_WIN32_PTHREADS)
#      define NUMERIC_THREAD_ID(id) ((unsigned long)(GC_uintptr_t)(id))
#      define THREAD_EQUAL(id1, id2) ((id1) == (id2))
#      define NUMERIC_THREAD_ID_UNIQUE
#    elif defined(__WINPTHREADS_VERSION_MAJOR) /*< winpthreads */
#      define NUMERIC_THREAD_ID(id) ((unsigned long)(id))
#      define THREAD_EQUAL(id1, id2) ((id1) == (id2))
/* `NUMERIC_THREAD_ID()` is 32-bit and, thus, not unique on Win64. */
#      ifndef _WIN64
#        define NUMERIC_THREAD_ID_UNIQUE
#      endif
#    else /* pthreads-win32 */
/* Only the `p` field of the thread id structure is mapped to a number. */
#      define NUMERIC_THREAD_ID(id) ((unsigned long)(word)(id.p))
/*
 * The platform on which `pthread_t` is a structure.
 * Using documented internal details of pthreads-win32 library.
 * Faster than `pthread_equal()`.  Should not change with the
 * future versions of pthreads-win32 library.
 */
#      define THREAD_EQUAL(id1, id2) (id1.p == id2.p && id1.x == id2.x)
/*
 * Generic definitions based on `pthread_equal()` always work but will
 * result in poor performance (as `NUMERIC_THREAD_ID()` might give
 * a constant value) and weak assertion checking.
 */
#      undef NUMERIC_THREAD_ID_UNIQUE
#    endif

/*
 * Select the lock implementation for the pthreads case: the PSP2
 * mutex, a test-and-set spin lock, or (as a fallback) the pthreads
 * locking primitives.
 */
#    ifdef SN_TARGET_PSP2
EXTERN_C_END
#      include "psp2-support.h"
EXTERN_C_BEGIN
GC_EXTERN WapiMutex GC_allocate_ml_PSP2;
/*
 * The result of the mutex call is checked only by an assertion; the
 * cast to `void` suppresses the "unused variable" warning if assertions
 * are off.
 */
#      define UNCOND_LOCK()                           \
        {                                             \
          int res;                                    \
          GC_ASSERT(I_DONT_HOLD_LOCK());              \
          res = PSP2_MutexLock(&GC_allocate_ml_PSP2); \
          GC_ASSERT(0 == res);                        \
          (void)res;                                  \
          SET_LOCK_HOLDER();                          \
        }
#      define UNCOND_UNLOCK()                           \
        {                                               \
          int res;                                      \
          GC_ASSERT(I_HOLD_LOCK());                     \
          UNSET_LOCK_HOLDER();                          \
          res = PSP2_MutexUnlock(&GC_allocate_ml_PSP2); \
          GC_ASSERT(0 == res);                          \
          (void)res;                                    \
        }

#    elif (!defined(THREAD_LOCAL_ALLOC) || defined(USE_SPIN_LOCK))   \
        && !defined(USE_PTHREAD_LOCKS) && !defined(THREAD_SANITIZER) \
        && !defined(USE_RWLOCK)
/*
 * In the `THREAD_LOCAL_ALLOC` case, the allocator lock tends to
 * be held for long periods, if it is held at all.
 * Thus spinning and sleeping for fixed periods are likely to result
 * in significant wasted time.  We thus rely mostly on queued locks.
 * Hence the spin lock is used here only if the thread-local allocation
 * is off or if `USE_SPIN_LOCK` is requested explicitly.
 */
#      undef USE_SPIN_LOCK
#      define USE_SPIN_LOCK
/* The slow path, called if the first test-and-set attempt fails. */
GC_INNER void GC_lock(void);
#      ifdef GC_ASSERTIONS
#        define UNCOND_LOCK()                                            \
          {                                                              \
            GC_ASSERT(I_DONT_HOLD_LOCK());                               \
            if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_SET) \
              GC_lock();                                                 \
            SET_LOCK_HOLDER();                                           \
          }
#        define UNCOND_UNLOCK()          \
          {                              \
            GC_ASSERT(I_HOLD_LOCK());    \
            UNSET_LOCK_HOLDER();         \
            AO_CLEAR(&GC_allocate_lock); \
          }
#      else
#        define UNCOND_LOCK()                                            \
          {                                                              \
            if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_SET) \
              GC_lock();                                                 \
          }
#        define UNCOND_UNLOCK() AO_CLEAR(&GC_allocate_lock)
#      endif /* !GC_ASSERTIONS */
#    else
/* Neither of the above is applicable: use the pthreads primitives. */
#      ifndef USE_PTHREAD_LOCKS
#        define USE_PTHREAD_LOCKS
#      endif
#    endif /* THREAD_LOCAL_ALLOC || USE_PTHREAD_LOCKS */
/*
 * The allocator lock based on the pthreads primitives: either
 * a read-write lock (if `USE_RWLOCK`) or a mutex.
 */
#    ifdef USE_PTHREAD_LOCKS
EXTERN_C_END
#      include <pthread.h>
EXTERN_C_BEGIN
#      ifdef GC_ASSERTIONS
/*
 * With assertions enabled, the exclusive lock is always acquired by
 * calling `GC_lock()`, regardless of the kind of the lock object.
 */
GC_INNER void GC_lock(void);
#        define UNCOND_LOCK()              \
          {                                \
            GC_ASSERT(I_DONT_HOLD_LOCK()); \
            GC_lock();                     \
            SET_LOCK_HOLDER();             \
          }
#      endif
#      ifdef USE_RWLOCK
GC_EXTERN pthread_rwlock_t GC_allocate_ml;
#        ifdef GC_ASSERTIONS
/*
 * The reader mode macros do not update the lock holder; the result of
 * the pthreads calls is ignored.
 */
#          define UNCOND_READER_LOCK()                      \
            {                                               \
              GC_ASSERT(I_DONT_HOLD_LOCK());                \
              (void)pthread_rwlock_rdlock(&GC_allocate_ml); \
            }
#          define UNCOND_READER_UNLOCK()                    \
            {                                               \
              GC_ASSERT(I_DONT_HOLD_LOCK());                \
              (void)pthread_rwlock_unlock(&GC_allocate_ml); \
            }
#          define UNCOND_UNLOCK()                           \
            {                                               \
              GC_ASSERT(I_HOLD_LOCK());                     \
              UNSET_LOCK_HOLDER();                          \
              (void)pthread_rwlock_unlock(&GC_allocate_ml); \
            }
#        else
/*
 * Without assertions, the same `pthread_rwlock_unlock()` call releases
 * the lock held in either mode, thus the reader unlock is an alias.
 */
#          define UNCOND_READER_LOCK() \
            (void)pthread_rwlock_rdlock(&GC_allocate_ml)
#          define UNCOND_READER_UNLOCK() UNCOND_UNLOCK()
#          define UNCOND_LOCK() (void)pthread_rwlock_wrlock(&GC_allocate_ml)
#          define UNCOND_UNLOCK() (void)pthread_rwlock_unlock(&GC_allocate_ml)
#        endif /* !GC_ASSERTIONS */
#      else
GC_EXTERN pthread_mutex_t GC_allocate_ml;
#        ifdef GC_ASSERTIONS
#          define UNCOND_UNLOCK()                    \
            {                                        \
              GC_ASSERT(I_HOLD_LOCK());              \
              UNSET_LOCK_HOLDER();                   \
              pthread_mutex_unlock(&GC_allocate_ml); \
            }
#        else
#          if defined(NO_PTHREAD_TRYLOCK)
#            define UNCOND_LOCK() pthread_mutex_lock(&GC_allocate_ml)
#          else
/* Try the lock first; call `GC_lock()` only if it is already taken. */
GC_INNER void GC_lock(void);
#            define UNCOND_LOCK()                                \
              {                                                  \
                if (pthread_mutex_trylock(&GC_allocate_ml) != 0) \
                  GC_lock();                                     \
              }
#          endif
#          define UNCOND_UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
#        endif /* !GC_ASSERTIONS */
#      endif
#    endif /* USE_PTHREAD_LOCKS */
/*
 * The assertion helpers for the pthreads case.  The checks always
 * succeed while `GC_need_to_lock` is false.
 */
#    ifdef GC_ASSERTIONS
/* The allocator lock holder. */
#      define SET_LOCK_HOLDER() \
        (void)(GC_lock_holder = NUMERIC_THREAD_ID(pthread_self()))
#      define I_HOLD_LOCK() \
        (!GC_need_to_lock   \
         || GC_lock_holder == NUMERIC_THREAD_ID(pthread_self()))
/*
 * If the numeric thread ids are not unique, a different thread could
 * have the same id as the lock holder, so the negative check cannot be
 * relied on.
 */
#      if !defined(NUMERIC_THREAD_ID_UNIQUE) || defined(THREAD_SANITIZER)
#        define I_DONT_HOLD_LOCK() TRUE /*< conservatively say yes */
#      else
#        define I_DONT_HOLD_LOCK() \
          (!GC_need_to_lock        \
           || GC_lock_holder != NUMERIC_THREAD_ID(pthread_self()))
#      endif
#    endif /* GC_ASSERTIONS */
#    if !defined(GC_WIN32_THREADS)
/*
 * A hint that we are in the collector and holding the allocator lock
 * for an extended period.
 */
GC_EXTERN volatile unsigned char GC_collecting;

/*
 * `ENTER_GC()` sets the hint and `EXIT_GC()` clears it.  The atomic
 * byte operations are used if available, otherwise a plain assignment
 * to the `volatile` variable is done.
 */
#      ifdef AO_HAVE_char_store
#        if defined(GC_ASSERTIONS) && defined(AO_HAVE_char_fetch_and_add1)
/*
 * Ensure `ENTER_GC()` is not used recursively: the hint is incremented
 * atomically and its previous value is asserted to be zero.
 */
#          define ENTER_GC() GC_ASSERT(!AO_char_fetch_and_add1(&GC_collecting))
#        else
#          define ENTER_GC() AO_char_store(&GC_collecting, TRUE)
#        endif
#        define EXIT_GC() AO_char_store(&GC_collecting, FALSE)
#      else
#        define ENTER_GC() (void)(GC_collecting = TRUE)
#        define EXIT_GC() (void)(GC_collecting = FALSE)
#      endif
#    endif
#  endif /* GC_PTHREADS */
/*
 * `GC_need_to_lock` tells whether the allocator lock should really be
 * acquired; `set_need_to_lock()` turns the locking on.  If the client
 * requests `GC_ALWAYS_MULTITHREADED` and the lock is a pthreads-based
 * or a spin one, then the locking is always on (a compile-time
 * constant).
 */
#  if defined(GC_ALWAYS_MULTITHREADED) \
      && (defined(USE_PTHREAD_LOCKS) || defined(USE_SPIN_LOCK))
#    define GC_need_to_lock TRUE
#    define set_need_to_lock() (void)0
#  else
/* `GC_ALWAYS_MULTITHREADED` is not supported for other kinds of lock. */
#    if defined(GC_ALWAYS_MULTITHREADED) && !defined(CPPCHECK)
#      error Runtime initialization of the allocator lock is needed!
#    endif
#    undef GC_ALWAYS_MULTITHREADED
#    ifdef THREAD_SANITIZER
/*
 * To workaround TSan false positive (e.g., when `GC_pthread_create()` is
 * called from multiple threads in parallel), do not set `GC_need_to_lock`
 * if it is already set.
 */
#      define set_need_to_lock()                     \
        (void)(*(GC_bool volatile *)&GC_need_to_lock \
                   ? FALSE                           \
                   : (GC_need_to_lock = TRUE))
#    else
/* We are multi-threaded now. */
#      define set_need_to_lock() (void)(GC_need_to_lock = TRUE)
#    endif
#  endif

EXTERN_C_END

#else /* !THREADS */
/* No threads support: the allocator lock operations do nothing. */
#  define LOCK() (void)0
#  define UNLOCK() (void)0
#  ifdef GC_ASSERTIONS
/*
 * `I_HOLD_LOCK()` and `I_DONT_HOLD_LOCK()` are used only in positive
 * assertions or to test whether we still need to acquire the allocator
 * lock; `TRUE` works in either case.
 */
#    define I_HOLD_LOCK() TRUE
#    define I_DONT_HOLD_LOCK() TRUE
#  endif
#endif /* !THREADS */

/*
 * Define `LOCK()`, `UNLOCK()` and, if the reader mode is implemented,
 * `READER_LOCK()`, `READER_UNLOCK()` on top of the unconditional
 * primitives (unless `LOCK` is already defined).
 */
#if defined(UNCOND_LOCK) && !defined(LOCK)
#  if (defined(LINT2) && defined(USE_PTHREAD_LOCKS)) \
      || defined(GC_ALWAYS_MULTITHREADED)
/*
 * Instruct code analysis tools not to care about the influence of
 * `GC_need_to_lock` on `LOCK`/`UNLOCK` semantics.
 */
#    define LOCK() UNCOND_LOCK()
#    define UNLOCK() UNCOND_UNLOCK()
#    ifdef UNCOND_READER_LOCK
#      define READER_LOCK() UNCOND_READER_LOCK()
#      define READER_UNLOCK() UNCOND_READER_UNLOCK()
#    endif
#  else
/*
 * The lock is really acquired (and released) only if `GC_need_to_lock`
 * is set, i.e. at least two threads are running.
 */
#    define LOCK()           \
      do {                   \
        if (GC_need_to_lock) \
          UNCOND_LOCK();     \
      } while (0)
#    define UNLOCK()         \
      do {                   \
        if (GC_need_to_lock) \
          UNCOND_UNLOCK();   \
      } while (0)
#    ifdef UNCOND_READER_LOCK
#      define READER_LOCK()       \
        do {                      \
          if (GC_need_to_lock)    \
            UNCOND_READER_LOCK(); \
        } while (0)
#      define READER_UNLOCK()       \
        do {                        \
          if (GC_need_to_lock)      \
            UNCOND_READER_UNLOCK(); \
        } while (0)
#    endif
#  endif
#endif /* UNCOND_LOCK && !LOCK */

/*
 * If no dedicated reader (shared) mode is implemented above, then the
 * reader lock macros fall back to the exclusive lock ones.
 * `HAS_REAL_READER_LOCK` is defined only if the reader mode is real.
 */
#ifdef READER_LOCK
#  define HAS_REAL_READER_LOCK
/* TODO: Implement I_HOLD_READER_LOCK, conservatively say yes for now. */
#  define I_HOLD_READER_LOCK() TRUE
#else
#  define READER_LOCK() LOCK()
#  define READER_UNLOCK() UNLOCK()
#  ifdef GC_ASSERTIONS
/*
 * A macro to check that the allocator lock is held at least in the
 * reader mode.
 */
#    define I_HOLD_READER_LOCK() I_HOLD_LOCK()
#  endif
#endif /* !READER_LOCK */

/*
 * A variant of `READER_UNLOCK()` which ensures that data written
 * before the unlock will be visible to the thread which acquires the
 * allocator lock in the exclusive mode.  But according to some `rwlock`
 * documentation: writers synchronize with prior writers and readers.
 * Thus, currently, it is just an alias of `READER_UNLOCK()`.
 */
#define READER_UNLOCK_RELEASE() READER_UNLOCK()

/*
 * The fallback for the configurations which do not define `ENTER_GC()`
 * above: both macros expand to nothing.
 */
#ifndef ENTER_GC
#  define ENTER_GC()
#  define EXIT_GC()
#endif

#endif /* GC_LOCKS_H */


/*
 * The collector-internal assertion.  If assertions are enabled and
 * expression `e` is false, then the source file name and the line
 * number are printed by `GC_err_printf()` and the program is aborted.
 * If assertions are disabled, then the macro expands to nothing, thus
 * `e` is not evaluated at all.
 */
#ifdef GC_ASSERTIONS
#  define GC_ASSERT(e)                                                   \
    do {                                                                 \
      if (UNLIKELY(!(e))) {                                              \
        GC_err_printf("Assertion failure: %s:%d\n", __FILE__, __LINE__); \
        ABORT("assertion failure");                                      \
      }                                                                  \
    } while (0)
#else
#  define GC_ASSERT(e)
#endif

#include "gc/gc_inline.h"

/*
 * Prevent certain compiler warnings by making a pointer-related cast
 * through a "pointer-sized" numeric type.  `t` is the target type and
 * `x` is the value to convert.
 */
#define CAST_THRU_UINTPTR(t, x) ((t)(GC_uintptr_t)(x))

/* Convert pointer `p` to a plain (non-`volatile`) `void` pointer. */
#define CAST_AWAY_VOLATILE_PVOID(p) \
  CAST_THRU_UINTPTR(/* no volatile */ void *, p)

/*
 * Convert an `unsigned` value to a `void` pointer.  Typically used to
 * print a numeric value using "%p" format specifier.  The pointer is not
 * supposed to be dereferenced.
 */
#define NUMERIC_TO_VPTR(v) ((void *)(GC_uintptr_t)(v))

/* Create a `ptr_t` pointer from a number (of `word` type). */
#define MAKE_CPTR(w) ((ptr_t)(GC_uintptr_t)(word)(w))

/* The value of `word` type with all bits set. */
#define GC_WORD_MAX (~(word)0)

/* Convert given pointer to its address.  Result is of `word` type. */
#ifdef CHERI_PURECAP
#  define ADDR(p) cheri_address_get(p)
#else
#  define ADDR(p) ((word)(GC_uintptr_t)(p))
#endif

/*
 * Compare two pointers: `ADDR_LT()` is a shorthand for `GC_ADDR_LT()`,
 * and `ADDR_GE()` is its negation.
 */
#define ADDR_LT(p, q) GC_ADDR_LT(p, q)
#define ADDR_GE(p, q) (!ADDR_LT(p, q))

/*
 * Check whether pointer `p` is in range [`s`, `e_p1`).
 * `p` should not have side effects (it is evaluated twice).
 */
#define ADDR_INSIDE(p, s, e_p1) (ADDR_GE(p, s) && ADDR_LT(p, e_p1))

/*
 * Handy definitions to compare and adjust pointers in a stack.
 * `HOTTER_THAN(p, q)` tells whether `p` is on the hotter side of `q`,
 * taking the direction of the stack growth into account.
 * `MAKE_COOLER(p, d)` and `MAKE_HOTTER(p, d)` move pointer `p` by `d`
 * elements in the respective direction.  `MAKE_COOLER()` leaves `p`
 * unchanged if the address check in the macro fails (to avoid wrapping
 * around the end of the address space); `MAKE_HOTTER()` does no such
 * check.  `p` is evaluated several times.
 */
#ifdef STACK_GROWS_UP
#  define HOTTER_THAN(p, q) ADDR_LT(q, p) /*< inverse */
#  define MAKE_COOLER(p, d) \
    (void)((p) -= ADDR(p) > (word)((d) * sizeof(*(p))) ? (d) : 0)
#  define MAKE_HOTTER(p, d) (void)((p) += (d))
#else
#  define HOTTER_THAN(p, q) ADDR_LT(p, q)
#  define MAKE_COOLER(p, d) \
    (void)((p)              \
           += ADDR(p) <= (word)(GC_WORD_MAX - (d) * sizeof(*(p))) ? (d) : 0)
#  define MAKE_HOTTER(p, d) (void)((p) -= (d))
#endif /* !STACK_GROWS_UP */

/*
 * Clear/set flags (given by a mask) in a pointer.  The result is of
 * `ptr_t` type; `p` itself is not modified.
 */
#define CPTR_CLEAR_FLAGS(p, mask) \
  (ptr_t)((GC_uintptr_t)(p) & ~(GC_uintptr_t)(word)(mask))
#define CPTR_SET_FLAGS(p, mask) (ptr_t)((GC_uintptr_t)(p) | (word)(mask))

/* Easily changeable parameters are below. */

#ifdef ALL_INTERIOR_POINTERS
/*
 * Forces all pointers into the interior of an object to be considered valid.
 * Also causes the sizes of all objects to be inflated by at least one byte.
 * This should suffice to guarantee that in the presence of a compiler that
 * does not perform garbage-collector-unsafe optimizations, all portable,
 * strictly ANSI conforming C programs should be safely usable with `malloc`
 * replaced by `GC_malloc` and `free` calls removed.  There are several
 * disadvantages:
 *   1. There are probably no interesting, portable, strictly ANSI-conforming
 *      C programs;
 *   2. This option makes it hard for the collector to allocate space that is
 *      not "pointed to" by integers, etc.  (Under SunOS 4.x with a statically
 *      linked `libc`, we empirically observed that it would be difficult to
 *      allocate individual objects larger than 100 KB; even if only smaller
 *      objects are allocated, more swap space is likely to be needed;
 *      fortunately, much of this will never be touched.)
 *
 * If you can easily avoid using this option, do.  If not, try to keep
 * individual objects small.  This is really controlled at startup, through
 * `GC_all_interior_pointers` variable.
 */
#endif

EXTERN_C_BEGIN

/*
 * The finalization-related internal routines.  If the finalization
 * support is compiled out, then `GC_notify_or_invoke_finalizers()` is
 * a no-op macro and the other routines are not declared.
 */
#ifndef GC_NO_FINALIZATION
/*
 * If `GC_finalize_on_demand` is not set, invoke eligible finalizers.
 * Otherwise: call `(*GC_finalizer_notifier)()` if there are finalizers to
 * be run, and we have not called this procedure yet this collection cycle.
 */
GC_INNER void GC_notify_or_invoke_finalizers(void);

/*
 * Perform all indicated finalization actions on unmarked objects.
 * Unreachable finalizable objects are enqueued for processing by
 * `GC_invoke_finalizers()`.  Cause disappearing links to disappear
 * and unreachable objects to be enqueued for finalization.
 * Invoked with the allocator lock held but the world is running.
 */
GC_INNER void GC_finalize(void);

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
/* Process the "toggle-refs" before GC starts. */
GC_INNER void GC_process_togglerefs(void);
#  endif
#  ifndef SMALL_CONFIG
/* Print the finalization statistics (not available in a small build). */
GC_INNER void GC_print_finalization_stats(void);
#  endif
#else
#  define GC_notify_or_invoke_finalizers() (void)0
#endif /* GC_NO_FINALIZATION */

/*
 * `EXTRA_BYTES` is the value of `GC_all_interior_pointers` converted
 * to `size_t`, or zero if `DONT_ADD_BYTE_AT_END` is defined.
 * `MAX_EXTRA_BYTES` is the compile-time upper bound of `EXTRA_BYTES`.
 */
#ifdef DONT_ADD_BYTE_AT_END
#  define EXTRA_BYTES 0
#  define MAX_EXTRA_BYTES 0
#else
#  define MAX_EXTRA_BYTES 1
#  ifndef LINT2
#    define EXTRA_BYTES ((size_t)GC_all_interior_pointers)
#  else
/*
 * Tell the code analysis tool explicitly that the only values
 * `GC_all_interior_pointers` is assumed to have are 0 and 1.
 */
#    define EXTRA_BYTES ((size_t)(GC_all_interior_pointers ? 1 : 0))
#  endif
#endif /* !DONT_ADD_BYTE_AT_END */

/*
 * The bounds of the heap increment, in blocks of `HBLKSIZE`.
 * Note: the minimum one must be multiple of largest page size.
 */
#ifndef LARGE_CONFIG
#  define MINHINCR 16   /*< minimum heap increment */
#  define MAXHINCR 2048 /*< maximum heap increment */
#else
#  define MINHINCR 64
#  define MAXHINCR 4096
#endif /* LARGE_CONFIG */

/* Stack saving for debugging. */

#ifdef NEED_CALLINFO
/*
 * The information saved about one stack frame: the return address
 * and, if `NARGS` is positive, the hidden values of `NARGS` arguments.
 */
struct callinfo {
  GC_return_addr_t ci_pc; /*< `pc` of caller, not callee */
#  if NARGS > 0
  GC_hidden_pointer ci_arg[NARGS]; /*< hide to avoid retention */
#  endif
#  if (NFRAMES * (NARGS + 1)) % 2 == 1
  /*
   * Likely alignment problem: pad the structure if the total number
   * of elements saved for `NFRAMES` frames is odd.
   */
  ptr_t ci_dummy;
#  endif
};

#  ifdef SAVE_CALL_CHAIN
/*
 * Fill in the `pc` and argument information for up to `NFRAMES` of
 * my callers.  Ignore my frame and my caller's frame.
 */
GC_INNER void GC_save_callers(struct callinfo info[NFRAMES]);
#  endif

/* Print `info` to `stderr`.  We do not hold the allocator lock. */
GC_INNER void GC_print_callers(struct callinfo info[NFRAMES]);
#endif /* NEED_CALLINFO */

EXTERN_C_END

/*
 * Macros to ensure same formatting of C array/struct/union initializer
 * across multiple versions of clang-format.  They expand to the opening
 * and the closing braces of an initializer, respectively.
 */
#define C_INITIALIZER_BEGIN {
#define C_INITIALIZER_END }

/* OS interface routines. */

#ifndef NO_CLOCK
#  ifdef BSD_TIME
/*
 * The `getrusage()`-based clock.  Any previous definitions of the
 * clock macros are discarded first.
 */
#    undef CLOCK_TYPE
#    undef GET_TIME
#    undef MS_TIME_DIFF
#    define CLOCK_TYPE struct timeval
#    define CLOCK_TYPE_INITIALIZER C_INITIALIZER_BEGIN 0, 0 C_INITIALIZER_END
/*
 * Store the `ru_utime` value reported by `getrusage()` for the current
 * process to `x` (a `CLOCK_TYPE` lvalue).  The result of `getrusage()`
 * is not checked.
 */
#    define GET_TIME(x)                  \
      do {                               \
        struct rusage rusage;            \
        getrusage(RUSAGE_SELF, &rusage); \
        x = rusage.ru_utime;             \
      } while (0)

/*
 * Compute time difference, in milliseconds.  `a` time is expected to
 * be not earlier than `b` one; the result has `unsigned long` type.
 * The arguments are expressions of `struct timeval` type; each of them
 * is evaluated several times, so it should not have side effects.
 * The result is rounded down: as the C division truncates toward zero,
 * one is subtracted if the microseconds difference is negative and is
 * not a whole number of milliseconds.
 */
#    define MS_TIME_DIFF(a, b)                                                \
      ((unsigned long)((long)((a).tv_sec - (b).tv_sec) * 1000                 \
                       + (long)((a).tv_usec - (b).tv_usec) / 1000             \
                       - ((a).tv_usec < (b).tv_usec                           \
                                  && (long)((a).tv_usec - (b).tv_usec) % 1000 \
                                         != 0                                 \
                              ? 1                                             \
                              : 0)))

/*
 * The nanosecond part of the time difference.  The total time difference
 * could be computed as:
 * `MS_TIME_DIFF(a, b) * 1000000 + NS_FRAC_TIME_DIFF(a, b)`.
 * The arguments are treated the same way as in `MS_TIME_DIFF()`.
 */
#    define NS_FRAC_TIME_DIFF(a, b)                                       \
      ((unsigned long)(((a).tv_usec < (b).tv_usec                         \
                                && (long)((a).tv_usec - (b).tv_usec) % 1000 \
                                       != 0                               \
                            ? 1000L                                       \
                            : 0)                                          \
                       + (long)((a).tv_usec - (b).tv_usec) % 1000)        \
       * 1000)

#  elif defined(MSWIN32) || defined(MSWINCE) || defined(WINXP_USE_PERF_COUNTER)
/*
 * The Windows clock: either the performance counter (the time value is
 * kept in nanoseconds) or the tick count (in milliseconds).
 */
#    if defined(MSWINRT_FLAVOR) || defined(WINXP_USE_PERF_COUNTER)
#      define CLOCK_TYPE ULONGLONG
/*
 * Note: two standalone `if` statements below are used to avoid MS VC
 * false warning (FP) about potentially uninitialized `tc` variable.
 * The counter value is divided by the frequency and scaled by 1e9
 * (using the floating-point arithmetic) before being stored to `x`.
 */
#      define GET_TIME(x)                                              \
        do {                                                           \
          LARGE_INTEGER freq, tc;                                      \
          if (!QueryPerformanceFrequency(&freq))                       \
            ABORT("QueryPerformanceFrequency requires WinXP+");        \
          if (!QueryPerformanceCounter(&tc))                           \
            ABORT("QueryPerformanceCounter failed");                   \
          x = (CLOCK_TYPE)((double)tc.QuadPart / freq.QuadPart * 1e9); \
          /* TODO: Call QueryPerformanceFrequency once at GC init. */  \
        } while (0)
/* The whole milliseconds and the remaining nanoseconds of `a - b`. */
#      define MS_TIME_DIFF(a, b) ((unsigned long)(((a) - (b)) / 1000000UL))
#      define NS_FRAC_TIME_DIFF(a, b) \
        ((unsigned long)(((a) - (b)) % 1000000UL))
#    else
#      define CLOCK_TYPE DWORD
#      define GET_TIME(x) (void)(x = GetTickCount())
/* The tick count is already in milliseconds; no finer part exists. */
#      define MS_TIME_DIFF(a, b) ((unsigned long)((a) - (b)))
#      define NS_FRAC_TIME_DIFF(a, b) 0UL
#    endif /* !WINXP_USE_PERF_COUNTER */

#  elif defined(NN_PLATFORM_CTR)
/*
 * The clock based on the system tick; the tick routines are only
 * declared here.
 */
#    define CLOCK_TYPE long long
EXTERN_C_BEGIN
CLOCK_TYPE n3ds_get_system_tick(void);
CLOCK_TYPE n3ds_convert_tick_to_ms(CLOCK_TYPE tick);
EXTERN_C_END
#    define GET_TIME(x) (void)(x = n3ds_get_system_tick())
/* The ticks difference is converted to milliseconds. */
#    define MS_TIME_DIFF(a, b) \
      ((unsigned long)n3ds_convert_tick_to_ms((a) - (b)))
/* TODO: Implement NS_FRAC_TIME_DIFF(). */
#    define NS_FRAC_TIME_DIFF(a, b) 0UL

#  elif defined(HAVE_CLOCK_GETTIME)
/*
 * The `clock_gettime()`-based clock.  The monotonic clock is used if
 * `_POSIX_MONOTONIC_CLOCK` is defined (except on Nintendo Switch),
 * otherwise the real-time one.  A failure to get the time is fatal.
 */
#    include <time.h>
#    define CLOCK_TYPE struct timespec
#    define CLOCK_TYPE_INITIALIZER C_INITIALIZER_BEGIN 0, 0 C_INITIALIZER_END
#    if defined(_POSIX_MONOTONIC_CLOCK) && !defined(NINTENDO_SWITCH)
#      define GET_TIME(x)                               \
        do {                                            \
          if (clock_gettime(CLOCK_MONOTONIC, &x) == -1) \
            ABORT("clock_gettime failed");              \
        } while (0)
#    else
#      define GET_TIME(x)                              \
        do {                                           \
          if (clock_gettime(CLOCK_REALTIME, &x) == -1) \
            ABORT("clock_gettime failed");             \
        } while (0)
#    endif
/*
 * The time difference of two `struct timespec` values, in whole
 * milliseconds; the result has `unsigned long` type.  To keep the
 * nanoseconds term non-negative, one second is added to it (note that
 * `a.tv_nsec - b.tv_nsec` is in range -1e9 to 1e9, exclusively) and
 * the same amount (1000 ms) is subtracted from the seconds term.
 */
#    define MS_TIME_DIFF(a, b)                                      \
      (((unsigned long)((a).tv_sec - (b).tv_sec) * 1000UL - 1000UL) \
       + (unsigned long)((a).tv_nsec + (1000000000L - (b).tv_nsec)) \
             / 1000000UL)
/*
 * The nanosecond part of the time difference, i.e. the remainder of
 * the biased nanoseconds term modulo one millisecond.
 */
#    define NS_FRAC_TIME_DIFF(a, b)                               \
      ((unsigned long)((a).tv_nsec + (1000000000L - (b).tv_nsec)) \
       % 1000000UL)

#  else /* !BSD_TIME && !LINUX && !NN_PLATFORM_CTR && !MSWIN32 */
/* The fallback clock based on the standard `clock()` function. */
#    include <time.h>
#    if defined(FREEBSD) && !defined(CLOCKS_PER_SEC)
#      include <machine/limits.h>
#      define CLOCKS_PER_SEC CLK_TCK
#    endif
#    if !defined(CLOCKS_PER_SEC)
/*
 * This is technically a bug in the implementation.
 * ANSI requires that `CLOCKS_PER_SEC` be defined.  But at least under
 * SunOS 4.1.1, it is not.  Also note that the combination of ANSI C
 * and POSIX is incredibly gross here.  The type `clock_t` is used by
 * both `clock()` and `times()`.  But on some machines these use
 * different notions of a clock tick, `CLOCKS_PER_SEC` seems to apply
 * only to `clock()`.  Hence we use it here.  On many machines,
 * including SunOS, `clock()` actually uses units of microseconds (that
 * are not really clock ticks).
 */
#      define CLOCKS_PER_SEC 1000000
#    endif
#    define CLOCK_TYPE clock_t
/* Store the current `clock()` value to `x`. */
#    define GET_TIME(x) (void)(x = clock())
/*
 * The difference of two `clock_t` values, in milliseconds (of
 * `unsigned long` type).  If the number of clock ticks per second is
 * divisible by 1000, then the ticks difference is just divided by the
 * number of ticks per millisecond; otherwise the difference is
 * multiplied by 1000 before the division.
 */
#    define MS_TIME_DIFF(a, b)                         \
      (CLOCKS_PER_SEC % 1000 != 0                      \
           ? ((unsigned long)((a) - (b)) * 1000)       \
                 / (unsigned long)CLOCKS_PER_SEC       \
           : (unsigned long)((a) - (b))                \
                 / (unsigned long)(CLOCKS_PER_SEC / 1000))
/*
 * Avoid using `double` type since some targets (like ARM) might
 * require `-lm` option for `double`-to-`long` conversion.
 *
 * The nanosecond part of the difference of two `clock_t` values is
 * computed depending on the clock resolution:
 *   - at most 1000 ticks per second: the result is zero;
 *   - at most 1e6 ticks per second: the difference is converted to
 *     microseconds, and the remainder modulo 1000 is scaled by 1000;
 *   - otherwise: the difference is converted to nanoseconds (the
 *     multiplication is done before the division if there are more than
 *     1e9 ticks per second), and the remainder modulo 1e6 is taken.
 */
#    define NS_FRAC_TIME_DIFF(a, b)                                         \
      (CLOCKS_PER_SEC <= 1000                                               \
           ? 0UL                                                            \
           : (unsigned long)(CLOCKS_PER_SEC <= (clock_t)1000000UL           \
                                 ? (((a) - (b))                             \
                                    * ((clock_t)1000000UL / CLOCKS_PER_SEC) \
                                    % 1000)                                 \
                                       * 1000                               \
                                 : (CLOCKS_PER_SEC                          \
                                            <= (clock_t)1000000UL * 1000    \
                                        ? ((a) - (b))                       \
                                              * ((clock_t)1000000UL * 1000  \
                                                 / CLOCKS_PER_SEC)          \
                                        : (((a) - (b)) * (clock_t)1000000UL \
                                           * 1000)                          \
                                              / CLOCKS_PER_SEC)             \
                                       % (clock_t)1000000UL))
#  endif /* !BSD_TIME && !MSWIN32 */
#  ifndef CLOCK_TYPE_INITIALIZER
/*
 * This is used to initialize `CLOCK_TYPE` variables (to some value)
 * to avoid "variable might be uninitialized" compiler warnings.
 * This default is for the clock variants which have not defined their
 * own initializer.
 */
#    define CLOCK_TYPE_INITIALIZER 0
#  endif
#endif /* !NO_CLOCK */

/*
 * `BCOPY(x, y, n)` copies `n` bytes from `x` to `y`, and `BZERO(x, n)`
 * clears `n` bytes at `x`.  The `bcopy()` and `bzero()` functions are
 * used only on the targets known to provide them; otherwise (and for
 * the code analysis by cppcheck) the standard `memcpy()` and `memset()`
 * ones are used instead.
 */
#if (defined(M68K) && defined(NEXT)) || defined(VAX) || defined(DARWIN)
#  ifdef DARWIN
#    include <string.h>
#  endif
#  define BCOPY_EXISTS
#endif

#if defined(BCOPY_EXISTS) && !defined(CPPCHECK)
#  define BCOPY(x, y, n) bcopy((void *)(x), (void *)(y), (size_t)(n))
#  define BZERO(x, n) bzero((void *)(x), (size_t)(n))
#else
#  include <string.h>
#  define BCOPY(x, y, n) memcpy(y, x, (size_t)(n))
#  define BZERO(x, n) memset(x, 0, (size_t)(n))
#endif

EXTERN_C_BEGIN

/*
 * For the code analysis by cppcheck only: replace `TEXT()` macro with
 * a simple definition which produces a wide string literal if `UNICODE`
 * is defined and leaves the literal as is otherwise.
 */
#if defined(CPPCHECK) && defined(ANY_MSWIN)
#  undef TEXT
#  ifdef UNICODE
#    define TEXT(s) L##s
#  else
#    define TEXT(s) s
#  endif
#endif /* CPPCHECK && ANY_MSWIN */

/*
 * Stop and restart mutator threads.  In the configurations not listed
 * in the condition below, `STOP_WORLD()` only performs an assertion
 * check and `START_WORLD()` does nothing.
 */
#if defined(NN_PLATFORM_CTR) || defined(NINTENDO_SWITCH) \
    || defined(GC_WIN32_THREADS) || defined(GC_PTHREADS)
GC_INNER void GC_stop_world(void);
GC_INNER void GC_start_world(void);
#  define STOP_WORLD() GC_stop_world()
#  define START_WORLD() GC_start_world()
#else
/* Just do a sanity check: we are not inside `GC_do_blocking()`. */
#  define STOP_WORLD() GC_ASSERT(GC_blocked_sp == NULL)
#  define START_WORLD()
#endif

/*
 * Abandon ship.  `ABORT(msg)` first passes the message to
 * `GC_on_abort` (which is a no-op in a small configuration) and then
 * terminates the program in a platform-specific way.  Note that some
 * variants of the macro are compound statements while others are
 * expressions.
 */
#ifdef SMALL_CONFIG
#  define GC_on_abort(msg) (void)0 /*< be silent on abort */
#else
GC_API_PRIV GC_abort_func GC_on_abort;
#endif
#if defined(CPPCHECK)
#  define ABORT(msg)    \
    {                   \
      GC_on_abort(msg); \
      abort();          \
    }
#else
#  if defined(MSWIN_XBOX1) && !defined(DebugBreak)
#    define DebugBreak() __debugbreak()
#  elif defined(MSWINCE) && !defined(DebugBreak) \
      && (!defined(UNDER_CE) || (defined(__MINGW32CE__) && !defined(ARM32)))
/*
 * This simplifies linking for WinCE (and, probably, does not
 * hurt debugging much); use `-D DebugBreak=DebugBreak` to override
 * this behavior if really needed.  This is also a workaround for
 * x86mingw32ce toolchain (if it is still declaring `DebugBreak()`
 * instead of defining it as a macro).
 */
#    define DebugBreak() _exit(-1) /*< there is no `abort()` in WinCE */
#  endif
#  if defined(MSWIN32) && (defined(NO_DEBUGGING) || defined(LINT2))
/*
 * A more user-friendly abort after showing fatal message.
 * Exit on error without running "at-exit" callbacks.
 */
#    define ABORT(msg) (GC_on_abort(msg), _exit(-1))
#  elif defined(MSWINCE) && defined(NO_DEBUGGING)
#    define ABORT(msg) (GC_on_abort(msg), ExitProcess(-1))
#  elif defined(MSWIN32) || defined(MSWINCE)
/* Break into the debugger after the message is reported. */
#    if defined(_CrtDbgBreak) && defined(_DEBUG) && defined(_MSC_VER)
#      define ABORT(msg)                          \
        {                                         \
          GC_on_abort(msg);                       \
          _CrtDbgBreak() /*< `__debugbreak()` */; \
        }
#    else
#      define ABORT(msg)    \
        {                   \
          GC_on_abort(msg); \
          DebugBreak();     \
        }
/*
 * Note: on a WinCE box, this could be silently ignored (i.e., the program
 * is not aborted); `DebugBreak()` is a statement in some toolchains.
 */
#    endif
#  else /* !MSWIN32 */
#    define ABORT(msg) (GC_on_abort(msg), abort())
#  endif
#endif /* !CPPCHECK */

/*
 * For the abort message with 1 .. 3 arguments.  `C_msg` and `C_fmt`
 * should be literals.  `C_msg` should not contain format specifiers.
 * Arguments should match their format specifiers.
 * Each macro first prints `C_msg` immediately followed by `C_fmt`
 * (the two literals are concatenated, and a new-line is appended) using
 * `GC_ERRINFO_PRINTF()`, then invokes `ABORT()` with `C_msg` alone.
 */
#define ABORT_ARG1(C_msg, C_fmt, arg1)               \
  MACRO_BLKSTMT_BEGIN                                \
  GC_ERRINFO_PRINTF(C_msg /* + */ C_fmt "\n", arg1); \
  ABORT(C_msg);                                      \
  MACRO_BLKSTMT_END
#define ABORT_ARG2(C_msg, C_fmt, arg1, arg2)               \
  MACRO_BLKSTMT_BEGIN                                      \
  GC_ERRINFO_PRINTF(C_msg /* + */ C_fmt "\n", arg1, arg2); \
  ABORT(C_msg);                                            \
  MACRO_BLKSTMT_END
#define ABORT_ARG3(C_msg, C_fmt, arg1, arg2, arg3)               \
  MACRO_BLKSTMT_BEGIN                                            \
  GC_ERRINFO_PRINTF(C_msg /* + */ C_fmt "\n", arg1, arg2, arg3); \
  ABORT(C_msg);                                                  \
  MACRO_BLKSTMT_END

/*
 * Same as `ABORT` but does not have a `noreturn` attribute.
 * I.e. `ABORT` on a dummy condition (which is always true).
 * The dummy test compares `GC_current_warn_proc` (converted to
 * an integer) with the all-ones value; the comparison is expected never
 * to hold, so the `else` branch containing `ABORT(msg)` is the one taken.
 * Note that the macro expands to an unbraced `if`-`else` statement.
 */
#define ABORT_RET(msg)                                                \
  if ((GC_funcptr_uint)GC_current_warn_proc == ~(GC_funcptr_uint)0) { \
  } else                                                              \
    ABORT(msg)

/*
 * Exit process abnormally, but without making a mess (e.g. out of memory).
 * `GC_on_abort` is invoked with `NULL` (instead of a message) first, then
 * `exit()` is called with status 1.
 */
#define EXIT() (GC_on_abort(NULL), exit(1 /* `EXIT_FAILURE` */))

/*
 * Print warning message, e.g. almost out of memory.  The argument (if any)
 * format specifier should be: "%s", "%p", "%"`WARN_PRIdPTR` or
 * "%"`WARN_PRIuPTR`.
 * `msg` should be a string literal (it is concatenated with the
 * "GC Warning: " prefix at compile time); `arg` is cast to `GC_uintptr_t`
 * before being passed to the current warning procedure.
 */
#define WARN(msg, arg) \
  GC_current_warn_proc("GC Warning: " msg, (GC_uintptr_t)(arg))
/* The warning procedure invoked by `WARN()`. */
GC_EXTERN GC_warn_proc GC_current_warn_proc;

/*
 * The print format type macros for a decimal pointer-sized value (signed
 * `GC_signed_word` and its unsigned counterpart) passed to `WARN()`.
 * They could be redefined for Win64 or LLP64, but typically this should
 * not be done: the `WARN` format string is, possibly, processed on
 * the client side, thus non-standard print type modifiers (like
 * MS "I64d") are better to be avoided here.
 * TODO: Assuming `sizeof(void *)` is equal to `sizeof(long)` or this
 * is a little-endian machine.
 */
#if !defined(WARN_PRIdPTR)
#  define WARN_PRIdPTR "ld"
#  define WARN_PRIuPTR "lu"
#endif

/*
 * A tagging macro (for a code static analyzer) to indicate that the
 * string obtained from an untrusted source (e.g., `argv[]`, `getenv`)
 * is safe to use in a vulnerable operation (e.g., `open`, `exec`).
 * It simply forwards `s` to `COVERT_DATAFLOW_P()`, which is defined
 * elsewhere.
 */
#define TRUSTED_STRING(s) COVERT_DATAFLOW_P(s)

/*
 * `GETENV(name)` gets the process environment entry; the way to obtain
 * it depends on the build configuration.
 */
#if defined(GC_READ_ENV_FILE)
/*
 * Look up the given environment variable in `GC_envfile_content`
 * (its value is returned if the variable is found).
 */
GC_INNER char *GC_envfile_getenv(const char *name);

#  define GETENV(name) GC_envfile_getenv(name)
#elif defined(CPPCHECK) \
    || (!defined(NO_GETENV) && !defined(EMPTY_GETENV_RESULTS))
/* The ordinary case (also the one always shown to cppcheck). */
#  define GETENV(name) getenv(name)
#elif defined(NO_GETENV)
#  define GETENV(name) NULL
#else
/*
 * `EMPTY_GETENV_RESULTS` case: a workaround for a reputed Wine bug.
 * An empty value is treated as if the variable is not set.
 */
GC_INLINE char *
fixed_getenv(const char *name)
{
  char *value = getenv(name);

  if (NULL == value || '\0' == *value)
    return NULL;
  return value;
}
#  define GETENV(name) fixed_getenv(name)
#endif

EXTERN_C_END

/*
 * Darwin only: select, by the CPU architecture, the thread state
 * structure type (`GC_THREAD_STATE_T`) along with the matching
 * `GC_MACH_THREAD_STATE` and `GC_MACH_THREAD_STATE_COUNT` values, and
 * define the helper macros to access the structure members.
 */
#if defined(DARWIN)
#  include <mach/thread_status.h>
#  ifndef MAC_OS_X_VERSION_MAX_ALLOWED
/* Include this header just to import the above macro. */
#    include <AvailabilityMacros.h>
#  endif
#  if defined(POWERPC)
#    if CPP_WORDSZ == 32
/* Note: the generic `GC_MACH_THREAD_STATE` fallback below is used here. */
#      define GC_THREAD_STATE_T ppc_thread_state_t
#    else
#      define GC_THREAD_STATE_T ppc_thread_state64_t
#      define GC_MACH_THREAD_STATE PPC_THREAD_STATE64
#      define GC_MACH_THREAD_STATE_COUNT PPC_THREAD_STATE64_COUNT
#    endif
#  elif defined(I386) || defined(X86_64)
#    if CPP_WORDSZ == 32
#      if defined(i386_THREAD_STATE_COUNT) \
          && !defined(x86_THREAD_STATE32_COUNT)
/* Use old naming convention for i686. */
#        define GC_THREAD_STATE_T i386_thread_state_t
#        define GC_MACH_THREAD_STATE i386_THREAD_STATE
#        define GC_MACH_THREAD_STATE_COUNT i386_THREAD_STATE_COUNT
#      else
#        define GC_THREAD_STATE_T x86_thread_state32_t
#        define GC_MACH_THREAD_STATE x86_THREAD_STATE32
#        define GC_MACH_THREAD_STATE_COUNT x86_THREAD_STATE32_COUNT
#      endif
#    else
#      define GC_THREAD_STATE_T x86_thread_state64_t
#      define GC_MACH_THREAD_STATE x86_THREAD_STATE64
#      define GC_MACH_THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
#    endif
#  elif defined(ARM32) && defined(ARM_UNIFIED_THREAD_STATE) \
      && !defined(CPPCHECK)
#    define GC_THREAD_STATE_T arm_unified_thread_state_t
#    define GC_MACH_THREAD_STATE ARM_UNIFIED_THREAD_STATE
#    define GC_MACH_THREAD_STATE_COUNT ARM_UNIFIED_THREAD_STATE_COUNT
#  elif defined(ARM32)
#    define GC_THREAD_STATE_T arm_thread_state_t
#    ifdef ARM_MACHINE_THREAD_STATE_COUNT
#      define GC_MACH_THREAD_STATE ARM_MACHINE_THREAD_STATE
#      define GC_MACH_THREAD_STATE_COUNT ARM_MACHINE_THREAD_STATE_COUNT
#    endif
#  elif defined(AARCH64)
#    define GC_THREAD_STATE_T arm_thread_state64_t
#    define GC_MACH_THREAD_STATE ARM_THREAD_STATE64
#    define GC_MACH_THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT
#  elif !defined(CPPCHECK)
#    error define GC_THREAD_STATE_T
#  endif
/* The fallback for the cases above that have not set the values. */
#  ifndef GC_MACH_THREAD_STATE
#    define GC_MACH_THREAD_STATE MACHINE_THREAD_STATE
#    define GC_MACH_THREAD_STATE_COUNT MACHINE_THREAD_STATE_COUNT
#  endif

/*
 * Try to work out the right way to access thread state structure members.
 * The structure has different definition in different Darwin versions.
 * This now defaults to the (older) names without `__`, thus hopefully
 * not breaking any existing `Makefile.direct` builds.
 */
#  if __DARWIN_UNIX03
#    define THREAD_FLD_NAME(x) __##x
#  else
#    define THREAD_FLD_NAME(x) x
#  endif
/*
 * `THREAD_FLD(x)` is the member designator to use with a variable of
 * `GC_THREAD_STATE_T` type; in case of the ARM unified thread state,
 * the member is accessed through its `ts_32` part.
 */
#  if defined(ARM32) && defined(ARM_UNIFIED_THREAD_STATE)
#    define THREAD_FLD(x) ts_32.THREAD_FLD_NAME(x)
#  else
#    define THREAD_FLD(x) THREAD_FLD_NAME(x)
#  endif
#endif /* DARWIN */

#ifndef WASI
#  include <setjmp.h>
#endif

#include <stdio.h>

#if defined(CAN_HANDLE_FORK) && defined(GC_PTHREADS)
#  include <pthread.h> /*< for `pthread_t` */
#endif

#if __STDC_VERSION__ >= 201112L
#  include <assert.h> /*< for `static_assert` */
#endif

EXTERN_C_BEGIN

/* Definitions depending on `word` size. */

/*
 * The remainder and the quotient of division of `n` by `CPP_WORDSZ`.
 * The remainder is computed by masking, which relies on `CPP_WORDSZ`
 * being a power of two.
 */
#define modWORDSZ(n) ((n) & (CPP_WORDSZ - 1)) /*< `n mod size_of_word` */
#define divWORDSZ(n) ((n) / CPP_WORDSZ)

/* A `word` value with only the bit at `CPP_WORDSZ - 1` position set. */
#define SIGNB ((word)1 << (CPP_WORDSZ - 1))
/* Same as `SIGNB` but computed for `GC_SIZE_MAX` (only its top bit). */
#define SIZET_SIGNB (GC_SIZE_MAX ^ (GC_SIZE_MAX >> 1))

/*
 * `UNALIGNED_PTRS` is defined if `ALIGNMENT` is not equal to
 * `CPP_PTRSZ / 8` (presumably, the pointer size expressed in bytes).
 */
#if CPP_PTRSZ / 8 != ALIGNMENT
#  define UNALIGNED_PTRS
#endif

/*
 * Conversions between sizes expressed in bytes, in granules (of
 * `GC_GRANULE_BYTES` each) and in pointers (of `sizeof(ptr_t)` each).
 * The conversions from bytes use a plain division, thus the result is
 * rounded down.
 */
#define BYTES_TO_GRANULES(lb) ((lb) / GC_GRANULE_BYTES)
#define GRANULES_TO_BYTES(lg) (GC_GRANULE_BYTES * (lg))
#define BYTES_TO_PTRS(lb) ((lb) / sizeof(ptr_t))
#define PTRS_TO_BYTES(lpw) ((lpw) * sizeof(ptr_t))
#define GRANULES_TO_PTRS(lg) (GC_GRANULE_PTRS * (lg))

/*
 * Convert size in bytes to that in pointers rounding up (but not adding
 * extra byte at end).  Note: the addition inside is not checked for
 * an overflow.
 */
#define BYTES_TO_PTRS_ROUNDUP(lb) BYTES_TO_PTRS((lb) + sizeof(ptr_t) - 1)

/* Size parameters. */

/*
 * The size of a heap block, in bytes; it should be a power of two.
 * Currently, the incremental collection with `MPROTECT_VDB` requires
 * the page size to be a multiple of `HBLKSIZE`.  Most modern
 * architectures support variable page sizes down to 4 KB (and i686 and
 * x86_64 are generally 4 KB), hence the default is 4 KB, except for:
 *   - Alpha: seems to be used with 8 KB pages;
 *   - `SMALL_CONFIG`: want less block-level fragmentation.
 * A client-defined `HBLKSIZE` value is translated to its logarithm
 * first (only the powers of two from 512 to 65536 are accepted), then
 * the macro is redefined based on the logarithm.
 */
#ifdef HBLKSIZE
#  if HBLKSIZE == 512
#    define CPP_LOG_HBLKSIZE 9
#  elif HBLKSIZE == 1024
#    define CPP_LOG_HBLKSIZE 10
#  elif HBLKSIZE == 2048
#    define CPP_LOG_HBLKSIZE 11
#  elif HBLKSIZE == 4096
#    define CPP_LOG_HBLKSIZE 12
#  elif HBLKSIZE == 8192
#    define CPP_LOG_HBLKSIZE 13
#  elif HBLKSIZE == 16384
#    define CPP_LOG_HBLKSIZE 14
#  elif HBLKSIZE == 32768
#    define CPP_LOG_HBLKSIZE 15
#  elif HBLKSIZE == 65536
#    define CPP_LOG_HBLKSIZE 16
#  elif !defined(CPPCHECK)
#    error Bad HBLKSIZE value
#  endif
#  undef HBLKSIZE
#elif defined(SMALL_CONFIG) && !defined(LARGE_CONFIG)
#  define CPP_LOG_HBLKSIZE 10
#elif defined(ALPHA)
#  define CPP_LOG_HBLKSIZE 13
#else
#  define CPP_LOG_HBLKSIZE 12
#endif

#define LOG_HBLKSIZE ((size_t)CPP_LOG_HBLKSIZE)
#define HBLKSIZE ((size_t)1 << CPP_LOG_HBLKSIZE)

/*
 * A `size_t` value with all bits set in its lower half only (computed
 * assuming 8 bits per byte), i.e. 2 raised to the power of half of
 * the `size_t` width, minus one.
 */
#define GC_SQRT_SIZE_MAX ((((size_t)1) << (sizeof(size_t) * 8 / 2)) - 1)

/*
 * Maximum size of objects supported by free list (larger objects are
 * allocated directly with `GC_alloc_large()`, by rounding to the next
 * multiple of `HBLKSIZE`).  This is a half of `HBLKSIZE`.
 */
#define MAXOBJBYTES (HBLKSIZE >> 1)
#define MAXOBJGRANULES BYTES_TO_GRANULES(MAXOBJBYTES)

/* Divide `n` by `HBLKSIZE` (using a shift). */
#define divHBLKSZ(n) ((n) >> LOG_HBLKSIZE)

/*
 * Equivalent to subtracting one `hblk` pointer from another.  We do it
 * this way because a compiler should find it hard to use an integer
 * division instead of a shift.  The bundled SunOS 4.1 otherwise sometimes
 * pessimizes the subtraction to involve a call to `.div`.
 * Both arguments are converted to `ptr_t`, and the byte difference is
 * divided by `HBLKSIZE`.  The arguments are parenthesized, so
 * the compound expressions (like `h + n`) are accepted as well.
 */
#define HBLK_PTR_DIFF(p, q) divHBLKSZ((ptr_t)(p) - (ptr_t)(q))

/* The remainder of division of `n` by `HBLKSIZE` (computed by masking). */
#define modHBLKSZ(n) ((n) & (HBLKSIZE - 1))

/*
 * `HBLKPTR()` aligns the given pointer down to the `HBLKSIZE` boundary
 * (and casts the result to a pointer to `struct hblk`), while
 * `HBLKDISPL()` gives the offset (in bytes) of the address relative to
 * that boundary.
 */
#define HBLKPTR(objptr) \
  ((struct hblk *)PTR_ALIGN_DOWN((ptr_t)(objptr), HBLKSIZE))
#define HBLKDISPL(objptr) modHBLKSZ((size_t)ADDR(objptr))

/* Same as `HBLKPTR` but points to the first block in the page. */
#define HBLK_PAGE_ALIGNED(objptr) \
  ((struct hblk *)PTR_ALIGN_DOWN((ptr_t)(objptr), GC_page_size))

/* Round the allocation size (in bytes) up to the nearest granule multiple. */
#define ROUNDUP_GRANULE_SIZE(lb) /*< `lb` should have no side-effect */ \
  (SIZET_SAT_ADD(lb, GC_GRANULE_BYTES - 1) & ~(size_t)(GC_GRANULE_BYTES - 1))

/*
 * Compute the number of granules for the byte allocation request: add
 * `EXTRA_BYTES` to the size, round the sum up to a multiple of
 * a granule and convert the result to granules.
 */
#define ALLOC_REQUEST_GRANS(lb) /*< `lb` should have no side-effect */ \
  BYTES_TO_GRANULES(SIZET_SAT_ADD(lb, GC_GRANULE_BYTES - 1 + EXTRA_BYTES))

#if MAX_EXTRA_BYTES != 0
#  define ADD_EXTRA_BYTES(lb) /*< `lb` should have no side-effect */ \
    SIZET_SAT_ADD(lb, EXTRA_BYTES)

/*
 * In fact, this is the test that `lb` does not exceed
 * `MAXOBJBYTES - EXTRA_BYTES`; the first comparison is there to avoid
 * looking up `EXTRA_BYTES` in the likely case.
 */
#  define SMALL_OBJ(lb) /*< `lb` should have no side-effect */ \
    (LIKELY((lb) <= MAXOBJBYTES - MAX_EXTRA_BYTES)             \
     || (lb) <= MAXOBJBYTES - EXTRA_BYTES)
#else
/* No extra bytes are ever added. */
#  define ADD_EXTRA_BYTES(lb) (lb)
#  define SMALL_OBJ(lb) LIKELY((lb) <= MAXOBJBYTES)
#endif

/*
 * Sets of pages are represented as hash tables: each one implements
 * a map from `HBLKSIZE`-aligned chunks of the address space to one bit
 * each.  Multiple addresses may be represented by a single location,
 * thus it is assumed to be OK to set bits spuriously.  The clients are
 * the black-listing code and, perhaps, the dirty bit maintenance code.
 * `LOG_PHT_ENTRIES` is the logarithm of the number of the table entries
 * (bits).
 */
#ifndef LOG_PHT_ENTRIES
#  if defined(LARGE_CONFIG) && CPP_WORDSZ == 32
/*
 * No collisions are possible (because of a 4 GB space limit).
 * Each table takes 128 KB, some of which may never be touched.
 */
#    define LOG_PHT_ENTRIES 20
#  elif defined(LARGE_CONFIG)
/*
 * Collisions are likely at 2M blocks, which is greater than 8 GB.
 * Each table takes 256 KB, some of which may never be touched.
 */
#    define LOG_PHT_ENTRIES 21
#  elif defined(SMALL_CONFIG)
/*
 * Collisions are likely once the heap grows to more than 32K blocks,
 * which is 128 MB.  Each hash table occupies 4 KB.
 */
#    define LOG_PHT_ENTRIES 15
#  else
/*
 * Collisions are likely once the heap grows to more than 256K blocks,
 * which is greater than 1 GB.  Each hash table occupies 32 KB.
 * Even for somewhat smaller heaps (say, half of that), collisions may
 * be an issue because we blacklist addresses outside the heap.
 */
#    define LOG_PHT_ENTRIES 18
#  endif
#endif /* !LOG_PHT_ENTRIES */

/*
 * `PHT_ENTRIES` is the number of entries (bits) in a page hash table;
 * `PHT_SIZE` is the length of the table in words (one bit per entry,
 * `CPP_WORDSZ` entries per word, at least one word).
 */
#define PHT_ENTRIES (1 << LOG_PHT_ENTRIES)
#define PHT_SIZE (PHT_ENTRIES > CPP_WORDSZ ? PHT_ENTRIES / CPP_WORDSZ : 1)
typedef word page_hash_table[PHT_SIZE];

/*
 * Compute the table index for the given address: the address divided by
 * `HBLKSIZE` (i.e., the block number), modulo `PHT_ENTRIES`.
 */
#define PHT_HASH(p) ((size_t)((ADDR(p) >> LOG_HBLKSIZE) & (PHT_ENTRIES - 1)))

/*
 * Read (the result is 0 or 1) and set, respectively, the bit of table
 * `bl` at the given index.  Note: `index` is evaluated twice.
 */
#define get_pht_entry_from_index(bl, index) \
  (((bl)[divWORDSZ(index)] >> modWORDSZ(index)) & 1)
#define set_pht_entry_from_index(bl, index) \
  (void)((bl)[divWORDSZ(index)] |= (word)1 << modWORDSZ(index))

#if defined(THREADS) && defined(AO_HAVE_or)
/*
 * And, one more variant for `GC_add_to_black_list_normal` and
 * `GC_add_to_black_list_stack` (invoked indirectly by `GC_do_local_mark()`)
 * and `async_set_pht_entry_from_index()` (invoked by `GC_dirty()` or the
 * write fault handler).  The bit is set with `AO_or()` on the word
 * containing it.
 */
#  define set_pht_entry_from_index_concurrent(bl, index) \
    AO_or((volatile AO_t *)&(bl)[divWORDSZ(index)],      \
          (AO_t)1 << modWORDSZ(index))
#  ifdef MPROTECT_VDB
#    define set_pht_entry_from_index_concurrent_volatile(bl, index) \
      set_pht_entry_from_index_concurrent(bl, index)
#  endif
#else
/*
 * Either no threads support or `AO_or()` is unavailable: fall back to
 * the ordinary (non-atomic) variant.
 */
#  define set_pht_entry_from_index_concurrent(bl, index) \
    set_pht_entry_from_index(bl, index)
#  ifdef MPROTECT_VDB
/*
 * Same as `set_pht_entry_from_index` but avoiding the compound assignment
 * for a `volatile` array.
 */
#    define set_pht_entry_from_index_concurrent_volatile(bl, index) \
      (void)((bl)[divWORDSZ(index)]                                 \
             = (bl)[divWORDSZ(index)] | ((word)1 << modWORDSZ(index)))
#  endif
#endif

/* Heap blocks. */

/*
 * The upper bound of the number of mark bits in a heap block.
 * We allocate 1 bit per allocation granule.
 * If `MARK_BIT_PER_OBJ` is not defined, we use every `n`-th bit, where
 * `n` is the number of allocation granules per object.  Otherwise, we only
 * use the initial group of mark bits, and it is safe to allocate smaller
 * header for large objects.
 */
#define MARK_BITS_PER_HBLK (HBLKSIZE / GC_GRANULE_BYTES)

#ifndef MARK_BIT_PER_OBJ
/*
 * We maintain layout maps for heap blocks containing objects of
 * a given size.  Each entry in this map describes a byte offset
 * (displacement) and has the following type.  The narrower type is
 * chosen if the number of granules in a half of a heap block does
 * not exceed 0x100.
 */
#  if (1 << (CPP_LOG_HBLKSIZE - 1)) / GC_GRANULE_BYTES <= 0x100
typedef unsigned char hb_map_entry_t;
#  else
typedef unsigned short hb_map_entry_t;
#  endif
#endif /* !MARK_BIT_PER_OBJ */

/*
 * The header of a heap block.  It holds the block links, the kind and
 * the size of the objects in the block, and the mark bits (or bytes).
 * The block body itself is described by `struct hblk`.
 */
struct hblkhdr {
  /*
   * Link field for `hblk` free list and for lists of chunks waiting to
   * be reclaimed.
   */
  struct hblk *hb_next;

  struct hblk *hb_prev; /*< backwards link for free list */

  struct hblk *hb_block; /*< the corresponding block */

  /*
   * Kind of objects in the block.  Each kind identifies a mark
   * procedure and a set of list headers.  Sometimes called regions.
   */
  unsigned char hb_obj_kind;

  /* A bit-wise combination of the flag values defined below. */
  unsigned char hb_flags;

  /* Ignore pointers that do not point to the first `hblk` of this object. */
#define IGNORE_OFF_PAGE 1

  /*
   * This is a free block, which has been unmapped from the address space.
   * `GC_remap()` must be invoked on it before it can be reallocated.
   * Set only if `USE_MUNMAP` macro is defined.
   */
#define WAS_UNMAPPED 2

  /* Block is free, i.e. not in use. */
#define FREE_BLK 4

#ifdef ENABLE_DISCLAIM
  /* This kind has a callback on reclaim. */
#  define HAS_DISCLAIM 8

  /*
   * Mark from all objects, marked or not.  Used to mark objects needed
   * by reclaim notifier.
   */
#  define MARK_UNCONDITIONALLY 0x10
#endif

  /*
   * NOTE(review): presumably, this flag indicates that the block holds
   * a large object - confirm against the code that sets it.
   */
#ifndef MARK_BIT_PER_OBJ
#  define LARGE_BLOCK 0x20
#endif

  /*
   * Value of `GC_gc_no` when block was last allocated or swept.
   * May wrap.  For a free block, this is maintained only for `USE_MUNMAP`,
   * and indicates when the header was allocated, or when the size of the
   * block last changed.
   */
  unsigned short hb_last_reclaimed;

#ifdef MARK_BIT_PER_OBJ
#  define LARGE_INV_SZ ((unsigned32)1 << 16)

  /*
   * A good upper bound for `2**32 / hb_sz`.
   * For large objects, we use `LARGE_INV_SZ`.
   */
  unsigned32 hb_inv_sz;
#endif

  /*
   * If in use, size in bytes, of objects in the block.
   * Otherwise, the size of the whole free block.  We assume that this is
   * convertible to `GC_signed_word` without generating a negative result.
   * We avoid generating free blocks larger than that.
   */
  size_t hb_sz;

  /* Object descriptor for marking.  See `gc_mark.h` file. */
  word hb_descr;

#ifndef MARK_BIT_PER_OBJ
  /*
   * A table of remainders `mod BYTES_TO_GRANULES(hb_sz)` essentially,
   * except for large blocks.  See `GC_obj_map`.
   */
  hb_map_entry_t *hb_map;
#endif

#ifdef PARALLEL_MARK
  /*
   * Number of set mark bits, excluding the one always set at the end.
   * Currently it is updated concurrently and hence only approximate.
   * But a zero value does guarantee that the block contains
   * no marked objects.  Ensuring this property means that we never
   * decrement it to zero during a collection, and hence the count may
   * be one too high.  Due to concurrent updates, an arbitrary number
   * of increments, but not all of them (!) may be lost, hence it may,
   * in theory, be much too low.  The count may also be too high
   * if multiple mark threads mark the same object due to a race.
   */
  volatile AO_t hb_n_marks;
#else
  /* Without parallel marking, the count is accurate. */
  size_t hb_n_marks;
#endif

#ifdef USE_MARK_BYTES
  /*
   * Unlike the other case, this is in units of bytes.  Since we force
   * certain alignment, we need at most one mark bit per a granule.
   * But we do allocate and set one extra mark bit to avoid
   * an explicit check for the partial object at the end of each block.
   */
#  define HB_MARKS_SZ (MARK_BITS_PER_HBLK + 1)
  union {
    /*
     * The `i`-th byte is 1 if the object starting at granule `i`
     * or object `i` is marked, 0 otherwise.  The mark bit for the
     * "one past the end" object is always set to avoid a special case
     * test in the marker.
     */
    char _hb_marks[HB_MARKS_SZ];
    word dummy; /*< force word alignment of mark bytes */
  } _mark_byte_union;
#  define hb_marks _mark_byte_union._hb_marks
#else
  /*
   * The mark bits are packed into words (`CPP_WORDSZ` bits per word);
   * here `HB_MARKS_SZ` is the number of the words, with one more word
   * added to the quotient.
   */
#  define HB_MARKS_SZ (MARK_BITS_PER_HBLK / CPP_WORDSZ + 1)

#  if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS))
  volatile AO_t hb_marks[HB_MARKS_SZ];
#  else
  word hb_marks[HB_MARKS_SZ];
#  endif
#endif /* !USE_MARK_BYTES */
};

/* A "random" mark bit index for assertions. */
#define ANY_INDEX 23

/* Heap block body. */

/* The size of a heap block expressed in words and in granules. */
#define HBLK_WORDS (HBLKSIZE / sizeof(word))
#define HBLK_GRANULES (HBLKSIZE / GC_GRANULE_BYTES)

/*
 * The number of objects in a block dedicated to a certain size.
 * May erroneously yield zero (instead of one) for large objects.
 * The argument should be non-zero as it is used as a divisor.
 */
#define HBLK_OBJS(sz_in_bytes) (HBLKSIZE / (sz_in_bytes))

/* A heap block: just `HBLKSIZE` bytes of the body. */
struct hblk {
  char hb_body[HBLKSIZE];
};

/* Is `FREE_BLK` flag set in the given block header? */
#define HBLK_IS_FREE(hhdr) (((hhdr)->hb_flags & FREE_BLK) != 0)

/*
 * The unchecked variant of `OBJ_SZ_TO_BLOCKS_CHECKED()`: the addition
 * inside is not protected against a wrap around.
 */
#define OBJ_SZ_TO_BLOCKS(lb) divHBLKSZ((lb) + HBLKSIZE - 1)

/*
 * Size of block (in units of `HBLKSIZE`) needed to hold objects of
 * given `lb` (in bytes).  The checked variant prevents wrap around.
 */
#define OBJ_SZ_TO_BLOCKS_CHECKED(lb) /*< `lb` should have no side-effect */ \
  divHBLKSZ(SIZET_SAT_ADD(lb, HBLKSIZE - 1))

/*
 * The object free-list link.  It is stored at the very beginning of
 * the object (the macro yields an lvalue).
 */
#define obj_link(p) (*(void **)(p))

/*
 * Root sets.  Logically, these are private to `mark_rts.c` file, but
 * the tables are placed here because we do not want them to be scanned.
 */

/*
 * The limit on the number of ranges that can be registered as static
 * roots; it depends on the configuration size.
 */
#if defined(LARGE_CONFIG)
#  define MAX_ROOT_SETS 8192
#elif defined(SMALL_CONFIG)
#  define MAX_ROOT_SETS 512
#else
#  define MAX_ROOT_SETS 2048
#endif

/*
 * The limit on the number of segments that can be excluded from root
 * sets (a quarter of the root sets limit).
 */
#define MAX_EXCLUSIONS (MAX_ROOT_SETS / 4)

/*
 * A data structure for excluded static roots: an address range given
 * by its two bounds.
 * NOTE(review): presumably `e_end` is exclusive - confirm against
 * the code in `mark_rts.c` file.
 */
struct exclusion {
  ptr_t e_start;
  ptr_t e_end;
};

/*
 * A data structure for list of root sets.  We keep a hash table, so that
 * we can filter out duplicate additions.  Under Win32, we need to do
 * a better job of filtering overlaps, so we resort to sequential search,
 * and pay the price.  Hence the chain link field (`r_next`) is absent
 * if `ANY_MSWIN` macro is defined.
 */
struct roots {
  ptr_t r_start; /*< multiple of pointer size */
  ptr_t r_end;   /*< multiple of pointer size and greater than `r_start` */
#ifndef ANY_MSWIN
  struct roots *r_next;
#endif
  /* Delete before registering new dynamic libraries if set. */
  GC_bool r_tmp;
};

#if !defined(ANY_MSWIN)
/* The logarithm of the size of the hash table index to roots. */
#  define LOG_RT_SIZE 6

/*
 * The hash table size.  It should be a power of 2, and it is not
 * required to be equal to `MAX_ROOT_SETS`.
 */
#  define RT_SIZE (1 << LOG_RT_SIZE)
#endif

/*
 * Provide the default value of `MAX_HEAP_SECTS` (unless already defined
 * or if checked by cppcheck).  This is done only if `ANY_MSWIN` or
 * `USE_PROC_FOR_LIBRARIES` macro is defined; the value depends on
 * the configuration size and on the word size.
 */
#if (!defined(MAX_HEAP_SECTS) || defined(CPPCHECK)) \
    && (defined(ANY_MSWIN) || defined(USE_PROC_FOR_LIBRARIES))
#  ifdef LARGE_CONFIG
#    if CPP_WORDSZ > 32
#      define MAX_HEAP_SECTS 81920
#    else
#      define MAX_HEAP_SECTS 7680
#    endif
#  elif defined(SMALL_CONFIG) && !defined(USE_PROC_FOR_LIBRARIES)
#    if defined(PARALLEL_MARK) && (defined(MSWIN32) || defined(CYGWIN32))
#      define MAX_HEAP_SECTS 384
#    else
#      define MAX_HEAP_SECTS 128 /*< roughly 256 MB (`128 * 2048 * 1024`) */
#    endif
#  elif CPP_WORDSZ > 32
#    define MAX_HEAP_SECTS 1024 /*< roughly 8 GB */
#  else
#    define MAX_HEAP_SECTS 512 /*< roughly 4 GB */
#  endif
#endif /* !MAX_HEAP_SECTS */

/*
 * A mark stack entry: the start of an object paired with the object
 * descriptor.
 */
typedef struct GC_ms_entry {
  ptr_t mse_start; /*< beginning of object, pointer-aligned one */
#ifdef PARALLEL_MARK
  /* Same as in the other case but declared for atomic accesses. */
  volatile AO_t mse_descr;
#else
  /*
   * The descriptor; the low-order two bits are tags, as described
   * in `gc_mark.h` file.
   */
  word mse_descr;
#endif
} mse;

/*
 * Current state of marking.  Used to remember where we are during the
 * concurrent marking.  This is a plain `int`; the particular state
 * values are defined elsewhere.
 */
typedef int mark_state_t;

/* These structures are only declared here (used through pointers). */
struct disappearing_link;
struct finalizable_object;

/*
 * A hash table of disappearing links.
 * NOTE(review): the meaning of the fields is inferred from their names
 * only - confirm against the finalization code.
 */
struct dl_hashtbl_s {
  struct disappearing_link **head;
  size_t entries;    /*< presumably, the number of registered links */
  unsigned log_size; /*< presumably, the logarithm of the table size */
};

/* The roots of the finalizable objects data structures. */
struct fnlz_roots_s {
  /* NOTE(review): presumably, a hash table of the objects - confirm. */
  struct finalizable_object **fo_head;
  /* List of objects that should be finalized now. */
  struct finalizable_object *finalize_now;
};

/*
 * A toggle reference: either an ordinary pointer (`strong_ref`) or
 * a hidden one (`weak_ref`); both members share the same storage.
 */
union toggle_ref_u {
  /* The least significant bit is used to distinguish between choices. */
  void *strong_ref;
  GC_hidden_pointer weak_ref;
};

/*
 * Extended descriptors.  `GC_typed_mark_proc` understands these.
 * These are used for simple objects that are larger than what can
 * be described by a `BITMAP_BITS`-sized bitmap.  A descriptor may span
 * several consecutive entries (see `ed_continued`).
 */
typedef struct {
  /*
   * The least significant bit corresponds to the first "pointer-sized"
   * word.
   */
  word ed_bitmap;
  GC_bool ed_continued; /*< next entry is continuation */
} typed_ext_descr_t;

/* A heap section: its start address and its size (in bytes). */
struct HeapSect {
  ptr_t hs_start;
  size_t hs_bytes;
};

#ifdef MAKE_BACK_GRAPH
/* The maximum in-degree we handle directly. */
#  define BACKGRAPH_MAX_IN 10

/*
 * The back edges (and the height information) of a node of the back
 * graph.  Up to `BACKGRAPH_MAX_IN` edges are stored directly, the rest
 * ones go to the continuation structures.
 */
struct back_edges_s {
  /* Number of edges, including those in continuation structures. */
  word n_edges;

  /* NOTE(review): the flag values are defined elsewhere. */
  unsigned short flags;

  /*
   * If `height` is greater than zero, then keeps the `GC_gc_no` value
   * when it was computed.  If it was computed this cycle, then it is
   * current.  If it was computed during the last cycle, then it belongs
   * to the old height, which is only saved for live objects referenced by
   * dead ones.  This may grow due to references from newly dead objects.
   */
  unsigned short height_gc_no;

  /*
   * Longest path through unreachable nodes to this node that we found
   * using depth first search.
   */
  GC_signed_word height;

  ptr_t edges[BACKGRAPH_MAX_IN];

  /*
   * Pointer to continuation structure; we use only the edges field in
   * the continuation.  Also used as a free-list link.
   */
  struct back_edges_s *cont;
};
#endif

/*
 * Lists of all heap blocks and free lists as well as other random data
 * structures that should not be scanned by the collector.  These are
 * grouped together in a structure so that they can be easily skipped by
 * `GC_push_conditional_with_exclusions()`.  The ordering is weird to
 * make `GC_malloc` faster by keeping the important fields sufficiently
 * close together that a single load of a base register will do.
 * Scalars that could easily appear to be pointers are also put here.
 * The main fields should precede any conditionally included fields, where
 * possible.
 */
struct _GC_arrays {
#define GC_heapsize GC_arrays._heapsize
  word _heapsize; /*< heap size in bytes (value never goes down) */

#define GC_requested_heapsize GC_arrays._requested_heapsize
  word _requested_heapsize; /*< heap size due to explicit expansion */

#define GC_heapsize_on_gc_disable GC_arrays._heapsize_on_gc_disable
  word _heapsize_on_gc_disable;

#define GC_heapsize_at_forced_unmap GC_arrays._heapsize_at_forced_unmap
  word _heapsize_at_forced_unmap; /*< accessed with the allocator lock held */

#define GC_last_heap_addr GC_arrays._last_heap_addr
  word _last_heap_addr;

  /*
   * Total bytes contained in blocks on the free list of large objects.
   * (A large object is the one that occupies a block of at least
   * two `HBLKSIZE`.)
   */
#define GC_large_free_bytes GC_arrays._large_free_bytes
  word _large_free_bytes;

  /* Total number of bytes in allocated large objects blocks. */
#define GC_large_allocd_bytes GC_arrays._large_allocd_bytes
  word _large_allocd_bytes;

  /*
   * Maximum number of bytes that were ever allocated in large object blocks.
   * This is used to help decide when it is safe to split up a large block.
   */
#define GC_max_large_allocd_bytes GC_arrays._max_large_allocd_bytes
  word _max_large_allocd_bytes;

  /* Number of bytes allocated before this collection cycle. */
#define GC_bytes_allocd_before_gc GC_arrays._bytes_allocd_before_gc
  word _bytes_allocd_before_gc;

#define GC_our_mem_bytes GC_arrays._our_mem_bytes
  word _our_mem_bytes;

  /* Number of bytes allocated during this collection cycle. */
#define GC_bytes_allocd GC_arrays._bytes_allocd
  word _bytes_allocd;

  /*
   * Number of black-listed bytes dropped during GC cycle as a result
   * of repeated scanning during allocation attempts.  These are treated
   * largely as allocated, even though they are not useful to the client.
   */
#define GC_bytes_dropped GC_arrays._bytes_dropped
  word _bytes_dropped;

  /*
   * Approximate number of bytes in objects (and headers) that became
   * ready for finalization in the last collection.
   */
#define GC_bytes_finalized GC_arrays._bytes_finalized
  word _bytes_finalized;

  /*
   * Number of explicitly deallocated bytes of memory since last
   * collection.
   */
#define GC_bytes_freed GC_arrays._bytes_freed
  word _bytes_freed;

  /*
   * Bytes of memory explicitly deallocated while finalizers were running.
   * Used to approximate size of memory explicitly deallocated by finalizers.
   */
#define GC_finalizer_bytes_freed GC_arrays._finalizer_bytes_freed
  word _finalizer_bytes_freed;

  /*
   * Pointer to the first (lowest address) `bottom_index` entity;
   * assumes the allocator lock is held.
   */
#define GC_all_bottom_indices GC_arrays._all_bottom_indices
  bottom_index *_all_bottom_indices;

  /*
   * Pointer to the last (highest address) `bottom_index` entity;
   * assumes the allocator lock is held.
   */
#define GC_all_bottom_indices_end GC_arrays._all_bottom_indices_end
  bottom_index *_all_bottom_indices_end;

#define GC_scratch_free_ptr GC_arrays._scratch_free_ptr
  ptr_t _scratch_free_ptr;

#define GC_hdr_free_list GC_arrays._hdr_free_list
  hdr *_hdr_free_list;

#define GC_scratch_end_addr GC_arrays._scratch_end_addr
  word _scratch_end_addr; /*< the end point of the current scratch area */

#if defined(IRIX5) || (defined(USE_PROC_FOR_LIBRARIES) && !defined(LINUX))
#  define USE_SCRATCH_LAST_END_PTR
  /*
   * The address of the end point of the last obtained scratch area.
   * Used by `GC_register_dynamic_libraries()`.
   */
#  define GC_scratch_last_end_addr GC_arrays._scratch_last_end_addr
  word _scratch_last_end_addr;
#endif

#if defined(GC_ASSERTIONS) || defined(MAKE_BACK_GRAPH) \
    || defined(INCLUDE_LINUX_THREAD_DESCR)             \
    || (defined(KEEP_BACK_PTRS) && ALIGNMENT == 1)
#  define SET_REAL_HEAP_BOUNDS

  /*
   * Similar to `GC_least_plausible_heap_addr` and
   * `GC_greatest_plausible_heap_addr` but do not include future
   * (potential) heap expansion.  Both variables are zero initially.
   */
#  define GC_least_real_heap_addr GC_arrays._least_real_heap_addr
#  define GC_greatest_real_heap_addr GC_arrays._greatest_real_heap_addr
  word _least_real_heap_addr;
  word _greatest_real_heap_addr;
#endif

#define GC_used_heap_size_after_full GC_arrays._used_heap_size_after_full
  word _used_heap_size_after_full;

  /* Number of explicitly managed bytes of storage at last collection. */
#define GC_non_gc_bytes_at_gc GC_arrays._non_gc_bytes_at_gc
  word _non_gc_bytes_at_gc;

  /* The number of extra calls to `GC_mark_some` that we have made. */
#define GC_mark_deficit GC_arrays._mark_deficit
  size_t _mark_deficit;

#ifndef NO_BLACK_LISTING
  /*
   * Counter of the cases when found block by `GC_allochblk_nth` is
   * black-listed completely.
   */
#  define GC_drop_blacklisted_count GC_arrays._drop_blacklisted_count
  unsigned _drop_blacklisted_count;

  /* Number of warnings suppressed so far. */
#  define GC_large_alloc_warn_suppressed GC_arrays._large_alloc_warn_suppressed
  long _large_alloc_warn_suppressed;

  /* Non-stack false references seen at last full collection. */
#  define GC_old_normal_bl GC_arrays._old_normal_bl
  word *_old_normal_bl;
  /* Non-stack false references seen since last full collection. */
#  define GC_incomplete_normal_bl GC_arrays._incomplete_normal_bl
  word *_incomplete_normal_bl;

#  define GC_old_stack_bl GC_arrays._old_stack_bl
#  define GC_incomplete_stack_bl GC_arrays._incomplete_stack_bl
  word *_old_stack_bl;
  word *_incomplete_stack_bl;
#endif

  /* The limits of stack for `GC_mark_some()` and friends. */
#define GC_mark_stack GC_arrays._mark_stack
#define GC_mark_stack_limit GC_arrays._mark_stack_limit
  mse *_mark_stack;
  mse *_mark_stack_limit;

  /*
   * All ranges between `GC_mark_stack` (incl.) and `GC_mark_stack_top`
   * (incl.) still need to be marked from.
   */
#define GC_mark_stack_top GC_arrays._mark_stack_top
#ifdef PARALLEL_MARK
  /* Updated only with the mark lock held, but read asynchronously. */
  mse *volatile _mark_stack_top;

#  define GC_mark_no GC_arrays._mark_no
  word _mark_no; /*< protected by the mark lock */

  /*
   * Number of bytes of memory allocated since we released the allocator lock.
   * Instead of reacquiring the allocator lock just to add this in, we add it
   * in the next time we reacquire the allocator lock.  (Atomically adding it
   * does not work, since we would have to atomically update it in
   * `GC_malloc`, which is too expensive.)
   */
#  define GC_bytes_allocd_tmp GC_arrays._bytes_allocd_tmp
  volatile AO_t _bytes_allocd_tmp;
#else
  mse *_mark_stack_top;
#endif

#ifdef DYNAMIC_POINTER_MASK
  /*
   * Both mask and shift are zeros by default; if mask is zero, then
   * correct it to ~0 at the collector initialization.
   */
#  define GC_pointer_mask GC_arrays._pointer_mask
#  define GC_pointer_shift GC_arrays._pointer_shift
  word _pointer_mask;
  unsigned char _pointer_shift;
#endif

#ifdef THREADS
#  ifdef USE_SPIN_LOCK
#    define GC_allocate_lock GC_arrays._allocate_lock
  volatile AO_TS_t _allocate_lock;
#  endif
#  if !defined(HAVE_LOCKFREE_AO_OR) && defined(AO_HAVE_test_and_set_acquire) \
      && (!defined(NO_MANUAL_VDB) || defined(MPROTECT_VDB))
#    define NEED_FAULT_HANDLER_LOCK
#    define GC_fault_handler_lock GC_arrays._fault_handler_lock
  volatile AO_TS_t _fault_handler_lock;
#  endif
#  if !(defined(GC_ALWAYS_MULTITHREADED) \
        && (defined(USE_PTHREAD_LOCKS) || defined(USE_SPIN_LOCK)))
#    define GC_need_to_lock GC_arrays._need_to_lock
  GC_bool _need_to_lock;
#  endif

#  ifdef GC_ASSERTIONS
#    define GC_thr_initialized GC_arrays._thr_initialized
  GC_bool _thr_initialized;
#  endif
#  if defined(GC_USE_DLOPEN_WRAP) && !defined(GC_WIN32_THREADS) \
      && !defined(PLATFORM_THREADS) && !defined(SN_TARGET_PSP2)
#    define GC_syms_wrap_initialized GC_arrays._syms_wrap_initialized
  GC_bool _syms_wrap_initialized;
#  endif

#  define GC_roots_were_cleared GC_arrays._roots_were_cleared
  GC_bool _roots_were_cleared;
#else
#  ifndef GC_NO_FINALIZATION
/*
 * The variables to minimize the level of recursion when a client
 * finalizer allocates memory.
 */
#    define GC_finalizer_nested GC_arrays._finalizer_nested
#    define GC_finalizer_skipped GC_arrays._finalizer_skipped
  unsigned char _finalizer_nested;
  unsigned short _finalizer_skipped;
#  endif
#endif

  /* Has `GC_init()` been run? */
#define GC_is_initialized GC_arrays._is_initialized
  GC_bool _is_initialized;

  /*
   * Do we need a larger mark stack?  May be set by client-supplied
   * mark routines.
   */
#define GC_mark_stack_too_small GC_arrays._mark_stack_too_small
  GC_bool _mark_stack_too_small;

  /* Are there collectible marked objects in the heap? */
#define GC_objects_are_marked GC_arrays._objects_are_marked
  GC_bool _objects_are_marked;

#ifndef GC_DISABLE_INCREMENTAL
#  define GC_should_start_incremental_collection \
    GC_arrays._should_start_incremental_collection
  GC_bool _should_start_incremental_collection;
#endif

#ifndef GC_NO_FINALIZATION
  /* Avoid the work if unreachable finalizable objects are not used. */
  /* TODO: Turn this variable into a counter. */
#  define GC_need_unreachable_finalization \
    GC_arrays._need_unreachable_finalization
  GC_bool _need_unreachable_finalization;
#endif

#ifndef DONT_USE_ATEXIT
#  ifdef SMALL_CONFIG
#    define GC_skip_collect_atexit FALSE
#  else
  /*
   * A dedicated variable to avoid a garbage collection on abort.
   * `GC_find_leak` cannot be used for this purpose as otherwise
   * TSan finds a data race (between `GC_default_on_abort` and, e.g.,
   * `GC_finish_collection`).
   */
#    define GC_skip_collect_atexit GC_arrays._skip_collect_atexit
  GC_bool _skip_collect_atexit;
#  endif
#endif

#if defined(NO_FIND_LEAK) && defined(SHORT_DBG_HDRS)
#  define GC_debugging_initialized GC_arrays._debugging_initialized
  GC_bool _debugging_initialized;
#  define GC_debugging_started FALSE
#else
#  define GC_debugging_initialized GC_debugging_started
  /* `GC_debug_malloc()` has been called, once at least. */
#  define GC_debugging_started GC_arrays._debugging_started
  GC_bool _debugging_started;

#  define GC_have_errors GC_arrays._have_errors
#  ifdef AO_HAVE_store
  volatile AO_t _have_errors;
#  else
  GC_bool _have_errors;
#  endif
#endif

#define GC_explicit_typing_initialized GC_arrays._explicit_typing_initialized
#ifdef AO_HAVE_load_acquire
  volatile AO_t _explicit_typing_initialized;
#else
  GC_bool _explicit_typing_initialized;
#endif

  /* Indicate whether a full collection due to heap growth is needed. */
#define GC_need_full_gc GC_arrays._need_full_gc
  GC_bool _need_full_gc;

#ifndef NO_CLOCK
  /*
   * Do performance measurements if set to true (e.g., accumulation of the
   * total time of full collections).
   */
#  define GC_measure_performance GC_arrays._measure_performance
  GC_bool _measure_performance;

  /*
   * Variables for computing the average world-stop delay time statistic.
   * `GC_world_stopped_total_divisor` is incremented on every world stop and
   * halved when it reaches its maximum (or upon `GC_world_stopped_total_time`
   * overflow).  The total time is in milliseconds.
   */
  /* TODO: Store the nanosecond part. */
#  define GC_world_stopped_total_time GC_arrays._world_stopped_total_time
#  define GC_world_stopped_total_divisor GC_arrays._world_stopped_total_divisor
  unsigned _world_stopped_total_time;
  unsigned _world_stopped_total_divisor;
#endif

#ifndef NO_FIND_LEAK
#  define GC_n_leaked GC_arrays._n_leaked
  unsigned _n_leaked;
#endif
#ifndef SHORT_DBG_HDRS
#  define GC_n_smashed GC_arrays._n_smashed
  unsigned _n_smashed;
#endif

  /* Number of bytes in the accessible composite objects. */
#define GC_composite_in_use GC_arrays._composite_in_use
  word _composite_in_use;

  /* Number of bytes in the accessible atomic objects. */
#define GC_atomic_in_use GC_arrays._atomic_in_use
  word _atomic_in_use;

  /* GC number of latest successful `GC_expand_hp_inner()` call. */
#define GC_last_heap_growth_gc_no GC_arrays._last_heap_growth_gc_no
  word _last_heap_growth_gc_no;

  /*
   * Number of bytes of memory reclaimed minus the number of bytes originally
   * on free lists that we had to drop.  Protected by the allocator lock.
   */
#define GC_bytes_found GC_arrays._bytes_found
  GC_signed_word _bytes_found;

#ifndef GC_GET_HEAP_USAGE_NOT_NEEDED
  /*
   * Number of bytes reclaimed before this collection cycle; used for
   * statistics only.
   */
#  define GC_reclaimed_bytes_before_gc GC_arrays._reclaimed_bytes_before_gc
  word _reclaimed_bytes_before_gc;
#endif

#ifdef USE_MUNMAP
#  define GC_unmapped_bytes GC_arrays._unmapped_bytes
  word _unmapped_bytes;
#else
#  define GC_unmapped_bytes 0
#endif

#if defined(COUNT_UNMAPPED_REGIONS) && defined(USE_MUNMAP)
#  define GC_num_unmapped_regions GC_arrays._num_unmapped_regions
  GC_signed_word _num_unmapped_regions;
#else
#  define GC_num_unmapped_regions 0
#endif

#define GC_all_nils GC_arrays._all_nils
  bottom_index *_all_nils;

#define GC_scan_ptr GC_arrays._scan_ptr
  struct hblk *_scan_ptr;

#ifdef PARALLEL_MARK
#  define GC_main_local_mark_stack GC_arrays._main_local_mark_stack
  mse *_main_local_mark_stack;

  /*
   * The lowest entry on mark stack that may not be empty.
   * Updated only by the initiating thread.
   */
#  define GC_first_nonempty GC_arrays._first_nonempty
  volatile ptr_t _first_nonempty;
#endif

#ifdef ENABLE_TRACE
#  define GC_trace_ptr GC_arrays._trace_ptr
  ptr_t _trace_ptr;
#endif

#if !defined(ALWAYS_SMALL_CLEAR_STACK) && !defined(STACK_NOT_SCANNED) \
    && !defined(THREADS)
  /*
   * Coolest stack pointer value from which we have already cleared
   * the stack.
   */
#  define GC_min_sp GC_arrays._min_sp
  ptr_t _min_sp;

  /*
   * The "hottest" stack pointer value we have seen recently.
   * Degrades over time.
   */
#  define GC_high_water GC_arrays._high_water
  ptr_t _high_water;
#endif

#if CPP_PTRSZ > CPP_WORDSZ
#  define GC_noop_sink_ptr GC_arrays._noop_sink_ptr
  volatile ptr_t _noop_sink_ptr;
#endif

#define GC_noop_sink GC_arrays._noop_sink
#if defined(AO_HAVE_store) && defined(THREAD_SANITIZER)
  volatile AO_t _noop_sink;
#else
  volatile word _noop_sink;
#endif

#if !defined(SMALL_CONFIG) && !defined(GC_NO_FINALIZATION)
  /* Saved number of disappearing links for stats printing. */
#  define GC_old_dl_entries GC_arrays._old_dl_entries
  size_t _old_dl_entries;
#  ifndef GC_LONG_REFS_NOT_NEEDED
#    define GC_old_ll_entries GC_arrays._old_ll_entries
  size_t _old_ll_entries;
#  endif
#endif

#define GC_mark_stack_size GC_arrays._mark_stack_size
  size_t _mark_stack_size;

#define GC_mark_state GC_arrays._mark_state
  mark_state_t _mark_state; /*< initialized to `MS_NONE` (0) */

#define GC_capacity_heap_sects GC_arrays._capacity_heap_sects
  size_t _capacity_heap_sects;

#define GC_n_heap_sects GC_arrays._n_heap_sects
  size_t _n_heap_sects; /*< number of separately added heap sections */

#ifdef ANY_MSWIN
#  define GC_n_heap_bases GC_arrays._n_heap_bases
  size_t _n_heap_bases; /*< see `GC_heap_bases[]` */
#endif

#ifdef USE_PROC_FOR_LIBRARIES
  /* Number of `GET_MEM`-allocated memory sections. */
#  define GC_n_memory GC_arrays._n_memory
  word _n_memory;
#endif

#ifdef GC_GCJ_SUPPORT
#  define GC_last_finalized_no GC_arrays._last_finalized_no
#  define GC_gcjobjfreelist GC_arrays._gcjobjfreelist
  word _last_finalized_no;
  ptr_t *_gcjobjfreelist;
#endif

#define GC_fo_entries GC_arrays._fo_entries
  size_t _fo_entries;

#ifndef GC_NO_FINALIZATION
#  define GC_dl_hashtbl GC_arrays._dl_hashtbl
#  define GC_fnlz_roots GC_arrays._fnlz_roots
#  define GC_log_fo_table_size GC_arrays._log_fo_table_size
#  ifndef GC_LONG_REFS_NOT_NEEDED
#    define GC_ll_hashtbl GC_arrays._ll_hashtbl
  struct dl_hashtbl_s _ll_hashtbl;
#  endif
  struct dl_hashtbl_s _dl_hashtbl;
  struct fnlz_roots_s _fnlz_roots;
  unsigned _log_fo_table_size;

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
#    define GC_toggleref_arr GC_arrays._toggleref_arr
#    define GC_toggleref_array_size GC_arrays._toggleref_array_size
#    define GC_toggleref_array_capacity GC_arrays._toggleref_array_capacity
  union toggle_ref_u *_toggleref_arr;
  size_t _toggleref_array_size;
  size_t _toggleref_array_capacity;
#  endif
#endif

#ifdef TRACE_BUF
#  define GC_trace_buf_pos GC_arrays._trace_buf_pos
  size_t _trace_buf_pos; /*< an index in the circular buffer */
#endif

  /*
   * How many consecutive collection/expansion failures?
   * Reset by `GC_allochblk()`.
   */
#define GC_alloc_fail_count GC_arrays._alloc_fail_count
  unsigned _alloc_fail_count;

#ifdef ENABLE_DISCLAIM
#  define GC_finalized_kind GC_arrays._finalized_kind
  unsigned _finalized_kind;
#endif

#ifndef NO_CLOCK
#  define GC_full_gc_total_time GC_arrays._full_gc_total_time
#  define GC_stopped_mark_total_time GC_arrays._stopped_mark_total_time
#  define GC_full_gc_total_ns_frac GC_arrays._full_gc_total_ns_frac
#  define GC_stopped_mark_total_ns_frac GC_arrays._stopped_mark_total_ns_frac
  unsigned long _full_gc_total_time; /*< in ms, may wrap */
  unsigned long _stopped_mark_total_time;
  unsigned32 _full_gc_total_ns_frac; /*< fraction of 1 ms */
  unsigned32 _stopped_mark_total_ns_frac;
#endif

#ifdef GC_WIN32_THREADS
  /* Largest index in `dll_thread_table` that was ever used. */
#  define GC_max_thread_index GC_arrays._max_thread_index
  volatile LONG _max_thread_index;
#endif

  /* Total size of registered root sections. */
#define GC_root_size GC_arrays._root_size
  word _root_size;

  /* `GC_static_roots[0..n_root_sets-1]` contains the valid root sets. */
#define n_root_sets GC_arrays._n_root_sets
  size_t _n_root_sets;

#define GC_excl_table_entries GC_arrays._excl_table_entries
  size_t _excl_table_entries; /*< number of entries in use */

#define GC_ed_size GC_arrays._ed_size
  size_t _ed_size; /*< current size of above arrays */

#define GC_avail_descr GC_arrays._avail_descr
  size_t _avail_descr; /*< next available slot */

#if defined(CAN_HANDLE_FORK) && defined(GC_PTHREADS)
  /* Value of `pthread_self()` of the thread which called `fork()`. */
#  define GC_parent_pthread_self GC_arrays._parent_pthread_self
  pthread_t _parent_pthread_self;
#endif

  /* Points to array of extended descriptors. */
#define GC_ext_descriptors GC_arrays._ext_descriptors
  typed_ext_descr_t *_ext_descriptors;

  /*
   * Table of user-defined mark procedures.  There is a small number
   * of these, which can be referenced by `DS_PROC` mark descriptors.
   * See `gc_mark.h` file.
   */
#define GC_mark_procs GC_arrays._mark_procs
  GC_mark_proc _mark_procs[GC_MAX_MARK_PROCS];

  /*
   * `GC_valid_offsets[i]` implies
   * `GC_modws_valid_offsets[i % sizeof(ptr_t)]`.
   */
#define GC_modws_valid_offsets GC_arrays._modws_valid_offsets
  char _modws_valid_offsets[sizeof(ptr_t)];

#ifdef MAKE_BACK_GRAPH
  /*
   * Serves as a pointer (actually, an index in `GC_back_edge_space`)
   * to never-used `back_edges` space.
   */
#  define GC_n_back_edge_structs GC_arrays._n_back_edge_structs
  int _n_back_edge_structs;

#  define GC_back_edge_space GC_arrays._back_edge_space
  struct back_edges_s *_back_edge_space;

  /* Pointer to free list of deallocated `back_edges` structures. */
#  define GC_avail_back_edges GC_arrays._avail_back_edges
  struct back_edges_s *_avail_back_edges;

  /*
   * Table of objects that are currently on the depth-first search stack.
   * Only objects with in-degree one are in this table.  Other objects are
   * identified using `HEIGHT_IN_PROGRESS`.
   */
  /* FIXME: This data structure needs improvement. */
#  define GC_backgraph_in_progress_space GC_arrays._backgraph_in_progress_space
#  define GC_backgraph_in_progress_size GC_arrays._backgraph_in_progress_size
#  define GC_backgraph_n_in_progress GC_arrays._backgraph_n_in_progress
  ptr_t *_backgraph_in_progress_space;
  size_t _backgraph_in_progress_size;
  size_t _backgraph_n_in_progress;

#  define GC_backgraph_max_deepest_h GC_arrays._backgraph_max_deepest_h
#  define GC_backgraph_deepest_height GC_arrays._backgraph_deepest_height
#  define GC_backgraph_deepest_obj GC_arrays._backgraph_deepest_obj
  word _backgraph_max_deepest_h;
  word _backgraph_deepest_height;
  ptr_t _backgraph_deepest_obj;
#endif

#ifdef GC_READ_ENV_FILE
  /*
   * The content of the `.gc.env` file with CR and LF replaced with '\0'.
   * `NULL` if the file is missing or empty.  Otherwise, always ends
   * with '\0' (designating the end of the file).
   */
#  define GC_envfile_content GC_arrays._envfile_content
  char *_envfile_content;

  /* Length of `GC_envfile_content` (if non-`NULL`). */
#  define GC_envfile_length GC_arrays._envfile_length
  size_t _envfile_length;
#endif

#ifndef ANY_MSWIN
  /*
   * The hash table header.  Used only to check whether a range
   * is already present.
   */
#  define GC_root_index GC_arrays._root_index
  struct roots *_root_index[RT_SIZE];
#endif

#if defined(SAVE_CALL_CHAIN) && !defined(DONT_SAVE_TO_LAST_STACK) \
    && (!defined(REDIRECT_MALLOC) || !defined(GC_HAVE_BUILTIN_BACKTRACE))
  /*
   * Stack at last garbage collection.  Useful for debugging mysterious
   * object disappearances.  In the multi-threaded case, we currently only
   * save the calling stack.  Not supported in case of `malloc` redirection
   * because `backtrace()` may call `malloc()`.
   */
  struct callinfo _last_stack[NFRAMES];
#  define SAVE_CALLERS_TO_LAST_STACK() GC_save_callers(GC_arrays._last_stack)
#else
#  define SAVE_CALLERS_TO_LAST_STACK() (void)0
#endif

  /* Free list for `NORMAL` objects. */
#define GC_objfreelist GC_arrays._objfreelist
  void *_objfreelist[MAXOBJGRANULES + 1];

  /* Free list for atomic objects. */
#define GC_aobjfreelist GC_arrays._aobjfreelist
  void *_aobjfreelist[MAXOBJGRANULES + 1];

  /*
   * Uncollectible but traced objects.  Objects on this and `_auobjfreelist`
   * are always marked, except during garbage collections.
   */
#define GC_uobjfreelist GC_arrays._uobjfreelist
  void *_uobjfreelist[MAXOBJGRANULES + 1];

#ifdef GC_ATOMIC_UNCOLLECTABLE
  /* Atomic uncollectible but traced objects. */
#  define GC_auobjfreelist GC_arrays._auobjfreelist
  void *_auobjfreelist[MAXOBJGRANULES + 1];
#endif

  /*
   * Number of granules to allocate when asked for a certain number of bytes
   * (plus `EXTRA_BYTES`).  Should be accessed with the allocator lock held.
   */
#define GC_size_map GC_arrays._size_map
  size_t _size_map[MAXOBJBYTES + 1];

#ifndef MARK_BIT_PER_OBJ
  /*
   * If the element is not `NULL`, then it points to a map of valid object
   * addresses.  `GC_obj_map[lg][i]` is `i % lg`.  This is now used purely
   * to replace a division in the marker by a table lookup.
   * `GC_obj_map[0]` is used for large objects and contains all nonzero
   * entries.  This gets us out of the marker fast path without an extra test.
   */
#  define GC_obj_map GC_arrays._obj_map
  hb_map_entry_t *_obj_map[MAXOBJGRANULES + 1];

#  define OBJ_MAP_LEN BYTES_TO_GRANULES(HBLKSIZE)
#endif

#define VALID_OFFSET_SZ HBLKSIZE
  /*
   * A nonzero `GC_valid_offsets[i]` means `i` is registered as
   * a displacement.
   */
#define GC_valid_offsets GC_arrays._valid_offsets
  char _valid_offsets[VALID_OFFSET_SZ];

#ifndef GC_DISABLE_INCREMENTAL
  /* Pages that were dirty at last `GC_read_dirty()` call. */
#  define GC_grungy_pages GC_arrays._grungy_pages
  page_hash_table _grungy_pages;

  /* Pages dirtied since last `GC_read_dirty()` call. */
#  define GC_dirty_pages GC_arrays._dirty_pages
#  ifdef MPROTECT_VDB
  volatile
#  endif
      page_hash_table _dirty_pages;
#endif

#if (defined(CHECKSUMS) && (defined(GWW_VDB) || defined(SOFT_VDB))) \
    || defined(PROC_VDB)
  /* A table to indicate the pages ever dirtied. */
#  define GC_written_pages GC_arrays._written_pages
  page_hash_table _written_pages;
#endif

  /* Heap segments potentially containing client objects. */
#define GC_heap_sects GC_arrays._heap_sects
  struct HeapSect *_heap_sects;

#if defined(USE_PROC_FOR_LIBRARIES)
  /* All `GET_MEM`-allocated memory.  Includes block headers and the like. */
#  define GC_our_memory GC_arrays._our_memory
  struct HeapSect _our_memory[MAX_HEAP_SECTS];
#endif

#ifdef ANY_MSWIN
  /* Start address of memory regions obtained from OS. */
#  define GC_heap_bases GC_arrays._heap_bases
  ptr_t _heap_bases[MAX_HEAP_SECTS];
#endif

#ifdef MSWINCE
  /* Committed lengths of memory regions obtained from OS. */
#  define GC_heap_lengths GC_arrays._heap_lengths
  word _heap_lengths[MAX_HEAP_SECTS];
#endif

#define GC_static_roots GC_arrays._static_roots
  struct roots _static_roots[MAX_ROOT_SETS];

  /* Array of exclusions, ascending address order. */
#define GC_excl_table GC_arrays._excl_table
  struct exclusion _excl_table[MAX_EXCLUSIONS];

  /*
   * The block header index.  Each entry points to a `bottom_index` entity.
   * On a 32-bit machine, it points to the index for a set of the high-order
   * bits equal to the index.  For longer addresses, we hash the high-order
   * bits to compute the index in `GC_top_index`, and each entry points to
   * a hash chain.  The last entry in each chain is `GC_all_nils`.
   */
#define GC_top_index GC_arrays._top_index
  bottom_index *_top_index[TOP_SZ];

#ifdef ECOS
#  ifndef ECOS_GC_MEMORY_SIZE
#    define ECOS_GC_MEMORY_SIZE (448 * 1024)
#  endif
#  define GC_ecos_brk_idx GC_arrays._ecos_brk_idx
#  define GC_ecos_memory GC_arrays._ecos_memory
  size_t _ecos_brk_idx;
  char _ecos_memory[ECOS_GC_MEMORY_SIZE];
#endif
};

/*
 * The only instance of `struct _GC_arrays`.  The `GC_xxx` macros defined
 * next to the fields of the structure expand to the corresponding members
 * of this variable.
 */
GC_API_PRIV struct _GC_arrays GC_arrays;
/* The address of the beginning of `GC_arrays`. */
#define beginGC_arrays ((ptr_t)(&GC_arrays))
/*
 * The address just past the end of `GC_arrays` (assuming `ptr_t` is
 * a pointer to a byte-sized type - TODO confirm).
 */
#define endGC_arrays (beginGC_arrays + sizeof(GC_arrays))

/*
 * Object kinds.  `MAXOBJKINDS` is the number of elements in `GC_obj_kinds`
 * table.  The value may be predefined by the client; otherwise the default
 * one is smaller in case of the small configuration.
 */
#ifndef MAXOBJKINDS
#  ifndef SMALL_CONFIG
#    define MAXOBJKINDS 24
#  else
#    define MAXOBJKINDS 16
#  endif
#endif
/*
 * The description of an object kind.  `GC_obj_kinds` table holds up to
 * `MAXOBJKINDS` such descriptions.  Note: the order of the fields matters
 * because `OK_DISCLAIM_INITZ` (defined below) supplies positional
 * initializer values for the trailing fields.
 */
GC_EXTERN struct obj_kind {
  /*
   * Array of free-list headers for this kind of object.  Points either
   * to `GC_arrays` or to storage allocated with `GC_scratch_alloc()`.
   */
  void **ok_freelist;

  /*
   * List headers for lists of blocks waiting to be swept.
   * Indexed by object size in granules.
   */
  struct hblk **ok_reclaim_list;

  /* Descriptor template for objects in this block. */
  word ok_descriptor;

  /*
   * Add object size in bytes to descriptor template to obtain descriptor.
   * Otherwise the template is used as is.
   */
  GC_bool ok_relocate_descr;

  /* Clear objects before putting them on the free list. */
  GC_bool ok_init;

#ifdef ENABLE_DISCLAIM
  /*
   * Mark from all, including unmarked, objects in block.
   * Used to protect objects reachable from reclaim notifiers.
   */
  GC_bool ok_mark_unconditionally;

  /*
   * The disclaim procedure is called before `obj` is reclaimed, but
   * must also tolerate being called with object from free list.
   * A nonzero exit prevents object from being reclaimed.
   */
  int(GC_CALLBACK *ok_disclaim_proc)(void * /* `obj` */);

  /*
   * The tail of an initializer list of the structure: the values for
   * `ok_mark_unconditionally` and `ok_disclaim_proc` fields (preceded by
   * a comma), or nothing if the disclaim support is not compiled in.
   */
#  define OK_DISCLAIM_INITZ /* comma */ , FALSE, 0
#else
#  define OK_DISCLAIM_INITZ /*< empty */
#endif
} GC_obj_kinds[MAXOBJKINDS];

/*
 * The addresses of the beginning and just past the end of `GC_obj_kinds`
 * table, respectively (the latter one assumes `ptr_t` is a pointer to
 * a byte-sized type - TODO confirm).
 */
#define beginGC_obj_kinds ((ptr_t)(&GC_obj_kinds[0]))
#define endGC_obj_kinds (beginGC_obj_kinds + sizeof(GC_obj_kinds))

/*
 * The predefined kinds.  `GC_N_KINDS_INITIAL_VALUE` is 3 unless the atomic
 * uncollectible kind is supported (in the latter case it is 4).
 * `IS_UNCOLLECTABLE(k)` tests whether kind `k` is one of the uncollectible
 * ones.
 */
#define PTRFREE GC_I_PTRFREE
#define NORMAL GC_I_NORMAL
#define UNCOLLECTABLE 2
#ifndef GC_ATOMIC_UNCOLLECTABLE
#  define GC_N_KINDS_INITIAL_VALUE 3
#  define IS_UNCOLLECTABLE(k) ((k) == UNCOLLECTABLE)
#else
#  define AUNCOLLECTABLE 3
#  define GC_N_KINDS_INITIAL_VALUE 4
/*
 * `UNCOLLECTABLE` (2) and `AUNCOLLECTABLE` (3) differ only in the lowest
 * bit, so the latter is masked off before the comparison.
 */
#  define IS_UNCOLLECTABLE(k) (((k) & ~1) == UNCOLLECTABLE)
#endif

/*
 * NOTE(review): presumably the number of `GC_obj_kinds` elements in use,
 * initially `GC_N_KINDS_INITIAL_VALUE` - confirm at the definition.
 */
GC_EXTERN unsigned GC_n_kinds;

/*
 * The page size as used by the collector (e.g., by `ROUNDUP_PAGESIZE`).
 * May mean the allocation granularity size, not page size.
 */
GC_EXTERN size_t GC_page_size;

/*
 * A dedicated variable exists only if `REAL_PAGESIZE_NEEDED` macro is
 * defined; otherwise `GC_real_page_size` is just an alias of `GC_page_size`.
 */
#ifdef REAL_PAGESIZE_NEEDED
GC_EXTERN size_t GC_real_page_size;
#else
#  define GC_real_page_size GC_page_size
#endif

/*
 * Get heap memory from the OS.
 * Note that `sbrk`-like allocation is preferred, since it usually
 * makes it possible to merge consecutively allocated chunks.
 * It also avoids unintended recursion with `REDIRECT_MALLOC` macro
 * defined.  `GET_MEM()` argument should be of `size_t` type and
 * have no side-effect.  `GET_MEM()` returns `HBLKSIZE`-aligned chunk
 * (`NULL` means a failure).  In case of `MMAP_SUPPORTED`, the argument
 * must also be a multiple of a physical page size.
 * `GET_MEM` is currently not assumed to retrieve zero-filled space.
 *
 * One of three implementations is selected: `GC_get_mem()` (Windows-like
 * targets and OS/2), a `calloc`-based one (targets listed in the second
 * branch), or `GC_unix_get_mem()` (the default one unless `GET_MEM` is
 * already defined, e.g. by the client).
 */
/* TODO: Take advantage of `GET_MEM()` returning a zero-filled space. */
#if defined(ANY_MSWIN) || defined(MSWIN_XBOX1) || defined(OS2)
GC_INNER void *GC_get_mem(size_t lb);
#  define GET_MEM(lb) GC_get_mem(lb)
#  if defined(CYGWIN32) && !defined(USE_WINALLOC)
/* `GC_unix_get_mem()` is needed in addition to `GC_get_mem()`. */
#    define NEED_UNIX_GET_MEM
#  endif
#elif defined(DOS4GW) || defined(EMBOX) || defined(KOS) || defined(NEXT) \
    || defined(NONSTOP) || defined(RTEMS) || defined(__CC_ARM)           \
    || (defined(SOLARIS) && !defined(USE_MMAP))
/* TODO: Use `page_alloc()` directly on Embox. */
#  if defined(REDIRECT_MALLOC) && !defined(CPPCHECK)
#    error Malloc redirection is unsupported
#  endif
/*
 * Allocate `GC_page_size` bytes more than requested (the sum saturates
 * instead of wrapping around), then move the result forward by
 * `GC_page_size - 1` and apply `HBLKPTR` to it to get an aligned address.
 * The value returned by `calloc()` is not stored anywhere, thus the chunk
 * cannot be passed to `free()` later.
 * NOTE(review): if `calloc()` fails, the offset is added to a null pointer;
 * the result is `NULL` only provided `HBLKPTR` rounds that value down to
 * zero - confirm `GC_page_size` does not exceed `HBLKSIZE` on these targets.
 */
#  define GET_MEM(lb)                                                  \
    ((void *)HBLKPTR((ptr_t)calloc(1, SIZET_SAT_ADD(lb, GC_page_size)) \
                     + GC_page_size - 1))
#elif !defined(GET_MEM)
GC_INNER void *GC_unix_get_mem(size_t lb);
#  define GET_MEM(lb) GC_unix_get_mem(lb)
#  define NEED_UNIX_GET_MEM
#endif

/*
 * Round the allocation size (`lb`) up to a multiple of `GC_page_size`;
 * the addition saturates instead of wrapping around.  `lb` is expected
 * to have no side-effect.  `GC_setpagesize()` is assumed to be already
 * invoked.
 */
#define ROUNDUP_PAGESIZE(lb) \
  (SIZET_SAT_ADD(lb, GC_page_size - 1) & ~(GC_page_size - 1))

/*
 * The variant of `ROUNDUP_PAGESIZE` intended to make `GET_MEM()` argument
 * safe: the rounding is performed only if `mmap` is supported, otherwise
 * the argument is passed through as is.
 */
#ifndef MMAP_SUPPORTED
#  define ROUNDUP_PAGESIZE_IF_MMAP(lb) (lb)
#else
#  define ROUNDUP_PAGESIZE_IF_MMAP(lb) ROUNDUP_PAGESIZE(lb)
#endif

#ifdef ANY_MSWIN
/*
 * NOTE(review): presumably holds the result of `GetSystemInfo()` call -
 * confirm where the variable is filled in.
 */
GC_EXTERN SYSTEM_INFO GC_sysinfo;

/*
 * Is `p` the start of either the `malloc` heap, or of one of the collector
 * heap sections?  The result is true if so.
 */
GC_INNER GC_bool GC_is_heap_base(const void *p);
#endif

#ifdef GC_GCJ_SUPPORT
/*
 * Note: `GC_hblkfreelist` and `GC_free_bytes` remain visible to GNU `gcj`.
 * That is why plain `extern` is used here instead of `GC_EXTERN` (the latter
 * may be redefined to give the symbols the internal linkage, e.g. in the
 * single-object-file build).
 */
extern struct hblk *GC_hblkfreelist[];
extern word GC_free_bytes[];
#endif

/*
 * This is used by `GC_do_blocking()`.  Bundles a client function together
 * with its data so that both could be passed around by a single pointer.
 */
struct blocking_data {
  /* The function to invoke. */
  GC_fn_type fn;
  /*
   * The client data for `fn`; the same field is reused to hold the result
   * (presumably, the value returned by `fn` - TODO confirm at the usage).
   */
  void *client_data; /*< and result */
};

/*
 * This is used by `GC_call_with_gc_active`, `GC_push_all_stack_sections`.
 * An element of a singly-linked list (chained via `prev` field) of the
 * "traced stack sections".
 */
struct GC_traced_stack_sect_s {
  /* The stack pointer value saved for this section. */
  ptr_t saved_stack_ptr;
#ifdef IA64
  /* The same for the register backing store, which exists on IA-64 only. */
  ptr_t saved_backing_store_ptr;
  ptr_t backing_store_end;
#endif
  /*
   * The previous element of the list (presumably, `NULL` if there is
   * none - TODO confirm).
   */
  struct GC_traced_stack_sect_s *prev;
};

#ifdef THREADS
/*
 * Process all "traced stack sections" - scan entire stack except for
 * frames belonging to the user functions invoked by `GC_do_blocking`.
 * `lo` and `hi` delimit the stack region to process; `traced_stack_sect`
 * is the head of the list of the sections (see `GC_traced_stack_sect_s`).
 * NOTE(review): whether the bounds are inclusive is not visible from this
 * declaration - confirm at the definition.
 */
GC_INNER void
GC_push_all_stack_sections(ptr_t lo, ptr_t hi,
                           struct GC_traced_stack_sect_s *traced_stack_sect);

/*
 * The total size, in bytes, of all stacks.
 * Updated on every `GC_push_all_stacks()` call.
 */
GC_EXTERN word GC_total_stacksize;

#else
/*
 * In the single-threaded case, the state related to `GC_do_blocking()` and
 * `GC_call_with_gc_active()` is kept in the global variables below.
 */

/* Note: `NULL` value means we are not inside `GC_do_blocking()` call. */
GC_EXTERN ptr_t GC_blocked_sp;

/*
 * Points to the "frame" data held in stack by the innermost
 * `GC_call_with_gc_active()`.  `NULL` if no such "frame" active.
 */
GC_EXTERN struct GC_traced_stack_sect_s *GC_traced_stack_sect;
#endif /* !THREADS */

#if (defined(E2K) && defined(THREADS)) || defined(IA64)
/*
 * Where the register stack of the primordial thread begins (its bottom).
 * In case of E2K, this is the offset (`ps_ofs`), not a pointer.
 */
GC_EXTERN ptr_t GC_register_stackbottom;
#endif

#ifdef IA64
/*
 * The counterpart of `GC_push_all_stack_sections` for the register
 * backing store, which exists on IA-64.
 */
GC_INNER void GC_push_all_register_sections(
    ptr_t bs_lo, ptr_t bs_hi, GC_bool eager,
    struct GC_traced_stack_sect_s *traced_stack_sect);
#endif

/*
 * Mark bit operations.
 *
 * The marks are in a reserved area of each heap block.
 * Each object or granule has one mark bit associated with it.
 * Only those corresponding to the beginning of an object are used.
 */

/*
 * Retrieve, set, clear the `n`-th mark bit in a given heap block.
 * (Recall that bit `n` corresponds to `n`-th object or allocation granule
 * relative to the beginning of the block, including unused space.)
 * `hhdr` is a pointer to the block header; the marks are accessed as
 * `hb_marks` field of the latter.  `mark_bit_from_hdr` yields a nonzero
 * value if the mark is set; the other two macros are executed for their
 * side effect only.
 */

#ifdef USE_MARK_BYTES
/* One element of `hb_marks` per mark; a set mark is stored as 1. */
#  define mark_bit_from_hdr(hhdr, n) ((hhdr)->hb_marks[n])
#  define set_mark_bit_from_hdr(hhdr, n) (void)((hhdr)->hb_marks[n] = 1)
#  define clear_mark_bit_from_hdr(hhdr, n) (void)((hhdr)->hb_marks[n] = 0)
#else
/*
 * The marks are packed into words: the word index is `divWORDSZ(n)` and
 * the bit index within the word is `modWORDSZ(n)`.  Note that `n` is
 * evaluated more than once by the macros, thus it should have no
 * side-effect.
 */
/* Set mark bit correctly, even if mark bits may be concurrently accessed. */
#  if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS))
/*
 * Workaround TSan false positive: there is no race between
 * `mark_bit_from_hdr` and `set_mark_bit_from_hdr` when `n` is different
 * (alternatively, `USE_MARK_BYTES` could be used).  If TSan is off, then
 * `AO_or()` is used only if we define `USE_MARK_BITS` macro explicitly.
 */
#    define OR_WORD(addr, bits) AO_or(addr, bits)
#  else
#    define OR_WORD(addr, bits) (void)(*(addr) |= (bits))
#  endif
#  define mark_bit_from_hdr(hhdr, n) \
    (((hhdr)->hb_marks[divWORDSZ(n)] >> modWORDSZ(n)) & (word)1)
#  define set_mark_bit_from_hdr(hhdr, n) \
    OR_WORD((hhdr)->hb_marks + divWORDSZ(n), (word)1 << modWORDSZ(n))
/*
 * Unlike setting, clearing is always performed by a plain (non-atomic)
 * read-modify-write operation, with the `volatile` qualifier cast away.
 * NOTE(review): presumably the marks are never cleared concurrently with
 * the marking - confirm at the call sites.
 */
#  define clear_mark_bit_from_hdr(hhdr, n)                                    \
    (void)(((word *)CAST_AWAY_VOLATILE_PVOID((hhdr)->hb_marks))[divWORDSZ(n)] \
           &= ~((word)1 << modWORDSZ(n)))
#endif /* !USE_MARK_BYTES */

/*
 * The mapping between objects and mark bit indices within a heap block:
 *   - `MARK_BIT_NO(offset, sz)` is the mark bit index corresponding to
 *     the given byte offset and object size (in bytes);
 *   - `MARK_BIT_OFFSET(sz)` is the spacing between useful mark bits;
 *   - `FINAL_MARK_BIT(sz)` is the position of the final, always set,
 *     mark bit.
 */
#ifndef MARK_BIT_PER_OBJ
/* A mark bit per granule: the object size does not affect the index. */
#  define MARK_BIT_NO(offset, sz) BYTES_TO_GRANULES(offset)
#  define MARK_BIT_OFFSET(sz) BYTES_TO_GRANULES(sz)
#  define FINAL_MARK_BIT(sz)                 \
    ((sz) > MAXOBJBYTES ? MARK_BITS_PER_HBLK \
                        : BYTES_TO_GRANULES(HBLK_OBJS(sz) * (sz)))
#else
/* A mark bit per object: the bits are consecutive. */
#  define MARK_BIT_NO(offset, sz) ((offset) / (sz))
#  define MARK_BIT_OFFSET(sz) 1
#  define FINAL_MARK_BIT(sz) ((sz) > MAXOBJBYTES ? 1 : HBLK_OBJS(sz))
#endif /* MARK_BIT_PER_OBJ */

/* Important internal collector routines. */

/*
 * Return the current stack pointer, approximately.
 * See also `STORE_APPROX_SP_TO`, the macro variant of the function.
 */
GC_INNER ptr_t GC_approx_sp(void);

/*
 * Same as `GC_approx_sp` but a macro.  `sp` should be a local variable
 * of `volatile` `ptr_t` type; it is passed as an l-value (not by pointer)
 * and is assigned by the macro.  Depending on the compiler, the stored
 * value is either the frame address of the current function or the
 * address of `sp` itself.
 */
#if defined(__clang__) && !defined(CPPCHECK) \
    && (defined(E2K) || (defined(S390) && __clang_major__ < 8))
/*
 * Workaround some bugs in clang:
 *   - "undefined reference to llvm.frameaddress" error (clang-9/e2k);
 *   - a crash in SystemZTargetLowering of libLLVM-3.8 (s390).
 * Note: `__clang_major__` is tested only if `__clang__` is defined because
 * otherwise the preprocessor treats the former as zero, thus the workaround
 * would be turned on for every other compiler targeting S390.
 */
#  define STORE_APPROX_SP_TO(sp) (void)(sp = (ptr_t)(&sp))
#elif defined(CPPCHECK) \
    || (defined(__GNUC__) && __GNUC__ >= 4 && !defined(STACK_NOT_SCANNED))
/* TODO: Use `GC_GNUC_PREREQ(4, 0)` after fixing a bug in cppcheck. */
/* Note: l-value is passed instead of pointer to `sp` (because of cppcheck). */
#  define STORE_APPROX_SP_TO(sp) (void)(sp = (ptr_t)__builtin_frame_address(0))
#else
/* No suitable builtin function: use the address of the variable itself. */
#  define STORE_APPROX_SP_TO(sp) (void)(sp = (ptr_t)(&sp))
#endif

/*
 * Have we allocated enough to amortize a collection?
 * The result is true if so.
 */
GC_INNER GC_bool GC_should_collect(void);

/*
 * Get the next block whose address is at least `h`.  Returned block
 * is managed by the collector.  The block must be in use unless
 * `allow_free` is `TRUE`.  Return `NULL` if there is no such block.
 */
GC_INNER struct hblk *GC_next_block(struct hblk *h, GC_bool allow_free);

/*
 * Get the last (highest address) block whose address is at most `h`.
 * Returned block is managed by the collector, but may or may not be in use
 * (unlike `GC_next_block`, there is no argument to control that).
 * Return `NULL` if there is no such block.
 */
GC_INNER struct hblk *GC_prev_block(struct hblk *h);

/*
 * Initialize the marker.
 * NOTE(review): what exactly is set up is not visible from this
 * declaration - see the definition.
 */
GC_INNER void GC_mark_init(void);

/*
 * Clear mark bits in all allocated heap blocks (i.e. for all heap objects).
 * This invalidates the marker invariant, and sets `GC_mark_state` to
 * reflect this.  (This implicitly starts marking to reestablish the
 * invariant.)
 */
GC_INNER void GC_clear_marks(void);

/*
 * Tell the marker that marked objects may point to unmarked ones, and
 * roots may point to unmarked objects.  Reset mark stack.
 */
GC_INNER void GC_invalidate_mark_state(void);

/*
 * Perform a small amount of marking.  We try to touch roughly a page
 * of memory.  Returns quickly if no collection is in progress.
 * Returns `TRUE` if we just finished a mark phase.
 * `cold_gc_frame` argument is an address inside a frame of the
 * collector that remains valid until all marking is complete;
 * `NULL` value indicates that it is OK to miss some register values.
 * In the case of an incremental collection, the world may be running.
 */
GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame);

/*
 * Initiate a garbage collection.  Initiates a full collection if the
 * mark state is invalid; otherwise it is a partial one.
 */
GC_INNER void GC_initiate_gc(void);

/*
 * Is a collection in progress?  Note that this can return `TRUE` in
 * the non-incremental case, if a collection has been abandoned and
 * the mark state is now `MS_INVALID`.
 */
GC_INNER GC_bool GC_collection_in_progress(void);

/*
 * Push contents of the symbol residing in the static roots area excluded
 * from scanning by the collector for a reason.  Note: it should be used only
 * for symbols of relatively small size (containing one or several pointers).
 * The pushed region is the storage of `sym` itself (from its address up to
 * the address just past it).  `sym` is expanded twice, thus it should have
 * no side-effect.
 */
#define GC_PUSH_ALL_SYM(sym) GC_push_all_eager(&(sym), &(sym) + 1)

/* Same as `GC_push_all` but consider interior pointers as valid. */
GC_INNER void GC_push_all_stack(ptr_t b, ptr_t t);

#ifdef NO_VDB_FOR_STATIC_ROOTS
/*
 * In this configuration, the whole region is always pushed (by
 * `GC_push_all`); `all` argument is evaluated and then ignored.
 */
#  define GC_push_conditional_static(b, t, all) \
    ((void)(all), GC_push_all(b, t))
#else
/*
 * Same as `GC_push_conditional` (does either of `GC_push_all` or
 * `GC_push_selected` depending on the third argument) but the caller
 * guarantees the region belongs to the registered static roots.
 */
GC_INNER void GC_push_conditional_static(void *b, void *t, GC_bool all);
#endif

#if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
/*
 * Similar to `GC_push_conditional` but scans the whole region immediately.
 * `GC_mark_local` does not handle memory protection faults yet.
 * So, the static data regions are scanned immediately by `GC_push_roots`.
 */
GC_INNER void GC_push_conditional_eager(void *bottom, void *top, GC_bool all);
#endif

/*
 * In the multi-threaded case, we push part of the current thread stack
 * with `GC_push_all_eager` when we push the registers.  This gets the
 * callee-save registers that may disappear.  The remainder of the stacks
 * are scheduled for scanning in `(*GC_push_other_roots)()`, which is
 * thread-package-specific.
 */

/*
 * Push all or dirty roots.  Call the mark routine (`GC_push_one` for
 * a single pointer, `GC_push_conditional` on groups of pointers) on every
 * top level accessible pointer.  If not `all`, then arrange to push only
 * possibly altered values.  `cold_gc_frame` is an address inside
 * a collector frame that remains valid until all marking is complete;
 * a `NULL` pointer indicates that it is OK to miss some register values.
 */
GC_INNER void GC_push_roots(GC_bool all, ptr_t cold_gc_frame);

/*
 * Push system or application specific roots onto the mark stack.
 * In some environments (e.g. a multi-threaded one) this is predefined
 * to be nonzero.  A client-supplied replacement should also call the
 * original function.  Remains externally visible as used by some
 * well-known 3rd-party software (e.g., ECL) currently.
 */
GC_API_PRIV GC_push_other_roots_proc GC_push_other_roots;

#ifdef THREADS
GC_INNER void GC_push_thread_structures(void);
#endif

/*
 * A pointer set to `GC_push_typed_structures_proc` lazily so that we can
 * avoid linking in the typed allocation support if the latter is unused.
 */
GC_EXTERN void (*GC_push_typed_structures)(void);

/*
 * The type of a callback accepted by `GC_with_callee_saves_pushed()`.
 * The callback is invoked with the supplied `arg` and a context pointer.
 */
typedef void (*GC_with_callee_saves_func)(ptr_t arg, void *context);

/*
 * Ensure that either registers are pushed, or callee-save registers are
 * somewhere on the stack, and then call `fn(arg, ctxt)`.  `ctxt` is either
 * a pointer to a `ucontext_t` entity we generated, or `NULL`.  Could be
 * called with or w/o the allocator lock held; could be called from a signal
 * handler as well.
 */
GC_INNER void GC_with_callee_saves_pushed(GC_with_callee_saves_func fn,
                                          ptr_t arg);

#if defined(IA64) || defined(SPARC)
/*
 * Cause all stacked registers to be saved in memory.  Return a pointer to
 * the top of the corresponding memory stack.
 */
ptr_t GC_save_regs_in_stack(void);
#endif

#ifdef E2K
#  include <asm/e2k_syswork.h>
#  include <errno.h>
#  include <sys/syscall.h>

/*
 * Allocate a buffer of `sz` bytes by `alloca()` (thus in the context of
 * the function where the macro is expanded) and store the address of the
 * buffer to `*(pbuf)`.
 */
#  if defined(CPPCHECK)
/*
 * Workaround "Uninitialized bs_lo" and "obsolete alloca() called"
 * false positive (FP) warnings.
 */
#    define PS_ALLOCA_BUF(pbuf, sz) \
      (void)(GC_noop1_ptr(pbuf), *(pbuf) = (ptr_t)__builtin_alloca(sz))
#  else
#    define PS_ALLOCA_BUF(pbuf, sz) (void)(*(pbuf) = (ptr_t)alloca(sz))
#  endif

/*
 * Approximate size (in bytes) of the obtained procedure stack part
 * belonging to `syscall()` itself.
 */
#  define PS_SYSCALL_TAIL_BYTES 0x100

/*
 * Determine the current size of the whole procedure stack.  The size
 * is valid only within the current function.  `psz_ull` is a pointer to
 * the variable which receives the size; the variable is zeroed before
 * the system call is issued.  Aborts (reporting `errno` value) if the
 * system call fails.  The obtained size is asserted to be a positive
 * multiple of the pointer size.
 */
#  define GET_PROCEDURE_STACK_SIZE_INNER(psz_ull)                            \
    do {                                                                     \
      *(psz_ull) = 0; /*< might be redundant */                              \
      if (syscall(__NR_access_hw_stacks, E2K_GET_PROCEDURE_STACK_SIZE, NULL, \
                  NULL, 0, psz_ull)                                          \
          == -1)                                                             \
        ABORT_ARG1("Cannot get size of procedure stack", ": errno= %d",      \
                   errno);                                                   \
      GC_ASSERT(*(psz_ull) > 0 && *(psz_ull) % sizeof(ptr_t) == 0);          \
    } while (0)

/*
 * Compute the offset in the procedure stack to start copying from and
 * store it to `*(padj_ps_ofs)`.  `ps_ofs` is the requested offset and
 * `ofs_sz_ull` is the size of the whole procedure stack.
 */
#  ifdef THREADS
/*
 * Abort if the requested offset is not less than the size of the stack.
 * Otherwise, the result is the requested offset moved back by
 * `PS_SYSCALL_TAIL_BYTES`, but not below zero.
 */
#    define PS_COMPUTE_ADJUSTED_OFS(padj_ps_ofs, ps_ofs, ofs_sz_ull)     \
      do {                                                               \
        if ((ofs_sz_ull) <= (ps_ofs) /* `&& ofs_sz_ull > 0` */) {        \
          ABORT_ARG2("Incorrect size of procedure stack",                \
                     ": ofs= %lu, size= %lu", (unsigned long)(ps_ofs),   \
                     (unsigned long)(ofs_sz_ull));                       \
        }                                                                \
        if ((ps_ofs) > (unsigned)PS_SYSCALL_TAIL_BYTES) {                \
          *(padj_ps_ofs) = (ps_ofs) - (unsigned)PS_SYSCALL_TAIL_BYTES;   \
        } else {                                                         \
          *(padj_ps_ofs) = 0;                                            \
        }                                                                \
      } while (0)
#  else
/*
 * The single-threaded variant: the requested offset is required to be
 * a zero constant, thus the result is always zero.
 */
#    define PS_COMPUTE_ADJUSTED_OFS(padj_ps_ofs, ps_ofs, ofs_sz_ull) \
      do {                                                           \
        GC_STATIC_ASSERT((ps_ofs) == 0);                             \
        *(padj_ps_ofs) = 0;                                          \
        (void)(ofs_sz_ull);                                          \
      } while (0)
#  endif /* !THREADS */

/*
 * Copy procedure (register) stack to a stack-allocated buffer.
 * Usable from a signal handler.  The buffer (`*pbuf`) is valid only
 * within the current function.  `ps_ofs` designates the offset in the
 * procedure stack to copy the contents from (the offset is adjusted by
 * `PS_COMPUTE_ADJUSTED_OFS` before use).  The address of the buffer is
 * stored to `*(pbuf)` and the size of the buffer is stored to `*(psz)`.
 * Reading of the procedure stack is retried while the system call fails
 * with `EAGAIN`; any other failure leads to abort.  Note: this macro
 * cannot be changed to a function because `alloca()` and both `syscall()`
 * should be called in the context of the caller.
 */
#  define GET_PROCEDURE_STACK_LOCAL(ps_ofs, pbuf, psz)                      \
    do {                                                                    \
      unsigned long long ofs_sz_ull;                                        \
      size_t adj_ps_ofs;                                                    \
                                                                            \
      GET_PROCEDURE_STACK_SIZE_INNER(&ofs_sz_ull);                          \
      PS_COMPUTE_ADJUSTED_OFS(&adj_ps_ofs, ps_ofs, ofs_sz_ull);             \
      *(psz) = (size_t)ofs_sz_ull - adj_ps_ofs;                             \
      /* Allocate buffer on the stack; cannot return `NULL`. */             \
      PS_ALLOCA_BUF(pbuf, *(psz));                                          \
      /* Copy the procedure stack at the given offset to the buffer. */     \
      for (;;) {                                                            \
        ofs_sz_ull = adj_ps_ofs;                                            \
        if (syscall(__NR_access_hw_stacks, E2K_READ_PROCEDURE_STACK_EX,     \
                    &ofs_sz_ull, *(pbuf), *(psz), NULL)                     \
            != -1)                                                          \
          break;                                                            \
        if (errno != EAGAIN)                                                \
          ABORT_ARG2("Cannot read procedure stack", ": sz= %lu, errno= %d", \
                     (unsigned long)(*(psz)), errno);                       \
      }                                                                     \
    } while (0)
#endif /* E2K */

#if defined(E2K) && defined(USE_PTR_HWTAG)
/* Load value and get tag of the target memory. */
#  if defined(__ptr64__)
#    define LOAD_TAGGED_VALUE(v, tag, p)                        \
      do {                                                      \
        ptr_t val;                                              \
        __asm__ __volatile__("ldd, sm %[adr], 0x0, %[val]\n\t"  \
                             "gettagd %[val], %[tag]\n"         \
                             : [val] "=r"(val), [tag] "=r"(tag) \
                             : [adr] "r"(p));                   \
        v = val;                                                \
      } while (0)
#  elif !defined(CPPCHECK)
#    error Unsupported -march for e2k target
#  endif

/*
 * Load the value stored at `p` to `v`; if the tag obtained for the
 * loaded value is nonzero, then `continue` is executed, thus the macro
 * should be used only inside a loop body.  Note: the expansion is a bare
 * compound statement (not the `do`-`while` idiom) because otherwise
 * `continue` would refer to the loop of the macro itself.
 */
#  define LOAD_PTR_OR_CONTINUE(v, p) \
    {                                \
      int tag LOCAL_VAR_INIT_OK;     \
      LOAD_TAGGED_VALUE(v, tag, p);  \
      if (tag != 0)                  \
        continue;                    \
    }
#elif defined(CHERI_PURECAP)
/* Does the capability have both the tag set and the load permission? */
#  define HAS_TAG_AND_PERM_LOAD(cap) \
    (cheri_tag_get(cap) != 0 && (cheri_perms_get(cap) & CHERI_PERM_LOAD) != 0)

/*
 * Load the capability stored at `p` to `v`.  Execute `continue` (thus
 * the macro should be used only inside a loop body) if the loaded
 * capability has no tag or no load permission, or if its address lies
 * outside the range starting at its base and having its length.
 * Note: the expansion is a bare compound statement (not the `do`-`while`
 * idiom) because otherwise `continue` would refer to the loop of the
 * macro itself.
 */
#  define LOAD_PTR_OR_CONTINUE(v, p)                                         \
    {                                                                        \
      word base_addr;                                                        \
      v = *(ptr_t *)(p);                                                     \
      if (!HAS_TAG_AND_PERM_LOAD(v))                                         \
        continue;                                                            \
      base_addr = cheri_base_get(v);                                         \
      if (ADDR(v) < base_addr || ADDR(v) >= base_addr + cheri_length_get(v)) \
        continue;                                                            \
    }

/*
 * Do the bounds of the capability (its base and its base plus length)
 * enclose the addresses from `b_addr` to `e_addr`?
 */
#  define CAPABILITY_COVERS_RANGE(cap, b_addr, e_addr) \
    (cheri_base_get(cap) <= (b_addr)                   \
     && cheri_base_get(cap) + cheri_length_get(cap) >= (e_addr))
/*
 * Is the capability tagged, covering the given address range and having
 * at least one of `CHERI_PERM_LOAD` and `CHERI_PERM_LOAD_CAP` permissions?
 */
#  define SPANNING_CAPABILITY(cap, b_addr, e_addr)                       \
    (cheri_tag_get(cap) && CAPABILITY_COVERS_RANGE(cap, b_addr, e_addr)  \
     && (cheri_perms_get(cap) & (CHERI_PERM_LOAD | CHERI_PERM_LOAD_CAP)) \
            != 0)
#else
#  define LOAD_PTR_OR_CONTINUE(v, p) (void)(v = *(ptr_t *)(p))
#endif /* !CHERI_PURECAP */

#if defined(DARWIN) && defined(THREADS)
/*
 * If `p` points to an object, mark it and push contents on the mark stack.
 * Pointer recognition test always accepts interior pointers, i.e. this is
 * appropriate for pointers found on the thread stack.
 */
void GC_push_one(word p);
#endif

/*
 * Mark and push (i.e. gray) a single object `p` onto the main mark stack.
 * Consider `p` to be valid if it is an interior pointer.  The object `p`
 * has passed a preliminary pointer validity test, but we do not definitely
 * know whether it is valid.  Mark bits are not atomically updated; thus
 * this must be the only thread setting them.
 */
#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
GC_INNER void GC_mark_and_push_stack(ptr_t p, ptr_t source);
#else
GC_INNER void GC_mark_and_push_stack(ptr_t p);
#endif

/* Is the block with the given header containing no pointers? */
#define IS_PTRFREE(hhdr) (0 == (hhdr)->hb_descr)

/* Clear all mark bits in the header. */
GC_INNER void GC_clear_hdr_marks(hdr *hhdr);

/* Set all mark bits in the header.  Used for uncollectible blocks. */
GC_INNER void GC_set_hdr_marks(hdr *hhdr);

/* Set all mark bits associated with a free list. */
GC_INNER void GC_set_fl_marks(ptr_t);

#if defined(GC_ASSERTIONS) && defined(THREAD_LOCAL_ALLOC)
/*
 * Check that all mark bits associated with a free list are set.
 * Abort if not.
 */
void GC_check_fl_marks(void **);
#endif

/*
 * Add [`b`,`e`) to the root set.  Adding the same interval a second
 * time is a moderately fast no-op, and hence benign.  We do not handle
 * different but overlapping intervals efficiently.  (But we do handle
 * them correctly.)  `tmp` specifies that the interval may be deleted
 * before re-registering dynamic libraries.
 */
GC_INNER void GC_add_roots_inner(ptr_t b, ptr_t e, GC_bool tmp);

#ifdef USE_PROC_FOR_LIBRARIES
/*
 * Remove given range from every static root which intersects with the range.
 * `GC_remove_tmp_roots` is assumed to be called before this function is
 * called (repeatedly) by `GC_register_map_entries`.
 */
GC_INNER void GC_remove_roots_subregion(ptr_t b, ptr_t e);
#endif

/*
 * Inform the collector that a certain section of statically allocated
 * memory contains no pointers to garbage-collected memory.
 * The range boundaries should be properly aligned and valid.
 */
GC_INNER void GC_exclude_static_roots_inner(ptr_t start, ptr_t finish);

#if defined(ANY_MSWIN) || defined(DYNAMIC_LOADING)
/* Add dynamic library data sections to the root set. */
GC_INNER void GC_register_dynamic_libraries(void);
#endif

/*
 * Remove and re-register dynamic libraries if we are configured to do
 * that at each collection.
 */
GC_INNER void GC_cond_register_dynamic_libraries(void);

/* Machine-dependent startup routines. */

/*
 * Get the cold end of the stack of the primordial thread.  This is always
 * called from the main (primordial) thread.
 */
GC_INNER ptr_t GC_get_main_stack_base(void);

#ifdef IA64
/* Get the cold end of register stack. */
GC_INNER ptr_t GC_get_register_stack_base(void);
#endif

GC_INNER void GC_register_data_segments(void);

#ifdef THREADS
/* Both are invoked from `GC_init()` only. */
GC_INNER void GC_thr_init(void);

/*
 * Perform all initializations, including those that may require allocation,
 * e.g. initialize thread-local free lists if used.  Called by `GC_init()`.
 */
GC_INNER void GC_init_parallel(void);

#  ifndef DONT_USE_ATEXIT
GC_INNER GC_bool GC_is_main_thread(void);
#  endif
#else
#  ifdef TRACE_BUF
void GC_add_trace_entry(const char *caller_fn_name, ptr_t arg1, ptr_t arg2);
#  endif
#endif /* !THREADS */

#ifdef NO_BLACK_LISTING
/*
 * The black listing is turned off at compile time: the initialization and
 * the promotion routines are replaced with no-op macros, and the adding
 * macros just evaluate `p` (the `source` argument is ignored).
 */
#  define GC_bl_init() (void)0
/* Do not define `GC_bl_init_no_interiors()`. */
#  define GC_ADD_TO_BLACK_LIST_NORMAL(p, source) ((void)(p))
#  define GC_ADD_TO_BLACK_LIST_STACK(p, source) ((void)(p))
#  define GC_promote_black_lists() (void)0
#  define GC_unpromote_black_lists() (void)0
#else

/*
 * If we need a block of `n` bytes, and we have a block of `n + BL_LIMIT`
 * bytes available, and `n` is greater than `BL_LIMIT`, but all possible
 * positions in it are black-listed, we just use it anyway (and print
 * a warning, if warnings are enabled).  This risks subsequently leaking
 * the block due to a false reference.  But not using the block risks
 * unreasonable immediate heap growth.
 */
#  define BL_LIMIT GC_black_list_spacing

/*
 * Average number of bytes between black-listed blocks.  Approximate.
 * Counts only blocks that are "stack black-listed", i.e. that are
 * problematic in the interior of an object.
 */
GC_EXTERN word GC_black_list_spacing;

/*
 * The interval between unsuppressed warnings about repeated allocation
 * of a very large block.
 */
GC_EXTERN long GC_large_alloc_warn_interval;

/* Initialize the black listing mechanism. */
GC_INNER void GC_bl_init(void);
GC_INNER void GC_bl_init_no_interiors(void);

#  ifdef PRINT_BLACK_LIST
/*
 * Register bits as a possible future false reference from the heap
 * or static data.  The argument `p` is not a valid pointer reference,
 * but it falls inside the plausible heap bounds.
 */
GC_INNER void GC_add_to_black_list_normal(ptr_t p, ptr_t source);
/*
 * Black-list `p` calling `GC_add_to_black_list_stack()` if
 * `GC_all_interior_pointers` is on, or `GC_add_to_black_list_normal()`
 * otherwise.  Note: the macro expands to an `if`-`else` statement without
 * the trailing semicolon, thus it is usable only as a statement.
 */
#    define GC_ADD_TO_BLACK_LIST_NORMAL(p, source) \
      if (GC_all_interior_pointers) {              \
        GC_add_to_black_list_stack(p, source);     \
      } else                                       \
        GC_add_to_black_list_normal(p, source)
/*
 * The counterpart of `GC_add_to_black_list_normal()`; it is also the one
 * called by `GC_ADD_TO_BLACK_LIST_NORMAL` if `GC_all_interior_pointers`
 * is on.
 */
GC_INNER void GC_add_to_black_list_stack(ptr_t p, ptr_t source);
#    define GC_ADD_TO_BLACK_LIST_STACK(p, source) \
      GC_add_to_black_list_stack(p, source)
#  else
/*
 * Same as in the case of `PRINT_BLACK_LIST` except that `source` is not
 * passed to the routines (the corresponding macro argument is ignored).
 */
GC_INNER void GC_add_to_black_list_normal(ptr_t p);
#    define GC_ADD_TO_BLACK_LIST_NORMAL(p, source) \
      if (GC_all_interior_pointers) {              \
        GC_add_to_black_list_stack(p);             \
      } else                                       \
        GC_add_to_black_list_normal(p)
GC_INNER void GC_add_to_black_list_stack(ptr_t p);
#    define GC_ADD_TO_BLACK_LIST_STACK(p, source) GC_add_to_black_list_stack(p)
#  endif /* PRINT_BLACK_LIST */

/*
 * Declare an end to a black listing phase.  (I.e. signal the completion of
 * a collection.)  Turn the incomplete black lists into new black lists, etc.
 */
GC_INNER void GC_promote_black_lists(void);

/*
 * Approximately undo the effect of `GC_promote_black_lists()`.
 * This actually loses some information, but only in a reasonably safe way.
 */
GC_INNER void GC_unpromote_black_lists(void);
#endif

/*
 * The collector internal memory allocation for small objects.
 * Deallocation is not possible.  May return `NULL`.
 */
GC_INNER ptr_t GC_scratch_alloc(size_t bytes);

#ifdef GWW_VDB
/* `GC_scratch_recycle_no_gww()` is not used. */
#else
#  define GC_scratch_recycle_no_gww GC_scratch_recycle_inner
#endif
/* Reuse the memory region by the heap. */
GC_INNER void GC_scratch_recycle_inner(void *ptr, size_t sz);

#ifndef MARK_BIT_PER_OBJ
/*
 * Add a heap block map for objects of a size in granules to `GC_obj_map`.
 * A size of zero is used for large objects.  Returns `FALSE` on failure.
 */
GC_INNER GC_bool GC_add_map_entry(size_t lg);
#endif

/*
 * Same as `GC_register_displacement` but assuming the allocator lock
 * is already held.
 */
GC_INNER void GC_register_displacement_inner(size_t offset);

/*
 * Allocate a new heap block for small objects of size `lg` (in granules)
 * and `kind`.  Add all of the block's objects to the free list for objects
 * of that size.  Set all mark bits if objects are uncollectible.
 * Will fail to do anything if out of memory.
 */
GC_INNER void GC_new_hblk(size_t lg, int kind);

/*
 * Build a free list for objects of size `lg` (in granules) inside heap
 * block `h`.  Clear objects inside `h` if `clear` argument is set.
 * Add `list` to the end of the free list we build.  Return the new
 * free list.  Normally called by `GC_new_hblk()`, but this could also
 * be called without the allocator lock, if we ensure that there is no
 * concurrent collection which might reclaim objects that we have not
 * yet allocated.
 */
GC_INNER ptr_t GC_build_fl(struct hblk *h, ptr_t list, size_t lg,
                           GC_bool clear);

/*
 * Allocate (and return pointer to) a heap block for objects of the
 * given size and alignment (in bytes), searching over the appropriate
 * free block lists; inform the marker that the found block is valid
 * for objects of the indicated size.  Assumes (as implied by the argument
 * name) that `EXTRA_BYTES` value is already added to the size, if needed.
 * The client is responsible for clearing the block, if needed.
 * Note: we set `GC_obj_map` field in the header correctly; the caller
 * is responsible for building an object's free list in the block.
 */
GC_INNER struct hblk *GC_allochblk(size_t lb_adjusted, int kind,
                                   unsigned flags, size_t align_m1);

/*
 * Deallocate (free) a heap block and mark it as invalid.  Coalesce it
 * with its neighbors if possible.  All mark words are assumed to be cleared.
 */
GC_INNER void GC_freehblk(struct hblk *p);

/*  Miscellaneous GC routines. */

/*
 * This explicitly increases the size of the heap.  It is used internally,
 * but may also be invoked from `GC_expand_hp` by client.  The argument is
 * in units of `HBLKSIZE`.  (An argument of zero is treated as 1.)
 * Returns `FALSE` on failure.
 */
GC_INNER GC_bool GC_expand_hp_inner(word n);

/*
 * Restore unmarked objects to free lists, or (if `abort_if_found` is `TRUE`)
 * report them.  (I.e. perform `GC_reclaim_block()` on the entire heap,
 * after first clearing small-object free lists if we are not just looking
 * for leaks.)  Sweeping of small object pages is largely deferred.
 */
GC_INNER void GC_start_reclaim(GC_bool abort_if_found);

/*
 * Sweep blocks of the indicated object size (in granules) and kind
 * until either the appropriate nonempty free list is found, or there
 * are no more blocks to sweep.
 */
GC_INNER void GC_continue_reclaim(size_t lg, int kind);

/*
 * Reclaim all small blocks waiting to be reclaimed.  Abort and return
 * `FALSE` when/if `(*stop_func)()` returns `TRUE`.  If this returns `TRUE`,
 * then it is safe to restart the world with incorrectly cleared mark bits.
 * If `ignore_old`, then reclaim only blocks that have been reclaimed
 * recently, and discard the rest.  `stop_func` may be 0.
 */
GC_INNER GC_bool GC_reclaim_all(GC_stop_func stop_func, GC_bool ignore_old);

/*
 * Generic procedure to rebuild a free list in `hbp` with header `hhdr`,
 * with objects of size `sz` bytes.  Add `list` to the end of the free list.
 * Add the number of reclaimed bytes to `*pcount`.  Note: it could be called
 * directly from `GC_malloc_many`.
 */
GC_INNER ptr_t GC_reclaim_generic(struct hblk *hbp, hdr *hhdr, size_t sz,
                                  GC_bool init, ptr_t list, word *pcount);

/*
 * Is given heap block completely unmarked (i.e. contains no marked objects)?
 * This does not require the block to be in physical memory.
 */
GC_INNER GC_bool GC_block_empty(const hdr *hhdr);

/* Always returns 0 (`FALSE`). */
GC_INNER int GC_CALLBACK GC_never_stop_func(void);

/*
 * Stop-the-world garbage collection.  The caller must have acquired
 * the allocator lock.  If `stop_func` is not `GC_never_stop_func`, then
 * abort if `stop_func` returns `TRUE`.  Return `TRUE` if we successfully
 * completed the collection (otherwise the collection is aborted).
 */
GC_INNER GC_bool GC_try_to_collect_inner(GC_stop_func stop_func);

#define GC_gcollect_inner() (void)GC_try_to_collect_inner(GC_never_stop_func)

#ifdef THREADS
/*
 * We may currently be in thread creation or destruction.  Only set to `TRUE`
 * while the allocator lock is held.  When set, it is OK to run the garbage
 * collection from an unknown thread.  Protected by the allocator lock.
 */
GC_EXTERN GC_bool GC_in_thread_creation;
#endif

/*
 * Do `n_blocks` units of a garbage collection work, if appropriate.
 * A unit is an amount appropriate for `HBLKSIZE` bytes of allocation.
 */
GC_INNER void GC_collect_a_little_inner(size_t n_blocks);

/*
 * NOTE(review): the contract is not documented here.  Judging by the
 * argument names only, `lb` is the requested size in bytes, `kind` is
 * the object kind, and `align_m1` is presumably the requested alignment
 * minus one - confirm against the definition.
 */
GC_INNER void *GC_malloc_kind_aligned_global(size_t lb, int kind,
                                             size_t align_m1);

/*
 * NOTE(review): the contract is not documented here.  Presumably, the
 * arguments have the same meaning as those of the function above, and
 * `flags` is 0 or `IGNORE_OFF_PAGE` (like in case of
 * `GC_generic_malloc_inner()`) - confirm against the definition.
 */
GC_INNER void *GC_generic_malloc_aligned(size_t lb, int kind, unsigned flags,
                                         size_t align_m1);

/*
 * Allocate an object of the given `kind` but assuming the allocator
 * lock is already held.  Should not be used to directly allocate
 * objects requiring special handling on allocation.  `flags` argument
 * should be 0 or `IGNORE_OFF_PAGE`; in the latter case the client
 * guarantees there will always be a pointer to the beginning (i.e.
 * within the first `hblk`) of the object while it is live.
 */
GC_INNER void *GC_generic_malloc_inner(size_t lb, int kind, unsigned flags);

/*
 * Collect or expand heap in an attempt to make the indicated number of
 * free blocks available.  Should be called until the blocks are
 * available (setting `retry` value to `TRUE` unless this is the first
 * call in a loop) or until it fails by returning `FALSE`.  The `flags`
 * argument should be `IGNORE_OFF_PAGE` or 0.
 */
GC_INNER GC_bool GC_collect_or_expand(word needed_blocks, unsigned flags,
                                      GC_bool retry);

/*
 * Make the indicated object free list nonempty, and return its head (the
 * first object on the free list).  The object must be removed from the free
 * list by the caller.  The size is in granules.
 */
GC_INNER ptr_t GC_allocobj(size_t lg, int kind);

#ifdef GC_ADD_CALLER
/*
 * `GC_DBG_EXTRAS` is used by the collector debug API functions (unlike
 * `GC_EXTRAS` used by the debug API macros) thus `GC_RETURN_ADDR_PARENT`
 * (pointing to client caller) should be used if possible.
 */
#  ifdef GC_HAVE_RETURN_ADDR_PARENT
#    define GC_DBG_EXTRAS GC_RETURN_ADDR_PARENT, NULL, 0
#  else
#    define GC_DBG_EXTRAS GC_RETURN_ADDR, NULL, 0
#  endif
#else
/*
 * No return address item is supplied in this case, only a placeholder
 * string and zero (presumably these stand for the source location of
 * the caller - confirm against the debug API declarations).
 */
#  define GC_DBG_EXTRAS "unknown", 0
#endif /* !GC_ADD_CALLER */

#ifndef GC_COLLECT_AT_MALLOC
/* The feature is off, so the argument is not even evaluated. */
#  define GC_DBG_COLLECT_AT_MALLOC(lb) (void)0
#else
/*
 * The minimal size of a `malloc` request which forces a collection;
 * every allocation of at least this many bytes triggers `GC_gcollect()`.
 * This might be handy during debugging.  Note: this variable is visible
 * outside for debugging purpose.
 */
extern size_t GC_dbg_collect_at_malloc_min_lb;

/* Run a collection if `lb` reaches the threshold given above. */
#  define GC_DBG_COLLECT_AT_MALLOC(lb) \
    ((lb) >= GC_dbg_collect_at_malloc_min_lb ? GC_gcollect() : (void)0)
#endif /* GC_COLLECT_AT_MALLOC */

/* Allocation routines that bypass the thread-local cache. */

#if defined(THREAD_LOCAL_ALLOC) && defined(GC_GCJ_SUPPORT)
/*
 * Allocate an object, clear it, and store the pointer to the type
 * structure ("vtable" in `gcj`).  This adds a byte at the end of the
 * object if `GC_malloc` would.
 */
GC_INNER void *GC_core_gcj_malloc(size_t lb, const void *vtable_ptr,
                                  unsigned flags);
#endif

GC_INNER void GC_init_headers(void);

/*
 * Install a header for block `h`.  Return `NULL` on failure, or the
 * uninitialized header otherwise.
 */
GC_INNER hdr *GC_install_header(struct hblk *h);

/*
 * Set up forwarding counts for block `h` of size `sz`.  Return `FALSE`
 * on failure.
 */
GC_INNER GC_bool GC_install_counts(struct hblk *h, size_t sz);

/* Remove the header for block `h`. */
GC_INNER void GC_remove_header(struct hblk *h);

/* Remove forwarding counts for `h`. */
GC_INNER void GC_remove_counts(struct hblk *h, size_t sz);

/* A non-macro variant of the header location routine. */
GC_INNER hdr *GC_find_header(const void *h);

/*
 * Get `HBLKSIZE`-aligned heap memory chunk from the OS and add the
 * chunk to `GC_our_memory`.  Return `NULL` if out of memory.
 */
GC_INNER ptr_t GC_os_get_mem(size_t bytes);

#if defined(NO_FIND_LEAK) && defined(SHORT_DBG_HDRS)
#  define GC_print_all_errors() (void)0
#  define GC_check_heap() (void)0
#  define GC_print_all_smashed() (void)0
#else
/*
 * Print smashed and leaked objects, if any.  Clear the lists of such
 * objects.  Called without the allocator lock held.
 */
GC_INNER void GC_print_all_errors(void);

/*
 * Check that all objects in the heap with debugging info are intact.
 * Add any that are not to `GC_smashed` list.
 */
GC_EXTERN void (*GC_check_heap)(void);

/* Print `GC_smashed` list if it is not empty.  Then clear the list. */
GC_EXTERN void (*GC_print_all_smashed)(void);
#endif

/*
 * If possible, print (using `GC_err_printf()`) a more detailed
 * description (terminated with "\n") of the object referred to by `p`.
 */
GC_EXTERN void (*GC_print_heap_obj)(ptr_t p);

GC_INNER void GC_default_print_heap_obj_proc(ptr_t p);

#if defined(LINUX) && defined(__ELF__) && !defined(SMALL_CONFIG)
/*
 * Print an address map of the process.  The caller should hold the
 * allocator lock.
 */
void GC_print_address_map(void);
#endif

#ifdef NO_FIND_LEAK
#  define GC_find_leak_inner FALSE
#else
#  define GC_find_leak_inner GC_find_leak
#  ifndef SHORT_DBG_HDRS
/*
 * Do not immediately deallocate object on `free()` in the find-leak mode,
 * just mark it as freed (and deallocate it after collection).
 */
GC_EXTERN GC_bool GC_findleak_delay_free;
#  endif
#endif /* !NO_FIND_LEAK */

/*
 * The accessors of `GC_have_errors` flag.  The flag means we saw
 * a smashed or leaked object (so the error printing routine should be
 * called occasionally).  It is OK to read the flag not acquiring the
 * allocator lock.  Once set to `TRUE`, it is never cleared.
 */
#if defined(NO_FIND_LEAK) && defined(SHORT_DBG_HDRS)
/* The getter is a constant and no setter is defined in this case. */
#  define get_have_errors() FALSE
#elif defined(AO_HAVE_store)
/* The variant accessing the flag by the atomic store and load. */
#  define GC_SET_HAVE_ERRORS() AO_store(&GC_have_errors, (AO_t)TRUE)
#  define get_have_errors() \
    ((GC_bool)AO_load(&GC_have_errors)) /*< no barrier */
#else
/* The variant accessing the flag as an ordinary variable. */
#  define GC_SET_HAVE_ERRORS() (void)(GC_have_errors = TRUE)
/*
 * We saw a smashed or leaked object.  Call error printing routine
 * occasionally.  It is OK to read it not acquiring the allocator lock.
 * Once set to `TRUE`, it is never cleared.
 */
#  define get_have_errors() GC_have_errors
#endif /* !AO_HAVE_store */

/* The value of `GC_print_stats` which turns on the additional messages. */
#define VERBOSE 2
#if !defined(NO_CLOCK) || !defined(SMALL_CONFIG)
/*
 * Value of 1 generates basic collector log; `VERBOSE` generates additional
 * messages.
 */
GC_EXTERN int GC_print_stats;
#else /* NO_CLOCK && SMALL_CONFIG */
/*
 * Defined as a macro to aid the compiler to remove the relevant message
 * character strings from the executable (with a particular level of
 * optimizations).
 */
#  define GC_print_stats 0
#endif /* NO_CLOCK && SMALL_CONFIG */

#ifdef KEEP_BACK_PTRS
/* Number of random backtraces to generate for each collection. */
GC_EXTERN long GC_backtraces;
#endif

/*
 * A simple linear congruential generator of pseudo-random numbers which
 * is safe to use concurrently.  `GC_RAND_NEXT(pseed)` advances the state
 * pointed to by `pseed` (of `GC_RAND_STATE_T` type) and yields the new
 * state as an `int` value in the range from 0 to `GC_RAND_MAX`.
 */
#define GC_RAND_MAX ((int)(~0U >> 1))
#if !defined(AO_HAVE_store) || !defined(THREAD_SANITIZER)
#  define GC_RAND_STATE_T unsigned32
/* Note: overflow and race are OK here. */
#  define GC_RAND_NEXT(pseed)                                      \
    (int)(*(pseed) = (*(pseed) * (unsigned32)1103515245UL + 12345) \
                     & (unsigned32)((unsigned)GC_RAND_MAX))
#else
/* The state is accessed atomically to make the race detector silent. */
#  define GC_RAND_STATE_T volatile AO_t
#  define GC_RAND_NEXT(pseed) GC_rand_next(pseed)
GC_INLINE int
GC_rand_next(GC_RAND_STATE_T *pseed)
{
  AO_t state = AO_load(pseed);

  /* Advance the generator and keep only the bits fitting `GC_RAND_MAX`. */
  state = (AO_t)((state * (unsigned32)1103515245UL + 12345)
                 & (unsigned32)((unsigned)GC_RAND_MAX));
  AO_store(pseed, state);
  return (int)state;
}
#endif

#ifdef MAKE_BACK_GRAPH
GC_EXTERN GC_bool GC_print_back_height;
void GC_print_back_graph_stats(void);
#endif

#ifdef THREADS
/*
 * Explicitly deallocate the object when we already hold the allocator lock.
 * Only used for internally allocated objects.
 */
GC_INNER void GC_free_inner(void *p);
#endif

/*
 * A hook invoked for the object `p` on its deallocation.  It is forwarded
 * to `GC_free_profiler_hook()` if `VALGRIND_TRACKING` is defined;
 * otherwise the argument is just evaluated and discarded.
 */
#ifdef VALGRIND_TRACKING
#  define FREE_PROFILER_HOOK(p) GC_free_profiler_hook(p)
#else
#  define FREE_PROFILER_HOOK(p) (void)(p)
#endif

/*
 * Macros used for collector internal allocation.  These assume the
 * allocator lock is held.
 */
#ifdef DBG_HDRS_ALL

/*
 * An allocation function for internal use.  Normally internally allocated
 * objects do not have debug information.  But in this case, we need to make
 * sure that all objects have debug headers.
 */
GC_INNER void *GC_debug_generic_malloc_inner(size_t lb, int kind,
                                             unsigned flags);

#  define GC_INTERNAL_MALLOC(lb, k) GC_debug_generic_malloc_inner(lb, k, 0)
#  define GC_INTERNAL_MALLOC_IGNORE_OFF_PAGE(lb, k) \
    GC_debug_generic_malloc_inner(lb, k, IGNORE_OFF_PAGE)
#  ifdef THREADS
/* Used internally; we assume it is called correctly. */
GC_INNER void GC_debug_free_inner(void *p);

#    define GC_INTERNAL_FREE GC_debug_free_inner
#  else
#    define GC_INTERNAL_FREE GC_debug_free
#  endif
#else
#  define GC_INTERNAL_MALLOC(lb, k) GC_generic_malloc_inner(lb, k, 0)
#  define GC_INTERNAL_MALLOC_IGNORE_OFF_PAGE(lb, k) \
    GC_generic_malloc_inner(lb, k, IGNORE_OFF_PAGE)
#  ifdef THREADS
#    define GC_INTERNAL_FREE GC_free_inner
#  else
#    define GC_INTERNAL_FREE GC_free
#  endif
#endif /* !DBG_HDRS_ALL */

/* Memory unmapping routines. */
#ifdef USE_MUNMAP

/*
 * Unmap blocks that have not been recently touched.  This is the only
 * way blocks are ever unmapped.
 */
GC_INNER void GC_unmap_old(unsigned threshold);

/*
 * Merge all unmapped blocks that are adjacent to other free blocks.
 * This may involve remapping, since all blocks are either fully mapped
 * or fully unmapped.  Returns `TRUE` if at least one block was merged.
 */
GC_INNER GC_bool GC_merge_unmapped(void);

GC_INNER void GC_unmap(ptr_t start, size_t bytes);
GC_INNER void GC_remap(ptr_t start, size_t bytes);

/*
 * Two adjacent blocks have already been unmapped and are about to be merged.
 * Unmap the whole block.  This typically requires that we unmap a small
 * section in the middle that was not previously unmapped due to alignment
 * constraints.
 */
GC_INNER void GC_unmap_gap(ptr_t start1, size_t bytes1, ptr_t start2,
                           size_t bytes2);
#endif

#ifdef CAN_HANDLE_FORK
/*
 * Fork-handling mode:
 *   - 0 means no `fork` handling is requested (but client could anyway
 *     call `fork()` provided it is surrounded with `GC_atfork_prepare`,
 *     `GC_atfork_parent`, `GC_atfork_child` calls);
 *   - (-1) means the collector tries to use `pthread_atfork()` if it is
 *     available (if it succeeds, then `GC_handle_fork` value is changed to
 *     one), a portable client should nonetheless surround `fork()` with
 *     `GC_atfork_prepare()` and the accompanying routines (for the case
 *     of `pthread_atfork()` failure or absence);
 *   - 1 (or other values) means client fully relies on `pthread_atfork`
 *     (so if it is missing or failed, then `abort` occurs in `GC_init()`),
 *     `GC_atfork_prepare` and the accompanying routines are no-op in such
 *     a case.
 *
 * Note: the value is examined by `GC_thr_init`.
 */
GC_EXTERN int GC_handle_fork;

#  ifdef THREADS
#    if defined(SOLARIS) && !defined(_STRICT_STDC)
/* Update `pthreads` id in the child process right after `fork`. */
GC_INNER void GC_stackbase_info_update_after_fork(void);
#    else
#      define GC_stackbase_info_update_after_fork() (void)0
#    endif
#  endif
#endif /* CAN_HANDLE_FORK */

#ifdef NO_MANUAL_VDB
/*
 * The manual VDB mode is compiled out: the mode flag is the constant
 * `FALSE`, `GC_auto_incremental` is the same as `GC_incremental`, and
 * the two dirtying macros only evaluate their argument.
 */
#  define GC_manual_vdb FALSE
#  define GC_auto_incremental GC_incremental
#  define GC_dirty(p) (void)(p)
#  define REACHABLE_AFTER_DIRTY(p) (void)(p)
#else
/*
 * The incremental collection is in the manual VDB mode.
 * Assumes `GC_incremental` is `TRUE`.  Should not be modified once
 * `GC_incremental` is set to `TRUE`.
 */
GC_EXTERN GC_bool GC_manual_vdb;

/* The incremental mode is on and it is not the manual VDB one. */
#  define GC_auto_incremental (GC_incremental && !GC_manual_vdb)

/*
 * Manually mark the page containing `p` as dirty.  Logically, this
 * dirties the entire object.  Does not require locking.
 * Exported and marked as `noinline` for the purpose of some clients that
 * need to patch the symbol when using write barrier validation.
 */
GC_API_PATCHABLE void GC_dirty_inner(const void *p);

/*
 * `GC_dirty(p)` calls `GC_dirty_inner(p)` only if the manual VDB mode is
 * on (otherwise `p` is not evaluated).  `REACHABLE_AFTER_DIRTY(p)` is
 * just `GC_reachable_here(p)`.
 */
#  define GC_dirty(p) (GC_manual_vdb ? GC_dirty_inner(p) : (void)0)
#  define REACHABLE_AFTER_DIRTY(p) GC_reachable_here(p)
#endif /* !NO_MANUAL_VDB */

#ifdef GC_DISABLE_INCREMENTAL
/* The incremental mode is compiled out, so the flag is a constant. */
#  define GC_incremental FALSE
#else
/*
 * Using incremental/generational collection.  Assumes dirty bits are
 * being maintained.
 */
GC_EXTERN GC_bool GC_incremental;

/* Virtual dirty bit (VDB) implementations; each one exports the following. */

/*
 * Initialize the virtual dirty bit implementation.  Returns `TRUE` if
 * virtual dirty bits are maintained (otherwise it is OK to be called again
 * if the client calls `GC_enable_incremental()` once more).
 */
GC_INNER GC_bool GC_dirty_init(void);

/*
 * Retrieve system dirty bits for the heap to a local buffer (unless
 * `output_unneeded`).  The caller should set `output_unneeded` to indicate
 * that reading of the retrieved dirty bits is not planned till the next
 * retrieval.  Restore the system's notion of which pages are dirty.
 * We assume that either the world is stopped or it is OK to lose dirty bits
 * while it is happening (`GC_enable_incremental()` is the caller and
 * `output_unneeded` is `TRUE` at least if the multi-threading support is on).
 */
GC_INNER void GC_read_dirty(GC_bool output_unneeded);

/*
 * Is the `HBLKSIZE`-sized page at `h` marked dirty in the local buffer?
 * If the actual page size is different, this returns `TRUE` if any of
 * the pages overlapping `h` are dirty.  This routine may err on the side
 * of labeling pages as dirty (and this implementation does).
 */
GC_INNER GC_bool GC_page_was_dirty(struct hblk *h);

/*
 * Block `h` is about to be written or allocated shortly.  Ensure that
 * all pages containing any part of the `nblocks` `hblk` entities starting
 * at `h` are no longer write-protected (by the virtual dirty bit
 * implementation).  I.e., this is a call that:
 *   - hints that [`h`, `h + nblocks`) is about to be written;
 *   - guarantees that protection is removed;
 *   - may speed up some virtual dirty bit implementations;
 *   - may be essential if we need to ensure that pointer-free system
 *     call buffers in the heap are not protected.
 *
 * NOTE(review): `is_ptrfree` presumably tells whether the block holds
 * pointer-free objects (see the last item above) - confirm against the
 * implementations.
 */
GC_INNER void GC_remove_protection(struct hblk *h, size_t nblocks,
                                   GC_bool is_ptrfree);

#  if !defined(NO_VDB_FOR_STATIC_ROOTS) && !defined(PROC_VDB)
/* Is VDB working for static roots? */
GC_INNER GC_bool GC_is_vdb_for_static_roots(void);
#  endif

#  ifdef CAN_HANDLE_FORK
#    if defined(PROC_VDB) || defined(SOFT_VDB) \
        || (defined(MPROTECT_VDB) && defined(DARWIN) && defined(THREADS))
/*
 * Update pid-specific resources (like `/proc` file descriptors) needed
 * by the dirty bits implementation after `fork` in the child process.
 */
GC_INNER void GC_dirty_update_child(void);
#    else
/* The call expands to a no-op for the other VDB implementations. */
#      define GC_dirty_update_child() (void)0
#    endif
#  endif /* CAN_HANDLE_FORK */

#  if defined(MPROTECT_VDB) && defined(DARWIN)
EXTERN_C_END
#    include <pthread.h>
EXTERN_C_BEGIN
#    ifdef THREADS
/* Has the same parameters and result type as `pthread_create()`. */
GC_INNER int GC_inner_pthread_create(pthread_t *t,
                                     GC_PTHREAD_CREATE_CONST pthread_attr_t *a,
                                     void *(*fn)(void *), void *arg);
#    else
/* Without the threads support, it is an alias of `pthread_create()`. */
#      define GC_inner_pthread_create pthread_create
#    endif
#  endif
#endif /* !GC_DISABLE_INCREMENTAL */

#if defined(COUNT_PROTECTED_REGIONS) && defined(MPROTECT_VDB)
/*
 * Do actions on heap growth, if needed, to prevent hitting the OS kernel
 * limit on the VM map regions.
 */
GC_INNER void GC_handle_protected_regions_limit(void);
#else
/* The call expands to a no-op unless both macros above are defined. */
#  define GC_handle_protected_regions_limit() (void)0
#endif

/*
 * Same as `GC_base` but accepts and returns a pointer to `const` object.
 * The qualifier is removed from the argument by `GC_CAST_AWAY_CONST_PVOID`
 * and the result is cast to `const void *`.
 */
#define GC_base_C(p) ((const void *)GC_base(GC_CAST_AWAY_CONST_PVOID(p)))

/*
 * Some debugging print routines.  Note: these are declared without
 * `GC_INNER`, unlike most of the routines declared around.
 */
void GC_print_block_list(void);
void GC_print_hblkfreelist(void);
void GC_print_heap_sects(void);
void GC_print_static_roots(void);

#ifdef KEEP_BACK_PTRS
/*
 * Store back pointer to `source` in `dest`, if that appears to be
 * possible.  This is not completely safe, since we may mistakenly
 * conclude that `dest` has a debugging wrapper.  But the error
 * probability is very small, and this should not be used in
 * production code.  We assume that `dest` is the real base pointer.
 * `source` should usually be a pointer to the interior of an object.
 */
GC_INNER void GC_store_back_pointer(ptr_t source, ptr_t dest);

GC_INNER void GC_marked_for_finalization(ptr_t dest);
/* The wrapper macros simply forward to the routines declared above. */
#  define GC_STORE_BACK_PTR(source, dest) GC_store_back_pointer(source, dest)
#  define GC_MARKED_FOR_FINALIZATION(dest) GC_marked_for_finalization(dest)
#else
/*
 * Without back pointers, `GC_STORE_BACK_PTR` only evaluates `source`
 * (`dest` is not evaluated), and `GC_MARKED_FOR_FINALIZATION` expands
 * to nothing.
 */
#  define GC_STORE_BACK_PTR(source, dest) (void)(source)
#  define GC_MARKED_FOR_FINALIZATION(dest)
#endif /* !KEEP_BACK_PTRS */

/* Make arguments (up to six values of `word` type) appear live to compiler. */
void GC_noop6(word, word, word, word, word, word);

/*
 * An attribute to request `printf`-style checking of the arguments of
 * a function: `spec_argnum` is the number of the format string argument,
 * and `first_checked` is the number of the first argument to check against
 * the format.  It is empty unless the compiler is recognized as GCC v3.0+
 * (by `GC_GNUC_PREREQ`).  May be predefined by the client.
 */
#ifndef GC_ATTR_FORMAT_PRINTF
#  if GC_GNUC_PREREQ(3, 0)
#    define GC_ATTR_FORMAT_PRINTF(spec_argnum, first_checked) \
      __attribute__((__format__(__printf__, spec_argnum, first_checked)))
#  else
#    define GC_ATTR_FORMAT_PRINTF(spec_argnum, first_checked)
#  endif
#endif

/* Logging and diagnostic output. */

/*
 * `GC_printf` is used typically on client explicit print requests.
 * A variant of `printf` that does not allocate, 1 KB total output length.
 * (It uses `sprintf()` internally; hopefully the latter does not allocate
 * memory for `long` arguments.)  For all `GC_*_printf` routines,
 * it is recommended to put "\n" at the end of `format` string (for the
 * output atomicity).  In each of the declarations, the format string is
 * the first argument and the arguments to be formatted start from the
 * second one (as specified to `GC_ATTR_FORMAT_PRINTF`).
 */
GC_API_PRIV void GC_printf(const char *format, ...)
    GC_ATTR_FORMAT_PRINTF(1, 2);
GC_API_PRIV void GC_err_printf(const char *format, ...)
    GC_ATTR_FORMAT_PRINTF(1, 2);

/*
 * The basic logging routine.  Typically, it is called directly only inside
 * various `DEBUG_*` blocks.
 */
GC_API_PRIV void GC_log_printf(const char *format, ...)
    GC_ATTR_FORMAT_PRINTF(1, 2);

#ifndef GC_ANDROID_LOG
/*
 * Without the Android log: the statistics flag reflects `GC_print_stats`,
 * and the info logger is the same as the conditional one.
 */
#  define GC_PRINT_STATS_FLAG (GC_print_stats != 0)
#  define GC_INFOLOG_PRINTF GC_COND_LOG_PRINTF
/*
 * The "verbose" logging routine which is called only if `GC_print_stats`
 * is `VERBOSE`.  Here, it is just an alias of `GC_log_printf`.
 */
#  define GC_verbose_log_printf GC_log_printf
#else
extern GC_bool GC_quiet;
#  define GC_PRINT_STATS_FLAG (!GC_quiet)
/*
 * `INFO`/`DBG` loggers are enabled even if `GC_print_stats` is off.
 * The macro below expands to an `if`-`else` statement ending with the
 * routine name, so it should be followed by the argument list; nothing
 * is printed (and the arguments are not evaluated) if `GC_quiet`.
 */
#  ifndef GC_INFOLOG_PRINTF
#    define GC_INFOLOG_PRINTF \
      if (GC_quiet) {         \
      } else                  \
        GC_info_log_printf
#  endif
GC_INNER void GC_info_log_printf(const char *format, ...)
    GC_ATTR_FORMAT_PRINTF(1, 2);
GC_INNER void GC_verbose_log_printf(const char *format, ...)
    GC_ATTR_FORMAT_PRINTF(1, 2);
#endif /* GC_ANDROID_LOG */

/*
 * The logger alias: the info logger in case of a small configuration or
 * the Android log, otherwise the unconditional `GC_log_printf`.
 */
#if defined(SMALL_CONFIG) || defined(GC_ANDROID_LOG)
#  define GC_ERRINFO_PRINTF GC_INFOLOG_PRINTF
#else
#  define GC_ERRINFO_PRINTF GC_log_printf
#endif

/*
 * Convenient wrapper macros over `GC_log_printf()` and
 * `GC_verbose_log_printf()`.  Each macro expands to an `if`-`else`
 * statement with an empty `if` branch and with the `else` branch ending
 * with the routine name, thus the macro should be followed directly by
 * the parenthesized argument list (and a semicolon); the arguments are not
 * evaluated if the relevant logging is off.  Because of the trailing
 * `else`, be careful when using these macros as the body of an outer
 * `if` statement without braces.
 */
#define GC_COND_LOG_PRINTF       \
  if (LIKELY(!GC_print_stats)) { \
  } else                         \
    GC_log_printf
#define GC_VERBOSE_LOG_PRINTF              \
  if (LIKELY(GC_print_stats != VERBOSE)) { \
  } else                                   \
    GC_verbose_log_printf
/* The debug logger is controlled by `GC_PRINT_STATS_FLAG`. */
#ifndef GC_DBGLOG_PRINTF
#  define GC_DBGLOG_PRINTF      \
    if (!GC_PRINT_STATS_FLAG) { \
    } else                      \
      GC_log_printf
#endif

/*
 * Write string `s` to `stderr`, but do not buffer, do not add newlines,
 * do not...
 */
void GC_err_puts(const char *s);

/*
 * Convert a size value `v` (of `word` type) to KiB for logging purposes.
 * The result is rounded to the nearest integer value (an exact half is
 * rounded down) and is cast to `unsigned long`.
 */
#define TO_KiB_UL(v) ((unsigned long)(((v) + 0x1ff) >> 10))

#ifdef USE_MUNMAP
/* The variables controlling the memory unmapping. */
GC_EXTERN unsigned GC_unmap_threshold; /*< defined in `alloc.c` file */

/*
 * Force memory unmapping on every collection.  Has no effect on
 * implicitly-initiated collections.
 */
GC_EXTERN GC_bool GC_force_unmap_on_gcollect;
#endif

#ifdef MSWIN32
/* The flags specific to Win32 target. */
GC_EXTERN GC_bool GC_no_win32_dlls; /*< defined in `os_dep.c` file */

/* Is this a Windows NT derivative (i.e. NT, Win2K, XP or later)? */
GC_EXTERN GC_bool GC_wnt;
#endif

#ifdef THREADS
#  if (defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE)
GC_EXTERN CRITICAL_SECTION GC_write_cs;
#    ifdef GC_ASSERTIONS
/*
 * Set to `TRUE` only if `GC_stop_world()` has acquired `GC_write_cs`.
 * Protected by `GC_write_cs`.
 */
GC_EXTERN GC_bool GC_write_disabled;
#    endif
#  endif /* MSWIN32 || MSWINCE */
#  ifdef NEED_FAULT_HANDLER_LOCK
/*
 * Acquire the spin lock we use to update dirty bits.  Threads should
 * not get stopped holding it.  But we may acquire and release it during
 * `GC_remove_protection()` call.  The acquisition is a busy-wait loop
 * repeating test-and-set on `GC_fault_handler_lock` while the result is
 * `AO_TS_SET`; the release just clears the lock variable.
 */
#    define GC_acquire_dirty_lock() \
      do { /* Empty. */             \
      } while (AO_test_and_set_acquire(&GC_fault_handler_lock) == AO_TS_SET)
#    define GC_release_dirty_lock() AO_CLEAR(&GC_fault_handler_lock)
#  else
/* No fault handler lock is needed, so both operations are no-op. */
#    define GC_acquire_dirty_lock() (void)0
#    define GC_release_dirty_lock() (void)0
#  endif
#  ifdef MSWINCE
GC_EXTERN GC_bool GC_dont_query_stack_min;
#  endif
#elif defined(IA64)
/* Value returned from register flushing routine (`ar.bsp`). */
GC_EXTERN ptr_t GC_save_regs_ret_val;
#endif /* !THREADS */

#ifdef THREAD_LOCAL_ALLOC
GC_EXTERN GC_bool GC_world_stopped; /*< defined in `alloc.c` file */

/*
 * Mark the thread-local free lists.
 * We must explicitly mark `ptrfree` and `gcj` free lists, since the
 * free list links would not otherwise be found.  We also set them in
 * the normal free lists, since that involves touching less memory than
 * if we scanned them normally.
 */
GC_INNER void GC_mark_thread_local_free_lists(void);
#endif

#if defined(GLIBC_2_19_TSX_BUG) && defined(GC_PTHREADS_PARAMARK)
/*
 * Parse string like `<major>[.<minor>[<tail>]]` and return `major` value.
 * NOTE(review): the `minor` value is presumably stored to `*pminor` -
 * confirm against the definition.
 */
GC_INNER int GC_parse_version(int *pminor, const char *pverstr);
#endif

#if defined(MPROTECT_VDB) && defined(GWW_VDB)
/*
 * Returns `TRUE` if `GetWriteWatch()` is available.  May be called
 * repeatedly.  May be called with or without the allocator lock held.
 */
GC_INNER GC_bool GC_gww_dirty_init(void);
#endif

#if defined(CHECKSUMS) || defined(PROC_VDB)
/*
 * Could any valid GC heap pointer ever have been written to this page
 * (i.e. to the one at `h`)?
 */
GC_INNER GC_bool GC_page_was_ever_dirty(struct hblk *h);
#endif

#ifndef GC_NO_DEINIT
/*
 * The routines to reset the state of the relevant parts of the collector;
 * declared only if the deinitialization support is not turned off.
 * NOTE(review): presumably called during `GC_deinit()` - confirm.
 */
GC_INNER void GC_reset_freelist(void);
GC_INNER void GC_reset_obj_kinds(void);
#  ifdef CHECKSUMS
void GC_reset_check_page(void);
#  endif
#  ifdef THREADS
GC_INNER void GC_reset_threads(void);
#  endif
#  ifdef THREAD_LOCAL_ALLOC
GC_INNER void GC_reset_thread_local_initialization(void);
#  endif
#endif

#ifdef CHECKSUMS
/* The routines of the checksums-based validation of the dirty bits. */
#  ifdef MPROTECT_VDB
void GC_record_fault(struct hblk *h);
#  endif
void GC_check_dirty(void);
#endif

/* NOTE(review): presumably determines the page size - confirm. */
GC_INNER void GC_setpagesize(void);

GC_INNER void GC_initialize_offsets(void);

#if defined(REDIR_MALLOC_AND_LINUXTHREADS) \
    && !defined(REDIRECT_MALLOC_IN_HEADER)
GC_INNER void GC_init_lib_bounds(void);
#else
/* The call expands to a no-op in the other configurations. */
#  define GC_init_lib_bounds() (void)0
#endif

#ifdef REDIR_MALLOC_AND_LINUXTHREADS
/*
 * Find the text (code) mapping for the library whose name, after
 * stripping the directory part, starts with `nm`.
 * NOTE(review): the bounds of the mapping are presumably stored to
 * `*startp` and `*endp`, and the result indicates whether the mapping
 * is found - confirm against the definition.
 */
GC_INNER GC_bool GC_text_mapping(const char *nm, ptr_t *startp, ptr_t *endp);
#endif

#if defined(USE_WINALLOC) && !defined(REDIRECT_MALLOC)
GC_INNER void GC_add_current_malloc_heap(void);
#endif

#ifdef MAKE_BACK_GRAPH
/*
 * Rebuild the representation of the backward reachability graph.
 * Does not examine mark bits.  Could be called before collection.
 */
GC_INNER void GC_build_back_graph(void);

GC_INNER void GC_traverse_back_graph(void);
#endif

#ifdef MSWIN32
/* NOTE(review): presumably a Win32-specific initialization - confirm. */
GC_INNER void GC_init_win32(void);
#endif

#ifndef ANY_MSWIN
/*
 * Is a particular static root (with the given start) registered?
 * If so, then return a pointer to it, else `NULL`.  The type is a lie,
 * since the real type does not make sense here, and we only test for `NULL`.
 * Thus, the callers should only compare the result to `NULL`.
 */
GC_INNER void *GC_roots_present(ptr_t);
#endif

#if defined(GC_WIN32_THREADS)
/*
 * Same as `GC_push_one` but for a sequence of registers (`count` values
 * starting at `regs`).
 */
GC_INNER void GC_push_many_regs(const word *regs, unsigned count);

/*
 * Find stack with the lowest address which overlaps the interval
 * [`start`, `limit`).  Return stack bounds in `*plo` and `*phi`.
 * If no such stack is found, both `*phi` and `*plo` will be set to an
 * address higher than `limit`.
 */
GC_INNER void GC_get_next_stack(ptr_t start, ptr_t limit, ptr_t *plo,
                                ptr_t *phi);

#  if defined(MPROTECT_VDB) && !defined(CYGWIN32)
GC_INNER void GC_set_write_fault_handler(void);
#  endif
#  if defined(WRAP_MARK_SOME) && !defined(GC_PTHREADS)
/*
 * Did we invalidate mark phase with an unexpected thread start?
 * Return `TRUE` if a thread was attached since we last asked or since
 * `GC_attached_thread` was explicitly reset.
 */
GC_INNER GC_bool GC_started_thread_while_stopped(void);
#  endif
#endif /* GC_WIN32_THREADS */

#if defined(MPROTECT_VDB) && defined(DARWIN) && defined(THREADS)
/*
 * The routines specific to the `mprotect`-based VDB implementation on
 * Darwin in the multi-threaded case.
 * NOTE(review): `GC_mprotect_stop()` and `GC_mprotect_resume()` look like
 * a pair to be called around a world stop - confirm against the callers.
 */
GC_INNER void GC_mprotect_stop(void);
GC_INNER void GC_mprotect_resume(void);
#  ifndef GC_NO_THREADS_DISCOVERY
GC_INNER void GC_darwin_register_self_mach_handler(void);
#  endif
#endif

#ifndef NOT_GCBUILD
/*
 * Follow the chain of forwarding addresses (if any) starting from
 * block `h` until the real beginning of the block is reached.
 * On entry, `*phhdr` should be the (non-`NULL`) header of `h`;
 * on return, `*phhdr` is the (non-`NULL`) header of the resulting block.
 * Returns the beginning of the block.
 */
GC_INLINE struct hblk *
GC_find_starting_hblk(struct hblk *h, hdr **phhdr)
{
  hdr *cur_hdr = *phhdr;

  GC_ASSERT(HDR(h) == cur_hdr);
  while (IS_FORWARDING_ADDR_OR_NIL(cur_hdr)) {
    /* A nil header is not expected while walking the chain. */
    GC_ASSERT(cur_hdr != NULL);
    h = FORWARDED_ADDR(h, cur_hdr);
    cur_hdr = HDR(h);
  }
  *phhdr = cur_hdr;
  return h;
}
#endif /* !NOT_GCBUILD */

#if (defined(PARALLEL_MARK)                                      \
     && !defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID_AND_ARG)       \
     && (defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID)               \
         || defined(HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID)         \
         || defined(HAVE_PTHREAD_SET_NAME_NP)))                  \
    || (defined(DYNAMIC_LOADING)                                 \
        && ((defined(USE_PROC_FOR_LIBRARIES) && !defined(LINUX)) \
            || defined(DARWIN) || defined(IRIX5)))               \
    || defined(PROC_VDB) || defined(SOFT_VDB)
/*
 * A function to convert a long integer value `lv` to a string adding
 * the `prefix` and optional `suffix`.  The resulting string is put to
 * `buf` of the designated size (`buf_sz`).  Guaranteed to append
 * a trailing "\0" and not to exceed the buffer size.  (Note that it is
 * recommended to reserve at least 20 characters for the number part of
 * the string in `buf` to avoid a compiler warning about potential
 * number truncation.)
 *
 * Unless `GC_DISABLE_SNPRINTF`, it is a macro based on `snprintf()` (with
 * "%s%ld%s" format), which ignores the result of the latter and always
 * stores "\0" to the last element of `buf`; note that `buf` and `buf_sz`
 * arguments are evaluated twice in this case.  Otherwise, it is
 * a function (and `NEED_SNPRINTF_SLDS` is defined).
 */
#  ifndef GC_DISABLE_SNPRINTF
#    define GC_snprintf_s_ld_s(buf, buf_sz, prefix, lv, suffix)    \
      (void)(snprintf(buf, buf_sz, "%s%ld%s", prefix, lv, suffix), \
             (buf)[(buf_sz) - (size_t)1] = '\0')
#  else
#    define NEED_SNPRINTF_SLDS
GC_INNER void GC_snprintf_s_ld_s(char *buf, size_t buf_sz, const char *prefix,
                                 long lv, const char *suffix);
#  endif
#endif

#ifdef THREADS
#  ifndef GC_NO_FINALIZATION
/* Called by `GC_finalize()` (in case of an allocation failure observed). */
GC_INNER void GC_reset_finalizer_nested(void);

/*
 * Check and update the thread-local level of finalizers recursion.
 * Returns `NULL` if `GC_invoke_finalizers()` should not be called by
 * the collector (to minimize the risk of a deep finalizers recursion),
 * otherwise returns a pointer to the thread-local `finalizer_nested`.
 * Called by `GC_notify_or_invoke_finalizers()` only.
 */
GC_INNER unsigned char *GC_check_finalizer_nested(void);
#  endif

/*
 * NOTE(review): presumably the worker of the `GC_do_blocking()` call -
 * confirm against the definition.
 */
GC_INNER void GC_do_blocking_inner(ptr_t data, void *context);

/*
 * Push the stacks of all threads.
 * Should do exactly the right thing if the world is stopped; should
 * not fail if it is not stopped.
 */
GC_INNER void GC_push_all_stacks(void);

#  ifdef USE_PROC_FOR_LIBRARIES
/*
 * NOTE(review): judging by the name, tells whether the segment
 * [`lo`, `hi`) is a stack of some thread - confirm.
 */
GC_INNER GC_bool GC_segment_is_thread_stack(ptr_t lo, ptr_t hi);
#  endif
#  if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
      && defined(IA64)
/*
 * Find the largest stack base smaller than `bound`.  May be used to find
 * the boundary between a register stack and adjacent immediately preceding
 * memory stack.
 */
GC_INNER ptr_t GC_greatest_stack_base_below(ptr_t bound);
#  endif
#endif /* THREADS */

#ifdef DYNAMIC_LOADING
/*
 * Do we need to separately register the main static data segment?
 * The answer is the result of the call.
 */
GC_INNER GC_bool GC_register_main_static_data(void);

#  ifdef DARWIN
GC_INNER void GC_init_dyld(void);
#  endif
#endif /* DYNAMIC_LOADING */

#ifdef SEARCH_FOR_DATA_START
/* NOTE(review): presumably determines the data start on Linux - confirm. */
GC_INNER void GC_init_linux_data_start(void);
#endif

#ifdef NEED_PROC_MAPS
#  if defined(DYNAMIC_LOADING) && defined(USE_PROC_FOR_LIBRARIES) \
      || defined(IA64) || defined(INCLUDE_LINUX_THREAD_DESCR)     \
      || (defined(CHECK_SOFT_VDB) && defined(MPROTECT_VDB))       \
      || defined(REDIR_MALLOC_AND_LINUXTHREADS)
/*
 * Assign various fields of the first line in `maps_ptr` to `*p_start`,
 * `*p_end`, `*p_prot`, `*p_maj_dev` and `*p_mapping_name`.
 * `p_mapping_name` may be `NULL`.  `*p_prot` and `*p_mapping_name` are
 * assigned pointers into the original buffer.
 * NOTE(review): the result is presumably a pointer to the next line of
 * the buffer (or `NULL` if none) - confirm against the definition.
 */
GC_INNER const char *GC_parse_map_entry(const char *maps_ptr, ptr_t *p_start,
                                        ptr_t *p_end, const char **p_prot,
                                        unsigned *p_maj_dev,
                                        const char **p_mapping_name);
#  endif
#  if defined(IA64) || defined(INCLUDE_LINUX_THREAD_DESCR) \
      || (defined(CHECK_SOFT_VDB) && defined(MPROTECT_VDB))
/*
 * Try to read the backing store base from `/proc/self/maps` file.
 * Return the bounds (in `*startp` and `*endp`) of the writable mapping
 * with a zero major device, which includes the address passed as `addr`.
 * Return `FALSE` if there is no such mapping.
 */
GC_INNER GC_bool GC_enclosing_writable_mapping(ptr_t addr, ptr_t *startp,
                                               ptr_t *endp);
#  endif

/*
 * Copy the content of `/proc/self/maps` file to a buffer in our
 * address space.  Return the address of the buffer.
 */
GC_INNER const char *GC_get_maps(void);
#endif /* NEED_PROC_MAPS */

#ifdef GC_ASSERTIONS
/*
 * The routines below are declared only if the assertions are on; these
 * are used by `COND_DUMP_CHECKS` to validate the corresponding variables.
 */

/* Should return the same value as `GC_large_free_bytes`. */
GC_INNER word GC_compute_large_free_bytes(void);

/* Should return the same value as `GC_root_size`. */
GC_INNER word GC_compute_root_size(void);
#endif

/*
 * Check a compile time assertion at compile time.  Depending on the
 * compiler, the check is implemented with `static_assert` or with
 * a declaration of an array type which has a negative size if expression
 * `e` is false.  Note: in the first variant (for MS VC) the expansion is
 * a bare declaration not followed by a semicolon, while in the other
 * variants it is a statement or an expression.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
#  define GC_STATIC_ASSERT(e) static_assert(e, "static assertion failed: " #e)
#elif defined(static_assert) && !defined(CPPCHECK) \
    && (__STDC_VERSION__ >= 201112L)
#  define GC_STATIC_ASSERT(e)                                               \
    do { /* placed in `do`-`while` for proper formatting by clang-format */ \
      static_assert(e, #e);                                                 \
    } while (0)
#elif defined(mips) && !defined(__GNUC__) && !defined(CPPCHECK)
/*
 * DOB: MIPSPro C gets an internal error taking the `sizeof` an array type.
 * This code works correctly (ugliness is to avoid "unused var" warnings).
 */
#  define GC_STATIC_ASSERT(e) \
    do {                      \
      if (0) {                \
        char j[(e) ? 1 : -1]; \
        j[0] = '\0';          \
        j[0] = j[0];          \
      }                       \
    } while (0)
#else
/*
 * The error message for failure is a bit baroque, but...  (A failure is
 * reported by the compiler as an array type of a negative size.)
 */
#  define GC_STATIC_ASSERT(e) (void)sizeof(char[(e) ? 1 : -1])
#endif

/*
 * Runtime check for an argument declared as non-`NULL` is actually
 * not `NULL`.  In the first variant, `arg` should be an lvalue (as its
 * address is taken); the value is read through a pointer to `volatile`.
 */
#if GC_GNUC_PREREQ(4, 0)
/* Workaround tautological-pointer-compare Clang warning. */
#  define NONNULL_ARG_NOT_NULL(arg) \
    (*CAST_THRU_UINTPTR(volatile void **, &(arg)) != NULL)
#else
#  define NONNULL_ARG_NOT_NULL(arg) ((arg) != NULL)
#endif

/*
 * The consistency checks: assert that `I_HOLD_LOCK()` holds and that the
 * values of `GC_large_free_bytes` and `GC_root_size` match the recomputed
 * ones.  The checks are expressed with `GC_ASSERT`.
 */
#define COND_DUMP_CHECKS                                             \
  do {                                                               \
    GC_ASSERT(I_HOLD_LOCK());                                        \
    GC_ASSERT(GC_compute_large_free_bytes() == GC_large_free_bytes); \
    GC_ASSERT(GC_compute_root_size() == GC_root_size);               \
  } while (0)

#ifndef NO_DEBUGGING
/* A flag to generate regular debugging dumps. */
GC_EXTERN GC_bool GC_dump_regularly;
/*
 * Either do a dump (`GC_dump_named(NULL)`) if `GC_dump_regularly`, or
 * perform the checks of `COND_DUMP_CHECKS` otherwise.  The macro expands
 * to an `if`-`else` statement without the terminating semicolon.
 */
#  define COND_DUMP                    \
    if (UNLIKELY(GC_dump_regularly)) { \
      GC_dump_named(NULL);             \
    } else                             \
      COND_DUMP_CHECKS
#else
/* No dump is possible, so only the checks are performed. */
#  define COND_DUMP COND_DUMP_CHECKS
#endif

/*
 * We need additional synchronization facilities from the thread support.
 * We believe these are less performance critical than the allocator lock;
 * standard `pthreads`-based implementations should be sufficient.
 */
#ifdef PARALLEL_MARK

/*
 * Number of mark threads we would like to have excluding the initiating
 * thread.  (It is an alias of `GC_parallel`.)
 */
#  define GC_markers_m1 GC_parallel

/* A flag to temporarily avoid parallel marking. */
GC_EXTERN GC_bool GC_parallel_mark_disabled;

/*
 * The routines to deal with the mark lock and condition variables.
 * If the allocator lock is also acquired, it must be done first.
 * The mark lock is used to both protect some variables used by the
 * parallel marker, and to protect `GC_fl_builder_count`.
 * `GC_notify_all_marker()` is called when the state of the parallel marker
 * changes in some significant way (see `gc_mark.h` file for details).
 * The latter set of events includes incrementing `GC_mark_no`.
 * `GC_notify_all_builder()` is called when `GC_fl_builder_count` reaches
 * zero.
 */

/*
 * Wait for all markers to finish initialization (i.e. store `marker_sp`,
 * `marker_bsp`, `marker_mach_threads`, `GC_marker_Id`).
 */
GC_INNER void GC_wait_for_markers_init(void);

GC_INNER void GC_acquire_mark_lock(void);
GC_INNER void GC_release_mark_lock(void);
GC_INNER void GC_notify_all_builder(void);
GC_INNER void GC_wait_for_reclaim(void);

/*
 * Number of threads currently building free lists without holding
 * the allocator lock.  It is not safe to collect if this is nonzero.
 * Also, together with the mark lock, it is used as a semaphore during
 * marker threads startup.  Protected by the mark lock.
 */
GC_EXTERN GC_signed_word GC_fl_builder_count;

GC_INNER void GC_notify_all_marker(void);
GC_INNER void GC_wait_marker(void);

/*
 * Try to help out parallel marker, if it is running, for mark cycle
 * `my_mark_no`.  Returns if the mark cycle finishes or was already
 * done, or there was nothing to do for some other reason.  We hold the
 * mark lock only, the initiating thread holds the allocator lock.
 */
GC_INNER void GC_help_marker(word my_mark_no);

GC_INNER void GC_start_mark_threads_inner(void);

/*
 * Increment the count of marks (`hb_n_marks`) in the block header.
 * In case of parallel marking, this is done by a separate `AO_load` and
 * `AO_store` pair (not by a single read-modify-write operation), and
 * `hhdr` argument is evaluated twice.
 */
#  define INCR_MARKS(hhdr) \
    AO_store(&(hhdr)->hb_n_marks, AO_load(&(hhdr)->hb_n_marks) + 1)
#else
/* Increment the count of marks (`hb_n_marks`) in the block header. */
#  define INCR_MARKS(hhdr) (void)(++(hhdr)->hb_n_marks)
#endif /* !PARALLEL_MARK */

#if defined(SIGNAL_BASED_STOP_WORLD) && !defined(SIG_SUSPEND)
/*
 * We define the thread suspension signal here, so that we can refer
 * to it in the virtual dirty bit (VDB) implementation, if necessary.
 * Ideally we would allocate a (real-time?) signal using the standard
 * mechanism.  Unfortunately, there is no such one.  (There is one in
 * Linux `glibc`, but it is not exported.)  Thus we continue to use
 * the same hard-coded signals we have always used.
 *
 * Note: every compound replacement is parenthesized, so that
 * `SIG_SUSPEND` could be safely used as an operand of any expression.
 */
#  ifdef THREAD_SANITIZER
/*
 * Unfortunately, use of an asynchronous signal to suspend threads leads to
 * the situation when the signal is not delivered (it is stored to
 * `pending_signals` in TSan runtime actually) while the destination thread
 * is blocked in `pthread_mutex_lock()`.  Thus, we use some synchronous one
 * instead (which is again unlikely to be used by clients directly).
 */
#    define SIG_SUSPEND SIGSYS
#  elif (defined(DGUX) || defined(LINUX)) && !defined(GC_USESIGRT_SIGNALS)
#    if defined(SPARC) && !defined(SIGPWR)
/*
 * Linux/SPARC does not properly define `SIGPWR` in platform `signal.h` file.
 * It is aliased to `SIGLOST` in platform `asm/signal.h` file, though.
 */
#      define SIG_SUSPEND SIGLOST
#    else
/* LinuxThreads itself uses `SIGUSR1` and `SIGUSR2`. */
#      define SIG_SUSPEND SIGPWR
#    endif
#  elif defined(FREEBSD) && defined(__GLIBC__) && !defined(GC_USESIGRT_SIGNALS)
#    define SIG_SUSPEND (32 + 6)
#  elif (defined(FREEBSD) || defined(HURD) || defined(RTEMS)) \
      && !defined(GC_USESIGRT_SIGNALS)
#    define SIG_SUSPEND SIGUSR1
/* `SIGTSTP` and `SIGCONT` could be used alternatively on FreeBSD. */
#  elif (defined(OPENBSD) && !defined(GC_USESIGRT_SIGNALS)) \
      || defined(SERENITY)
#    define SIG_SUSPEND SIGXFSZ
#  elif defined(_SIGRTMIN) && !defined(CPPCHECK)
#    define SIG_SUSPEND (_SIGRTMIN + 6)
#  else
#    define SIG_SUSPEND (SIGRTMIN + 6)
#  endif
#endif /* SIGNAL_BASED_STOP_WORLD && !SIG_SUSPEND */

/*
 * The default value is zero; the client may predefine the macro to
 * override it.
 * NOTE(review): presumably the value is passed as `pshared` argument of
 * `sem_init()` - confirm against the usage.
 */
#if defined(GC_PTHREADS) && !defined(GC_SEM_INIT_PSHARED)
#  define GC_SEM_INIT_PSHARED 0
#endif

/*
 * Some macros for `setjmp()` working across signal handlers, where
 * possible.  `SETJMP`, `LONGJMP` and `JMP_BUF` should be used together.
 */
#if (defined(UNIX_LIKE) || (defined(NEED_FIND_LIMIT) && defined(CYGWIN32))) \
    && !defined(GC_NO_SIGSETJMP)
#  if defined(SUNOS5SIGS) && !defined(FREEBSD) && !defined(LINUX)
EXTERN_C_END
#    include <sys/siginfo.h>
EXTERN_C_BEGIN
#  endif
/*
 * Define `SETJMP()` and friends to be the variant restoring the signal
 * mask (`sigsetjmp()` is called with a non-zero second argument).
 */
#  define SETJMP(env) sigsetjmp(env, 1)
#  define LONGJMP(env, val) siglongjmp(env, val)
#  define JMP_BUF sigjmp_buf
#else
/* Use the plain `setjmp()` variant (or `hal_setjmp()` one on eCos). */
#  ifdef ECOS
#    define SETJMP(env) hal_setjmp(env)
#  else
#    define SETJMP(env) setjmp(env)
#  endif
#  define LONGJMP(env, val) longjmp(env, val)
#  define JMP_BUF jmp_buf
#endif /* !UNIX_LIKE || GC_NO_SIGSETJMP */

#ifdef DATASTART_USES_XGETDATASTART
#  ifdef FREEBSD
EXTERN_C_END
#    include <machine/trap.h>
EXTERN_C_BEGIN
#  endif
/*
 * NOTE(review): presumably computes the start of the data segment (used
 * to define `DATASTART`) - confirm against the definition.
 */
GC_INNER ptr_t GC_SysVGetDataStart(size_t, ptr_t);
#endif /* DATASTART_USES_XGETDATASTART */

#if defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS) \
    || defined(NEED_FIND_LIMIT) || defined(SEARCH_FOR_DATA_START)
#  if (defined(HOST_ANDROID) || defined(__ANDROID__)) \
      && defined(IGNORE_DYNAMIC_LOADING)
/* Declared as public one in `gc.h` file. */
#  else
/*
 * NOTE(review): presumably finds the limit of the accessible memory
 * starting from `p` in the direction given by `up` - confirm.
 */
void *GC_find_limit(void *p, int up);
#  endif
#endif

#if defined(NEED_FIND_LIMIT)                                 \
    || (defined(UNIX_LIKE) && !defined(NO_DEBUGGING))        \
    || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)) \
    || (defined(WRAP_MARK_SOME) && defined(NO_SEH_AVAILABLE))
/* The type of a fault handler: a function accepting an `int` argument. */
typedef void (*GC_fault_handler_t)(int);
/*
 * NOTE(review): judging by the name, installs the given fault handler
 * saving the previous one - confirm against the definition.
 */
GC_INNER void GC_set_and_save_fault_handler(GC_fault_handler_t);
#endif

#if defined(NEED_FIND_LIMIT)                                 \
    || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)) \
    || (defined(WRAP_MARK_SOME) && defined(NO_SEH_AVAILABLE))
/* The jump buffer used by the temporary fault handler (see below). */
GC_EXTERN JMP_BUF GC_jmp_buf;

/*
 * Set up a handler for address faults which will `longjmp`
 * to `GC_jmp_buf`.
 */
GC_INNER void GC_setup_temporary_fault_handler(void);

/* Undo the effect of `GC_setup_temporary_fault_handler`. */
GC_INNER void GC_reset_fault_handler(void);
#endif /* NEED_FIND_LIMIT || USE_PROC_FOR_LIBRARIES || WRAP_MARK_SOME */

/*
 * Some convenience macros for cancellation support.
 * `DISABLE_CANCEL(state)` disables the thread cancellation saving the
 * previous cancellation state to `state` variable, and
 * `RESTORE_CANCEL(state)` sets the cancellation state back to the saved
 * one.  If the disable counter is available (see below), it is
 * incremented by the former and decremented by the latter, thus the
 * calls should be paired.  All the macros are no-op unless `CANCEL_SAFE`.
 */
#ifdef CANCEL_SAFE
#  if defined(GC_ASSERTIONS)                                            \
      && (defined(USE_COMPILER_TLS)                                     \
          || (defined(LINUX) && !defined(ARM32) && GC_GNUC_PREREQ(3, 3) \
              || defined(HPUX) /* and probably others... */))
/*
 * The thread-local counter of the nested cancellation disabling; it is
 * maintained only to back `ASSERT_CANCEL_DISABLED()`.
 */
extern __thread unsigned char GC_cancel_disable_count;
#    define NEED_CANCEL_DISABLE_COUNT
#    define INCR_CANCEL_DISABLE() ++GC_cancel_disable_count
#    define DECR_CANCEL_DISABLE() --GC_cancel_disable_count
#    define ASSERT_CANCEL_DISABLED() GC_ASSERT(GC_cancel_disable_count > 0)
#  else
/* No counter is maintained, so nothing is checked. */
#    define INCR_CANCEL_DISABLE()
#    define DECR_CANCEL_DISABLE()
#    define ASSERT_CANCEL_DISABLED() (void)0
#  endif /* !GC_ASSERTIONS */
#  define DISABLE_CANCEL(state)                               \
    do {                                                      \
      pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state); \
      INCR_CANCEL_DISABLE();                                  \
    } while (0)
#  define RESTORE_CANCEL(state)            \
    do {                                   \
      ASSERT_CANCEL_DISABLED();            \
      pthread_setcancelstate(state, NULL); \
      DECR_CANCEL_DISABLE();               \
    } while (0)
#else
#  define DISABLE_CANCEL(state) (void)0
#  define RESTORE_CANCEL(state) (void)0
#  define ASSERT_CANCEL_DISABLED() (void)0
#endif /* !CANCEL_SAFE */

/*
 * Multiply 32-bit unsigned values (used by `GC_push_contents_hdr()`).
 * The high 32 bits of the product of `x` and `y` are stored to `hprod`,
 * and the low 32 bits are stored to `lprod`.
 */
#ifdef NO_LONGLONG64
/*
 * The variant not relying on a 64-bit integer type: the operands are
 * split to 16-bit halves, the partial products are summed up, and the
 * carries lost because of the 32-bit overflows (marked below) are
 * detected by comparing a sum to one of its addends.
 */
#  define LONG_MULT(hprod, lprod, x, y)                                 \
    do {                                                                \
      unsigned32 lx = (x) & (0xffffU);                                  \
      unsigned32 ly = (y) & (0xffffU);                                  \
      unsigned32 hx = (x) >> 16;                                        \
      unsigned32 hy = (y) >> 16;                                        \
      unsigned32 lxhy = lx * hy;                                        \
      unsigned32 mid = hx * ly + lxhy; /*< may overflow */              \
      unsigned32 lxly = lx * ly;                                        \
                                                                        \
      lprod = (mid << 16) + lxly; /*< may overflow */                   \
      hprod = hx * hy + ((lprod) < lxly ? 1U : 0)                       \
              + (mid < lxhy ? (unsigned32)0x10000UL : 0) + (mid >> 16); \
    } while (0)
#elif defined(I386) && defined(__GNUC__) && !defined(NACL)
/*
 * The variant based on `mull` instruction: `x` is passed in the same
 * register as the first output operand, `lprod` is bound to "a" register
 * constraint and `hprod` is bound to "d" one.
 */
#  define LONG_MULT(hprod, lprod, x, y) \
    __asm__ __volatile__("mull %2" : "=a"(lprod), "=d"(hprod) : "r"(y), "0"(x))
#else
/* The variant based on a 64-bit (at least) unsigned integer type. */
#  if (defined(__int64) && !defined(__GNUC__) || defined(__BORLANDC__)) \
      && !defined(CPPCHECK)
#    define ULONG_MULT_T unsigned __int64
#  else
#    define ULONG_MULT_T unsigned long long
#  endif
#  define LONG_MULT(hprod, lprod, x, y)                          \
    do {                                                         \
      ULONG_MULT_T prod = (ULONG_MULT_T)(x) * (ULONG_MULT_T)(y); \
                                                                 \
      GC_STATIC_ASSERT(sizeof(x) + sizeof(y) <= sizeof(prod));   \
      hprod = (unsigned32)(prod >> 32);                          \
      lprod = (unsigned32)prod;                                  \
    } while (0)
#endif /* !I386 && !NO_LONGLONG64 */

EXTERN_C_END

#endif /* GC_PRIVATE_H */

#ifdef KEEP_BACK_PTRS
#  include "gc/gc_backptr.h"
#endif

EXTERN_C_BEGIN

/*
 * The marker value; may be predefined by the client.  In case of the
 * default 64-bit value, the high half is the low half with the bytes in
 * the reverse order.
 * NOTE(review): presumably it is the value written to the freed memory -
 * confirm against the usage.
 */
#ifndef GC_FREED_MEM_MARKER
#  if CPP_WORDSZ == 32
#    define GC_FREED_MEM_MARKER (GC_uintptr_t)0xdeadbeef
#  else
#    define GC_FREED_MEM_MARKER ((GC_uintptr_t)GC_WORD_C(0xEFBEADDEdeadbeef))
#  endif
#endif /* !GC_FREED_MEM_MARKER */

/*
 * Stored both one past the end of user object, and one before the end of
 * the object as seen by the allocator.  In case of a 64-bit value, the
 * 32-bit pattern is repeated in both halves.
 */
#if CPP_WORDSZ == 32
#  define START_FLAG (GC_uintptr_t)0xfedcedcb
#  define END_FLAG (GC_uintptr_t)0xbcdecdef
#else
#  define START_FLAG ((GC_uintptr_t)GC_WORD_C(0xFEDCEDCBfedcedcb))
#  define END_FLAG ((GC_uintptr_t)GC_WORD_C(0xBCDECDEFbcdecdef))
#endif

#if defined(KEEP_BACK_PTRS) || defined(PRINT_BLACK_LIST)
/*
 * Pointer "source" variants that are not real locations.  Used in
 * `oh_back_ptr` fields and as `source` argument to some marking functions.
 * Each one is a small even constant converted to a pointer.
 */

/* Object was marked because it is finalizable. */
#  define MARKED_FOR_FINALIZATION ((ptr_t)NUMERIC_TO_VPTR(2))

/*
 * Object was marked from a register.  Hence the "source" of the reference
 * does not have an address.
 */
#  define MARKED_FROM_REGISTER ((ptr_t)NUMERIC_TO_VPTR(4))

/*
 * NOTE(review): not described here; presumably denotes that no marking
 * source has been recorded for the object - confirm against the users.
 */
#  define NOT_MARKED ((ptr_t)NUMERIC_TO_VPTR(8))
#endif /* KEEP_BACK_PTRS || PRINT_BLACK_LIST */

/*
 * Object debug header.  The size of the structure is assumed not to
 * de-align things, and to be a multiple of a double-pointer length.
 * The back-pointer fields (if present) come first, so the first word of
 * a debug-allocated object is `oh_back_ptr`.
 */
typedef struct {
#if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
  /*
   * We potentially keep two different kinds of back pointers.
   * `KEEP_BACK_PTRS` stores a single back pointer in each reachable
   * object to allow reporting of why an object was retained.
   * `MAKE_BACK_GRAPH` builds a graph containing the inverse of all
   * "points-to" edges including those involving objects that have just
   * become unreachable.  This allows detection of growing chains of
   * unreachable objects.  It may be possible to eventually combine both,
   * but for now we keep them separate.  Both kinds of back pointers are
   * hidden using the following macros.  In both cases, the hidden variant
   * (the value actually stored in the field) is constrained to have the
   * least significant bit of 1, to allow it to be distinguished from
   * a free-list link.  This means the plain (not hidden) variant must have
   * the least significant bit of zero.
   * Note that blocks dropped by black-listing will also have the least
   * significant bit clear once debugging has started; we are careful never
   * to overwrite such a value.
   */
#  if ALIGNMENT == 1
  /* Fudge back pointer to be even. */
#    define HIDE_BACK_PTR(p) \
      GC_HIDE_POINTER((ptr_t)(~(GC_uintptr_t)1 & (GC_uintptr_t)(p)))
#  else
#    define HIDE_BACK_PTR(p) GC_HIDE_POINTER(p)
#  endif
  /*
   * Always define either none or both of the fields to ensure
   * double-pointer alignment.
   */
  GC_hidden_pointer oh_back_ptr; /*< hidden `KEEP_BACK_PTRS` back pointer */
  GC_hidden_pointer oh_bg_ptr; /*< hidden `MAKE_BACK_GRAPH` back pointer */
#endif
  const char *oh_string; /*< object descriptor string (file name) */
  GC_signed_word oh_int; /*< object descriptor integer (line number) */
#ifdef NEED_CALLINFO
  /* The saved call chain (see `ADD_CALL_CHAIN`). */
  struct callinfo oh_ci[NFRAMES];
#endif
#ifndef SHORT_DBG_HDRS
  GC_uintptr_t oh_sz; /*< the original `malloc` argument */
  GC_uintptr_t oh_sf; /*< the "start" flag (marker) */
#endif
} oh;

/*
 * Fetch the line number from the debug header pointed to by `ohdr`
 * (the `oh_int` field narrowed to `int`).
 */
#define GET_OH_LINENUM(ohdr) ((int)(ohdr)->oh_int)

/*
 * The number of bytes added to the requested size of a debug-allocated
 * object: `DEBUG_BYTES` for ordinary objects, `UNCOLLECTABLE_DEBUG_BYTES`
 * for uncollectible ones.
 */
#ifdef SHORT_DBG_HDRS
/* Only the header itself is accounted for in this mode. */
#  define DEBUG_BYTES sizeof(oh)
#  define UNCOLLECTABLE_DEBUG_BYTES DEBUG_BYTES
#else
/*
 * Add space for `END_FLAG`, but use any extra space that was already
 * added to catch off-the-end pointers.  For uncollectible objects, the
 * extra byte is not added.
 */
#  define UNCOLLECTABLE_DEBUG_BYTES (sizeof(oh) + sizeof(GC_uintptr_t))
#  define DEBUG_BYTES (UNCOLLECTABLE_DEBUG_BYTES - EXTRA_BYTES)
#endif

/*
 * `ADD_CALL_CHAIN` stores a (partial) call chain into an object header;
 * it should be called with the allocator lock held.
 * `PRINT_CALL_CHAIN` prints the call chain stored in an object to `stderr`;
 * it requires we do not hold the allocator lock.
 * In both macros `base` is the object base, i.e. a pointer to an `oh`.
 */
#if defined(SAVE_CALL_CHAIN)
/* The whole chain is captured by `GC_save_callers()`; `ra` is ignored. */
#  define ADD_CALL_CHAIN(base, ra) GC_save_callers(((oh *)(base))->oh_ci)
#  if defined(REDIRECT_MALLOC) && defined(THREADS) && defined(DBG_HDRS_ALL) \
      && NARGS == 0 && NFRAMES % 2 == 0 && defined(GC_HAVE_BUILTIN_BACKTRACE)
/*
 * A dummy variant of `GC_save_callers()` which does not call
 * `backtrace()`.
 */
GC_INNER void GC_save_callers_no_unlock(struct callinfo info[NFRAMES]);

#    define ADD_CALL_CHAIN_INNER(base) \
      GC_save_callers_no_unlock(((oh *)(base))->oh_ci)
#  endif
#  define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base))->oh_ci)
#elif defined(GC_ADD_CALLER)
/*
 * Only the return address `ra` is recorded (in the first frame).
 * Note: the expansion is a bare assignment, so use it only as a statement.
 */
#  define ADD_CALL_CHAIN(base, ra) ((oh *)(base))->oh_ci[0].ci_pc = (ra)
#  define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base))->oh_ci)
#else
/* No call information is kept: both macros expand to nothing. */
#  define ADD_CALL_CHAIN(base, ra)
#  define PRINT_CALL_CHAIN(base)
#endif

#if !defined(ADD_CALL_CHAIN_INNER) && defined(DBG_HDRS_ALL)
/*
 * A variant of `ADD_CALL_CHAIN()` used for internal allocations.
 * This is the fallback definition (used unless a dedicated variant has
 * been defined already); it passes `GC_RETURN_ADDR` as the return address.
 */
#  define ADD_CALL_CHAIN_INNER(base) ADD_CALL_CHAIN(base, GC_RETURN_ADDR)
#endif

/*
 * Expands to `ra,` (including the trailing comma) if `GC_ADD_CALLER` is
 * defined, and to nothing otherwise; presumably meant to be placed in
 * front of the remaining arguments of a call.
 */
#ifdef GC_ADD_CALLER
#  define OPT_RA ra,
#else
#  define OPT_RA
#endif

/*
 * Check whether object given by its base pointer has debugging info.
 * The argument (`base`) is assumed to point to a legitimate object in the
 * collector heap.  This excludes the check as to whether the back pointer
 * is odd, which is added by the `GC_HAS_DEBUG_INFO` macro.  Note that
 * if `DBG_HDRS_ALL` is defined, uncollectible objects on free lists
 * may not have debug information set.  Thus, it is not always safe to
 * return 1 (true), even if the client does its part.  Return -1 if the
 * object with debug info has been marked as deallocated.
 */
#ifdef SHORT_DBG_HDRS
/* With short debug headers the check is a constant 1 (true). */
#  define GC_has_other_debug_info(base) 1
#else
GC_INNER int GC_has_other_debug_info(ptr_t base);

/*
 * NOTE(review): presumably records `smashed` in the list which is later
 * printed by `GC_print_all_smashed_proc()` - confirm at the definition.
 */
GC_INNER void GC_add_smashed(ptr_t smashed);

/*
 * Use `GC_err_printf()` and friends to print a description of the object
 * whose client-visible address is `p`, and which was smashed at memory
 * location pointed by `clobbered`.
 */
GC_INNER void GC_print_smashed_obj(const char *msg, void *p, ptr_t clobbered);

/* Print all objects on the list.  Clear the list. */
GC_INNER void GC_print_all_smashed_proc(void);
#endif /* !SHORT_DBG_HDRS */

/*
 * `GC_HAS_DEBUG_INFO(base)` is true if the object with the given base
 * pointer has a debug header, i.e. `GC_has_other_debug_info(base)` is
 * positive.  If back pointers are stored then the first word of the
 * object is additionally required to be odd (this test goes first).
 */
#if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
#  if defined(SHORT_DBG_HDRS) && !defined(CPPCHECK)
#    error Non-ptr stored in object results in GC_HAS_DEBUG_INFO malfunction
/* We may mistakenly conclude that base has a debugging wrapper. */
#  endif
#  if defined(PARALLEL_MARK) && defined(KEEP_BACK_PTRS)
/*
 * Note: the atomic load is used as `GC_store_back_pointer` stores
 * `oh_back_ptr` atomically (`base` might point to the field); this prevents
 * a TSan warning.
 */
#    define GC_HAS_DEBUG_INFO(base)                                    \
      (((GC_uintptr_t)GC_cptr_load((volatile ptr_t *)(base)) & 1) != 0 \
       && GC_has_other_debug_info(base) > 0)
#  else
/* Same as above but the first word is read with a plain load. */
#    define GC_HAS_DEBUG_INFO(base)         \
      (((*(GC_uintptr_t *)(base)) & 1) != 0 \
       && GC_has_other_debug_info(base) > 0)
#  endif
#else
/* No back pointers are stored, so there is no odd-first-word test. */
#  define GC_HAS_DEBUG_INFO(base) (GC_has_other_debug_info(base) > 0)
#endif /* !KEEP_BACK_PTRS && !MAKE_BACK_GRAPH */

EXTERN_C_END

#endif /* GC_DBG_MLC_H */


/*
 * This implements a full, though not well-tuned, representation of the
 * backwards points-to graph.  This is used to test for non-GC-robust
 * data structures; the code is not used during normal garbage collection.
 *
 * One restriction is that we drop all back-edges from nodes with very
 * high in-degree, and simply add them to a list of such nodes.  They are
 * then treated as permanent roots.  If this by itself does not introduce
 * a space leak, then such nodes cannot contribute to a growing space leak.
 */

#ifdef MAKE_BACK_GRAPH

#  if (!defined(DBG_HDRS_ALL)                                          \
       || (ALIGNMENT != CPP_PTRSZ / 8) /* `|| !defined(UNIX_LIKE)` */) \
      && !defined(CPPCHECK)
#    error The configuration does not support MAKE_BACK_GRAPH
#  endif

/*
 * We store single back pointers directly in the object's `oh_bg_ptr` field.
 * If there is more than one pointer to an object, we store `q` or'ed with
 * `FLAG_MANY`, where `q` is a pointer to a `back_edges` object.
 * Every once in a while we use a `back_edges` object even for a single
 * pointer, since we need the other fields in the `back_edges` structure to
 * be present in some fraction of the objects.  Otherwise we get serious
 * performance issues.
 */
#  define FLAG_MANY 2

/*
 * A bit of `flags` field of `back_edges`: directly points to a reachable
 * object; retain for the next collection.
 */
#  define RETAIN 1

/* Special values of `height` field of `back_edges`. */
#  define HEIGHT_UNKNOWN (-2)
#  define HEIGHT_IN_PROGRESS (-1)

/* The structure itself is expected to be defined elsewhere. */
typedef struct back_edges_s back_edges;

/*
 * The number of `back_edges` structures the pool (`GC_back_edge_space`)
 * is sized for; `new_back_edges()` aborts when the pool is about to be
 * exhausted.
 */
#  define MAX_BACK_EDGE_STRUCTS 100000

/*
 * Allocate a new back edge structure.  Should be more sophisticated
 * if this were production code.
 */
static back_edges *
new_back_edges(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == GC_back_edge_space) {
    size_t bytes_to_get;

    GC_ASSERT(0 == GC_n_back_edge_structs);
    GC_ASSERT(NULL == GC_avail_back_edges);
    GC_ASSERT(GC_page_size != 0);
    bytes_to_get
        = ROUNDUP_PAGESIZE_IF_MMAP(MAX_BACK_EDGE_STRUCTS * sizeof(back_edges));

    GC_back_edge_space = (back_edges *)GC_os_get_mem(bytes_to_get);
    if (NULL == GC_back_edge_space)
      ABORT("Insufficient memory for back edges");
  }
  if (GC_avail_back_edges != NULL) {
    back_edges *result = GC_avail_back_edges;
    GC_avail_back_edges = result->cont;
    result->cont = NULL;
    return result;
  }
  if (GC_n_back_edge_structs >= MAX_BACK_EDGE_STRUCTS - 1) {
    ABORT("Needed too much space for back edges: adjust "
          "MAX_BACK_EDGE_STRUCTS");
  }
  return &GC_back_edge_space[GC_n_back_edge_structs++];
}

/*
 * Deallocate `p` and its associated continuation structures: the whole
 * chain (linked by `cont`) is put in front of the list of available
 * structures.  `p` should be non-`NULL`.
 */
static void
deallocate_back_edges(back_edges *p)
{
  back_edges *tail = p;

  /* Find the last structure of the chain. */
  while (tail->cont != NULL)
    tail = tail->cont;

  tail->cont = GC_avail_back_edges;
  GC_avail_back_edges = p;
}

/* The initial capacity (before the page-size rounding) of the stack. */
#  define INITIAL_IN_PROGRESS 10000

/*
 * Push `p` onto the stack of the objects whose height computation is in
 * progress.  The stack storage is obtained by `GC_os_get_mem()`; it is
 * created on the first push and doubled each time it becomes full.
 * Aborts if the storage cannot be obtained.
 */
static void
push_in_progress(ptr_t p)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (GC_backgraph_n_in_progress >= GC_backgraph_in_progress_size) {
    ptr_t *old_space = GC_backgraph_in_progress_space;
    ptr_t *grown_space;

    GC_ASSERT(GC_page_size != 0);
    /* Decide on the new capacity (in elements). */
    if (old_space != NULL) {
      GC_backgraph_in_progress_size *= 2;
    } else {
      GC_ASSERT(0 == GC_backgraph_n_in_progress);
      GC_ASSERT(0 == GC_backgraph_in_progress_size);
      GC_backgraph_in_progress_size
          = ROUNDUP_PAGESIZE_IF_MMAP(INITIAL_IN_PROGRESS * sizeof(ptr_t))
            / sizeof(ptr_t);
    }

    grown_space = (ptr_t *)GC_os_get_mem(GC_backgraph_in_progress_size
                                         * sizeof(ptr_t));
    /* Preserve the existing elements (if there is where to copy to). */
    if (old_space != NULL && grown_space != NULL)
      BCOPY(old_space, grown_space,
            GC_backgraph_n_in_progress * sizeof(ptr_t));
#  ifndef GWW_VDB
    GC_scratch_recycle_no_gww(old_space,
                              GC_backgraph_n_in_progress * sizeof(ptr_t));
#  elif defined(LINT2)
    /* TODO: Implement GWW-aware recycling as in `alloc_mark_stack`. */
    GC_noop1_ptr(old_space);
#  endif
    GC_backgraph_in_progress_space = grown_space;
  }
  if (NULL == GC_backgraph_in_progress_space)
    ABORT("MAKE_BACK_GRAPH: Out of in-progress space: "
          "Huge linear data structure?");
  GC_backgraph_in_progress_space[GC_backgraph_n_in_progress++] = p;
}

/*
 * Return `TRUE` if `p` is currently on the in-progress stack (a linear
 * search over all the pushed elements), `FALSE` otherwise.
 */
static GC_bool
is_in_progress(const char *p)
{
  size_t idx = 0;

  while (idx < GC_backgraph_n_in_progress) {
    if (p == GC_backgraph_in_progress_space[idx])
      return TRUE;
    idx++;
  }
  return FALSE;
}

/*
 * Remove the top element of the in-progress stack.  `p` is the element
 * expected to be on the top; it is used only by the assertion.
 */
GC_INLINE void
pop_in_progress(ptr_t p)
{
#  ifndef GC_ASSERTIONS
  UNUSED_ARG(p);
#  endif
  GC_backgraph_n_in_progress--;
  GC_ASSERT(p == GC_backgraph_in_progress_space[GC_backgraph_n_in_progress]);
}

/*
 * Accessors of the (hidden) `oh_bg_ptr` field of the debug header of the
 * object with base `p`.  `GET_OH_BG_PTR` yields the revealed (plain)
 * value as `ptr_t`; `SET_OH_BG_PTR` hides `q` and stores it.
 * Both expansions are fully parenthesized to be safe in any expression
 * context.
 */
#  define GET_OH_BG_PTR(p) ((ptr_t)GC_REVEAL_POINTER(((oh *)(p))->oh_bg_ptr))
#  define SET_OH_BG_PTR(p, q) (((oh *)(p))->oh_bg_ptr = GC_HIDE_POINTER(q))

/*
 * Ensure that `p` has a `back_edges` structure associated with it.
 * If `p` holds a single plain back pointer then it becomes the first
 * edge of the newly allocated structure.
 */
static void
ensure_struct(ptr_t p)
{
  ptr_t prev_bg_ptr = GET_OH_BG_PTR(p);
  back_edges *fresh;

  GC_ASSERT(I_HOLD_LOCK());
  if ((ADDR(prev_bg_ptr) & FLAG_MANY) != 0) {
    /* The structure exists already. */
    return;
  }

  fresh = new_back_edges();
  fresh->flags = 0;
#  if defined(CPPCHECK)
  GC_noop1_ptr(&prev_bg_ptr);
  /* Workaround a false positive that `prev_bg_ptr` cannot be `NULL`. */
#  endif
  if (prev_bg_ptr != NULL) {
    fresh->n_edges = 1;
    fresh->edges[0] = prev_bg_ptr;
  } else {
    fresh->n_edges = 0;
  }
  fresh->height = HEIGHT_UNKNOWN;
  /*
   * Use the number of the previous collection, so the height is not
   * considered as computed during the current one.
   */
  fresh->height_gc_no = (unsigned short)(GC_gc_no - 1);
  GC_ASSERT(ADDR_GE((ptr_t)fresh, (ptr_t)GC_back_edge_space));
  SET_OH_BG_PTR(p, CPTR_SET_FLAGS(fresh, FLAG_MANY));
}

/*
 * Add the (forward) edge from `p` to `q` to the backward graph, i.e.
 * record `p` as a predecessor of `q`.  Both `p` and `q` are pointers to
 * the object base, i.e. pointers to an `oh`.  Nothing is recorded if
 * either of the objects has no debugging info, or if `p` is found among
 * the already recorded predecessors of `q`.
 */
static void
add_edge(ptr_t p, ptr_t q)
{
  ptr_t pred = GET_OH_BG_PTR(q);
  back_edges *be, *be_cont;
  word i;

  GC_ASSERT(p == GC_base(p) && q == GC_base(q));
  GC_ASSERT(I_HOLD_LOCK());
  if (!GC_HAS_DEBUG_INFO(q) || !GC_HAS_DEBUG_INFO(p)) {
    /*
     * This is really a misinterpreted free-list link, since we saw
     * a pointer to a free list.  Do not overwrite it!
     */
    return;
  }
#  if defined(CPPCHECK)
  GC_noop1_ptr(&pred);
#  endif
  if (NULL == pred) {
    /*
     * A not very random number we use to occasionally allocate
     * a `back_edges` structure even for a single backward edge.
     * This prevents us from repeatedly tracing back through very long
     * chains, since we will have some place to store `height` and
     * `HEIGHT_IN_PROGRESS` flag along the way.
     */
#  define GOT_LUCKY_NUMBER (((++random_number) & 0x7f) == 0)
    static unsigned random_number = 13;

    /* The first predecessor: store it directly as a plain back pointer. */
    SET_OH_BG_PTR(q, p);
    if (GOT_LUCKY_NUMBER)
      ensure_struct(q);
    return;
  }

  /* Check whether it was already in the list of predecessors. */
  {
    back_edges *e = (back_edges *)CPTR_CLEAR_FLAGS(pred, FLAG_MANY);
    word n_edges;
    word total;
    int local = 0;

    if ((ADDR(pred) & FLAG_MANY) != 0) {
      /* The predecessors are stored in the `back_edges` chain. */
      n_edges = e->n_edges;
    } else if ((COVERT_DATAFLOW(ADDR(pred)) & 1) == 0) {
      /*
       * A single plain back pointer (its least significant bit is
       * clear): `pred` itself is the only predecessor.  The negative
       * `local` prevents the loop below from reading `e->edges`.
       */
      n_edges = 1;
      local = -1;
    } else {
      /*
       * The least significant bit is set: presumably a misinterpreted
       * free-list link, thus no predecessors to examine.
       */
      n_edges = 0;
    }
    for (total = 0; total < n_edges; ++total) {
      if (local == BACKGRAPH_MAX_IN) {
        /* This structure is exhausted: proceed to its continuation. */
        e = e->cont;
        local = 0;
      }
      if (local >= 0)
        pred = e->edges[local++];
      if (pred == p)
        return;
    }
  }

  /* Not recorded yet: append `p` to the predecessors of `q`. */
  ensure_struct(q);
  be = (back_edges *)CPTR_CLEAR_FLAGS(GET_OH_BG_PTR(q), FLAG_MANY);
  /*
   * Find the last structure of the chain; `i` becomes the number of
   * edges stored in it (the total count is kept in the head only).
   */
  for (i = be->n_edges, be_cont = be; i > BACKGRAPH_MAX_IN;
       i -= BACKGRAPH_MAX_IN)
    be_cont = be_cont->cont;
  if (i == BACKGRAPH_MAX_IN) {
    /* The last structure is full: chain a new one. */
    be_cont->cont = new_back_edges();
    be_cont = be_cont->cont;
    i = 0;
  }
  be_cont->edges[i] = p;
  be->n_edges++;
#  ifdef DEBUG_PRINT_BIG_N_EDGES
  if (GC_print_stats == VERBOSE && be->n_edges == 100) {
    GC_err_printf("The following object has big in-degree:\n");
#    ifdef THREADS
    /*
     * Note: we cannot call the debug variant of `GC_print_heap_obj` here
     * because the allocator lock is held.
     */
    GC_default_print_heap_obj_proc(q);
#    else
    GC_print_heap_obj(q);
#    endif
  }
#  endif
}

typedef void (*per_object_func)(ptr_t p, size_t sz, word descr);

static GC_CALLBACK void
per_object_helper(struct hblk *h, void *fn_ptr)
{
  const hdr *hhdr = HDR(h);
  word descr = hhdr->hb_descr;
  per_object_func fn = *(per_object_func *)fn_ptr;
  size_t sz = hhdr->hb_sz;
  size_t i = 0;

  do {
    fn((ptr_t)(h->hb_body + i), sz, descr);
    i += sz;
  } while (i + sz <= HBLKSIZE);
}

/*
 * Apply `fn` to each object of the blocks enumerated by
 * `GC_apply_to_all_blocks()`.  The address of the local copy of `fn` is
 * passed as the client data, it is dereferenced by `per_object_helper()`.
 */
GC_INLINE void
GC_apply_to_each_object(per_object_func fn)
{
  GC_apply_to_all_blocks(per_object_helper, &fn);
}

/*
 * Reset the backward-graph information of the object at `p` (a per-object
 * callback, `sz` and `descr` are ignored).  A plain back pointer is
 * cleared.  A `back_edges` structure is either deallocated (unless it is
 * flagged with `RETAIN`) or emptied with its height kept.
 */
static void
reset_back_edge(ptr_t p, size_t sz, word descr)
{
  ptr_t bg_ptr;
  back_edges *be;

  UNUSED_ARG(sz);
  UNUSED_ARG(descr);
  GC_ASSERT(I_HOLD_LOCK());
  if (!GC_HAS_DEBUG_INFO(p)) {
    /* Skip any free-list links, or dropped blocks. */
    return;
  }

  bg_ptr = GET_OH_BG_PTR(p);
  if ((ADDR(bg_ptr) & FLAG_MANY) == 0) {
    /* A simple back pointer.  Clear to avoid dangling pointer. */
    SET_OH_BG_PTR(p, NULL);
    return;
  }

  be = (back_edges *)CPTR_CLEAR_FLAGS(bg_ptr, FLAG_MANY);
  if ((be->flags & RETAIN) == 0) {
    /* Not to be retained: release the whole chain. */
    deallocate_back_edges(be);
    SET_OH_BG_PTR(p, NULL);
    return;
  }

  GC_ASSERT(GC_is_marked(p));
  /*
   * Back edges may point to objects that will not be retained.
   * Delete them for now, but remember the height.  Some will be
   * added back at next collection.
   */
  be->n_edges = 0;
  if (be->cont != NULL) {
    deallocate_back_edges(be->cont);
    be->cont = NULL;
  }

  GC_ASSERT(GC_is_marked(p));
  /* We only retain things for one collection cycle at a time. */
  be->flags &= (unsigned short)~RETAIN;
}

/*
 * A per-object callback: scan the object at `p` (of `sz` bytes, with
 * descriptor `descr`) starting just past its debug header, and record
 * an edge from `p` to every heap object for which a scanned value gives
 * a non-`NULL` result of `GC_base()`.
 */
static void
add_back_edges(ptr_t p, size_t sz, word descr)
{
  ptr_t current_p = p + sizeof(oh);

  /* For now, fix up non-length descriptors conservatively. */
  if ((descr & GC_DS_TAGS) != GC_DS_LENGTH) {
    descr = sz;
  }

  /* Examine the object in the pointer-sized steps. */
  for (; ADDR_LT(current_p, p + descr); current_p += sizeof(ptr_t)) {
    ptr_t q;

    /*
     * NOTE(review): judging by its name, the macro may execute `continue`,
     * so it should stay directly in the loop body - confirm.
     */
    LOAD_PTR_OR_CONTINUE(q, current_p);
    FIXUP_POINTER(q);
    /* A quick range check before the more expensive base lookup. */
    if (GC_least_real_heap_addr < ADDR(q)
        && ADDR(q) < GC_greatest_real_heap_addr) {
      ptr_t target = (ptr_t)GC_base(q);

      if (target != NULL)
        add_edge(p, target);
    }
  }
}

/*
 * Build the backward graph by applying `add_back_edges()` to each object.
 * The allocator lock should be held.
 */
GC_INNER void
GC_build_back_graph(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_apply_to_each_object(add_back_edges);
}

/*
 * Return an approximation to the length of the longest simple path through
 * unreachable objects to `p`.  We refer to this as the height of `p`.
 * The function is recursive; 1 is returned for an object without
 * predecessors, and 0 is returned if a cycle is detected (i.e. `p` is
 * already being processed).
 */
static word
backwards_height(ptr_t p)
{
  word result;
  ptr_t pred = GET_OH_BG_PTR(p);
  back_edges *be;

  GC_ASSERT(I_HOLD_LOCK());
#  if defined(CPPCHECK)
  GC_noop1_ptr(&pred);
#  endif
  if (NULL == pred)
    return 1;
  if ((ADDR(pred) & FLAG_MANY) == 0) {
    /*
     * A single plain back pointer: there is no structure to hold
     * `HEIGHT_IN_PROGRESS`, so the explicit in-progress stack is used
     * to detect cycles.
     */
    if (is_in_progress(p)) {
      /*
       * DFS (depth-first search) back edge, i.e. we followed an edge to
       * an object already on our stack.  Ignore.
       */
      return 0;
    }
    push_in_progress(p);
    result = backwards_height(pred) + 1;
    pop_in_progress(p);
    return result;
  }
  be = (back_edges *)CPTR_CLEAR_FLAGS(pred, FLAG_MANY);
  /* Reuse the height if it has been computed during this collection. */
  if (be->height >= 0 && be->height_gc_no == (unsigned short)GC_gc_no)
    return (word)be->height;
  /* Ignore back edges in DFS. */
  if (be->height == HEIGHT_IN_PROGRESS)
    return 0;

  /* Start with the height remembered earlier (if any) as a minimum. */
  result = be->height > 0 ? (word)be->height : 1U;
  be->height = HEIGHT_IN_PROGRESS;

  {
    back_edges *e = be;
    word n_edges;
    word total;
    int local = 0;

    /*
     * NOTE(review): `pred` always has `FLAG_MANY` set at this point (the
     * opposite case has returned above), so only the first branch is
     * expected to be taken here.
     */
    if ((ADDR(pred) & FLAG_MANY) != 0) {
      n_edges = e->n_edges;
    } else if ((ADDR(pred) & 1) == 0) {
      /* A single plain back pointer: `pred` is the only predecessor. */
      n_edges = 1;
      local = -1;
    } else {
      n_edges = 0;
    }
    for (total = 0; total < n_edges; ++total) {
      word this_height;
      if (local == BACKGRAPH_MAX_IN) {
        /* This structure is exhausted: proceed to its continuation. */
        e = e->cont;
        local = 0;
      }
      if (local >= 0)
        pred = e->edges[local++];

      /*
       * Execute the following once for each predecessor `pred` of `p`
       * in the points-to graph.
       */
      if (GC_is_marked(pred) && (ADDR(GET_OH_BG_PTR(p)) & FLAG_MANY) == 0) {
        GC_COND_LOG_PRINTF("Found bogus pointer from %p to %p\n", (void *)pred,
                           (void *)p);
        /*
         * Reachable object "points to" unreachable one.  Could be caused
         * by our lax treatment of the collector descriptors.
         */
        this_height = 1;
      } else {
        this_height = backwards_height(pred);
      }
      if (this_height >= result)
        result = this_height + 1;
    }
  }

  /* Cache the result, tagged with the current collection number. */
  be->height = (GC_signed_word)result;
  be->height_gc_no = (unsigned short)GC_gc_no;
  return result;
}

/*
 * Compute the maximum height of every unreachable predecessor `p` of
 * a reachable object.  Arrange to save the heights of all such objects `p`
 * so that they can be used in calculating the height of objects in the next
 * collection.  Set `GC_backgraph_deepest_height` to be the maximum height we
 * encounter, and `GC_backgraph_deepest_obj` to be the corresponding object.
 * This is a per-object callback (`sz` and `descr` are ignored); it does
 * nothing unless `p` is marked and has debugging info.
 */
static void
update_deepest_height(ptr_t p, size_t sz, word descr)
{
  UNUSED_ARG(sz);
  UNUSED_ARG(descr);
  GC_ASSERT(I_HOLD_LOCK());
  if (GC_is_marked(p) && GC_HAS_DEBUG_INFO(p)) {
    word p_height = 0;
    ptr_t p_deepest_obj = NULL;
    ptr_t back_ptr;
    back_edges *be = NULL;

    /*
     * If we remembered a height last time, use it as a minimum.
     * It may have increased due to newly unreachable chains pointing
     * to `p`, but it cannot have decreased.
     */
    back_ptr = GET_OH_BG_PTR(p);
#  if defined(CPPCHECK)
    GC_noop1_ptr(&back_ptr);
#  endif
    if (back_ptr != NULL && (ADDR(back_ptr) & FLAG_MANY) != 0) {
      be = (back_edges *)CPTR_CLEAR_FLAGS(back_ptr, FLAG_MANY);
      if (be->height != HEIGHT_UNKNOWN)
        p_height = (word)be->height;
    }

    {
      ptr_t pred = back_ptr;
      back_edges *e = (back_edges *)CPTR_CLEAR_FLAGS(pred, FLAG_MANY);
      word n_edges;
      word total;
      int local = 0;

      if ((ADDR(pred) & FLAG_MANY) != 0) {
        /* The predecessors are stored in the `back_edges` chain. */
        n_edges = e->n_edges;
      } else if (pred != NULL && (ADDR(pred) & 1) == 0) {
        /*
         * A single plain back pointer (non-`NULL`, with the least
         * significant bit clear): `pred` itself is the only predecessor.
         * The negative `local` prevents the loop below from reading
         * `e->edges`.
         */
        n_edges = 1;
        local = -1;
      } else {
        /* No predecessors to examine. */
        n_edges = 0;
      }
      for (total = 0; total < n_edges; ++total) {
        if (local == BACKGRAPH_MAX_IN) {
          /* This structure is exhausted: proceed to its continuation. */
          e = e->cont;
          local = 0;
        }
        if (local >= 0)
          pred = e->edges[local++];

        /*
         * Execute the following once for each predecessor `pred` of `p`
         * in the points-to graph.
         */
        if (!GC_is_marked(pred) && GC_HAS_DEBUG_INFO(pred)) {
          word this_height = backwards_height(pred);

          if (this_height > p_height) {
            p_height = this_height;
            p_deepest_obj = pred;
          }
        }
      }
    }

    if (p_height > 0) {
      /* Remember the height for next time. */
      if (NULL == be) {
        ensure_struct(p);
        back_ptr = GET_OH_BG_PTR(p);
        be = (back_edges *)CPTR_CLEAR_FLAGS(back_ptr, FLAG_MANY);
      }
      /* Prevent `reset_back_edge()` from deallocating the structure. */
      be->flags |= RETAIN;
      be->height = (GC_signed_word)p_height;
      be->height_gc_no = (unsigned short)GC_gc_no;
    }
    if (p_height > GC_backgraph_deepest_height) {
      GC_backgraph_deepest_height = p_height;
      GC_backgraph_deepest_obj = p_deepest_obj;
    }
  }
}

GC_INNER void
GC_traverse_back_graph(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_backgraph_deepest_height = 0;
  GC_apply_to_each_object(update_deepest_height);
  if (GC_backgraph_deepest_obj != NULL) {
    /* Keep the pointer until we can print it. */
    GC_set_mark_bit(GC_backgraph_deepest_obj);
  }
}

/*
 * Print the maximum backwards height found during the current collection.
 * If it exceeds the maximum one observed so far then the latter is updated
 * and the deepest object is printed.  Finally, the backward-graph
 * information of every object is reset.  The allocator lock should be held
 * on entry; it is released temporarily while the object is printed.
 */
void
GC_print_back_graph_stats(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_printf("Maximum backwards height of reachable objects"
            " at GC #%lu is %lu\n",
            (unsigned long)GC_gc_no,
            (unsigned long)GC_backgraph_deepest_height);
  if (GC_backgraph_deepest_height > GC_backgraph_max_deepest_h) {
    /* Fetch the object pointer before the lock is released. */
    ptr_t obj = GC_backgraph_deepest_obj;

    GC_backgraph_max_deepest_h = GC_backgraph_deepest_height;
    UNLOCK();
    GC_err_printf(
        "The following unreachable object is last in a longest chain "
        "of unreachable objects:\n");
    GC_print_heap_obj(obj);
    LOCK();
  }
  GC_COND_LOG_PRINTF("Needed max total of %d back-edge structs\n",
                     GC_n_back_edge_structs);
  GC_apply_to_each_object(reset_back_edge);
  GC_backgraph_deepest_obj = NULL;
}

#endif /* MAKE_BACK_GRAPH */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifndef NO_BLACK_LISTING

/*
 * We maintain several hash tables of `hblk` entities that have had false
 * hits.  Each contains one bit per hash bucket.  If any page in the bucket
 * has had a false hit, we assume that all of them have.
 * See the definition of `page_hash_table` in `gc_priv.h` file.
 * False hits from the stack(s) are much more dangerous than false hits
 * from elsewhere, since the former can pin a large object that spans the
 * block, even though it does not start on the dangerous block.
 */

/*
 * Externally callable routines are:
 *   - `GC_add_to_black_list_normal`,
 *   - `GC_add_to_black_list_stack`,
 *   - `GC_promote_black_lists`.
 */

/*
 * Set to an initial guess by `GC_bl_init()` and recomputed (from the heap
 * size and the amount of stack-black-listed bytes) by
 * `GC_promote_black_lists()`.
 */
GC_INNER word GC_black_list_spacing = 0;

/* Zero the black list `bl`, which is `sizeof(page_hash_table)` bytes. */
STATIC void
GC_clear_bl(word *bl)
{
  BZERO(bl, sizeof(page_hash_table));
}

/*
 * Copy the black list `old` to `dest`; `sizeof(page_hash_table)` bytes
 * are copied.
 */
STATIC void
GC_copy_bl(const word *old, word *dest)
{
  BCOPY(old, dest, sizeof(page_hash_table));
}

#  ifdef PRINT_BLACK_LIST
/*
 * Report (by `GC_err_printf()`) that `p` is being black-listed because of
 * a reference from `source`.  `kind_str` names the black list.  If
 * `source` is not inside a heap object (as determined by `GC_base()`) then
 * it is reported as coming from the root set, or from a register if
 * `source` is `NULL`.
 */
STATIC void
GC_print_blacklisted_ptr(ptr_t p, ptr_t source, const char *kind_str)
{
  ptr_t base = (ptr_t)GC_base(source);

  if (base != NULL) {
    /*
     * FIXME: We cannot call the debug variant of `GC_print_heap_obj`
     * (with `PRINT_CALL_CHAIN`) here because the allocator lock is held
     * and the world is stopped.
     */
    GC_err_printf("Black listing (%s) %p referenced from %p in"
                  " object at %p of appr. %lu bytes\n",
                  kind_str, (void *)p, (void *)source, (void *)base,
                  (unsigned long)GC_size(base));
    return;
  }

  GC_err_printf("Black listing (%s) %p referenced from %p in %s\n", kind_str,
                (void *)p, (void *)source,
                source != NULL ? "root set" : "register");
}
#  endif /* PRINT_BLACK_LIST */

/*
 * Allocate (by `GC_scratch_alloc()`) and clear the "normal" black lists,
 * unless this has been done already.  Prints a message and exits if
 * there is not enough memory.
 */
GC_INNER void
GC_bl_init_no_interiors(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (GC_incomplete_normal_bl != NULL) {
    /* Already initialized. */
    return;
  }

  GC_old_normal_bl = (word *)GC_scratch_alloc(sizeof(page_hash_table));
  GC_incomplete_normal_bl = (word *)GC_scratch_alloc(sizeof(page_hash_table));
  if (NULL == GC_old_normal_bl || NULL == GC_incomplete_normal_bl) {
    GC_err_printf("Insufficient memory for black list\n");
    EXIT();
  }
  GC_clear_bl(GC_old_normal_bl);
  GC_clear_bl(GC_incomplete_normal_bl);
}

/*
 * Initialize the black-listing: set the initial black-list spacing,
 * set up the "normal" black lists (only if not all interior pointers
 * are recognized), then allocate and clear the stack black lists.
 * Prints a message and exits if there is not enough memory.
 */
GC_INNER void
GC_bl_init(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  /* This is just an initial guess. */
  GC_black_list_spacing = MINHINCR * HBLKSIZE;
  if (!GC_all_interior_pointers)
    GC_bl_init_no_interiors();

  GC_ASSERT(NULL == GC_old_stack_bl);
  GC_ASSERT(NULL == GC_incomplete_stack_bl);
  GC_old_stack_bl = (word *)GC_scratch_alloc(sizeof(page_hash_table));
  GC_incomplete_stack_bl = (word *)GC_scratch_alloc(sizeof(page_hash_table));
  if (NULL == GC_old_stack_bl || NULL == GC_incomplete_stack_bl) {
    GC_err_printf("Insufficient memory for black list\n");
    EXIT();
  }
  GC_clear_bl(GC_old_stack_bl);
  GC_clear_bl(GC_incomplete_stack_bl);
}

static word compute_total_stack_black_listed(void);

GC_INNER void
GC_promote_black_lists(void)
{
  word *very_old_normal_bl = GC_old_normal_bl;
  word *very_old_stack_bl = GC_old_stack_bl;
  word total_stack_black_listed; /*< number of bytes on stack blacklist */

  GC_ASSERT(I_HOLD_LOCK());
  GC_old_normal_bl = GC_incomplete_normal_bl;
  GC_old_stack_bl = GC_incomplete_stack_bl;
  if (!GC_all_interior_pointers) {
    GC_clear_bl(very_old_normal_bl);
  }
  GC_clear_bl(very_old_stack_bl);
  GC_incomplete_normal_bl = very_old_normal_bl;
  GC_incomplete_stack_bl = very_old_stack_bl;
  total_stack_black_listed = compute_total_stack_black_listed();
  GC_VERBOSE_LOG_PRINTF(
      "%lu bytes in heap blacklisted for interior pointers\n",
      (unsigned long)total_stack_black_listed);
  if (total_stack_black_listed != 0)
    GC_black_list_spacing
        = HBLKSIZE * (GC_heapsize / total_stack_black_listed);
  if (GC_black_list_spacing < 3 * HBLKSIZE)
    GC_black_list_spacing = 3 * HBLKSIZE;
  if (GC_black_list_spacing > MAXHINCR * HBLKSIZE) {
    /*
     * Make it easier to allocate really huge blocks, which otherwise may
     * have problems with nonuniform blacklist distributions.
     * This way we should always succeed immediately after growing the heap.
     */
    GC_black_list_spacing = MAXHINCR * HBLKSIZE;
  }
}

/*
 * Overwrite the incomplete black lists with the content of the old ones.
 * The "normal" list is copied only if not all interior pointers are
 * recognized.
 */
GC_INNER void
GC_unpromote_black_lists(void)
{
  if (!GC_all_interior_pointers) {
    GC_copy_bl(GC_old_normal_bl, GC_incomplete_normal_bl);
  }
  GC_copy_bl(GC_old_stack_bl, GC_incomplete_stack_bl);
}

/*
 * Set the entry with the given index in a black list.  The "concurrent"
 * variant of the setter is used only in case of the parallel marker and
 * the thread sanitizer both enabled.
 */
#  if defined(PARALLEL_MARK) && defined(THREAD_SANITIZER)
#    define backlist_set_pht_entry_from_index(db, index) \
      set_pht_entry_from_index_concurrent(db, index)
#  else
/*
 * It is safe to set a bit in a blacklist even without synchronization,
 * the only drawback is that we might have to redo black-listing sometimes.
 */
#    define backlist_set_pht_entry_from_index(bl, index) \
      set_pht_entry_from_index(bl, index)
#  endif

/*
 * Add `p` to the incomplete "normal" black list if its offset within
 * a pointer-sized unit is a valid one (according to
 * `GC_modws_valid_offsets`) and, either there is no block header for `p`,
 * or `p` is in the old "normal" black list.  `source` (if present) is used
 * only for printing.
 */
#  ifdef PRINT_BLACK_LIST
GC_INNER void
GC_add_to_black_list_normal(ptr_t p, ptr_t source)
#  else
GC_INNER void
GC_add_to_black_list_normal(ptr_t p)
#  endif
{
  size_t index;

#  ifndef PARALLEL_MARK
  GC_ASSERT(I_HOLD_LOCK());
#  endif
  if (!GC_modws_valid_offsets[ADDR(p) & (sizeof(ptr_t) - 1)])
    return;

  index = PHT_HASH(p);
  if (HDR(p) != NULL && !get_pht_entry_from_index(GC_old_normal_bl, index)) {
    /*
     * This is probably just an interior pointer to an allocated object,
     * and is not worth black listing.
     */
    return;
  }

#  ifdef PRINT_BLACK_LIST
  /* Report only if the entry is not set yet. */
  if (!get_pht_entry_from_index(GC_incomplete_normal_bl, index)) {
    GC_print_blacklisted_ptr(p, source, "normal");
  }
#  endif
  backlist_set_pht_entry_from_index(GC_incomplete_normal_bl, index);
}

/*
 * Add `p` to the incomplete stack black list if, either there is no block
 * header for `p`, or `p` is in the old stack black list.  `source` (if
 * present) is used only for printing.
 */
#  ifdef PRINT_BLACK_LIST
GC_INNER void
GC_add_to_black_list_stack(ptr_t p, ptr_t source)
#  else
GC_INNER void
GC_add_to_black_list_stack(ptr_t p)
#  endif
{
  size_t index = PHT_HASH(p);

#  ifndef PARALLEL_MARK
  GC_ASSERT(I_HOLD_LOCK());
#  endif
  if (HDR(p) != NULL && !get_pht_entry_from_index(GC_old_stack_bl, index))
    return;

#  ifdef PRINT_BLACK_LIST
  /* Report only if the entry is not set yet. */
  if (!get_pht_entry_from_index(GC_incomplete_stack_bl, index)) {
    GC_print_blacklisted_ptr(p, source, "stack");
  }
#  endif
  backlist_set_pht_entry_from_index(GC_incomplete_stack_bl, index);
}

#endif /* !NO_BLACK_LISTING */

/*
 * Check the blocks of the `len`-byte range starting at `h` against the
 * black lists (both the old and the incomplete ones).  If not all
 * interior pointers are recognized then the first block is checked against
 * the "normal" lists; every block of the range is checked against the
 * stack lists.  Return the address of the block following the first
 * black-listed one found, or `NULL` if none is found (always `NULL` if
 * the black-listing is disabled at compile time).
 */
GC_API struct GC_hblk_s *GC_CALL
GC_is_black_listed(struct GC_hblk_s *h, size_t len)
{
#ifdef NO_BLACK_LISTING
  UNUSED_ARG(h);
  UNUSED_ARG(len);
#else
  size_t index = PHT_HASH(h);
  size_t nblocks;
  size_t blk = 0;

  if (!GC_all_interior_pointers
      && (get_pht_entry_from_index(GC_old_normal_bl, index)
          || get_pht_entry_from_index(GC_incomplete_normal_bl, index)))
    return h + 1;

  nblocks = divHBLKSZ(len);
  for (;;) {
    size_t word_idx = divWORDSZ(index);

    if (0 == GC_old_stack_bl[word_idx]
        && 0 == GC_incomplete_stack_bl[word_idx]) {
      /*
       * An easy case: no entry is set in the entire word of the table,
       * so skip all the blocks covered by the rest of the word.
       */
      blk += CPP_WORDSZ - modWORDSZ(index);
    } else if (get_pht_entry_from_index(GC_old_stack_bl, index)
               || get_pht_entry_from_index(GC_incomplete_stack_bl, index)) {
      return &h[blk + 1];
    } else {
      blk++;
    }
    if (blk >= nblocks)
      break;
    index = PHT_HASH(h + blk);
  }
#endif
  return NULL;
}

#ifndef NO_BLACK_LISTING
/*
 * Return the number of black-listed blocks in the range from `start`
 * (inclusive) to `endp1` (exclusive).  Used only for statistical
 * purposes.  Looks only at the `GC_old_stack_bl`.
 */
STATIC word
GC_number_stack_black_listed(struct hblk *start, struct hblk *endp1)
{
  word count = 0;
  struct hblk *blk = start;

  while (ADDR_LT((ptr_t)blk, (ptr_t)endp1)) {
    size_t index = PHT_HASH(blk);

    if (get_pht_entry_from_index(GC_old_stack_bl, index))
      count++;
    blk++;
  }
  return count;
}

/*
 * Return the total number of (stack) black-listed bytes, i.e. the number
 * of the black-listed blocks in all the heap sections multiplied by the
 * block size.
 */
static word
compute_total_stack_black_listed(void)
{
  word n_blocks = 0;
  size_t sect;

  for (sect = 0; sect < GC_n_heap_sects; sect++) {
    struct hblk *first = (struct hblk *)GC_heap_sects[sect].hs_start;
    struct hblk *past_last = first + divHBLKSZ(GC_heap_sects[sect].hs_bytes);

    n_blocks += GC_number_stack_black_listed(first, past_last);
  }
  return n_blocks * HBLKSIZE;
}
#endif /* !NO_BLACK_LISTING */

/*
 * Copyright (c) 1992-1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifdef CHECKSUMS

/*
 * This is debugging code intended to verify the results of dirty bit
 * computations.  Currently works only in a single-threaded environment.
 */

/* Number of entries in both the `GC_sums` and the `GC_faulted` tables. */
#  define NSUMS 10000
/*
 * Displacement added (by `struct hblk` pointer arithmetic) to a block
 * address before it is stored in the `block` field of `page_entry`.
 */
#  define OFFSET 0x10000

/* The checksum record kept for one heap block. */
typedef struct {
  /* Set once `new_sum` has been computed for this entry. */
  GC_bool new_valid;
  /* The checksum from the previous check (the former `new_sum`). */
  word old_sum;
  /* The most recently computed checksum. */
  word new_sum;

  /*
   * Block to which this refers plus `OFFSET` to hide it from the
   * garbage collector.
   */
  struct hblk *block;
} page_entry;

/* The table of checksum records, indexed in heap traversal order. */
page_entry GC_sums[NSUMS] = { { FALSE, 0, 0, NULL } };

/* Record of pages on which we saw a write fault. */
STATIC word GC_faulted[NSUMS] = { 0 };

/* Number of entries of `GC_faulted` that are currently in use. */
STATIC size_t GC_n_faulted = 0;

#  ifndef GC_NO_DEINIT
/*
 * Clear the whole checksum table and reset the count of logged write
 * faults.
 */
void
GC_reset_check_page(void)
{
  GC_n_faulted = 0;
  BZERO(GC_sums, sizeof(GC_sums));
}
#  endif

#  ifdef MPROTECT_VDB
void
GC_record_fault(struct hblk *h)
{
  GC_ASSERT(GC_page_size != 0);
  if (GC_n_faulted >= NSUMS)
    ABORT("write fault log overflowed");
  GC_faulted[GC_n_faulted++] = ADDR(HBLK_PAGE_ALIGNED(h));
}
#  endif

/*
 * Tell whether the page-aligned address of `h` is present in the
 * write-fault log.
 */
STATIC GC_bool
GC_was_faulted(struct hblk *h)
{
  word wanted = ADDR(HBLK_PAGE_ALIGNED(h));
  size_t pos = 0;

  while (pos < GC_n_faulted) {
    if (wanted == GC_faulted[pos])
      return TRUE;
    pos++;
  }
  return FALSE;
}

/*
 * Compute the checksum of block `h`: the sum of all its words, with
 * `SIGNB` forced on in the result.
 */
STATIC word
GC_checksum(struct hblk *h)
{
  word *cur = (word *)h;
  word *end = (word *)(h + 1);
  word sum = 0;

  while (ADDR_LT((ptr_t)cur, (ptr_t)end)) {
    sum += *cur;
    cur++;
  }
  /* Setting `SIGNB` makes the value not look like a pointer. */
  return sum | SIGNB;
}

/*
 * Statistics gathered by one `GC_check_dirty()` pass (all four are
 * reset to zero at the start of each pass).
 */

/* Blocks whose checksum changed although a dirty-bit query said clean. */
int GC_n_dirty_errors = 0;
/* The subset of the above for which a write fault had been logged. */
int GC_n_faulted_dirty_errors = 0;
/* Blocks for which `GC_page_was_dirty()` returned false. */
unsigned long GC_n_clean = 0;
/* Blocks for which `GC_page_was_dirty()` returned true. */
unsigned long GC_n_dirty = 0;

/*
 * Refresh the checksum record number `index` for block `h` and update
 * the dirty-bit statistics.  The previous checksum is kept in `old_sum`.
 * A dirty-bit error is counted if the record was already valid, the
 * block holds pointers, the checksum has changed, and yet the block is
 * not reported as dirty (or as ever dirty).  Aborts if the record is
 * already bound to a different block.
 */
STATIC void
GC_update_check_page(struct hblk *h, int index)
{
  page_entry *entry = GC_sums + index;
  hdr *blk_hdr = HDR(h);

  if (entry->block != 0 && entry->block != h + OFFSET)
    ABORT("goofed");
  entry->old_sum = entry->new_sum;
  entry->new_sum = GC_checksum(h);
#  if !defined(MSWIN32) && !defined(MSWINCE)
  /* A checksum of just `SIGNB` means that all the words sum to zero. */
  if (entry->new_sum != SIGNB && !GC_page_was_ever_dirty(h)) {
    GC_err_printf("GC_page_was_ever_dirty(%p) is wrong\n", (void *)h);
  }
#  endif
  if (GC_page_was_dirty(h)) {
    GC_n_dirty++;
  } else {
    GC_n_clean++;
  }
  if (blk_hdr != NULL) {
    (void)GC_find_starting_hblk(h, &blk_hdr);
    if (entry->new_valid
#  ifdef SOFT_VDB
        && !HBLK_IS_FREE(blk_hdr)
#  endif
        && !IS_PTRFREE(blk_hdr) && entry->old_sum != entry->new_sum
        && (!GC_page_was_dirty(h) || !GC_page_was_ever_dirty(h))) {
      GC_n_dirty_errors++; /*< set breakpoint here */
      if (GC_was_faulted(h))
        GC_n_faulted_dirty_errors++;
    }
  }
  entry->new_valid = TRUE;
  entry->block = h + OFFSET;
}

/*
 * Verify the dirty bits against the block checksums for up to `NSUMS`
 * heap blocks (visited in heap section order), report the statistics,
 * and then wipe the write-fault log.  Should be called immediately
 * after `GC_read_dirty`.
 */
void
GC_check_dirty(void)
{
  int n_checked = 0;
  size_t sect;

  GC_n_dirty_errors = 0;
  GC_n_faulted_dirty_errors = 0;
  GC_n_clean = 0;
  GC_n_dirty = 0;

  /* Stop as soon as all the entries of the checksum table are used. */
  for (sect = 0; sect < GC_n_heap_sects && n_checked < NSUMS; sect++) {
    ptr_t sect_start = GC_heap_sects[sect].hs_start;
    struct hblk *h = (struct hblk *)sect_start;

    while (ADDR_LT((ptr_t)h, sect_start + GC_heap_sects[sect].hs_bytes)
           && n_checked < NSUMS) {
      GC_update_check_page(h, n_checked);
      n_checked++;
      h++;
    }
  }

  GC_COND_LOG_PRINTF("Checked %lu clean and %lu dirty pages\n", GC_n_clean,
                     GC_n_dirty);
  if (GC_n_dirty_errors > 0) {
    GC_err_printf("Found %d dirty bit errors (%d were faulted)\n",
                  GC_n_dirty_errors, GC_n_faulted_dirty_errors);
  }
  if (GC_n_faulted > 0) {
    /* Do not expose block addresses to the garbage collector. */
    BZERO(GC_faulted, GC_n_faulted * sizeof(GC_faulted[0]));
    GC_n_faulted = 0;
  }
}

#endif /* CHECKSUMS */

/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * Private declarations of the collector marker data structures (like the
 * mark stack) and macros.  Needed by the marker and the client-supplied
 * mark routines.  Transitively includes `gc_priv.h` file.
 */

#ifndef GC_PMARK_H
#define GC_PMARK_H

#if defined(HAVE_CONFIG_H) && !defined(GC_PRIVATE_H)
/*
 * When `gc_pmark.h` file is included from `gc_priv.h` file, some of
 * macros might be undefined in `gcconfig.h` file, so skip `config.h`
 * file in this case.
 */
#endif

#ifndef GC_BUILD
#  define GC_BUILD
#endif

#if (defined(__linux__) || defined(__GLIBC__) || defined(__GNU__)) \
    && !defined(_GNU_SOURCE) && defined(GC_PTHREADS)               \
    && !defined(GC_NO_PTHREAD_SIGMASK)
#  define _GNU_SOURCE 1
#endif

#if defined(KEEP_BACK_PTRS) || defined(PRINT_BLACK_LIST)
#endif




EXTERN_C_BEGIN

/*
 * The real declarations of the following are in `gc_priv.h` file, so
 * that we can avoid scanning `GC_mark_procs` table.
 */

/*
 * Mark descriptor stuff that should remain private for now, mostly
 * because it is hard to export `CPP_WORDSZ` macro without including
 * `gcconfig.h` file.
 */

/* Number of bits of a descriptor word that remain after the tag bits. */
#define BITMAP_BITS (CPP_WORDSZ - GC_DS_TAG_BITS)
/*
 * The `GC_mark_procs` entry selected by the mark procedure index, which
 * is stored in `descr` just above the tag bits.
 */
#define PROC(descr) \
  (GC_mark_procs[((descr) >> GC_DS_TAG_BITS) & (GC_MAX_MARK_PROCS - 1)])
/* The part of `descr` above the tag and the mark procedure index bits. */
#define ENV(descr) ((descr) >> (GC_DS_TAG_BITS + GC_LOG_MAX_MARK_PROCS))
/* The largest value that fits in the bits extracted by `ENV()`. */
#define MAX_ENV (((word)1 << (BITMAP_BITS - GC_LOG_MAX_MARK_PROCS)) - 1)

/*
 * NOTE(review): presumably the number of `GC_mark_procs` entries
 * handed out so far - confirm against its definition.
 */
GC_EXTERN unsigned GC_n_mark_procs;

/* Number of mark stack entries to discard on overflow. */
#define GC_MARK_STACK_DISCARDS (INITIAL_MARK_STACK_SIZE / 8)

#ifdef PARALLEL_MARK
/*
 * Allow multiple threads to participate in the marking process.
 * This works roughly as follows:
 *   - The main mark stack never shrinks, but it can grow.
 *   - The initiating thread holds the allocator lock, sets
 *     `GC_help_wanted`.
 *   - Other threads:
 *     1. Update `GC_helper_count` (while holding the mark lock).
 *     2. Allocate a local mark stack repeatedly:
 *        2.1. Steal a global mark stack entry by atomically replacing
 *             its descriptor with 0;
 *        2.2. Copy it to the local stack;
 *        2.3. Mark on the local stack until it is empty, or it may be
 *             profitable to copy it back;
 *        2.4. If necessary, copy local stack to global one, holding the
 *             mark lock;
 *        2.5. Stop when the global mark stack is empty.
 *     3. Decrement `GC_helper_count` (holding the mark lock).
 *
 * This is an experiment to see if we can do something along the lines
 * of the University of Tokyo SGC in a less intrusive, though probably
 * also less performant, way.
 */

/* `GC_mark_stack_top` is protected by the mark lock. */

/*
 * `GC_notify_all_marker()` is used when `GC_help_wanted` is first set,
 * when the last helper becomes inactive, when something is added to the
 * global mark stack, and just after `GC_mark_no` is incremented.
 * This could be split into multiple conditional variables (and probably
 * should be) to scale to really large numbers of processors.
 */
#endif /* PARALLEL_MARK */

/*
 * Push the object `obj`, whose heap block header is `hhdr`, onto the
 * mark stack using the descriptor stored in the header.  Nothing is
 * pushed for a pointer-free block.  Returns the (possibly updated)
 * `mark_stack_top` value.
 */
GC_INLINE mse *
GC_push_obj(ptr_t obj, const hdr *hhdr, mse *mark_stack_top,
            mse *mark_stack_limit)
{
  GC_ASSERT(!HBLK_IS_FREE(hhdr));
  if (IS_PTRFREE(hhdr))
    return mark_stack_top;

  return GC_custom_push_proc(hhdr->hb_descr, obj, mark_stack_top,
                             mark_stack_limit);
}

/*
 * Push the contents of `current` onto the mark stack if it is a valid
 * pointer to a currently unmarked object.  Mark it.
 * The header is looked up with `HC_GET_HDR()`, which leaves the
 * `do`-`while` body by `break` (so nothing is pushed) when the lookup
 * does not succeed.  `mark_stack_top` must be an lvalue, it is assigned
 * the updated value.  The offset check is always requested.
 */
#define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, source)   \
  do {                                                                     \
    hdr *my_hhdr;                                                          \
    HC_GET_HDR(current, my_hhdr, source); /*< contains `break` */          \
    mark_stack_top = GC_push_contents_hdr(                                 \
        current, mark_stack_top, mark_stack_limit, source, my_hhdr, TRUE); \
  } while (0)

/*
 * Set mark bit, exit (using `break` statement) if it is already set.
 * `hhdr` is the block header holding the `hb_marks` table and `bit_no`
 * is the index of the mark within that table.  The macro must be used
 * directly inside a loop (or `do`-`while`) body, as its `break` is meant
 * to terminate the enclosing construct.  Four variants are provided:
 * byte-per-mark or bit-per-mark, each either with or without atomic
 * accesses.
 */
#ifdef USE_MARK_BYTES
#  if defined(PARALLEL_MARK) && defined(AO_HAVE_char_store) \
      && !defined(BASE_ATOMIC_OPS_EMULATED)
/*
 * There is a race here, and we may set the bit twice in the concurrent
 * case.  This can result in the object being pushed twice.  But that is
 * only a performance issue.
 */
#    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                 \
      { /*< cannot use `do ... while (0)` here */                  \
        volatile unsigned char *mark_byte_addr                     \
            = (unsigned char *)(hhdr)->hb_marks + (bit_no);        \
        /* Unordered atomic load and store are sufficient here. */ \
        if (AO_char_load(mark_byte_addr) != 0)                     \
          break; /*< go to the enclosing loop end */               \
        AO_char_store(mark_byte_addr, 1);                          \
      }
#  else
/* The plain (non-atomic) variant for the byte-per-mark layout. */
#    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                 \
      { /*< cannot use `do ... while (0)` here */                  \
        ptr_t mark_byte_addr = (ptr_t)(hhdr)->hb_marks + (bit_no); \
                                                                   \
        if (*mark_byte_addr != 0)                                  \
          break; /*< go to the enclosing loop end */               \
        *mark_byte_addr = 1;                                       \
      }
#  endif /* !PARALLEL_MARK */
#else
#  if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS))
/* Read a mark word; an atomic load is used only for the TSan builds. */
#    ifdef THREAD_SANITIZER
#      define MARK_WORD_READ(addr) AO_load(addr)
#    else
#      define MARK_WORD_READ(addr) (*(addr))
#    endif
/*
 * This is used only if we explicitly define `USE_MARK_BITS` macro.
 * The following may fail to exit even if the bit was already set.
 * For our uses, that is benign.
 */
#    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                            \
      { /*< cannot use `do ... while (0)` here */                             \
        volatile AO_t *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
        word my_bits = (word)1 << modWORDSZ(bit_no);                          \
                                                                              \
        if ((MARK_WORD_READ(mark_word_addr) & my_bits) != 0)                  \
          break; /*< go to the enclosing loop end */                          \
        AO_or(mark_word_addr, my_bits);                                       \
      }
#  else /* !PARALLEL_MARK */
/* The plain (non-atomic) variant for the bit-per-mark layout. */
#    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                   \
      { /*< cannot use `do ... while (0)` here */                    \
        word *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
        word old = *mark_word_addr;                                  \
        word my_bits = (word)1 << modWORDSZ(bit_no);                 \
                                                                     \
        if ((old & my_bits) != 0)                                    \
          break; /*< go to the enclosing loop end */                 \
        *(mark_word_addr) = old | my_bits;                           \
      }
#  endif
#endif /* !USE_MARK_BYTES */

/*
 * Tracing helpers.  If `ENABLE_TRACE` macro is defined, then `TRACE()`
 * executes `cmd` only when `source` equals to `GC_trace_ptr`, and
 * `TRACE_TARGET()` executes `cmd` only when `GC_trace_ptr` is a heap
 * pointer and the value stored at it equals to `target`.  Otherwise both
 * macros expand to nothing.  Note that the enabled variants expand to
 * an unbraced `if` statement, so these should be used only as complete
 * statements.
 */
#ifdef ENABLE_TRACE
#  define TRACE(source, cmd)                                     \
    if (GC_trace_ptr != NULL && (ptr_t)(source) == GC_trace_ptr) \
    cmd
#  define TRACE_TARGET(target, cmd)                          \
    if (GC_trace_ptr != NULL && GC_is_heap_ptr(GC_trace_ptr) \
        && (target) == *(ptr_t *)GC_trace_ptr)               \
    cmd
#else
#  define TRACE(source, cmd)
#  define TRACE_TARGET(source, cmd)
#endif

/*
 * If the mark bit corresponding to `current` is not set, set it, and
 * push the contents of the object on the mark stack.  `current` points
 * to the beginning of the object.  We rely on the fact that the
 * preceding header calculation will succeed for a pointer past the
 * first page of an object, only if it is in fact a valid pointer
 * to the object.  Thus we can omit the otherwise necessary tests here.
 *
 * `hhdr` is the header of the block containing `current`.  `source` is
 * passed on to the black-listing, tracing and back-pointer recording
 * macros.  If `do_offset_check` is true, then a displacement within the
 * object that is not registered in `GC_valid_offsets` causes `current`
 * to be black-listed and ignored.  Returns the updated `mark_stack_top`
 * value (unchanged if nothing was pushed).
 */
GC_INLINE mse *
GC_push_contents_hdr(ptr_t current, mse *mark_stack_top, mse *mark_stack_limit,
                     ptr_t source, hdr *hhdr, GC_bool do_offset_check)
{
  /* The loop body is executed once; `break` is used as an early exit. */
  do {
    /*
     * Displacement in the block, in bytes; always within range.
     * Note, in particular, that this value is the displacement from the
     * beginning of the heap block, which may itself be in the interior
     * of a large object.  If `current` does not point to the first block,
     * then we are in the all-interior-pointers mode, and it is safe to
     * use any displacement value.
     */
    size_t displ = HBLKDISPL(current);
    ptr_t base = current;
#ifdef MARK_BIT_PER_OBJ
    unsigned32 gran_displ; /*< `high_prod` */
    unsigned32 inv_sz = hhdr->hb_inv_sz;

#else
    size_t gran_displ = BYTES_TO_GRANULES(displ);
    size_t gran_offset = hhdr->hb_map[gran_displ];
    size_t byte_offset = displ & (GC_GRANULE_BYTES - 1);

    /* The following always fails for large block references. */
    if (UNLIKELY((gran_offset | byte_offset) != 0))
#endif
    {
#ifdef MARK_BIT_PER_OBJ
      if (UNLIKELY(inv_sz == LARGE_INV_SZ))
#else
      if ((hhdr->hb_flags & LARGE_BLOCK) != 0)
#endif
      {
        /* `gran_offset` is bogus. */
        size_t obj_displ;

        /* A large object starts at the beginning of its first block. */
        base = (ptr_t)hhdr->hb_block;
        obj_displ = (size_t)(current - base);
        if (obj_displ != displ) {
          GC_ASSERT(obj_displ < hhdr->hb_sz);
          /*
           * Must be in the all-interior-pointers mode, non-first block
           * already did validity check on cache miss.
           */
        } else if (do_offset_check && !GC_valid_offsets[obj_displ]) {
          GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
          break;
        }
        GC_ASSERT(hhdr->hb_sz > HBLKSIZE
                  || hhdr->hb_block == HBLKPTR(current));
        GC_ASSERT(ADDR_GE(current, (ptr_t)hhdr->hb_block));
        /* The mark of a large object is the first one of the block. */
        gran_displ = 0;
      } else {
#ifdef MARK_BIT_PER_OBJ
        unsigned32 low_prod;

        LONG_MULT(gran_displ, low_prod, (unsigned32)displ, inv_sz);
        if ((low_prod >> 16) != 0)
#endif
        {
          size_t obj_displ;

#ifdef MARK_BIT_PER_OBJ
          /* Accurate enough if `HBLKSIZE` is not greater than 2**15. */
          GC_STATIC_ASSERT(HBLKSIZE <= (1 << 15));
          obj_displ = (((low_prod >> 16) + 1) * hhdr->hb_sz) >> 16;
#else
          obj_displ = GRANULES_TO_BYTES(gran_offset) + byte_offset;
#endif

          if (do_offset_check && !GC_valid_offsets[obj_displ]) {
            GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
            break;
          }
#ifndef MARK_BIT_PER_OBJ
          gran_displ -= gran_offset;
#endif
          /* Move back to the beginning of the object. */
          base -= obj_displ;
        }
      }
    }
#ifdef MARK_BIT_PER_OBJ
    /*
     * May get here for pointer to start of block not at the beginning
     * of object.  If so, it is valid, and we are fine.
     */
    GC_ASSERT(gran_displ <= HBLK_OBJS(hhdr->hb_sz));
#else
    GC_ASSERT(hhdr == GC_find_header(base));
    GC_ASSERT(gran_displ % BYTES_TO_GRANULES(hhdr->hb_sz) == 0);
#endif
    TRACE(source, GC_log_printf("GC #%lu: passed validity tests\n",
                                (unsigned long)GC_gc_no));
    SET_MARK_BIT_EXIT_IF_SET(hhdr, gran_displ); /*< contains `break` */
    TRACE(source, GC_log_printf("GC #%lu: previously unmarked\n",
                                (unsigned long)GC_gc_no));
    TRACE_TARGET(base, GC_log_printf("GC #%lu: marking %p from %p instead\n",
                                     (unsigned long)GC_gc_no, (void *)base,
                                     (void *)source));
    INCR_MARKS(hhdr);
    GC_STORE_BACK_PTR(source, base);
    mark_stack_top = GC_push_obj(base, hhdr, mark_stack_top, mark_stack_limit);
  } while (0);
  return mark_stack_top;
}

/*
 * Invoke `GC_mark_and_push_stack()` for `p`.  The `source` argument is
 * passed on only if `PRINT_BLACK_LIST` or `KEEP_BACK_PTRS` macro is
 * defined; otherwise it is ignored (and not evaluated).
 */
#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
#  define PUSH_ONE_CHECKED_STACK(p, source) \
    GC_mark_and_push_stack(p, (ptr_t)(source))
#else
#  define PUSH_ONE_CHECKED_STACK(p, source) GC_mark_and_push_stack(p)
#endif

/*
 * Push a single value onto mark stack.  Mark from the object
 * pointed to by `p`.  The argument should be of `ptr_t` type.
 * Invoke `FIXUP_POINTER()` before any further processing.
 * `p` is considered valid even if it is an interior pointer.
 * Previously marked objects are not pushed.  Hence we make progress
 * even if the mark stack overflows.
 * The value is processed only if it lies strictly between
 * `GC_least_plausible_heap_addr` and `GC_greatest_plausible_heap_addr`.
 * Note that `p` is evaluated more than once.
 */
#ifdef NEED_FIXUP_POINTER
/* Try both the raw variant and the fixed up one. */
#  define GC_PUSH_ONE_STACK(p, source)                              \
    do {                                                            \
      ptr_t pp = (p);                                               \
                                                                    \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)           \
          && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) {  \
        PUSH_ONE_CHECKED_STACK(p, source);                          \
      }                                                             \
      FIXUP_POINTER(pp);                                            \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, pp)          \
          && ADDR_LT(pp, (ptr_t)GC_greatest_plausible_heap_addr)) { \
        PUSH_ONE_CHECKED_STACK(pp, source);                         \
      }                                                             \
    } while (0)
#else /* !NEED_FIXUP_POINTER */
#  define GC_PUSH_ONE_STACK(p, source)                             \
    do {                                                           \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)          \
          && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) { \
        PUSH_ONE_CHECKED_STACK(p, source);                         \
      }                                                            \
    } while (0)
#endif

/*
 * Same as `GC_PUSH_ONE_STACK`, but the interior pointers recognition as
 * for normal heap pointers.
 * `FIXUP_POINTER()` is applied directly to `p`, so the latter should be
 * a modifiable lvalue if the fix-up is not a no-op.  `mark_stack_top`
 * should be an lvalue too, it receives the value returned by
 * `GC_mark_and_push()`; `GC_mark_stack_limit` is used as the limit.
 */
#define GC_PUSH_ONE_HEAP(p, source, mark_stack_top)                   \
  do {                                                                \
    FIXUP_POINTER(p);                                                 \
    if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)               \
        && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr))        \
      mark_stack_top = GC_mark_and_push(                              \
          p, mark_stack_top, GC_mark_stack_limit, (void **)(source)); \
  } while (0)

/*
 * Mark objects pointed to by the regions described by mark stack entries
 * between `mark_stack` and `mark_stack_top`, inclusive.  Assumes the upper
 * limit of a mark stack entry is never `NULL`.  A mark stack entry never
 * has zero size.  Return the new value of `mark_stack_top`.
 * We try to traverse on the order of a `hblk` of memory before we return.
 * Caller is responsible for calling this until the mark stack is empty.
 * Note that this is the most performance critical routine in the collector.
 * Hence it contains all sorts of ugly hacks to speed things up.
 * In particular, we avoid procedure calls on the common path, we take
 * advantage of peculiarities of the mark descriptor encoding, we optionally
 * maintain a cache for the block address to header mapping, we prefetch
 * when an object is "grayed", etc.
 */
GC_INNER mse *GC_mark_from(mse *mark_stack_top, mse *mark_stack,
                           mse *mark_stack_limit);

/*
 * Run `GC_mark_from()` once on the global mark stack and store the
 * result back to `GC_mark_stack_top`.  Note that the expansion already
 * ends with a semicolon.
 */
#define MARK_FROM_MARK_STACK()                                       \
  GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, GC_mark_stack, \
                                   GC_mark_stack + GC_mark_stack_size);

/*
 * The global mark stack is considered empty when `GC_mark_stack_top`
 * is below `GC_mark_stack` (the top is inclusive).
 */
#define GC_mark_stack_empty() \
  ADDR_LT((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)

/*
 * The current state of marking, as follows.  We say something is dirty
 * if it was written since the last time we retrieved dirty bits.
 * We say it is grungy if it was marked dirty in the last set of bits
 * we retrieved.  Invariant "I": all roots and marked objects `p` are
 * either dirty, or point to objects `q` that are either marked or
 * a pointer to `q` appears in a range on the mark stack.
 */

/* No marking in progress.  "I" holds.  Mark stack is empty. */
#define MS_NONE 0

/*
 * Rescuing objects are currently being pushed.  "I" holds, except that
 * grungy roots may point to unmarked objects, as may marked grungy objects
 * above `GC_scan_ptr`.
 */
#define MS_PUSH_RESCUERS 1

/*
 * Uncollectible objects are currently being pushed.  "I" holds, except
 * that marked uncollectible objects above `GC_scan_ptr` may point to
 * unmarked objects.  Roots may point to unmarked objects too.
 */
#define MS_PUSH_UNCOLLECTABLE 2

/* "I" holds, mark stack may be nonempty. */
#define MS_ROOTS_PUSHED 3

/*
 * "I" may not hold, e.g. because of the mark stack overflow.  However,
 * marked heap objects below `GC_scan_ptr` point to marked or stacked
 * objects.
 */
#define MS_PARTIALLY_INVALID 4

/* "I" may not hold. */
#define MS_INVALID 5

EXTERN_C_END

#endif /* GC_PMARK_H */


#ifdef GC_GCJ_SUPPORT

/*
 * This is an allocator interface tuned for `gcj` (the GNU static Java
 * compiler).
 *
 * Each allocated object has a pointer in its beginning to a "vtable",
 * which for our purposes is simply a structure describing the type of
 * the object.  This descriptor structure contains a GC marking
 * descriptor at offset `GC_GCJ_MARK_DESCR_OFFSET`.
 *
 * It is hoped that this interface may also be useful for other systems,
 * possibly with some tuning of the constants.  But the immediate goal
 * is to get better `gcj` performance.
 *
 * We assume: counting on explicit initialization of this interface is OK.
 */

#  include "gc/gc_gcj.h"

/* Object kind for objects with descriptors in "vtable". */
int GC_gcj_kind = 0;

/* The kind of objects that are always marked with a mark procedure call. */
int GC_gcj_debug_kind = 0;

/*
 * The placeholder mark procedure installed when the client has passed
 * no mark procedure to `GC_init_gcj_malloc_mp()`.  It is not expected
 * to be called: it reports the problem via `ABORT_RET()` and hands back
 * `mark_stack_top` unchanged.
 */
STATIC struct GC_ms_entry *GC_CALLBACK
GC_gcj_fake_mark_proc(word *addr, struct GC_ms_entry *mark_stack_top,
                      struct GC_ms_entry *mark_stack_limit, word env)
{
#  if defined(FUNCPTR_IS_DATAPTR) && defined(CPPCHECK)
  GC_noop1((word)(GC_funcptr_uint)(&GC_init_gcj_malloc));
#  endif
  UNUSED_ARG(env);
  UNUSED_ARG(mark_stack_limit);
  UNUSED_ARG(addr);
  ABORT_RET("No client gcj mark proc is specified");
  return mark_stack_top;
}

#  ifdef FUNCPTR_IS_DATAPTR
/*
 * A variant of `GC_init_gcj_malloc_mp()` that accepts the mark
 * procedure as a data pointer and the index as a signed integer;
 * the descriptor offset is always `GC_GCJ_MARK_DESCR_OFFSET`.
 */
GC_API void GC_CALL
GC_init_gcj_malloc(int mp_index, void *mp)
{
  GC_mark_proc client_proc = CAST_THRU_UINTPTR(GC_mark_proc, mp);
  unsigned proc_index = (unsigned)mp_index;

  GC_init_gcj_malloc_mp(proc_index, client_proc, GC_GCJ_MARK_DESCR_OFFSET);
}
#  endif /* FUNCPTR_IS_DATAPTR */

/*
 * Set up the `gcj`-style allocation support.  `mp_index` is the index
 * of the `GC_mark_procs` entry to hold the client mark procedure `mp`
 * (if `mp` is null, then a placeholder that aborts when called is
 * installed instead).  `descr_offset` must be equal to
 * `GC_GCJ_MARK_DESCR_OFFSET`, otherwise the function aborts.
 * The collector is initialized if needed.  The call is a no-op if the
 * `gcj` free list has been created already.  Aborts if `mp_index` is
 * not less than `GC_n_mark_procs`.  Both `GC_gcj_kind` and
 * `GC_gcj_debug_kind` are set on return; they are the same kind if the
 * `gcj`-style type information is ignored.
 */
GC_API void GC_CALL
GC_init_gcj_malloc_mp(unsigned mp_index, GC_mark_proc mp, size_t descr_offset)
{
#  ifndef GC_IGNORE_GCJ_INFO
  GC_bool ignore_gcj_info;
#  endif

  GC_STATIC_ASSERT(GC_GCJ_MARK_DESCR_OFFSET >= sizeof(ptr_t));
  if (0 == mp) {
    /* In case `GC_DS_PROC` is unused. */
    mp = GC_gcj_fake_mark_proc;
  }

  /* Initialize the collector just in case it is not done yet. */
  GC_init();
  if (descr_offset != GC_GCJ_MARK_DESCR_OFFSET)
    ABORT("GC_init_gcj_malloc_mp: bad offset");

  LOCK();
  if (GC_gcjobjfreelist != NULL) {
    /* Already initialized. */
    UNLOCK();
    return;
  }
#  ifdef GC_IGNORE_GCJ_INFO
  /* This is useful for debugging on platforms with missing `getenv()`. */
#    define ignore_gcj_info TRUE
#  else
  ignore_gcj_info = GETENV("GC_IGNORE_GCJ_INFO") != NULL;
#  endif
  if (ignore_gcj_info) {
    GC_COND_LOG_PRINTF("Gcj-style type information is disabled!\n");
  }
  /*
   * Validate the index before it is used to access the table, so that
   * a bad value cannot cause an out-of-bounds read or write.
   */
  if (mp_index >= GC_n_mark_procs)
    ABORT("GC_init_gcj_malloc_mp: bad index");
  GC_ASSERT(GC_mark_procs[mp_index] == (GC_mark_proc)0); /*< unused */
  GC_mark_procs[mp_index] = mp;
  /* Set up object kind `gcj`-style indirect descriptor. */
  GC_gcjobjfreelist = (ptr_t *)GC_new_free_list_inner();
  if (ignore_gcj_info) {
    /*
     * Use a simple length-based descriptor, thus forcing a fully
     * conservative scan.
     */
    GC_gcj_kind = (int)GC_new_kind_inner((void **)GC_gcjobjfreelist,
                                         /* 0 | */ GC_DS_LENGTH, TRUE, TRUE);
    GC_gcj_debug_kind = GC_gcj_kind;
  } else {
    GC_gcj_kind = (int)GC_new_kind_inner(
        (void **)GC_gcjobjfreelist,
        (((word)(-(GC_signed_word)GC_GCJ_MARK_DESCR_OFFSET
                 - GC_INDIR_PER_OBJ_BIAS))
         | GC_DS_PER_OBJECT),
        FALSE, TRUE);
    /* Set up object kind for objects that require mark procedure call. */
    GC_gcj_debug_kind = (int)GC_new_kind_inner(
        GC_new_free_list_inner(),
        GC_MAKE_PROC(mp_index, 1 /* allocated with debug info */), FALSE,
        TRUE);
  }
  UNLOCK();
#  undef ignore_gcj_info
}

/*
 * Allocate an object of `lb` bytes of the `GC_gcj_kind` kind and store
 * `vtable_ptr` to its first word.  `flags` is passed on to
 * `GC_generic_malloc_inner()` on the slow path.  On an allocation
 * failure, the result of the out-of-memory handler (`GC_oom_fn`) invoked
 * with `lb` is returned, the allocator lock not being held during
 * that call.
 */
#  ifdef THREAD_LOCAL_ALLOC
GC_INNER
#  else
STATIC
#  endif
void *
GC_core_gcj_malloc(size_t lb, const void *vtable_ptr, unsigned flags)
{
  ptr_t op;
  size_t lg;

  GC_DBG_COLLECT_AT_MALLOC(lb);
  LOCK();
  /*
   * The fast path: a small object and the free list of the matching
   * size is not empty.  Note that `lg` and `op` are set inside the
   * condition.
   */
  if (SMALL_OBJ(lb)
      && (op = GC_gcjobjfreelist[lg = GC_size_map[lb]], LIKELY(op != NULL))) {
    /* Unlink the first element of the free list. */
    GC_gcjobjfreelist[lg] = (ptr_t)obj_link(op);
    GC_bytes_allocd += GRANULES_TO_BYTES((word)lg);
    GC_ASSERT(NULL == ((void **)op)[1]);
  } else {
    /*
     * A mechanism to release the allocator lock and invoke finalizers.
     * We do not really have an opportunity to do this on a rarely
     * executed path on which the allocator lock is not held.  Thus we
     * check at a rarely executed point at which it is safe to release
     * the allocator lock; we do this even where we could just call
     * `GC_notify_or_invoke_finalizers()`, since it is probably cheaper
     * and certainly more uniform.
     */
    /* TODO: Consider doing the same elsewhere? */
    if (GC_gc_no != GC_last_finalized_no) {
      UNLOCK();
      GC_notify_or_invoke_finalizers();
      LOCK();
      GC_last_finalized_no = GC_gc_no;
    }

    op = (ptr_t)GC_generic_malloc_inner(lb, GC_gcj_kind, flags);
    if (NULL == op) {
      /* Fetch the handler while the allocator lock is still held. */
      GC_oom_func oom_fn = GC_oom_fn;
      UNLOCK();
      return (*oom_fn)(lb);
    }
  }
  /* The "vtable" pointer is stored before the lock is released. */
  *(const void **)op = vtable_ptr;
  UNLOCK();
  GC_dirty(op);
  REACHABLE_AFTER_DIRTY(vtable_ptr);
  return GC_clear_stack(op);
}

#  ifndef THREAD_LOCAL_ALLOC
/*
 * Allocate a `gcj`-style object of `lb` bytes with the given `vtable_ptr`
 * stored to its first word.  Just calls `GC_core_gcj_malloc()` with zero
 * flags.  This definition is compiled only if `THREAD_LOCAL_ALLOC` macro
 * is not defined.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_gcj_malloc(size_t lb, const void *vtable_ptr)
{
  return GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */);
}
#  endif /* !THREAD_LOCAL_ALLOC */

/*
 * Same as `GC_gcj_malloc()` but `IGNORE_OFF_PAGE` flag is passed to
 * `GC_core_gcj_malloc()`.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_gcj_malloc_ignore_off_page(size_t lb, const void *vtable_ptr)
{
  return GC_core_gcj_malloc(lb, vtable_ptr, IGNORE_OFF_PAGE);
}

#endif /* GC_GCJ_SUPPORT */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#if defined(KEEP_BACK_PTRS) && defined(GC_ASSERTIONS)
#endif

/*
 * This implements:
 *   1. Allocation of heap block headers;
 *   2. A map from addresses to heap block addresses to heap block headers.
 *
 * Access speed is crucial.  We implement an index structure based on
 * a two-level tree.
 */

/*
 * Look up the header for the address `h`.  A function form of the
 * header lookup macros (`GET_HDR` if `HASH_TL` macro is defined,
 * `HDR_INNER` otherwise).
 */
GC_INNER hdr *
GC_find_header(const void *h)
{
  hdr *hhdr;

#ifdef HASH_TL
  GET_HDR(h, hhdr);
#else
  hhdr = HDR_INNER(h);
#endif
  return hhdr;
}

/*
 * Handle a header cache miss for the pointer `p`.  Returns the header
 * of the object block `p` refers to, or `NULL` if `p` should be ignored
 * (in some of the latter cases `p` is also black-listed).  The cache
 * entry `hce` is updated only if `p` maps directly to the header of
 * a block that is not free.
 */
GC_INNER hdr *
#ifdef PRINT_BLACK_LIST
GC_header_cache_miss(ptr_t p, hdr_cache_entry *hce, ptr_t source)
#else
GC_header_cache_miss(ptr_t p, hdr_cache_entry *hce)
#endif
{
  hdr *hhdr;

  HC_MISS();
  GET_HDR(p, hhdr);
  if (!IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
    /* A real header is found. */
    if (HBLK_IS_FREE(hhdr)) {
      GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
      return 0;
    }
    hce->block_addr = ADDR(p) >> LOG_HBLKSIZE;
    hce->hce_hdr = hhdr;
    return hhdr;
  }

  if (!GC_all_interior_pointers) {
    if (NULL == hhdr) {
      GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
    }
    return 0;
  }

  if (NULL == hhdr) {
    GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
    return NULL;
  }

  {
    /* Pointer to near the start of the large object. */
    ptr_t current = (ptr_t)GC_find_starting_hblk(HBLKPTR(p), &hhdr);

    if (hhdr->hb_flags & IGNORE_OFF_PAGE)
      return 0;
    if (HBLK_IS_FREE(hhdr) || p - current >= (GC_signed_word)hhdr->hb_sz) {
      /* The block is free or the pointer is past the end of the block. */
      GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
      return 0;
    }
  }
  GC_ASSERT(!HBLK_IS_FREE(hhdr));
  /*
   * Pointers past the first page are probably too rare to add them to
   * the cache.  We do not.  And correctness relies on the fact that
   * we do not.
   */
  return hhdr;
}

/*
 * Routines to dynamically allocate collector data structures that will
 * never be freed.
 */

/*
 * Allocate `bytes` (rounded up to a multiple of the granule size) from
 * the scratch area.  The allocator lock should be held by the caller.
 * A request of at least `MINHINCR * HBLKSIZE` bytes is passed directly
 * to `GC_os_get_mem()`, the scratch area being untouched.  Otherwise,
 * if the remaining scratch space is too small, a new scratch area is
 * obtained and the allocation is retried.  Returns `NULL` if the memory
 * cannot be obtained.
 */
GC_INNER ptr_t
GC_scratch_alloc(size_t bytes)
{
  ptr_t result = GC_scratch_free_ptr;
  size_t bytes_to_get;

  GC_ASSERT(I_HOLD_LOCK());
  bytes = ROUNDUP_GRANULE_SIZE(bytes);
  for (;;) {
    GC_ASSERT(GC_scratch_end_addr >= ADDR(result));
    if (bytes <= GC_scratch_end_addr - ADDR(result)) {
      /* Unallocated space of scratch buffer has enough size. */
      GC_scratch_free_ptr = result + bytes;
      return result;
    }

    GC_ASSERT(GC_page_size != 0);
    if (bytes >= MINHINCR * HBLKSIZE) {
      /* A big request: do not go through the scratch area. */
      bytes_to_get = ROUNDUP_PAGESIZE_IF_MMAP(bytes);
      result = GC_os_get_mem(bytes_to_get);
      if (result != NULL) {
#if defined(KEEP_BACK_PTRS) && (GC_GRANULE_BYTES < 0x10)
        GC_ASSERT(ADDR(result) > (word)NOT_MARKED);
#endif
        /* No update of scratch free area pointer; get memory directly. */
#ifdef USE_SCRATCH_LAST_END_PTR
        /*
         * Update end point of last obtained area (needed only by
         * `GC_register_dynamic_libraries` for some targets).
         */
        GC_scratch_last_end_addr = ADDR(result) + bytes;
#endif
      }
      return result;
    }

    /* This is rounded up for a safety reason. */
    bytes_to_get = ROUNDUP_PAGESIZE_IF_MMAP(MINHINCR * HBLKSIZE);

    result = GC_os_get_mem(bytes_to_get);
    if (UNLIKELY(NULL == result)) {
      /* Fall back to getting just the requested amount. */
      WARN("Out of memory - trying to allocate requested amount"
           " (%" WARN_PRIuPTR " bytes)...\n",
           bytes);
      bytes_to_get = ROUNDUP_PAGESIZE_IF_MMAP(bytes);
      result = GC_os_get_mem(bytes_to_get);
      if (result != NULL) {
#ifdef USE_SCRATCH_LAST_END_PTR
        GC_scratch_last_end_addr = ADDR(result) + bytes;
#endif
      }
      return result;
    }

    /* TODO: Some amount of unallocated space may remain unused forever. */
    /* Update scratch area pointers and retry. */
    GC_scratch_free_ptr = result;
    GC_scratch_end_addr = ADDR(GC_scratch_free_ptr) + bytes_to_get;
#ifdef USE_SCRATCH_LAST_END_PTR
    GC_scratch_last_end_addr = GC_scratch_end_addr;
#endif
  }
}

/* Return an uninitialized header. */
static hdr *
alloc_hdr(void)
{
  hdr *result;

  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == GC_hdr_free_list) {
    result = (hdr *)GC_scratch_alloc(sizeof(hdr));
  } else {
    result = GC_hdr_free_list;
    GC_hdr_free_list = (hdr *)result->hb_next;
  }
  return result;
}

/*
 * Put the header `hhdr` at the front of the header free list.  The list
 * is linked through `hb_next` field of the headers.
 */
GC_INLINE void
free_hdr(hdr *hhdr)
{
  hhdr->hb_next = (struct hblk *)GC_hdr_free_list;
  GC_hdr_free_list = hhdr;
}

#ifdef COUNT_HDR_CACHE_HITS
/* Used for debugging/profiling (the symbols are externally visible). */
word GC_hdr_cache_hits = 0;
word GC_hdr_cache_misses = 0;
#endif

/*
 * Set up the header index: allocate the shared all-nil bottom-level
 * index, clear it, and make every top-level entry refer to it.
 * Terminates the program if the allocation fails.
 */
GC_INNER void
GC_init_headers(void)
{
  unsigned idx = 0;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(NULL == GC_all_nils);
  GC_all_nils = (bottom_index *)GC_scratch_alloc(sizeof(bottom_index));
  if (NULL == GC_all_nils) {
    GC_err_printf("Insufficient memory for GC_all_nils\n");
    EXIT();
  }
  BZERO(GC_all_nils, sizeof(bottom_index));
  while (idx < TOP_SZ) {
    GC_top_index[idx] = GC_all_nils;
    idx++;
  }
}

/*
 * Make sure that there is a bottom-level index block for address `addr`.
 * Returns `FALSE` on failure.
 */
static GC_bool
get_index(word addr)
{
  /* The key of the bottom-level index covering `addr`. */
  word hi = addr >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE);
  bottom_index *r;
  bottom_index *p;
  bottom_index **prev;
  bottom_index *pi; /*< `old_p` */
  word i;

  GC_ASSERT(I_HOLD_LOCK());
#ifdef HASH_TL
  /* Search the hash chain for an index with the matching key. */
  i = TL_HASH(hi);
  pi = GC_top_index[i];
  for (p = pi; p != GC_all_nils; p = p->hash_link) {
    if (p->key == hi)
      return TRUE;
  }
#else
  /* The top-level table is indexed directly by the key. */
  if (GC_top_index[hi] != GC_all_nils)
    return TRUE;
  i = hi;
#endif
  /* Not found: create a new zero-filled bottom-level index. */
  r = (bottom_index *)GC_scratch_alloc(sizeof(bottom_index));
  if (UNLIKELY(NULL == r))
    return FALSE;
  BZERO(r, sizeof(bottom_index));
  r->key = hi;
#ifdef HASH_TL
  /* The new index becomes the head of its hash chain (see below). */
  r->hash_link = pi;
#endif

  /* Add it to the list of bottom indices. */
  prev = &GC_all_bottom_indices; /*< pointer to `p` */

  pi = NULL; /*< `bottom_index` preceding `p` */
  /* Find the first list element whose key is not smaller than `hi`. */
  while ((p = *prev) != 0 && p->key < hi) {
    pi = p;
    prev = &p->asc_link;
  }
  /* Insert `r` between `pi` and `p`, updating links in both directions. */
  r->desc_link = pi;
  if (NULL == p) {
    /* `r` has the largest key, so it becomes the end of the list. */
    GC_all_bottom_indices_end = r;
  } else {
    p->desc_link = r;
  }
  r->asc_link = p;
  *prev = r;

  /* Finally, make the new index reachable from the top-level table. */
  GC_top_index[i] = r;
  return TRUE;
}

/*
 * Allocate a header and register it as the header of block `h`.
 * Returns the new (uninitialized, except for `hb_last_reclaimed` field
 * if `USE_MUNMAP` is defined) header, or `NULL` if either the needed
 * bottom-level index or the header itself cannot be allocated.
 */
GC_INNER hdr *
GC_install_header(struct hblk *h)
{
  hdr *hhdr;

  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(!get_index(ADDR(h))))
    return NULL;

  hhdr = alloc_hdr();
  if (UNLIKELY(NULL == hhdr))
    return NULL;

  GC_ASSERT(!IS_FORWARDING_ADDR_OR_NIL(hhdr));
  SET_HDR(h, hhdr);
#ifdef USE_MUNMAP
  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
#endif
  return hhdr;
}

/*
 * Set up forwarding counts for the blocks following `h` in the region of
 * `sz` bytes starting at `h`.  The header of `h` itself should be
 * installed already.  Returns `FALSE` if a bottom-level index needed for
 * the region cannot be allocated.
 */
GC_INNER GC_bool
GC_install_counts(struct hblk *h, size_t sz /* bytes */)
{
  struct hblk *hbp = h;

  /* Make sure the bottom-level indices exist for the whole region. */
  while (ADDR_LT((ptr_t)hbp, (ptr_t)h + sz)) {
    if (!get_index(ADDR(hbp)))
      return FALSE;
    /* Stop if advancing `hbp` would wrap around the address space. */
    if (ADDR(hbp) > GC_WORD_MAX - (word)BOTTOM_SZ * HBLKSIZE)
      break;
    hbp += BOTTOM_SZ;
  }
  if (!get_index(ADDR(h) + sz - 1))
    return FALSE;

  GC_ASSERT(!IS_FORWARDING_ADDR_OR_NIL(HDR(h)));
  /*
   * Each subsequent block records its distance (in blocks) back towards
   * `h`, limited to `MAX_JUMP`.
   */
  hbp = h + 1;
  while (ADDR_LT((ptr_t)hbp, (ptr_t)h + sz)) {
    word dist = (word)HBLK_PTR_DIFF(hbp, h);

    if (dist > MAX_JUMP)
      dist = MAX_JUMP;
    SET_HDR(hbp, (hdr *)NUMERIC_TO_VPTR(dist));
    hbp++;
  }
  return TRUE;
}

/*
 * Remove the header of block `h`: the header is put on the header free
 * list and the index entry of the block is cleared.
 */
GC_INNER void
GC_remove_header(struct hblk *h)
{
  hdr **ha;
  /* Obtain the location of the index entry for `h`. */
  GET_HDR_ADDR(h, ha);
  free_hdr(*ha);
  *ha = 0;
}

/*
 * Clear the index entries of the blocks following `h` in the region of
 * `sz` bytes starting at `h`.  Nothing is done for a region of at most
 * one block, or if the entry of the second block is already nil (in the
 * latter case, the remaining entries are only asserted to be nil too).
 */
GC_INNER void
GC_remove_counts(struct hblk *h, size_t sz /* bytes */)
{
  struct hblk *hbp;

  if (sz <= HBLKSIZE)
    return;

  if (HDR(h + 1) != NULL) {
    hbp = h + 1;
    while (ADDR_LT((ptr_t)hbp, (ptr_t)h + sz)) {
      SET_HDR(hbp, NULL);
      hbp++;
    }
    return;
  }

#ifdef GC_ASSERTIONS
  for (hbp = h + 2; ADDR_LT((ptr_t)hbp, (ptr_t)h + sz); hbp++) {
    GC_ASSERT(NULL == HDR(hbp));
  }
#endif
}

/*
 * Compute the address (as a numeric value) of the heap block that
 * corresponds to entry `j` of the bottom-level index `bi`.
 */
#define HBLK_ADDR(bi, j) \
  ((((bi)->key << LOG_BOTTOM_SZ) + (word)(j)) << LOG_HBLKSIZE)

/*
 * Invoke `fn(block, client_data)` for every heap block that has a header
 * and is not free.  The bottom-level indices are visited in the ascending
 * order of keys; the entries of each index are scanned from the last one
 * to the first one.
 */
GC_API void GC_CALL
GC_apply_to_all_blocks(GC_walk_hblk_fn fn, void *client_data)
{
  bottom_index *bi;

  for (bi = GC_all_bottom_indices; bi != NULL; bi = bi->asc_link) {
    GC_signed_word j = BOTTOM_SZ - 1;

    while (j >= 0) {
      hdr *hhdr = bi->index[j];

      if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
        /* Skip a nil entry, or jump back by the forwarding count. */
        if (NULL == hhdr) {
          j -= 1;
        } else {
          j -= (GC_signed_word)ADDR(hhdr);
        }
        continue;
      }

      if (!HBLK_IS_FREE(hhdr)) {
        GC_ASSERT(HBLK_ADDR(bi, j) == ADDR(hhdr->hb_block));
        fn(hhdr->hb_block, client_data);
      }
      j--;
    }
  }
}

/*
 * Find the first heap block with a real header (i.e. not a forwarding
 * count and not nil) located at `h` or at a higher address.  Free blocks
 * are skipped unless `allow_free` is set.  Returns `NULL` if there is no
 * such block.
 */
GC_INNER struct hblk *
GC_next_block(struct hblk *h, GC_bool allow_free)
{
  REGISTER bottom_index *bi;
  REGISTER size_t j = (size_t)(ADDR(h) >> LOG_HBLKSIZE) & (BOTTOM_SZ - 1);

  GC_ASSERT(I_HOLD_READER_LOCK());
  GET_BI(h, bi);
  if (bi == GC_all_nils) {
    REGISTER word hi = ADDR(h) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE);

    /*
     * There is no index for `h`; start from the first entry of the
     * first index whose key is not smaller than that of `h`.
     */
    for (bi = GC_all_bottom_indices; bi != NULL && bi->key < hi;
         bi = bi->asc_link) {
      /* Empty. */
    }
    j = 0;
  }

  while (bi != NULL) {
    while (j < BOTTOM_SZ) {
      hdr *hhdr = bi->index[j];

      if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
        j++;
        continue;
      }
      if (allow_free || !HBLK_IS_FREE(hhdr)) {
        GC_ASSERT(HBLK_ADDR(bi, j) == ADDR(hhdr->hb_block));
        return hhdr->hb_block;
      }
      /* Skip the whole free block. */
      j += divHBLKSZ(hhdr->hb_sz);
    }
    /* Proceed to the beginning of the next index. */
    j = 0;
    bi = bi->asc_link;
  }
  return NULL;
}

/*
 * Find the last heap block with a real header (i.e. not a forwarding
 * count and not nil) located at `h` or at a lower address.  Returns
 * `NULL` if there is no such block.
 */
GC_INNER struct hblk *
GC_prev_block(struct hblk *h)
{
  bottom_index *bi;
  GC_signed_word j = (ADDR(h) >> LOG_HBLKSIZE) & (BOTTOM_SZ - 1);

  GC_ASSERT(I_HOLD_READER_LOCK());
  GET_BI(h, bi);
  if (bi == GC_all_nils) {
    word hi = ADDR(h) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE);

    /*
     * There is no index for `h`; start from the last entry of the last
     * index whose key is not greater than that of `h`.
     */
    for (bi = GC_all_bottom_indices_end; bi != NULL && bi->key > hi;
         bi = bi->desc_link) {
      /* Empty. */
    }
    j = BOTTOM_SZ - 1;
  }

  while (bi != NULL) {
    while (j >= 0) {
      hdr *hhdr = bi->index[j];

      if (!IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
        GC_ASSERT(HBLK_ADDR(bi, j) == ADDR(hhdr->hb_block));
        return hhdr->hb_block;
      }
      /* Skip a nil entry, or jump back by the forwarding count. */
      if (NULL == hhdr) {
        --j;
      } else {
        j -= (GC_signed_word)ADDR(hhdr);
      }
    }
    /* Proceed to the end of the preceding index. */
    j = BOTTOM_SZ - 1;
    bi = bi->desc_link;
  }
  return NULL;
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1999-2001 by Hewlett-Packard Company. All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifndef SMALL_CONFIG
/*
 * Construct a free list out of the two-pointer objects of block `h`,
 * clearing the non-link pointer of every object.  The link of the first
 * object in the block is set to `ofl`; every other object is linked to
 * the one preceding it.  The result is the head of the list (the last
 * object processed).
 */
STATIC ptr_t
GC_build_fl_clear2(struct hblk *h, ptr_t ofl)
{
  ptr_t *cur = (ptr_t *)h->hb_body;
  ptr_t end = (ptr_t)(h + 1);

  /* The first pair of objects. */
  cur[0] = ofl;
  cur[1] = NULL;
  cur[2] = (ptr_t)cur;
  cur[3] = NULL;
  cur += 4;

  /* The remaining objects, two per iteration. */
  while (ADDR_LT((ptr_t)cur, end)) {
    cur[0] = (ptr_t)(cur - 2);
    cur[1] = NULL;
    cur[2] = (ptr_t)cur;
    cur[3] = NULL;
    cur += 4;
  }
  return (ptr_t)(cur - 2);
}

/*
 * Construct a free list out of the two-pointer objects of block `h`
 * without clearing the objects: only the link (the first pointer) of
 * each object is written.  The link of the first object in the block is
 * set to `ofl`; the head of the resulting list is returned.
 */
STATIC ptr_t
GC_build_fl2(struct hblk *h, ptr_t ofl)
{
  ptr_t *cur = (ptr_t *)h->hb_body;
  ptr_t end = (ptr_t)(h + 1);

  /* The first pair of objects. */
  cur[0] = ofl;
  cur[2] = (ptr_t)cur;
  cur += 4;

  /* The remaining objects, two per iteration. */
  while (ADDR_LT((ptr_t)cur, end)) {
    cur[0] = (ptr_t)(cur - 2);
    cur[2] = (ptr_t)cur;
    cur += 4;
  }
  return (ptr_t)(cur - 2);
}

/*
 * Construct a free list out of the four-pointer objects of block `h`,
 * clearing the three non-link pointers of every object.  The link of
 * the first object in the block is set to `ofl`; every other object is
 * linked to the one preceding it.  The head of the list is returned.
 */
STATIC ptr_t
GC_build_fl_clear4(struct hblk *h, ptr_t ofl)
{
  ptr_t *cur = (ptr_t *)h->hb_body;
  ptr_t end = (ptr_t)(h + 1);

  /* The first object. */
  cur[0] = ofl;
  cur[1] = NULL;
  cur[2] = NULL;
  cur[3] = NULL;
  cur += 4;

  /* The remaining objects, one per iteration. */
  while (ADDR_LT((ptr_t)cur, end)) {
    GC_PREFETCH_FOR_WRITE((ptr_t)(cur + 64));
    cur[0] = (ptr_t)(cur - 4);
    cur[1] = NULL;
    CLEAR_DOUBLE(cur + 2);
    cur += 4;
  }
  return (ptr_t)(cur - 4);
}

/*
 * Construct a free list out of the four-pointer objects of block `h`
 * without clearing the objects: only the link (the first pointer) of
 * each object is written.  The link of the first object in the block is
 * set to `ofl`; the head of the resulting list is returned.
 */
STATIC ptr_t
GC_build_fl4(struct hblk *h, ptr_t ofl)
{
  ptr_t *cur = (ptr_t *)h->hb_body;
  ptr_t end = (ptr_t)(h + 1);

  /* The first pair of objects. */
  cur[0] = ofl;
  cur[4] = (ptr_t)cur;
  cur += 8;

  /* The remaining objects, two per iteration (unrolled by 2). */
  while (ADDR_LT((ptr_t)cur, end)) {
    GC_PREFETCH_FOR_WRITE((ptr_t)(cur + 64));
    cur[0] = (ptr_t)(cur - 4);
    cur[4] = (ptr_t)cur;
    cur += 8;
  }
  return (ptr_t)(cur - 4);
}
#endif /* !SMALL_CONFIG */

/*
 * Build a free list of the objects of `lg` granules each inside block
 * `h`.  The link of the first object in the block is set to `list`
 * (thus the new objects are prepended to it); every other object is
 * linked to the one preceding it.  If `clear` is set, then the objects
 * are cleared (except for their link fields).  Returns the head of the
 * resulting free list.
 */
GC_INNER ptr_t
GC_build_fl(struct hblk *h, ptr_t list, size_t lg, GC_bool clear)
{
  ptr_t *p, *prev;
  ptr_t plim; /*< points to last object in new `hblk` entity */
  size_t lpw = GRANULES_TO_PTRS(lg);

  /*
   * Do a few prefetches here (at a regular 128-byte stride), just because
   * it is cheap.  If we were more serious about it, these should go
   * inside the loops.  But write prefetches usually do not seem to matter
   * much.
   */
  GC_PREFETCH_FOR_WRITE((ptr_t)h);
  GC_PREFETCH_FOR_WRITE((ptr_t)h + 128);
  GC_PREFETCH_FOR_WRITE((ptr_t)h + 256);
  GC_PREFETCH_FOR_WRITE((ptr_t)h + 384);
#ifndef SMALL_CONFIG
  /*
   * Handle small objects sizes more efficiently.  For larger objects
   * the difference is less significant.
   */
  switch (lpw) {
  case 2:
    if (clear) {
      return GC_build_fl_clear2(h, list);
    } else {
      return GC_build_fl2(h, list);
    }
  case 4:
    if (clear) {
      return GC_build_fl_clear4(h, list);
    } else {
      return GC_build_fl4(h, list);
    }
  default:
    break;
  }
#endif /* !SMALL_CONFIG */

  /* Clear the page if necessary. */
  if (clear)
    BZERO(h, HBLKSIZE);

  /* Add objects to free list. */
  prev = (ptr_t *)h->hb_body; /*< one object behind `p` */

  /* The last place for the last object to start. */
  plim = (ptr_t)h + HBLKSIZE - lpw * sizeof(ptr_t);

  /* Make a list of all objects in `*h` with head as last object. */
  for (p = prev + lpw; ADDR_GE(plim, (ptr_t)p); p += lpw) {
    /* The current object's link points to last object. */
    obj_link(p) = (ptr_t)prev;
    prev = p;
  }
  /* The loop has advanced `p` one object too far; step back. */
  p -= lpw;
  /* `p` now points to the last object. */

  /*
   * Put `p` (which is now head of list of objects in `*h`) as first pointer
   * in the appropriate free list for this size.
   */
  *(ptr_t *)h = list;
  return (ptr_t)p;
}

GC_INNER void
GC_new_hblk(size_t lg, int kind)
{
  struct hblk *h; /*< the new heap block */
  size_t lb_adjusted = GRANULES_TO_BYTES(lg);

  GC_STATIC_ASSERT(sizeof(struct hblk) == HBLKSIZE);
  GC_ASSERT(I_HOLD_LOCK());
  /* Allocate a new heap block. */
  h = GC_allochblk(lb_adjusted, kind, 0 /* `flags` */, 0 /* `align_m1` */);
  if (UNLIKELY(NULL == h))
    return; /*< out of memory */

  /* Mark all objects if appropriate. */
  if (IS_UNCOLLECTABLE(kind))
    GC_set_hdr_marks(HDR(h));

  /* Build the free list. */
  GC_obj_kinds[kind].ok_freelist[lg]
      = GC_build_fl(h, (ptr_t)GC_obj_kinds[kind].ok_freelist[lg], lg,
                    GC_debugging_started || GC_obj_kinds[kind].ok_init);
}

/*
 * Routines for maintaining maps describing heap block layouts for various
 * object sizes.  Allows fast pointer validity checks and fast location of
 * object start locations on machines (such as SPARC) with slow division.
 */

/*
 * Register `offset` as a valid displacement of a pointer from the object
 * start.  This is a wrapper around `GC_register_displacement_inner()`
 * which acquires the allocator lock for the duration of the call.
 */
GC_API void GC_CALL
GC_register_displacement(size_t offset)
{
  LOCK();
  GC_register_displacement_inner(offset);
  UNLOCK();
}

/*
 * Record `offset` as a valid displacement, both in the table of valid
 * offsets and in the table of valid offsets modulo the pointer size.
 * Aborts if `offset` is not smaller than `VALID_OFFSET_SZ`.  The caller
 * should hold the allocator lock.
 */
GC_INNER void
GC_register_displacement_inner(size_t offset)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (offset >= VALID_OFFSET_SZ) {
    ABORT("Bad argument to GC_register_displacement");
  }
  if (GC_valid_offsets[offset]) {
    /* Already registered. */
    return;
  }
  GC_valid_offsets[offset] = TRUE;
  GC_modws_valid_offsets[offset % sizeof(ptr_t)] = TRUE;
}

#ifndef MARK_BIT_PER_OBJ
/*
 * Make sure the object map for the objects of `lg` granules exists
 * (the map at index 0 is used if `lg` exceeds `MAXOBJGRANULES`).
 * Returns `FALSE` if the map is missing and cannot be allocated.
 */
GC_INNER GC_bool
GC_add_map_entry(size_t lg)
{
  size_t displ = 0;
  hb_map_entry_t *map;

  GC_ASSERT(I_HOLD_LOCK());
  /*
   * Verify that `displ % lg` always fits into `hb_map_entry_t` type.
   * Note: the maximum value is computed in this way to avoid compiler
   * complaints about constant truncation or expression overflow.
   */
  GC_STATIC_ASSERT(
      MAXOBJGRANULES - 1
      <= (~(size_t)0 >> ((sizeof(size_t) - sizeof(hb_map_entry_t)) * 8)));

  if (lg > MAXOBJGRANULES)
    lg = 0;
  if (LIKELY(GC_obj_map[lg] != NULL))
    return TRUE;

  map = (hb_map_entry_t *)GC_scratch_alloc(OBJ_MAP_LEN
                                           * sizeof(hb_map_entry_t));
  if (UNLIKELY(NULL == map))
    return FALSE;

  GC_COND_LOG_PRINTF("Adding block map for size of %u granules (%u bytes)\n",
                     (unsigned)lg, (unsigned)GRANULES_TO_BYTES(lg));
  if (lg != 0) {
    /* Each entry is the displacement modulo the object size. */
    while (displ < OBJ_MAP_LEN) {
      map[displ] = (hb_map_entry_t)(displ % lg);
      displ++;
    }
  } else {
    /* Set to a nonzero to get us out of the marker fast path. */
    while (displ < OBJ_MAP_LEN) {
      map[displ] = 1;
      displ++;
    }
  }
  GC_obj_map[lg] = map;
  return TRUE;
}
#endif /* !MARK_BIT_PER_OBJ */

/*
 * Initialize the tables of valid displacements: every offset is valid
 * if all interior pointers are recognized; otherwise no offset is valid
 * initially.
 */
GC_INNER void
GC_initialize_offsets(void)
{
  size_t i;

  if (!GC_all_interior_pointers) {
    BZERO(GC_valid_offsets, sizeof(GC_valid_offsets));
    for (i = 0; i < sizeof(ptr_t); ++i)
      GC_modws_valid_offsets[i] = FALSE;
    return;
  }

  for (i = 0; i < VALID_OFFSET_SZ; ++i)
    GC_valid_offsets[i] = TRUE;
}

/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


/*
 * These are checking routines calls to which could be inserted by
 * a preprocessor to validate C pointer arithmetic.
 */

/*
 * The default procedure invoked by `GC_same_obj()` on a failed check:
 * abort with a message mentioning both pointers.
 */
STATIC void GC_CALLBACK
GC_default_same_obj_print_proc(void *p, void *q)
{
  ABORT_ARG2("GC_same_obj test failed",
             ": %p and %p are not in the same object", p, q);
}

GC_same_obj_print_proc_t GC_same_obj_print_proc
    = GC_default_same_obj_print_proc;

/*
 * Check that `p` and `q` point to the same object.  If the check fails,
 * then `GC_same_obj_print_proc` is invoked with both pointers.  In any
 * case `p` is returned.  The collector is initialized first if needed.
 */
GC_API void *GC_CALL
GC_same_obj(void *p, void *q)
{
  hdr *hhdr;
  ptr_t obj_start, obj_end;
  size_t sz;

  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  hhdr = HDR(p);
  if (NULL == hhdr) {
    /*
     * There is no header for `p`.  This is reported only if `q` lies in
     * a different block and that block has a header.
     */
    if (divHBLKSZ(ADDR(p)) != divHBLKSZ(ADDR(q)) && HDR(q) != NULL) {
      GC_same_obj_print_proc((ptr_t)p, (ptr_t)q);
    }
    return p;
  }

  if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
    /*
     * `p` points to the middle of a large object; both pointers are
     * checked against the bounds starting from its first block.
     */
    struct hblk *first = GC_find_starting_hblk(HBLKPTR(p), &hhdr);

    obj_end = (ptr_t)first + hhdr->hb_sz;
    if (ADDR_GE((ptr_t)p, obj_end) || ADDR_GE((ptr_t)q, obj_end)
        || ADDR_LT((ptr_t)q, (ptr_t)first)) {
      GC_same_obj_print_proc((ptr_t)p, (ptr_t)q);
    }
    return p;
  }

  sz = hhdr->hb_sz;
  if (sz <= MAXOBJBYTES) {
    size_t offset;

    if (HBLKPTR(p) != HBLKPTR(q)) {
      /*
       * Without this check, we might miss an error if `q` points to
       * the first object on a page, and points just before the page.
       */
      GC_same_obj_print_proc((ptr_t)p, (ptr_t)q);
      return p;
    }
    offset = HBLKDISPL(p) % sz;
    obj_start = (ptr_t)p - offset;
    obj_end = obj_start + sz;
  } else {
    /* The first block of a large object. */
    obj_start = (ptr_t)HBLKPTR(p);
    obj_end = obj_start + sz;
    if (ADDR_GE((ptr_t)p, obj_end)) {
      GC_same_obj_print_proc((ptr_t)p, (ptr_t)q);
      return p;
    }
  }

  /*
   * The range from `obj_start` (inclusive) to `obj_end` (exclusive)
   * delimits the object containing `p`, if any.  If `p` is not inside
   * a valid object, then either `q` is also outside any valid object,
   * or it is outside of that range.
   */
  if (!ADDR_INSIDE((ptr_t)q, obj_start, obj_end)) {
    GC_same_obj_print_proc((ptr_t)p, (ptr_t)q);
  }
  return p;
}

/*
 * The default procedure invoked by `GC_is_valid_displacement()` on
 * a failed check: abort with a message mentioning the pointer.
 */
STATIC void GC_CALLBACK
GC_default_is_valid_displacement_print_proc(void *p)
{
  ABORT_ARG1("GC_is_valid_displacement test failed", ": %p not valid", p);
}

GC_valid_ptr_print_proc_t GC_is_valid_displacement_print_proc
    = GC_default_is_valid_displacement_print_proc;

/*
 * Check that `p`, if it refers to a heap block which has a header, has
 * a valid displacement from the start of an object.  If the check fails,
 * then `GC_is_valid_displacement_print_proc` is invoked with `p`.
 * Returns `p` (which might be `NULL`) in any case.  The collector is
 * initialized first if needed.
 */
GC_API void *GC_CALL
GC_is_valid_displacement(void *p)
{
  hdr *hhdr;
  size_t offset;
  struct hblk *h;
  size_t sz;

  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  if (NULL == p)
    return NULL;
  hhdr = HDR(p);
  /* A pointer to a block without a header is not checked at all. */
  if (NULL == hhdr)
    return p;
  h = HBLKPTR(p);
  if (GC_all_interior_pointers) {
    /* Move to the first block of the object (and to its header). */
    h = GC_find_starting_hblk(h, &hhdr);
  } else if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
    /*
     * A pointer past the first block of a large object is reported
     * unless all interior pointers are recognized.
     */
    GC_is_valid_displacement_print_proc((ptr_t)p);
    return p;
  }
  sz = hhdr->hb_sz;
  offset = HBLKDISPL(p) % sz;
  /*
   * The check fails if any of the following holds: `p` is past the end
   * of a large object; the offset is not registered as a valid one;
   * the end of the object computed from `p` is beyond the end of block
   * `h` while the next block has a real header (i.e. it is neither
   * a forwarding count nor nil).
   */
  if ((sz > MAXOBJBYTES && ADDR_GE((ptr_t)p, (ptr_t)h + sz))
      || !GC_valid_offsets[offset]
      || (ADDR_LT((ptr_t)(h + 1), (ptr_t)p + sz - offset)
          && !IS_FORWARDING_ADDR_OR_NIL(HDR(h + 1)))) {
    GC_is_valid_displacement_print_proc((ptr_t)p);
  }
  return p;
}

/*
 * The default procedure invoked by `GC_is_visible()` on a failed check:
 * abort with a message mentioning the pointer.
 */
STATIC void GC_CALLBACK
GC_default_is_visible_print_proc(void *p)
{
  ABORT_ARG1("GC_is_visible test failed", ": %p not GC-visible", p);
}

GC_valid_ptr_print_proc_t GC_is_visible_print_proc
    = GC_default_is_visible_print_proc;

#ifndef THREADS
/*
 * Could `p` be a stack address?  The result is true if `p` is hotter
 * than `GC_stackbottom` but not hotter than the approximate value of
 * the current stack pointer.
 */
STATIC GC_bool
GC_on_stack(ptr_t p)
{
  return HOTTER_THAN(p, GC_stackbottom) && !HOTTER_THAN(p, GC_approx_sp());
}

/*
 * Check whether the address `p` lies inside one of the registered static
 * root sections.  The index of the section that matched most recently
 * is remembered and that section is tried first on the next call.
 */
STATIC GC_bool
GC_is_static_root(ptr_t p)
{
  /* Initially refers to no section. */
  static size_t recent_idx = MAX_ROOT_SETS;
  size_t i = 0;

#  if defined(CPPCHECK)
  if (n_root_sets > MAX_ROOT_SETS)
    ABORT("Bad n_root_sets");
#  endif
  if (recent_idx < n_root_sets
      && ADDR_INSIDE(p, GC_static_roots[recent_idx].r_start,
                     GC_static_roots[recent_idx].r_end))
    return TRUE;

  while (i < n_root_sets) {
    if (ADDR_INSIDE(p, GC_static_roots[i].r_start, GC_static_roots[i].r_end)) {
      recent_idx = i;
      return TRUE;
    }
    i++;
  }
  return FALSE;
}
#endif /* !THREADS */

/*
 * Check that `p` is an address at which a pointer would be seen by the
 * collector.  If the check fails, then `GC_is_visible_print_proc` is
 * invoked with `p`.  Returns `p` in any case.
 *
 * A pointer which is not aligned to `ALIGNMENT` always fails the check.
 * If `THREADS` macro is defined, the only other failure is a pointer to
 * a block which has a header while `GC_base(p)` is `NULL`.  Otherwise,
 * a stack address or an address inside a static root is accepted, and
 * a heap address is checked against the descriptor of its object.
 */
GC_API void *GC_CALL
GC_is_visible(void *p)
{
  const hdr *hhdr;

  if ((ADDR(p) & (ALIGNMENT - 1)) != 0)
    goto fail;
  if (UNLIKELY(!GC_is_initialized))
    GC_init();
#ifdef THREADS
  hhdr = HDR(p);
  if (hhdr != NULL && NULL == GC_base(p)) {
    goto fail;
  } else {
    /* May be inside thread stack.  We cannot do much. */
    return p;
  }
#else
  /* Check stack first. */
  if (GC_on_stack((ptr_t)p))
    return p;

  hhdr = HDR(p);
  if (NULL == hhdr) {
    if (GC_is_static_root((ptr_t)p)) {
      return p;
    }
    /* Else do it again correctly. */
#  if defined(ANY_MSWIN) || defined(DYNAMIC_LOADING)
    if (!GC_no_dls) {
      GC_register_dynamic_libraries();
      if (GC_is_static_root((ptr_t)p))
        return p;
    }
#  endif
  } else {
    /* `p` points to the heap. */
    word descr;
    /* TODO: Should `GC_base` be manually inlined? */
    ptr_t base = (ptr_t)GC_base(p);

    if (NULL == base)
      goto fail;
    /* Use the header of the block where the object starts. */
    if (HBLKPTR(base) != HBLKPTR(p))
      hhdr = HDR(base);
    descr = hhdr->hb_descr;
  retry:
    switch (descr & GC_DS_TAGS) {
    case GC_DS_LENGTH:
      if ((word)((ptr_t)p - base) >= descr)
        goto fail;
      break;
    case GC_DS_BITMAP:
      if ((ptr_t)p - base >= (ptrdiff_t)PTRS_TO_BYTES(BITMAP_BITS))
        goto fail;
#  if ALIGNMENT != CPP_PTRSZ / 8
      if ((ADDR(p) & (sizeof(ptr_t) - 1)) != 0)
        goto fail;
#  endif
      /*
       * The bitmap has one bit per pointer-sized word of the object
       * (as implied by the bound check above), the most significant bit
       * corresponding to the first word.  Thus, the byte displacement
       * should be converted to the word index before testing the bit;
       * using the byte displacement directly would test a wrong bit
       * and could lead to a negative (wrapped) shift count.
       */
      if (!(((word)1 << (CPP_WORDSZ - 1
                         - (word)((ptr_t)p - base) / sizeof(ptr_t)))
            & descr))
        goto fail;
      break;
    case GC_DS_PROC:
      /* We could try to decipher this partially.  For now we just punt. */
      break;
    case GC_DS_PER_OBJECT:
      /* Fetch the actual descriptor and process it instead. */
      if (!(descr & SIGNB)) {
        descr = *(word *)((ptr_t)base + (descr & ~(word)GC_DS_TAGS));
      } else {
        ptr_t type_descr = *(ptr_t *)base;

        if (UNLIKELY(NULL == type_descr)) {
          /* See the comment in `GC_mark_from`. */
          goto fail;
        }
        descr = *(word *)(type_descr
                          - ((GC_signed_word)descr
                             + (GC_INDIR_PER_OBJ_BIAS - GC_DS_PER_OBJECT)));
      }
      goto retry;
    }
    return p;
  }
#endif
fail:
  GC_is_visible_print_proc((ptr_t)p);
  return p;
}

/*
 * Advance the pointer stored at `p` by `how_much` bytes, checking (by
 * `GC_same_obj()`) that the new value refers to the same object as the
 * old one, and, unless all interior pointers are recognized, that the
 * new value has a valid displacement.  Returns the updated pointer.
 */
GC_API void *GC_CALL
GC_pre_incr(void **p, ptrdiff_t how_much)
{
  void *old_val = *p;
  void *new_val = GC_same_obj((ptr_t)old_val + how_much, old_val);

  if (!GC_all_interior_pointers)
    (void)GC_is_valid_displacement(new_val);
  *p = new_val;
  return new_val;
}

/*
 * Advance the pointer stored at `p` by `how_much` bytes, checking (by
 * `GC_same_obj()`) that the new value refers to the same object as the
 * old one, and, unless all interior pointers are recognized, that the
 * new value has a valid displacement.  Returns the original value of
 * the pointer.
 */
GC_API void *GC_CALL
GC_post_incr(void **p, ptrdiff_t how_much)
{
  void *old_val = *p;
  void *new_val = GC_same_obj((ptr_t)old_val + how_much, old_val);

  if (!GC_all_interior_pointers)
    (void)GC_is_valid_displacement(new_val);
  *p = new_val;
  return old_val;
}

/*
 * Set the procedure to be invoked by `GC_same_obj()` on a failed check.
 * The argument is asserted to be non-`NULL`.
 */
GC_API void GC_CALL
GC_set_same_obj_print_proc(GC_same_obj_print_proc_t fn)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fn));
  GC_same_obj_print_proc = fn;
}

/* Return the procedure currently invoked on a failed `GC_same_obj()` check. */
GC_API GC_same_obj_print_proc_t GC_CALL
GC_get_same_obj_print_proc(void)
{
  return GC_same_obj_print_proc;
}

/*
 * Set the procedure to be invoked by `GC_is_valid_displacement()` on
 * a failed check.  The argument is asserted to be non-`NULL`.
 */
GC_API void GC_CALL
GC_set_is_valid_displacement_print_proc(GC_valid_ptr_print_proc_t fn)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fn));
  GC_is_valid_displacement_print_proc = fn;
}

/*
 * Return the procedure currently invoked on a failed
 * `GC_is_valid_displacement()` check.
 */
GC_API GC_valid_ptr_print_proc_t GC_CALL
GC_get_is_valid_displacement_print_proc(void)
{
  return GC_is_valid_displacement_print_proc;
}

/*
 * Set the procedure to be invoked by `GC_is_visible()` on a failed
 * check.  The argument is asserted to be non-`NULL`.
 */
GC_API void GC_CALL
GC_set_is_visible_print_proc(GC_valid_ptr_print_proc_t fn)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fn));
  GC_is_visible_print_proc = fn;
}

/* Return the procedure currently invoked on a failed `GC_is_visible()` check. */
GC_API GC_valid_ptr_print_proc_t GC_CALL
GC_get_is_visible_print_proc(void)
{
  return GC_is_visible_print_proc;
}


/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1998-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999 by Hewlett-Packard Company. All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifdef GC_USE_ENTIRE_HEAP
int GC_use_entire_heap = TRUE;
#else
int GC_use_entire_heap = FALSE;
#endif

/*
 * Free heap blocks are kept on one of several free lists, depending on
 * the size of the block.  Each free list is doubly linked.  Adjacent
 * free blocks are coalesced.
 */

/*
 * Largest block we will be allocated starting on a black-listed block.
 * Must be not smaller than `HBLKSIZE`.
 */
#define MAX_BLACK_LIST_ALLOC (2 * HBLKSIZE)

/* Sizes up to this many `hblk` entities each have their own free list. */
#define UNIQUE_THRESHOLD 32

/*
 * Sizes of at least this many heap blocks are mapped to a single free
 * list.
 */
#define HUGE_THRESHOLD 256

/* In between sizes map this many distinct sizes to a single bin. */
#define FL_COMPRESSION 8

#define N_HBLK_FLS \
  ((HUGE_THRESHOLD - UNIQUE_THRESHOLD) / FL_COMPRESSION + UNIQUE_THRESHOLD)

/*
 * List of completely empty heap blocks.  Linked through `hb_next` field
 * of header structure associated with block.  Remains externally visible
 * as used by GNU `gcj`.
 */
#ifndef GC_GCJ_SUPPORT
STATIC
#endif
struct hblk *GC_hblkfreelist[N_HBLK_FLS + 1] = { 0 };

/*
 * Invoke `fn(h, i, client_data)` for every block `h` of every free list
 * of heap blocks, where `i` is the index of the free list containing
 * the block.  The free lists are processed in the ascending order of
 * indices.
 */
GC_API void GC_CALL
GC_iterate_free_hblks(GC_walk_free_blk_fn fn, void *client_data)
{
  int fl_idx;

  for (fl_idx = 0; fl_idx <= N_HBLK_FLS; ++fl_idx) {
    struct hblk *h = GC_hblkfreelist[fl_idx];

    while (h != NULL) {
      fn(h, fl_idx, client_data);
      /* The next block is fetched only after the callback returns. */
      h = HDR(h)->hb_next;
    }
  }
}

/* Number of free bytes on each list.  Remains visible to `gcj`. */
#ifndef GC_GCJ_SUPPORT
STATIC
#endif
word GC_free_bytes[N_HBLK_FLS + 1] = { 0 };

#ifndef GC_NO_DEINIT
/*
 * Make all the free lists of heap blocks empty and reset the per-list
 * counters of free bytes to zero.
 */
GC_INNER void
GC_reset_freelist(void)
{
  BZERO(GC_hblkfreelist, sizeof(GC_hblkfreelist));
  BZERO(GC_free_bytes, sizeof(GC_free_bytes));
}
#endif

/*
 * Find the largest index `n` such that `GC_large_allocd_bytes` plus
 * the total number of free bytes on the free lists with indices from
 * `n` to `N_HBLK_FLS` (inclusive) reaches `GC_max_large_allocd_bytes`.
 * Zero is returned if there is no such positive index.
 */
GC_INLINE size_t
GC_enough_large_bytes_left(void)
{
  size_t n = N_HBLK_FLS;
  word total = GC_large_allocd_bytes;

  GC_ASSERT(GC_max_large_allocd_bytes <= GC_heapsize);
  while (n > 0) {
    total += GC_free_bytes[n];
    if (total >= GC_max_large_allocd_bytes)
      return n;
    n--;
  }
  return 0;
}

/*
 * Compute the index of the free list appropriate for a block consisting
 * of `blocks_needed` heap blocks.  Each size up to `UNIQUE_THRESHOLD`
 * has its own list, all the sizes starting from `HUGE_THRESHOLD` share
 * the last list, and the sizes in between are grouped by
 * `FL_COMPRESSION` per list.
 */
STATIC size_t
GC_hblk_fl_from_blocks(size_t blocks_needed)
{
  size_t fl_idx;

  if (blocks_needed >= HUGE_THRESHOLD) {
    fl_idx = N_HBLK_FLS;
  } else if (blocks_needed > UNIQUE_THRESHOLD) {
    fl_idx = (blocks_needed - UNIQUE_THRESHOLD) / FL_COMPRESSION
             + UNIQUE_THRESHOLD;
  } else {
    fl_idx = blocks_needed;
  }
  return fl_idx;
}

#define PHDR(hhdr) HDR((hhdr)->hb_prev)
#define NHDR(hhdr) HDR((hhdr)->hb_next)

#ifdef USE_MUNMAP
#  define IS_MAPPED(hhdr) (((hhdr)->hb_flags & WAS_UNMAPPED) == 0)
#else
#  define IS_MAPPED(hhdr) TRUE
#endif /* !USE_MUNMAP */

#if !defined(NO_DEBUGGING) || defined(GC_ASSERTIONS)
/*
 * A callback for `GC_iterate_free_hblks()`: add the size of free block
 * `h` (as recorded in its header) to the counter pointed to by
 * `total_free_ptr`.  The free-list index is ignored.
 */
static void GC_CALLBACK
add_hb_sz(struct hblk *h, int i, void *total_free_ptr)
{
  UNUSED_ARG(i);
  *(word *)total_free_ptr += HDR(h)->hb_sz;
#  if defined(CPPCHECK)
  GC_noop1_ptr(h);
#  endif
}

/*
 * Compute the total size (in bytes) of all the blocks on the free lists
 * of heap blocks by traversing the lists.
 */
GC_INNER word
GC_compute_large_free_bytes(void)
{
  word total_free = 0;

  GC_iterate_free_hblks(add_hb_sz, &total_free);
  return total_free;
}
#endif /* !NO_DEBUGGING || GC_ASSERTIONS */

#ifndef NO_DEBUGGING
/*
 * A callback for `GC_iterate_free_hblks()`: print the information about
 * free block `h` belonging to the free list with index `i`.
 * `prev_index_ptr` points to an `int` variable holding the index of
 * the free list the previously printed block belongs to; it is used to
 * print the heading of a free list only once, before its first block.
 */
static void GC_CALLBACK
print_hblkfreelist_item(struct hblk *h, int i, void *prev_index_ptr)
{
  hdr *hhdr = HDR(h);

#  if defined(CPPCHECK)
  GC_noop1_ptr(h);
#  endif
  if (i != *(int *)prev_index_ptr) {
    /* This is the first block of the list: print the heading. */
    GC_printf("Free list %d (total size %lu):\n", i,
              (unsigned long)GC_free_bytes[i]);
    *(int *)prev_index_ptr = i;
  }

#  ifdef NO_BLACK_LISTING
  GC_printf("\t%p size %lu\n", (void *)h, (unsigned long)hhdr->hb_sz);
#  else
  /*
   * Also report whether the first `HBLKSIZE` bytes of the block, or
   * some other part of it, are black-listed.
   */
  GC_printf("\t%p size %lu %s black listed\n", (void *)h,
            (unsigned long)hhdr->hb_sz,
            GC_is_black_listed(h, HBLKSIZE) != NULL      ? "start"
            : GC_is_black_listed(h, hhdr->hb_sz) != NULL ? "partially"
                                                         : "not");
#  endif
}

/*
 * Print the contents of all the free lists of heap blocks followed by
 * the value of `GC_large_free_bytes`.  An error message is printed if
 * the latter differs from the total size computed by traversing the
 * free lists.
 */
void
GC_print_hblkfreelist(void)
{
  word total;
  /* No free list has been printed yet. */
  int prev_index = -1;

  GC_iterate_free_hblks(print_hblkfreelist_item, &prev_index);
  GC_printf("GC_large_free_bytes: %lu\n", (unsigned long)GC_large_free_bytes);
  total = GC_compute_large_free_bytes();
  if (total != GC_large_free_bytes)
    GC_err_printf("GC_large_free_bytes INCONSISTENT!! Should be: %lu\n",
                  (unsigned long)total);
}

/*
 * Search all the free lists of heap blocks for the block described by
 * header `wanted`.  Returns the index of the free list containing
 * the block, or -1 if the block is not on any free list.
 */
static int
free_list_index_of(const hdr *wanted)
{
  int fl_idx;

  for (fl_idx = 0; fl_idx <= N_HBLK_FLS; ++fl_idx) {
    const struct hblk *h = GC_hblkfreelist[fl_idx];

    while (h != NULL) {
      const hdr *hhdr = HDR(h);

      if (hhdr == wanted)
        return fl_idx;
      h = hhdr->hb_next;
    }
  }
  return -1;
}

/*
 * Walk through the heap invoking `fn(start, finish, type, client_data)`
 * for every heap section (adjacent sections are merged and reported as
 * one, with `GC_HEAP_SECTION_TYPE_WHOLE_SECT` type) and then for every
 * region inside the section.  A region is reported as a forwarding one
 * (a single block without a real header), a free or unmapped block,
 * a used one, or the padding following the used part of a block.
 * NOTE(review): no lock is acquired or asserted here - presumably
 * the caller is responsible for that; confirm against the callers.
 */
GC_API void GC_CALL
GC_foreach_heap_section_inner(GC_heap_section_proc fn, void *client_data)
{
  size_t i;

  /*
   * The collector memory is organized in heap sections that are split in
   * blocks.  Each such block has a header (obtained by `HDR(p)`) and the
   * block size is aligned to `HBLKSIZE`.  The block headers are kept
   * separately from the memory they point to.
   */
  for (i = 0; i < GC_n_heap_sects; ++i) {
    ptr_t start = GC_heap_sects[i].hs_start;
    ptr_t finish = start + GC_heap_sects[i].hs_bytes;
    ptr_t p;

    /* Merge in contiguous sections. */
    while (i + 1 < GC_n_heap_sects
           && GC_heap_sects[i + 1].hs_start == finish) {
      ++i;
      finish = GC_heap_sects[i].hs_start + GC_heap_sects[i].hs_bytes;
    }

    /* Report the whole (merged) section before its contents. */
    fn(start, finish, GC_HEAP_SECTION_TYPE_WHOLE_SECT, client_data);
    for (p = start; ADDR_LT(p, finish);) {
      /*
       * Lookup into 2-level tree data structure which uses address
       * as a hash key to find the block header.
       */
      hdr *hhdr = HDR(p);

      if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
        /*
         * The pointer has no header registered in the headers cache.
         * Skip one `HBLKSIZE` and retry.  Might be mapped or not.
         */
        fn(p, p + HBLKSIZE, GC_HEAP_SECTION_TYPE_FORWARDING, client_data);
        p += HBLKSIZE;
        continue;
      }

      if (HBLK_IS_FREE(hhdr)) {
        /*
         * The block is marked as free.  Note: `hb_sz` is the size in bytes
         * of the whole block.
         */
        fn(p, p + hhdr->hb_sz,
           IS_MAPPED(hhdr) ? GC_HEAP_SECTION_TYPE_FREE
                           : GC_HEAP_SECTION_TYPE_UNMAPPED,
           client_data);
        p += hhdr->hb_sz;
      } else {
        /*
         * This heap block is used.  Report also the padding, if any.
         * Note: `hb_sz` is the size (in bytes) of objects in the block.
         */
        ptr_t blockEnd = p + HBLKSIZE * OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
        ptr_t usedBlockEnd = p + hhdr->hb_sz;

        fn(p, usedBlockEnd, GC_HEAP_SECTION_TYPE_USED, client_data);
        if (ADDR_LT(usedBlockEnd, blockEnd))
          fn(usedBlockEnd, blockEnd, GC_HEAP_SECTION_TYPE_PADDING,
             client_data);
        p = blockEnd;
      }
    }
  }
}

/*
 * A callback for `GC_foreach_heap_section_inner()`: print a line
 * describing the region from `start` to `finish` according to its
 * `type`.  For a free (or unmapped) block, it is also checked that
 * the block is on the free list appropriate for its size; a diagnostic
 * message is printed otherwise.  Nothing is printed for the padding.
 */
static void GC_CALLBACK
dump_regions_proc(void *start, void *finish, GC_heap_section_type type,
                  void *client_data)
{
  hdr *hhdr;
  int correct_index, actual_index;

  UNUSED_ARG(client_data);
  switch (type) {
  case GC_HEAP_SECTION_TYPE_WHOLE_SECT:
    GC_printf("***Section from %p to %p\n", start, finish);
    break;
  case GC_HEAP_SECTION_TYPE_FORWARDING:
    GC_printf("\t%p Missing header!!(%p)\n", start, (void *)HDR(start));
    break;
  case GC_HEAP_SECTION_TYPE_FREE:
  case GC_HEAP_SECTION_TYPE_UNMAPPED:
    hhdr = HDR(start);
    GC_printf("\t%p\tfree block of size 0x%lx bytes%s\n", start,
              (unsigned long)hhdr->hb_sz,
              type == GC_HEAP_SECTION_TYPE_UNMAPPED ? " (unmapped)" : "");
    /* Compare the actual free list of the block with the expected one. */
    actual_index = free_list_index_of(hhdr);
    correct_index = (int)GC_hblk_fl_from_blocks(divHBLKSZ(hhdr->hb_sz));
    if (-1 == actual_index) {
      GC_printf("\t\tBlock not on free list %d!!\n", correct_index);
    } else if (correct_index != actual_index) {
      GC_printf("\t\tBlock on list %d, should be on %d!!\n", actual_index,
                correct_index);
    }
    break;
  case GC_HEAP_SECTION_TYPE_USED:
    GC_printf("\t%p\tused for blocks of size 0x%lx bytes\n", start,
              (unsigned long)(ADDR(finish) - ADDR(start)));
    break;
  case GC_HEAP_SECTION_TYPE_PADDING:
    /* Empty. */
    break;
  }
}

/*
 * Print the description of every heap section and of every region
 * (free, used, etc.) inside it.
 */
GC_API void GC_CALL
GC_dump_regions(void)
{
  GC_foreach_heap_section_inner(dump_regions_proc, NULL);
}
#endif /* !NO_DEBUGGING */

/*
 * Initialize `hhdr` for a `block` containing the indicated size
 * `lb_adjusted` and `kind` of objects.  `flags` are the initial block
 * flags; more of them may be turned on here depending on the object
 * size and on the properties of the object kind.  Return `FALSE` on
 * failure, which is possible only if `MARK_BIT_PER_OBJ` is not defined
 * and `GC_add_map_entry()` fails (in this case the header is still made
 * to look like the one of a valid block).
 */
static GC_bool
setup_header(hdr *hhdr, struct hblk *block, size_t lb_adjusted, int kind,
             unsigned flags)
{
  const struct obj_kind *ok;
  word descr;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(lb_adjusted >= ALIGNMENT);
#ifndef MARK_BIT_PER_OBJ
  /* A block holding an object bigger than `MAXOBJBYTES` is a large one. */
  if (lb_adjusted > MAXOBJBYTES)
    flags |= LARGE_BLOCK;
#endif
  ok = &GC_obj_kinds[kind];
#ifdef ENABLE_DISCLAIM
  /* Propagate the relevant properties of the object kind to the flags. */
  if (ok->ok_disclaim_proc)
    flags |= HAS_DISCLAIM;
  if (ok->ok_mark_unconditionally)
    flags |= MARK_UNCONDITIONALLY;
#endif

  /* Set size, kind and mark procedure fields. */
  hhdr->hb_sz = lb_adjusted;
  hhdr->hb_obj_kind = (unsigned char)kind;
  hhdr->hb_flags = (unsigned char)flags;
  hhdr->hb_block = block;
  descr = ok->ok_descriptor;
#if ALIGNMENT > GC_DS_TAGS
  /*
   * An extra byte is not added in case of ignore-off-page allocated objects
   * not smaller than `HBLKSIZE`.
   */
  if (EXTRA_BYTES != 0 && (flags & IGNORE_OFF_PAGE) != 0 && kind == NORMAL
      && lb_adjusted >= HBLKSIZE)
    descr += ALIGNMENT; /*< or set to 0 */
#endif
  /* For the kinds requesting it, the object size is added to descriptor. */
  if (ok->ok_relocate_descr)
    descr += lb_adjusted;
  hhdr->hb_descr = descr;

#ifdef MARK_BIT_PER_OBJ
  /*
   * Set `hb_inv_sz` as portably as possible.  We set it to the smallest
   * value such that `lb_adjusted * inv_sz >= 2**32`.
   * This may be more precision than necessary.
   */
  if (lb_adjusted > MAXOBJBYTES) {
    hhdr->hb_inv_sz = LARGE_INV_SZ;
  } else {
    unsigned32 inv_sz;

    GC_ASSERT(lb_adjusted > 1);
#  if CPP_WORDSZ > 32
    /* Round the quotient up if the division is not exact. */
    inv_sz = (unsigned32)(((word)1 << 32) / lb_adjusted);
    if (((inv_sz * (word)lb_adjusted) >> 32) == 0)
      ++inv_sz;
#  else
    /*
     * `2**32` is not representable here, so start from a value not bigger
     * than the wanted one and increment it while the product (computed
     * modulo `2**32`) remains greater than `lb_adjusted`.
     */
    inv_sz = (((unsigned32)1 << 31) / lb_adjusted) << 1;
    while ((inv_sz * lb_adjusted) > lb_adjusted)
      inv_sz++;
#  endif
#  if (CPP_WORDSZ == 32) && defined(__GNUC__)
    GC_ASSERT(((1ULL << 32) + lb_adjusted - 1) / lb_adjusted == inv_sz);
#  endif
    hhdr->hb_inv_sz = inv_sz;
  }
#else
  {
    size_t lg = BYTES_TO_GRANULES(lb_adjusted);

    if (UNLIKELY(!GC_add_map_entry(lg))) {
      /* Make it look like a valid block. */
      hhdr->hb_sz = HBLKSIZE;
      hhdr->hb_descr = 0;
      hhdr->hb_flags |= LARGE_BLOCK;
      hhdr->hb_map = NULL;
      return FALSE;
    }
    /* The map at index zero is the one used for large blocks. */
    hhdr->hb_map = GC_obj_map[(hhdr->hb_flags & LARGE_BLOCK) != 0 ? 0 : lg];
  }
#endif

  /* Clear mark bits. */
  GC_clear_hdr_marks(hhdr);

  /* Record the current (truncated) value of the collection counter. */
  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
  return TRUE;
}

/*
 * Unlink the free block described by `hhdr` from the `index`-th free
 * list (the block is assumed to reside on that very list) and decrease
 * the count of free bytes of the list by the block size.
 */
STATIC void
GC_remove_from_fl_at(hdr *hhdr, size_t index)
{
  struct hblk *pred = hhdr->hb_prev;
  struct hblk *succ = hhdr->hb_next;

  GC_ASSERT(modHBLKSZ(hhdr->hb_sz) == 0);
  if (NULL == pred) {
    /* The block is the first one on the list. */
    GC_ASSERT(HDR(GC_hblkfreelist[index]) == hhdr);
    GC_hblkfreelist[index] = succ;
  } else {
    hdr *pred_hdr;

    GET_HDR(pred, pred_hdr);
    pred_hdr->hb_next = succ;
  }

  /* The index is always needed to maintain the free bytes counts. */
  GC_ASSERT(GC_free_bytes[index] >= hhdr->hb_sz);
  GC_free_bytes[index] -= hhdr->hb_sz;

  if (succ != NULL) {
    hdr *succ_hdr;

    GC_ASSERT(!IS_FORWARDING_ADDR_OR_NIL(NHDR(hhdr)));
    GET_HDR(succ, succ_hdr);
    succ_hdr->hb_prev = pred;
  }
}

/*
 * Unlink the free block described by `hhdr` from the free list chosen
 * by the block size (i.e. the block is assumed to be on the list
 * matching its size).
 */
GC_INLINE void
GC_remove_from_fl(hdr *hhdr)
{
  size_t n_blocks = divHBLKSZ(hhdr->hb_sz);
  size_t fl_index = GC_hblk_fl_from_blocks(n_blocks);

  GC_remove_from_fl_at(hhdr, fl_index);
}

/* Return a pointer to the block ending just before `h`, if any. */
static struct hblk *
get_block_ending_at(struct hblk *h)
{
  struct hblk *p = h - 1;
  hdr *hhdr;

  GET_HDR(p, hhdr);
  if (hhdr != NULL) {
    return GC_find_starting_hblk(p, &hhdr);
  }
  p = GC_prev_block(p);
  if (p != NULL) {
    hhdr = HDR(p);
    if ((ptr_t)p + hhdr->hb_sz == (ptr_t)h) {
      return p;
    }
  }
  return NULL;
}

/*
 * Same as `get_block_ending_at()` but the block is returned only if
 * it is a free one; `NULL` is returned otherwise.
 */
STATIC struct hblk *
GC_free_block_ending_at(struct hblk *h)
{
  struct hblk *before = get_block_ending_at(h);
  const hdr *before_hdr;

  if (!before)
    return NULL;

  before_hdr = HDR(before);
  return HBLK_IS_FREE(before_hdr) ? before : NULL;
}

/*
 * Put the free block `h` (described by `hhdr`) onto the free list
 * matching the block size.  The block is inserted at the head of the
 * list, the count of free bytes of the list is increased by the block
 * size, and the block is marked as free.
 */
STATIC void
GC_add_to_fl(struct hblk *h, hdr *hhdr)
{
  size_t fl_index = GC_hblk_fl_from_blocks(divHBLKSZ(hhdr->hb_sz));
  struct hblk *old_head = GC_hblkfreelist[fl_index];

#if defined(GC_ASSERTIONS) && !defined(USE_MUNMAP) && !defined(CHERI_PURECAP)
  {
    /* Check that the neighbors of the block in memory are not free. */
    struct hblk *after = (struct hblk *)((ptr_t)h + hhdr->hb_sz);
    const hdr *after_hdr = HDR(after);
    struct hblk *before = GC_free_block_ending_at(h);
    const hdr *before_hdr = HDR(before);

    GC_ASSERT(NULL == after_hdr || !HBLK_IS_FREE(after_hdr)
              || (GC_heapsize & SIGNB) != 0);
    /* In the last case, blocks may be too large to be merged. */
    GC_ASSERT(NULL == before || !HBLK_IS_FREE(before_hdr)
              || (GC_heapsize & SIGNB) != 0);
  }
#endif
  GC_ASSERT(modHBLKSZ(hhdr->hb_sz) == 0);

  /* Make the block the new head of the list. */
  GC_hblkfreelist[fl_index] = h;
  GC_free_bytes[fl_index] += hhdr->hb_sz;
  GC_ASSERT(GC_free_bytes[fl_index] <= GC_large_free_bytes);
  hhdr->hb_prev = NULL;
  hhdr->hb_next = old_head;
  if (old_head) {
    hdr *old_head_hdr;

    GET_HDR(old_head, old_head_hdr);
    old_head_hdr->hb_prev = h;
  }
  hhdr->hb_flags |= FREE_BLK;
}

/*
 * Evaluate to true if the sum of the sizes (`hb_sz`) of the two blocks
 * described by the given headers has `SIZET_SIGNB` bit set, i.e. the
 * blocks are considered too large to be merged into a single one.
 */
#define BLOCKS_MERGE_OVERFLOW(hhdr, nexthdr) \
  ((((hhdr)->hb_sz + (nexthdr)->hb_sz) & SIZET_SIGNB) != 0)

#ifdef USE_MUNMAP

/*
 * `GC_unmap_old` avoids creating more than `GC_UNMAPPED_REGIONS_SOFT_LIMIT`
 * unmapped regions, but an unmapped region may be split again later,
 * thus exceeding the limit.
 */
#  ifdef COUNT_UNMAPPED_REGIONS

/*
 * Compute how the number of unmapped regions would change if the block
 * `h` (described by `hhdr`) were switched from its current state
 * (mapped or unmapped) to the opposite one.  The result is -1, 0 or 1.
 */
static int
calc_num_unmapped_regions_delta(struct hblk *h, hdr *hhdr)
{
  struct hblk *before = get_block_ending_at(h);
  struct hblk *after;
  GC_bool before_unmapped = FALSE;
  GC_bool after_unmapped = FALSE;

  after = GC_next_block((struct hblk *)((ptr_t)h + hhdr->hb_sz), TRUE);
  /* A successor which is not contiguous with `h` does not count. */
  if (after != HBLK_PAGE_ALIGNED((ptr_t)h + hhdr->hb_sz))
    after = NULL;

  if (before != NULL) {
    const hdr *before_hdr = HDR(before);

    before_unmapped = !IS_MAPPED(before_hdr);
  }
  if (after != NULL) {
    const hdr *after_hdr = HDR(after);

    after_unmapped = !IS_MAPPED(after_hdr);
  }

  if (before_unmapped != after_unmapped) {
    /*
     * Exactly one neighbor is unmapped: unmapping `h` extends that
     * unmapped region, remapping `h` shrinks it.  The number of
     * the regions is not changed in either case.
     */
    return 0;
  }

  if (before_unmapped) {
    /*
     * Both neighbors are unmapped: unmapping `h` merges two unmapped
     * regions into one, remapping `h` splits one region into two.
     */
    return IS_MAPPED(hhdr) ? -1 : 1;
  }

  /*
   * No unmapped neighbors: unmapping `h` creates an isolated unmapped
   * region, remapping `h` removes such a region.
   */
  return IS_MAPPED(hhdr) ? 1 : -1;
}
#  endif /* COUNT_UNMAPPED_REGIONS */

/*
 * Update `GC_num_unmapped_regions` assuming the block `h` changes from
 * its current state of mapped/unmapped to the opposite state.  Thus,
 * this should be called before the flags in `hhdr` are altered.
 * A no-op unless `COUNT_UNMAPPED_REGIONS` is defined.
 */
GC_INLINE void
GC_adjust_num_unmapped(struct hblk *h, hdr *hhdr)
{
#  ifdef COUNT_UNMAPPED_REGIONS
  GC_num_unmapped_regions += calc_num_unmapped_regions_delta(h, hhdr);
#  else
  /* The regions are not counted, so there is nothing to update. */
  UNUSED_ARG(h);
  UNUSED_ARG(hhdr);
#  endif
}

GC_INNER void
GC_unmap_old(unsigned threshold)
{
  size_t i;

#  ifdef COUNT_UNMAPPED_REGIONS
  /*
   * Skip unmapping if we have already exceeded the soft limit.
   * This forgoes any opportunities to merge unmapped regions though.
   */
  if (GC_num_unmapped_regions >= GC_UNMAPPED_REGIONS_SOFT_LIMIT)
    return;
#  endif

  for (i = 0; i <= N_HBLK_FLS; ++i) {
    struct hblk *h;
    hdr *hhdr;

    for (h = GC_hblkfreelist[i]; h != NULL; h = hhdr->hb_next) {
      hhdr = HDR(h);
      if (!IS_MAPPED(hhdr))
        continue;

      /*
       * Check that the interval is not smaller than the `threshold`.
       * The truncated counter value wrapping is handled correctly.
       */
      if ((unsigned short)(GC_gc_no - hhdr->hb_last_reclaimed)
          >= (unsigned short)threshold) {
#  ifdef COUNT_UNMAPPED_REGIONS
        /*
         * Continue with unmapping the block only if it will not create
         * too many unmapped regions, or if unmapping reduces the number
         * of regions.
         */
        int delta = calc_num_unmapped_regions_delta(h, hhdr);
        GC_signed_word regions = GC_num_unmapped_regions + delta;

        if (delta >= 0 && regions >= GC_UNMAPPED_REGIONS_SOFT_LIMIT) {
          GC_COND_LOG_PRINTF("Unmapped regions limit reached!\n");
          return;
        }
        GC_num_unmapped_regions = regions;
#  endif
        GC_unmap((ptr_t)h, hhdr->hb_sz);
        hhdr->hb_flags |= WAS_UNMAPPED;
      }
    }
  }
}

/*
 * Traverse all the free lists and coalesce every free block with the
 * block immediately following it in memory whenever the latter is also
 * free and the merged size does not overflow.  Before merging,
 * the mapped/unmapped state of the two blocks is made consistent:
 * the block which is not the larger one is remapped or unmapped to match
 * the other one.  Return `TRUE` if at least one merge has been done.
 */
GC_INNER GC_bool
GC_merge_unmapped(void)
{
  size_t i;
  GC_bool merged = FALSE;

  for (i = 0; i <= N_HBLK_FLS; ++i) {
    struct hblk *h = GC_hblkfreelist[i];

    while (h != NULL) {
      struct hblk *next;
      hdr *hhdr, *nexthdr;
      size_t size, next_size;

      GET_HDR(h, hhdr);
      size = hhdr->hb_sz;
      next = (struct hblk *)((ptr_t)h + size);
      GET_HDR(next, nexthdr);
      /* Coalesce with successor, if possible. */
      if (NULL == nexthdr || !HBLK_IS_FREE(nexthdr)
          || BLOCKS_MERGE_OVERFLOW(hhdr, nexthdr)) {
        /* Not mergeable with the successor. */
        h = hhdr->hb_next;
        continue;
      }

      next_size = nexthdr->hb_sz;
#  ifdef CHERI_PURECAP
      /* FIXME: Coalesce with super-capability. */
      /* The capability of `h` should cover the successor block as well. */
      if (!CAPABILITY_COVERS_RANGE(h, ADDR(next), ADDR(next) + next_size)) {
        h = hhdr->hb_next;
        continue;
      }
#  endif

      /*
       * Note that we usually try to avoid adjacent free blocks that are
       * either both mapped or both unmapped.  But that is not guaranteed
       * to hold since we remap blocks when we split them, and do not merge
       * at that point.  It may also not hold if the merged block would be
       * too big.
       */
      if (IS_MAPPED(hhdr) && !IS_MAPPED(nexthdr)) {
        /* Make both consistent, so that we can merge. */
        if (size > next_size) {
          GC_adjust_num_unmapped(next, nexthdr);
          GC_remap((ptr_t)next, next_size);
        } else {
          GC_adjust_num_unmapped(h, hhdr);
          GC_unmap((ptr_t)h, size);
          GC_unmap_gap((ptr_t)h, size, (ptr_t)next, next_size);
          hhdr->hb_flags |= WAS_UNMAPPED;
        }
      } else if (IS_MAPPED(nexthdr) && !IS_MAPPED(hhdr)) {
        if (size > next_size) {
          GC_adjust_num_unmapped(next, nexthdr);
          GC_unmap((ptr_t)next, next_size);
          GC_unmap_gap((ptr_t)h, size, (ptr_t)next, next_size);
        } else {
          GC_adjust_num_unmapped(h, hhdr);
          GC_remap((ptr_t)h, size);
          hhdr->hb_flags &= (unsigned char)~WAS_UNMAPPED;
          hhdr->hb_last_reclaimed = nexthdr->hb_last_reclaimed;
        }
      } else if (!IS_MAPPED(hhdr) && !IS_MAPPED(nexthdr)) {
        /* Unmap any gap in the middle. */
        GC_unmap_gap((ptr_t)h, size, (ptr_t)next, next_size);
      }
      /* If they are both unmapped, we merge, but leave unmapped. */
      GC_remove_from_fl_at(hhdr, i);
      GC_remove_from_fl(nexthdr);
      hhdr->hb_sz += nexthdr->hb_sz;
      GC_remove_header(next);
      /* The merged block may belong to a free list other than `i`-th. */
      GC_add_to_fl(h, hhdr);
      merged = TRUE;
      /* Start over at the beginning of list. */
      h = GC_hblkfreelist[i];
    }
  }
  return merged;
}

#endif /* USE_MUNMAP */

/*
 * Carve the first `size_needed` bytes out of the free block `h` (which
 * is described by `hhdr` and resides on the `index`-th free list).
 * The block is unlinked from the free list; the rest of the block, if
 * any, gets a new header and is put onto the free list matching its
 * size.  Memory for the block is mapped.  Return `h` on success (`hhdr`
 * remains the header for it, to be set up by the caller), or `NULL` if
 * a header for the rest of the block cannot be allocated.
 */
STATIC struct hblk *
GC_get_first_part(struct hblk *h, hdr *hhdr, size_t size_needed, size_t index)
{
  size_t avail_size;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(modHBLKSZ(size_needed) == 0);
  avail_size = hhdr->hb_sz;
  GC_ASSERT(modHBLKSZ(avail_size) == 0);
  GC_remove_from_fl_at(hhdr, index);

  if (avail_size != size_needed) {
    /* Return the remainder of the block to a free list. */
    struct hblk *tail = (struct hblk *)((ptr_t)h + size_needed);
    hdr *tail_hdr = GC_install_header(tail);

    if (UNLIKELY(NULL == tail_hdr)) {
      /* FIXME: This is likely to be very bad news... */
      WARN("Header allocation failed: dropping block\n", 0);
      return NULL;
    }
    tail_hdr->hb_block = tail;
    tail_hdr->hb_sz = avail_size - size_needed;
    tail_hdr->hb_flags = 0;
#ifdef GC_ASSERTIONS
    /*
     * Mark `h` as non-free, otherwise the assertion about the adjacent
     * free blocks would be violated.
     */
    hhdr->hb_flags &= (unsigned char)~FREE_BLK;
#endif
    GC_add_to_fl(tail, tail_hdr);
  }
  return h;
}

/*
 * Split the free block `hbp` (described by `hhdr`, residing on
 * the `index`-th free list) at `last_hbp`, an address inside the block;
 * the header `last_hdr` for `last_hbp` is assumed to be installed
 * already.  The tail part (`last_hbp`) takes the place of `hbp` in
 * the original free list, while the head part (`hbp`) is shrunk and
 * moved to the free list matching its new size.  Note that `last_hdr`
 * is not filled in completely (as the tail part is about to be
 * allocated by the caller) and the tail part may, in fact, stay on
 * the wrong free list for its size.  (Hence, adding it to a free list
 * is silly, but the code is cleaner this way and this path is hopefully
 * a rare one.)
 */
STATIC void
GC_split_block(struct hblk *hbp, hdr *hhdr, struct hblk *last_hbp,
               hdr *last_hdr, size_t index /* of free list */)
{
  struct hblk *pred = hhdr->hb_prev;
  struct hblk *succ = hhdr->hb_next;
  size_t head_size = (size_t)((ptr_t)last_hbp - (ptr_t)hbp);

  /* Fill in the header of the tail part. */
  last_hdr->hb_block = last_hbp;
  last_hdr->hb_sz = hhdr->hb_sz - head_size;
  last_hdr->hb_flags = 0;
  last_hdr->hb_prev = pred;
  last_hdr->hb_next = succ;

  /* Let the tail part take the place of `hbp` in the free list. */
  if (pred) {
    HDR(pred)->hb_next = last_hbp;
  } else {
    GC_hblkfreelist[index] = last_hbp;
  }
  if (succ) {
    HDR(succ)->hb_prev = last_hbp;
  }
  GC_ASSERT(GC_free_bytes[index] > head_size);
  GC_free_bytes[index] -= head_size;

  /* Shrink the head part and put it onto the proper free list. */
#ifdef USE_MUNMAP
  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
#endif
  hhdr->hb_sz = head_size;
  GC_add_to_fl(hbp, hhdr);
  last_hdr->hb_flags |= FREE_BLK;
}

/*
 * The forward declaration; the function is defined (and described) after
 * `GC_allochblk` which calls it.
 */
STATIC struct hblk *GC_allochblk_nth(size_t lb_adjusted, int kind,
                                     unsigned flags, size_t index,
                                     int may_split, size_t align_m1);

#ifdef USE_MUNMAP
/*
 * A special value of `may_split` argument of `GC_allochblk_nth` meaning
 * that splitting is allowed but remapping of a block followed by
 * splitting of it should be avoided.
 */
#  define AVOID_SPLIT_REMAPPED 2
#endif

/*
 * Allocate a heap block suitable for objects of `lb_adjusted` bytes and
 * of the given `kind` from the free lists.  `flags` should be
 * `IGNORE_OFF_PAGE` or zero; `align_m1` is the requested alignment of
 * the block minus one.  An exact-size match is tried first; then larger
 * free blocks are considered for splitting, the highest free list
 * examined being chosen according to the current state of the heap.
 * Return `NULL` if no block is allocated (including the case of
 * the requested size overflow).
 */
GC_INNER struct hblk *
GC_allochblk(size_t lb_adjusted, int kind,
             unsigned flags /* `IGNORE_OFF_PAGE` or 0 */, size_t align_m1)
{
  size_t n_blocks, fl_index;
  /* The highest index of a free list whose blocks may be split. */
  size_t last_fl_index;
  int split_mode = TRUE;
  struct hblk *hbp;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT((lb_adjusted & (GC_GRANULE_BYTES - 1)) == 0);
  n_blocks = OBJ_SZ_TO_BLOCKS_CHECKED(lb_adjusted);
  if (UNLIKELY(SIZET_SAT_ADD(n_blocks * HBLKSIZE, align_m1)
               >= (GC_SIZE_MAX >> 1))) {
    /* The size is too big. */
    return NULL;
  }

  fl_index = GC_hblk_fl_from_blocks(n_blocks);
  /* At first, look for a block of exactly the needed size. */
  hbp = GC_allochblk_nth(lb_adjusted, kind, flags, fl_index, FALSE,
                         align_m1);
  if (hbp != NULL)
    return hbp;

  if (GC_use_entire_heap || GC_dont_gc
      || GC_heapsize - GC_large_free_bytes < GC_requested_heapsize
      || GC_incremental || !GC_should_collect()) {
    /* More of the heap should be used, even at the cost of splitting. */
    last_fl_index = N_HBLK_FLS;
  } else if (GC_finalizer_bytes_freed > (GC_heapsize >> 4)) {
    /*
     * Lots of memory is being deallocated from finalizers, so it is
     * better to fail and collect sooner rather than later.
     */
    last_fl_index = 0;
  } else {
    /*
     * Go ahead and split if enough large blocks are left to cover any
     * previous request for large blocks.  Assuming a steady state, that
     * should be safe.  It means that the full heap could be used if only
     * small objects are allocated.
     */
    last_fl_index = GC_enough_large_bytes_left();
#ifdef USE_MUNMAP
    if (last_fl_index > 0)
      split_mode = AVOID_SPLIT_REMAPPED;
#endif
  }

  if (0 == align_m1 && fl_index < UNIQUE_THRESHOLD) {
    /*
     * All the blocks of the list are exact matches, hence there is no
     * reason to examine the list once again.
     */
    ++fl_index;
  }
  while (fl_index <= last_fl_index) {
    hbp = GC_allochblk_nth(lb_adjusted, kind, flags, fl_index, split_mode,
                           align_m1);
    if (hbp != NULL)
      return hbp;
    ++fl_index;
  }
  return NULL;
}

/*
 * Compute the number of bytes to be added to the address of `p` to make
 * it a multiple of `align_m1 + 1` (zero if it is aligned already).
 * `align_m1 + 1` is expected to be a power of two.
 */
#define ALIGN_PAD_SZ(p, align_m1) \
  (((align_m1) + 1 - (size_t)ADDR(p)) & (align_m1))

static GC_bool
next_hblk_fits_better(const hdr *hhdr, size_t size_avail, size_t size_needed,
                      size_t align_m1)
{
  const hdr *nexthdr;
  size_t next_size;
  size_t next_ofs;
  struct hblk *next_hbp = hhdr->hb_next;

  if (NULL == next_hbp)
    return FALSE; /*< no next block */
  GET_HDR(next_hbp, nexthdr);
  next_size = nexthdr->hb_sz;
  if (size_avail <= next_size)
    return FALSE; /*< not enough size */

  next_ofs = ALIGN_PAD_SZ(next_hbp, align_m1);
  return next_size >= size_needed + next_ofs
#ifndef NO_BLACK_LISTING
         && !GC_is_black_listed(next_hbp + divHBLKSZ(next_ofs), size_needed)
#endif
      ;
}

/*
 * Starting at `last_hbp`, look for the first address aligned to
 * `align_m1 + 1` such that `GC_is_black_listed()` reports nothing for
 * `eff_size_needed` bytes at that address.  The search is given up
 * (and `NULL` is returned) once the candidate address goes beyond
 * the initial `last_hbp` plus `size_remain` (aligned down).
 * If black-listing is turned off, just the aligned address is returned.
 */
static struct hblk *
find_nonbl_hblk(struct hblk *last_hbp, size_t size_remain,
                size_t eff_size_needed, size_t align_m1)
{
#ifdef NO_BLACK_LISTING
  UNUSED_ARG(size_remain);
  UNUSED_ARG(eff_size_needed);
  return last_hbp + divHBLKSZ(ALIGN_PAD_SZ(last_hbp, align_m1));
#else
  ptr_t search_end
      = PTR_ALIGN_DOWN((ptr_t)last_hbp + size_remain, align_m1 + 1);

  for (;;) {
    struct hblk *resume_at;

    /* Advance to the properly aligned address. */
    last_hbp += divHBLKSZ(ALIGN_PAD_SZ(last_hbp, align_m1));
    resume_at = GC_is_black_listed(last_hbp, eff_size_needed);
    if (NULL == resume_at) {
      /* The area is not black-listed. */
      return last_hbp;
    }
    /* Continue from the address reported by the black-list check. */
    last_hbp = resume_at;
    if (!ADDR_GE(search_end, (ptr_t)last_hbp))
      break;
  }
  return NULL;
#endif
}

#ifndef NO_BLACK_LISTING
/*
 * Remove the free block `hbp` (described by `hhdr`) from the `n`-th
 * free list and turn it into a sequence of allocated pointer-free
 * blocks of `HBLKSIZE` bytes each.  Dropping the block in such small
 * chunks maximizes the chance that some of them are recovered later.
 * The processing stops early if a header for the next chunk cannot be
 * allocated.
 */
static void
drop_hblk_in_chunks(size_t n, struct hblk *hbp, hdr *hhdr)
{
  size_t dropped_size = hhdr->hb_sz;
  const struct hblk *end_hbp = hbp + divHBLKSZ(dropped_size);

  GC_ASSERT(HDR(hbp) == hhdr);
  GC_ASSERT(modHBLKSZ(dropped_size) == 0 && dropped_size > 0);
  GC_large_free_bytes -= dropped_size;
  GC_bytes_dropped += dropped_size;
  GC_remove_from_fl_at(hhdr, n);

  /* The loop is left on a header allocation failure too. */
  while (LIKELY(hhdr != NULL)) {
    /* Note: this call of `setup_header()` cannot fail. */
    (void)setup_header(hhdr, hbp, HBLKSIZE, PTRFREE, 0);
    if (GC_debugging_started)
      BZERO(hbp, HBLKSIZE);
    ++hbp;
    if (ADDR_GE(hbp, end_hbp))
      break;

    hhdr = GC_install_header(hbp);
  }
}
#endif /* !NO_BLACK_LISTING */

#if defined(MPROTECT_VDB) && defined(DONT_PROTECT_PTRFREE)
/*
 * Check whether placing a block of the given pointer-freeness
 * (`is_ptrfree`) into the memory page containing `hbp` would mix
 * pointer-free and pointer-containing blocks in that page.
 * Only the first occupied block found in the page is examined.
 * Return `FALSE` if all the blocks of the page are free.
 */
static GC_bool
is_hblks_mix_in_page(struct hblk *hbp, GC_bool is_ptrfree)
{
  struct hblk *page_start = HBLK_PAGE_ALIGNED(hbp);
  size_t n_blocks = divHBLKSZ(GC_page_size);
  size_t k;

  for (k = 0; k < n_blocks; k++) {
    struct hblk *blk = &page_start[k];
    hdr *blk_hdr;

    GET_HDR(blk, blk_hdr);
    if (blk_hdr != NULL) {
      (void)GC_find_starting_hblk(blk, &blk_hdr);
      if (!HBLK_IS_FREE(blk_hdr)) {
        /* An occupied block is found, it is OK to stop at it. */
        return IS_PTRFREE(blk_hdr) != is_ptrfree;
      }
    }
  }
  return FALSE;
}
#endif /* MPROTECT_VDB && DONT_PROTECT_PTRFREE */

/*
 * The same as `GC_allochblk`, but with search restricted to the
 * `index`-th free list.  `flags` should be `IGNORE_OFF_PAGE` or zero;
 * `may_split` indicates whether it is OK to split larger blocks; size
 * `lb_adjusted` is in bytes.  If `may_split` is set to
 * `AVOID_SPLIT_REMAPPED`, then memory remapping followed by splitting
 * should be generally avoided.  Rounded-up `lb_adjusted` plus
 * `align_m1` value should be less than `GC_SIZE_MAX / 2`.
 * Return `NULL` if the free list has no suitable block or if one of
 * the header-related operations fails.
 */
STATIC struct hblk *
GC_allochblk_nth(size_t lb_adjusted, int kind, unsigned flags, size_t index,
                 int may_split, size_t align_m1)
{
  struct hblk *hbp, *last_hbp;
  /* The header corresponding to `hbp`. */
  hdr *hhdr;
  /* Number of bytes in requested objects. */
  size_t size_needed = (lb_adjusted + HBLKSIZE - 1) & ~(HBLKSIZE - 1);

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(((align_m1 + 1) & align_m1) == 0 && lb_adjusted > 0);
  GC_ASSERT(0 == align_m1 || modHBLKSZ(align_m1 + 1) == 0);
#ifndef NO_BLACK_LISTING
retry:
#endif
  /*
   * Search for a big enough block in free list.  The loop is left by
   * `break` with `hbp` pointing to the chosen free block and `last_hbp`
   * pointing to the address inside it where the allocation starts.
   */
  for (hbp = GC_hblkfreelist[index];; hbp = hhdr->hb_next) {
    size_t size_avail; /*< bytes available in this block */
    size_t align_ofs;

    if (hbp /* `!= NULL` */) {
      /* CPPCHECK */
    } else {
      return NULL;
    }
    GET_HDR(hbp, hhdr); /*< set `hhdr` value */
    size_avail = hhdr->hb_sz;
    if (!may_split && size_avail != size_needed)
      continue;

    align_ofs = ALIGN_PAD_SZ(hbp, align_m1);
    if (size_avail < size_needed + align_ofs)
      continue; /*< the block is too small */

    if (size_avail != size_needed) {
      /*
       * If the next heap block is obviously better, go on.
       * This prevents us from disassembling a single large block to get
       * tiny blocks.
       */
      if (next_hblk_fits_better(hhdr, size_avail, size_needed, align_m1))
        continue;
    }

#if defined(MPROTECT_VDB) && defined(DONT_PROTECT_PTRFREE)
    /*
     * Avoid write-protecting pointer-free blocks (only the case
     * if page size is larger than the block size).
     */
    GC_ASSERT(GC_page_size != 0);
    if (GC_page_size != HBLKSIZE
        && (!GC_incremental /*< not enabled yet */
            || GC_incremental_protection_needs() != GC_PROTECTS_NONE)
        && is_hblks_mix_in_page(hbp, kind == PTRFREE))
      continue;
#endif

    /* In these cases the black-listing is not taken into account. */
    if (IS_UNCOLLECTABLE(kind)
        || (kind == PTRFREE && size_needed <= MAX_BLACK_LIST_ALLOC)) {
      last_hbp = hbp + divHBLKSZ(align_ofs);
      break;
    }

    last_hbp = find_nonbl_hblk(
        hbp, size_avail - size_needed,
        (flags & IGNORE_OFF_PAGE) != 0 ? HBLKSIZE : size_needed, align_m1);
    /* Is non-black-listed part of enough size? */
    if (last_hbp != NULL) {
#ifdef USE_MUNMAP
      /* Avoid remapping followed by splitting. */
      if (may_split == AVOID_SPLIT_REMAPPED && last_hbp != hbp
          && !IS_MAPPED(hhdr))
        continue;
#endif
      break;
    }

#ifndef NO_BLACK_LISTING
    /*
     * The block is completely black-listed.  If so, we need to
     * drop some such blocks, since otherwise we spend all our
     * time traversing them if pointer-free blocks are unpopular.
     * A dropped block will be reconsidered at next collection.
     */
    if (size_needed == HBLKSIZE && 0 == align_m1 && !GC_find_leak_inner
        && IS_MAPPED(hhdr) && (++GC_drop_blacklisted_count & 3) == 0) {
      /* Save the predecessor as `hhdr` is reused by the dropped block. */
      const struct hblk *prev = hhdr->hb_prev;

      drop_hblk_in_chunks(index, hbp, hhdr);
      if (NULL == prev)
        goto retry;
      /* Restore `hhdr` to point at free block. */
      hhdr = HDR(prev);
      continue;
    }

    if (size_needed > BL_LIMIT && size_avail - size_needed > BL_LIMIT) {
      /* Punt, since anything else risks unreasonable heap growth. */
      if (++GC_large_alloc_warn_suppressed >= GC_large_alloc_warn_interval) {
        WARN("Repeated allocation of very large block"
             " (appr. size %" WARN_PRIuPTR " KiB):\n"
             "\tMay lead to memory leak and poor performance\n",
             size_needed >> 10);
        GC_large_alloc_warn_suppressed = 0;
      }
      last_hbp = hbp + divHBLKSZ(align_ofs);
      break;
    }
#endif
  }

  GC_ASSERT((ADDR(last_hbp) & align_m1) == 0);
  if (last_hbp != hbp) {
    /* The allocation starts inside the block, so split the latter. */
    hdr *last_hdr = GC_install_header(last_hbp);

    if (UNLIKELY(NULL == last_hdr))
      return NULL;
#ifdef USE_MUNMAP
    /* Make sure it is mapped before we mangle it. */
    if (!IS_MAPPED(hhdr)) {
      GC_adjust_num_unmapped(hbp, hhdr);
      GC_remap((ptr_t)hbp, hhdr->hb_sz);
      hhdr->hb_flags &= (unsigned char)~WAS_UNMAPPED;
    }
#endif
    /* Split the block at `last_hbp`. */
    GC_split_block(hbp, hhdr, last_hbp, last_hdr, index);
    /*
     * We must now allocate `last_hbp`, since it may be on the wrong
     * free list.
     */
    hbp = last_hbp;
    hhdr = last_hdr;
  }
  GC_ASSERT(hhdr->hb_sz >= size_needed);

#ifdef USE_MUNMAP
  if (!IS_MAPPED(hhdr)) {
    GC_adjust_num_unmapped(hbp, hhdr);
    GC_remap((ptr_t)hbp, hhdr->hb_sz);
    hhdr->hb_flags &= (unsigned char)~WAS_UNMAPPED;
    /* Note: this may leave adjacent, mapped free blocks. */
  }
#endif
  /*
   * `hbp` may be on the wrong free list; the parameter `index` is
   * important.
   */
  hbp = GC_get_first_part(hbp, hhdr, size_needed, index);
  if (UNLIKELY(NULL == hbp))
    return NULL;

  /* Add it to map of valid blocks. */
  if (UNLIKELY(!GC_install_counts(hbp, size_needed)))
    return NULL; /*< note: this leaks memory under very rare conditions */

  /* Set up the header. */
  GC_ASSERT(HDR(hbp) == hhdr);
#ifdef MARK_BIT_PER_OBJ
  (void)setup_header(hhdr, hbp, lb_adjusted, kind, flags);
  /* Result is always `TRUE`, not checked to avoid a cppcheck warning. */
#else
  if (UNLIKELY(!setup_header(hhdr, hbp, lb_adjusted, kind, flags))) {
    GC_remove_counts(hbp, size_needed);
    return NULL; /*< ditto */
  }
#endif

#ifndef GC_DISABLE_INCREMENTAL
  /*
   * Notify virtual dirty bit implementation that we are about to write.
   * Ensure that pointer-free objects are not protected if it is avoidable.
   * This also ensures that newly allocated blocks are treated as
   * dirty - it is necessary since we do not protect free blocks.
   */
  GC_ASSERT(modHBLKSZ(size_needed) == 0);
  GC_remove_protection(hbp, divHBLKSZ(size_needed), IS_PTRFREE(hhdr));
#endif
  /*
   * We just successfully allocated a block.  Restart count of consecutive
   * failures.
   */
  GC_alloc_fail_count = 0;

  GC_large_free_bytes -= size_needed;
  GC_ASSERT(IS_MAPPED(hhdr));
  return hbp;
}

#ifdef VALGRIND_TRACKING
/*
 * A hook function doing nothing except for passing `p` to
 * `GC_noop1_ptr()`; it is declared as a non-inlined one.
 * NOTE(review): presumably it is called on an object deallocation to
 * let Valgrind-based tools intercept the event - confirm with callers.
 * Note: this is intentionally defined in a file other than `malloc.c`
 * and `reclaim.c` files.
 */
GC_ATTR_NOINLINE
GC_API void GC_CALLBACK
GC_free_profiler_hook(void *p)
{
#  ifndef PARALLEL_MARK
  GC_ASSERT(I_HOLD_LOCK());
#  endif
  /* Prevent treating this function by the compiler as a no-op one. */
  GC_noop1_ptr(p);
}
#endif /* VALGRIND_TRACKING */

/*
 * Return the block `hbp` to the free lists.  The size recorded in
 * the block header is changed to the size of the whole block
 * (a multiple of `HBLKSIZE`), the block is marked as free, coalesced
 * with the mapped free neighbors (the successor and the predecessor)
 * unless the merged size would overflow, and the result is put onto
 * the free list matching its size.  The function aborts if the block is
 * excessively large or if it is marked as free already.
 */
GC_INNER void
GC_freehblk(struct hblk *hbp)
{
  struct hblk *next, *prev;
  hdr *hhdr, *prevhdr, *nexthdr;
  size_t size;

  GET_HDR(hbp, hhdr);
  /* At this point `hb_sz` is still the size of objects in the block. */
  size = HBLKSIZE * OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
  if ((size & SIZET_SIGNB) != 0) {
    /*
     * Probably possible if we try to allocate more than half the address
     * space at once.  If we do not catch it here, strange things happen
     * later.
     */
    ABORT("Deallocating excessively large block.  Too large an allocation?");
  }
  GC_remove_counts(hbp, size);
  hhdr->hb_sz = size;
#ifdef USE_MUNMAP
  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
#endif

  /* Check for duplicate deallocation in the easy case. */
  if (HBLK_IS_FREE(hhdr)) {
    ABORT_ARG1("Duplicate large block deallocation", " of %p", (void *)hbp);
  }

  GC_ASSERT(IS_MAPPED(hhdr));
  hhdr->hb_flags |= FREE_BLK;
  next = (struct hblk *)((ptr_t)hbp + size);
  GET_HDR(next, nexthdr);
  /* Note: the predecessor is looked up before any merging is done. */
  prev = GC_free_block_ending_at(hbp);
  /* Coalesce with successor, if possible. */
  if (nexthdr != NULL && HBLK_IS_FREE(nexthdr)
      && IS_MAPPED(nexthdr)
#ifdef CHERI_PURECAP
      /* FIXME: Coalesce with super-capability. */
      /*
       * Bounds of capability should span the entire coalesced memory;
       * bounds being larger than the block size is OK; bounded by the
       * imprecision of original capability obtained from system memory.
       */
      && CAPABILITY_COVERS_RANGE(hbp, ADDR(next), ADDR(next) + nexthdr->hb_sz)
#endif
      && !BLOCKS_MERGE_OVERFLOW(hhdr, nexthdr)) {
    GC_remove_from_fl(nexthdr);
    hhdr->hb_sz += nexthdr->hb_sz;
    GC_remove_header(next);
  }

  /* Coalesce with predecessor, if possible. */
  if (prev /* `!= NULL` */) { /*< CPPCHECK */
    prevhdr = HDR(prev);
    if (IS_MAPPED(prevhdr)
#ifdef CHERI_PURECAP
        /* FIXME: Coalesce with super-capability. */
        && cheri_base_get(hbp) <= ADDR(prev)
#endif
        && !BLOCKS_MERGE_OVERFLOW(prevhdr, hhdr)) {
      GC_remove_from_fl(prevhdr);
      prevhdr->hb_sz += hhdr->hb_sz;
#ifdef USE_MUNMAP
      prevhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
#endif
      GC_remove_header(hbp);
      /* The merged block starts at the predecessor. */
      hbp = prev;
      hhdr = prevhdr;
    }
  }
  /*
   * FIXME: It is not clear if we really always want to do these merges
   * with `USE_MUNMAP`, since it updates ages and hence prevents unmapping.
   */

  /*
   * Only the size of the block being freed is added, as the merged
   * neighbors were free before.
   */
  GC_large_free_bytes += size;
  GC_add_to_fl(hbp, hhdr);
}

/*
 * Copyright (c) 1988-1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1996 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2011 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


/*
 * Separate free lists are maintained for different sized objects up
 * to `MAXOBJBYTES`.
 * The call `GC_allocobj(lg, kind)` ensures that the free list for the given
 * kind of objects of the given size in granules is a nonempty one.
 * It returns a pointer to the first entry on the free list.
 * In a single-threaded world, `GC_allocobj` may be called to allocate
 * an object of small size `lb` (and `NORMAL` kind) as follows
 * (`GC_generic_malloc_inner` is a wrapper over `GC_allocobj` which also
 * fills in `GC_size_map` if needed):
 *
 * ```
 *   lg = GC_size_map[lb];
 *   op = GC_objfreelist[lg];
 *   if (NULL == op) {
 *     op = GC_generic_malloc_inner(lb, NORMAL, 0);
 *   } else {
 *     GC_objfreelist[lg] = obj_link(op);
 *     GC_bytes_allocd += GRANULES_TO_BYTES((word)lg);
 *   }
 * ```
 *
 * Note that this is very fast if the free list is not empty; it should
 * only involve the execution of 4 or 5 simple instructions.
 * All composite objects on freelists are cleared, except for
 * their first "pointer-sized" word.
 */

/*
 * The allocator uses `GC_allochblk` to allocate large chunks of objects.
 * These chunks all start on addresses that are multiples of `HBLKSIZE`.
 * Each allocated chunk has an associated header, which can be located
 * quickly based on the address of the chunk.  This makes it possible
 * to check quickly whether an arbitrary address corresponds to an object
 * administered by the allocator.  (See `headers.c` file for details.)
 */

/* Number of bytes not intended to be collected. */
word GC_non_gc_bytes = 0;

/*
 * The collection counter.  Its value truncated to `unsigned short` is
 * recorded in `hb_last_reclaimed` field of the heap block headers.
 * NOTE(review): presumably incremented once per collection - confirm.
 */
word GC_gc_no = 0;

#ifndef NO_CLOCK

/*
 * Switch on the accumulation of the timing statistics guarded by
 * `GC_measure_performance` (the totals updated by
 * `GC_try_to_collect_inner()` and `GC_stopped_mark()`).
 * Note: the flag is written without acquiring the allocator lock.
 */
GC_API void GC_CALL
GC_start_performance_measurement(void)
{
  GC_measure_performance = TRUE;
}

/*
 * Report the whole-millisecond part of the time accumulated by
 * `GC_try_to_collect_inner()` for completed full collections.
 * The value is read without acquiring the allocator lock.
 */
GC_API unsigned long GC_CALL
GC_get_full_gc_total_time(void)
{
  unsigned long total_ms = GC_full_gc_total_time;

  return total_ms;
}

/*
 * Report the whole-millisecond part of the time accumulated by
 * `GC_stopped_mark()` for marking with the world stopped.
 * The value is read without acquiring the allocator lock.
 */
GC_API unsigned long GC_CALL
GC_get_stopped_mark_total_time(void)
{
  unsigned long total_ms = GC_stopped_mark_total_time;

  return total_ms;
}

#  ifndef MAX_TOTAL_TIME_DIVISOR
/*
 * The cap on `GC_world_stopped_total_divisor`: once the divisor reaches
 * this value, `GC_stopped_mark()` halves both the divisor and the
 * accumulated time before adding a new sample.
 * We shall not use big values here (so "outdated" delay time values would
 * have less impact on "average" delay time value than newer ones).
 */
#    define MAX_TOTAL_TIME_DIVISOR 1000
#  endif

/*
 * Compute the average world-stopped delay, in nanoseconds, from the
 * accumulated total (kept in milliseconds) and its divisor.
 * Returns 0 if the divisor is zero, and `~0UL` if the average does not
 * fit into `unsigned long`.
 */
GC_API unsigned long GC_CALL
GC_get_avg_stopped_mark_time_ns(void)
{
  const unsigned long ns_per_ms = 1000UL * 1000;
  unsigned long sum_ms;
  unsigned n_samples;

  /* Take a consistent snapshot of both values. */
  READER_LOCK();
  sum_ms = (unsigned long)GC_world_stopped_total_time;
  n_samples = GC_world_stopped_total_divisor;
  READER_UNLOCK();

  if (0 == n_samples) {
    /* Nothing has been accumulated since the measurements were started. */
    GC_ASSERT(0 == sum_ms);
    return 0;
  }

  /*
   * Scale both operands down by two until the multiplication below
   * cannot overflow.
   */
  while (sum_ms > ~0UL / ns_per_ms) {
    n_samples >>= 1;
    if (UNLIKELY(0 == n_samples)) {
      /* The true quotient exceeds the largest representable value. */
      return ~0UL;
    }
    sum_ms >>= 1;
  }

  return sum_ms * ns_per_ms / n_samples;
}

#endif /* !NO_CLOCK */

#ifndef GC_DISABLE_INCREMENTAL
/*
 * The incremental mode flag.  When it is `FALSE`, `GC_maybe_gc()` falls
 * back to `GC_gcollect_inner()`.
 */
GC_INNER GC_bool GC_incremental = FALSE; /*< by default, stop the world */
#endif

/* Report the current value of `GC_incremental` as an `int`. */
GC_API int GC_CALL
GC_is_incremental_mode(void)
{
  int is_incremental = (int)GC_incremental;

  return is_incremental;
}

#ifdef THREADS
/*
 * If `PARALLEL_MARK` is defined, a nonzero value makes the collection
 * entry points call `GC_wait_for_reclaim()` before starting to mark.
 */
int GC_parallel = FALSE; /*< parallel collection is off by default */
#endif

#if defined(GC_FULL_FREQ) && !defined(CPPCHECK)
int GC_full_freq = GC_FULL_FREQ;
#else
/*
 * Every `GC_full_freq + 1` collection is a full collection, whether we
 * need it or not.  (`GC_maybe_gc()` compares its count of consecutive
 * partial collections against this value.)
 */
int GC_full_freq = 19;
#endif

#ifdef THREAD_LOCAL_ALLOC
/*
 * Set to `TRUE` by `GC_stopped_mark()` and `GC_stop_world_external()`
 * right after `STOP_WORLD()`, and reset before `START_WORLD()`.
 */
GC_INNER GC_bool GC_world_stopped = FALSE;
#endif

/*
 * When set, `GC_should_collect()` returns `FALSE` unless an incremental
 * collection has been requested explicitly.
 */
STATIC GC_bool GC_disable_automatic_collection = FALSE;

/*
 * Store `value` (converted to `GC_bool`) into
 * `GC_disable_automatic_collection`, holding the allocator lock.
 */
GC_API void GC_CALL
GC_set_disable_automatic_collection(int value)
{
  GC_bool is_disabled = (GC_bool)value;

  LOCK();
  GC_disable_automatic_collection = is_disabled;
  UNLOCK();
}

/*
 * Fetch `GC_disable_automatic_collection` while holding the allocator
 * lock in the reader mode.
 */
GC_API int GC_CALL
GC_get_disable_automatic_collection(void)
{
  int is_disabled;

  READER_LOCK();
  is_disabled = (int)GC_disable_automatic_collection;
  READER_UNLOCK();

  return is_disabled;
}

/*
 * The version macros are now defined in `gc_version.h` file, which is
 * included by `gc.h` file, which, in turn, is included by `gc_priv.h` file.
 */
#ifndef GC_NO_VERSION_VAR
/* Declared here with C linkage, just ahead of the definition. */
EXTERN_C_BEGIN
extern const GC_VERSION_VAL_T GC_version;
EXTERN_C_END

/*
 * The version packed as `(major << 16) | (minor << 8) | micro`; the same
 * expression is returned by `GC_get_version()`.
 */
const GC_VERSION_VAL_T GC_version = ((GC_VERSION_VAL_T)GC_VERSION_MAJOR << 16)
                                    | (GC_VERSION_MINOR << 8)
                                    | GC_VERSION_MICRO;
#endif

/*
 * Return the library version packed into a single value as
 * `(major << 16) | (minor << 8) | micro`.
 */
GC_API GC_VERSION_VAL_T GC_CALL
GC_get_version(void)
{
  GC_VERSION_VAL_T packed = (GC_VERSION_VAL_T)GC_VERSION_MAJOR << 16;

  packed |= GC_VERSION_MINOR << 8;
  packed |= GC_VERSION_MICRO;
  return packed;
}

/*
 * Tell whether the library was compiled with `DONT_ADD_BYTE_AT_END`
 * defined: 1 if so, 0 otherwise.
 */
GC_API int GC_CALL
GC_get_dont_add_byte_at_end(void)
{
  int is_defined;

#ifdef DONT_ADD_BYTE_AT_END
  is_defined = 1;
#else
  /* This is meaningful only if `GC_all_interior_pointers`. */
  is_defined = 0;
#endif
  return is_defined;
}

/* Some more variables. */

/* The initial value is `TRUE` only if `GC_DONT_EXPAND` is defined. */
#ifdef GC_DONT_EXPAND
int GC_dont_expand = TRUE;
#else
int GC_dont_expand = FALSE;
#endif

/*
 * The divisor applied by `min_bytes_allocd()` to the estimated amount of
 * memory to scan.
 */
#if defined(GC_FREE_SPACE_DIVISOR) && !defined(CPPCHECK)
word GC_free_space_divisor = GC_FREE_SPACE_DIVISOR; /*< must be positive */
#else
word GC_free_space_divisor = 3;
#endif

/*
 * The stop function that never requests to stop: always returns `FALSE`.
 * It is the initial value of `GC_default_stop_func`; the collector also
 * compares a given stop function against its address to recognize
 * a collection that cannot be interrupted.
 */
GC_INNER int GC_CALLBACK
GC_never_stop_func(void)
{
  return FALSE;
}

#if defined(GC_TIME_LIMIT) && !defined(CPPCHECK)
/*
 * We try to keep pause times from exceeding this by much.
 * In milliseconds.
 */
unsigned long GC_time_limit = GC_TIME_LIMIT;
#elif defined(PARALLEL_MARK)
/*
 * The parallel marker cannot be interrupted for now, so the time limit
 * is absent by default.
 */
unsigned long GC_time_limit = GC_TIME_UNLIMITED;
#else
/* The built-in default pause time limit (same unit as above). */
unsigned long GC_time_limit = 15;
#endif

#ifndef NO_CLOCK
/*
 * The nanoseconds add-on to `GC_time_limit` value.
 * Set by `GC_set_time_limit_tv()`, which asserts that the value is
 * less than `TV_NSEC_LIMIT`.
 * Not updated by `GC_set_time_limit()`.
 * Ignored if the value of `GC_time_limit` is `GC_TIME_UNLIMITED`.
 */
STATIC unsigned long GC_time_lim_nsec = 0;

#  define TV_NSEC_LIMIT (1000UL * 1000) /*< amount of nanoseconds in 1 ms */

/*
 * Set the pause time limit from `tv`: `tv.tv_ms` goes to `GC_time_limit`
 * and `tv.tv_nsec` goes to `GC_time_lim_nsec`.  The argument ranges are
 * checked only by the assertions.  The allocator lock is not acquired
 * here.
 */
GC_API void GC_CALL
GC_set_time_limit_tv(struct GC_timeval_s tv)
{
  GC_ASSERT(tv.tv_ms <= GC_TIME_UNLIMITED);
  GC_ASSERT(tv.tv_nsec < TV_NSEC_LIMIT);
  GC_time_limit = tv.tv_ms;
  GC_time_lim_nsec = tv.tv_nsec;
}

/*
 * Return the current pause time limit: `GC_time_limit` in `tv_ms` and
 * `GC_time_lim_nsec` in `tv_nsec`.  The allocator lock is not acquired.
 */
GC_API struct GC_timeval_s GC_CALL
GC_get_time_limit_tv(void)
{
  struct GC_timeval_s limit;

  limit.tv_ms = GC_time_limit;
  limit.tv_nsec = GC_time_lim_nsec;

  return limit;
}

/*
 * Time at which we stopped world.  Used only by `GC_timeout_stop_func()`.
 * It is sampled (if the time limit is set) right before calling
 * `GC_stopped_mark()` with `GC_timeout_stop_func` as the stop function.
 */
STATIC CLOCK_TYPE GC_start_time = CLOCK_TYPE_INITIALIZER;
#endif /* !NO_CLOCK */

/*
 * Number of attempts at finishing collection within `GC_time_limit`.
 * Incremented when a time-limited stopped marking is abandoned.
 */
STATIC int GC_n_attempts = 0;

/*
 * The client-adjustable stop function, see `GC_set_stop_func()`.
 * Note: accessed holding the allocator lock.
 */
STATIC GC_stop_func GC_default_stop_func = GC_never_stop_func;

/*
 * Replace `GC_default_stop_func` with `stop_func` (asserted to be
 * non-null), holding the allocator lock.
 */
GC_API void GC_CALL
GC_set_stop_func(GC_stop_func stop_func)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(stop_func));
  LOCK();
  GC_default_stop_func = stop_func;
  UNLOCK();
}

/*
 * Fetch the current value of `GC_default_stop_func` while holding the
 * allocator lock in the reader mode.
 */
GC_API GC_stop_func GC_CALL
GC_get_stop_func(void)
{
  GC_stop_func current;

  READER_LOCK();
  current = GC_default_stop_func;
  READER_UNLOCK();

  return current;
}

#if defined(GC_DISABLE_INCREMENTAL) || defined(NO_CLOCK)
/* No time-based stopping in this configuration. */
#  define GC_timeout_stop_func GC_default_stop_func
#else
/*
 * The stop function which, in addition to consulting
 * `GC_default_stop_func`, requests to stop once the time elapsed since
 * `GC_start_time` has reached the limit given by `GC_time_limit` and
 * `GC_time_lim_nsec`.  The clock is sampled on every 4th call only.
 * Should be called with the allocator lock held.
 */
STATIC int GC_CALLBACK
GC_timeout_stop_func(void)
{
  static unsigned n_calls = 0;
  CLOCK_TYPE now;
  unsigned long elapsed_ms, elapsed_ns_frac;

  GC_ASSERT(I_HOLD_LOCK());
  if (GC_default_stop_func())
    return TRUE;

  if (GC_TIME_UNLIMITED == GC_time_limit)
    return FALSE;
  /* The call counter advances only when the time limit is in effect. */
  if ((n_calls++ & 3) != 0)
    return FALSE;

  GET_TIME(now);
  elapsed_ms = MS_TIME_DIFF(now, GC_start_time);
  elapsed_ns_frac = NS_FRAC_TIME_DIFF(now, GC_start_time);
#  if defined(CPPCHECK)
  GC_noop1_ptr(&elapsed_ns_frac);
#  endif

  /* Not expired yet if the milliseconds part is below the limit... */
  if (elapsed_ms < GC_time_limit)
    return FALSE;
  /* ... or if it is equal but the nanoseconds part is below its limit. */
  if (elapsed_ms == GC_time_limit && elapsed_ns_frac < GC_time_lim_nsec)
    return FALSE;

  GC_COND_LOG_PRINTF("Abandoning stopped marking after %lu ms %lu ns"
                     " (attempt %d)\n",
                     elapsed_ms, elapsed_ns_frac, GC_n_attempts);
  return TRUE;
}
#endif /* !GC_DISABLE_INCREMENTAL */

#ifdef THREADS
/*
 * The stack size estimate used by `min_bytes_allocd()` in the
 * multi-threaded case.  Not updated anywhere in this part of the file.
 */
GC_INNER word GC_total_stacksize = 0;
#endif

/*
 * The lowest value returned by `min_bytes_allocd()`.
 * Adjustable with `GC_set_min_bytes_allocd()`.
 */
static size_t min_bytes_allocd_minimum = 1;

/*
 * Set the lower bound of the `min_bytes_allocd()` result.  `value` is
 * asserted to be nonzero.  The allocator lock is not acquired here.
 */
GC_API void GC_CALL
GC_set_min_bytes_allocd(size_t value)
{
  GC_ASSERT(value > 0);
  min_bytes_allocd_minimum = value;
}

/*
 * Return the current lower bound of the `min_bytes_allocd()` result.
 * The allocator lock is not acquired here.
 */
GC_API size_t GC_CALL
GC_get_min_bytes_allocd(void)
{
  size_t lower_bound = min_bytes_allocd_minimum;

  return lower_bound;
}

/*
 * Return the minimum number of bytes that must be allocated between
 * collections to amortize the cost of the latter.  Should be nonzero.
 */
static word
min_bytes_allocd(void)
{
  word result;
  word stack_size;
  /*
   * Total size of roots, it includes double stack size, since the stack
   * is expensive to scan.
   */
  word total_root_size;
  /* Estimate of memory to be scanned during normal collection. */
  word scan_size;

  GC_ASSERT(I_HOLD_LOCK());
#ifdef THREADS
  if (GC_need_to_lock) {
    /* We are multi-threaded... */
    stack_size = GC_total_stacksize;
    /*
     * For now, we just use the value computed during the latest garbage
     * collection.
     */
#  ifdef DEBUG_THREADS
    GC_log_printf("Total stacks size: %lu\n", (unsigned long)stack_size);
#  endif
  } else
#endif
  /* else */ {
#ifdef STACK_NOT_SCANNED
    stack_size = 0;
#elif defined(STACK_GROWS_UP)
    stack_size = (word)(GC_approx_sp() - GC_stackbottom);
#else
    stack_size = (word)(GC_stackbottom - GC_approx_sp());
#endif
  }

  total_root_size = 2 * stack_size + GC_root_size;
  scan_size = 2 * GC_composite_in_use + GC_atomic_in_use / 4 + total_root_size;
  result = scan_size / GC_free_space_divisor;
  if (GC_incremental) {
    result /= 2;
  }
  return result > min_bytes_allocd_minimum ? result : min_bytes_allocd_minimum;
}

/*
 * Compute the allocation volume since the last collection, corrected
 * for explicit memory management, finalization, etc.  The result is
 * what `GC_should_collect()` compares against the collection threshold.
 */
STATIC word
GC_adj_bytes_allocd(void)
{
  const GC_signed_word allocd = (GC_signed_word)GC_bytes_allocd;
  /* At least 1/8 of the allocations is always counted. */
  const GC_signed_word lowest = (GC_signed_word)(GC_bytes_allocd >> 3);
  const GC_signed_word expl_managed_delta
      = (GC_signed_word)GC_non_gc_bytes
        - (GC_signed_word)GC_non_gc_bytes_at_gc;
  GC_signed_word adjusted = allocd;

  /*
   * Exclude what was explicitly freed, or newly allocated for explicit
   * management.  Deallocating an explicitly managed object is not
   * expected to change the outcome, provided the client follows the rules.
   */
  adjusted += (GC_signed_word)GC_bytes_dropped;
  adjusted -= (GC_signed_word)GC_bytes_freed;
  adjusted += (GC_signed_word)GC_finalizer_bytes_freed;
  adjusted -= expl_managed_delta;
  if (adjusted > allocd) {
    /* Probably a client bug or unfortunate scheduling. */
    adjusted = allocd;
  }

  /*
   * Objects enqueued for finalization are counted as if they had been
   * reallocated this round.  Finalization is visible to user.  Without
   * counting this, programs that finalize all objects would suffer from
   * stability problems.
   */
  adjusted += (GC_signed_word)GC_bytes_finalized;

  /*
   * Collecting too infrequently would inhibit coalescing of free storage
   * blocks, hence the lower bound.  It also gives partial robustness
   * against client bugs.
   */
  if (adjusted < lowest)
    adjusted = lowest;
  return (word)adjusted;
}

/*
 * Clear up a few frames worth of garbage left at the top of the stack.
 * This is used to prevent us from accidentally treating garbage left
 * on the stack by other parts of the collector as roots.
 * This differs from the code in `misc.c` file, which actually tries
 * to keep the stack clear of long-lived, client-generated garbage.
 * The amount cleared is `CLEAR_STACK_NPTRS` pointer-sized slots
 * (64 unless the macro is predefined).
 */
STATIC void
GC_clear_a_few_frames(void)
{
#ifndef CLEAR_STACK_NPTRS
#  define CLEAR_STACK_NPTRS 64 /*< pointers */
#endif
  /* Presumably `volatile` keeps the clearing from being optimized away. */
  volatile ptr_t frames[CLEAR_STACK_NPTRS];

  BZERO(CAST_AWAY_VOLATILE_PVOID(frames), sizeof(frames));
}

/*
 * Request an incremental collection to be started.  Has an effect only
 * in the incremental mode: the request flag is raised for
 * `GC_should_collect()` and, unless the collection is disabled,
 * one unit of the collection work is performed immediately.
 * No-op if the library is built without the incremental mode support.
 */
GC_API void GC_CALL
GC_start_incremental_collection(void)
{
#ifndef GC_DISABLE_INCREMENTAL
  LOCK();
  if (!GC_incremental) {
    UNLOCK();
    return;
  }

  GC_should_start_incremental_collection = TRUE;
  if (!GC_dont_gc)
    GC_collect_a_little_inner(1);
  UNLOCK();
#endif
}

/*
 * Decide whether it is time to start a collection.  A pending explicit
 * request for an incremental collection is consumed here and takes
 * priority over `GC_disable_automatic_collection`.  The allocator lock
 * should be held by the caller.
 */
GC_INNER GC_bool
GC_should_collect(void)
{
  /* The threshold is recomputed only when `GC_gc_no` has changed. */
  static word cached_threshold;
  static word cached_for_gc_no;

  GC_ASSERT(I_HOLD_LOCK());
  if (cached_for_gc_no != GC_gc_no) {
    cached_threshold = min_bytes_allocd();
    cached_for_gc_no = GC_gc_no;
  }

#ifndef GC_DISABLE_INCREMENTAL
  if (GC_should_start_incremental_collection) {
    GC_should_start_incremental_collection = FALSE;
    return TRUE;
  }
#endif
  if (GC_disable_automatic_collection)
    return FALSE;

  /*
   * The first alternative is to avoid expanding past limits used by
   * black-listing.
   */
  return GC_last_heap_growth_gc_no == GC_gc_no
         || GC_adj_bytes_allocd() >= cached_threshold;
}

/*
 * Called at start of full collections.  Not called if zero.
 * Called with the allocator lock held.  Not used by the collector itself.
 * Set and retrieved with `GC_set_start_callback()` and
 * `GC_get_start_callback()`, respectively.
 */
/* `STATIC` */ GC_start_callback_proc GC_start_call_back = 0;

/*
 * Install `fn` as the callback invoked by `GC_notify_full_gc()`;
 * zero means no callback.  The store is done under the allocator lock.
 */
GC_API void GC_CALL
GC_set_start_callback(GC_start_callback_proc fn)
{
  LOCK();
  GC_start_call_back = fn;
  UNLOCK();
}

/*
 * Fetch the currently installed start-of-full-collection callback
 * (zero if none) while holding the allocator lock in the reader mode.
 */
GC_API GC_start_callback_proc GC_CALL
GC_get_start_callback(void)
{
  GC_start_callback_proc current;

  READER_LOCK();
  current = GC_start_call_back;
  READER_UNLOCK();

  return current;
}

/* Invoke the start-of-full-collection callback if one is installed. */
GC_INLINE void
GC_notify_full_gc(void)
{
  if (0 == GC_start_call_back)
    return;

  (*GC_start_call_back)();
}

/*
 * Set to `TRUE` by `GC_maybe_gc()` and `GC_try_to_collect_inner()` once
 * they have cleared the marks for a full collection.
 */
STATIC GC_bool GC_is_full_gc = FALSE;

/* Forward declarations; both functions are defined later in the file. */
STATIC GC_bool GC_stopped_mark(GC_stop_func stop_func);
STATIC void GC_finish_collection(void);

/*
 * Start a garbage collection if `GC_should_collect()` says so, choosing
 * between a stop-world collection (non-incremental mode), a full
 * marking and a partial one.  The allocator lock should be held and the
 * thread cancellation should be disabled by the caller.
 */
STATIC void
GC_maybe_gc(void)
{
  /* The number of partial collections since the last full one. */
  static int partial_gcs_in_row = 0;

  GC_ASSERT(I_HOLD_LOCK());
  ASSERT_CANCEL_DISABLED();
  if (!GC_should_collect())
    return;

  if (!GC_incremental) {
    GC_gcollect_inner();
    return;
  }

  GC_ASSERT(!GC_collection_in_progress());
#ifdef PARALLEL_MARK
  if (GC_parallel)
    GC_wait_for_reclaim();
#endif
  if (!GC_need_full_gc && partial_gcs_in_row < GC_full_freq) {
    partial_gcs_in_row++;
  } else {
    /* Prepare for a full marking: reclaim everything, reset the marks. */
    GC_COND_LOG_PRINTF(
        "***>Full mark for collection #%lu after %lu allocd bytes\n",
        (unsigned long)GC_gc_no + 1, (unsigned long)GC_bytes_allocd);
    GC_notify_full_gc();
    ENTER_GC();
    GC_promote_black_lists();
    (void)GC_reclaim_all((GC_stop_func)0, TRUE);
    GC_clear_marks();
    EXIT_GC();
    partial_gcs_in_row = 0;
    GC_is_full_gc = TRUE;
  }

  /*
   * Attempt the marking with the world stopped.  If the time runs out,
   * the marking continues incrementally.
   */
#ifndef NO_CLOCK
  if (GC_time_limit != GC_TIME_UNLIMITED)
    GET_TIME(GC_start_time);
#endif
  if (!GC_stopped_mark(GC_timeout_stop_func)) {
    if (!GC_is_full_gc) {
      /* Count this as the first attempt. */
      GC_n_attempts++;
    }
    return;
  }

  SAVE_CALLERS_TO_LAST_STACK();
  GC_finish_collection();
}

/*
 * The client-supplied notifier invoked with a `GC_EVENT_`-prefixed value
 * at various stages of a collection; zero means no notifier.
 */
STATIC GC_on_collection_event_proc GC_on_collection_event = 0;

/*
 * Install `fn` as the collection event notifier.  The store is done
 * under the allocator lock.
 */
GC_API void GC_CALL
GC_set_on_collection_event(GC_on_collection_event_proc fn)
{
  /* `fn` may be 0 (means no event notifier). */
  LOCK();
  GC_on_collection_event = fn;
  UNLOCK();
}

/*
 * Fetch the currently installed collection event notifier (zero if
 * none) while holding the allocator lock in the reader mode.
 */
GC_API GC_on_collection_event_proc GC_CALL
GC_get_on_collection_event(void)
{
  GC_on_collection_event_proc current;

  READER_LOCK();
  current = GC_on_collection_event;
  READER_UNLOCK();

  return current;
}

/*
 * Try to perform a full collection with the world stopped during the
 * marking.  `stop_func` is polled at several points; once it returns
 * a nonzero value, the collection is abandoned.
 * Returns `TRUE` if the collection has been completed, `FALSE` if it
 * has not been started (the collection is disabled or `stop_func`
 * requested to stop at once) or has been abandoned.
 * The allocator lock should be held and the thread cancellation should
 * be disabled by the caller.
 * Note: `GC_EVENT_START` is notified before the abandon checks, and
 * no event is notified on the abandon paths (see the TODO items).
 */
GC_INNER GC_bool
GC_try_to_collect_inner(GC_stop_func stop_func)
{
#ifndef NO_CLOCK
  CLOCK_TYPE start_time = CLOCK_TYPE_INITIALIZER;
  /* `TRUE` only if `start_time` has been sampled. */
  GC_bool start_time_valid;
#endif

  ASSERT_CANCEL_DISABLED();
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
  if (GC_dont_gc || (*stop_func)())
    return FALSE;
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_START);
  if (GC_incremental && GC_collection_in_progress()) {
    GC_COND_LOG_PRINTF(
        "GC_try_to_collect_inner: finishing collection in progress\n");
    /* Just finish collection already in progress. */
    do {
      if ((*stop_func)()) {
        /* TODO: Notify `GC_EVENT_ABANDON`. */
        return FALSE;
      }
      GC_collect_a_little_inner(1);
    } while (GC_collection_in_progress());
  }
  GC_notify_full_gc();
#ifndef NO_CLOCK
  start_time_valid = FALSE;
  /* The clock is sampled only if the result is going to be used. */
  if ((GC_print_stats | (int)GC_measure_performance) != 0) {
    if (GC_print_stats)
      GC_log_printf("Initiating full world-stop collection!\n");
    start_time_valid = TRUE;
    GET_TIME(start_time);
  }
#endif
  GC_promote_black_lists();
  /*
   * Make sure all blocks have been reclaimed, so sweep routines do not
   * see cleared mark bits.  If we are guaranteed to finish, then this
   * is unnecessary.  In the find-leak case, we have to finish to
   * guarantee that previously unmarked objects are not reported as leaks.
   */
#ifdef PARALLEL_MARK
  if (GC_parallel)
    GC_wait_for_reclaim();
#endif
  ENTER_GC();
  if ((GC_find_leak_inner || stop_func != GC_never_stop_func)
      && !GC_reclaim_all(stop_func, FALSE)) {
    /* Aborted.  So far everything is still consistent. */
    EXIT_GC();
    /* TODO: Notify `GC_EVENT_ABANDON`. */
    return FALSE;
  }
  GC_invalidate_mark_state(); /*< flush mark stack */
  GC_clear_marks();
  SAVE_CALLERS_TO_LAST_STACK();
  GC_is_full_gc = TRUE;
  EXIT_GC();
  if (!GC_stopped_mark(stop_func)) {
    if (!GC_incremental) {
      /*
       * We are partially done and have no way to complete or use
       * current work.  Reestablish invariants as cheaply as possible.
       */
      GC_invalidate_mark_state();
      GC_unpromote_black_lists();
    } else {
      /*
       * We claim the world is already (or still) consistent.
       * We will finish incrementally.
       */
    }
    /* TODO: Notify `GC_EVENT_ABANDON`. */
    return FALSE;
  }
  GC_finish_collection();
#ifndef NO_CLOCK
  if (start_time_valid) {
    CLOCK_TYPE current_time;
    unsigned long time_diff, ns_frac_diff;

    GET_TIME(current_time);
    time_diff = MS_TIME_DIFF(current_time, start_time);
    ns_frac_diff = NS_FRAC_TIME_DIFF(current_time, start_time);
    if (GC_measure_performance) {
      /*
       * The total is kept as whole milliseconds plus a fraction (in
       * nanoseconds) which is carried over once it reaches 1 ms.
       */
      GC_full_gc_total_time += time_diff; /*< may wrap */
      GC_full_gc_total_ns_frac += (unsigned32)ns_frac_diff;
      if (GC_full_gc_total_ns_frac >= (unsigned32)1000000UL) {
        /* Overflow of the nanoseconds part. */
        GC_full_gc_total_ns_frac -= (unsigned32)1000000UL;
        GC_full_gc_total_time++;
      }
    }
    if (GC_print_stats)
      GC_log_printf("Complete collection took %lu ms %lu ns\n", time_diff,
                    ns_frac_diff);
  }
#endif
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_END);
  return TRUE;
}

/* The default value of `GC_rate`. */
#ifndef GC_RATE
#  define GC_RATE 10
#endif

/*
 * When `GC_collect_a_little_inner()` performs `n_blocks` units of garbage
 * collection work, a unit is intended to touch roughly `GC_rate` pages.
 * (But, every once in a while, we do more than that.)  This needs to be
 * a fairly large number with our current incremental collection strategy,
 * since otherwise we allocate too much during garbage collection, and
 * the cleanup gets expensive.
 * Adjustable with `GC_set_rate()`.
 */
STATIC unsigned GC_rate = GC_RATE;

/*
 * Set `GC_rate` to `value`, which is asserted to be positive.
 * The allocator lock is not acquired here.
 */
GC_API void GC_CALL
GC_set_rate(int value)
{
  unsigned new_rate = (unsigned)value;

  GC_ASSERT(value > 0);
  GC_rate = new_rate;
}

/*
 * Return the current value of `GC_rate` converted to `int`.
 * The allocator lock is not acquired here.
 */
GC_API int GC_CALL
GC_get_rate(void)
{
  int rate = (int)GC_rate;

  return rate;
}

/* The default maximum number of prior attempts at world stop marking. */
#ifndef MAX_PRIOR_ATTEMPTS
#  define MAX_PRIOR_ATTEMPTS 3
#endif

/*
 * The maximum number of prior attempts at world stop marking.
 * A value of 1 means that we finish the second time, no matter how long
 * it takes.  Does not count the initial root scan for a full collection.
 * Once `GC_n_attempts` reaches this value, `GC_collect_a_little_inner()`
 * passes `GC_never_stop_func` to `GC_stopped_mark()`.
 */
static int max_prior_attempts = MAX_PRIOR_ATTEMPTS;

/*
 * Set `max_prior_attempts` to `value`, which is asserted to be
 * non-negative.  The allocator lock is not acquired here.
 */
GC_API void GC_CALL
GC_set_max_prior_attempts(int value)
{
  GC_ASSERT(value >= 0);
  max_prior_attempts = value;
}

/*
 * Return the current value of `max_prior_attempts`.
 * The allocator lock is not acquired here.
 */
GC_API int GC_CALL
GC_get_max_prior_attempts(void)
{
  int limit = max_prior_attempts;

  return limit;
}

/*
 * Perform `n_blocks` units of the collection work.  If no incremental
 * collection is in progress, this just starts a collection if
 * appropriate (unless the collection is disabled).  Otherwise, some
 * marking is done and, once the incremental marking is complete,
 * it is followed up by a marking with the world stopped.
 * The allocator lock should be held by the caller.
 */
GC_INNER void
GC_collect_a_little_inner(size_t n_blocks)
{
  IF_CANCEL(int cancel_state;)

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
  DISABLE_CANCEL(cancel_state);
  if (!GC_incremental || !GC_collection_in_progress()) {
    if (!GC_dont_gc)
      GC_maybe_gc();
  } else {
    size_t units_done;
    size_t quota = GC_rate * n_blocks;

    ENTER_GC();
#ifdef PARALLEL_MARK
    if (GC_time_limit != GC_TIME_UNLIMITED)
      GC_parallel_mark_disabled = TRUE;
#endif
    /* The outstanding deficit counts as work that is already done. */
    units_done = GC_mark_deficit;
    while (units_done < quota && !GC_mark_some(NULL))
      units_done++;
#ifdef PARALLEL_MARK
    GC_parallel_mark_disabled = FALSE;
#endif
    EXIT_GC();

    if (units_done < quota && !GC_dont_gc) {
      /* Not interruptible once the attempts limit has been reached. */
      GC_stop_func follow_up_stop_func = GC_never_stop_func;

      GC_ASSERT(!GC_collection_in_progress());
      /* Need to follow up with a full collection. */
      SAVE_CALLERS_TO_LAST_STACK();
#ifdef PARALLEL_MARK
      if (GC_parallel)
        GC_wait_for_reclaim();
#endif
      if (GC_n_attempts < max_prior_attempts) {
        follow_up_stop_func = GC_timeout_stop_func;
#ifndef NO_CLOCK
        if (GC_time_limit != GC_TIME_UNLIMITED)
          GET_TIME(GC_start_time);
#endif
      }

      if (GC_stopped_mark(follow_up_stop_func)) {
        GC_finish_collection();
      } else {
        GC_n_attempts++;
      }
    }

    /* Pay off the deficit by the amount of the work requested. */
    if (GC_mark_deficit > 0) {
      if (GC_mark_deficit > quota) {
        GC_mark_deficit = GC_mark_deficit - quota;
      } else {
        GC_mark_deficit = 0;
      }
    }
  }
  RESTORE_CANCEL(cancel_state);
}

#if !defined(NO_FIND_LEAK) || !defined(SHORT_DBG_HDRS)
/*
 * The hooks invoked by the collector when `GC_debugging_started` is set:
 * `GC_check_heap` after a completed stopped marking, and
 * `GC_print_all_smashed` at the end of `GC_collect_a_little()`.
 * Both are zero initially and not assigned in this part of the file.
 */
GC_INNER void (*GC_check_heap)(void) = 0;
GC_INNER void (*GC_print_all_smashed)(void) = 0;
#endif

/*
 * Perform one unit of the collection work, initializing the collector
 * first if needed.  Returns nonzero if a collection is still in progress
 * afterwards.
 */
GC_API int GC_CALL
GC_collect_a_little(void)
{
  int still_collecting;

  if (UNLIKELY(!GC_is_initialized))
    GC_init();

  LOCK();
  /*
   * Note: with a collection in progress, marking (without stopping the
   * world) may happen here even if the garbage collection is disabled.
   */
  GC_collect_a_little_inner(1);
  still_collecting = (int)GC_collection_in_progress();
  UNLOCK();

  if (!still_collecting && GC_debugging_started)
    GC_print_all_smashed();
  return still_collecting;
}

#ifdef THREADS
/*
 * Stop the world on behalf of the client.  Note that the function
 * returns with the allocator lock still held (and after `ENTER_GC()`);
 * the matching `UNLOCK()` and `EXIT_GC()` are in
 * `GC_start_world_external()`.
 * The collector is asserted to be initialized already.
 */
GC_API void GC_CALL
GC_stop_world_external(void)
{
  GC_ASSERT(GC_is_initialized);
  LOCK();
#  ifdef THREAD_LOCAL_ALLOC
  GC_ASSERT(!GC_world_stopped);
#  endif
  ENTER_GC();
  STOP_WORLD();
#  ifdef THREAD_LOCAL_ALLOC
  GC_world_stopped = TRUE;
#  endif
}

/*
 * Restart the world stopped by `GC_stop_world_external()`, then release
 * the allocator lock that the latter left held.
 */
GC_API void GC_CALL
GC_start_world_external(void)
{
#  ifdef THREAD_LOCAL_ALLOC
  GC_ASSERT(GC_world_stopped);
  GC_world_stopped = FALSE;
#  else
  GC_ASSERT(GC_is_initialized);
#  endif
  START_WORLD();
  EXIT_GC();
  UNLOCK();
}
#endif /* THREADS */

#ifdef USE_MUNMAP
#  ifndef MUNMAP_THRESHOLD
#    define MUNMAP_THRESHOLD 7
#  endif
/* Initialized to `MUNMAP_THRESHOLD`; not otherwise used in this part. */
GC_INNER unsigned GC_unmap_threshold = MUNMAP_THRESHOLD;

/*
 * The helpers to emit an argument (optionally preceded by a comma) only
 * if `USE_MUNMAP` is defined; both expand to nothing otherwise.
 */
#  define IF_USE_MUNMAP(x) x
#  define COMMA_IF_USE_MUNMAP(x) /* comma */ , x
#else
#  define IF_USE_MUNMAP(x)
#  define COMMA_IF_USE_MUNMAP(x)
#endif /* !USE_MUNMAP */

/*
 * We stop the world and mark from all roots.  If `stop_func()` ever
 * returns `TRUE`, we may fail and return `FALSE`.  Increment `GC_gc_no`
 * if we succeed.
 * On failure, the amount of the marking work done so far is saved to
 * `GC_mark_deficit`.  The world is restarted before returning in both
 * cases.  The allocator lock should be held by the caller.
 */
STATIC GC_bool
GC_stopped_mark(GC_stop_func stop_func)
{
  ptr_t cold_gc_frame = GC_approx_sp();
  /*
   * Zero if the marking has been completed; otherwise one more than the
   * number of `GC_mark_some()` calls made before `stop_func` requested
   * to stop.
   */
  unsigned abandoned_at;
#ifndef NO_CLOCK
  CLOCK_TYPE start_time = CLOCK_TYPE_INITIALIZER;
  /* `TRUE` only if `start_time` has been sampled. */
  GC_bool start_time_valid = FALSE;
#endif

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
  ENTER_GC();
#if !defined(REDIRECT_MALLOC) && defined(USE_WINALLOC)
  GC_add_current_malloc_heap();
#endif
#if defined(REGISTER_LIBRARIES_EARLY)
  GC_cond_register_dynamic_libraries();
#endif

#if !defined(GC_NO_FINALIZATION) && !defined(GC_TOGGLE_REFS_NOT_NEEDED)
  GC_process_togglerefs();
#endif

  /* Output blank line for convenience here. */
  GC_COND_LOG_PRINTF(
      "\n--> Marking for collection #%lu after %lu allocated bytes\n",
      (unsigned long)GC_gc_no + 1, (unsigned long)GC_bytes_allocd);
#ifndef NO_CLOCK
  if (GC_PRINT_STATS_FLAG || GC_measure_performance) {
    GET_TIME(start_time);
    start_time_valid = TRUE;
  }
#endif
#ifdef THREADS
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_PRE_STOP_WORLD);
#endif
  STOP_WORLD();
#ifdef THREADS
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_POST_STOP_WORLD);
#  ifdef THREAD_LOCAL_ALLOC
  GC_world_stopped = TRUE;
#  elif defined(CPPCHECK)
  /* Workaround a warning about adjacent same `if` condition. */
  (void)0;
#  endif
#endif

#ifdef MAKE_BACK_GRAPH
  if (GC_print_back_height) {
    GC_build_back_graph();
  }
#endif

  /* Notify about marking from all roots. */
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_MARK_START);

  /* Minimize junk left in my registers and on the stack. */
  GC_clear_a_few_frames();
  GC_noop6(0, 0, 0, 0, 0, 0);

  GC_initiate_gc();
#ifdef PARALLEL_MARK
  /* The parallel marker is not used if the marking may be interrupted. */
  if (stop_func != GC_never_stop_func)
    GC_parallel_mark_disabled = TRUE;
#endif
  /* `stop_func` is polled before each portion of the marking work. */
  for (abandoned_at = 1; !(*stop_func)(); abandoned_at++) {
    if (GC_mark_some(cold_gc_frame)) {
#ifdef PARALLEL_MARK
      if (GC_parallel && GC_parallel_mark_disabled) {
        GC_COND_LOG_PRINTF("Stopped marking done after %u iterations"
                           " with disabled parallel marker\n",
                           abandoned_at - 1);
      }
#endif
      /* The marking is complete. */
      abandoned_at = 0;
      break;
    }
  }
#ifdef PARALLEL_MARK
  GC_parallel_mark_disabled = FALSE;
#endif

  if (abandoned_at > 0) {
    /* Give the mutator a chance. */
    GC_mark_deficit = abandoned_at - 1;
    /* TODO: Notify `GC_EVENT_MARK_ABANDON`. */
  } else {
    GC_gc_no++;
    /* Check all debugged objects for consistency. */
    if (GC_debugging_started)
      GC_check_heap();
    if (GC_on_collection_event)
      GC_on_collection_event(GC_EVENT_MARK_END);
  }

#ifdef THREADS
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_PRE_START_WORLD);
#endif
#ifdef THREAD_LOCAL_ALLOC
  GC_world_stopped = FALSE;
#endif
  START_WORLD();
#ifdef THREADS
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_POST_START_WORLD);
#endif

#ifndef NO_CLOCK
  if (start_time_valid) {
    CLOCK_TYPE current_time;
    unsigned long time_diff, ns_frac_diff;

    /* TODO: Avoid code duplication from `GC_try_to_collect_inner`. */
    GET_TIME(current_time);
    time_diff = MS_TIME_DIFF(current_time, start_time);
    ns_frac_diff = NS_FRAC_TIME_DIFF(current_time, start_time);
    if (GC_measure_performance) {
      GC_stopped_mark_total_time += time_diff; /*< may wrap */
      GC_stopped_mark_total_ns_frac += (unsigned32)ns_frac_diff;
      if (GC_stopped_mark_total_ns_frac >= (unsigned32)1000000UL) {
        /* Carry the whole millisecond over to the main counter. */
        GC_stopped_mark_total_ns_frac -= (unsigned32)1000000UL;
        GC_stopped_mark_total_time++;
      }
    }

    if (GC_PRINT_STATS_FLAG || GC_measure_performance) {
      unsigned total_time = GC_world_stopped_total_time;
      unsigned divisor = GC_world_stopped_total_divisor;

      /* Compute new world-stop delay total time. */
      if (total_time > (((unsigned)-1) >> 1)
          || divisor >= MAX_TOTAL_TIME_DIVISOR) {
        /* Halve values if overflow occurs. */
        total_time >>= 1;
        divisor >>= 1;
      }
      /*
       * Both the total and the new sample are at most half of the
       * `unsigned` range at this point, thus the sum cannot wrap.
       */
      total_time += time_diff < (((unsigned)-1) >> 1) ? (unsigned)time_diff
                                                      : ((unsigned)-1) >> 1;
      /* Update old `GC_world_stopped_total_time` and its divisor. */
      GC_world_stopped_total_time = total_time;
      GC_world_stopped_total_divisor = ++divisor;
      if (GC_PRINT_STATS_FLAG && 0 == abandoned_at) {
        GC_ASSERT(divisor != 0);
        GC_log_printf("World-stopped marking took %lu ms %lu ns"
                      " (%u ms in average)\n",
                      time_diff, ns_frac_diff, total_time / divisor);
      }
    }
  }
#endif

  EXIT_GC();
  if (0 == abandoned_at)
    return TRUE;
  GC_COND_LOG_PRINTF("Abandoned stopped marking after %u iterations\n",
                     abandoned_at - 1);
  return FALSE;
}

/*
 * Set the mark bit of every object on the free list whose first entry
 * is `q` (asserted to be non-null).  `INCR_MARKS()` is applied to the
 * block header for each mark bit that was not set before.
 */
GC_INNER void
GC_set_fl_marks(ptr_t q)
{
#ifdef GC_ASSERTIONS
  /* The "twice faster" iterator used to detect a cycle in the list. */
  ptr_t q2;
#endif
  struct hblk *h = HBLKPTR(q);
  /* The block that `hhdr` (and `sz`) currently correspond to. */
  const struct hblk *last_h = h;
  hdr *hhdr;
#ifdef MARK_BIT_PER_OBJ
  size_t sz;
#endif

  GC_ASSERT(q != NULL);
  hhdr = HDR(h);
#ifdef MARK_BIT_PER_OBJ
  sz = hhdr->hb_sz;
#endif
#ifdef GC_ASSERTIONS
  q2 = (ptr_t)obj_link(q);
#endif
  for (;;) {
    /*
     * Note: `sz` is declared only if `MARK_BIT_PER_OBJ` is defined, so
     * `MARK_BIT_NO()` presumably ignores its second argument otherwise.
     */
    size_t bit_no = MARK_BIT_NO((size_t)((ptr_t)q - (ptr_t)h), sz);

    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      set_mark_bit_from_hdr(hhdr, bit_no);
      INCR_MARKS(hhdr);
    }
    q = (ptr_t)obj_link(q);
    if (NULL == q)
      break;
#ifdef GC_ASSERTIONS
    /*
     * Detect a cycle in the free list.  The algorithm is to have
     * a "twice faster" iterator over the list which meets the first
     * one in case of a cycle existing in the list.
     */
    if (q2 != NULL) {
      q2 = (ptr_t)obj_link(q2);
      GC_ASSERT(q2 != q);
      if (q2 != NULL) {
        q2 = (ptr_t)obj_link(q2);
        GC_ASSERT(q2 != q);
      }
    }
#endif

    /* Look up the header again only if the next entry is in another block. */
    h = HBLKPTR(q);
    if (UNLIKELY(h != last_h)) {
      last_h = h;
      /* Update `hhdr` and `sz`. */
      hhdr = HDR(h);
#ifdef MARK_BIT_PER_OBJ
      sz = hhdr->hb_sz;
#endif
    }
  }
}

#if defined(GC_ASSERTIONS) && defined(THREAD_LOCAL_ALLOC)
/*
 * Check that all mark bits for the free list, whose first entry is
 * `*pfreelist`, are set.  The check is skipped if `*pfreelist` points to
 * a special value.  Aborts on the first unmarked entry found.
 */
void
GC_check_fl_marks(void **pfreelist)
{
  /*
   * TODO: There is a data race with `GC_FAST_MALLOC_GRANS` (which does
   * not do atomic updates to the free-list).  The race seems to be
   * harmless, and for now we just skip this check in case of TSan.
   */
#  if defined(AO_HAVE_load_acquire_read) && !defined(THREAD_SANITIZER)
  ptr_t list = GC_cptr_load_acquire_read((volatile ptr_t *)pfreelist);
  /* Atomic operations are used because the world is running. */
  ptr_t p, prev, next;

  /* A value not exceeding `HBLKSIZE` is not a real list entry. */
  if (ADDR(list) <= HBLKSIZE)
    return;

  prev = (ptr_t)pfreelist;
  for (p = list; p != NULL; p = next) {
    if (!GC_is_marked(p)) {
      ABORT_ARG2("Unmarked local free-list entry", ": object %p on list %p",
                 (void *)p, (void *)list);
    }

    /*
     * While traversing the free-list, it re-reads the pointer to the
     * current node before accepting its next pointer and bails out
     * if the latter has changed.  That way, it will not try to follow
     * the pointer which might have been modified after the object was
     * returned to the client.  It might perform the mark-check on the
     * just allocated object but that should be harmless.
     */
    next = GC_cptr_load_acquire_read((volatile ptr_t *)p);
    if (GC_cptr_load((volatile ptr_t *)prev) != p)
      break;
    prev = p;
  }
#  else
  /* FIXME: Not implemented (just skipped). */
  (void)pfreelist;
#  endif
}
#endif /* GC_ASSERTIONS && THREAD_LOCAL_ALLOC */

/*
 * Reset the mark bit of every object on the free list whose first entry
 * is `q`, adjusting the mark count of the block header accordingly.
 * The size of every object visited is subtracted from `GC_bytes_found`.
 */
STATIC void
GC_clear_fl_marks(ptr_t q)
{
  struct hblk *blk = HBLKPTR(q);
  /* The block that `blk_hdr` and `obj_sz` currently correspond to. */
  const struct hblk *prev_blk = blk;
  hdr *blk_hdr = HDR(blk);
  size_t obj_sz = blk_hdr->hb_sz; /*< normally set only once */

  for (;;) {
    size_t bit_no = MARK_BIT_NO((size_t)((ptr_t)q - (ptr_t)blk), obj_sz);

    if (mark_bit_from_hdr(blk_hdr, bit_no)) {
      size_t marks_left = blk_hdr->hb_n_marks;

#ifdef LINT2
      if (0 == marks_left)
        ABORT("hhdr->hb_n_marks cannot be zero");
#else
      GC_ASSERT(marks_left != 0);
#endif
      clear_mark_bit_from_hdr(blk_hdr, bit_no);
      marks_left--;
#ifdef PARALLEL_MARK
      /* The count is approximate, so it is never decremented to zero. */
      if (marks_left != 0 || !GC_parallel) {
        blk_hdr->hb_n_marks = marks_left;
      }
#else
      blk_hdr->hb_n_marks = marks_left;
#endif
    }
    GC_bytes_found -= (GC_signed_word)obj_sz;

    q = (ptr_t)obj_link(q);
    if (NULL == q)
      break;

    /* Refresh the cached header data on moving to another block. */
    blk = HBLKPTR(q);
    if (UNLIKELY(blk != prev_blk)) {
      prev_blk = blk;
      blk_hdr = HDR(blk);
      obj_sz = blk_hdr->hb_sz;
    }
  }
}

/*
 * Apply `GC_set_fl_marks()` to every non-empty free list of every
 * object kind.
 */
static void
set_all_fl_marks(void)
{
  unsigned kind;

  for (kind = 0; kind < GC_n_kinds; kind++) {
    word lg; /*< index of the current free list */

    for (lg = 1; lg <= MAXOBJGRANULES; lg++) {
      ptr_t head = (ptr_t)GC_obj_kinds[kind].ok_freelist[lg];

      if (NULL == head)
        continue;
      GC_set_fl_marks(head);
    }
  }
}

/*
 * Apply `GC_clear_fl_marks()` to every non-empty free list of every
 * object kind.  As a side effect, the memory remaining on the free lists
 * is subtracted from `GC_bytes_found` count.
 */
static void
clear_all_fl_marks(void)
{
  unsigned kind;

  for (kind = 0; kind < GC_n_kinds; kind++) {
    word lg; /*< index of the current free list */

    for (lg = 1; lg <= MAXOBJGRANULES; lg++) {
      ptr_t head = (ptr_t)GC_obj_kinds[kind].ok_freelist[lg];

      if (NULL == head)
        continue;
      GC_clear_fl_marks(head);
    }
  }
}

#if defined(GC_ASSERTIONS) && defined(THREAD_LOCAL_ALLOC)
/* Called by `GC_finish_collection()`; the definition is not in this part. */
void GC_check_tls(void);
#endif

/* Zero initially; not invoked in this part of the file. */
GC_on_heap_resize_proc GC_on_heap_resize = 0;

/*
 * Compute the percentage of the mapped heap that is in use.  Yields 0
 * if the usage is not smaller than the mapped heap size.
 * Used for logging only.
 */
GC_INLINE int
GC_compute_heap_usage_percent(void)
{
  word in_use = GC_composite_in_use + GC_atomic_in_use + GC_bytes_allocd;
  word mapped_sz = GC_heapsize - GC_unmapped_bytes;
  /* Below this value, multiplying the usage by 100 cannot overflow. */
#if defined(CPPCHECK)
  word mul_limit = (GC_WORD_MAX >> 1) / 50; /*< to avoid a false positive */
#else
  const word mul_limit = GC_WORD_MAX / 100;
#endif

  if (in_use >= mapped_sz)
    return 0;
  if (in_use < mul_limit)
    return (int)((in_use * 100) / mapped_sz);

  /* Too big to multiply, so divide the heap size by 100 instead. */
  return (int)(in_use / (mapped_sz / 100));
}

/*
 * Log (by `GC_DBGLOG_PRINTF`) the heap usage percentage along with
 * the `GC_composite_in_use` value and the sum of `GC_atomic_in_use` and
 * `GC_bytes_allocd`, the latter two are printed in KiB.
 */
#define GC_DBGLOG_PRINT_HEAP_IN_USE()                                        \
  GC_DBGLOG_PRINTF("In-use heap: %d%% (%lu KiB pointers + %lu KiB other)\n", \
                   GC_compute_heap_usage_percent(),                          \
                   TO_KiB_UL(GC_composite_in_use),                           \
                   TO_KiB_UL(GC_atomic_in_use + GC_bytes_allocd))

/*
 * Finish up a collection.  Assumes mark bits are consistent, but the
 * world is otherwise running.  The allocator lock should be held by
 * the caller.
 *
 * The sequence is: notify the client about the reclaim start; perform
 * the leak detection pass (if enabled); run the finalization; clear the
 * mark bits of the free-list objects; initiate the sweep; optionally
 * unmap old blocks; decide whether the next collection should be a full
 * one; reset the per-cycle counters; notify the client about the reclaim
 * end; and, finally, print the statistics (if requested).
 */
STATIC void
GC_finish_collection(void)
{
#ifndef NO_CLOCK
  CLOCK_TYPE start_time = CLOCK_TYPE_INITIALIZER;
  CLOCK_TYPE finalize_time = CLOCK_TYPE_INITIALIZER;
#endif

  GC_ASSERT(I_HOLD_LOCK());
#if defined(GC_ASSERTIONS) && defined(THREAD_LOCAL_ALLOC) \
    && !defined(DBG_HDRS_ALL)
  /* Check that we marked some of our own data. */
  GC_check_tls();
  /* TODO: Add more checks. */
#endif

#ifndef NO_CLOCK
  /* The timestamps are taken only if the statistics is to be printed. */
  if (GC_print_stats)
    GET_TIME(start_time);
#endif
  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_RECLAIM_START);

#ifndef GC_GET_HEAP_USAGE_NOT_NEEDED
  /* Accumulate the positive count before it is reset below. */
  if (GC_bytes_found > 0)
    GC_reclaimed_bytes_before_gc += (word)GC_bytes_found;
#endif
  GC_bytes_found = 0;
#if defined(LINUX) && defined(__ELF__) && !defined(SMALL_CONFIG)
  if (GETENV("GC_PRINT_ADDRESS_MAP") != NULL) {
    GC_print_address_map();
  }
#endif
  COND_DUMP;
  if (GC_find_leak_inner) {
    /*
     * Mark the free-list objects first, so that they are not treated
     * as unmarked ones by the checking pass below.
     */
    set_all_fl_marks();
    /* This just checks; it does not really reclaim anything. */
    GC_start_reclaim(TRUE);
  }

#ifndef GC_NO_FINALIZATION
  GC_finalize();
#endif
#ifndef NO_CLOCK
  if (GC_print_stats)
    GET_TIME(finalize_time);
#endif
#ifdef MAKE_BACK_GRAPH
  if (GC_print_back_height) {
    GC_traverse_back_graph();
  }
#endif

  /*
   * Clear free-list mark bits, in case they got accidentally marked
   * (or `GC_find_leak` is set and they were intentionally marked).
   * Note that composite objects on free list are cleared, thus
   * accidentally marking a free list is not a problem; but some objects
   * on the list itself might be marked, and the given function call
   * fixes it.
   */
  clear_all_fl_marks();

  GC_VERBOSE_LOG_PRINTF("Bytes recovered before sweep - f.l. count = %ld\n",
                        (long)GC_bytes_found);

  /* Reconstruct free lists to contain everything not marked. */
  GC_start_reclaim(FALSE);

#ifdef USE_MUNMAP
  if (GC_unmap_threshold > 0    /*< memory unmapping enabled? */
      && LIKELY(GC_gc_no != 1)) /*< do not unmap during `GC_init` */
    GC_unmap_old(GC_unmap_threshold);

  GC_ASSERT(GC_heapsize >= GC_unmapped_bytes);
#endif
  GC_ASSERT(GC_our_mem_bytes >= GC_heapsize);
  GC_DBGLOG_PRINTF(
      "GC #%lu freed %ld bytes, heap %lu KiB (" IF_USE_MUNMAP(
          "+ %lu KiB unmapped ") "+ %lu KiB internal)\n",
      (unsigned long)GC_gc_no, (long)GC_bytes_found,
      TO_KiB_UL(GC_heapsize - GC_unmapped_bytes) /*, */
      COMMA_IF_USE_MUNMAP(TO_KiB_UL(GC_unmapped_bytes)),
      TO_KiB_UL(GC_our_mem_bytes - GC_heapsize + sizeof(GC_arrays)));
  GC_DBGLOG_PRINT_HEAP_IN_USE();
  if (GC_is_full_gc) {
    /* Remember the used heap size as the baseline for the test below. */
    GC_used_heap_size_after_full = GC_heapsize - GC_large_free_bytes;
    GC_need_full_gc = FALSE;
  } else {
    /*
     * Request a full collection if the used part of the heap has grown,
     * since the latest full collection, by more than the value returned
     * by `min_bytes_allocd()`.
     */
    GC_need_full_gc = GC_heapsize - GC_used_heap_size_after_full
                      > min_bytes_allocd() + GC_large_free_bytes;
  }

  /* Reset or increment counters for next cycle. */
  GC_n_attempts = 0;
  GC_is_full_gc = FALSE;
  GC_bytes_allocd_before_gc += GC_bytes_allocd;
  GC_non_gc_bytes_at_gc = GC_non_gc_bytes;
  GC_bytes_allocd = 0;
  GC_bytes_dropped = 0;
  GC_bytes_freed = 0;
  GC_finalizer_bytes_freed = 0;

  if (GC_on_collection_event)
    GC_on_collection_event(GC_EVENT_RECLAIM_END);
#ifndef NO_CLOCK
  if (GC_print_stats) {
    CLOCK_TYPE done_time;

    GET_TIME(done_time);
#  if !defined(SMALL_CONFIG) && !defined(GC_NO_FINALIZATION)
    /* A convenient place to output finalization statistics. */
    GC_print_finalization_stats();
#  endif
    /*
     * The first pair is the time spent from the start till the end of
     * the finalization; the second one is the time spent after that.
     */
    GC_log_printf("Finalize and initiate sweep took %lu ms %lu ns"
                  " + %lu ms %lu ns\n",
                  MS_TIME_DIFF(finalize_time, start_time),
                  NS_FRAC_TIME_DIFF(finalize_time, start_time),
                  MS_TIME_DIFF(done_time, finalize_time),
                  NS_FRAC_TIME_DIFF(done_time, finalize_time));
  }
#elif !defined(SMALL_CONFIG) && !defined(GC_NO_FINALIZATION)
  if (GC_print_stats)
    GC_print_finalization_stats();
#endif
}

/*
 * The common part of the externally callable collection routines.
 * Initializes the collector if needed, then runs a collection with the
 * allocator lock held (and the thread cancellation disabled), and,
 * finally, if the collection has completed, reports the smashed objects
 * (if the debugging is started) and notifies or invokes the finalizers.
 * If `force_unmap`, then as much memory as possible is requested to be
 * unmapped (in case of `USE_MUNMAP`).  Returns the result of
 * `GC_try_to_collect_inner()`.
 * Note: if `stop_func` is 0, then `GC_default_stop_func` is used instead.
 */
STATIC GC_bool
GC_try_to_collect_general(GC_stop_func stop_func, GC_bool force_unmap)
{
  GC_bool completed;
  GC_stop_func effective_stop_func;
#ifdef USE_MUNMAP
  unsigned saved_unmap_threshold;
#endif
  IF_CANCEL(int cancel_state;)

  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  if (GC_debugging_started)
    GC_print_all_smashed();
  GC_notify_or_invoke_finalizers();

  LOCK();
  if (force_unmap) {
    /*
     * Remember the heap size at this point, so that the subsequent heap
     * growth is more conservative (as if the heap grows from zero size).
     */
    GC_heapsize_at_forced_unmap = GC_heapsize;
  }
  DISABLE_CANCEL(cancel_state);
#ifdef USE_MUNMAP
  /* Temporarily lower the threshold to unmap as much as possible. */
  saved_unmap_threshold = GC_unmap_threshold;
  if (force_unmap
      || (GC_force_unmap_on_gcollect && saved_unmap_threshold > 0))
    GC_unmap_threshold = 1;
#endif
  /* Minimize junk left in my registers. */
  GC_noop6(0, 0, 0, 0, 0, 0);

  /* The default one is fetched while holding the allocator lock. */
  effective_stop_func = stop_func;
  if (0 == effective_stop_func)
    effective_stop_func = GC_default_stop_func;
  completed = GC_try_to_collect_inner(effective_stop_func);
#ifdef USE_MUNMAP
  /* Put the original threshold back. */
  GC_unmap_threshold = saved_unmap_threshold;
#endif
  RESTORE_CANCEL(cancel_state);
  UNLOCK();

  if (completed) {
    if (GC_debugging_started)
      GC_print_all_smashed();
    GC_notify_or_invoke_finalizers();
  }
  return completed;
}

/* Externally callable routines to invoke full, stop-the-world collection. */

/*
 * Try to perform a collection which could be aborted by the client
 * `stop_func` (which should be non-null).  No memory unmapping is forced.
 * The result of `GC_try_to_collect_general()` is returned as `int`.
 */
GC_API int GC_CALL
GC_try_to_collect(GC_stop_func stop_func)
{
  GC_bool completed;

  GC_ASSERT(NONNULL_ARG_NOT_NULL(stop_func));
  completed = GC_try_to_collect_general(stop_func, FALSE);
  return (int)completed;
}

/*
 * Perform a collection using the default "stop" function, then print
 * the accumulated errors (if there are any).
 */
GC_API void GC_CALL
GC_gcollect(void)
{
  /*
   * Passing zero as `stop_func` makes the callee read the value of
   * `GC_default_stop_func` with the allocator lock held (thus no data
   * race occurs).
   */
  (void)GC_try_to_collect_general(0, FALSE);

  if (!get_have_errors())
    return;
  GC_print_all_errors();
}

/*
 * Perform a collection which is never aborted (`GC_never_stop_func` is
 * passed) with the forced memory unmapping requested.  The collection
 * result is ignored.
 */
GC_API void GC_CALL
GC_gcollect_and_unmap(void)
{
  /* Collect and force memory unmapping to OS. */
  (void)GC_try_to_collect_general(GC_never_stop_func, TRUE);
}

/*
 * Obtain `bytes` of memory by `GET_MEM()` and account for it in
 * `GC_our_mem_bytes` (and in `GC_our_memory` table if
 * `USE_PROC_FOR_LIBRARIES`).  Returns `NULL` if the memory could not be
 * obtained.  The allocator lock should be held.
 */
GC_INNER ptr_t
GC_os_get_mem(size_t bytes)
{
  ptr_t mem;

  GC_ASSERT(I_HOLD_LOCK());
  mem = (ptr_t)GET_MEM(bytes); /*< `HBLKSIZE`-aligned */
  if (LIKELY(mem != NULL)) {
#ifdef USE_PROC_FOR_LIBRARIES
    /* Record the `GET_MEM`-generated block in `GC_our_memory`. */
    if (GC_n_memory >= MAX_HEAP_SECTS)
      ABORT("Too many GC-allocated memory sections: Increase MAX_HEAP_SECTS");
    GC_our_memory[GC_n_memory].hs_start = mem;
    GC_our_memory[GC_n_memory].hs_bytes = bytes;
    GC_n_memory++;
#endif
    GC_our_mem_bytes += bytes;
    GC_VERBOSE_LOG_PRINTF("Got %lu bytes from OS\n", (unsigned long)bytes);
  }
  return mem;
}

/*
 * Use the chunk of memory starting at `h` of size `sz` as part of the heap.
 * Assumes `h` is `HBLKSIZE`-aligned, `sz` is a multiple of `HBLKSIZE`.
 * The allocator lock should be held.
 *
 * The steps are: grow `GC_heap_sects` array if it is full; trim the
 * chunk if it is too close to address zero or to the end of the address
 * space; install the block header; record the section; hand the block
 * over to `GC_freehblk()`; update `GC_heapsize` and the heap bounds;
 * and, finally, recycle the old `GC_heap_sects` array (if replaced).
 * The function returns silently (adding nothing) if the chunk is
 * trimmed to nothing or the header cannot be installed.
 */
STATIC void
GC_add_to_heap(struct hblk *h, size_t sz)
{
  hdr *hhdr;
  ptr_t endp;
  /* These two are set only if `GC_heap_sects` is replaced below. */
  size_t old_capacity = 0;
  void *old_heap_sects = NULL;
#ifdef GC_ASSERTIONS
  size_t i;
#endif

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(ADDR(h) % HBLKSIZE == 0);
  GC_ASSERT(sz % HBLKSIZE == 0);
  GC_ASSERT(sz > 0);
  GC_ASSERT(GC_all_nils != NULL);

  if (UNLIKELY(GC_n_heap_sects == GC_capacity_heap_sects)) {
    /* Allocate new `GC_heap_sects` with sufficient capacity. */
#ifndef INITIAL_HEAP_SECTS
#  define INITIAL_HEAP_SECTS 32
#endif
    /* Double the capacity (or start with the initial one). */
    size_t new_capacity
        = GC_n_heap_sects > 0 ? GC_n_heap_sects * 2 : INITIAL_HEAP_SECTS;
    void *new_heap_sects
        = GC_scratch_alloc(new_capacity * sizeof(struct HeapSect));

    if (NULL == new_heap_sects) {
      /* Retry with smaller yet sufficient capacity. */
      new_capacity = GC_n_heap_sects + INITIAL_HEAP_SECTS;
      new_heap_sects
          = GC_scratch_alloc(new_capacity * sizeof(struct HeapSect));
      if (NULL == new_heap_sects)
        ABORT("Insufficient memory for heap sections");
    }
    old_capacity = GC_capacity_heap_sects;
    old_heap_sects = GC_heap_sects;
    /* Transfer `GC_heap_sects` contents to the newly allocated array. */
    if (GC_n_heap_sects > 0)
      BCOPY(old_heap_sects, new_heap_sects,
            GC_n_heap_sects * sizeof(struct HeapSect));
    GC_capacity_heap_sects = new_capacity;
    GC_heap_sects = (struct HeapSect *)new_heap_sects;
    GC_COND_LOG_PRINTF("Grew heap sections array to %lu elements\n",
                       (unsigned long)new_capacity);
  }

  /*
   * NOTE(review): if any of the early returns below is taken right after
   * `GC_heap_sects` has been replaced above, then the old array is not
   * passed to the recycling code at the end of the function.
   */
  while (UNLIKELY(ADDR(h) <= HBLKSIZE)) {
    /* Cannot handle memory near address zero. */
    ++h;
    sz -= HBLKSIZE;
    if (0 == sz)
      return;
  }
  while (UNLIKELY(ADDR(h) >= GC_WORD_MAX - sz)) {
    /* Prevent overflow when calculating `endp`. */
    sz -= HBLKSIZE;
    if (0 == sz)
      return;
  }
  endp = (ptr_t)h + sz;

  hhdr = GC_install_header(h);
  if (UNLIKELY(NULL == hhdr)) {
    /*
     * This is extremely unlikely. Cannot add it.  This will almost
     * certainly result in a `NULL` returned from the allocator, which
     * is entirely appropriate.
     */
    return;
  }
#ifdef GC_ASSERTIONS
  /* Ensure no intersection between sections. */
  for (i = 0; i < GC_n_heap_sects; i++) {
    ptr_t hs_start = GC_heap_sects[i].hs_start;
    ptr_t hs_end = hs_start + GC_heap_sects[i].hs_bytes;

    GC_ASSERT(!(ADDR_INSIDE((ptr_t)h, hs_start, hs_end)
                || (ADDR_LT(hs_start, endp) && ADDR_GE(hs_end, endp))
                || (ADDR_LT((ptr_t)h, hs_start) && ADDR_LT(hs_end, endp))));
  }
#endif
  /* Record the (possibly trimmed) chunk as a new heap section. */
  GC_heap_sects[GC_n_heap_sects].hs_start = (ptr_t)h;
  GC_heap_sects[GC_n_heap_sects].hs_bytes = sz;
  GC_n_heap_sects++;
  /* Set up the header of the whole chunk and free it as a single block. */
  hhdr->hb_block = h;
  hhdr->hb_sz = sz;
  hhdr->hb_flags = 0;
  GC_freehblk(h);
  GC_heapsize += sz;

  if (ADDR_GE((ptr_t)GC_least_plausible_heap_addr, (ptr_t)h)
      || UNLIKELY(NULL == GC_least_plausible_heap_addr)) {
    /*
     * Making it a little smaller than necessary prevents us from
     * getting a false hit from the variable itself.  There is some
     * unintentional reflection here.
     */
    GC_least_plausible_heap_addr = (ptr_t)h - sizeof(ptr_t);
  }
  if (ADDR_LT((ptr_t)GC_greatest_plausible_heap_addr, endp)) {
    GC_greatest_plausible_heap_addr = endp;
  }
#ifdef SET_REAL_HEAP_BOUNDS
  /* Same as above but for the "real" heap bounds (kept as integers). */
  if (ADDR(h) < GC_least_real_heap_addr
      || UNLIKELY(0 == GC_least_real_heap_addr))
    GC_least_real_heap_addr = ADDR(h) - sizeof(ptr_t);
  if (GC_greatest_real_heap_addr < ADDR(endp)) {
#  ifdef INCLUDE_LINUX_THREAD_DESCR
    /* Avoid heap intersection with the static data roots. */
    GC_exclude_static_roots_inner((ptr_t)h, endp);
#  endif
    GC_greatest_real_heap_addr = ADDR(endp);
  }
#endif
  GC_handle_protected_regions_limit();
  if (UNLIKELY(old_capacity > 0)) {
#ifndef GWW_VDB
    /*
     * Recycling may call `GC_add_to_heap()` again but should not cause
     * resizing of `GC_heap_sects`.
     */
    GC_scratch_recycle_no_gww(old_heap_sects,
                              old_capacity * sizeof(struct HeapSect));
#else
    /* TODO: Implement GWW-aware recycling as in `alloc_mark_stack`. */
    GC_noop1_ptr(old_heap_sects);
#endif
  }
}

#ifndef NO_DEBUGGING
void
GC_print_heap_sects(void)
{
  size_t i;

  GC_printf("Total heap size: %lu" IF_USE_MUNMAP(" (%lu unmapped)") "\n",
            (unsigned long)GC_heapsize /*, */
                COMMA_IF_USE_MUNMAP((unsigned long)GC_unmapped_bytes));

  for (i = 0; i < GC_n_heap_sects; i++) {
    ptr_t start = GC_heap_sects[i].hs_start;
    size_t len = GC_heap_sects[i].hs_bytes;
    unsigned nbl = 0;
#  ifndef NO_BLACK_LISTING
    struct hblk *h;

    for (h = (struct hblk *)start; ADDR_LT((ptr_t)h, start + len); h++) {
      if (GC_is_black_listed(h, HBLKSIZE))
        nbl++;
    }
#  endif
    GC_printf("Section %u from %p to %p %u/%lu blacklisted\n", (unsigned)i,
              (void *)start, (void *)&start[len], nbl,
              (unsigned long)divHBLKSZ(len));
  }
}
#endif /* !NO_DEBUGGING */

/*
 * The bounds of the address range which might contain the heap.
 * `GC_add_to_heap()` widens the range to cover every added chunk, and
 * `GC_expand_hp_inner()` widens it further (with some slop).
 * The initial values denote an empty range.
 */
void *GC_least_plausible_heap_addr = MAKE_CPTR(GC_WORD_MAX);
void *GC_greatest_plausible_heap_addr = NULL;

/*
 * The self-imposed limit of the heap size, in bytes.  Zero means no
 * limit.  If it is non-zero, then `GC_expand_hp_inner()` refuses to
 * grow the heap beyond this value.
 */
STATIC word GC_max_heapsize = 0;

/*
 * Set the heap size limit to `n` (in bytes).
 * Note: the value is stored without acquiring the allocator lock here.
 */
GC_API void GC_CALL
GC_set_max_heap_size(GC_word n)
{
  GC_max_heapsize = n;
}

/*
 * How many times `GC_collect_or_expand()` warns about out-of-memory and
 * tries a collection (counted by `GC_alloc_fail_count`) before it gives
 * up and returns `FALSE`.
 */
word GC_max_retries = 0;

/*
 * Give the page-aligned part of the no-longer-needed scratch-allocated
 * memory region (starting at `ptr`, of `sz` bytes) to the heap.
 * Nothing is done if `ptr` is `NULL`; nothing is added to the heap if
 * the region does not contain a whole page.  The allocator lock should
 * be held.
 */
GC_INNER void
GC_scratch_recycle_inner(void *ptr, size_t sz)
{
  size_t misalignment;
  size_t skip;
  size_t usable;

  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == ptr)
    return;

  GC_ASSERT(sz != 0);
  GC_ASSERT(GC_page_size != 0);
  /* TODO: Assert correct memory flags if `GWW_VDB`. */

  /* The number of bytes to skip to reach the next page boundary. */
  misalignment = ADDR(ptr) & (GC_page_size - 1);
  skip = 0 == misalignment ? 0 : GC_page_size - misalignment;

  /* Only the whole pages past the skipped bytes are usable. */
  if (sz > skip) {
    usable = (sz - skip) & ~(GC_page_size - 1);
  } else {
    usable = 0;
  }
  GC_COND_LOG_PRINTF("Recycle %lu/%lu scratch-allocated bytes at %p\n",
                     (unsigned long)usable, (unsigned long)sz, ptr);
  if (0 == usable)
    return;
  GC_add_to_heap((struct hblk *)((ptr_t)ptr + skip), usable);
}

/*
 * Grow the heap by `n` blocks (at least by one block; the size in bytes
 * is rounded up to a multiple of the page size).  Returns `FALSE` if
 * the heap size limit (if set) would be exceeded or the memory cannot
 * be obtained, `TRUE` otherwise.  On success, the plausible heap bounds
 * are adjusted, the memory is added to the heap, and the client
 * `GC_on_heap_resize` callback (if any) is invoked.  The allocator lock
 * should be held.
 */
GC_INNER GC_bool
GC_expand_hp_inner(word n)
{
  size_t sz;
  struct hblk *space;
  /* Number of bytes by which we expect the heap to expand soon. */
  word expansion_slop;
  GC_bool grows_up;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_page_size != 0);
  if (0 == n) {
    n = 1;
  }
  sz = ROUNDUP_PAGESIZE((size_t)n * HBLKSIZE);
  GC_DBGLOG_PRINT_HEAP_IN_USE();
  if (GC_max_heapsize != 0) {
    if (GC_max_heapsize < (word)sz
        || GC_heapsize > GC_max_heapsize - (word)sz) {
      /* Exceeded the self-imposed limit. */
      return FALSE;
    }
  }

  space = (struct hblk *)GC_os_get_mem(sz);
  if (UNLIKELY(NULL == space)) {
    WARN("Failed to expand heap by %" WARN_PRIuPTR " KiB\n", sz >> 10);
    return FALSE;
  }
  GC_last_heap_growth_gc_no = GC_gc_no;
  GC_INFOLOG_PRINTF("Grow heap to %lu KiB after %lu bytes allocated\n",
                    TO_KiB_UL(GC_heapsize + sz),
                    (unsigned long)GC_bytes_allocd);

  /*
   * Adjust heap limits generously for black-listing to work better.
   * `GC_add_to_heap()` performs minimal adjustment needed for correctness.
   */
  expansion_slop = min_bytes_allocd() + 4 * MAXHINCR * HBLKSIZE;

  /*
   * Guess the direction of the heap growth: by the most significant bit
   * of the address in case of the very first expansion, otherwise by
   * comparing the address with that of the previous expansion.
   */
  if (0 == GC_last_heap_addr) {
    grows_up = (ADDR(space) & SIGNB) == 0;
  } else {
    grows_up = GC_last_heap_addr < ADDR(space);
  }

  if (grows_up) {
    /* Raise the upper bound unless the computation would overflow. */
    if (LIKELY(ADDR(space) < GC_WORD_MAX - (sz + expansion_slop))) {
      ptr_t upper = (ptr_t)space + sz + expansion_slop;

      if (ADDR_LT((ptr_t)GC_greatest_plausible_heap_addr, upper))
        GC_greatest_plausible_heap_addr = upper;
    }
  } else {
    /* Lower the lower bound unless the computation would underflow. */
    if (LIKELY(ADDR(space) > expansion_slop + sizeof(ptr_t))) {
      ptr_t lower = (ptr_t)space - expansion_slop - sizeof(ptr_t);

      if (ADDR_LT(lower, (ptr_t)GC_least_plausible_heap_addr))
        GC_least_plausible_heap_addr = lower;
    }
  }
  GC_last_heap_addr = ADDR(space);

  GC_add_to_heap(space, sz);
  if (GC_on_heap_resize)
    (*GC_on_heap_resize)(GC_heapsize);

  return TRUE;
}

/*
 * Explicitly grow the heap by at least `bytes`.  Initializes the
 * collector if needed.  Returns a non-zero value on success; in this
 * case `bytes` is added to `GC_requested_heapsize`.
 */
GC_API int GC_CALL
GC_expand_hp(size_t bytes)
{
  size_t n_blocks = OBJ_SZ_TO_BLOCKS_CHECKED(bytes);
  word heapsize_before;
  GC_bool expanded;

  if (UNLIKELY(!GC_is_initialized))
    GC_init();

  LOCK();
  heapsize_before = GC_heapsize;
  expanded = GC_expand_hp_inner(n_blocks);
  if (expanded)
    GC_requested_heapsize += bytes;
  if (expanded && GC_dont_gc) {
    /* Do not call `WARN()` if the heap growth is intentional. */
    GC_ASSERT(GC_heapsize >= heapsize_before);
    GC_heapsize_on_gc_disable += GC_heapsize - heapsize_before;
  }
  UNLOCK();

  /*
   * The value is really of `GC_bool` type, but the latter is not
   * convenient for an externally visible function.
   */
  return (int)expanded;
}

/*
 * The threshold value of the ratio of the bytes allocated since the
 * latest collection to the number of finalizers created since that
 * collection: if the actual ratio is below this value, then a collection
 * is triggered instead of heap expansion.  Has no effect in the
 * incremental mode.
 */
#if defined(GC_ALLOCD_BYTES_PER_FINALIZER) && !defined(CPPCHECK)
STATIC word GC_allocd_bytes_per_finalizer = GC_ALLOCD_BYTES_PER_FINALIZER;
#else
STATIC word GC_allocd_bytes_per_finalizer = 10000;
#endif

/*
 * Set the value of `GC_allocd_bytes_per_finalizer`.
 * Note: the value is stored without acquiring the allocator lock here.
 */
GC_API void GC_CALL
GC_set_allocd_bytes_per_finalizer(GC_word value)
{
  GC_allocd_bytes_per_finalizer = value;
}

/* Return the current value of `GC_allocd_bytes_per_finalizer`. */
GC_API GC_word GC_CALL
GC_get_allocd_bytes_per_finalizer(void)
{
  return GC_allocd_bytes_per_finalizer;
}

/*
 * Either collect or expand the heap to make room for `needed_blocks`
 * heap blocks.  `flags` is examined only for `IGNORE_OFF_PAGE` bit (and
 * only if the black-listing is supported).  `retry` should be `TRUE` if
 * this is not the first attempt (in the caller loop).
 * Returns `FALSE` only if the heap cannot be expanded and the limit of
 * the out-of-memory retries (`GC_max_retries`) is reached; otherwise
 * `TRUE` is returned (which means the caller may try to allocate again).
 * The allocator lock should be held, the collector should be initialized.
 */
GC_INNER GC_bool
GC_collect_or_expand(word needed_blocks, unsigned flags, GC_bool retry)
{
  /*
   * The values of `GC_fo_entries` and `GC_bytes_finalized` observed at
   * the latest collection performed (and not aborted) by this function.
   */
  static word last_fo_entries, last_bytes_finalized;

  GC_bool gc_not_stopped = TRUE;
  word blocks_to_get;
  IF_CANCEL(int cancel_state;)

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
  DISABLE_CANCEL(cancel_state);
  /*
   * In the non-incremental mode (and if the collection is not disabled),
   * prefer the collection if the heap expansion is disabled (and
   * something has been allocated), or too many finalizers have been
   * created relative to the amount of the allocated bytes, or
   * `GC_should_collect()` says so.
   */
  if (!GC_incremental && !GC_dont_gc
      && ((GC_dont_expand && GC_bytes_allocd > 0)
          || (GC_fo_entries > last_fo_entries
              && (last_bytes_finalized | GC_bytes_finalized) != 0
              && (GC_fo_entries - last_fo_entries)
                         * GC_allocd_bytes_per_finalizer
                     > GC_bytes_allocd)
          || GC_should_collect())) {
    /*
     * Try to do a full collection using "default" `stop_func` (unless
     * nothing has been allocated since the latest collection or heap
     * expansion is disabled).
     */
    gc_not_stopped = GC_try_to_collect_inner(
        GC_bytes_allocd > 0 && (!GC_dont_expand || !retry)
            ? GC_default_stop_func
            : GC_never_stop_func);
    if (gc_not_stopped || !retry) {
      /*
       * Either the collection has not been aborted or this is the
       * first attempt (in a loop).
       */
      last_fo_entries = GC_fo_entries;
      last_bytes_finalized = GC_bytes_finalized;
      RESTORE_CANCEL(cancel_state);
      return TRUE;
    }
  }

  /*
   * Compute the number of blocks to grow the heap by: a portion of the
   * heap size (measured relative to the size at the latest forced
   * unmapping) plus the requested amount.
   */
  blocks_to_get = (GC_heapsize - GC_heapsize_at_forced_unmap)
                      / (HBLKSIZE * GC_free_space_divisor)
                  + needed_blocks;
  if (blocks_to_get > MAXHINCR) {
#ifdef NO_BLACK_LISTING
    UNUSED_ARG(flags);
    blocks_to_get = needed_blocks > MAXHINCR ? needed_blocks : MAXHINCR;
#else
    word slop;

    /*
     * Get the minimum required to make it likely that we can satisfy
     * the current request in the presence of black-listing.  This will
     * probably be bigger than `MAXHINCR`.
     */
    if ((flags & IGNORE_OFF_PAGE) != 0) {
      slop = 4;
    } else {
      slop = 2 * divHBLKSZ(BL_LIMIT);
      if (slop > needed_blocks)
        slop = needed_blocks;
    }
    if (needed_blocks + slop > MAXHINCR) {
      blocks_to_get = needed_blocks + slop;
    } else {
      blocks_to_get = MAXHINCR;
    }
#endif
    /* Prevent overflow when the value is converted to bytes later. */
    if (blocks_to_get > divHBLKSZ(GC_WORD_MAX))
      blocks_to_get = divHBLKSZ(GC_WORD_MAX);
  } else if (blocks_to_get < MINHINCR) {
    blocks_to_get = MINHINCR;
  }

  /*
   * Do not request more than the heap size limit allows (but not less
   * than the needed amount).
   */
  if (GC_max_heapsize > GC_heapsize) {
    word max_get_blocks = divHBLKSZ(GC_max_heapsize - GC_heapsize);
    if (blocks_to_get > max_get_blocks)
      blocks_to_get
          = max_get_blocks > needed_blocks ? max_get_blocks : needed_blocks;
  }

#ifdef USE_MUNMAP
  if (GC_unmap_threshold > 1) {
    /*
     * Return as much memory to the OS as possible before trying to
     * get memory from it.
     */
    GC_unmap_old(0);
  }
#endif
  /*
   * Try to expand by the computed amount first; if that fails, then try
   * to expand by just the needed amount (unless it is the same one).
   */
  if (!GC_expand_hp_inner(blocks_to_get)
      && (blocks_to_get == needed_blocks
          || !GC_expand_hp_inner(needed_blocks))) {
    if (!gc_not_stopped) {
      /* Do not increment `GC_alloc_fail_count` here (and no warning). */
      GC_gcollect_inner();
      GC_ASSERT(0 == GC_bytes_allocd);
    } else if (GC_alloc_fail_count++ < GC_max_retries) {
      WARN("Out of Memory!  Trying to continue...\n", 0);
      GC_gcollect_inner();
    } else {
      /* The retries are exhausted: warn and report the failure. */
#ifdef USE_MUNMAP
      GC_ASSERT(GC_heapsize >= GC_unmapped_bytes);
#endif
#if !defined(SMALL_CONFIG) && (CPP_WORDSZ >= 32)
#  define MAX_HEAPSIZE_WARNED_IN_BYTES (5 << 20) /*< 5 MB */

      if (GC_heapsize > (word)MAX_HEAPSIZE_WARNED_IN_BYTES) {
        WARN("Out of Memory! Heap size: %" WARN_PRIuPTR " MiB."
             " Returning NULL!\n",
             (GC_heapsize - GC_unmapped_bytes) >> 20);
      } else
#endif
      /* else */ {
        WARN("Out of Memory! Heap size: %" WARN_PRIuPTR " bytes."
             " Returning NULL!\n",
             GC_heapsize - GC_unmapped_bytes);
      }
      RESTORE_CANCEL(cancel_state);
      return FALSE;
    }
  } else if (GC_alloc_fail_count > 0) {
    GC_COND_LOG_PRINTF("Memory available again...\n");
  }
  RESTORE_CANCEL(cancel_state);
  return TRUE;
}

/*
 * Make sure the free list for objects of `lg` granules of the given
 * `kind` is not empty, and return the first object of the list (the
 * object is not removed from the list here).  Returns `NULL` if `lg` is
 * zero or if `GC_collect_or_expand()` has failed.  The free list is
 * refilled by trying, in order: sweeping, allocating a new heap block,
 * and collecting or expanding the heap.  The allocator lock should be
 * held, the collector should be initialized.
 */
GC_INNER ptr_t
GC_allocobj(size_t lg, int kind)
{
#define MAX_ALLOCOBJ_RETRIES 3
  int retry_cnt = 0;
  void **flh = &GC_obj_kinds[kind].ok_freelist[lg];
#ifndef GC_DISABLE_INCREMENTAL
  /* Ensures the extra collection step below is tried only once. */
  GC_bool tried_minor = FALSE;
#endif

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
  if (UNLIKELY(0 == lg))
    return NULL;

  while (NULL == *flh) {
    /*
     * Only a few iterations are expected at most, otherwise something
     * is wrong in one of the functions called below.
     */
    if (retry_cnt > MAX_ALLOCOBJ_RETRIES)
      ABORT("Too many retries in GC_allocobj");
#ifndef GC_DISABLE_INCREMENTAL
    if (GC_incremental && GC_time_limit != GC_TIME_UNLIMITED && !GC_dont_gc) {
      /*
       * True incremental mode, not just generational.
       * Do our share of marking work.
       */
      GC_collect_a_little_inner(1);
    }
#endif
    /* Sweep blocks for objects of this size. */
    GC_ASSERT(!GC_is_full_gc || NULL == GC_obj_kinds[kind].ok_reclaim_list
              || NULL == GC_obj_kinds[kind].ok_reclaim_list[lg]);
    GC_continue_reclaim(lg, kind);
#if defined(CPPCHECK)
    GC_noop1_ptr(&flh);
#endif
    if (*flh != NULL)
      break;

    /* The sweeping has not helped, so try to get a new heap block. */
    GC_new_hblk(lg, kind);
#if defined(CPPCHECK)
    GC_noop1_ptr(&flh);
#endif
    if (*flh != NULL)
      break;

#ifndef GC_DISABLE_INCREMENTAL
    if (GC_incremental && GC_time_limit == GC_TIME_UNLIMITED && !tried_minor
        && !GC_dont_gc) {
      /*
       * No time limit is set: do a collection step (once) and start
       * over, before resorting to collect-or-expand.
       */
      GC_collect_a_little_inner(1);
      tried_minor = TRUE;
      continue;
    }
#endif
    /* Only the attempts after the first one are treated as retries. */
    if (UNLIKELY(!GC_collect_or_expand(1, 0 /* flags */, retry_cnt > 0)))
      return NULL;
    retry_cnt++;
  }
  /* Successful allocation; reset failure count. */
  GC_alloc_fail_count = 0;
  return (ptr_t)(*flh);
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1997 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2007 Free Software Foundation, Inc.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifndef MSWINCE
#  include <errno.h>
#endif
#include <string.h>

#ifdef KEEP_BACK_PTRS

/*
 * A custom trivial replacement for the standard `random()`.  The latter
 * is avoided because it might lead to crashes (if used from
 * a multi-threaded code) or to a compiler warning about the deterministic
 * result.  The generator state is kept in a function-local static
 * variable.
 */
static int
GC_rand(void)
{
  static GC_RAND_STATE_T seed;
  int next = GC_RAND_NEXT(&seed);

  return next;
}

#  define RANDOM() (long)GC_rand()

/*
 * Store the hidden `source` address as the back pointer in the debug
 * header of the object at `dest`.  Nothing is done if the object has no
 * debugging information.
 */
GC_INNER void
GC_store_back_pointer(ptr_t source, ptr_t dest)
{
  if (!GC_HAS_DEBUG_INFO(dest))
    return;

#  ifdef PARALLEL_MARK
  GC_cptr_store((volatile ptr_t *)&((oh *)dest)->oh_back_ptr,
                (ptr_t)HIDE_BACK_PTR(source));
#  else
  ((oh *)dest)->oh_back_ptr = HIDE_BACK_PTR(source);
#  endif
}

/*
 * Record, in the back pointer of the object at `dest`, the special
 * `MARKED_FOR_FINALIZATION` value (instead of a real source address).
 */
GC_INNER void
GC_marked_for_finalization(ptr_t dest)
{
  GC_store_back_pointer(MARKED_FOR_FINALIZATION, dest);
}

/*
 * Decode the back pointer stored in the debug header of the object
 * containing `dest`.  The result tells the kind of the recorded
 * reference.  `*base_p` and `*offset_p` are set only in case of
 * `GC_REFD_FROM_ROOT` (to the referring address and zero, respectively)
 * and `GC_REFD_FROM_HEAP` (to the client-visible base of the referring
 * object and the offset of the reference within it).
 */
GC_API GC_ref_kind GC_CALL
GC_get_back_ptr_info(void *dest, void **base_p, size_t *offset_p)
{
  oh *ohdr = (oh *)GC_base(dest);
  ptr_t src, src_base;

#  ifdef LINT2
  /*
   * Tell the code analysis tool explicitly that the function is not
   * expected to be called with an incorrect `dest` value.
   */
  if (!ohdr)
    ABORT("Invalid GC_get_back_ptr_info argument");
#  endif
  if (!GC_HAS_DEBUG_INFO((ptr_t)ohdr))
    return GC_NO_SPACE;

  src = (ptr_t)GC_REVEAL_POINTER(ohdr->oh_back_ptr);
  /* Handle the special (non-address) values first. */
  if (MARKED_FOR_FINALIZATION == src)
    return GC_FINALIZER_REFD;
  if (MARKED_FROM_REGISTER == src)
    return GC_REFD_FROM_REG;
  if (NOT_MARKED == src)
    return GC_UNREFERENCED;

#  if ALIGNMENT == 1
  /*
   * Heuristically try to fix off-by-one errors we introduced by
   * insisting on even addresses: prefer the next address if only the
   * value stored there lies within the heap bounds.
   */
  {
    ptr_t next_src = src + 1;
    ptr_t target = *(ptr_t *)src;
    ptr_t next_target = *(ptr_t *)next_src;

    if (GC_least_real_heap_addr < ADDR(next_target)
        && ADDR(next_target) < GC_greatest_real_heap_addr
        && (GC_least_real_heap_addr >= ADDR(target)
            || ADDR(target) >= GC_greatest_real_heap_addr)) {
      src = next_src;
    }
  }
#  endif

  src_base = (ptr_t)GC_base(src);
  if (src_base != NULL) {
    /* The reference is located inside a heap object. */
    if (GC_HAS_DEBUG_INFO(src_base))
      src_base += sizeof(oh);
    *base_p = src_base;
    *offset_p = (size_t)(src - src_base);
    return GC_REFD_FROM_HEAP;
  }

  /* Not in a heap object, thus the reference is treated as a root one. */
  *base_p = src;
  *offset_p = 0;
  return GC_REFD_FROM_ROOT;
}

/*
 * Produce a pseudo-random address which belongs to one of the heap
 * sections.  The program is aborted if the total size of the sections
 * turns out to be inconsistent with `GC_heapsize`.
 */
GC_API void *GC_CALL
GC_generate_random_heap_address(void)
{
  size_t sect = 0;
  word offset = (word)RANDOM();

  /* One random number is not enough to cover a big heap. */
  if (GC_heapsize > (word)GC_RAND_MAX) {
    offset *= GC_RAND_MAX;
    offset += (word)RANDOM();
  }

  /*
   * This does not yield a uniform distribution, especially if e.g.
   * `RAND_MAX` is `1.5 * GC_heapsize`.  But for typical cases, it is
   * not too bad.
   */
  offset %= GC_heapsize;

  /* Find the heap section which the offset falls into. */
  for (;;) {
    size_t sect_len;

    if (sect >= GC_n_heap_sects)
      ABORT("GC_generate_random_heap_address: size inconsistency");

    sect_len = GC_heap_sects[sect].hs_bytes;
    if (offset < sect_len)
      return GC_heap_sects[sect].hs_start + offset;
    offset -= sect_len;
    ++sect;
  }
}

/*
 * Produce a pseudo-random heap address which lies inside a marked
 * object.  Random heap addresses are generated repeatedly until
 * a suitable one is found.
 */
GC_API void *GC_CALL
GC_generate_random_valid_address(void)
{
  for (;;) {
    ptr_t candidate = (ptr_t)GC_generate_random_heap_address();
    ptr_t base = (ptr_t)GC_base(candidate);

    if (base != NULL && GC_is_marked(base))
      return candidate;
  }
}

/*
 * Print the object containing `p`, then follow the recorded back
 * pointers, printing every referring heap object on the way, until
 * a non-heap source of the reference is reached or no further
 * information is available.  Should be called without the allocator
 * lock held.
 */
GC_API void GC_CALL
GC_print_backtrace(void *p)
{
  void *obj = p;
  int depth;

  GC_ASSERT(I_DONT_HOLD_LOCK());
  GC_print_heap_obj((ptr_t)GC_base(obj));

  for (depth = 0;; ++depth) {
    void *base;
    size_t offset;
    GC_ref_kind kind = GC_get_back_ptr_info(obj, &base, &offset);

    if (GC_UNREFERENCED == kind) {
      GC_err_printf("Reference could not be found\n");
      return;
    }
    if (GC_NO_SPACE == kind) {
      GC_err_printf("No debug info in object: Can't find reference\n");
      return;
    }

    GC_err_printf("Reachable via %d levels of pointers from ", depth);
    if (GC_REFD_FROM_ROOT == kind) {
      GC_err_printf("root at %p\n\n", base);
      return;
    }
    if (GC_REFD_FROM_REG == kind) {
      GC_err_printf("root in register\n\n");
      return;
    }
    if (GC_FINALIZER_REFD == kind) {
      GC_err_printf("list of finalizable objects\n\n");
      return;
    }
    if (kind != GC_REFD_FROM_HEAP) {
      GC_err_printf("INTERNAL ERROR: UNEXPECTED SOURCE!!!!\n");
      return;
    }

    /* Referred from a heap object: print it and go one level up. */
    GC_err_printf("offset %ld in object:\n", (long)offset);
    /* Take `GC_base(base)` to get real base, i.e. header. */
    GC_print_heap_obj((ptr_t)GC_base(base));
    obj = base;
  }
}

/*
 * Perform a collection, then choose a random address inside a marked
 * heap object and print the backtrace for it.  Only an error message is
 * printed if the collection could not be performed.  Should be called
 * without the allocator lock held.
 */
GC_API void GC_CALL
GC_generate_random_backtrace(void)
{
  void *addr;

  GC_ASSERT(I_DONT_HOLD_LOCK());
  if (0 == GC_try_to_collect(GC_never_stop_func)) {
    GC_err_printf("Cannot generate a backtrace: "
                  "garbage collection is disabled!\n");
    return;
  }

  /* The random address is chosen with the allocator lock held. */
  LOCK();
  addr = GC_generate_random_valid_address();
  UNLOCK();

  GC_printf("\n***Chosen address %p in object\n", addr);
  GC_print_backtrace(addr);
}

#endif /* KEEP_BACK_PTRS */

/*
 * Evaluates to true if the address of `p` and the address which is
 * `sizeof(oh) - 1 + sz` bytes past `p` differ in some bit at or above
 * `HBLKSIZE` one.  Note: `p` is evaluated twice.
 */
#define CROSSES_HBLK(p, sz) \
  ((ADDR((p) + (sizeof(oh) - 1) + (sz)) ^ ADDR(p)) >= HBLKSIZE)

/*
 * Fill in the debug header located at `base` for a client object of
 * `sz` bytes, recording `string` and `linenum` in it.  Unless
 * `SHORT_DBG_HDRS`, the object size and the start flag are stored in
 * the header as well, and the end flag is stored both right after the
 * client part and in the last word of the whole object.  Returns the
 * displaced pointer (i.e. the address of the client part).
 * The allocator lock should be held.
 */
STATIC void *
GC_store_debug_info_inner(void *base, size_t sz, const char *string,
                          int linenum)
{
  oh *ohdr = (oh *)base;
  GC_uintptr_t *body = (GC_uintptr_t *)(ohdr + 1);

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_size(base) >= sizeof(oh) + sz);
  GC_ASSERT(!(SMALL_OBJ(sz) && CROSSES_HBLK((ptr_t)base, sz)));
#ifdef KEEP_BACK_PTRS
  ohdr->oh_back_ptr = HIDE_BACK_PTR(NOT_MARKED);
#endif
#ifdef MAKE_BACK_GRAPH
  ohdr->oh_bg_ptr = HIDE_BACK_PTR((ptr_t)0);
#endif
  ohdr->oh_string = string;
  ohdr->oh_int = linenum;
#ifdef SHORT_DBG_HDRS
  UNUSED_ARG(sz);
#else
  {
    /* Both flags depend on the address of the client part. */
    GC_uintptr_t end_marker = END_FLAG ^ (GC_uintptr_t)body;

    ohdr->oh_sz = (GC_uintptr_t)sz;
    ohdr->oh_sf = START_FLAG ^ (GC_uintptr_t)body;
    body[BYTES_TO_PTRS_ROUNDUP(sz)] = end_marker;
    ((GC_uintptr_t *)base)[BYTES_TO_PTRS(GC_size(base)) - 1] = end_marker;
  }
#endif
  return body;
}

#ifndef SHORT_DBG_HDRS
/*
 * Verify the debugging info of the object with the debug header at
 * `ohdr`.  The checks are, in order: the stored size (it should fit the
 * real object size), the start flag, the end flag in the last word of
 * the object, and the end flag right after the client part.  Returns
 * the address of the first clobbered field, or `NULL` if all is OK.
 */
STATIC ptr_t
GC_check_annotated_obj(oh *ohdr)
{
  ptr_t body = (ptr_t)(ohdr + 1);
  GC_uintptr_t *hdr_words = (GC_uintptr_t *)ohdr;
  GC_uintptr_t *body_words = (GC_uintptr_t *)body;
  size_t gc_sz = GC_size(ohdr);
  size_t last_idx;
  size_t end_idx;

  if (ohdr->oh_sz + DEBUG_BYTES > (GC_uintptr_t)gc_sz)
    return (ptr_t)(&ohdr->oh_sz);
  if (ohdr->oh_sf != (START_FLAG ^ (GC_uintptr_t)body))
    return (ptr_t)(&ohdr->oh_sf);

  /* The index of the last word of the whole object. */
  last_idx = BYTES_TO_PTRS(gc_sz) - 1;
  if (hdr_words[last_idx] != (END_FLAG ^ (GC_uintptr_t)body))
    return (ptr_t)(&hdr_words[last_idx]);

  /* The index (in the client part) of the word following the client data. */
  end_idx = BYTES_TO_PTRS_ROUNDUP((size_t)ohdr->oh_sz);
  if (body_words[end_idx] != (END_FLAG ^ (GC_uintptr_t)body))
    return (ptr_t)(&body_words[end_idx]);

  return NULL;
}
#endif /* !SHORT_DBG_HDRS */

/*
 * The table of the client-supplied type description functions, indexed
 * by the object kind.  Consulted by `GC_print_obj()`.
 */
STATIC GC_describe_type_fn GC_describe_type_fns[MAXOBJKINDS] = { 0 };

/*
 * Register `fn` as the type description function for the objects of the
 * given `kind`.  `kind` should be less than `MAXOBJKINDS` (this is
 * checked only by an assertion).
 * Note: the table is updated without acquiring the allocator lock here.
 */
GC_API void GC_CALL
GC_register_describe_type_fn(int kind, GC_describe_type_fn fn)
{
  GC_ASSERT((unsigned)kind < MAXOBJKINDS);
  GC_describe_type_fns[kind] = fn;
}

/*
 * Helper macros to build a `printf`-like call whose format string and
 * argument list have a part which is present only if `SHORT_DBG_HDRS`
 * is not defined: the first one expands to its argument (or to
 * nothing), the second one expands to its argument preceded by a comma
 * (or to nothing).
 */
#ifndef SHORT_DBG_HDRS
#  define IF_NOT_SHORTDBG_HDRS(x) x
#  define COMMA_IFNOT_SHORTDBG_HDRS(x) /* comma */ , x
#else
#  define IF_NOT_SHORTDBG_HDRS(x)
#  define COMMA_IFNOT_SHORTDBG_HDRS(x)
#endif

/*
 * Print a human-readable description of the object (by
 * `GC_err_printf`), followed by its call chain (by `PRINT_CALL_CHAIN`).
 * `base` should point to the debug header of the object, i.e. the
 * object is assumed to have the debugging info.  The kind of the object
 * is described either by the registered type description function (if
 * any, and if the object is marked), or by the name of a predefined
 * kind, or, otherwise, by the numeric kind and the block descriptor.
 * Should be called without the allocator lock held.
 */
STATIC void
GC_print_obj(ptr_t base)
{
  oh *ohdr = (oh *)base;
  ptr_t q;
  hdr *hhdr;
  int kind;
  const char *kind_name = NULL;
  char buffer[GC_TYPE_DESCR_LEN + 1];

  GC_ASSERT(I_DONT_HOLD_LOCK());
#ifdef LINT2
  if (!ohdr)
    ABORT("Invalid GC_print_obj argument");
#endif

  /* `q` is the client-visible address of the object. */
  q = (ptr_t)(ohdr + 1);
  hhdr = GC_find_header(q);
  kind = hhdr->hb_obj_kind;
  if (GC_describe_type_fns[kind] != 0 && GC_is_marked(ohdr)) {
    /*
     * The check for the mark bit should preclude free-list objects
     * except with thread-local allocation.
     */
    buffer[GC_TYPE_DESCR_LEN] = 0;
    (*GC_describe_type_fns[kind])(q, buffer);
    GC_ASSERT(buffer[GC_TYPE_DESCR_LEN] == 0);
    kind_name = buffer;
  } else {
    switch (kind) {
    case PTRFREE:
      kind_name = "PTRFREE";
      break;
    case NORMAL:
      kind_name = "NORMAL";
      break;
    case UNCOLLECTABLE:
      kind_name = "UNCOLLECTABLE";
      break;
#ifdef GC_ATOMIC_UNCOLLECTABLE
    case AUNCOLLECTABLE:
      kind_name = "ATOMIC_UNCOLLECTABLE";
      break;
#endif
    default:
      /*
       * No name is known for the kind.  The alternative is to use
       * `snprintf(buffer)` but the latter is not quite portable (see
       * `vsnprintf` in `misc.c` file).
       */
      break;
    }
  }

  if (NULL == kind_name) {
    /* Fall back to the numeric kind and the block descriptor. */
    GC_err_printf("%p (%s:%d," IF_NOT_SHORTDBG_HDRS(
                      " sz= %lu,") " kind= %d, descr= 0x%lx)\n",
                  (void *)q, ohdr->oh_string,
                  GET_OH_LINENUM(ohdr) /*, */
                  COMMA_IFNOT_SHORTDBG_HDRS((unsigned long)ohdr->oh_sz),
                  kind, (unsigned long)hhdr->hb_descr);
  } else {
    GC_err_printf("%p (%s:%d," IF_NOT_SHORTDBG_HDRS(" sz= %lu,") " %s)\n",
                  (void *)q, ohdr->oh_string,
                  GET_OH_LINENUM(ohdr) /*, */
                  COMMA_IFNOT_SHORTDBG_HDRS((unsigned long)ohdr->oh_sz),
                  kind_name);
  }
  PRINT_CALL_CHAIN(ohdr);
}

/*
 * The heap object printer installed when the debugging mode is turned on.
 * Print the object at `base` along with its debugging info if the object
 * has one; otherwise delegate to the default printer.
 */
STATIC void
GC_debug_print_heap_obj_proc(ptr_t base)
{
  GC_ASSERT(I_DONT_HOLD_LOCK());
  if (!GC_HAS_DEBUG_INFO(base)) {
    GC_default_print_heap_obj_proc(base);
    return;
  }
  GC_print_obj(base);
}

#ifndef SHORT_DBG_HDRS
/* A forward declaration; the function is defined later in this file. */
STATIC void GC_check_heap_proc(void);
#elif !defined(NO_FIND_LEAK)
/*
 * A no-op procedure; it is installed as both the heap checking and the
 * smashed objects printing hooks when the debugging mode is started.
 */
static void
do_nothing(void)
{
}
#endif /* SHORT_DBG_HDRS */

/*
 * Turn on the debugging mode.  Should not be called if
 * `GC_debugging_initialized` is already set.
 * The allocator lock should be held by the caller (checked by the
 * assertion).
 */
STATIC void
GC_start_debugging_inner(void)
{
  GC_ASSERT(I_HOLD_LOCK());
#ifndef SHORT_DBG_HDRS
  /* Install the real heap checking and smashed objects printing hooks. */
  GC_check_heap = GC_check_heap_proc;
  GC_print_all_smashed = GC_print_all_smashed_proc;
#elif !defined(NO_FIND_LEAK)
  /* Install the no-op procedure for both hooks. */
  GC_check_heap = do_nothing;
  GC_print_all_smashed = do_nothing;
#endif
  GC_print_heap_obj = GC_debug_print_heap_obj_proc;
  GC_debugging_initialized = TRUE;
  /*
   * Register the debugging header size as a displacement (presumably, to
   * have the client-visible pointers recognized by the collector).
   */
  GC_register_displacement_inner(sizeof(oh));
#if defined(CPPCHECK)
  GC_noop1(GC_debug_header_size);
#endif
}

/*
 * Finish a debugging allocation.  If `base` is `NULL` (i.e. the underlying
 * allocation has failed), then report that `fn` returns `NULL` for the
 * request of `lb` bytes and return `NULL`.  Otherwise, with the allocator
 * lock held, turn on the debugging mode (unless done already), fill in
 * the debugging info of the object and return the displaced pointer.
 */
static void *
store_debug_info(void *base, size_t lb, const char *fn, GC_EXTRA_PARAMS)
{
  void *displaced;

  if (base != NULL) {
    LOCK();
    if (!GC_debugging_initialized)
      GC_start_debugging_inner();
    displaced = GC_store_debug_info_inner(base, lb, s, i);
    ADD_CALL_CHAIN(base, ra);
    UNLOCK();
    return displaced;
  }

  GC_err_printf("%s(%lu) returning NULL (%s:%d)\n", fn, (unsigned long)lb, s,
                i);
  return NULL;
}

/* The size (in bytes) of the debugging header (`oh`). */
const size_t GC_debug_header_size = sizeof(oh);

/* Return the size (in bytes) of the debugging header (`oh`). */
GC_API size_t GC_CALL
GC_get_debug_header_size(void)
{
  size_t hdr_size = sizeof(oh);

  return hdr_size;
}

/*
 * Register both `offset` and `offset` increased by the debugging header
 * size as displacements.  The allocator lock is held during the
 * registration of the pair.
 */
GC_API void GC_CALL
GC_debug_register_displacement(size_t offset)
{
  size_t debug_offset = sizeof(oh) + offset;

  LOCK();
  GC_register_displacement_inner(offset);
  GC_register_displacement_inner(debug_offset);
  UNLOCK();
}

#ifdef GC_ADD_CALLER
#  if defined(HAVE_DLADDR) && defined(GC_HAVE_RETURN_ADDR_PARENT) \
      && defined(FUNCPTR_IS_DATAPTR)
#    include <dlfcn.h>

STATIC void
GC_caller_func_offset(GC_return_addr_t ra, const char **symp, int *offp)
{
  Dl_info caller;

  if (ra != 0 && dladdr((void *)ra, &caller) && caller.dli_sname != NULL) {
    *symp = caller.dli_sname;
    *offp = (int)((ptr_t)ra - (ptr_t)caller.dli_saddr);
  }
  if (NULL == *symp) {
    *symp = "unknown";
    /* Note: `*offp` is unchanged. */
  }
}
#  else
#    define GC_caller_func_offset(ra, symp, offp) (void)(*(symp) = "unknown")
#  endif
#endif /* GC_ADD_CALLER */

/*
 * Allocate an object of `lb` bytes by `GC_malloc()` with extra room for
 * the debugging info (the requested size is increased by `DEBUG_BYTES`),
 * and store the debugging info into it.  Return the client-visible
 * (displaced) pointer, or `NULL` if the underlying allocation has failed.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc(size_t lb, GC_EXTRA_PARAMS)
{
  void *base;

  /*
   * Note that according to `malloc()` specification, if size (`lb`) is
   * zero, then `malloc()` returns either `NULL`, or a unique pointer
   * value that can later be successfully passed to `free()`.
   * We always do the latter.
   */
#if defined(_FORTIFY_SOURCE) && !defined(__clang__)
  /* Workaround to avoid "exceeds maximum object size" gcc warning. */
  base = GC_malloc(lb < GC_SIZE_MAX - DEBUG_BYTES ? lb + DEBUG_BYTES
                                                  : GC_SIZE_MAX >> 1);
#else
  base = GC_malloc(SIZET_SAT_ADD(lb, DEBUG_BYTES));
#endif
#ifdef GC_ADD_CALLER
  /* No source location given by the caller: use the return address. */
  if (NULL == s) {
    GC_caller_func_offset(ra, &s, &i);
  }
#endif
  return store_debug_info(base, lb, "GC_debug_malloc", OPT_RA s, i);
}

/*
 * The debugging variant of `GC_malloc_ignore_off_page()`: the requested
 * size is increased by `DEBUG_BYTES` (with saturation) and the debugging
 * info is stored into the allocated object.  Return the displaced
 * pointer, or `NULL` on the allocation failure.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_ignore_off_page(size_t lb, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, DEBUG_BYTES);
  void *base = GC_malloc_ignore_off_page(padded_lb);

  return store_debug_info(base, lb, "GC_debug_malloc_ignore_off_page",
                          OPT_RA s, i);
}

/*
 * The debugging variant of `GC_malloc_atomic_ignore_off_page()`: the
 * requested size is increased by `DEBUG_BYTES` (with saturation) and the
 * debugging info is stored into the allocated object.  Return the
 * displaced pointer, or `NULL` on the allocation failure.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_atomic_ignore_off_page(size_t lb, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, DEBUG_BYTES);
  void *base = GC_malloc_atomic_ignore_off_page(padded_lb);

  return store_debug_info(base, lb, "GC_debug_malloc_atomic_ignore_off_page",
                          OPT_RA s, i);
}

/*
 * Allocate an object of the given `kind` with the debugging info.
 * The requested size is increased by `DEBUG_BYTES` (with saturation);
 * zero is passed as both the flags and the alignment mask to the
 * underlying allocator.  Return the displaced pointer, or `NULL` on
 * the allocation failure.
 */
STATIC void *
GC_debug_generic_malloc(size_t lb, int kind, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, DEBUG_BYTES);
  void *base = GC_generic_malloc_aligned(padded_lb, kind, 0 /* `flags` */,
                                         0 /* `align_m1` */);

  return store_debug_info(base, lb, "GC_debug_generic_malloc", OPT_RA s, i);
}

#ifdef DBG_HDRS_ALL
/*
 * The debugging allocation for the collector internal needs; the
 * allocator lock should be held by the caller.  The object is labeled
 * with "INTERNAL" as its source location.  Return the displaced pointer,
 * or `NULL` (after printing a message) if the allocation has failed.
 */
GC_INNER void *
GC_debug_generic_malloc_inner(size_t lb, int kind, unsigned flags)
{
  void *base, *displaced;
  size_t padded_lb = SIZET_SAT_ADD(lb, DEBUG_BYTES);

  GC_ASSERT(I_HOLD_LOCK());
  base = GC_generic_malloc_inner(padded_lb, kind, flags);
  if (base != NULL) {
    if (!GC_debugging_initialized)
      GC_start_debugging_inner();
    displaced = GC_store_debug_info_inner(base, lb, "INTERNAL", 0);
    ADD_CALL_CHAIN_INNER(base);
    return displaced;
  }

  GC_err_printf("GC internal allocation (%lu bytes) returning NULL\n",
                (unsigned long)lb);
  return NULL;
}
#endif /* DBG_HDRS_ALL */

#ifndef CPPCHECK
/* Same as `GC_debug_malloc()`: the request is just forwarded to it. */
GC_API void *GC_CALL
GC_debug_malloc_stubborn(size_t lb, GC_EXTRA_PARAMS)
{
  void *result = GC_debug_malloc(lb, OPT_RA s, i);

  return result;
}

/* Does nothing; the argument is ignored. */
GC_API void GC_CALL
GC_debug_change_stubborn(const void *p)
{
  UNUSED_ARG(p);
}
#endif /* !CPPCHECK */

/*
 * The debugging variant of `GC_end_stubborn_change()`: `p` is mapped to
 * the base address of the containing object first.  Abort if no such
 * base address is found for `p`.
 */
GC_API void GC_CALL
GC_debug_end_stubborn_change(const void *p)
{
  const void *obj_base = GC_base_C(p);

  if (obj_base == NULL) {
    ABORT_ARG1("GC_debug_end_stubborn_change: bad arg", ": %p", p);
  }
  GC_end_stubborn_change(obj_base);
}

/*
 * Store the pointer `q` to the location `p` and then report the end of
 * the change of the object containing `p`.  Both pointers are passed
 * through the checking functions (`GC_is_visible()` for the destination
 * and `GC_is_valid_displacement()` for the stored value).
 */
GC_API void GC_CALL
GC_debug_ptr_store_and_dirty(void *p, const void *q)
{
  void **dest = (void **)GC_is_visible(p);

  *dest = GC_is_valid_displacement(GC_CAST_AWAY_CONST_PVOID(q));
  GC_debug_end_stubborn_change(p);
  REACHABLE_AFTER_DIRTY(q);
}

/*
 * The debugging variant of `GC_malloc_atomic()`: the requested size is
 * increased by `DEBUG_BYTES` (with saturation) and the debugging info is
 * stored into the allocated object.  Return the displaced pointer, or
 * `NULL` on the allocation failure.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_atomic(size_t lb, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, DEBUG_BYTES);
  void *base = GC_malloc_atomic(padded_lb);

  return store_debug_info(base, lb, "GC_debug_malloc_atomic", OPT_RA s, i);
}

/*
 * The debugging variant of `strdup()`.  The copy (including the
 * terminating NUL) is allocated by `GC_debug_malloc_atomic()`.
 * Return `NULL` if `str` is `NULL` (a message is printed in this case
 * if `GC_find_leak_inner` is set), or if the allocation has failed
 * (`errno` is set to `ENOMEM` then, unless `MSWINCE` is defined).
 */
GC_API GC_ATTR_MALLOC char *GC_CALL
GC_debug_strdup(const char *str, GC_EXTRA_PARAMS)
{
  size_t lb;
  char *copy;

  if (NULL == str) {
    if (GC_find_leak_inner)
      GC_err_printf("strdup(NULL) behavior is undefined\n");
    return NULL;
  }

  lb = strlen(str) + 1;
  copy = (char *)GC_debug_malloc_atomic(lb, OPT_RA s, i);
  if (NULL != copy) {
    BCOPY(str, copy, lb);
    return copy;
  }
#ifndef MSWINCE
  errno = ENOMEM;
#endif
  return NULL;
}

/*
 * The debugging variant of `strndup()`.  At most `size` characters of
 * `str` are duplicated and the result is always NUL-terminated; the copy
 * is allocated by `GC_debug_malloc_atomic()`.  `str` is expected to be
 * non-`NULL`.  Return `NULL` if the allocation has failed (`errno` is set
 * to `ENOMEM` then, unless `MSWINCE` is defined).
 */
GC_API GC_ATTR_MALLOC char *GC_CALL
GC_debug_strndup(const char *str, size_t size, GC_EXTRA_PARAMS)
{
  char *copy;
  size_t len = 0;

  /*
   * Compute the length without looking beyond the first `size`
   * characters: the source is not required to be NUL-terminated if it
   * contains at least `size` characters, thus an unbounded `strlen()`
   * could read out of the bounds of the source array.
   */
  while (len < size && str[len] != '\0')
    len++;

  copy = (char *)GC_debug_malloc_atomic(len + 1, OPT_RA s, i);
  if (copy == NULL) {
#ifndef MSWINCE
    errno = ENOMEM;
#endif
    return NULL;
  }
  if (len > 0)
    BCOPY(str, copy, len);
  copy[len] = '\0';
  return copy;
}

#ifdef GC_REQUIRE_WCSDUP
#  include <wchar.h> /*< for `wcslen()` */

/*
 * The debugging variant of `wcsdup()`.  The copy (including the
 * terminating null wide character) is allocated by
 * `GC_debug_malloc_atomic()`.  Return `NULL` if the allocation has failed
 * (`errno` is set to `ENOMEM` then, unless `MSWINCE` is defined).
 */
GC_API GC_ATTR_MALLOC wchar_t *GC_CALL
GC_debug_wcsdup(const wchar_t *str, GC_EXTRA_PARAMS)
{
  size_t lb = (wcslen(str) + 1) * sizeof(wchar_t);
  wchar_t *copy = (wchar_t *)GC_debug_malloc_atomic(lb, OPT_RA s, i);

  if (NULL != copy) {
    BCOPY(str, copy, lb);
    return copy;
  }
#  ifndef MSWINCE
  errno = ENOMEM;
#  endif
  return NULL;
}
#endif /* GC_REQUIRE_WCSDUP */

/*
 * The debugging variant of `GC_malloc_uncollectable()`: the requested
 * size is increased by `UNCOLLECTABLE_DEBUG_BYTES` (with saturation) and
 * the debugging info is stored into the allocated object.  Return the
 * displaced pointer, or `NULL` on the allocation failure.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_uncollectable(size_t lb, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, UNCOLLECTABLE_DEBUG_BYTES);
  void *base = GC_malloc_uncollectable(padded_lb);

  return store_debug_info(base, lb, "GC_debug_malloc_uncollectable", OPT_RA s,
                          i);
}

#ifdef GC_ATOMIC_UNCOLLECTABLE
/*
 * The debugging variant of `GC_malloc_atomic_uncollectable()`: the
 * requested size is increased by `UNCOLLECTABLE_DEBUG_BYTES` (with
 * saturation) and the debugging info is stored into the allocated object.
 * Return the displaced pointer, or `NULL` on the allocation failure.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_atomic_uncollectable(size_t lb, GC_EXTRA_PARAMS)
{
  size_t padded_lb = SIZET_SAT_ADD(lb, UNCOLLECTABLE_DEBUG_BYTES);
  void *base = GC_malloc_atomic_uncollectable(padded_lb);

  return store_debug_info(base, lb, "GC_debug_malloc_atomic_uncollectable",
                          OPT_RA s, i);
}
#endif /* GC_ATOMIC_UNCOLLECTABLE */

#ifdef LINT2
/*
 * Copyright (c) 1996-1998 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2018-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/* This file is kept for a binary compatibility purpose only. */

#ifndef GC_ALLOC_PTRS_H
#define GC_ALLOC_PTRS_H




#ifdef __cplusplus
extern "C" {
#endif

#ifndef GC_API_PRIV
#  define GC_API_PRIV GC_API
#endif

/*
 * Some compilers do not accept `const` together with the `dllimport`
 * attribute, so the symbols below are exported as non-constant ones.
 */
#ifndef GC_APIVAR_CONST
#  if defined(GC_BUILD) || !defined(GC_DLL)
#    define GC_APIVAR_CONST const
#  else
#    define GC_APIVAR_CONST /*< empty */
#  endif
#endif

GC_API_PRIV void **GC_APIVAR_CONST GC_objfreelist_ptr;
GC_API_PRIV void **GC_APIVAR_CONST GC_aobjfreelist_ptr;
GC_API_PRIV void **GC_APIVAR_CONST GC_uobjfreelist_ptr;

#ifdef GC_ATOMIC_UNCOLLECTABLE
GC_API_PRIV void **GC_APIVAR_CONST GC_auobjfreelist_ptr;
#endif

/*
 * Manually update the number of bytes allocated during the current
 * collection cycle and the number of explicitly deallocated bytes of
 * memory since the last collection, respectively.  Both functions are
 * unsynchronized; `GC_call_with_alloc_lock()` should be used to avoid
 * a data race.
 */
GC_API_PRIV void GC_CALL GC_incr_bytes_allocd(size_t /* `bytes` */);
GC_API_PRIV void GC_CALL GC_incr_bytes_freed(size_t /* `bytes` */);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* GC_ALLOC_PTRS_H */

#endif

/*
 * The debugging variant of `GC_free()`.  A null `p` is ignored.
 * Abort if `p` does not belong to any heap object (in some of the
 * `REDIRECT_MALLOC` configurations, a pointer outside the collector heap
 * is silently ignored instead).  If `p` is not located right after the
 * debugging header of its object, then a message is printed and the
 * object is processed without the integrity check.  Otherwise (unless
 * `SHORT_DBG_HDRS` is defined) the debugging info is checked for being
 * clobbered, a double free is reported and ignored, and the object is
 * marked as deallocated by storing its real size to `oh_sz` field.
 * The memory is really deallocated by `GC_free()` either in the find-leak
 * mode (unless a delayed free is requested for an object with the
 * debugging info) or if the object is of an uncollectable kind; otherwise
 * the client part of the object is just filled in with
 * `GC_FREED_MEM_MARKER` and the counter of the freed bytes is updated.
 */
GC_API void GC_CALL
GC_debug_free(void *p)
{
  ptr_t base;
  if (0 == p)
    return;

  base = (ptr_t)GC_base(p);
  if (NULL == base) {
#if defined(REDIRECT_MALLOC)                                           \
    && ((defined(NEED_CALLINFO) && defined(GC_HAVE_BUILTIN_BACKTRACE)) \
        || defined(REDIR_MALLOC_AND_LINUXTHREADS)                      \
        || (defined(SOLARIS) && defined(THREADS)) || defined(MSWIN32))
    /*
     * In some cases, we should ignore objects that do not belong to
     * the collector heap.  See the comment in `GC_free()`.
     */
    if (!GC_is_heap_ptr(p))
      return;
#endif
    ABORT_ARG1("Invalid pointer passed to free()", ": %p", p);
  }
  if ((word)((ptr_t)p - base) != sizeof(oh)) {
#if defined(REDIRECT_FREE) && defined(USE_PROC_FOR_LIBRARIES)
    /*
     * TODO: Suppress the warning if `free()` caller is in `libpthread`
     * or `libdl`.
     */
#endif
    /*
     * TODO: Suppress the warning for objects allocated by `GC_memalign`
     * and friends (these ones do not have the debugging counterpart).
     */
    GC_err_printf("GC_debug_free called on pointer %p w/o debugging info\n",
                  p);
  } else {
#ifndef SHORT_DBG_HDRS
    ptr_t clobbered = GC_check_annotated_obj((oh *)base);
    size_t sz = GC_size(base);

    if (clobbered != NULL) {
      /* No "release" barrier is needed. */
      GC_SET_HAVE_ERRORS();
      /*
       * The size field equal to the real object size is the mark of
       * an already deallocated object (see the assignment below).
       */
      if (((oh *)base)->oh_sz == (GC_uintptr_t)sz) {
        GC_print_smashed_obj(
            "GC_debug_free: found previously deallocated (?) object at", p,
            clobbered);
        /* Ignore double free. */
        return;
      } else {
        GC_print_smashed_obj("GC_debug_free: found smashed location at", p,
                             clobbered);
      }
    }
    /* Invalidate the size (mark the object as deallocated). */
    ((oh *)base)->oh_sz = (GC_uintptr_t)sz;
#endif /* !SHORT_DBG_HDRS */
  }
#ifndef NO_FIND_LEAK
  /*
   * In the find-leak mode, deallocate the object at once unless it has
   * the debugging info and a delayed free is requested.
   */
  if (GC_find_leak_inner
#  ifndef SHORT_DBG_HDRS
      && ((word)((ptr_t)p - base) != sizeof(oh) || !GC_findleak_delay_free)
#  endif
  ) {
    GC_free(base);
  } else
#endif
  /* else */ {
    const hdr *hhdr = HDR(p);

    if (hhdr->hb_obj_kind == UNCOLLECTABLE
#ifdef GC_ATOMIC_UNCOLLECTABLE
        || hhdr->hb_obj_kind == AUNCOLLECTABLE
#endif
    ) {
      GC_free(base);
    } else {
      size_t sz = hhdr->hb_sz;
      size_t i;
      size_t lpw = BYTES_TO_PTRS(sz - sizeof(oh));

      /* Overwrite the client part of the object with the marker value. */
      for (i = 0; i < lpw; ++i)
        ((GC_uintptr_t *)p)[i] = GC_FREED_MEM_MARKER;
      GC_ASSERT((GC_uintptr_t *)p + i == (GC_uintptr_t *)(base + sz));
      /*
       * Update the counter even though the real deallocation
       * is deferred.
       */
      LOCK();
#ifdef LINT2
      GC_incr_bytes_freed(sz);
#else
      GC_bytes_freed += sz;
#endif
      UNLOCK();
    }
  }
}

#if defined(THREADS) && defined(DBG_HDRS_ALL)
GC_INNER void
GC_debug_free_inner(void *p)
{
  ptr_t base = (ptr_t)GC_base(p);

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT((word)((ptr_t)p - base) == sizeof(oh));
#  ifdef LINT2
  if (!base)
    ABORT("Invalid GC_debug_free_inner argument");
#  endif
#  ifndef SHORT_DBG_HDRS
  /* Invalidate the size. */
  ((oh *)base)->oh_sz = (GC_uintptr_t)GC_size(base);
#  endif
  GC_free_inner(base);
}
#endif

/*
 * The debugging variant of `GC_realloc()`.  If `p` is `NULL`, then it
 * acts as `GC_debug_malloc()`.  If `lb` is zero (and `p` is non-`NULL`),
 * then the object is deallocated by `GC_debug_free()` and `NULL` is
 * returned.  Abort if `p` does not belong to any heap object.  If `p` is
 * not located right after the debugging header of its object, then
 * a message is printed and the request is forwarded to `GC_realloc()`.
 * Otherwise a new object of the same kind is allocated, the old content
 * (at most `lb` bytes) is copied to it and the old object is passed to
 * `GC_debug_free()`.  Return `NULL` if the allocation of the new object
 * has failed (the old object is not deallocated in this case).
 */
GC_API void *GC_CALL
GC_debug_realloc(void *p, size_t lb, GC_EXTRA_PARAMS)
{
  ptr_t base;
  void *result;
  const hdr *hhdr;

  if (NULL == p) {
    return GC_debug_malloc(lb, OPT_RA s, i);
  }
  if (0 == lb) /* `&& p != NULL` */ {
    GC_debug_free(p);
    return NULL;
  }

#ifdef GC_ADD_CALLER
  /* No source location given by the caller: use the return address. */
  if (NULL == s) {
    GC_caller_func_offset(ra, &s, &i);
  }
#endif
  base = (ptr_t)GC_base(p);
  if (NULL == base) {
    ABORT_ARG1("Invalid pointer passed to realloc()", ": %p", p);
  }
  if ((word)((ptr_t)p - base) != sizeof(oh)) {
    GC_err_printf("GC_debug_realloc called on pointer %p w/o debugging info\n",
                  p);
    return GC_realloc(p, lb);
  }
  hhdr = HDR(base);
  /* Allocate the new object of the same kind as the old one. */
  result
      = GC_debug_generic_or_special_malloc(lb, hhdr->hb_obj_kind, OPT_RA s, i);
  if (result != NULL) {
    size_t old_sz;
#ifdef SHORT_DBG_HDRS
    /* No size field in the header: use the real size of the client part. */
    old_sz = GC_size(base) - sizeof(oh);
#else
    old_sz = (size_t)(((oh *)base)->oh_sz);
#endif
    /* Copy as much of the old content as fits into the new object. */
    if (old_sz > 0)
      BCOPY(p, result, old_sz < lb ? old_sz : lb);
    GC_debug_free(p);
  }
  return result;
}

/*
 * Allocate an object of the given `kind` with the debugging info.
 * The dedicated debugging allocator is used for each of the built-in
 * kinds; the generic one is used for any other kind.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_generic_or_special_malloc(size_t lb, int kind, GC_EXTRA_PARAMS)
{
  if (PTRFREE == kind)
    return GC_debug_malloc_atomic(lb, OPT_RA s, i);
  if (NORMAL == kind)
    return GC_debug_malloc(lb, OPT_RA s, i);
  if (UNCOLLECTABLE == kind)
    return GC_debug_malloc_uncollectable(lb, OPT_RA s, i);
#ifdef GC_ATOMIC_UNCOLLECTABLE
  if (AUNCOLLECTABLE == kind)
    return GC_debug_malloc_atomic_uncollectable(lb, OPT_RA s, i);
#endif
  /* Not a built-in kind. */
  return GC_debug_generic_malloc(lb, kind, OPT_RA s, i);
}

#ifndef SHORT_DBG_HDRS

/*
 * Verify the debugging info of every marked object of the given heap
 * block; the address of each clobbered location found is recorded by
 * `GC_add_smashed()`.  The second argument is ignored.
 * Note: `GC_apply_to_each_object` is not used for performance reasons.
 */
STATIC void GC_CALLBACK
GC_check_heap_block(struct hblk *hbp, void *dummy)
{
  const hdr *hhdr = HDR(hbp);
  size_t sz = hhdr->hb_sz;
  ptr_t p = hbp->hb_body;
  ptr_t plim;
  size_t bit_no = 0;

  UNUSED_ARG(dummy);
  GC_ASSERT((ptr_t)hhdr->hb_block == p);
  /* `plim` is the address of the last object to examine. */
  if (sz > MAXOBJBYTES) {
    plim = p;
  } else {
    plim = p + HBLKSIZE - sz;
  }

  /* Go through all objects in block. */
  while (ADDR_GE(plim, p)) {
    if (mark_bit_from_hdr(hhdr, bit_no) && GC_HAS_DEBUG_INFO(p)) {
      ptr_t clobbered = GC_check_annotated_obj((oh *)p);

      if (clobbered != NULL)
        GC_add_smashed(clobbered);
    }
    bit_no += MARK_BIT_OFFSET(sz);
    p += sz;
  }
}

/*
 * Check the debugging info of the marked objects in all heap blocks.
 * This assumes that all accessible objects are marked.  Normally called
 * by the collector, with the allocator lock held.
 */
STATIC void
GC_check_heap_proc(void)
{
  /* FIXME: Should we check for twice that alignment? */
  GC_STATIC_ASSERT((sizeof(oh) & (GC_GRANULE_BYTES - 1)) == 0);
  GC_ASSERT(I_HOLD_LOCK());
  GC_apply_to_all_blocks(GC_check_heap_block, NULL);
}

#endif /* !SHORT_DBG_HDRS */

#ifndef GC_NO_FINALIZATION

/*
 * A client finalizer together with its client data, as registered
 * through the debugging variants of the finalizer registration functions.
 */
struct closure {
  GC_finalization_proc cl_fn; /*< the client finalizer */
  void *cl_data;              /*< the client data passed to `cl_fn` */
};

/*
 * Allocate a closure holding the given finalizer and its client data.
 * Return `NULL` if the allocation has failed.
 */
STATIC void *
GC_make_closure(GC_finalization_proc fn, void *data)
{
  struct closure *cl;

#  ifdef DBG_HDRS_ALL
  cl = (struct closure *)GC_debug_malloc(sizeof(struct closure), GC_EXTRAS);
#  else
  cl = (struct closure *)GC_malloc(sizeof(struct closure));
#  endif
  if (NULL == cl)
    return NULL;

  cl->cl_fn = fn;
  cl->cl_data = data;
  return cl;
}

/*
 * The finalizer actually registered by the debugging variants of the
 * registration functions.  `data` is a closure; the client finalizer
 * stored in it is invoked with the client-visible (displaced by the
 * debugging header size) address of `obj` and the stored client data.
 */
STATIC void GC_CALLBACK
GC_debug_invoke_finalizer(void *obj, void *data)
{
  struct closure *cl = (struct closure *)data;
  ptr_t client_obj = (ptr_t)obj + sizeof(oh);

  cl->cl_fn(client_obj, cl->cl_data);
}

/*
 * Special `finalizer_proc` value to detect `GC_register_finalizer` failure:
 * the "old finalizer" variable is preset to it before the registration
 * call, thus the value remains if the call has not stored anything.
 */
#  define OFN_UNSET ((GC_finalization_proc)(~(GC_funcptr_uint)0))

/*
 * Pass the previously registered finalizer and its client data back to
 * the client through `ofn` and `ocd` (each of them may be `NULL`).
 * `my_old_fn` and `my_old_cd` are the values obtained from the underlying
 * registration function.  Nothing is stored if the registration has
 * failed or if the old finalizer was not registered by a debugging
 * variant (a message is printed in the latter case).
 */
static void
store_old(void *obj, GC_finalization_proc my_old_fn, struct closure *my_old_cd,
          GC_finalization_proc *ofn, void **ocd)
{
  if (0 == my_old_fn) {
    /* There was no finalizer registered before. */
    if (ofn)
      *ofn = 0;
    if (ocd)
      *ocd = NULL;
    return;
  }
  if (OFN_UNSET == my_old_fn) {
    /* `GC_register_finalizer()` failed; `*ofn` and `*ocd` are unchanged. */
    return;
  }
  if (my_old_fn != GC_debug_invoke_finalizer) {
    GC_err_printf("Debuggable object at %p had a non-debug finalizer\n",
                  obj);
    /* This should probably be fatal. */
    return;
  }

  /* Unwrap the closure to get the client values. */
  if (ofn)
    *ofn = my_old_cd->cl_fn;
  if (ocd)
    *ocd = my_old_cd->cl_data;
}

/*
 * The debugging variant of `GC_register_finalizer()`.  The finalizer is
 * registered for the base address of the object containing `obj`, and
 * the client finalizer with its data are wrapped into a closure invoked
 * via `GC_debug_invoke_finalizer`.  The old finalizer and client data
 * are stored to `*ofn` and `*ocd`, respectively (if non-`NULL`); they
 * are left unchanged if out of memory.
 */
GC_API void GC_CALL
GC_debug_register_finalizer(void *obj, GC_finalization_proc fn, void *cd,
                            GC_finalization_proc *ofn, void **ocd)
{
  ptr_t base = (ptr_t)GC_base(obj);
  GC_finalization_proc prev_fn = OFN_UNSET;
  void *prev_cd = NULL; /*< to avoid "might be uninitialized" warning */

  if (NULL == base) {
    /* We will not collect it, hence finalizer would not be run. */
    if (ocd)
      *ocd = NULL;
    if (ofn)
      *ofn = 0;
    return;
  }
  if ((ptr_t)obj - base != sizeof(oh)) {
    GC_err_printf("GC_debug_register_finalizer called with"
                  " non-base-pointer %p\n",
                  obj);
  }

  if (fn != 0) {
    cd = GC_make_closure(fn, cd);
    if (NULL == cd) {
      /* Out of memory; `*ofn` and `*ocd` are unchanged. */
      return;
    }
    GC_register_finalizer(base, GC_debug_invoke_finalizer, cd, &prev_fn,
                          &prev_cd);
  } else {
    /* Just unregister the finalizer (if any). */
    GC_register_finalizer(base, 0, NULL, &prev_fn, &prev_cd);
  }
  store_old(obj, prev_fn, (struct closure *)prev_cd, ofn, ocd);
}

/*
 * The debugging variant of `GC_register_finalizer_no_order()`.
 * The finalizer is registered for the base address of the object
 * containing `obj`, and the client finalizer with its data are wrapped
 * into a closure invoked via `GC_debug_invoke_finalizer`.  The old
 * finalizer and client data are stored to `*ofn` and `*ocd`, respectively
 * (if non-`NULL`); they are left unchanged if out of memory.
 */
GC_API void GC_CALL
GC_debug_register_finalizer_no_order(void *obj, GC_finalization_proc fn,
                                     void *cd, GC_finalization_proc *ofn,
                                     void **ocd)
{
  ptr_t base = (ptr_t)GC_base(obj);
  GC_finalization_proc prev_fn = OFN_UNSET;
  void *prev_cd = NULL;

  if (NULL == base) {
    /* Not a heap object: report that there was no finalizer. */
    if (ocd)
      *ocd = NULL;
    if (ofn)
      *ofn = 0;
    return;
  }
  if ((ptr_t)obj - base != sizeof(oh)) {
    GC_err_printf("GC_debug_register_finalizer_no_order called with"
                  " non-base-pointer %p\n",
                  obj);
  }

  if (fn != 0) {
    cd = GC_make_closure(fn, cd);
    if (NULL == cd) {
      /* Out of memory. */
      return;
    }
    GC_register_finalizer_no_order(base, GC_debug_invoke_finalizer, cd,
                                   &prev_fn, &prev_cd);
  } else {
    /* Just unregister the finalizer (if any). */
    GC_register_finalizer_no_order(base, 0, NULL, &prev_fn, &prev_cd);
  }
  store_old(obj, prev_fn, (struct closure *)prev_cd, ofn, ocd);
}

/*
 * The debugging variant of `GC_register_finalizer_unreachable()`.
 * The finalizer is registered for the base address of the object
 * containing `obj`, and the client finalizer with its data are wrapped
 * into a closure invoked via `GC_debug_invoke_finalizer`.  The old
 * finalizer and client data are stored to `*ofn` and `*ocd`, respectively
 * (if non-`NULL`); they are left unchanged if out of memory.
 */
GC_API void GC_CALL
GC_debug_register_finalizer_unreachable(void *obj, GC_finalization_proc fn,
                                        void *cd, GC_finalization_proc *ofn,
                                        void **ocd)
{
  ptr_t base = (ptr_t)GC_base(obj);
  GC_finalization_proc prev_fn = OFN_UNSET;
  void *prev_cd = NULL;

  if (NULL == base) {
    /* Not a heap object: report that there was no finalizer. */
    if (ocd)
      *ocd = NULL;
    if (ofn)
      *ofn = 0;
    return;
  }
  if ((ptr_t)obj - base != sizeof(oh)) {
    GC_err_printf("GC_debug_register_finalizer_unreachable called with"
                  " non-base-pointer %p\n",
                  obj);
  }

  if (fn != 0) {
    cd = GC_make_closure(fn, cd);
    if (NULL == cd) {
      /* Out of memory. */
      return;
    }
    GC_register_finalizer_unreachable(base, GC_debug_invoke_finalizer, cd,
                                      &prev_fn, &prev_cd);
  } else {
    /* Just unregister the finalizer (if any). */
    GC_register_finalizer_unreachable(base, 0, NULL, &prev_fn, &prev_cd);
  }
  store_old(obj, prev_fn, (struct closure *)prev_cd, ofn, ocd);
}

/*
 * The debugging variant of `GC_register_finalizer_ignore_self()`.
 * The finalizer is registered for the base address of the object
 * containing `obj`, and the client finalizer with its data are wrapped
 * into a closure invoked via `GC_debug_invoke_finalizer`.  The old
 * finalizer and client data are stored to `*ofn` and `*ocd`, respectively
 * (if non-`NULL`); they are left unchanged if out of memory.
 */
GC_API void GC_CALL
GC_debug_register_finalizer_ignore_self(void *obj, GC_finalization_proc fn,
                                        void *cd, GC_finalization_proc *ofn,
                                        void **ocd)
{
  ptr_t base = (ptr_t)GC_base(obj);
  GC_finalization_proc prev_fn = OFN_UNSET;
  void *prev_cd = NULL;

  if (NULL == base) {
    /* Not a heap object: report that there was no finalizer. */
    if (ocd)
      *ocd = NULL;
    if (ofn)
      *ofn = 0;
    return;
  }
  if ((ptr_t)obj - base != sizeof(oh)) {
    GC_err_printf("GC_debug_register_finalizer_ignore_self called with"
                  " non-base-pointer %p\n",
                  obj);
  }

  if (fn != 0) {
    cd = GC_make_closure(fn, cd);
    if (NULL == cd) {
      /* Out of memory. */
      return;
    }
    GC_register_finalizer_ignore_self(base, GC_debug_invoke_finalizer, cd,
                                      &prev_fn, &prev_cd);
  } else {
    /* Just unregister the finalizer (if any). */
    GC_register_finalizer_ignore_self(base, 0, NULL, &prev_fn, &prev_cd);
  }
  store_old(obj, prev_fn, (struct closure *)prev_cd, ofn, ocd);
}

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
/*
 * The debugging variant of `GC_toggleref_add()`: the base address of the
 * object containing `obj` is passed to the latter.  A message is printed
 * if `obj` is not located right after the debugging header.
 */
GC_API int GC_CALL
GC_debug_toggleref_add(void *obj, int is_strong_ref)
{
  ptr_t base = (ptr_t)GC_base(obj);
  int res;

  if ((ptr_t)obj - base != sizeof(oh)) {
    GC_err_printf("GC_debug_toggleref_add called with"
                  " non-base-pointer %p\n",
                  obj);
  }
  res = GC_toggleref_add(base, is_strong_ref);
  return res;
}
#  endif /* !GC_TOGGLE_REFS_NOT_NEEDED */

#endif /* !GC_NO_FINALIZATION */

/*
 * Same as `GC_debug_malloc()` but without the extra arguments in the
 * signature: `GC_DBG_EXTRAS` is passed as the debugging arguments.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_malloc_replacement(size_t lb)
{
  void *result = GC_debug_malloc(lb, GC_DBG_EXTRAS);

  return result;
}

/*
 * Same as `GC_debug_realloc()` but without the extra arguments in the
 * signature: `GC_DBG_EXTRAS` is passed as the debugging arguments.
 */
GC_API void *GC_CALL
GC_debug_realloc_replacement(void *p, size_t lb)
{
  void *result = GC_debug_realloc(p, lb, GC_DBG_EXTRAS);

  return result;
}

#ifdef GC_GCJ_SUPPORT



/*
 * The debugging allocation of an object of `GC_gcj_debug_kind` kind.
 * `vtable_ptr` is stored to the first word of the client part of the
 * object (i.e. right after the debugging header).  If the allocation has
 * failed, then a message is printed and the result of the out-of-memory
 * handler (`GC_oom_fn`) is returned.  Otherwise the debugging info is
 * stored and the client-visible (displaced) pointer is returned.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_debug_gcj_malloc(size_t lb, const void *vtable_ptr, GC_EXTRA_PARAMS)
{
  void *base, *result;

  /* We are careful to avoid extra calls that could confuse the backtrace. */
  LOCK();
  /* A mechanism to invoke finalizers (same as in `GC_core_gcj_malloc`). */
  if (GC_gc_no != GC_last_finalized_no) {
    /* The allocator lock is released while the finalizers are processed. */
    UNLOCK();
    GC_notify_or_invoke_finalizers();
    LOCK();
    GC_last_finalized_no = GC_gc_no;
  }

  base = GC_generic_malloc_inner(SIZET_SAT_ADD(lb, DEBUG_BYTES),
                                 GC_gcj_debug_kind, 0 /* `flags` */);
  if (NULL == base) {
    /* Read the handler while the allocator lock is still held. */
    GC_oom_func oom_fn = GC_oom_fn;
    UNLOCK();
    GC_err_printf("GC_debug_gcj_malloc(%lu, %p) returning NULL (%s:%d)\n",
                  (unsigned long)lb, vtable_ptr, s, i);
    return (*oom_fn)(lb);
  }
  /* Store the vtable pointer right after the debugging header. */
  *((const void **)((ptr_t)base + sizeof(oh))) = vtable_ptr;
  if (!GC_debugging_initialized)
    GC_start_debugging_inner();
  result = GC_store_debug_info_inner(base, lb, s, i);
  ADD_CALL_CHAIN(base, ra);
  UNLOCK();
  GC_dirty(result);
  REACHABLE_AFTER_DIRTY(vtable_ptr);
  return result;
}
#endif /* GC_GCJ_SUPPORT */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1996 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2007 Free Software Foundation, Inc.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifndef GC_NO_FINALIZATION
#  include "gc/javaxfc.h" /*< to get `GC_finalize_all()` as `extern "C"` */

/*
 * Type of mark procedure used for marking from finalizable object.
 * This procedure normally does not mark the object, only its descendants.
 */
typedef void (*finalization_mark_proc)(ptr_t /* `finalizable_obj_ptr` */);

/*
 * Compute the index of a hash table bucket for the given address.
 * `HASH3` combines (by XOR) the address shifted right by 3 and by
 * `3 + log_size` bits, and masks the result by `size - 1` (thus `size`
 * is presumably a power of two).  `HASH2` is the same for a table of
 * `1 << log_size` entries.
 */
#  define HASH3(addr, size, log_size)                               \
    ((size_t)((ADDR(addr) >> 3) ^ (ADDR(addr) >> (3 + (log_size)))) \
     & ((size) - (size_t)1))
#  define HASH2(addr, log_size) HASH3(addr, (size_t)1 << (log_size), log_size)

/*
 * The common prolog of a hash table entry; it is the first field of both
 * `struct disappearing_link` and `struct finalizable_object`.
 */
struct hash_chain_entry {
  GC_hidden_pointer hidden_key;  /*< the key, stored as a hidden pointer */
  struct hash_chain_entry *next; /*< the next entry of the same chain */
};

/*
 * A hash table entry describing a disappearing link; the entry key is
 * the hidden address of the link (i.e. of the field to be cleared).
 */
struct disappearing_link {
  struct hash_chain_entry prolog;
#  define dl_hidden_link prolog.hidden_key /*< field to be cleared */
  /* Get and set the next entry of the chain, respectively. */
#  define dl_next(x) (struct disappearing_link *)((x)->prolog.next)
#  define dl_set_next(x, y) \
    (void)((x)->prolog.next = (struct hash_chain_entry *)(y))
  GC_hidden_pointer dl_hidden_obj; /*< pointer to object base */
};

/*
 * A hash table entry describing one object with a registered finalizer.
 * The key of the entry (`fo_hidden_base`) is the hidden address of
 * the object.
 */
struct finalizable_object {
  struct hash_chain_entry prolog;
  /*
   * Pointer to object base.  No longer hidden once object is on
   * `finalize_now` queue.
   */
#  define fo_hidden_base prolog.hidden_key
  /* The accessors of the chain link stored in `prolog` (with the casts). */
#  define fo_next(x) (struct finalizable_object *)((x)->prolog.next)
#  define fo_set_next(x, y) ((x)->prolog.next = (struct hash_chain_entry *)(y))
  GC_finalization_proc fo_fn;          /*< the finalizer */
  finalization_mark_proc fo_mark_proc; /*< mark-through procedure */
  ptr_t fo_client_data; /*< the client data registered with the finalizer */
  size_t fo_object_sz; /*< in bytes */
};

#  ifdef AO_HAVE_store
/*
 * Update `finalize_now` atomically as `GC_should_invoke_finalizers`
 * does not acquire the allocator lock.
 */
#    define SET_FINALIZE_NOW(fo) \
      GC_cptr_store((volatile ptr_t *)&GC_fnlz_roots.finalize_now, (ptr_t)(fo))
#  else
/* No `AO_HAVE_store`, thus `finalize_now` is updated by a plain store. */
#    define SET_FINALIZE_NOW(fo) (void)(GC_fnlz_roots.finalize_now = (fo))
#  endif /* !AO_HAVE_store */

/*
 * Push (by `GC_PUSH_ALL_SYM`) the root variables of the finalization
 * data structures: the heads of the disappearing links hash tables
 * and `GC_fnlz_roots`.
 */
GC_API void GC_CALL
GC_push_finalizer_structures(void)
{
  /* The pushed variables are expected to be aligned to `ALIGNMENT`. */
  GC_ASSERT(ADDR(&GC_dl_hashtbl.head) % ALIGNMENT == 0);
  GC_ASSERT(ADDR(&GC_fnlz_roots) % ALIGNMENT == 0);
#  ifndef GC_LONG_REFS_NOT_NEEDED
  GC_ASSERT(ADDR(&GC_ll_hashtbl.head) % ALIGNMENT == 0);
  GC_PUSH_ALL_SYM(GC_ll_hashtbl.head);
#  endif
  GC_PUSH_ALL_SYM(GC_dl_hashtbl.head);
  GC_PUSH_ALL_SYM(GC_fnlz_roots);
  /* `GC_toggleref_arr` is pushed specially by `GC_mark_togglerefs`. */
}

/*
 * Threshold of `log_size` to initiate full collection before growing
 * a hash table: `GC_grow_table()` tries a collection first only if the
 * current log of the table size is not less than this value.  The value
 * may be predefined by the client at the compile time.
 */
#  ifndef GC_ON_GROW_LOG_SIZE_MIN
#    define GC_ON_GROW_LOG_SIZE_MIN LOG_HBLKSIZE
#  endif

/*
 * Ensure the hash table has enough capacity.  `*table_ptr` is a pointer
 * to an array of hash headers.  `*log_size_ptr` is the log of its current
 * size.  We update both `*table_ptr` and `*log_size_ptr` on success.
 */
STATIC void
GC_grow_table(struct hash_chain_entry ***table_ptr, unsigned *log_size_ptr,
              const size_t *entries_ptr)
{
  size_t i;
  struct hash_chain_entry *p;
  unsigned log_old_size = *log_size_ptr;
  unsigned log_new_size = log_old_size + 1;
  size_t old_size = NULL == *table_ptr ? 0 : (size_t)1 << log_old_size;
  size_t new_size = (size_t)1 << log_new_size;
  /* FIXME: Power-of-two size often gets rounded up to one more page. */
  struct hash_chain_entry **new_table;

  GC_ASSERT(I_HOLD_LOCK());
  /*
   * Avoid growing the table in case of at least 25% of entries can
   * be deleted by enforcing a collection.  Ignored for small tables.
   * In the incremental mode we skip this optimization, as we want to
   * avoid triggering a full collection whenever possible.
   */
  if (log_old_size >= (unsigned)GC_ON_GROW_LOG_SIZE_MIN && !GC_incremental) {
    IF_CANCEL(int cancel_state;)

    DISABLE_CANCEL(cancel_state);
    GC_gcollect_inner();
    RESTORE_CANCEL(cancel_state);
    /* `GC_finalize` might decrease entries value. */
    if (*entries_ptr < ((size_t)1 << log_old_size) - (*entries_ptr >> 2))
      return;
  }

  new_table = (struct hash_chain_entry **)GC_INTERNAL_MALLOC_IGNORE_OFF_PAGE(
      new_size * sizeof(struct hash_chain_entry *), NORMAL);
  if (NULL == new_table) {
    if (NULL == *table_ptr) {
      ABORT("Insufficient space for initial table allocation");
    } else {
      return;
    }
  }
  for (i = 0; i < old_size; i++) {
    for (p = (*table_ptr)[i]; p != NULL;) {
      ptr_t real_key = (ptr_t)GC_REVEAL_POINTER(p->hidden_key);
      struct hash_chain_entry *next = p->next;
      size_t new_hash = HASH3(real_key, new_size, log_new_size);

      p->next = new_table[new_hash];
      GC_dirty(p);
      new_table[new_hash] = p;
      p = next;
    }
  }
  *log_size_ptr = log_new_size;
  *table_ptr = new_table;
  GC_dirty(new_table); /*< entire object */
}

/*
 * Register `link` as a disappearing link associated with the object
 * which contains the link itself (as determined by `GC_base()`).
 * Aborts if the link is not inside any heap object.
 */
GC_API int GC_CALL
GC_register_disappearing_link(void **link)
{
  ptr_t owner = (ptr_t)GC_base(link);

  if (NULL == owner)
    ABORT("Bad arg to GC_register_disappearing_link");
  return GC_general_register_disappearing_link(link, owner);
}

/*
 * Register `link` in the given table of disappearing links and associate
 * it with `obj` (which should be a base address of a heap object).
 * `tbl_log_name` is the table name used only for logging.
 * Returns `GC_SUCCESS` if a new entry has been added, `GC_DUPLICATE` if
 * the link was already registered (the associated object is replaced with
 * `obj` in such a case), `GC_NO_MEMORY` if an entry cannot be allocated,
 * or `GC_UNIMPLEMENTED` if `GC_find_leak_inner` is on.
 * The allocator lock is acquired (and released) by this function.
 */
STATIC int
GC_register_disappearing_link_inner(struct dl_hashtbl_s *dl_hashtbl,
                                    void **link, const void *obj,
                                    const char *tbl_log_name)
{
  struct disappearing_link *curr_dl;
  size_t index;
  struct disappearing_link *new_dl;

  GC_ASSERT(GC_is_initialized);
  if (UNLIKELY(GC_find_leak_inner))
    return GC_UNIMPLEMENTED;
#  ifdef GC_ASSERTIONS
  /* Just check accessibility. */
  GC_noop1_ptr(*link);
#  endif
  LOCK();
  GC_ASSERT(obj != NULL && GC_base_C(obj) == obj);
  /*
   * Create the table on the first use, or try to grow it once the number
   * of entries exceeds the number of the chain heads.
   */
  if (UNLIKELY(NULL == dl_hashtbl->head)
      || UNLIKELY(dl_hashtbl->entries > ((size_t)1 << dl_hashtbl->log_size))) {
    GC_grow_table((struct hash_chain_entry ***)&dl_hashtbl->head,
                  &dl_hashtbl->log_size, &dl_hashtbl->entries);
    /*
     * Note: the message is printed even if `GC_grow_table()` has returned
     * without replacing the table.
     */
    GC_COND_LOG_PRINTF("Grew %s table to %u entries\n", tbl_log_name,
                       1U << dl_hashtbl->log_size);
  }
  index = HASH2(link, dl_hashtbl->log_size);
  /* If the link is already registered, then just update its object. */
  for (curr_dl = dl_hashtbl->head[index]; curr_dl != 0;
       curr_dl = dl_next(curr_dl)) {
    if (curr_dl->dl_hidden_link == GC_HIDE_POINTER(link)) {
      /* Alternatively, `GC_HIDE_NZ_POINTER()` could be used instead. */
      curr_dl->dl_hidden_obj = GC_HIDE_POINTER(obj);
      UNLOCK();
      return GC_DUPLICATE;
    }
  }
  new_dl = (struct disappearing_link *)GC_INTERNAL_MALLOC(
      sizeof(struct disappearing_link), NORMAL);
  if (UNLIKELY(NULL == new_dl)) {
    /*
     * Out of memory: call the client out-of-memory function with
     * the allocator lock released.
     */
    GC_oom_func oom_fn = GC_oom_fn;
    UNLOCK();
    new_dl = (struct disappearing_link *)(*oom_fn)(
        sizeof(struct disappearing_link));
    if (0 == new_dl) {
      return GC_NO_MEMORY;
    }
    /* It is not likely we will make it here, but... */
    LOCK();
    /* Recalculate `index` since the table may grow. */
    index = HASH2(link, dl_hashtbl->log_size);
    /*
     * Check again that our disappearing link is not in the table (it
     * could have been registered while the lock was not held).
     */
    for (curr_dl = dl_hashtbl->head[index]; curr_dl != 0;
         curr_dl = dl_next(curr_dl)) {
      if (curr_dl->dl_hidden_link == GC_HIDE_POINTER(link)) {
        curr_dl->dl_hidden_obj = GC_HIDE_POINTER(obj);
        UNLOCK();
#  ifndef DBG_HDRS_ALL
        /* Free unused `new_dl` returned by `GC_oom_fn()`. */
        GC_free(new_dl);
#  endif
        return GC_DUPLICATE;
      }
    }
  }
  /* Fill in the new entry and put it at the beginning of its chain. */
  new_dl->dl_hidden_obj = GC_HIDE_POINTER(obj);
  new_dl->dl_hidden_link = GC_HIDE_POINTER(link);
  dl_set_next(new_dl, dl_hashtbl->head[index]);
  GC_dirty(new_dl);
  dl_hashtbl->head[index] = new_dl;
  dl_hashtbl->entries++;
  GC_dirty(dl_hashtbl->head + index);
  UNLOCK();
  return GC_SUCCESS;
}

/*
 * Register `link` as a (short) disappearing link associated with `obj`.
 * Aborts if `link` is null or is not aligned to `ALIGNMENT`.  For the
 * result, see `GC_register_disappearing_link_inner()`.
 */
GC_API int GC_CALL
GC_general_register_disappearing_link(void **link, const void *obj)
{
  if (!NONNULL_ARG_NOT_NULL(link) || (ADDR(link) & (ALIGNMENT - 1)) != 0)
    ABORT("Bad arg to GC_general_register_disappearing_link");

  return GC_register_disappearing_link_inner(&GC_dl_hashtbl, link, obj, "dl");
}

/*
 * Dispose of a link entry which has been removed from its hash table:
 * the entry is passed to `GC_free()` unless `DBG_HDRS_ALL` is defined,
 * in the latter case only the chain link of the entry is cleared.
 */
#  ifdef DBG_HDRS_ALL
#    define FREE_DL_ENTRY(curr_dl) dl_set_next(curr_dl, NULL)
#  else
#    define FREE_DL_ENTRY(curr_dl) GC_free(curr_dl)
#  endif

/*
 * Remove the entry of the given `link` from the table, if present.
 * Returns the removed entry (it is up to the caller to free it), or
 * `NULL` if the link is not registered.  The allocator lock should be
 * held by the caller.
 */
GC_INLINE struct disappearing_link *
GC_unregister_disappearing_link_inner(struct dl_hashtbl_s *dl_hashtbl,
                                      void **link)
{
  struct disappearing_link *entry;
  struct disappearing_link *pred;
  size_t bucket;

  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(NULL == dl_hashtbl->head))
    return NULL;

  bucket = HASH2(link, dl_hashtbl->log_size);
  pred = NULL;
  entry = dl_hashtbl->head[bucket];
  while (entry != NULL) {
    if (entry->dl_hidden_link == GC_HIDE_POINTER(link)) {
      /* Found; unlink the entry from its chain. */
      if (pred != NULL) {
        dl_set_next(pred, dl_next(entry));
        GC_dirty(pred);
      } else {
        dl_hashtbl->head[bucket] = dl_next(entry);
        GC_dirty(dl_hashtbl->head + bucket);
      }
      dl_hashtbl->entries--;
      return entry;
    }
    pred = entry;
    entry = dl_next(entry);
  }
  /* No such link in the table. */
  return NULL;
}

/*
 * Unregister the given (short) disappearing link.  Returns 1 if
 * the link was registered (and has been removed), 0 otherwise.
 */
GC_API int GC_CALL
GC_unregister_disappearing_link(void **link)
{
  struct disappearing_link *removed;

  /* A misaligned link cannot be in the table, so nothing to do. */
  if ((ADDR(link) & (ALIGNMENT - 1)) != 0)
    return 0;

  LOCK();
  removed = GC_unregister_disappearing_link_inner(&GC_dl_hashtbl, link);
  UNLOCK();
  if (removed != NULL) {
    /* The entry is disposed of with the allocator lock released. */
    FREE_DL_ENTRY(removed);
    return 1;
  }
  return 0;
}

/*
 * Mark from a single finalizable object by means of the given mark
 * procedure and then process the mark stack until it is empty.
 * The object pointed to by `real_ptr` itself may be left unmarked
 * (i.e., marking it, if appropriate, is the job of the caller).
 * Note that this is called with the mutator running.  This is safe only
 * if the mutator (client) gets the allocator lock to reveal hidden
 * pointers.
 */
GC_INLINE void
GC_mark_fo(ptr_t real_ptr, finalization_mark_proc fo_mark_proc)
{
  GC_ASSERT(I_HOLD_LOCK());
  (*fo_mark_proc)(real_ptr);
  /* Handle everything which has been pushed by the mark procedure. */
  for (;;) {
    if (GC_mark_stack_empty())
      break;
    MARK_FROM_MARK_STACK();
  }
}

/*
 * If a collection is in progress, then call `GC_mark_some()` repeatedly
 * until it reports the completion; otherwise do nothing.
 */
GC_INLINE void
GC_complete_ongoing_collection(void)
{
  if (LIKELY(!GC_collection_in_progress()))
    return;

  for (;;) {
    if (GC_mark_some(NULL))
      break;
  }
}

/* Toggle-refs support. */

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
typedef union toggle_ref_u GCToggleRef;

STATIC GC_toggleref_func GC_toggleref_callback = 0;

/*
 * Ask the client callback about every live element of the toggle-refs
 * array and, according to the answer, drop the element or store it back
 * as a strong or a weak (hidden) reference.  The retained elements are
 * compacted to the beginning of the array and the rest is zeroed.
 * The allocator lock should be held by the caller.
 */
GC_INNER void
GC_process_togglerefs(void)
{
  size_t src;
  size_t kept = 0;
  GC_bool stored_strong = FALSE;

  GC_ASSERT(I_HOLD_LOCK());
  for (src = 0; src < GC_toggleref_array_size; ++src) {
    void *target = GC_toggleref_arr[src].strong_ref;

    /* The lowest bit set means the element holds a hidden pointer. */
    if ((ADDR(target) & 1) != 0) {
      target = GC_REVEAL_POINTER(GC_toggleref_arr[src].weak_ref);
      GC_ASSERT((ADDR(target) & 1) == 0);
    }
    if (target != NULL) {
      switch (GC_toggleref_callback(target)) {
      case GC_TOGGLE_REF_DROP:
        break;
      case GC_TOGGLE_REF_STRONG:
        GC_toggleref_arr[kept].strong_ref = target;
        kept++;
        stored_strong = TRUE;
        break;
      case GC_TOGGLE_REF_WEAK:
        GC_toggleref_arr[kept].weak_ref = GC_HIDE_POINTER(target);
        kept++;
        break;
      default:
        ABORT("Bad toggle-ref status returned by callback");
      }
    }
  }

  /* Wipe the tail left after the compaction, if any. */
  if (kept < GC_toggleref_array_size) {
    BZERO(&GC_toggleref_arr[kept],
          (GC_toggleref_array_size - kept) * sizeof(GCToggleRef));
    GC_toggleref_array_size = kept;
  }
  /* A single write barrier call covers all the stored strong pointers. */
  if (stored_strong)
    GC_dirty(GC_toggleref_arr); /*< entire object */
}

STATIC void GC_normal_finalize_mark_proc(ptr_t);

STATIC void
GC_mark_togglerefs(void)
{
  size_t i;

  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == GC_toggleref_arr)
    return;

  GC_set_mark_bit(GC_toggleref_arr);
  for (i = 0; i < GC_toggleref_array_size; ++i) {
    void *obj = GC_toggleref_arr[i].strong_ref;
    if (obj != NULL && (ADDR(obj) & 1) == 0) {
      /* Push and mark the object. */
      GC_mark_fo((ptr_t)obj, GC_normal_finalize_mark_proc);
      GC_set_mark_bit(obj);
      GC_complete_ongoing_collection();
    }
  }
}

/*
 * Clear every weak (hidden) element of the toggle-refs array which
 * refers to an unmarked object.  The allocator lock should be held.
 */
STATIC void
GC_clear_togglerefs(void)
{
  size_t idx;

  GC_ASSERT(I_HOLD_LOCK());
  for (idx = 0; idx < GC_toggleref_array_size; ++idx) {
    GCToggleRef *slot = GC_toggleref_arr + idx;

    /* Only the elements with the lowest bit set are weak ones. */
    if ((ADDR(slot->strong_ref) & 1) == 0)
      continue;
    /*
     * A marked object stays referenced as is (no need to copy, this
     * garbage collector is a non-moving one).
     */
    if (GC_is_marked(GC_REVEAL_POINTER(slot->weak_ref)))
      continue;
    slot->weak_ref = 0;
  }
}

/*
 * Set the toggle-refs callback (stored in `GC_toggleref_callback`).
 * The value is updated with the allocator lock held.
 */
GC_API void GC_CALL
GC_set_toggleref_func(GC_toggleref_func fn)
{
  LOCK();
  GC_toggleref_callback = fn;
  UNLOCK();
}

/*
 * Return the current toggle-refs callback.  The value is read with
 * the allocator lock held in the reader mode.
 */
GC_API GC_toggleref_func GC_CALL
GC_get_toggleref_func(void)
{
  GC_toggleref_func current;

  READER_LOCK();
  current = GC_toggleref_callback;
  READER_UNLOCK();
  return current;
}

/*
 * Ensure `GC_toggleref_arr` has room for `capacity_inc` more elements
 * beyond the current `GC_toggleref_array_size`.  The array is allocated
 * on the first use and re-allocated (with the capacity doubled as many
 * times as needed) afterwards.  Returns `FALSE` on an allocation failure
 * or on a capacity overflow; in such a case both the array and
 * `GC_toggleref_array_capacity` are left unchanged, so the recorded
 * capacity never exceeds the size of the really allocated array.
 * The allocator lock should be held by the caller.
 */
static GC_bool
ensure_toggleref_capacity(size_t capacity_inc)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == GC_toggleref_arr) {
    /* The initial capacity. */
    size_t init_capacity = 32;
    GCToggleRef *init_array
        = (GCToggleRef *)GC_INTERNAL_MALLOC_IGNORE_OFF_PAGE(
            init_capacity * sizeof(GCToggleRef), NORMAL);

    if (NULL == init_array)
      return FALSE;
    GC_toggleref_arr = init_array;
    GC_toggleref_array_capacity = init_capacity;
  }
  if (GC_toggleref_array_size + capacity_inc >= GC_toggleref_array_capacity) {
    GCToggleRef *new_array;
    /*
     * The new capacity is computed in a local variable and is stored to
     * `GC_toggleref_array_capacity` only after the new array has been
     * allocated successfully.  Otherwise, a failed attempt would leave
     * the recorded capacity bigger than the real one, and the next call
     * would report success without growing the array (leading to a write
     * past the end of the array by the caller).
     */
    size_t new_capacity = GC_toggleref_array_capacity;

    while (new_capacity < GC_toggleref_array_size + capacity_inc) {
      new_capacity *= 2;
      if ((new_capacity & ((size_t)1 << (sizeof(size_t) * 8 - 1))) != 0) {
        /* An overflow. */
        return FALSE;
      }
    }

    new_array = (GCToggleRef *)GC_INTERNAL_MALLOC_IGNORE_OFF_PAGE(
        new_capacity * sizeof(GCToggleRef), NORMAL);
    if (UNLIKELY(NULL == new_array))
      return FALSE;
    if (LIKELY(GC_toggleref_array_size > 0))
      BCOPY(GC_toggleref_arr, new_array,
            GC_toggleref_array_size * sizeof(GCToggleRef));
    GC_INTERNAL_FREE(GC_toggleref_arr);
    GC_toggleref_arr = new_array;
    GC_toggleref_array_capacity = new_capacity;
  }
  return TRUE;
}

/*
 * Append `obj` to the toggle-refs array, either as a strong reference
 * (if `is_strong_ref` is non-zero) or as a weak (hidden) one.
 * Nothing is added if no toggle-refs callback is set.  Returns
 * `GC_NO_MEMORY` if the array cannot be expanded, `GC_SUCCESS` otherwise.
 */
GC_API int GC_CALL
GC_toggleref_add(void *obj, int is_strong_ref)
{
  int status = GC_SUCCESS;

  GC_ASSERT(NONNULL_ARG_NOT_NULL(obj));
  LOCK();
  GC_ASSERT((ADDR(obj) & 1) == 0 && obj == GC_base(obj));
  if (GC_toggleref_callback != 0) {
    if (ensure_toggleref_capacity(1)) {
      GCToggleRef *slot = GC_toggleref_arr + GC_toggleref_array_size;

      if (!is_strong_ref) {
        slot->weak_ref = GC_HIDE_POINTER(obj);
        GC_ASSERT((slot->weak_ref & 1) != 0);
      } else {
        slot->strong_ref = obj;
        /* A visible pointer is stored, thus the write barrier is needed. */
        GC_dirty(slot);
      }
      GC_toggleref_array_size++;
    } else {
      status = GC_NO_MEMORY;
    }
  }
  UNLOCK();
  return status;
}
#  endif /* !GC_TOGGLE_REFS_NOT_NEEDED */

/* Finalizer callback support. */

STATIC GC_await_finalize_proc GC_object_finalized_proc = 0;

/*
 * Set the callback stored in `GC_object_finalized_proc`.  The value is
 * updated with the allocator lock held.
 */
GC_API void GC_CALL
GC_set_await_finalize_proc(GC_await_finalize_proc fn)
{
  LOCK();
  GC_object_finalized_proc = fn;
  UNLOCK();
}

/*
 * Return the current value of `GC_object_finalized_proc`.  The value is
 * read with the allocator lock held in the reader mode.
 */
GC_API GC_await_finalize_proc GC_CALL
GC_get_await_finalize_proc(void)
{
  GC_await_finalize_proc current;

  READER_LOCK();
  current = GC_object_finalized_proc;
  READER_UNLOCK();
  return current;
}

#  ifndef GC_LONG_REFS_NOT_NEEDED
/*
 * Register `link` as a long disappearing link associated with `obj`.
 * Aborts if `link` is null or is not aligned to `ALIGNMENT`.  For the
 * result, see `GC_register_disappearing_link_inner()`.
 */
GC_API int GC_CALL
GC_register_long_link(void **link, const void *obj)
{
  if (!NONNULL_ARG_NOT_NULL(link) || (ADDR(link) & (ALIGNMENT - 1)) != 0)
    ABORT("Bad arg to GC_register_long_link");

  return GC_register_disappearing_link_inner(&GC_ll_hashtbl, link, obj,
                                             "long dl");
}

/*
 * Unregister the given long disappearing link.  Returns 1 if the link
 * was registered (and has been removed), 0 otherwise.
 */
GC_API int GC_CALL
GC_unregister_long_link(void **link)
{
  struct disappearing_link *removed;

  /* A misaligned link cannot be in the table, so nothing to do. */
  if ((ADDR(link) & (ALIGNMENT - 1)) != 0)
    return 0;

  LOCK();
  removed = GC_unregister_disappearing_link_inner(&GC_ll_hashtbl, link);
  UNLOCK();
  if (removed != NULL) {
    /* The entry is disposed of with the allocator lock released. */
    FREE_DL_ENTRY(removed);
    return 1;
  }
  return 0;
}
#  endif /* !GC_LONG_REFS_NOT_NEEDED */

#  ifndef GC_MOVE_DISAPPEARING_LINK_NOT_NEEDED
/*
 * Re-key the entry registered for `link` so that it becomes registered
 * for `new_link` (the associated object is kept).  Returns
 * `GC_NOT_FOUND` if `link` is not registered, `GC_DUPLICATE` if
 * `new_link` is already registered (nothing is changed in such a case),
 * `GC_SUCCESS` otherwise (including the case of `link` and `new_link`
 * being the same).  The allocator lock should be held by the caller.
 */
STATIC int
GC_move_disappearing_link_inner(struct dl_hashtbl_s *dl_hashtbl, void **link,
                                void **new_link)
{
  struct disappearing_link *entry;
  struct disappearing_link *pred;
  struct disappearing_link *probe;
  size_t old_bucket, new_bucket;
  GC_hidden_pointer old_key, new_key;

#    ifdef GC_ASSERTIONS
  GC_noop1_ptr(*new_link);
#    endif
  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(NULL == dl_hashtbl->head))
    return GC_NOT_FOUND;

  /* Locate the entry of `link` (and its predecessor in the chain). */
  old_bucket = HASH2(link, dl_hashtbl->log_size);
  old_key = GC_HIDE_POINTER(link);
  pred = NULL;
  entry = dl_hashtbl->head[old_bucket];
  while (entry != NULL && entry->dl_hidden_link != old_key) {
    pred = entry;
    entry = dl_next(entry);
  }
  if (UNLIKELY(NULL == entry))
    return GC_NOT_FOUND;
  if (new_link == link) {
    /* Nothing to do. */
    return GC_SUCCESS;
  }

  /* Refuse to move if the target link is already registered. */
  new_bucket = HASH2(new_link, dl_hashtbl->log_size);
  new_key = GC_HIDE_POINTER(new_link);
  for (probe = dl_hashtbl->head[new_bucket]; probe != NULL;
       probe = dl_next(probe)) {
    if (probe->dl_hidden_link == new_key)
      return GC_DUPLICATE;
  }

  /* Unlink the entry from the old chain... */
  if (pred != NULL) {
    dl_set_next(pred, dl_next(entry));
    GC_dirty(pred);
  } else {
    dl_hashtbl->head[old_bucket] = dl_next(entry);
  }
  /* ... then change its key and put it at the head of the new chain. */
  entry->dl_hidden_link = new_key;
  dl_set_next(entry, dl_hashtbl->head[new_bucket]);
  dl_hashtbl->head[new_bucket] = entry;
  GC_dirty(entry);
  GC_dirty(dl_hashtbl->head); /*< entire object */
  return GC_SUCCESS;
}

/*
 * Move the registration of the (short) disappearing link `link` to
 * `new_link`.  Aborts if `new_link` is null or misaligned.  For the
 * result, see `GC_move_disappearing_link_inner()`.
 */
GC_API int GC_CALL
GC_move_disappearing_link(void **link, void **new_link)
{
  int status;

  if (!NONNULL_ARG_NOT_NULL(new_link)
      || (ADDR(new_link) & (ALIGNMENT - 1)) != 0)
    ABORT("Bad new_link arg to GC_move_disappearing_link");
  /* A misaligned `link` cannot be in the table, so nothing to do. */
  if ((ADDR(link) & (ALIGNMENT - 1)) != 0)
    return GC_NOT_FOUND;

  LOCK();
  status = GC_move_disappearing_link_inner(&GC_dl_hashtbl, link, new_link);
  UNLOCK();
  return status;
}

#    ifndef GC_LONG_REFS_NOT_NEEDED
/*
 * Move the registration of the long disappearing link `link` to
 * `new_link`.  Aborts if `new_link` is null or misaligned.  For the
 * result, see `GC_move_disappearing_link_inner()`.
 */
GC_API int GC_CALL
GC_move_long_link(void **link, void **new_link)
{
  int status;

  if (!NONNULL_ARG_NOT_NULL(new_link)
      || (ADDR(new_link) & (ALIGNMENT - 1)) != 0)
    ABORT("Bad new_link arg to GC_move_long_link");
  /* A misaligned `link` cannot be in the table, so nothing to do. */
  if ((ADDR(link) & (ALIGNMENT - 1)) != 0)
    return GC_NOT_FOUND;

  LOCK();
  status = GC_move_disappearing_link_inner(&GC_ll_hashtbl, link, new_link);
  UNLOCK();
  return status;
}
#    endif
#  endif /* !GC_MOVE_DISAPPEARING_LINK_NOT_NEEDED */

/*
 * Various finalization marker procedures.  Note that mark stack overflow
 * is handled by the caller, and is not a disaster.
 */

#  if defined(_MSC_VER) && defined(I386)
GC_ATTR_NOINLINE
/* Otherwise some optimizer bug is tickled in VC for x86 (v19, at least). */
#  endif
/*
 * The default finalization mark procedure: push the object `p` (with
 * its block header) onto the mark stack by `GC_push_obj()` and update
 * `GC_mark_stack_top` accordingly.
 */
STATIC void
GC_normal_finalize_mark_proc(ptr_t p)
{
  GC_mark_stack_top = GC_push_obj(p, HDR(p), GC_mark_stack_top,
                                  GC_mark_stack + GC_mark_stack_size);
}

/*
 * The finalization mark procedure which ignores the pointers from
 * the object to itself: the object is scanned with the `ALIGNMENT` step
 * and only the loaded values pointing outside of the object are pushed.
 * This only pays very partial attention to the mark descriptor.
 * It does the right thing for normal and atomic objects, and treats
 * most others as normal.
 */
STATIC void
GC_ignore_self_finalize_mark_proc(ptr_t p)
{
  const hdr *hhdr = HDR(p);
  word descr = hhdr->hb_descr;
  ptr_t current_p;
  /* The last address at which a pointer is loaded from the object. */
  ptr_t scan_limit;
  /* The last byte of the object (as of the size in the block header). */
  ptr_t target_limit = p + hhdr->hb_sz - 1;

  if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) {
    /* The descriptor is the length of the part to scan. */
    scan_limit = p + descr - sizeof(ptr_t);
  } else {
    /* Otherwise, scan the whole object. */
    scan_limit = target_limit + 1 - sizeof(ptr_t);
  }
  for (current_p = p; ADDR_GE(scan_limit, current_p); current_p += ALIGNMENT) {
    ptr_t q;

    /*
     * Presumably, this loads the value at `current_p` to `q` and skips
     * to the next iteration if the value cannot be a heap pointer.
     */
    LOAD_PTR_OR_CONTINUE(q, current_p);
    /* Push only if `q` is outside of the `p .. target_limit` range. */
    if (ADDR_LT(q, p) || ADDR_LT(target_limit, q)) {
      GC_PUSH_ONE_HEAP(q, current_p, GC_mark_stack_top);
    }
  }
}

/*
 * The finalization mark procedure which marks nothing (it is used for
 * the finalizers registered by `GC_register_finalizer_no_order()`).
 */
STATIC void
GC_null_finalize_mark_proc(ptr_t p)
{
  UNUSED_ARG(p);
}

/*
 * `GC_unreachable_finalize_mark_proc` does the normal marking, but its
 * address is explicitly tested for, and that triggers different behavior.
 * Objects registered in this way are not finalized if they are reachable
 * by other finalizable objects, even if those other objects specify
 * no ordering.
 */
STATIC void
GC_unreachable_finalize_mark_proc(ptr_t p)
{
  /*
   * The test for null is a dummy one; it is here only to prevent
   * the compiler from merging this function with the normal mark
   * procedure (each of them should have a unique address).
   * Alternatively, `GC_noop1_ptr(p)` could be used.
   */
  if (LIKELY(p != NULL))
    GC_normal_finalize_mark_proc(p);
}

/*
 * Register a finalization function.  See `gc.h` file for details.
 * `obj` should be a base address of a heap object; `fn` is the finalizer
 * (zero means to remove the registered one, if any); `cd` is the client
 * data to store along with the finalizer.  If `ofn` and/or `ocd` are
 * non-null, then the previously registered finalizer and/or client data
 * (or zero if none) are stored there, unless noted otherwise below.
 * The last parameter is a procedure that determines marking for
 * finalization ordering.  Any objects marked by that procedure will be
 * guaranteed to not have been finalized when this finalizer is invoked.
 * The allocator lock is acquired (and released) by this function.
 */
STATIC void
GC_register_finalizer_inner(void *obj, GC_finalization_proc fn, void *cd,
                            GC_finalization_proc *ofn, void **ocd,
                            finalization_mark_proc mp)
{
  struct finalizable_object *curr_fo;
  size_t index;
  struct finalizable_object *new_fo = 0;
  const hdr *hhdr = NULL; /*< initialized to prevent warning */

  GC_ASSERT(GC_is_initialized);
  if (UNLIKELY(GC_find_leak_inner)) {
    /* No-op.  `*ocd` and `*ofn` remain unchanged. */
    return;
  }
  LOCK();
  GC_ASSERT(obj != NULL && GC_base_C(obj) == obj);
  if (mp == GC_unreachable_finalize_mark_proc)
    GC_need_unreachable_finalization = TRUE;
  /*
   * Create the table on the first use, or try to grow it once the number
   * of entries exceeds the number of the chain heads.
   */
  if (UNLIKELY(NULL == GC_fnlz_roots.fo_head)
      || UNLIKELY(GC_fo_entries > ((size_t)1 << GC_log_fo_table_size))) {
    GC_grow_table((struct hash_chain_entry ***)&GC_fnlz_roots.fo_head,
                  &GC_log_fo_table_size, &GC_fo_entries);
    GC_COND_LOG_PRINTF("Grew fo table to %u entries\n",
                       1U << GC_log_fo_table_size);
  }
  /*
   * The loop body is executed once normally; it is repeated only if
   * the allocator lock has been released to call `GC_oom_fn()`.
   */
  for (;;) {
    struct finalizable_object *prev_fo = NULL;
    GC_oom_func oom_fn;

    index = HASH2(obj, GC_log_fo_table_size);
    curr_fo = GC_fnlz_roots.fo_head[index];
    /* Search the chain for an existing entry of `obj`. */
    while (curr_fo != NULL) {
      GC_ASSERT(GC_size(curr_fo) >= sizeof(struct finalizable_object));
      if (curr_fo->fo_hidden_base == GC_HIDE_POINTER(obj)) {
        /*
         * Interruption by a signal in the middle of this should be safe.
         * The client may see only `*ocd` updated, but we will declare that
         * to be his problem.
         */
        if (ocd)
          *ocd = curr_fo->fo_client_data;
        if (ofn)
          *ofn = curr_fo->fo_fn;
        /* Delete the structure for `obj`. */
        if (prev_fo == 0) {
          GC_fnlz_roots.fo_head[index] = fo_next(curr_fo);
        } else {
          fo_set_next(prev_fo, fo_next(curr_fo));
          GC_dirty(prev_fo);
        }
        if (fn == 0) {
          /* The finalizer is unregistered. */
          GC_fo_entries--;
          /*
           * May not happen if we get a signal.  But a high estimate will
           * only make the table larger than necessary.
           */
#  if !defined(THREADS) && !defined(DBG_HDRS_ALL)
          GC_free(curr_fo);
#  endif
        } else {
          /* The finalizer is replaced; the existing entry is reused. */
          curr_fo->fo_fn = fn;
          curr_fo->fo_client_data = (ptr_t)cd;
          curr_fo->fo_mark_proc = mp;
          GC_dirty(curr_fo);
          /*
           * Reinsert it.  We deleted it first to maintain consistency in
           * the event of a signal.
           */
          if (prev_fo == 0) {
            GC_fnlz_roots.fo_head[index] = curr_fo;
          } else {
            fo_set_next(prev_fo, curr_fo);
            GC_dirty(prev_fo);
          }
        }
        if (NULL == prev_fo)
          GC_dirty(GC_fnlz_roots.fo_head + index);
        UNLOCK();
#  ifndef DBG_HDRS_ALL
        /* Free unused `new_fo` returned by `GC_oom_fn()`. */
        GC_free(new_fo);
#  endif
        return;
      }
      prev_fo = curr_fo;
      curr_fo = fo_next(curr_fo);
    }
    /* No entry for `obj` is in the table at this point. */
    if (UNLIKELY(new_fo != 0)) {
      /* `new_fo` is returned by `GC_oom_fn()`. */
      GC_ASSERT(fn != 0);
#  ifdef LINT2
      if (NULL == hhdr)
        ABORT("Bad hhdr in GC_register_finalizer_inner");
#  endif
      break;
    }
    if (fn == 0) {
      /* Nothing to unregister. */
      if (ocd)
        *ocd = 0;
      if (ofn)
        *ofn = 0;
      UNLOCK();
      return;
    }
    GET_HDR(obj, hhdr);
    if (UNLIKELY(NULL == hhdr)) {
      /* We will not collect it, hence finalizer would not be run. */
      if (ocd)
        *ocd = 0;
      if (ofn)
        *ofn = 0;
      UNLOCK();
      return;
    }
    new_fo = (struct finalizable_object *)GC_INTERNAL_MALLOC(
        sizeof(struct finalizable_object), NORMAL);
    if (LIKELY(new_fo != 0))
      break;
    /*
     * Out of memory: call the client out-of-memory function with
     * the allocator lock released.
     */
    oom_fn = GC_oom_fn;
    UNLOCK();
    new_fo = (struct finalizable_object *)(*oom_fn)(
        sizeof(struct finalizable_object));
    if (0 == new_fo) {
      /* Not enough memory.  `*ocd` and `*ofn` remain unchanged. */
      return;
    }
    /* It is not likely we will make it here, but... */
    LOCK();
    /*
     * Recalculate index since the table may grow and check again that
     * our finalizer is not in the table.
     */
  }
  /* Fill in the new entry and put it at the beginning of its chain. */
  GC_ASSERT(GC_size(new_fo) >= sizeof(struct finalizable_object));
  if (ocd)
    *ocd = 0;
  if (ofn)
    *ofn = 0;
  new_fo->fo_hidden_base = GC_HIDE_POINTER(obj);
  new_fo->fo_fn = fn;
  new_fo->fo_client_data = (ptr_t)cd;
  new_fo->fo_object_sz = hhdr->hb_sz;
  new_fo->fo_mark_proc = mp;
  fo_set_next(new_fo, GC_fnlz_roots.fo_head[index]);
  GC_dirty(new_fo);
  GC_fo_entries++;
  GC_fnlz_roots.fo_head[index] = new_fo;
  GC_dirty(GC_fnlz_roots.fo_head + index);
  UNLOCK();
}

/*
 * Register a finalizer for `obj` using the normal mark procedure (the
 * one which pushes the object itself onto the mark stack).  For the
 * arguments, see `GC_register_finalizer_inner()`.
 */
GC_API void GC_CALL
GC_register_finalizer(void *obj, GC_finalization_proc fn, void *cd,
                      GC_finalization_proc *ofn, void **ocd)
{
  GC_register_finalizer_inner(obj, fn, cd, ofn, ocd,
                              GC_normal_finalize_mark_proc);
}

/*
 * Register a finalizer for `obj` using the mark procedure which ignores
 * the pointers from the object to itself.  For the arguments, see
 * `GC_register_finalizer_inner()`.
 */
GC_API void GC_CALL
GC_register_finalizer_ignore_self(void *obj, GC_finalization_proc fn, void *cd,
                                  GC_finalization_proc *ofn, void **ocd)
{
  GC_register_finalizer_inner(obj, fn, cd, ofn, ocd,
                              GC_ignore_self_finalize_mark_proc);
}

/*
 * Register a finalizer for `obj` using the mark procedure which marks
 * nothing (i.e. no finalization ordering is imposed by this object).
 * For the arguments, see `GC_register_finalizer_inner()`.
 */
GC_API void GC_CALL
GC_register_finalizer_no_order(void *obj, GC_finalization_proc fn, void *cd,
                               GC_finalization_proc *ofn, void **ocd)
{
  GC_register_finalizer_inner(obj, fn, cd, ofn, ocd,
                              GC_null_finalize_mark_proc);
}

/*
 * Register a finalizer for `obj` using the "unreachable" mark procedure
 * (`GC_register_finalizer_inner()` sets `GC_need_unreachable_finalization`
 * in such a case).  `GC_java_finalization` is expected to be on.  For
 * the arguments, see `GC_register_finalizer_inner()`.
 */
GC_API void GC_CALL
GC_register_finalizer_unreachable(void *obj, GC_finalization_proc fn, void *cd,
                                  GC_finalization_proc *ofn, void **ocd)
{
  GC_ASSERT(GC_java_finalization);
  GC_register_finalizer_inner(obj, fn, cd, ofn, ocd,
                              GC_unreachable_finalize_mark_proc);
}

#  ifndef NO_DEBUGGING
STATIC void
GC_dump_finalization_links(const struct dl_hashtbl_s *dl_hashtbl)
{
  size_t dl_size = (size_t)1 << dl_hashtbl->log_size;
  size_t i;

  if (NULL == dl_hashtbl->head) {
    /* The table is empty. */
    return;
  }

  for (i = 0; i < dl_size; i++) {
    struct disappearing_link *curr_dl;

    for (curr_dl = dl_hashtbl->head[i]; curr_dl != 0;
         curr_dl = dl_next(curr_dl)) {
      ptr_t real_ptr = (ptr_t)GC_REVEAL_POINTER(curr_dl->dl_hidden_obj);
      ptr_t real_link = (ptr_t)GC_REVEAL_POINTER(curr_dl->dl_hidden_link);

      GC_printf("Object: %p, link value: %p, link addr: %p\n",
                (void *)real_ptr, *(void **)real_link, (void *)real_link);
    }
  }
}

/*
 * Print the content of the disappearing links tables and the list of
 * the objects which have a registered finalizer.
 */
GC_API void GC_CALL
GC_dump_finalization(void)
{
  size_t n_buckets = 0;
  size_t bucket;

  /* The table of finalizers is treated as empty until allocated. */
  if (GC_fnlz_roots.fo_head != NULL)
    n_buckets = (size_t)1 << GC_log_fo_table_size;

  GC_printf("\n***Disappearing (short) links:\n");
  GC_dump_finalization_links(&GC_dl_hashtbl);
#    ifndef GC_LONG_REFS_NOT_NEEDED
  GC_printf("\n***Disappearing long links:\n");
  GC_dump_finalization_links(&GC_ll_hashtbl);
#    endif
  GC_printf("\n***Finalizers:\n");
  for (bucket = 0; bucket < n_buckets; bucket++) {
    struct finalizable_object *entry = GC_fnlz_roots.fo_head[bucket];

    while (entry != NULL) {
      ptr_t obj_addr = (ptr_t)GC_REVEAL_POINTER(entry->fo_hidden_base);

      GC_printf("Finalizable object: %p\n", (void *)obj_addr);
      entry = fo_next(entry);
    }
  }
}
#  endif /* !NO_DEBUGGING */

#  ifndef THREADS
/*
 * Check and update the level of the finalizers recursion.  The result
 * is `NULL` if `GC_invoke_finalizers()` should not be called by
 * the collector this time (to minimize the risk of a deep finalizers
 * recursion); otherwise the nesting level is incremented and a pointer
 * to `GC_finalizer_nested` is returned.
 */
STATIC unsigned char *
GC_check_finalizer_nested(void)
{
  unsigned depth = GC_finalizer_nested;

  if (depth != 0) {
    /*
     * This is a call from inside of another `GC_invoke_finalizers()`.
     * Some of the implicit `GC_invoke_finalizers()` calls are skipped,
     * the amount depends on the nesting (recursion) level.
     */
    ++GC_finalizer_skipped;
    if ((unsigned)GC_finalizer_skipped < (1U << depth))
      return NULL;
    GC_finalizer_skipped = 0;
  }
  GC_finalizer_nested = (unsigned char)(depth + 1);
  return &GC_finalizer_nested;
}
#  endif /* !THREADS */

/*
 * Scan the given table of disappearing links and delete the entries
 * which are no longer needed.  If `is_remove_dangling` is false, then
 * an entry is deleted (and its link is cleared, i.e. null is stored to
 * the link) if the associated object is not marked.  Otherwise, an entry
 * is deleted (without touching the link) if the link resides in a heap
 * object which is not marked.  The allocator lock should be held.
 */
GC_INLINE void
GC_make_disappearing_links_disappear(struct dl_hashtbl_s *dl_hashtbl,
                                     GC_bool is_remove_dangling)
{
  size_t i;
  size_t dl_size = (size_t)1 << dl_hashtbl->log_size;
  /* Set if some chain head is updated (the barrier call is deferred). */
  GC_bool needs_barrier = FALSE;

  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == dl_hashtbl->head) {
    /* The table is empty. */
    return;
  }

  for (i = 0; i < dl_size; i++) {
    struct disappearing_link *curr_dl, *next_dl;
    /* The last entry retained in the current chain, if any. */
    struct disappearing_link *prev_dl = NULL;

    for (curr_dl = dl_hashtbl->head[i]; curr_dl != NULL; curr_dl = next_dl) {
      /* Fetched beforehand as `curr_dl` may be deleted below. */
      next_dl = dl_next(curr_dl);
#  if defined(GC_ASSERTIONS) && !defined(THREAD_SANITIZER)
      /* Check accessibility of the location pointed by the link. */
      GC_noop1_ptr(*(ptr_t *)GC_REVEAL_POINTER(curr_dl->dl_hidden_link));
#  endif
      if (is_remove_dangling) {
        /* The base of the heap object containing the link, if any. */
        ptr_t real_link
            = (ptr_t)GC_base(GC_REVEAL_POINTER(curr_dl->dl_hidden_link));

        /* Keep the entry if the link is not in the heap or is alive. */
        if (NULL == real_link || LIKELY(GC_is_marked(real_link))) {
          prev_dl = curr_dl;
          continue;
        }
      } else {
        /* Keep the entry if the associated object is marked. */
        if (LIKELY(GC_is_marked(
                (ptr_t)GC_REVEAL_POINTER(curr_dl->dl_hidden_obj)))) {
          prev_dl = curr_dl;
          continue;
        }
        /* Otherwise, clear the link itself. */
        *(ptr_t *)GC_REVEAL_POINTER(curr_dl->dl_hidden_link) = NULL;
      }

      /* Delete `curr_dl` entry from `dl_hashtbl`. */
      if (NULL == prev_dl) {
        dl_hashtbl->head[i] = next_dl;
        needs_barrier = TRUE;
      } else {
        dl_set_next(prev_dl, next_dl);
        GC_dirty(prev_dl);
      }
      /* Unmark the deleted entry. */
      GC_clear_mark_bit(curr_dl);
      dl_hashtbl->entries--;
    }
  }
  if (needs_barrier)
    GC_dirty(dl_hashtbl->head); /*< entire object */
}

GC_INNER void
GC_finalize(void)
{
  struct finalizable_object *curr_fo, *prev_fo, *next_fo;
  ptr_t real_ptr;
  size_t i;
  size_t fo_size
      = GC_fnlz_roots.fo_head == NULL ? 0 : (size_t)1 << GC_log_fo_table_size;
  GC_bool needs_barrier = FALSE;

  GC_ASSERT(I_HOLD_LOCK());
#  ifndef SMALL_CONFIG
  /* Save current `GC_dl_entries` value for stats printing. */
  GC_old_dl_entries = GC_dl_hashtbl.entries;
#    ifndef GC_LONG_REFS_NOT_NEEDED
  /* Save current `GC_ll_entries` value for stats printing. */
  GC_old_ll_entries = GC_ll_hashtbl.entries;
#    endif
#  endif

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
  GC_mark_togglerefs();
#  endif
  GC_make_disappearing_links_disappear(&GC_dl_hashtbl, FALSE);

  /*
   * Mark all objects reachable via chains of 1 or more pointers from
   * finalizable objects.
   */
  GC_ASSERT(!GC_collection_in_progress());
  for (i = 0; i < fo_size; i++) {
    for (curr_fo = GC_fnlz_roots.fo_head[i]; curr_fo != NULL;
         curr_fo = fo_next(curr_fo)) {
      GC_ASSERT(GC_size(curr_fo) >= sizeof(struct finalizable_object));
      real_ptr = (ptr_t)GC_REVEAL_POINTER(curr_fo->fo_hidden_base);
      if (!GC_is_marked(real_ptr)) {
        GC_MARKED_FOR_FINALIZATION(real_ptr);
        GC_mark_fo(real_ptr, curr_fo->fo_mark_proc);
        if (GC_is_marked(real_ptr)) {
          WARN("Finalization cycle involving %p\n", real_ptr);
        }
      }
    }
  }
  /* Enqueue for finalization all objects that are still unreachable. */
  GC_bytes_finalized = 0;
  for (i = 0; i < fo_size; i++) {
    curr_fo = GC_fnlz_roots.fo_head[i];
    prev_fo = NULL;
    while (curr_fo != NULL) {
      real_ptr = (ptr_t)GC_REVEAL_POINTER(curr_fo->fo_hidden_base);
      if (!GC_is_marked(real_ptr)) {
        if (!GC_java_finalization) {
          GC_set_mark_bit(real_ptr);
        }
        /* Delete from hash table. */
        next_fo = fo_next(curr_fo);
        if (NULL == prev_fo) {
          GC_fnlz_roots.fo_head[i] = next_fo;
          if (GC_object_finalized_proc) {
            GC_dirty(GC_fnlz_roots.fo_head + i);
          } else {
            needs_barrier = TRUE;
          }
        } else {
          fo_set_next(prev_fo, next_fo);
          GC_dirty(prev_fo);
        }
        GC_fo_entries--;
        if (GC_object_finalized_proc)
          GC_object_finalized_proc(real_ptr);

        /* Add to list of objects awaiting finalization. */
        fo_set_next(curr_fo, GC_fnlz_roots.finalize_now);
        GC_dirty(curr_fo);
        SET_FINALIZE_NOW(curr_fo);
        /* Unhide object pointer so any future collections will see it. */
        curr_fo->fo_hidden_base
            = (GC_hidden_pointer)GC_REVEAL_POINTER(curr_fo->fo_hidden_base);

        GC_bytes_finalized
            += (word)curr_fo->fo_object_sz + sizeof(struct finalizable_object);
        GC_ASSERT(GC_is_marked(GC_base(curr_fo)));
        curr_fo = next_fo;
      } else {
        prev_fo = curr_fo;
        curr_fo = fo_next(curr_fo);
      }
    }
  }

  if (GC_java_finalization) {
    /*
     * Make sure we mark everything reachable from objects finalized
     * using the no-order `fo_mark_proc`.
     */
    for (curr_fo = GC_fnlz_roots.finalize_now; curr_fo != NULL;
         curr_fo = fo_next(curr_fo)) {
      real_ptr = (ptr_t)curr_fo->fo_hidden_base; /*< revealed */
      if (!GC_is_marked(real_ptr)) {
        if (curr_fo->fo_mark_proc == GC_null_finalize_mark_proc) {
          GC_mark_fo(real_ptr, GC_normal_finalize_mark_proc);
        }
        if (curr_fo->fo_mark_proc != GC_unreachable_finalize_mark_proc) {
          GC_set_mark_bit(real_ptr);
        }
      }
    }

    /*
     * Now revive finalize-when-unreachable objects reachable from other
     * finalizable objects.
     */
    if (GC_need_unreachable_finalization) {
      curr_fo = GC_fnlz_roots.finalize_now;
#  if defined(GC_ASSERTIONS) || defined(LINT2)
      if (curr_fo != NULL && NULL == GC_fnlz_roots.fo_head)
        ABORT("GC_fnlz_roots.fo_head is null");
#  endif
      for (prev_fo = NULL; curr_fo != NULL;
           prev_fo = curr_fo, curr_fo = next_fo) {
        next_fo = fo_next(curr_fo);
        if (curr_fo->fo_mark_proc != GC_unreachable_finalize_mark_proc)
          continue;

        real_ptr = (ptr_t)curr_fo->fo_hidden_base; /*< revealed */
        if (!GC_is_marked(real_ptr)) {
          GC_set_mark_bit(real_ptr);
          continue;
        }
        if (NULL == prev_fo) {
          SET_FINALIZE_NOW(next_fo);
        } else {
          fo_set_next(prev_fo, next_fo);
          GC_dirty(prev_fo);
        }
        curr_fo->fo_hidden_base = GC_HIDE_POINTER(real_ptr);
        GC_bytes_finalized
            -= (word)curr_fo->fo_object_sz + sizeof(struct finalizable_object);

        i = HASH2(real_ptr, GC_log_fo_table_size);
        fo_set_next(curr_fo, GC_fnlz_roots.fo_head[i]);
        GC_dirty(curr_fo);
        GC_fo_entries++;
        GC_fnlz_roots.fo_head[i] = curr_fo;
        curr_fo = prev_fo;
        needs_barrier = TRUE;
      }
    }
  }
  if (needs_barrier)
    GC_dirty(GC_fnlz_roots.fo_head); /*< entire object */

  /* Remove dangling disappearing links. */
  GC_make_disappearing_links_disappear(&GC_dl_hashtbl, TRUE);

#  ifndef GC_TOGGLE_REFS_NOT_NEEDED
  GC_clear_togglerefs();
#  endif
#  ifndef GC_LONG_REFS_NOT_NEEDED
  GC_make_disappearing_links_disappear(&GC_ll_hashtbl, FALSE);
  GC_make_disappearing_links_disappear(&GC_ll_hashtbl, TRUE);
#  endif

  if (GC_alloc_fail_count > 0) {
    /*
     * Do not prevent running finalizers if there has been an allocation
     * failure recently.
     */
#  ifdef THREADS
    GC_reset_finalizer_nested();
#  else
    GC_finalizer_nested = 0;
#  endif
  }
}

/*
 * Count of finalizers to run, at most, during a single invocation
 * of `GC_invoke_finalizers()`; zero means no limit.  Accessed with the
 * allocator lock held.  The value is changed by
 * `GC_set_interrupt_finalizers()` and reported by
 * `GC_get_interrupt_finalizers()`; `GC_finalize_all()` resets it to zero
 * temporarily while it runs the finalizers.
 */
STATIC unsigned GC_interrupt_finalizers = 0;

#  ifndef JAVA_FINALIZATION_NOT_NEEDED

/*
 * Move every entry of the finalization hash table to the queue of
 * objects awaiting finalization (`GC_fnlz_roots.finalize_now`).
 * Before an entry is moved, its object is passed to `GC_mark_fo()` with
 * the normal mark procedure and gets its mark bit set, then a collection
 * in progress, if any, is completed.  On return, the hash table is empty,
 * `GC_fo_entries` is zero and `GC_bytes_finalized` is the sum of the
 * object and entry sizes of everything enqueued by this call.
 * The allocator lock should be held by the caller.
 */
STATIC void
GC_enqueue_all_finalizers(void)
{
  size_t n_buckets = 0;
  size_t idx;

  GC_ASSERT(I_HOLD_LOCK());
  /* No buckets to scan if the table is not allocated yet. */
  if (GC_fnlz_roots.fo_head != NULL)
    n_buckets = (size_t)1 << GC_log_fo_table_size;

  GC_bytes_finalized = 0;
  for (idx = 0; idx < n_buckets; idx++) {
    struct finalizable_object *fo = GC_fnlz_roots.fo_head[idx];
    struct finalizable_object *following;

    /* Detach the whole chain of this bucket at once. */
    GC_fnlz_roots.fo_head[idx] = NULL;
    for (; fo != NULL; fo = following) {
      ptr_t obj = (ptr_t)GC_REVEAL_POINTER(fo->fo_hidden_base);

      GC_mark_fo(obj, GC_normal_finalize_mark_proc);
      GC_set_mark_bit(obj);
      GC_complete_ongoing_collection();
      /* Remember the successor before the link is overwritten below. */
      following = fo_next(fo);

      /* Push the entry onto the list of objects awaiting finalization. */
      fo_set_next(fo, GC_fnlz_roots.finalize_now);
      GC_dirty(fo);
      SET_FINALIZE_NOW(fo);

      /* Store the pointer unhidden so that future collections see it. */
      fo->fo_hidden_base
          = (GC_hidden_pointer)GC_REVEAL_POINTER(fo->fo_hidden_base);
      GC_bytes_finalized
          += fo->fo_object_sz + sizeof(struct finalizable_object);
    }
  }
  /* Nothing is left in the hash table at this point. */
  GC_fo_entries = 0;
}

/*
 * Enqueue and run the finalizers of all the registered objects.
 * The procedure is repeated while finalization entries exist (the
 * entries count is re-examined after each round of the finalizers).
 */
GC_API void GC_CALL
GC_finalize_all(void)
{
  LOCK();
  for (;;) {
    unsigned limit_backup;

    if (!(GC_fo_entries > 0))
      break;

    GC_enqueue_all_finalizers();
    /*
     * Lift the per-invocation limit for this round, so that the whole
     * queue is processed; the client-set value is put back afterwards.
     */
    limit_backup = GC_interrupt_finalizers;
    GC_interrupt_finalizers = 0;
    UNLOCK();
    /*
     * Running the finalizers in this thread is arguably not a good idea
     * when we should be notifying another thread to run them.
     * But otherwise we do not have a great way to wait for them to run.
     */
    (void)GC_invoke_finalizers();
    LOCK();
    GC_interrupt_finalizers = limit_backup;
  }
  UNLOCK();
}

#  endif /* !JAVA_FINALIZATION_NOT_NEEDED */

/*
 * Set the maximum number of finalizers to run during a single invocation
 * of `GC_invoke_finalizers()`; zero means no limit.  The value is stored
 * with the allocator lock held.
 */
GC_API void GC_CALL
GC_set_interrupt_finalizers(unsigned value)
{
  LOCK();
  GC_interrupt_finalizers = value;
  UNLOCK();
}

/*
 * Return the current limit on the number of finalizers run during
 * a single invocation of `GC_invoke_finalizers()` (zero means no limit).
 * The value is fetched with the allocator lock held in the reader mode.
 */
GC_API unsigned GC_CALL
GC_get_interrupt_finalizers(void)
{
  unsigned limit;

  READER_LOCK();
  limit = GC_interrupt_finalizers;
  READER_UNLOCK();
  return limit;
}

/*
 * Return a nonzero value if the queue of objects awaiting finalization
 * (`GC_fnlz_roots.finalize_now`) is not empty.  No lock is acquired
 * here; the queue head is read by `GC_cptr_load()` if `AO_HAVE_load`
 * macro is defined, and by a plain load otherwise.
 */
GC_API int GC_CALL
GC_should_invoke_finalizers(void)
{
#  ifdef AO_HAVE_load
  return GC_cptr_load((volatile ptr_t *)&GC_fnlz_roots.finalize_now) != NULL;
#  else
  return GC_fnlz_roots.finalize_now != NULL;
#  endif /* !AO_HAVE_load */
}

/*
 * Run the finalizers of the objects in `GC_fnlz_roots.finalize_now`
 * queue, one at a time, and return the number of finalizers run.
 * The allocator lock should not be held by the caller: it is acquired
 * only to remove an entry from the queue and is released before the
 * client finalizer is called.  The loop ends when the queue is empty
 * or, if `GC_interrupt_finalizers` is nonzero, once that many finalizers
 * have been run (at least one is always run if the queue is not empty).
 * Finally, the change of `GC_bytes_freed` observed during the call is
 * added to `GC_finalizer_bytes_freed`.
 */
GC_API int GC_CALL
GC_invoke_finalizers(void)
{
  int count = 0;
  word bytes_freed_before = 0; /*< initialized to prevent warning */

  GC_ASSERT(I_DONT_HOLD_LOCK());
  while (GC_should_invoke_finalizers()) {
    struct finalizable_object *curr_fo;
    ptr_t real_ptr;

    LOCK();
    if (count == 0) {
      /* Note: we hold the allocator lock here. */
      bytes_freed_before = GC_bytes_freed;
    } else if (UNLIKELY(GC_interrupt_finalizers != 0)
               && (unsigned)count >= GC_interrupt_finalizers) {
      /* The limit is tested only after the first finalizer is run. */
      UNLOCK();
      break;
    }
    curr_fo = GC_fnlz_roots.finalize_now;
#  ifdef THREADS
    /*
     * Re-check with the lock held, as the loop condition has been
     * evaluated without holding it.
     */
    if (UNLIKELY(NULL == curr_fo)) {
      UNLOCK();
      break;
    }
#  endif
    /* Remove the entry from the head of the queue. */
    SET_FINALIZE_NOW(fo_next(curr_fo));
    UNLOCK();
    fo_set_next(curr_fo, 0);
    real_ptr = (ptr_t)curr_fo->fo_hidden_base; /*< revealed */
    /* Call the client finalizer (the allocator lock is not held). */
    curr_fo->fo_fn(real_ptr, curr_fo->fo_client_data);
    curr_fo->fo_client_data = NULL;
    ++count;
    /*
     * Explicit freeing of `curr_fo` is probably a bad idea.
     * It throws off accounting if nearly all objects are finalizable.
     * Otherwise it should not matter.
     */
  }
  /* `bytes_freed_before` is initialized whenever `count` is nonzero. */
  if (count != 0
#  if defined(THREADS) && !defined(THREAD_SANITIZER)
      /*
       * A quick check whether some memory was freed.  The race with
       * `GC_free()` is safe to be ignored because we only need to know
       * if the current thread has deallocated something.
       */
      && bytes_freed_before != GC_bytes_freed
#  endif
  ) {
    LOCK();
    GC_finalizer_bytes_freed += (GC_bytes_freed - bytes_freed_before);
    UNLOCK();
  }
  return count;
}

/*
 * The value of `GC_gc_no` recorded when the finalizer notifier was last
 * selected for invocation by `GC_notify_or_invoke_finalizers()`; used to
 * call the notifier at most once per `GC_gc_no` value.
 */
static word last_finalizer_notification = 0;

/*
 * Either run the queued finalizers or notify the client that there are
 * some to run.  Does nothing if the finalization queue is empty (except
 * for the backtraces generation if `KEEP_BACK_PTRS` or `MAKE_BACK_GRAPH`
 * macro is defined).  If `GC_finalize_on_demand` is off, then
 * `GC_invoke_finalizers()` is called unless `GC_check_finalizer_nested()`
 * returns `NULL` (or, in the multi-threaded case, unless
 * `GC_in_thread_creation` is set).  Otherwise, the client notifier
 * (`GC_finalizer_notifier`), if set, is invoked, but only if `GC_gc_no`
 * differs from the value seen at the previous notification.
 * The allocator lock is acquired and released inside; neither the
 * finalizers nor the notifier are called with the lock held.
 */
GC_INNER void
GC_notify_or_invoke_finalizers(void)
{
#  if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
  static word last_back_trace_gc_no = 0;
#  endif
  GC_finalizer_notifier_proc notifier_fn = 0;

#  if defined(THREADS) && !defined(KEEP_BACK_PTRS) && !defined(MAKE_BACK_GRAPH)
  /* Quick check (while unlocked) for an empty finalization queue. */
  if (!GC_should_invoke_finalizers())
    return;
#  endif
  LOCK();

#  if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
  /*
   * This is a convenient place to generate backtraces if appropriate,
   * since that code is not callable with the allocator lock.
   */
  if (GC_gc_no != last_back_trace_gc_no
      && LIKELY(GC_gc_no > 1) /*< skip initial collection */) {
#    ifdef KEEP_BACK_PTRS
    static GC_bool bt_in_progress = FALSE;

    if (!bt_in_progress) {
      long i;

      /* Prevent a recursion or parallel usage. */
      bt_in_progress = TRUE;
      for (i = 0; i < GC_backtraces; ++i) {
        /*
         * FIXME: This tolerates concurrent heap mutation, which may
         * cause occasional mysterious results.  We need to release
         * the allocator lock, since `GC_print_callers()` acquires it.
         * It probably should not.
         */
        void *current = GC_generate_random_valid_address();

        UNLOCK();
        GC_printf("\n***Chosen address %p in object\n", current);
        GC_print_backtrace(current);
        LOCK();
      }
      bt_in_progress = FALSE;
    }
#    endif
    last_back_trace_gc_no = GC_gc_no;
#    ifdef MAKE_BACK_GRAPH
    if (GC_print_back_height) {
      GC_print_back_graph_stats();
    }
#    endif
  }
#  endif
  /* Check (with the lock held) whether there is anything to finalize. */
  if (NULL == GC_fnlz_roots.finalize_now) {
    UNLOCK();
    return;
  }

  if (!GC_finalize_on_demand) {
    unsigned char *pnested;

#  ifdef THREADS
    if (UNLIKELY(GC_in_thread_creation)) {
      UNLOCK();
      return;
    }
#  endif
    pnested = GC_check_finalizer_nested();
    UNLOCK();
    /* Skip `GC_invoke_finalizers()` if nested. */
    if (pnested != NULL) {
      (void)GC_invoke_finalizers();
      /* Reset since no more finalizers or interrupted. */
      *pnested = 0;
#  ifndef THREADS
      GC_ASSERT(NULL == GC_fnlz_roots.finalize_now
                || GC_interrupt_finalizers > 0);
#  else
      /*
       * Note: in the multi-threaded case GC can run concurrently and
       * add more finalizers to run.
       */
#  endif
    }
    return;
  }

  /* These variables require synchronization to avoid data race. */
  if (last_finalizer_notification != GC_gc_no) {
    notifier_fn = GC_finalizer_notifier;
    last_finalizer_notification = GC_gc_no;
  }
  UNLOCK();
  if (notifier_fn != 0) {
    /* Invoke the notifier. */
    (*notifier_fn)();
  }
}

#  ifndef SMALL_CONFIG
#    ifndef GC_LONG_REFS_NOT_NEEDED
#      define IF_LONG_REFS_PRESENT_ELSE(x, y) (x)
#    else
#      define IF_LONG_REFS_PRESENT_ELSE(x, y) (y)
#    endif

/*
 * Log the finalization statistics: the number of finalization entries,
 * the numbers of short and long disappearing links alive, the length of
 * the queue of objects awaiting finalization, and the differences
 * between the saved (`GC_old_dl_entries`, `GC_old_ll_entries`) and the
 * current link counts.  Zero is printed for the long links if
 * `GC_LONG_REFS_NOT_NEEDED` macro is defined.
 */
GC_INNER void
GC_print_finalization_stats(void)
{
  const struct finalizable_object *entry;
  unsigned long n_ready;

  GC_log_printf(
      "%lu finalization entries;"
      " %lu/%lu short/long disappearing links alive\n",
      (unsigned long)GC_fo_entries, (unsigned long)GC_dl_hashtbl.entries,
      (unsigned long)IF_LONG_REFS_PRESENT_ELSE(GC_ll_hashtbl.entries, 0));

  /* Count the objects in the queue by walking through it. */
  n_ready = 0;
  entry = GC_fnlz_roots.finalize_now;
  while (entry != NULL) {
    n_ready++;
    entry = fo_next(entry);
  }
  GC_log_printf("%lu finalization-ready objects;"
                " %ld/%ld short/long links cleared\n",
                n_ready,
                (long)GC_old_dl_entries - (long)GC_dl_hashtbl.entries,
                (long)IF_LONG_REFS_PRESENT_ELSE(
                    GC_old_ll_entries - GC_ll_hashtbl.entries, 0));
}
#  endif /* !SMALL_CONFIG */

#endif /* !GC_NO_FINALIZATION */

/*
 * Copyright (c) 2011 by Hewlett-Packard Company.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifdef ENABLE_DISCLAIM

#  include "gc/gc_disclaim.h"

#  if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
/* The first bit is already used for a debug purpose. */
#    define FINALIZER_CLOSURE_FLAG 0x2
#  else
#    define FINALIZER_CLOSURE_FLAG 0x1
#  endif

/*
 * The disclaim procedure of the finalized object kind.  If the first
 * pointer-sized word of `obj` has `FINALIZER_CLOSURE_FLAG` bit set, then
 * the word (with the flag cleared) is treated as a pointer to the
 * finalizer closure, and the closure procedure is called for the area
 * right after that word.  Always returns zero.
 */
STATIC int GC_CALLBACK
GC_finalized_disclaim(void *obj)
{
  const struct GC_finalizer_closure *closure;
#  ifdef AO_HAVE_load
  ptr_t tagged_p = GC_cptr_load((volatile ptr_t *)obj);
#  else
  ptr_t tagged_p = *(ptr_t *)obj;
#  endif

  /*
   * The disclaim function may be passed fragments from the free-list,
   * on which it should not run finalization.  To recognize this case,
   * we use the fact that the value of the first pointer of such
   * fragments is always, at least, multiple of a pointer size (a link
   * to the next fragment, or `NULL`), thus the flag bit is not set.
   *
   * Note: if it is desirable to have a finalizer which does not use
   * the first pointer for storing the finalization information,
   * `GC_disclaim_and_reclaim()` must be extended to clear fragments
   * so that the assumption holds for the selected pointer location.
   */
  if (0 == (ADDR(tagged_p) & FINALIZER_CLOSURE_FLAG))
    return 0;

  closure = (struct GC_finalizer_closure *)CPTR_CLEAR_FLAGS(
      tagged_p, FINALIZER_CLOSURE_FLAG);
  GC_ASSERT(!GC_find_leak_inner);
  /* The client data start right after the closure pointer word. */
  closure->proc((ptr_t *)obj + 1, closure->cd);
  return 0;
}

/*
 * Store the disclaim procedure and the mark-unconditionally setting
 * into `GC_obj_kinds[kind]` entry.  Nothing is changed if
 * `GC_find_leak_inner` is set.  The value of `kind` should be less than
 * `MAXOBJKINDS` (checked only by an assertion).  Both callers in this
 * file invoke the function with the allocator lock held.
 */
STATIC void
GC_register_disclaim_proc_inner(unsigned kind, GC_disclaim_proc proc,
                                GC_bool mark_unconditionally)
{
  GC_ASSERT(kind < MAXOBJKINDS);
  if (UNLIKELY(GC_find_leak_inner))
    return;

  GC_obj_kinds[kind].ok_disclaim_proc = proc;
  GC_obj_kinds[kind].ok_mark_unconditionally = mark_unconditionally;
}

/*
 * Prepare the collector for `GC_finalized_malloc()` usage: register
 * the needed displacements, create the object kind for the finalized
 * objects and associate `GC_finalized_disclaim()` with it.  Only the
 * first call has an effect; the subsequent ones return right after
 * observing that `GC_finalized_kind` is already set.
 */
GC_API void GC_CALL
GC_init_finalized_malloc(void)
{
  /* Initialize the collector just in case it is not done yet. */
  GC_init();

  LOCK();
  if (0 == GC_finalized_kind) {
    /*
     * The finalizer closure is placed in the first pointer of the
     * object in order to use the lower bits to distinguish live
     * objects from objects on the free list.  The downside of this is
     * that we need one-pointer offset interior pointers, and that
     * `GC_base()` does not return the start of the user region.
     */
    GC_register_displacement_inner(sizeof(ptr_t));

    /*
     * And, the pointer to the finalizer closure object itself is
     * displaced due to baking in this indicator.
     */
    GC_register_displacement_inner(FINALIZER_CLOSURE_FLAG);
    GC_register_displacement_inner(sizeof(oh) | FINALIZER_CLOSURE_FLAG);

    GC_finalized_kind = GC_new_kind_inner(GC_new_free_list_inner(),
                                          GC_DS_LENGTH, TRUE, TRUE);
    GC_ASSERT(GC_finalized_kind != 0);
    GC_register_disclaim_proc_inner(GC_finalized_kind,
                                    GC_finalized_disclaim, TRUE);
  }
  UNLOCK();
}

/*
 * Register the disclaim procedure for the given object kind.  This is
 * a wrapper which acquires the allocator lock and passes the arguments
 * to `GC_register_disclaim_proc_inner()`.  Note that `kind` is cast to
 * an unsigned type, and its range is checked only by an assertion in
 * the inner function.
 */
GC_API void GC_CALL
GC_register_disclaim_proc(int kind, GC_disclaim_proc proc,
                          int mark_unconditionally)
{
  LOCK();
  GC_register_disclaim_proc_inner((unsigned)kind, proc,
                                  (GC_bool)mark_unconditionally);
  UNLOCK();
}

/*
 * Allocate an object of `lb` bytes of the finalized kind and attach
 * the finalizer closure `fclos` to it.  One extra pointer-sized word is
 * allocated at the beginning of the object to hold the closure pointer
 * with `FINALIZER_CLOSURE_FLAG` set; the returned pointer refers to the
 * area right after that word.  Returns `NULL` if the underlying
 * allocation returns `NULL`.  It is asserted that `GC_finalized_kind` is
 * set (as done by `GC_init_finalized_malloc()`), that `fclos` is
 * non-null and that its address does not have the flag bit set.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_finalized_malloc(size_t lb, const struct GC_finalizer_closure *fclos)
{
  void *op;
  ptr_t fc_p;

#  ifndef LINT2
  /* Actually, there is no data race because the variable is set once. */
  GC_ASSERT(GC_finalized_kind != 0);
#  endif
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fclos));
  GC_ASSERT((ADDR(fclos) & FINALIZER_CLOSURE_FLAG) == 0);
  /* The requested size is increased (with saturation) by one pointer. */
  op = GC_malloc_kind(SIZET_SAT_ADD(lb, sizeof(ptr_t)),
                      (int)GC_finalized_kind);
  if (UNLIKELY(NULL == op))
    return NULL;

  /*
   * Set the flag (w/o conversion to a numeric type) and store
   * the finalizer closure.
   */
  fc_p = CPTR_SET_FLAGS(GC_CAST_AWAY_CONST_PVOID(fclos),
                        FINALIZER_CLOSURE_FLAG);
#  ifdef AO_HAVE_store
  GC_cptr_store((volatile ptr_t *)op, fc_p);
#  else
  *(ptr_t *)op = fc_p;
#  endif
  GC_dirty(op);
  REACHABLE_AFTER_DIRTY(fc_p);
  /* Skip the closure pointer word. */
  return (ptr_t *)op + 1;
}

#endif /* ENABLE_DISCLAIM */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#include <string.h>

/*
 * Allocate a zero-filled reclaim list (of `MAXOBJGRANULES + 1` elements)
 * by `GC_scratch_alloc()` and store it to `ok->ok_reclaim_list`.
 * Returns `TRUE` on success, `FALSE` if the allocation has failed (the
 * kind entry is left unmodified in the latter case).  The allocator
 * lock should be held.
 */
STATIC GC_bool
GC_alloc_reclaim_list(struct obj_kind *ok)
{
  size_t list_bytes = (MAXOBJGRANULES + 1) * sizeof(struct hblk *);
  struct hblk **rlist;

  GC_ASSERT(I_HOLD_LOCK());
  rlist = (struct hblk **)GC_scratch_alloc(list_bytes);
  if (UNLIKELY(NULL == rlist))
    return FALSE;

  BZERO(rlist, list_bytes);
  ok->ok_reclaim_list = rlist;
  return TRUE;
}

/*
 * Allocate a large block of size `lb_adjusted` bytes with the requested
 * alignment (`align_m1 + 1`).  The block is not cleared.  We assume that
 * the size is nonzero and a multiple of `GC_GRANULE_BYTES`, and that
 * it already includes `EXTRA_BYTES` value.  The `flags` argument should
 * be `IGNORE_OFF_PAGE` or 0.  Calls `GC_allochblk()` to do the actual
 * allocation, but also triggers collection and/or heap expansion
 * as appropriate.  Updates value of `GC_bytes_allocd`; does also other
 * accounting.  Returns `NULL` if `GC_collect_or_expand()` reports
 * a failure.  The allocator lock should be held by the caller; it is
 * released temporarily if the collector needs to be initialized.
 */
STATIC ptr_t
GC_alloc_large(size_t lb_adjusted, int kind, unsigned flags, size_t align_m1)
{
  /*
   * TODO: It is unclear which retries limit is sufficient (value of 3 leads
   * to fail in some 32-bit applications, 10 is a kind of arbitrary value).
   */
#define MAX_ALLOCLARGE_RETRIES 10

  int retry_cnt;
  size_t n_blocks; /*< includes alignment */
  struct hblk *h;
  ptr_t result;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(lb_adjusted != 0 && (lb_adjusted & (GC_GRANULE_BYTES - 1)) == 0);
  n_blocks = OBJ_SZ_TO_BLOCKS_CHECKED(SIZET_SAT_ADD(lb_adjusted, align_m1));
  if (UNLIKELY(!GC_is_initialized)) {
    UNLOCK(); /*< just to unset `GC_lock_holder` */
    GC_init();
    LOCK();
  }
  /* Do our share of marking work. */
  if (GC_incremental && !GC_dont_gc) {
    GC_collect_a_little_inner(n_blocks);
  }

  h = GC_allochblk(lb_adjusted, kind, flags, align_m1);
#ifdef USE_MUNMAP
  /* Try once more if some unmapped blocks have been merged. */
  if (NULL == h && GC_merge_unmapped()) {
    h = GC_allochblk(lb_adjusted, kind, flags, align_m1);
  }
#endif
  for (retry_cnt = 0; NULL == h; retry_cnt++) {
    /*
     * Only a few iterations are expected at most, otherwise something
     * is wrong in one of the functions called below.
     */
    if (retry_cnt > MAX_ALLOCLARGE_RETRIES)
      ABORT("Too many retries in GC_alloc_large");
    /* The last argument is true for every attempt except the first. */
    if (UNLIKELY(!GC_collect_or_expand(n_blocks, flags, retry_cnt > 0)))
      return NULL;
    h = GC_allochblk(lb_adjusted, kind, flags, align_m1);
  }

  GC_bytes_allocd += lb_adjusted;
  if (lb_adjusted > HBLKSIZE) {
    /* Account the whole blocks, and track the maximum seen so far. */
    GC_large_allocd_bytes += HBLKSIZE * OBJ_SZ_TO_BLOCKS(lb_adjusted);
    if (GC_large_allocd_bytes > GC_max_large_allocd_bytes)
      GC_max_large_allocd_bytes = GC_large_allocd_bytes;
  }
  /* FIXME: Do we need some way to reset `GC_max_large_allocd_bytes`? */
  result = h->hb_body;
  GC_ASSERT((ADDR(result) & align_m1) == 0);
  return result;
}

/*
 * Allocate a large block of `lb_adjusted` bytes by `GC_alloc_large()`
 * (without any extra alignment) and zero-fill it if the debugging mode
 * is on or the object kind requires the initialization.  The size is
 * assumed to be nonzero, a multiple of `GC_GRANULE_BYTES`, and to
 * include `EXTRA_BYTES` value already.  `GC_bytes_allocd` is updated by
 * the callee.  Returns `NULL` on the allocation failure.  The allocator
 * lock should be held.
 */
STATIC ptr_t
GC_alloc_large_and_clear(size_t lb_adjusted, int kind, unsigned flags)
{
  ptr_t block;

  GC_ASSERT(I_HOLD_LOCK());
  block = GC_alloc_large(lb_adjusted, kind, flags, 0 /* `align_m1` */);
  if (LIKELY(block != NULL)) {
    if (GC_debugging_started || GC_obj_kinds[kind].ok_init) {
      /* Clear the whole block, in case of `GC_realloc` call. */
      BZERO(block, HBLKSIZE * OBJ_SZ_TO_BLOCKS(lb_adjusted));
    }
  }
  return block;
}

/*
 * Fill in additional entries in `GC_size_map`, including the `i`-th one
 * (which should be zero on entry).  Note that a filled in section of
 * the array ending at `n` always has the length of at least `n / 4`.
 * The allocator lock should be held.
 */
STATIC void
GC_extend_size_map(size_t i)
{
  size_t requested_lg = ALLOC_REQUEST_GRANS(i);
  /*
   * The size we try to preserve.  Close to `i`, unless this would
   * introduce too many distinct sizes.
   */
  size_t requested_bytes = GRANULES_TO_BYTES(requested_lg);
  size_t near_i = requested_bytes - (requested_bytes >> 3);
  int near_i_filled;
  /* The lowest and the highest indexed entries we initialize. */
  size_t first_idx;
  size_t last_idx;
  size_t lg = requested_lg;
  size_t objs_per_block;
  size_t idx;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(0 == GC_size_map[i]);

  near_i_filled = GC_size_map[near_i] != 0;
  if (near_i_filled) {
    first_idx = near_i + 1;
  } else {
    /* Start from an index which is much smaller than `i`. */
    first_idx = requested_bytes - (requested_bytes >> 2);
  }
  /* Skip the entries which are filled in already. */
  while (GC_size_map[first_idx] != 0)
    first_idx++;

  if (near_i_filled) {
    size_t grown_lg = ALLOC_REQUEST_GRANS(first_idx);

    grown_lg += grown_lg >> 3;
    if (grown_lg > requested_lg)
      lg = grown_lg;
  }

  /*
   * For these larger sizes, we use an even number of granules.
   * This makes it easier to, e.g., construct a 16-byte-aligned
   * allocator even if `GC_GRANULE_BYTES` is 8.
   */
  lg = (lg + 1) & ~(size_t)1;
  if (lg > MAXOBJGRANULES)
    lg = MAXOBJGRANULES;

  /* If we can fit the same number of larger objects in a block, do so. */
  GC_ASSERT(lg != 0);
  objs_per_block = HBLK_GRANULES / lg;
  GC_ASSERT(objs_per_block != 0);
  lg = (HBLK_GRANULES / objs_per_block) & ~(size_t)1;

  /*
   * We may need one extra byte; do not always fill in
   * `GC_size_map[requested_bytes]`.
   */
  last_idx = GRANULES_TO_BYTES(lg) - EXTRA_BYTES;

  for (idx = first_idx; idx <= last_idx; idx++)
    GC_size_map[idx] = lg;
}

/*
 * Allocate a small object of `lb` bytes of the given kind.  The object
 * is taken from the free list which corresponds to `GC_size_map[lb]`
 * granules; if the map entry is zero, then the collector is initialized
 * (if not yet) and, if needed, the size map is extended.  If the free
 * list is empty, then `GC_allocobj()` is called to refill it (the
 * reclaim list of the kind is allocated first if missing).  Returns
 * `NULL` on failure.  The link field of the result is cleared, and
 * `GC_bytes_allocd` is updated.  The allocator lock should be held by
 * the caller; it is released temporarily only around `GC_init()` call.
 */
STATIC void *
GC_generic_malloc_inner_small(size_t lb, int kind)
{
  struct obj_kind *ok = &GC_obj_kinds[kind];
  size_t lg = GC_size_map[lb];
  void **opp = &ok->ok_freelist[lg];
  void *op = *opp;

  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(NULL == op)) {
    if (0 == lg) {
      if (UNLIKELY(!GC_is_initialized)) {
        UNLOCK(); /*< just to unset `GC_lock_holder` */
        GC_init();
        LOCK();
        /* Reload the entry after the initialization. */
        lg = GC_size_map[lb];
      }
      if (0 == lg) {
        GC_extend_size_map(lb);
        lg = GC_size_map[lb];
        GC_ASSERT(lg != 0);
      }
      /* Retry. */
      opp = &ok->ok_freelist[lg];
      op = *opp;
    }
    if (NULL == op) {
      if (NULL == ok->ok_reclaim_list && !GC_alloc_reclaim_list(ok))
        return NULL;
      op = GC_allocobj(lg, kind);
      if (NULL == op)
        return NULL;
    }
  }
  /* Remove the object from the head of the free list. */
  *opp = obj_link(op);
  obj_link(op) = NULL;
  GC_bytes_allocd += GRANULES_TO_BYTES((word)lg);
  return op;
}

/*
 * Allocate an object of `lb` bytes of the given kind with the allocator
 * lock held by the caller.  A small request (as determined by
 * `SMALL_OBJ()`) is served by `GC_generic_malloc_inner_small()` and
 * `flags` is not used in that case; otherwise the size is rounded up to
 * a multiple of the granule size and passed to
 * `GC_alloc_large_and_clear()`.  The extra bytes are not added to
 * the size if `IGNORE_OFF_PAGE` flag is set and `lb` is not smaller than
 * `HBLKSIZE`.  Returns `NULL` on failure.
 */
GC_INNER void *
GC_generic_malloc_inner(size_t lb, int kind, unsigned flags)
{
  size_t lb_adjusted;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(kind < MAXOBJKINDS);
  if (SMALL_OBJ(lb)) {
    return GC_generic_malloc_inner_small(lb, kind);
  }

#if MAX_EXTRA_BYTES > 0
  if ((flags & IGNORE_OFF_PAGE) != 0 && lb >= HBLKSIZE) {
    /* No need to add `EXTRA_BYTES`. */
    lb_adjusted = lb;
  } else
#endif
  /* else */ {
    lb_adjusted = ADD_EXTRA_BYTES(lb);
  }
  return GC_alloc_large_and_clear(ROUNDUP_GRANULE_SIZE(lb_adjusted), kind,
                                  flags);
}

#ifdef GC_COLLECT_AT_MALLOC
/*
 * A variable defined only if `GC_COLLECT_AT_MALLOC` macro is defined,
 * and initialized to the value of that macro (or to some fixed value if
 * checked by Cppcheck).  Presumably, this is the minimal allocation
 * request size examined by `GC_DBG_COLLECT_AT_MALLOC()`, which is
 * defined outside this part of the file - to be confirmed there.
 */
#  if defined(CPPCHECK)
size_t GC_dbg_collect_at_malloc_min_lb = 16 * 1024; /*< some value */
#  else
size_t GC_dbg_collect_at_malloc_min_lb = (GC_COLLECT_AT_MALLOC);
#  endif
#endif

/*
 * Allocate an object of `lb` bytes of the given kind with the requested
 * alignment (`align_m1 + 1`).  The allocator lock is acquired inside.
 * Before the allocation, the pending errors are printed (if any) and
 * `GC_notify_or_invoke_finalizers()` is called.  A small request with
 * the alignment not exceeding the granule size is served by
 * `GC_generic_malloc_inner_small()`; otherwise `GC_alloc_large()` is
 * used, with `align_m1` reset to zero if it is below the granule size,
 * or raised to `HBLKSIZE - 1` if it is below `HBLKSIZE`.  In the large
 * case, the block is cleared if the debugging mode is on or the kind
 * requires the initialization; in the multi-threaded case most of such
 * clearing is performed after the allocator lock is released.
 * If the allocation fails, then the result of the out-of-memory function
 * (obtained by `GC_get_oom_fn()`) is returned.
 */
GC_INNER void *
GC_generic_malloc_aligned(size_t lb, int kind, unsigned flags, size_t align_m1)
{
  void *result;

  GC_ASSERT(kind < MAXOBJKINDS);
  if (UNLIKELY(get_have_errors()))
    GC_print_all_errors();
  GC_notify_or_invoke_finalizers();
  GC_DBG_COLLECT_AT_MALLOC(lb);
  if (SMALL_OBJ(lb) && LIKELY(align_m1 < GC_GRANULE_BYTES)) {
    LOCK();
    result = GC_generic_malloc_inner_small(lb, kind);
    UNLOCK();
  } else {
#ifdef THREADS
    size_t lg;
#endif
    size_t lb_adjusted;
    GC_bool init;

#if MAX_EXTRA_BYTES > 0
    if ((flags & IGNORE_OFF_PAGE) != 0 && lb >= HBLKSIZE) {
      /* No need to add `EXTRA_BYTES`. */
      lb_adjusted = ROUNDUP_GRANULE_SIZE(lb);
#  ifdef THREADS
      lg = BYTES_TO_GRANULES(lb_adjusted);
#  endif
    } else
#endif
    /* else */ {
#ifndef THREADS
      size_t lg; /*< CPPCHECK */
#endif

      /* A zero-sized request is treated as a request of one byte. */
      if (UNLIKELY(0 == lb))
        lb = 1;
      lg = ALLOC_REQUEST_GRANS(lb);
      lb_adjusted = GRANULES_TO_BYTES(lg);
    }

    /* Note: the value is read before the allocator lock is acquired. */
    init = GC_obj_kinds[kind].ok_init;
    if (LIKELY(align_m1 < GC_GRANULE_BYTES)) {
      align_m1 = 0;
    } else if (align_m1 < HBLKSIZE) {
      align_m1 = HBLKSIZE - 1;
    }
    LOCK();
    result = GC_alloc_large(lb_adjusted, kind, flags, align_m1);
    if (LIKELY(result != NULL)) {
      if (GC_debugging_started
#ifndef THREADS
          || init
#endif
      ) {
        BZERO(result, HBLKSIZE * OBJ_SZ_TO_BLOCKS(lb_adjusted));
      } else {
#ifdef THREADS
        GC_ASSERT(GRANULES_TO_PTRS(lg) >= 2);
        /*
         * Clear any memory that might be used for the GC descriptors
         * before we release the allocator lock.
         */
        ((ptr_t *)result)[0] = NULL;
        ((ptr_t *)result)[1] = NULL;
        ((ptr_t *)result)[GRANULES_TO_PTRS(lg) - 1] = NULL;
        ((ptr_t *)result)[GRANULES_TO_PTRS(lg) - 2] = NULL;
#endif
      }
    }
    UNLOCK();
#ifdef THREADS
    if (init && !GC_debugging_started && result != NULL) {
      /* Clear the rest (i.e. excluding the initial 2 words). */
      BZERO((ptr_t *)result + 2,
            HBLKSIZE * OBJ_SZ_TO_BLOCKS(lb_adjusted) - 2 * sizeof(ptr_t));
    }
#endif
  }
  if (UNLIKELY(NULL == result)) {
    result = (*GC_get_oom_fn())(lb);
    /* Note: result might be misaligned. */
  }
  return result;
}

/*
 * Allocate an object of `lb` bytes of the given kind.  This is
 * `GC_generic_malloc_aligned()` called with zero `flags` and without
 * any alignment requirement.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_generic_malloc(size_t lb, int kind)
{
  return GC_generic_malloc_aligned(lb, kind, 0 /* `flags` */,
                                   0 /* `align_m1` */);
}

/*
 * Allocate an object of `lb` bytes of the given kind using the global
 * free lists.  This is `GC_malloc_kind_aligned_global()` called without
 * any alignment requirement.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_kind_global(size_t lb, int kind)
{
  return GC_malloc_kind_aligned_global(lb, kind, 0 /* `align_m1` */);
}

/*
 * Allocate an object of `lb` bytes of the given kind with the requested
 * alignment (`align_m1 + 1`).  For a small request with the alignment
 * below half of `HBLKSIZE`, the global free list of the relevant size is
 * examined first (with the allocator lock held): the first object (or,
 * if the alignment exceeds the granule size, the first suitably aligned
 * one) is removed from the list and returned.  In all other cases,
 * including an empty free list, the result of
 * `GC_generic_malloc_aligned()` is passed through `GC_clear_stack()` and
 * returned.
 */
GC_INNER void *
GC_malloc_kind_aligned_global(size_t lb, int kind, size_t align_m1)
{
  GC_ASSERT(kind < MAXOBJKINDS);
  if (SMALL_OBJ(lb) && LIKELY(align_m1 < HBLKSIZE / 2)) {
    void *op;
    void **opp;
    size_t lg;

    GC_DBG_COLLECT_AT_MALLOC(lb);
    LOCK();
    lg = GC_size_map[lb];
    opp = &GC_obj_kinds[kind].ok_freelist[lg];
    op = *opp;
    if (UNLIKELY(align_m1 >= GC_GRANULE_BYTES)) {
      /* TODO: Avoid linear search. */
      /*
       * Note: the search ends at the end of the list too, as the null
       * pointer passes the alignment test.
       */
      for (; (ADDR(op) & align_m1) != 0; op = *opp) {
        opp = &obj_link(op);
      }
    }
    if (LIKELY(op != NULL)) {
      GC_ASSERT(PTRFREE == kind || NULL == obj_link(op)
                || (ADDR(obj_link(op)) < GC_greatest_real_heap_addr
                    && GC_least_real_heap_addr < ADDR(obj_link(op))));
      /* Unlink the object; `opp` refers to the link pointing to it. */
      *opp = obj_link(op);
      /* The link field is left as is in a pointer-free object. */
      if (kind != PTRFREE)
        obj_link(op) = NULL;
      GC_bytes_allocd += GRANULES_TO_BYTES((word)lg);
      UNLOCK();
      GC_ASSERT((ADDR(op) & align_m1) == 0);
      return op;
    }
    UNLOCK();
  }

  /*
   * We make the `GC_clear_stack()` call a tail one, hoping to get more
   * of the stack.
   */
  return GC_clear_stack(
      GC_generic_malloc_aligned(lb, kind, 0 /* `flags` */, align_m1));
}

#if defined(THREADS) && !defined(THREAD_LOCAL_ALLOC)
/*
 * Allocate an object of `lb` bytes of the given kind.  This definition
 * is compiled only in the multi-threaded case without the thread-local
 * allocation support, and just calls `GC_malloc_kind_global()`.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_kind(size_t lb, int kind)
{
  return GC_malloc_kind_global(lb, kind);
}
#endif

/*
 * Allocate `lb` bytes of atomic (pointer-free) data, i.e. an object of
 * `PTRFREE` kind.  The result is that of `GC_malloc_kind()`.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_atomic(size_t lb)
{
  /* Allocate `lb` bytes of atomic (pointer-free) data. */
  return GC_malloc_kind(lb, PTRFREE);
}

/*
 * Allocate `lb` bytes of composite (pointerful) data, i.e. an object of
 * `NORMAL` kind.  The result is that of `GC_malloc_kind()`.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc(size_t lb)
{
  /* Allocate `lb` bytes of composite (pointerful) data. */
  return GC_malloc_kind(lb, NORMAL);
}

/*
 * Allocate an object of `lb` bytes of the given (uncollectable) kind.
 * If `EXTRA_BYTES` is nonzero, then a nonzero `lb` is decremented first.
 * A small object is taken from the free list of the kind with the
 * allocator lock held (refilling the list by
 * `GC_generic_malloc_inner_small()` if empty); in case of a failure,
 * the result of the out-of-memory function called with the original
 * size is returned.  A large object is allocated by
 * `GC_generic_malloc_aligned()`, then its mark bit is set and the marks
 * count of its block is set to one.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_generic_malloc_uncollectable(size_t lb, int kind)
{
  void *op;
  size_t lb_orig = lb;

  GC_ASSERT(kind < MAXOBJKINDS);
  if (EXTRA_BYTES != 0 && LIKELY(lb != 0)) {
    /*
     * We do not need the extra byte, since this will not be collected
     * anyway.
     */
    lb--;
  }

  if (SMALL_OBJ(lb)) {
    void **opp;
    size_t lg;

    if (UNLIKELY(get_have_errors()))
      GC_print_all_errors();
    GC_notify_or_invoke_finalizers();
    GC_DBG_COLLECT_AT_MALLOC(lb_orig);
    LOCK();
    lg = GC_size_map[lb];
    opp = &GC_obj_kinds[kind].ok_freelist[lg];
    op = *opp;
    if (LIKELY(op != NULL)) {
      /* Fast path: remove the object from the head of the free list. */
      *opp = obj_link(op);
      obj_link(op) = 0;
      GC_bytes_allocd += GRANULES_TO_BYTES((word)lg);
      /*
       * Mark bit was already set on free list.  It will be cleared only
       * temporarily during a collection, as a result of the normal
       * free-list mark bit clearing.
       */
      GC_non_gc_bytes += GRANULES_TO_BYTES((word)lg);
    } else {
      op = GC_generic_malloc_inner_small(lb, kind);
      if (NULL == op) {
        /* Fetch the handler with the lock held, call it without. */
        GC_oom_func oom_fn = GC_oom_fn;
        UNLOCK();
        return (*oom_fn)(lb_orig);
      }
      /* For small objects, the free lists are completely marked. */
    }
    GC_ASSERT(GC_is_marked(op));
    UNLOCK();
  } else {
    op = GC_generic_malloc_aligned(lb, kind, 0 /* `flags` */,
                                   0 /* `align_m1` */);
    if (op /* `!= NULL` */) { /*< CPPCHECK */
      hdr *hhdr = HDR(op);

      GC_ASSERT(HBLKDISPL(op) == 0); /*< large block */

      /*
       * We do not need to acquire the allocator lock before `HDR(op)`,
       * since we have an undisguised pointer, but we need it while we
       * adjust the mark bits.
       */
      LOCK();
      set_mark_bit_from_hdr(hhdr, 0); /*< the only object */
#ifndef THREADS
      /*
       * This is not guaranteed in the multi-threaded case because the
       * counter could be updated before locking.
       */
      GC_ASSERT(hhdr->hb_n_marks == 0);
#endif
      hhdr->hb_n_marks = 1;
      UNLOCK();
    }
  }
  return op;
}

/*
 * Allocate `lb` bytes of pointerful, traced, but not collectible data,
 * i.e. an object of `UNCOLLECTABLE` kind.  The result is that of
 * `GC_generic_malloc_uncollectable()`.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_uncollectable(size_t lb)
{
  /* Allocate `lb` bytes of pointerful, traced, but not collectible data. */
  return GC_generic_malloc_uncollectable(lb, UNCOLLECTABLE);
}

#ifdef GC_ATOMIC_UNCOLLECTABLE
/*
 * Allocate `lb` bytes as an object of `AUNCOLLECTABLE` kind.
 * The result is that of `GC_generic_malloc_uncollectable()`.  Available
 * only if `GC_ATOMIC_UNCOLLECTABLE` macro is defined.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_atomic_uncollectable(size_t lb)
{
  return GC_generic_malloc_uncollectable(lb, AUNCOLLECTABLE);
}
#endif /* GC_ATOMIC_UNCOLLECTABLE */

#if defined(REDIRECT_MALLOC) && !defined(REDIRECT_MALLOC_IN_HEADER)

#  ifndef MSWINCE
#    include <errno.h>
#  endif

/*
 * Avoid unnecessary nested procedure calls here, by `#define` some `malloc`
 * replacements.  Otherwise we end up saving a meaningless return address in
 * the object.  It also speeds things up, but it is admittedly quite ugly.
 */
#  define GC_debug_malloc_replacement(lb) GC_debug_malloc(lb, GC_DBG_EXTRAS)

#  if defined(CPPCHECK)
#    define REDIRECT_MALLOC_F GC_malloc /*< e.g. */
#  else
#    define REDIRECT_MALLOC_F REDIRECT_MALLOC
#  endif

void *
malloc(size_t lb)
{
  void *result;

#  if defined(SOLARIS) && defined(THREADS) && defined(I386)
  /*
   * Thread initialization can call `malloc` before the collector is
   * ready; such early requests are served directly by `sbrk`.  It is
   * not clear that this is enough to help matters, as the thread
   * implementation may well call `malloc` at other inopportune times.
   */
  if (UNLIKELY(!GC_is_initialized)) {
    return sbrk(lb);
  }
#  endif
  /*
   * Forward the request to the configured replacement allocator.
   * Manually inlining the `GC_malloc` call might help, but any decent
   * compiler should reduce the extra procedure call to at most a jump
   * instruction.
   */
  result = (void *)REDIRECT_MALLOC_F(lb);
  return result;
}

#  ifdef REDIR_MALLOC_AND_LINUXTHREADS
#    ifdef HAVE_LIBPTHREAD_SO
/* The bounds filled in by `GC_text_mapping("libpthread-", ...)`. */
STATIC ptr_t GC_libpthread_start = NULL;
STATIC ptr_t GC_libpthread_end = NULL;
#    endif
/* The bounds filled in by `GC_text_mapping("ld-", ...)`. */
STATIC ptr_t GC_libld_start = NULL;
STATIC ptr_t GC_libld_end = NULL;
/* Set to `TRUE` once `GC_init_lib_bounds()` has completed. */
static GC_bool lib_bounds_set = FALSE;

/*
 * Look up (only once) the text mappings of the dynamic loader and, if
 * `HAVE_LIBPTHREAD_SO` is defined, of `libpthread`, and record their
 * bounds in the variables above.  The redirected `calloc` and `free`
 * compare their caller address against these bounds.  A failure to find
 * a mapping is only reported by a warning.
 */
GC_INNER void
GC_init_lib_bounds(void)
{
  IF_CANCEL(int cancel_state;)

  /*
   * This test does not need to ensure memory visibility, since the bounds
   * will be set when/if we create another thread.
   */
  if (LIKELY(lib_bounds_set))
    return;

  /* The lookup is performed with the thread cancellation disabled. */
  DISABLE_CANCEL(cancel_state);
  GC_init(); /*< if not called yet */

#    if defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED)
  LOCK(); /*< just to set `GC_lock_holder` */
#    endif
#    ifdef HAVE_LIBPTHREAD_SO
  if (!GC_text_mapping("libpthread-", &GC_libpthread_start,
                       &GC_libpthread_end)) {
    WARN("Failed to find libpthread.so text mapping: Expect crash\n", 0);
    /*
     * This might still work with some versions of `libpthread`,
     * so we do not `abort`.
     */
  }
#    endif
  if (!GC_text_mapping("ld-", &GC_libld_start, &GC_libld_end)) {
    WARN("Failed to find ld.so text mapping: Expect crash\n", 0);
  }
#    if defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED)
  UNLOCK();
#    endif
  RESTORE_CANCEL(cancel_state);
  /* Mark the bounds as computed, even if some lookup has failed. */
  lib_bounds_set = TRUE;
}
#  endif /* REDIR_MALLOC_AND_LINUXTHREADS */

/*
 * The replacement for the standard `calloc`: allocate `n * lb` bytes
 * using the configured replacement allocator.  If the product overflows,
 * then the out-of-memory function is invoked (with `GC_SIZE_MAX` as the
 * argument) and its result is returned.
 */
void *
calloc(size_t n, size_t lb)
{
  /*
   * The division is performed only if at least one of the operands is
   * greater than `GC_SQRT_SIZE_MAX`; `lb` is checked for zero to avoid
   * the division by zero.
   */
  if (UNLIKELY((lb | n) > GC_SQRT_SIZE_MAX) /*< fast initial test */
      && lb && n > GC_SIZE_MAX / lb)
    return (*GC_get_oom_fn())(GC_SIZE_MAX); /*< `n * lb` overflow */
#  ifdef REDIR_MALLOC_AND_LINUXTHREADS
  /*
   * The linker may allocate some memory that is only pointed to by
   * memory-mapped thread stacks.  Make sure it is not collectible.
   */
  {
    /* Note: the return address must be obtained in this very function. */
    ptr_t caller = (ptr_t)__builtin_return_address(0);

    GC_init_lib_bounds();
    if (ADDR_INSIDE(caller, GC_libld_start, GC_libld_end)
#    ifdef HAVE_LIBPTHREAD_SO
        /*
         * Note: the two ranges are actually usually adjacent, so there
         * may be a way to speed this up.
         */
        || ADDR_INSIDE(caller, GC_libpthread_start, GC_libpthread_end)
#    endif
    ) {
      return GC_generic_malloc_uncollectable(n * lb, UNCOLLECTABLE);
    }
  }
#  endif
  return (void *)REDIRECT_MALLOC_F(n * lb);
}

#  ifndef strdup
/*
 * The replacement for `strdup`: return a copy of the string `s`
 * (including the terminating NUL character) allocated by the configured
 * replacement allocator.  `s` should be non-`NULL`.  Return `NULL` if
 * the allocation fails; in the latter case `errno` is set to `ENOMEM`
 * (unless on WinCE where `errno.h` file is not included).
 */
char *
strdup(const char *s)
{
  size_t lb = strlen(s) + 1;
  char *result = (char *)REDIRECT_MALLOC_F(lb);

  if (UNLIKELY(NULL == result)) {
    /* Same guard as in `GC_strdup`: there is no `errno` on WinCE. */
#    ifndef MSWINCE
    errno = ENOMEM;
#    endif
    return NULL;
  }
  BCOPY(s, result, lb);
  return result;
}
#  else
/*
 * If `strdup` is macro defined, we assume that it actually calls `malloc`,
 * and thus the right thing will happen even without overriding it.
 * This seems to be true on most Linux systems.
 */
#  endif /* strdup */

#  ifndef strndup
/* This is similar to `strdup()`. */
/*
 * The replacement for `strndup`: return a NUL-terminated copy of at most
 * `size` leading characters of `str`, allocated by the configured
 * replacement allocator.  `str` should be non-`NULL`.  Return `NULL` if
 * the allocation fails; in the latter case `errno` is set to `ENOMEM`
 * (unless on WinCE where `errno.h` file is not included).
 */
char *
strndup(const char *str, size_t size)
{
  char *copy;
  /*
   * Do not examine more than `size` characters: the source is not
   * required to be NUL-terminated if it is at least `size` characters
   * long (and there is no point in scanning a long string to the end).
   */
  const char *nul = (const char *)memchr(str, '\0', size);
  size_t len = NULL == nul ? size : (size_t)(nul - str);

  copy = (char *)REDIRECT_MALLOC_F(len + 1);
  if (UNLIKELY(NULL == copy)) {
#    ifndef MSWINCE
    errno = ENOMEM;
#    endif
    return NULL;
  }
  if (LIKELY(len > 0))
    BCOPY(str, copy, len);
  copy[len] = '\0';
  return copy;
}
#  endif /* !strndup */

#  undef GC_debug_malloc_replacement

#endif /* REDIRECT_MALLOC */

/*
 * Explicitly deallocate the object `p`; `hhdr` should be the header of
 * the heap block containing `p`.  The freed-bytes counter (and, for an
 * uncollectable kind, the non-collectable bytes counter) is updated.
 * A large object is returned to the heap block allocator, while a small
 * one is pushed onto the free list of its kind and size.
 */
static void
free_internal(void *p, const hdr *hhdr)
{
  size_t nbytes = hhdr->hb_sz;
  size_t ngranules = BYTES_TO_GRANULES(nbytes);
  int k = hhdr->hb_obj_kind;
  struct obj_kind *ok;
  void **flh;

  GC_bytes_freed += nbytes;
  if (IS_UNCOLLECTABLE(k))
    GC_non_gc_bytes -= nbytes;

  if (UNLIKELY(ngranules > MAXOBJGRANULES)) {
    /* A large object: release the whole block. */
    if (nbytes > HBLKSIZE) {
      GC_large_allocd_bytes -= HBLKSIZE * OBJ_SZ_TO_BLOCKS(nbytes);
    }
    GC_ASSERT(ADDR(HBLKPTR(p)) == ADDR(hhdr->hb_block));
    GC_freehblk(hhdr->hb_block);
    return;
  }

  /*
   * A small object.  There is no need to clear its mark bit: it does
   * not matter if the object is reallocated, and otherwise the collector
   * will clear the bit since the object is on a free list.
   */
  ok = &GC_obj_kinds[k];
  if (ok->ok_init && LIKELY(nbytes > sizeof(ptr_t))) {
    /* Clear everything except for the first word (the link field). */
    BZERO((ptr_t *)p + 1, nbytes - sizeof(ptr_t));
  }

  /* Push the object onto the head of the free list. */
  flh = &ok->ok_freelist[ngranules];
  obj_link(p) = *flh;
  *flh = (ptr_t)p;
}

/*
 * Explicitly deallocate the object `p`, acquiring the allocator lock for
 * the deallocation itself.  Does nothing if `p` is `NULL`.  Otherwise `p`
 * is expected to point to the base of an object (this is checked only
 * by an assertion).
 */
GC_API void GC_CALL
GC_free(void *p)
{
  const hdr *hhdr;

  /* The test is written this way to make cppcheck happy. */
  if (p /* `!= NULL` */) {
    /* CPPCHECK */
  } else {
    /* Required by ANSI.  It is not my fault... */
    return;
  }

#ifdef LOG_ALLOCS
  GC_log_printf("GC_free(%p) after GC #%lu\n", p, (unsigned long)GC_gc_no);
#endif
  /* The header is looked up before acquiring the allocator lock. */
  hhdr = HDR(p);
#if defined(REDIRECT_MALLOC)                                           \
    && ((defined(NEED_CALLINFO) && defined(GC_HAVE_BUILTIN_BACKTRACE)) \
        || defined(REDIR_MALLOC_AND_LINUXTHREADS)                      \
        || (defined(SOLARIS) && defined(THREADS)) || defined(MSWIN32))
  /*
   * This might be called indirectly by `GC_print_callers` to free the
   * result of `backtrace_symbols()`.  For Solaris, we have to redirect
   * `malloc` calls during initialization.  For the others, this seems
   * to happen implicitly.  Do not try to deallocate that memory.
   */
  if (UNLIKELY(NULL == hhdr))
    return;
#endif
  GC_ASSERT(GC_base(p) == p);
  LOCK();
  free_internal(p, hhdr);
  FREE_PROFILER_HOOK(p);
  UNLOCK();
}

#ifdef THREADS
GC_INNER void
GC_free_inner(void *p)
{
  GC_ASSERT(I_HOLD_LOCK());
  free_internal(p, HDR(p));
}
#endif /* THREADS */

#if defined(REDIRECT_MALLOC) && !defined(REDIRECT_FREE)
#  define REDIRECT_FREE GC_free
#endif

#if defined(REDIRECT_FREE) && !defined(REDIRECT_MALLOC_IN_HEADER)

#  if defined(CPPCHECK)
#    define REDIRECT_FREE_F GC_free /*< e.g. */
#  else
#    define REDIRECT_FREE_F REDIRECT_FREE
#  endif

/*
 * The replacement for the standard `free`.  Does nothing if `IGNORE_FREE`
 * is defined.  Otherwise the call is forwarded to the configured
 * deallocation function, except for the calls coming from the dynamic
 * loader (or `libpthread`) code, which are always served by `GC_free`.
 */
void
free(void *p)
{
#  ifdef IGNORE_FREE
  UNUSED_ARG(p);
#  else
#    if defined(REDIR_MALLOC_AND_LINUXTHREADS) \
        && !defined(USE_PROC_FOR_LIBRARIES)
  /*
   * Do not bother with initialization checks.  If nothing has been
   * initialized, then the check fails, and that is safe, since we have
   * not allocated uncollectible objects either.
   */
  ptr_t caller = (ptr_t)__builtin_return_address(0);

  /*
   * This test does not need to ensure memory visibility, since the bounds
   * will be set when/if we create another thread.
   */
  if (ADDR_INSIDE(caller, GC_libld_start, GC_libld_end)
#      ifdef HAVE_LIBPTHREAD_SO
      || ADDR_INSIDE(caller, GC_libpthread_start, GC_libpthread_end)
#      endif
  ) {
    GC_free(p);
    return;
  }
#    endif
  REDIRECT_FREE_F(p);
#  endif
}
#endif /* REDIRECT_FREE */

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
 * Copyright (c) 2009-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


/*
 * These are extra allocation routines that are likely to be less
 * frequently used than those in `malloc.c` file.  They are separate in
 * the hope that the `.o` file will be excluded from statically linked
 * executables.  We should probably break this up further.
 */

#include <string.h>

#ifndef MSWINCE
#  include <errno.h>
#endif

/*
 * Some externally visible but unadvertised variables to allow access
 * to the free lists from inlined allocators without including
 * `gc_priv.h` file or introducing dependencies on the internal data
 * structure layouts.  Each variable is a constant pointer to the
 * free-list array of the same name (without `_ptr` suffix).
 */
void **const GC_objfreelist_ptr = GC_objfreelist;
void **const GC_aobjfreelist_ptr = GC_aobjfreelist;
void **const GC_uobjfreelist_ptr = GC_uobjfreelist;
#ifdef GC_ATOMIC_UNCOLLECTABLE
void **const GC_auobjfreelist_ptr = GC_auobjfreelist;
#endif

/*
 * Return the kind of the object `p` as recorded in the header of its
 * heap block.  If `psize` is non-`NULL`, then the object size (in bytes)
 * recorded in the header is stored to `*psize`.  No check is made that
 * a header exists for `p`.
 */
GC_API int GC_CALL
GC_get_kind_and_size(const void *p, size_t *psize)
{
  const hdr *block_hdr = HDR(p);
  int kind = block_hdr->hb_obj_kind;

  if (NULL == psize)
    return kind;

  *psize = block_hdr->hb_sz;
  return kind;
}

/*
 * Allocate `lb` bytes of the given `kind`, dispatching the request to
 * the allocation routine that handles such a kind.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_generic_or_special_malloc(size_t lb, int kind)
{
  if (PTRFREE == kind || NORMAL == kind)
    return GC_malloc_kind(lb, kind);

  if (UNCOLLECTABLE == kind
#ifdef GC_ATOMIC_UNCOLLECTABLE
      || AUNCOLLECTABLE == kind
#endif
  )
    return GC_generic_malloc_uncollectable(lb, kind);

  /* Any other kind is served by the generic allocator. */
  return GC_generic_malloc_aligned(lb, kind, 0 /* `flags` */,
                                   0 /* `align_m1` */);
}

/*
 * Change the size of the object `p` to `lb` bytes.
 *   - If `p` is `NULL`, then this is the same as `GC_malloc(lb)`.
 *   - If `lb` is zero, then `p` is deallocated (unless `IGNORE_FREE` is
 *     defined) and `NULL` is returned.
 *   - If the request fits into the current storage and is not smaller
 *     than a half of it, then `p` itself is returned (with the no longer
 *     needed tail cleared).
 *   - Otherwise, a new object of the same kind is allocated, the data is
 *     copied to it, and the old object is deallocated (unless
 *     `IGNORE_FREE` is defined).  If the allocation fails, then `NULL` is
 *     returned and the old object is left intact.
 */
GC_API void *GC_CALL
GC_realloc(void *p, size_t lb)
{
  hdr *hhdr;
  void *result;
#if defined(_FORTIFY_SOURCE) && defined(__GNUC__) && !defined(__clang__)
  /*
   * Use `cleared_p` instead of `p` as a workaround to avoid passing
   * `alloc_size(lb)` attribute associated with `p` to `memset`
   * (including a `memset` call inside `GC_free`).
   */
  volatile GC_uintptr_t cleared_p = (GC_uintptr_t)p;
#else
#  define cleared_p p
#endif
  size_t sz;      /*< current size in bytes */
  size_t orig_sz; /*< original `sz` (in bytes) */
  int obj_kind;

  if (NULL == p) {
    /* Required by ANSI. */
    return GC_malloc(lb);
  }
  if (0 == lb) /* `&& p != NULL` */ {
#ifndef IGNORE_FREE
    GC_free(p);
#endif
    return NULL;
  }
  /* Fetch the current size and the kind from the block header. */
  hhdr = HDR(HBLKPTR(p));
  sz = hhdr->hb_sz;
  obj_kind = hhdr->hb_obj_kind;
  orig_sz = sz;

  if (sz > MAXOBJBYTES) {
    /*
     * A large object: extend its recorded size to cover the whole
     * heap blocks and recompute the mark descriptor accordingly.
     */
    const struct obj_kind *ok = &GC_obj_kinds[obj_kind];
    word descr = ok->ok_descriptor;

    /* Round it up to the next whole heap block. */
    sz = (sz + HBLKSIZE - 1) & ~(HBLKSIZE - 1);
#if ALIGNMENT > GC_DS_TAGS
    /*
     * An extra byte is not added in case of ignore-off-page allocated
     * objects not smaller than `HBLKSIZE`.
     */
    GC_ASSERT(sz >= HBLKSIZE);
    if (EXTRA_BYTES != 0 && (hhdr->hb_flags & IGNORE_OFF_PAGE) != 0
        && obj_kind == NORMAL)
      descr += ALIGNMENT; /*< or set to 0 */
#endif
    if (ok->ok_relocate_descr) {
      descr += sz;
    }

    /*
     * `GC_realloc` might be changing the block size while
     * `GC_reclaim_block` or `GC_clear_hdr_marks` is examining it.
     * The change to the size field is benign, in that `GC_reclaim`
     * (and `GC_clear_hdr_marks`) would work correctly with either
     * value, since we are not changing the number of objects in
     * the block.  But seeing a half-updated value (though unlikely
     * to occur in practice) could be probably bad.
     * Using unordered atomic accesses on `hb_sz` and `hb_descr`
     * fields would solve the issue.  (The alternate solution might
     * be to initially overallocate large objects, so we do not
     * have to adjust the size in `GC_realloc`, if they still fit.
     * But that is probably more expensive, since we may end up
     * scanning a bunch of zeros during the collection.)
     */
#ifdef AO_HAVE_store
    AO_store(&hhdr->hb_sz, sz);
    AO_store((AO_t *)&hhdr->hb_descr, descr);
#else
    {
      LOCK();
      hhdr->hb_sz = sz;
      hhdr->hb_descr = descr;
      UNLOCK();
    }
#endif

#ifdef MARK_BIT_PER_OBJ
    GC_ASSERT(hhdr->hb_inv_sz == LARGE_INV_SZ);
#else
    GC_ASSERT((hhdr->hb_flags & LARGE_BLOCK) != 0
              && hhdr->hb_map[ANY_INDEX] == 1);
#endif
    /* Account for the size growth of an uncollectable object. */
    if (IS_UNCOLLECTABLE(obj_kind))
      GC_non_gc_bytes += (sz - orig_sz);
    /* Extra area is already cleared by `GC_alloc_large_and_clear`. */
  }
  if (ADD_EXTRA_BYTES(lb) <= sz) {
    /* The request fits into the current storage. */
    if (lb >= (sz >> 1)) {
      /* Not smaller than a half of the storage: reuse the object. */
      if (orig_sz > lb) {
        /* Clear unneeded part of object to avoid bogus pointer tracing. */
        BZERO((ptr_t)cleared_p + lb, orig_sz - lb);
      }
      return p;
    }
    /*
     * Shrink it.  Note: shrinking of large blocks is not implemented
     * efficiently.
     */
    sz = lb;
  }
  /*
   * At this point `sz` is the number of bytes to copy: the new size in
   * case of shrinking, otherwise the current size of the object (the
   * rounded-up one in case of a large object).
   */
  result = GC_generic_or_special_malloc((word)lb, obj_kind);
  if (LIKELY(result != NULL)) {
    /*
     * In case of shrink, it could also return original object.
     * But this gives the client warning of imminent disaster.
     */
    BCOPY(p, result, sz);
#ifndef IGNORE_FREE
    GC_free((ptr_t)cleared_p);
#endif
  }
  return result;
#undef cleared_p
}

#if defined(REDIRECT_MALLOC) && !defined(REDIRECT_REALLOC)
#  define REDIRECT_REALLOC GC_realloc
#endif

#ifdef REDIRECT_REALLOC

/* As with `malloc`, avoid two levels of extra calls here. */
#  define GC_debug_realloc_replacement(p, lb) \
    GC_debug_realloc(p, lb, GC_DBG_EXTRAS)

#  if !defined(REDIRECT_MALLOC_IN_HEADER)
/*
 * The replacement for the standard `realloc`: the request is forwarded
 * to the configured reallocation function.
 */
void *
realloc(void *p, size_t lb)
{
  void *result = REDIRECT_REALLOC(p, lb);

  return result;
}
#  endif

#  undef GC_debug_realloc_replacement
#endif /* REDIRECT_REALLOC */

/*
 * Allocate `lb` bytes of the given `kind` by the generic allocator,
 * passing `IGNORE_OFF_PAGE` flag to it and requesting no extra
 * alignment.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_generic_malloc_ignore_off_page(size_t lb, int kind)
{
  void *obj = GC_generic_malloc_aligned(lb, kind, IGNORE_OFF_PAGE,
                                        0 /* `align_m1` */);

  return obj;
}

/*
 * Allocate `lb` bytes of `NORMAL` kind with `IGNORE_OFF_PAGE` flag
 * passed to the generic allocator.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_ignore_off_page(size_t lb)
{
  /* The generic variant passes `IGNORE_OFF_PAGE` and zero `align_m1`. */
  return GC_generic_malloc_ignore_off_page(lb, NORMAL);
}

/*
 * Allocate `lb` bytes of `PTRFREE` kind with `IGNORE_OFF_PAGE` flag
 * passed to the generic allocator.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_atomic_ignore_off_page(size_t lb)
{
  /* The generic variant passes `IGNORE_OFF_PAGE` and zero `align_m1`. */
  return GC_generic_malloc_ignore_off_page(lb, PTRFREE);
}

/*
 * Increment `GC_bytes_allocd` from code that does not have direct access
 * to `GC_arrays`.
 */
void GC_CALL
GC_incr_bytes_allocd(size_t n)
{
  /* Add `n` to the counter; no locking is performed here. */
  GC_bytes_allocd = GC_bytes_allocd + n;
}

/* The same as `GC_incr_bytes_allocd` but for `GC_bytes_freed`. */
void GC_CALL
GC_incr_bytes_freed(size_t n)
{
  /* Add `n` to the counter; no locking is performed here. */
  GC_bytes_freed = GC_bytes_freed + n;
}

/* Return the current value of `GC_bytes_freed` counter. */
GC_API size_t GC_CALL
GC_get_expl_freed_bytes_since_gc(void)
{
  size_t freed = (size_t)GC_bytes_freed;

  return freed;
}

/*
 * Allocate a number of objects of the given `kind`, each of
 * `lb_adjusted` bytes (the size should be non-zero and a multiple of
 * `GC_GRANULE_BYTES`).  The first object is stored to `*result` and the
 * other ones are linked through `obj_link()` field; `NULL` is stored
 * if nothing could be allocated.  The sources of the objects are tried
 * in the following order: a block waiting to be reclaimed, the prefix of
 * the global free list, a newly allocated heap block, and, finally,
 * a single object allocation.  A single object is always allocated if
 * the size is greater than `MAXOBJBYTES` or if manual VDB is on.
 */
GC_API void GC_CALL
GC_generic_malloc_many(size_t lb_adjusted, int kind, void **result)
{
  void *op;
  void *p;
  void **opp;
  /* The value of `lb_adjusted` converted to granules. */
  size_t lg;
  word my_bytes_allocd = 0;
  struct obj_kind *ok;
  struct hblk **rlh;

  GC_ASSERT(lb_adjusted != 0 && (lb_adjusted & (GC_GRANULE_BYTES - 1)) == 0);
  /* Currently a single object is always allocated if manual VDB. */
  /*
   * TODO: `GC_dirty` should be called for each linked object (but the
   * last one) to support multiple objects allocation.
   */
  if (UNLIKELY(lb_adjusted > MAXOBJBYTES) || GC_manual_vdb) {
    op = GC_generic_malloc_aligned(lb_adjusted - EXTRA_BYTES, kind,
                                   0 /* `flags` */, 0 /* `align_m1` */);
    if (LIKELY(op != NULL))
      obj_link(op) = NULL;
    *result = op;
#ifndef NO_MANUAL_VDB
    /* The location of the result is dirtied if it is inside the heap. */
    if (GC_manual_vdb && GC_is_heap_ptr(result)) {
      GC_dirty_inner(result);
      REACHABLE_AFTER_DIRTY(op);
    }
#endif
    return;
  }
  GC_ASSERT(kind < MAXOBJKINDS);
  lg = BYTES_TO_GRANULES(lb_adjusted);
  if (UNLIKELY(get_have_errors()))
    GC_print_all_errors();
  GC_notify_or_invoke_finalizers();
  GC_DBG_COLLECT_AT_MALLOC(lb_adjusted - EXTRA_BYTES);
  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  LOCK();
  /* Do our share of marking work. */
  if (GC_incremental && !GC_dont_gc) {
    GC_collect_a_little_inner(1);
  }

  /* First see if we can reclaim a page of objects waiting to be reclaimed. */
  ok = &GC_obj_kinds[kind];
  rlh = ok->ok_reclaim_list;
  if (rlh != NULL) {
    struct hblk *hbp;
    hdr *hhdr;

    while ((hbp = rlh[lg]) != NULL) {
      /* Unlink the block from the reclaim list. */
      hhdr = HDR(hbp);
      rlh[lg] = hhdr->hb_next;
      GC_ASSERT(hhdr->hb_sz == lb_adjusted);
      hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
#ifdef PARALLEL_MARK
      if (GC_parallel) {
        GC_signed_word my_bytes_allocd_tmp
            = (GC_signed_word)AO_load(&GC_bytes_allocd_tmp);
        GC_ASSERT(my_bytes_allocd_tmp >= 0);
        /*
         * We only decrement it while holding the allocator lock.
         * Thus, we cannot accidentally adjust it down in more than
         * one thread simultaneously.
         */
        if (my_bytes_allocd_tmp != 0) {
          (void)AO_fetch_and_add(&GC_bytes_allocd_tmp,
                                 (AO_t)(-my_bytes_allocd_tmp));
          GC_bytes_allocd += (word)my_bytes_allocd_tmp;
        }
        /*
         * Register as a free-list builder and release the allocator
         * lock, thus the block is reclaimed without holding the latter.
         */
        GC_acquire_mark_lock();
        ++GC_fl_builder_count;
        UNLOCK();
        GC_release_mark_lock();
      }
#endif
      op = GC_reclaim_generic(hbp, hhdr, lb_adjusted, ok->ok_init, 0,
                              &my_bytes_allocd);
      if (op != 0) {
#ifdef PARALLEL_MARK
        if (GC_parallel) {
          /* Note: the allocator lock is not held on this path. */
          *result = op;
          (void)AO_fetch_and_add(&GC_bytes_allocd_tmp, (AO_t)my_bytes_allocd);
          GC_acquire_mark_lock();
          --GC_fl_builder_count;
          if (GC_fl_builder_count == 0)
            GC_notify_all_builder();
#  ifdef THREAD_SANITIZER
          GC_release_mark_lock();
          LOCK();
          GC_bytes_found += (GC_signed_word)my_bytes_allocd;
          UNLOCK();
#  else
          /* The resulting `GC_bytes_found` may be inaccurate. */
          GC_bytes_found += (GC_signed_word)my_bytes_allocd;
          GC_release_mark_lock();
#  endif
          (void)GC_clear_stack(0);
          return;
        }
#endif
        /* We also reclaimed memory, so we need to adjust that count. */
        GC_bytes_found += (GC_signed_word)my_bytes_allocd;
        GC_bytes_allocd += my_bytes_allocd;
        goto out;
      }
#ifdef PARALLEL_MARK
      if (GC_parallel) {
        /* Nothing has been reclaimed from the block; try the next one. */
        GC_acquire_mark_lock();
        --GC_fl_builder_count;
        if (GC_fl_builder_count == 0)
          GC_notify_all_builder();
        GC_release_mark_lock();
        /*
         * The allocator lock is needed for access to the reclaim list.
         * We must decrement `GC_fl_builder_count` before reacquiring
         * the allocator lock.  Hopefully this path is rare.
         */
        LOCK();

        /* Reload `rlh` after locking. */
        rlh = ok->ok_reclaim_list;
        if (NULL == rlh)
          break;
      }
#endif
    }
  }
  /*
   * Next try to use prefix of global free list if there is one.
   * We do not refill it, but we need to use it up before allocating
   * a new block ourselves.
   */
  opp = &ok->ok_freelist[lg];
  if ((op = *opp) != NULL) {
    *opp = NULL;
    my_bytes_allocd = 0;
    /*
     * Take objects off the list until at least `HBLKSIZE` bytes are
     * accumulated; the rest of the list (if any) is put back.
     */
    for (p = op; p != NULL; p = obj_link(p)) {
      my_bytes_allocd += lb_adjusted;
      if ((word)my_bytes_allocd >= HBLKSIZE) {
        *opp = obj_link(p);
        obj_link(p) = NULL;
        break;
      }
    }
    GC_bytes_allocd += my_bytes_allocd;
    goto out;
  }

  /* Next try to allocate a new block worth of objects of this size. */
  {
    struct hblk *h
        = GC_allochblk(lb_adjusted, kind, 0 /* `flags` */, 0 /* `align_m1` */);

    if (h /* `!= NULL` */) { /*< CPPCHECK */
      if (IS_UNCOLLECTABLE(kind))
        GC_set_hdr_marks(HDR(h));
      /* Count only the bytes occupied by the whole objects. */
      GC_bytes_allocd += HBLKSIZE - (HBLKSIZE % lb_adjusted);
#ifdef PARALLEL_MARK
      if (GC_parallel) {
        /* Build the free list without holding the allocator lock. */
        GC_acquire_mark_lock();
        ++GC_fl_builder_count;
        UNLOCK();
        GC_release_mark_lock();

        op = GC_build_fl(h, NULL, lg, ok->ok_init || GC_debugging_started);
        *result = op;
        GC_acquire_mark_lock();
        --GC_fl_builder_count;
        if (GC_fl_builder_count == 0)
          GC_notify_all_builder();
        GC_release_mark_lock();
        (void)GC_clear_stack(0);
        return;
      }
#endif
      op = GC_build_fl(h, NULL, lg, ok->ok_init || GC_debugging_started);
      goto out;
    }
  }

  /*
   * As a last attempt, try allocating a single object.
   * Note that this may trigger a collection or expand the heap.
   */
  op = GC_generic_malloc_inner(lb_adjusted - EXTRA_BYTES, kind,
                               0 /* `flags` */);
  if (op != NULL)
    obj_link(op) = NULL;

out:
  /* The common exit for the paths which hold the allocator lock. */
  *result = op;
  UNLOCK();
  (void)GC_clear_stack(0);
}

/*
 * Allocate a number of objects of `NORMAL` kind, each being large enough
 * to hold `lb` bytes.  Return the first object as produced by
 * `GC_generic_malloc_many`.  A zero-sized request is treated as
 * a request of one byte.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_many(size_t lb)
{
  void *first_obj;
  size_t granules;
  size_t adjusted_bytes;

  if (UNLIKELY(0 == lb)) {
    lb = 1;
  }

  /* Convert the request to granules and then back to bytes. */
  granules = ALLOC_REQUEST_GRANS(lb);
  adjusted_bytes = GRANULES_TO_BYTES(granules);

  GC_generic_malloc_many(adjusted_bytes, NORMAL, &first_obj);
  return first_obj;
}

/*
 * TODO: The debugging variant of `GC_memalign` and friends is tricky
 * and currently missing.  The major difficulty is: `store_debug_info`
 * should return the pointer of the object with the requested alignment
 * (unlike the object header).
 */

/*
 * Allocate `lb` bytes of `NORMAL` kind aligned to `align` bytes.
 * Return `NULL` if `align` is zero or is not a power of two (or if the
 * underlying allocation returns `NULL`).
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_memalign(size_t align, size_t lb)
{
  size_t align_m1 = align - 1;

  /* Reject a zero alignment and the one which is not a power of two. */
  if (UNLIKELY(0 == align))
    return NULL;
  if (UNLIKELY((align & align_m1) != 0))
    return NULL;

  /* TODO: Use thread-local allocation. */
  /* No special handling is needed if the alignment is within a granule. */
  return align > GC_GRANULE_BYTES
             ? GC_malloc_kind_aligned_global(lb, NORMAL, align_m1)
             : GC_malloc(lb);
}

/*
 * Allocate `lb` bytes aligned to `align` bytes by `GC_memalign` and store
 * the result to `*memptr`.  Return zero on success.  Return `EINVAL` if
 * `align` is smaller than the pointer size or is not a power of two, and
 * `ENOMEM` if the allocation fails (the corresponding `ERROR_...` codes
 * are returned instead on WinCE).  `*memptr` is not modified on a failure.
 */
GC_API int GC_CALL
GC_posix_memalign(void **memptr, size_t align, size_t lb)
{
  size_t align_minus_one = align - 1; /*< to workaround a cppcheck warning */
  void *obj;

  /* Validate the alignment argument first. */
  if (UNLIKELY(align < sizeof(void *) || (align_minus_one & align) != 0)) {
#ifdef MSWINCE
    return ERROR_INVALID_PARAMETER;
#else
    return EINVAL;
#endif
  }

  obj = GC_memalign(align, lb);
  if (LIKELY(obj != NULL)) {
    *memptr = obj;
    return 0; /*< success */
  }

  /* Out of memory. */
#ifdef MSWINCE
  return ERROR_NOT_ENOUGH_MEMORY;
#else
  return ENOMEM;
#endif
}

#ifndef GC_NO_VALLOC
/*
 * Allocate `lb` bytes aligned to `GC_real_page_size`.  The collector is
 * initialized first if needed.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_valloc(size_t lb)
{
  void *obj;

  if (UNLIKELY(!GC_is_initialized)) {
    GC_init();
  }
  GC_ASSERT(GC_real_page_size != 0);
  obj = GC_memalign(GC_real_page_size, lb);
  return obj;
}

/*
 * Same as `GC_valloc` but the requested size is first rounded up to
 * a multiple of `GC_real_page_size` (using a saturating addition).
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_pvalloc(size_t lb)
{
  size_t rounded_lb;

  if (UNLIKELY(!GC_is_initialized)) {
    GC_init();
  }
  GC_ASSERT(GC_real_page_size != 0);

  rounded_lb
      = SIZET_SAT_ADD(lb, GC_real_page_size - 1) & ~(GC_real_page_size - 1);
  return GC_memalign(GC_real_page_size, rounded_lb);
}
#endif /* !GC_NO_VALLOC */

/*
 * A variant of `strdup()` that uses the collector to allocate the copy
 * of the string (by `GC_malloc_atomic`).  Return `NULL` if `s` is `NULL`
 * or if the allocation fails; in the latter case `errno` is set to
 * `ENOMEM` (unless on WinCE).
 */
GC_API GC_ATTR_MALLOC char *GC_CALL
GC_strdup(const char *s)
{
  size_t nbytes;
  char *dup;

  if (NULL == s)
    return NULL;

  /* The size includes the terminating NUL character. */
  nbytes = strlen(s) + 1;
  dup = (char *)GC_malloc_atomic(nbytes);
  if (LIKELY(dup != NULL)) {
    BCOPY(s, dup, nbytes);
    return dup;
  }

#ifndef MSWINCE
  errno = ENOMEM;
#endif
  return NULL;
}

/*
 * A variant of `strndup()` that uses the collector to allocate the
 * result (by `GC_malloc_atomic`): return a NUL-terminated copy of at most
 * `size` leading characters of `str`.  `str` is expected to be
 * non-`NULL`.  Return `NULL` if the allocation fails; in the latter case
 * `errno` is set to `ENOMEM` (unless on WinCE).
 */
GC_API GC_ATTR_MALLOC char *GC_CALL
GC_strndup(const char *str, size_t size)
{
  char *copy;
  /*
   * Do not examine more than `size` characters: the source is not
   * required to be NUL-terminated if it is at least `size` characters
   * long (and there is no point in scanning a long string to the end).
   */
  const char *nul = (const char *)memchr(str, '\0', size);
  size_t len = NULL == nul ? size : (size_t)(nul - str);

  copy = (char *)GC_malloc_atomic(len + 1);
  if (UNLIKELY(NULL == copy)) {
#ifndef MSWINCE
    errno = ENOMEM;
#endif
    return NULL;
  }
  if (LIKELY(len > 0))
    BCOPY(str, copy, len);
  copy[len] = '\0';
  return copy;
}

#ifdef GC_REQUIRE_WCSDUP
#  include <wchar.h> /*< for `wcslen()` */

/*
 * A variant of `wcsdup()` that uses the collector to allocate the copy
 * of the wide string (by `GC_malloc_atomic`).  `str` is passed to
 * `wcslen()` unchecked.  Return `NULL` if the allocation fails; in the
 * latter case `errno` is set to `ENOMEM` (unless on WinCE).
 */
GC_API GC_ATTR_MALLOC wchar_t *GC_CALL
GC_wcsdup(const wchar_t *str)
{
  /* The size (in bytes) including the terminating null wide character. */
  size_t nbytes = (wcslen(str) + 1) * sizeof(wchar_t);
  wchar_t *dup = (wchar_t *)GC_malloc_atomic(nbytes);

  if (LIKELY(dup != NULL)) {
    BCOPY(str, dup, nbytes);
    return dup;
  }

#  ifndef MSWINCE
  errno = ENOMEM;
#  endif
  return NULL;
}

#  if !defined(wcsdup) && defined(REDIRECT_MALLOC) \
      && !defined(REDIRECT_MALLOC_IN_HEADER)
/* The replacement for `wcsdup`; the work is done by `GC_wcsdup`. */
wchar_t *
wcsdup(const wchar_t *str)
{
  wchar_t *dup = GC_wcsdup(str);

  return dup;
}
#  endif
#endif /* GC_REQUIRE_WCSDUP */

#ifndef CPPCHECK
/* The implementation just forwards the request to `GC_malloc`. */
GC_API void *GC_CALL
GC_malloc_stubborn(size_t lb)
{
  void *obj = GC_malloc(lb);

  return obj;
}

/* The implementation does nothing; the argument is ignored. */
GC_API void GC_CALL
GC_change_stubborn(const void *p)
{
  UNUSED_ARG(p);
}
#endif /* !CPPCHECK */

/* Report a change of the object `p` to the collector by `GC_dirty()`. */
GC_API void GC_CALL
GC_end_stubborn_change(const void *p)
{
  GC_dirty(p); /*< entire object */
}

/*
 * Store the pointer `q` to the location `p` and then pass `p` to
 * `GC_dirty()`.
 */
GC_API void GC_CALL
GC_ptr_store_and_dirty(void *p, const void *q)
{
  *(const void **)p = q;
  GC_dirty(p);
  /* Note: this is placed after the `GC_dirty()` call deliberately. */
  REACHABLE_AFTER_DIRTY(q);
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


/*
 * Make arguments appear live to compiler.  Put here to minimize the
 * risk of inlining.  Used to minimize junk left in registers.
 */
/* The function has no effect except for the barrier (or sink store). */
GC_ATTR_NOINLINE
void
GC_noop6(word arg1, word arg2, word arg3, word arg4, word arg5, word arg6)
{
  /* The arguments are intentionally not used. */
  UNUSED_ARG(arg1);
  UNUSED_ARG(arg2);
  UNUSED_ARG(arg3);
  UNUSED_ARG(arg4);
  UNUSED_ARG(arg5);
  UNUSED_ARG(arg6);
  /* Avoid `GC_noop6` calls to be optimized away. */
#if defined(AO_HAVE_compiler_barrier) && !defined(BASE_ATOMIC_OPS_EMULATED)
  AO_compiler_barrier(); /*< to serve as a special side-effect */
#else
  /* No suitable compiler barrier; store to the sink variable instead. */
  GC_noop1(0);
#endif
}

/*
 * Store `x` to `GC_noop_sink` variable.  The store is performed
 * atomically if built with `THREAD_SANITIZER` defined and `AO_store`
 * primitive is available.
 */
GC_API void GC_CALL
GC_noop1(GC_word x)
{
#if defined(AO_HAVE_store) && defined(THREAD_SANITIZER)
  AO_store(&GC_noop_sink, (AO_t)x);
#else
  GC_noop_sink = x;
#endif
}

/*
 * The pointer variant of `GC_noop1`.  If a pointer is wider than a word
 * (`CPP_PTRSZ > CPP_WORDSZ`), then `p` is stored to the dedicated
 * `GC_noop_sink_ptr` variable; otherwise the address of `p` is just
 * passed to `GC_noop1`.
 */
GC_API void GC_CALL
GC_noop1_ptr(volatile void *p)
{
#if CPP_PTRSZ > CPP_WORDSZ
#  if defined(AO_HAVE_store) && defined(THREAD_SANITIZER)
  GC_cptr_store(&GC_noop_sink_ptr, (ptr_t)CAST_AWAY_VOLATILE_PVOID(p));
#  else
  GC_noop_sink_ptr = (ptr_t)CAST_AWAY_VOLATILE_PVOID(p);
#  endif
#else
  GC_noop1(ADDR(p));
#endif
}

/*
 * Initialize `GC_obj_kinds` and the standard free lists properly.
 * This must be done statically since they may be accessed before
 * `GC_init` is called.  It is done here, since we need to deal with
 * mark descriptors.  Note: `GC_obj_kinds[NORMAL].ok_descriptor` is
 * adjusted in `GC_init()` for `EXTRA_BYTES`.
 */
/*
 * The table of the object kinds, indexed by the kind number.
 * NOTE(review): the initializers presumably correspond to `ok_freelist`,
 * `ok_reclaim_list`, `ok_descriptor`, `ok_relocate_descr` and `ok_init`
 * fields (in this order), optionally followed by the fields which are
 * initialized by `OK_DISCLAIM_INITZ` - confirm against the declaration
 * of `struct obj_kind`.
 */
GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
  /* `PTRFREE` */
  { &GC_aobjfreelist[0], 0 /*< filled in dynamically */,
    /* `0 |` */ GC_DS_LENGTH, FALSE,
    FALSE
        /*, */ OK_DISCLAIM_INITZ },
  /* `NORMAL` */
  { &GC_objfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, TRUE /*< add length to descriptor template */,
    TRUE
        /*, */ OK_DISCLAIM_INITZ },
  /* `UNCOLLECTABLE` */
  { &GC_uobjfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, TRUE /*< add length to descriptor template */,
    TRUE
        /*, */ OK_DISCLAIM_INITZ },
#ifdef GC_ATOMIC_UNCOLLECTABLE
  /* `AUNCOLLECTABLE` */
  { &GC_auobjfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, FALSE,
    FALSE
        /*, */ OK_DISCLAIM_INITZ },
#endif
};

#ifndef GC_NO_DEINIT
/* Note: keep this close to `GC_obj_kinds` definition. */
/*
 * Bring the predefined entries of `GC_obj_kinds` table back to the
 * statically initialized state: the reclaim lists are cleared, the
 * free-list pointers are set to the standard free lists, the descriptor
 * of `NORMAL` kind is reset, and `GC_n_kinds` is set to the number of
 * the predefined kinds.
 */
GC_INNER void
GC_reset_obj_kinds(void)
{
  unsigned kind = GC_N_KINDS_INITIAL_VALUE;

  /* Forget the reclaim lists of all the predefined kinds. */
  while (kind > 0) {
    --kind;
    GC_obj_kinds[kind].ok_reclaim_list = NULL;
  }

  GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;
  GC_obj_kinds[NORMAL].ok_descriptor = GC_DS_LENGTH;

  /* Each predefined kind uses its own standard free list. */
  GC_obj_kinds[PTRFREE].ok_freelist = GC_aobjfreelist;
  GC_obj_kinds[NORMAL].ok_freelist = GC_objfreelist;
  GC_obj_kinds[UNCOLLECTABLE].ok_freelist = GC_uobjfreelist;
#  ifdef GC_ATOMIC_UNCOLLECTABLE
  GC_obj_kinds[AUNCOLLECTABLE].ok_freelist = GC_auobjfreelist;
#  endif
}
#endif

#ifndef INITIAL_MARK_STACK_SIZE
/*
 * `INITIAL_MARK_STACK_SIZE * sizeof(mse)` should be a multiple of `HBLKSIZE`.
 * The incremental collector actually likes a larger size, since it wants to
 * push all marked dirty objects before marking anything new.
 * Currently we let it grow dynamically.
 */
#  define INITIAL_MARK_STACK_SIZE (1 * HBLKSIZE)
#endif

#if !defined(GC_DISABLE_INCREMENTAL)
/*
 * The number of dirty pages we marked from, excluding pointer-free pages,
 * etc.  Used for logging only.
 */
STATIC word GC_n_rescuing_pages = 0;
#endif

/*
 * Set the pointer mask to `value`.  If the collector is built without
 * `DYNAMIC_POINTER_MASK`, then the only accepted `value` is the
 * compile-time one (`POINTER_MASK` if defined, `GC_WORD_MAX` otherwise);
 * the program is aborted if some other value is passed.
 */
GC_API void GC_CALL
GC_set_pointer_mask(GC_word value)
{
#ifdef DYNAMIC_POINTER_MASK
  GC_ASSERT(value >= 0xff); /*< a simple sanity check */
  GC_pointer_mask = value;
#else
  /* Note: the right operand of the comparison is selected below. */
  if (value
#  ifdef POINTER_MASK
      != (word)(POINTER_MASK)
#  else
      != GC_WORD_MAX
#  endif
  ) {
    ABORT("Dynamic pointer mask/shift is unsupported");
  }
#endif
}

/*
 * Return the pointer mask: the value of `GC_pointer_mask` variable if
 * built with `DYNAMIC_POINTER_MASK` defined (`GC_WORD_MAX` is returned
 * instead if the variable is zero), or the compile-time value otherwise
 * (`POINTER_MASK` if defined, else `GC_WORD_MAX`).
 */
GC_API GC_word GC_CALL
GC_get_pointer_mask(void)
{
#ifdef DYNAMIC_POINTER_MASK
  GC_word value = GC_pointer_mask;

  if (0 == value) {
    /* A zero value is expected only before the initialization. */
    GC_ASSERT(!GC_is_initialized);
    value = GC_WORD_MAX;
  }
  return value;
#elif defined(POINTER_MASK)
  return POINTER_MASK;
#else
  return GC_WORD_MAX;
#endif
}

/*
 * Set the pointer shift to `value`.  If the collector is built without
 * `DYNAMIC_POINTER_MASK`, then the only accepted `value` is the
 * compile-time one (`POINTER_SHIFT` if defined, zero otherwise); the
 * program is aborted if some other value is passed.
 */
GC_API void GC_CALL
GC_set_pointer_shift(unsigned value)
{
#ifdef DYNAMIC_POINTER_MASK
  GC_ASSERT(value < CPP_WORDSZ);
  GC_pointer_shift = (unsigned char)value;
#else
  /*
   * Note: if `POINTER_SHIFT` is not defined, then the condition is just
   * a test of `value` for non-zero.
   */
  if (value
#  ifdef POINTER_SHIFT
      != (unsigned)(POINTER_SHIFT)
#  endif
  ) {
    ABORT("Dynamic pointer mask/shift is unsupported");
  }
#endif
}

/*
 * Return the pointer shift: the value of `GC_pointer_shift` variable if
 * built with `DYNAMIC_POINTER_MASK` defined, or the compile-time value
 * otherwise (`POINTER_SHIFT` if defined, else zero).
 */
GC_API unsigned GC_CALL
GC_get_pointer_shift(void)
{
#ifdef DYNAMIC_POINTER_MASK
  return GC_pointer_shift;
#elif defined(POINTER_SHIFT)
  /* The compile-time shift value should be less than the word width. */
  GC_STATIC_ASSERT((unsigned)(POINTER_SHIFT) < CPP_WORDSZ);
  return POINTER_SHIFT;
#else
  return 0;
#endif
}

/* Return `TRUE` if and only if the mark state is other than `MS_NONE`. */
GC_INNER GC_bool
GC_collection_in_progress(void)
{
  return MS_NONE == GC_mark_state ? FALSE : TRUE;
}

/*
 * Clear all the mark bits in the given block header, then set the bit
 * at `FINAL_MARK_BIT()` position back, and reset the marks counter of
 * the header to zero.
 */
GC_INNER void
GC_clear_hdr_marks(hdr *hhdr)
{
  size_t last_bit;

#ifdef AO_HAVE_load
  /* Atomic access is used to avoid racing with `GC_realloc`. */
  last_bit = FINAL_MARK_BIT(AO_load((volatile AO_t *)&hhdr->hb_sz));
#else
  /*
   * No race as `GC_realloc` holds the allocator lock while updating
   * `hb_sz` field.
   */
  last_bit = FINAL_MARK_BIT(hhdr->hb_sz);
#endif

  BZERO(CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks), sizeof(hhdr->hb_marks));
  /* Note: this bit is not counted in `hb_n_marks`. */
  set_mark_bit_from_hdr(hhdr, last_bit);
  hhdr->hb_n_marks = 0;
}

/*
 * Set the mark bits for all the objects of the block described by the
 * given header (including the bit at `FINAL_MARK_BIT()` position), and
 * set the marks counter of the header accordingly.
 */
GC_INNER void
GC_set_hdr_marks(hdr *hhdr)
{
  size_t i;
  size_t sz = hhdr->hb_sz;
  size_t n_marks = FINAL_MARK_BIT(sz);

#ifdef USE_MARK_BYTES
  /* Set every `MARK_BIT_OFFSET(sz)`-th mark byte up to the final one. */
  for (i = 0; i <= n_marks; i += MARK_BIT_OFFSET(sz)) {
    hhdr->hb_marks[i] = 1;
  }
#else
  /*
   * Note that all bits are set even in case of not `MARK_BIT_PER_OBJ`,
   * instead of setting every `n`-th bit where `n` is `MARK_BIT_OFFSET(sz)`.
   *  This is done for a performance reason.
   */
  for (i = 0; i < divWORDSZ(n_marks); ++i) {
    hhdr->hb_marks[i] = GC_WORD_MAX;
  }
  /* Set the remaining bits near the end (plus one bit past the end). */
  /*
   * Note: the mask is computed as shown to avoid a shift by the word
   * width in case the bit is the highest one in the word.
   */
  hhdr->hb_marks[i] = ((((word)1 << modWORDSZ(n_marks)) - 1) << 1) | 1;
#endif
#ifdef MARK_BIT_PER_OBJ
  hhdr->hb_n_marks = n_marks;
#else
  hhdr->hb_n_marks = HBLK_OBJS(sz);
#endif
}

/*
 * Clear all mark bits associated with block `h` unless the block holds
 * objects of an uncollectable kind.  The second argument is not used.
 */
static void GC_CALLBACK
clear_marks_for_block(struct hblk *h, void *dummy)
{
  hdr *block_hdr = HDR(h);

  UNUSED_ARG(dummy);
  /*
   * The mark bit of an uncollectable object is cleared only once the
   * object is deallocated explicitly: the latter either frees the
   * block, or the bit is cleared once the object is on the free list.
   */
  if (!IS_UNCOLLECTABLE(block_hdr->hb_obj_kind)) {
    GC_clear_hdr_marks(block_hdr);
#if defined(CPPCHECK)
    GC_noop1_ptr(h);
#endif
  }
}

/* Slow but general routines for setting/clearing/getting mark bits. */

/*
 * Set the mark bit of the object `p` and increment the marks counter of
 * the block header, unless the bit is already set.
 */
GC_API void GC_CALL
GC_set_mark_bit(const void *p)
{
  struct hblk *blk = HBLKPTR(p);
  hdr *blk_hdr = HDR(blk);
  /* The displacement of the object inside its heap block. */
  size_t displ = (size_t)((ptr_t)p - (ptr_t)blk);
  size_t bit_no = MARK_BIT_NO(displ, blk_hdr->hb_sz);

  if (mark_bit_from_hdr(blk_hdr, bit_no))
    return; /*< already marked */

  set_mark_bit_from_hdr(blk_hdr, bit_no);
  INCR_MARKS(blk_hdr);
}

/*
 * Clear the mark bit of the object `p` and decrement the marks counter
 * of the block header, unless the bit is already clear.
 */
GC_API void GC_CALL
GC_clear_mark_bit(const void *p)
{
  struct hblk *blk = HBLKPTR(p);
  hdr *blk_hdr = HDR(blk);
  /* The displacement of the object inside its heap block. */
  size_t displ = (size_t)((ptr_t)p - (ptr_t)blk);
  size_t bit_no = MARK_BIT_NO(displ, blk_hdr->hb_sz);
  size_t marks_left;

  if (!mark_bit_from_hdr(blk_hdr, bit_no))
    return; /*< not marked */

  marks_left = blk_hdr->hb_n_marks;
  GC_ASSERT(marks_left != 0);
  clear_mark_bit_from_hdr(blk_hdr, bit_no);
  marks_left--;
#ifdef PARALLEL_MARK
  /*
   * Do not decrement to zero.  The counts are approximate due to
   * concurrency issues, but we need to ensure that a count of zero
   * implies an empty block.
   */
  if (0 == marks_left && GC_parallel)
    return;
#endif
  blk_hdr->hb_n_marks = marks_left;
}

/*
 * Return the value (0 or 1) of the mark bit that corresponds to
 * address `p`.
 */
GC_API int GC_CALL
GC_is_marked(const void *p)
{
  struct hblk *blk = HBLKPTR(p);
  hdr *blk_hdr = HDR(blk);
  /* Byte offset of `p` within its heap block. */
  size_t displ = (size_t)((ptr_t)p - (ptr_t)blk);
  size_t mark_idx = MARK_BIT_NO(displ, blk_hdr->hb_sz);

  return (int)mark_bit_from_hdr(blk_hdr, mark_idx);
}

/*
 * Clear the mark bits of every heap block (except for the uncollectable
 * ones, see `clear_marks_for_block()`) and reset the marker: no objects
 * are marked, the mark state is `MS_INVALID`, and the heap scan pointer
 * is `NULL`.
 */
GC_INNER void
GC_clear_marks(void)
{
  /* `GC_push_roots()` needs the collector to be initialized. */
  GC_ASSERT(GC_is_initialized);

  GC_apply_to_all_blocks(clear_marks_for_block, NULL);
  GC_scan_ptr = NULL;
  GC_mark_state = MS_INVALID;
  GC_objects_are_marked = FALSE;
}

/*
 * Prepare the marker for a new collection.  In the incremental mode the
 * dirty page information is read first.  If the mark state is `MS_NONE`,
 * it becomes `MS_PUSH_RESCUERS`; otherwise the state is expected to be
 * `MS_INVALID` and it is left as is.  The heap scan pointer is reset.
 * The allocator lock should be held by the caller.
 */
GC_INNER void
GC_initiate_gc(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
#ifndef GC_DISABLE_INCREMENTAL
  if (GC_incremental) {
#  ifdef CHECKSUMS
    GC_read_dirty(FALSE);
    GC_check_dirty();
#  else
    GC_read_dirty(MS_INVALID == GC_mark_state);
#  endif
  }
  GC_n_rescuing_pages = 0;
#endif
  GC_scan_ptr = NULL;
  if (MS_NONE == GC_mark_state) {
    GC_mark_state = MS_PUSH_RESCUERS;
    return;
  }
  /* This is really a full collection, and mark bits are invalid. */
  GC_ASSERT(MS_INVALID == GC_mark_state);
}

/* Forward declarations of the routines used by `GC_mark_some()`. */

#ifdef PARALLEL_MARK
/* Initiate parallel marking. */
STATIC void GC_do_parallel_mark(void);
#endif

/*
 * If the incremental collection support is compiled out, then the "dirty"
 * variant of the block pusher is just an alias of `GC_push_next_marked()`.
 */
#ifdef GC_DISABLE_INCREMENTAL
#  define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
#else
STATIC struct hblk *GC_push_next_marked_dirty(struct hblk *h);
#endif /* !GC_DISABLE_INCREMENTAL */

/*
 * The heap block pushers.  `GC_mark_some()` passes the current value of
 * `GC_scan_ptr` to them and stores the result back to `GC_scan_ptr`.
 */
STATIC struct hblk *GC_push_next_marked(struct hblk *h);
STATIC struct hblk *GC_push_next_marked_uncollectable(struct hblk *h);

/*
 * `GC_mark_some()` calls this with twice the current mark stack size
 * when `GC_mark_stack_too_small` is set.
 */
static void alloc_mark_stack(size_t);

/*
 * Push the roots, provided that the heap scan is finished (i.e. the scan
 * pointer is `NULL`); otherwise do nothing.  After pushing, the objects
 * are considered marked and the mark state advances to `MS_ROOTS_PUSHED`
 * unless it is `MS_INVALID`.  Both arguments are passed through to
 * `GC_push_roots()`.
 */
static void
push_roots_and_advance(GC_bool push_all, ptr_t cold_gc_frame)
{
  if (NULL == GC_scan_ptr) {
    GC_push_roots(push_all, cold_gc_frame);
    GC_objects_are_marked = TRUE;
    if (MS_INVALID != GC_mark_state)
      GC_mark_state = MS_ROOTS_PUSHED;
  }
  /* Otherwise, not ready to push yet. */
}

/*
 * The client callback (or 0 if none) which `GC_mark_some()` invokes when
 * the mark stack is found empty in the `MS_ROOTS_PUSHED` state.
 * The callback is given the mark stack top and limit, and it returns
 * the new mark stack top.
 */
STATIC GC_on_mark_stack_empty_proc GC_on_mark_stack_empty = 0;

/*
 * Set the callback to be invoked when the mark stack becomes empty
 * (zero means no callback).  The variable is updated between `LOCK()`
 * and `UNLOCK()`.
 */
GC_API void GC_CALL
GC_set_on_mark_stack_empty(GC_on_mark_stack_empty_proc fn)
{
  LOCK();
  GC_on_mark_stack_empty = fn;
  UNLOCK();
}

/*
 * Return the currently set "mark stack empty" callback (zero if none).
 * The variable is read between `READER_LOCK()` and `READER_UNLOCK()`.
 */
GC_API GC_on_mark_stack_empty_proc GC_CALL
GC_get_on_mark_stack_empty(void)
{
  GC_on_mark_stack_empty_proc current_proc;

  READER_LOCK();
  current_proc = GC_on_mark_stack_empty;
  READER_UNLOCK();
  return current_proc;
}

/*
 * Perform one step of the marking process; the kind of the step is
 * selected by `GC_mark_state`.  Return `TRUE` if there is no marking work
 * left (the state is, or has just become, `MS_NONE`), `FALSE` if the
 * caller should call this function again.  The allocator lock should be
 * held by the caller.  `cold_gc_frame` is passed through to
 * `GC_push_roots()`.
 */
#ifdef WRAP_MARK_SOME
/*
 * For Win32, this is called after we establish a structured exception
 * (or signal) handler, in case Windows unmaps one of our root segments.
 * Note that this code should never generate an incremental GC write fault.
 */
STATIC GC_bool
GC_mark_some_inner(ptr_t cold_gc_frame)
#else
GC_INNER GC_bool
GC_mark_some(ptr_t cold_gc_frame)
#endif
{
  GC_ASSERT(I_HOLD_LOCK());
  switch (GC_mark_state) {
  case MS_NONE:
    /* No marking work to do. */
    return TRUE;

  case MS_PUSH_RESCUERS:
    /*
     * Either drain the mark stack (if it is close to its limit), or push
     * the next marked dirty block and, once the scan is over, the roots.
     */
    if (ADDR_GE((ptr_t)GC_mark_stack_top,
                (ptr_t)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE / 2))) {
      /*
       * Go ahead and mark, even though that might cause us to see more
       * marked dirty objects later on.  Avoid this in the future.
       */
      GC_mark_stack_too_small = TRUE;
      MARK_FROM_MARK_STACK();
    } else {
      GC_scan_ptr = GC_push_next_marked_dirty(GC_scan_ptr);
#ifndef GC_DISABLE_INCREMENTAL
      if (NULL == GC_scan_ptr) {
        GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
                           (unsigned long)GC_n_rescuing_pages);
      }
#endif
      /* This is a no-op unless `GC_scan_ptr` is `NULL`. */
      push_roots_and_advance(FALSE, cold_gc_frame);
    }
    GC_ASSERT(GC_mark_state == MS_PUSH_RESCUERS
              || GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_INVALID);
    break;

  case MS_PUSH_UNCOLLECTABLE:
    /*
     * Either drain the mark stack (if at least a quarter of it is in use),
     * or push the next marked uncollectable block and, once the scan is
     * over, all the roots.
     */
    if (ADDR_GE((ptr_t)GC_mark_stack_top,
                (ptr_t)(GC_mark_stack + GC_mark_stack_size / 4))) {
#ifdef PARALLEL_MARK
      /* Avoid this, since we do not parallelize the marker here. */
      if (GC_parallel)
        GC_mark_stack_too_small = TRUE;
#endif
      MARK_FROM_MARK_STACK();
    } else {
      GC_scan_ptr = GC_push_next_marked_uncollectable(GC_scan_ptr);
      /* This is a no-op unless `GC_scan_ptr` is `NULL`. */
      push_roots_and_advance(TRUE, cold_gc_frame);
    }
    GC_ASSERT(GC_mark_state == MS_PUSH_UNCOLLECTABLE
              || GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_INVALID);
    break;

  case MS_ROOTS_PUSHED:
#ifdef PARALLEL_MARK
    /*
     * Eventually, incremental marking should run asynchronously
     * in multiple threads, without acquiring the allocator lock.
     * For now, parallel marker is disabled if there is a chance that
     * marking could be interrupted by a client-supplied time limit
     * or custom stop function.
     */
    if (GC_parallel && !GC_parallel_mark_disabled) {
      GC_do_parallel_mark();
      GC_ASSERT(ADDR_LT((ptr_t)GC_mark_stack_top, GC_first_nonempty));
      /* Make the global mark stack empty. */
      GC_mark_stack_top = GC_mark_stack - 1;
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      if (GC_mark_state == MS_ROOTS_PUSHED) {
        /* The state has not been invalidated, so marking is complete. */
        GC_mark_state = MS_NONE;
        return TRUE;
      }
      GC_ASSERT(GC_mark_state == MS_INVALID);
      break;
    }
#endif
    if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)) {
      /* The mark stack is not empty yet. */
      MARK_FROM_MARK_STACK();
    } else {
      /*
       * The mark stack is empty.  Give the client callback (if any)
       * a chance to push more entries before marking is declared complete.
       */
      GC_on_mark_stack_empty_proc on_ms_empty = GC_on_mark_stack_empty;

      if (on_ms_empty != 0) {
        GC_mark_stack_top
            = on_ms_empty(GC_mark_stack_top, GC_mark_stack_limit);
        /* If we pushed new items, we need to continue processing. */
        if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack))
          break;
      }
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      GC_mark_state = MS_NONE;
      return TRUE;
    }
    GC_ASSERT(GC_mark_state == MS_ROOTS_PUSHED || GC_mark_state == MS_INVALID);
    break;

  case MS_INVALID:
  case MS_PARTIALLY_INVALID:
    if (!GC_objects_are_marked) {
      /* Nothing is marked, so there are no marked objects to rescan. */
      GC_mark_state = MS_PUSH_UNCOLLECTABLE;
      break;
    }
    if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)) {
      /* Drain the mark stack before scanning the heap further. */
      MARK_FROM_MARK_STACK();
      GC_ASSERT(GC_mark_state == MS_PARTIALLY_INVALID
                || GC_mark_state == MS_INVALID);
      break;
    }
    if (NULL == GC_scan_ptr && GC_mark_state == MS_INVALID) {
      /*
       * About to start a heap scan for marked objects.
       * Mark stack is empty.  OK to reallocate.
       */
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      GC_mark_state = MS_PARTIALLY_INVALID;
    }
    GC_scan_ptr = GC_push_next_marked(GC_scan_ptr);
    /*
     * The roots are pushed only if the state is `MS_PARTIALLY_INVALID`
     * after the push above; the call is a no-op unless `GC_scan_ptr` is
     * `NULL`.
     */
    if (GC_mark_state == MS_PARTIALLY_INVALID)
      push_roots_and_advance(TRUE, cold_gc_frame);
    GC_ASSERT(GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_PARTIALLY_INVALID
              || GC_mark_state == MS_INVALID);
    break;

  default:
    ABORT("GC_mark_some: bad state");
  }
  return FALSE;
}

#ifdef PARALLEL_MARK
/*
 * If set, then `GC_mark_some()` does not invoke `GC_do_parallel_mark()`
 * in the `MS_ROOTS_PUSHED` state even if `GC_parallel` is on; the mark
 * stack is processed by the calling thread instead.
 */
GC_INNER GC_bool GC_parallel_mark_disabled = FALSE;
#endif

#ifdef WRAP_MARK_SOME
/*
 * A fault-tolerant wrapper around `GC_mark_some_inner()`.  Unless
 * `GC_no_dls` is set, the inner call is guarded either by a structured
 * exception handler or, if `NO_SEH_AVAILABLE`, by a temporary fault
 * handler combined with `SETJMP()`.  If a fault is caught (or, in case of
 * `DllMain`-based thread tracking, a thread has started while marking),
 * then the mark state is invalidated and `FALSE` is returned, thus
 * marking is restarted by the subsequent calls.  Otherwise, the result of
 * the inner call is returned.
 */
GC_INNER GC_bool
GC_mark_some(ptr_t cold_gc_frame)
{
  GC_bool ret_val;

  if (GC_no_dls) {
    /* No fault recovery is set up in this case. */
    ret_val = GC_mark_some_inner(cold_gc_frame);
  } else {
    /*
     * Windows appears to asynchronously create and remove writable
     * memory mappings, for reasons we have not yet understood.
     * Since we look for writable regions to determine the root set, we
     * may try to mark from an address range that disappeared since we
     * started the collection.  Thus we have to recover from faults here.
     * This code seems to be necessary for WinCE (at least in the case
     * we would decide to add `MEM_PRIVATE` sections to data roots in
     * `GC_register_dynamic_libraries`).  It is conceivable that this is
     * the same issue as with terminating threads that we see with Linux
     * and `USE_PROC_FOR_LIBRARIES`.
     */
#  ifndef NO_SEH_AVAILABLE
    /* Only access violations are handled; others are propagated. */
    __try {
      ret_val = GC_mark_some_inner(cold_gc_frame);
    } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION
                    ? EXCEPTION_EXECUTE_HANDLER
                    : EXCEPTION_CONTINUE_SEARCH) {
      goto handle_ex;
    }
#  else
#    if defined(USE_PROC_FOR_LIBRARIES) && !defined(DEFAULT_VDB)
    if (GC_auto_incremental) {
      static GC_bool is_warned = FALSE;

      /* Warn at most once. */
      if (!is_warned) {
        is_warned = TRUE;
        WARN("Incremental GC incompatible with /proc roots\n", 0);
      }
      /* Unclear if this could still work... */
    }
#    endif
    /*
     * If `USE_PROC_FOR_LIBRARIES`, then we are handling the case in
     * which `/proc` is used for root finding, and we have threads.
     * We may find a stack for a thread that is in the process of
     * exiting, and disappears while we are marking it.
     * This seems extremely difficult to avoid otherwise.
     */
    GC_setup_temporary_fault_handler();
    /* A non-zero result of `SETJMP()` is treated as a caught fault. */
    if (SETJMP(GC_jmp_buf) != 0)
      goto handle_ex;
    ret_val = GC_mark_some_inner(cold_gc_frame);
    GC_reset_fault_handler();
#  endif
  }

#  if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
  /*
   * With `DllMain`-based thread tracking, a thread may have started
   * while we were marking.  This is logically equivalent to the
   * exception case; our results are invalid and we have to start over.
   * This cannot be prevented since we cannot block in `DllMain()`.
   */
  if (GC_started_thread_while_stopped())
    goto handle_thr_start;
#  endif
  return ret_val;

handle_ex:
  /* Exception handler starts here for all cases. */
#  if defined(NO_SEH_AVAILABLE)
  /* The normal path has been skipped, so reset the handler here. */
  GC_reset_fault_handler();
#  endif
  {
    static word warned_gc_no;

    /* Report caught `ACCESS_VIOLATION`, once per collection. */
    if (warned_gc_no != GC_gc_no) {
      GC_COND_LOG_PRINTF("Memory mapping disappeared at collection #%lu\n",
                         (unsigned long)GC_gc_no + 1);
      warned_gc_no = GC_gc_no;
    }
  }
#  if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
handle_thr_start:
#  endif
  /*
   * We have bad roots on the mark stack - discard it.
   * Rescan from the marked objects; redetermine the roots.
   */
#  ifdef REGISTER_LIBRARIES_EARLY
  START_WORLD();
  GC_cond_register_dynamic_libraries();
  STOP_WORLD();
#  endif
  GC_invalidate_mark_state();
  GC_scan_ptr = NULL;
  /* More calls are needed to redo the marking. */
  return FALSE;
}
#endif /* WRAP_MARK_SOME */

/*
 * Discard the mark stack contents and declare the mark bits invalid:
 * the mark state becomes `MS_INVALID` and the mark stack top is set to
 * one entry below the stack bottom (i.e. the stack is made empty).
 */
GC_INNER void
GC_invalidate_mark_state(void)
{
  GC_mark_state = MS_INVALID;
  GC_mark_stack_top = GC_mark_stack - 1;
}

/*
 * Handle a mark stack overflow: the mark state becomes `MS_INVALID`, the
 * global mark stack is requested to be resized (unless in the parallel
 * mode), and the event is logged.  Return `msp` lowered by
 * `GC_MARK_STACK_DISCARDS` entries.
 */
STATIC mse *
GC_signal_mark_stack_overflow(mse *msp)
{
  GC_mark_state = MS_INVALID;
#ifdef PARALLEL_MARK
  /*
   * We are using a `local_mark_stack` in parallel mode, so do
   * not signal the global mark stack to be resized.
   * That will be done in `GC_return_mark_stack` if required.
   */
  if (!GC_parallel)
#endif
  {
    GC_mark_stack_too_small = TRUE;
  }
  GC_COND_LOG_PRINTF("Mark stack overflow; current size: %lu entries\n",
                     (unsigned long)GC_mark_stack_size);
#if defined(CPPCHECK)
  GC_noop1_ptr(msp);
#endif
  /* Drop some of the topmost entries. */
  return msp - GC_MARK_STACK_DISCARDS;
}

/*
 * Mark from the entries of the given mark stack.  The entries are popped
 * one at a time; for every candidate pointer found in the described
 * object which is strictly between `GC_least_plausible_heap_addr` and
 * `GC_greatest_plausible_heap_addr`, `PUSH_CONTENTS()` is invoked.
 * The loop ends when the stack becomes empty (i.e. `mark_stack_top` is
 * below `mark_stack`) or when the work credit (`HBLKSIZE` initially)
 * becomes negative.  `mark_stack_limit` is passed to `PUSH_CONTENTS()`
 * and to the mark procedures as the stack limit.  Return the new top of
 * the mark stack.  As a side effect, `GC_objects_are_marked` is set.
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_INNER mse *
GC_mark_from(mse *mark_stack_top, mse *mark_stack, mse *mark_stack_limit)
{
  GC_signed_word credit = HBLKSIZE; /*< remaining credit for marking work */
  word descr;
  ptr_t current_p;    /*< pointer to the current candidate pointer */
  ptr_t q;            /*< the candidate pointer itself */
  ptr_t limit = NULL; /*< the limit (incl.) of the current candidate range */
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

  /*
   * A range described by a length descriptor is scanned in a single step
   * only if it is shorter than this number of pointers (see the test of
   * `descr` below); a longer range is split.
   */
#define SPLIT_RANGE_PTRS 128 /*< must be power of 2 */

  GC_objects_are_marked = TRUE;
  INIT_HDR_CACHE;
#if defined(OS2) || CPP_PTRSZ > CPP_WORDSZ
  /* OS/2: use untweaked variant to circumvent a compiler problem. */
  while (ADDR_GE((ptr_t)mark_stack_top, (ptr_t)mark_stack) && credit >= 0)
#else
  /*
   * The tweaked variant of the condition above: both the stack depth and
   * the credit are checked by a single test of `SIGNB` in their bitwise
   * "or".
   */
  while (((((word)mark_stack_top - (word)mark_stack) | (word)credit) & SIGNB)
         == 0)
#endif
  {
    current_p = mark_stack_top->mse_start;
    descr = mark_stack_top->mse_descr;
  retry:
    /*
     * `current_p` and `descr` describe the current object.
     * `*mark_stack_top` is vacant.
     * The following is zero only for small objects described by a simple
     * length descriptor.  For many applications this is the common case,
     * so we try to detect it quickly.
     */
    if (descr & (~(word)(PTRS_TO_BYTES(SPLIT_RANGE_PTRS) - 1) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
      switch (tag) {
      case GC_DS_LENGTH:
        /*
         * Large length.  Process part of the range to avoid pushing
         * too much on the stack.
         */

        /* Either it is a heap object or a region outside the heap. */
        GC_ASSERT(descr < GC_greatest_real_heap_addr - GC_least_real_heap_addr
                  || GC_least_real_heap_addr + sizeof(ptr_t)
                         >= ADDR(current_p) + descr
                  || ADDR(current_p) >= GC_greatest_real_heap_addr);
#ifdef PARALLEL_MARK
#  define SHARE_BYTES 2048
        /*
         * In the parallel mode, a range longer than `SHARE_BYTES` is
         * split roughly in half (if there is room on the stack): the
         * first half is left on the stack as a separate entry, and
         * the second one is processed right away (via `retry`).
         */
        if (descr > SHARE_BYTES && GC_parallel
            && ADDR_LT((ptr_t)mark_stack_top, (ptr_t)(mark_stack_limit - 1))) {
          word new_size = (descr >> 1) & ~(word)(sizeof(ptr_t) - 1);

          mark_stack_top->mse_start = current_p;
          /* This makes sure we handle misaligned pointers. */
          mark_stack_top->mse_descr
              = (new_size + sizeof(ptr_t)) | GC_DS_LENGTH;
          mark_stack_top++;
#  ifdef ENABLE_TRACE
          if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
            GC_log_printf("GC #%lu: large section; start %p, len %lu,"
                          " splitting (parallel) at %p\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (unsigned long)descr,
                          (void *)(current_p + new_size));
          }
#  endif
          current_p += new_size;
          descr -= new_size;
          goto retry;
        }
#endif /* PARALLEL_MARK */
        /*
         * Scan the first `SPLIT_RANGE_PTRS - 1` pointers of the range
         * now; the rest of the range replaces the current stack entry.
         */
        limit = current_p + PTRS_TO_BYTES(SPLIT_RANGE_PTRS - 1);
        mark_stack_top->mse_start = limit;
        mark_stack_top->mse_descr
            = descr - PTRS_TO_BYTES(SPLIT_RANGE_PTRS - 1);
#ifdef ENABLE_TRACE
        if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
          GC_log_printf("GC #%lu: large section; start %p, len %lu,"
                        " splitting at %p\n",
                        (unsigned long)GC_gc_no, (void *)current_p,
                        (unsigned long)descr, (void *)limit);
        }
#endif
        /*
         * Make sure that pointers overlapping the two ranges are
         * considered.
         */
        limit += sizeof(ptr_t) - ALIGNMENT;
        break;
      case GC_DS_BITMAP:
        /* Pop the entry; the bitmap is processed entirely here. */
        mark_stack_top--;
#ifdef ENABLE_TRACE
        if (ADDR_INSIDE(GC_trace_ptr, current_p,
                        current_p + PTRS_TO_BYTES(BITMAP_BITS))) {
          GC_log_printf("GC #%lu: tracing from %p bitmap descr 0x%lx\n",
                        (unsigned long)GC_gc_no, (void *)current_p,
                        (unsigned long)descr);
        }
#endif
        descr &= ~(word)GC_DS_TAGS;
        credit -= (GC_signed_word)PTRS_TO_BYTES(CPP_PTRSZ / 2); /*< guess */
        /*
         * The bits are examined starting from the `SIGNB` one; each bit
         * corresponds to one pointer-sized slot of the object.
         */
        for (; descr != 0;
             descr <<= 1, current_p += sizeof(ptr_t)) { /*< not `ALIGNMENT` */
          if ((descr & SIGNB) == 0)
            continue;
          LOAD_PTR_OR_CONTINUE(q, current_p);
          FIXUP_POINTER(q);
          if (ADDR_LT(least_ha, q) && ADDR_LT(q, greatest_ha)) {
            PREFETCH(q);
#ifdef ENABLE_TRACE
            if (GC_trace_ptr == current_p) {
              GC_log_printf("GC #%lu: considering(3) %p -> %p\n",
                            (unsigned long)GC_gc_no, (void *)current_p,
                            (void *)q);
            }
#endif
            PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, current_p);
          }
        }
        continue;
      case GC_DS_PROC:
        /* Pop the entry and let the mark procedure do the pushing. */
        mark_stack_top--;
#ifdef ENABLE_TRACE
        if (ADDR_GE(GC_trace_ptr, current_p)) {
          const void *base = GC_base(current_p);

          if (base != NULL && GC_base(GC_trace_ptr) == base) {
            GC_log_printf("GC #%lu: tracing from %p, proc descr 0x%lx\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (unsigned long)descr);
          }
        }
#endif
        credit -= GC_PROC_BYTES;
        mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
                                        mark_stack_limit, ENV(descr));
        continue;
      case GC_DS_PER_OBJECT:
        /*
         * Fetch the real descriptor and process the object again (via
         * `retry`) using that descriptor.
         */
        if (!(descr & SIGNB)) {
          /* Descriptor is in the object. */
          descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
        } else {
          /*
           * Descriptor is in the type descriptor pointed to by the first
           * "pointer-sized" word of the object.
           */
          ptr_t type_descr = *(ptr_t *)current_p;

          /*
           * `type_descr` is either a valid pointer to the descriptor
           * structure, or this object was on a free list.
           * If it was anything but the last object on the free list,
           * we will misinterpret the next object on the free list as
           * the type descriptor, and get a zero GC descriptor, which
           * is ideal.  Unfortunately, we need to check for the last
           * object case explicitly.
           */
          if (UNLIKELY(NULL == type_descr)) {
            mark_stack_top--;
            continue;
          }
          descr = *(word *)(type_descr
                            - ((GC_signed_word)descr
                               + (GC_INDIR_PER_OBJ_BIAS - GC_DS_PER_OBJECT)));
        }
        if (0 == descr) {
          /*
           * Can happen either because we generated a zero GC descriptor
           * or we saw a pointer to a free object.
           */
          mark_stack_top--;
          continue;
        }
        goto retry;
      }
    } else {
      /* Small object with length descriptor. */
      mark_stack_top--;
#ifndef SMALL_CONFIG
      /* The object is too short to contain a pointer. */
      if (descr < sizeof(ptr_t))
        continue;
#endif
#ifdef ENABLE_TRACE
      if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
        GC_log_printf("GC #%lu: small object; start %p, len %lu\n",
                      (unsigned long)GC_gc_no, (void *)current_p,
                      (unsigned long)descr);
      }
#endif
      limit = current_p + descr;
    }
    /* The simple case in which we are scanning a range. */
    GC_ASSERT((ADDR(current_p) & (ALIGNMENT - 1)) == 0);
    /* Charge the credit for the length of the range to be scanned. */
    credit -= limit - current_p;
    /* From now on, `limit` is the address of the last candidate. */
    limit -= sizeof(ptr_t);
    {
#define PREF_DIST 4

#if !defined(SMALL_CONFIG) && !(defined(E2K) && defined(USE_PTR_HWTAG))
      /*
       * The last plausible heap pointer of the range (found by the
       * backward scan below); it is pushed after the forward scan.
       */
      ptr_t deferred;

#  ifdef CHERI_PURECAP
      /*
       * Check each pointer for validity before dereferencing to prevent
       * capability exceptions.  Utilize the pointer meta-data to speed-up
       * the loop.  If the loop is below the pointer bounds, skip the rest
       * of marking for that chunk.  If the limit capability restricts us to
       * reading fewer than size of a pointer, then there cannot possibly be
       * a pointer at `limit`'s pointer, and reading at that location will
       * raise a capability exception.
       */
      {
        word cap_limit = cheri_base_get(limit) + cheri_length_get(limit);

        if (ADDR(limit) + sizeof(ptr_t) > cap_limit) {
          /* Decrement limit so that it to be within bounds of `current_p`. */
          GC_ASSERT(cap_limit > sizeof(ptr_t));
          limit = (ptr_t)cheri_address_set(
              current_p, (cap_limit - sizeof(ptr_t)) & ~(sizeof(ptr_t) - 1));
          goto check_limit;
        }
      }
#  endif
      /*
       * Try to prefetch the next pointer to be examined as soon as possible.
       * Empirically, this also seems to help slightly without prefetches,
       * at least on Linux/i686.  Presumably this loop ends up with less
       * register pressure, and gcc thus ends up generating slightly better
       * code.  Overall gcc code quality for this loop is still not great.
       */
      for (;;) {
        /*
         * Scan backwards from `limit`.  The loop is left (by `break`)
         * once a plausible heap pointer is found; if `limit` gets below
         * `current_p` before that, then the object needs no further
         * processing.
         */
        PREFETCH(limit - PREF_DIST * CACHE_LINE_SIZE);
        GC_ASSERT(ADDR_GE(limit, current_p));
#  ifdef CHERI_PURECAP
        if (ADDR(limit) < cheri_base_get(limit))
          goto next_object;
        if (!HAS_TAG_AND_PERM_LOAD(limit)) {
          limit -= ALIGNMENT;
          goto check_limit;
        }
#  endif
        deferred = *(ptr_t *)limit;
        FIXUP_POINTER(deferred);
        limit -= ALIGNMENT;
#  ifdef CHERI_PURECAP
        if (!HAS_TAG_AND_PERM_LOAD(deferred))
          goto check_limit;
#  endif
        if (ADDR_LT(least_ha, deferred) && ADDR_LT(deferred, greatest_ha)) {
          PREFETCH(deferred);
          break;
        }
#  ifndef CHERI_PURECAP
        if (ADDR_LT(limit, current_p))
          goto next_object;
        /*
         * Unroll once, so we do not do too many of the prefetches based
         * on `limit`.
         */
        deferred = *(ptr_t *)limit;
        FIXUP_POINTER(deferred);
        limit -= ALIGNMENT;
        if (ADDR_LT(least_ha, deferred) && ADDR_LT(deferred, greatest_ha)) {
          PREFETCH(deferred);
          break;
        }
#  else
      check_limit:
#  endif
        if (ADDR_LT(limit, current_p))
          goto next_object;
      }
#endif

      /* Scan forward the candidates from `current_p` up to `limit`. */
      for (; ADDR_GE(limit, current_p); current_p += ALIGNMENT) {
        /*
         * Empirically, unrolling this loop does not help a lot.
         * Since `PUSH_CONTENTS` expands to a lot of code, we do not.
         */
        LOAD_PTR_OR_CONTINUE(q, current_p);
        FIXUP_POINTER(q);
        PREFETCH(current_p + PREF_DIST * CACHE_LINE_SIZE);
        if (ADDR_LT(least_ha, q) && ADDR_LT(q, greatest_ha)) {
          /*
           * Prefetch the content of the object we just pushed.
           * It is likely we will need them soon.
           */
          PREFETCH(q);
#ifdef ENABLE_TRACE
          if (GC_trace_ptr == current_p) {
            GC_log_printf("GC #%lu: considering(1) %p -> %p\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (void *)q);
          }
#endif
          PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, current_p);
        }
      }

#if !defined(SMALL_CONFIG) && !(defined(E2K) && defined(USE_PTR_HWTAG))
      /*
       * We still need to mark the entry we previously prefetched.
       * We already know that it passes the preliminary pointer validity test.
       */
#  ifdef ENABLE_TRACE
      if (GC_trace_ptr == current_p) {
        GC_log_printf("GC #%lu: considering(2) %p -> %p\n",
                      (unsigned long)GC_gc_no, (void *)current_p,
                      (void *)deferred);
      }
#  endif
      PUSH_CONTENTS(deferred, mark_stack_top, mark_stack_limit, current_p);
    next_object:;
#endif
    }
  }
  return mark_stack_top;
}

#ifdef PARALLEL_MARK

/*
 * Note: this is protected by the mark lock.
 * NOTE(review): the users of this flag are not in this part of the file;
 * presumably it tells the helper threads that there is marking work to
 * do - confirm.
 */
STATIC GC_bool GC_help_wanted = FALSE;

/*
 * Number of running helpers.  Protected by the mark lock.
 * `has_inactive_helpers()` compares it with `GC_active_count`.
 */
STATIC unsigned GC_helper_count = 0;

/*
 * Number of active helpers.  May increase and decrease within each
 * mark cycle; but once it returns to zero, it stays for the cycle.
 * Protected by the mark lock.
 */
STATIC unsigned GC_active_count = 0;

/*
 * The size of a local mark stack, in mark stack entries.
 * `GC_do_local_mark()` copies a local stack back to the global one as
 * soon as at least a half of the local stack is in use.
 */
#  ifdef LINT2
#    define LOCAL_MARK_STACK_SIZE (HBLKSIZE / 8)
#  else
/*
 * Under normal circumstances, this is big enough to guarantee we do not
 * overflow half of it in a single call to `GC_mark_from`.
 */
#    define LOCAL_MARK_STACK_SIZE HBLKSIZE
#  endif

/*
 * Allocate the local mark stack of the thread holding the allocator lock
 * (if the stack is not allocated yet), then add `GC_markers_m1` to the
 * free-list builders count and, if the resulting count is non-zero, wait
 * by calling `GC_wait_for_reclaim()`.  Do nothing if `GC_markers_m1` is
 * zero.  Abort if the memory for the stack cannot be obtained.
 */
GC_INNER void
GC_wait_for_markers_init(void)
{
  GC_signed_word n_pending;

  GC_ASSERT(I_HOLD_LOCK());
  if (0 == GC_markers_m1) {
    /* No marker threads to wait for. */
    return;
  }

#  ifdef CAN_HANDLE_FORK
  if (NULL == GC_main_local_mark_stack)
#  else
  GC_ASSERT(NULL == GC_main_local_mark_stack);
#  endif
  {
    size_t stack_bytes
        = ROUNDUP_PAGESIZE_IF_MMAP(LOCAL_MARK_STACK_SIZE * sizeof(mse));

    /*
     * This local mark stack is for the thread that holds the allocator
     * lock.
     */
    GC_ASSERT(GC_page_size != 0);
    GC_main_local_mark_stack = (mse *)GC_os_get_mem(stack_bytes);
    if (NULL == GC_main_local_mark_stack)
      ABORT("Insufficient memory for main local_mark_stack");
  }

  /*
   * The startup of the marker threads is synchronized by reusing
   * the mark lock and the builders count.
   */
  GC_acquire_mark_lock();
  GC_fl_builder_count += GC_markers_m1;
  n_pending = GC_fl_builder_count;
  GC_release_mark_lock();
  if (0 == n_pending)
    return;

  GC_ASSERT(n_pending > 0);
  GC_wait_for_reclaim();
}

/*
 * Move mark stack entries, starting from entry `low` and going up to
 * entry `high` (inclusive), to the mark stack `local`.  The entries with
 * a zero descriptor are skipped.  Each moved entry is counted as one,
 * plus `descr / 256` if it has a length descriptor; the process stops
 * once this count exceeds `n_to_get`.  Return a pointer to the top of
 * the local mark stack (which is `local - 1` if nothing is moved).
 * `*next` is set to the first entry that has not been examined.
 */
STATIC mse *
GC_steal_mark_stack(mse *low, mse *high, mse *local, size_t n_to_get,
                    mse **next)
{
  mse *src;
  mse *local_top = local - 1;
  size_t n_taken = 0;

  GC_ASSERT(ADDR_GE((ptr_t)high, (ptr_t)(low - 1))
            && (word)(high - low + 1) <= GC_mark_stack_size);
  for (src = low; ADDR_GE((ptr_t)high, (ptr_t)src) && n_taken <= n_to_get;
       ++src) {
    word descr = AO_load(&src->mse_descr);

    if (0 == descr) {
      /* A vacant (or already taken) entry. */
      continue;
    }

    /* Must be ordered after read of `mse_descr`. */
    AO_store_release_write(&src->mse_descr, 0);
    /*
     * The same entry may be taken by more than one thread, but that is
     * only a minor performance problem.
     */
    local_top++;
    local_top->mse_start = src->mse_start;
    local_top->mse_descr = descr;
    GC_ASSERT((descr & GC_DS_TAGS) != GC_DS_LENGTH /* 0 */
              || descr < GC_greatest_real_heap_addr - GC_least_real_heap_addr
              || GC_least_real_heap_addr + sizeof(ptr_t)
                     >= ADDR(src->mse_start) + descr
              || ADDR(src->mse_start) >= GC_greatest_real_heap_addr);
    /* A big object is counted as `descr / 256 + 1` objects. */
    n_taken += (descr & GC_DS_TAGS) == GC_DS_LENGTH
                   ? (size_t)(descr >> 8) + 1
                   : 1;
  }
  *next = src;
#  if defined(CPPCHECK)
  GC_noop1_ptr(local);
#  endif
  return local_top;
}

/*
 * Append the entries of a local mark stack (from `low` to `high`,
 * inclusive) to the global mark stack, holding the mark lock.  If there
 * is no room in the global stack, then the entries are dropped, the mark
 * state is invalidated, and the global stack is requested to be resized.
 * Finally, all the markers are notified.  Do nothing if the given range
 * is empty.
 */
STATIC void
GC_return_mark_stack(mse *low, mse *high)
{
  mse *global_top;
  mse *dest;
  size_t n_entries;

  if (ADDR_LT((ptr_t)high, (ptr_t)low)) {
    /* The local mark stack is empty. */
    return;
  }

  n_entries = high - low + 1;
  GC_acquire_mark_lock();
  /* Note: the concurrent modification is impossible. */
  global_top = GC_mark_stack_top;
  dest = global_top + 1;
  if ((word)(dest - GC_mark_stack + n_entries) <= (word)GC_mark_stack_size) {
    BCOPY(low, dest, n_entries * sizeof(mse));
    GC_ASSERT((mse *)GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
              == global_top);
    /* Ensures visibility of previously written stack contents. */
    GC_cptr_store_release_write((volatile ptr_t *)&GC_mark_stack_top,
                                (ptr_t)(global_top + n_entries));
  } else {
    GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
    GC_mark_state = MS_INVALID;
    GC_mark_stack_too_small = TRUE;
    /* We drop the local mark stack.  We will fix things later. */
  }
  GC_release_mark_lock();
  GC_notify_all_marker();
}

/*
 * The number of `GC_mark_from()` calls which `GC_do_local_mark()` makes
 * in a row before it checks whether some part of the local mark stack
 * should be given back to the global one.  Could be overridden at the
 * build time.
 */
#  ifndef N_LOCAL_ITERS
#    define N_LOCAL_ITERS 1
#  endif

/*
 * Tell whether the number of the active helpers is less than the number
 * of the running ones.  The counters are examined with the mark lock
 * held.  Note: this is called only when the local and the main mark
 * stacks are both empty.
 */
static GC_bool
has_inactive_helpers(void)
{
  GC_bool some_inactive;

  GC_acquire_mark_lock();
  some_inactive = GC_active_count < GC_helper_count;
  GC_release_mark_lock();
  return some_inactive;
}

/*
 * Mark from the local mark stack until it becomes empty.  The stack is
 * emptied either by processing all its entries or by copying the
 * remaining ones back into the global mark stack.  The mark lock is not
 * held by the caller.
 */
STATIC void
GC_do_local_mark(mse *local_mark_stack, mse *local_top)
{
  mse *const local_limit = local_mark_stack + LOCAL_MARK_STACK_SIZE;

  for (;;) {
    unsigned iter = 0;
    mse *split;

    while (iter < N_LOCAL_ITERS) {
      local_top = GC_mark_from(local_top, local_mark_stack, local_limit);
      if (ADDR_LT((ptr_t)local_top, (ptr_t)local_mark_stack)) {
        /* The local stack has been drained completely. */
        return;
      }
      if ((word)(local_top - local_mark_stack) >= LOCAL_MARK_STACK_SIZE / 2) {
        /* The local stack is at least half full: give everything back. */
        GC_return_mark_stack(local_mark_stack, local_top);
        return;
      }
      iter++;
    }

    /* Consider sharing only if the main stack looks empty... */
    if (!ADDR_LT(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top),
                 GC_cptr_load(&GC_first_nonempty)))
      continue;
    /* ...and we hold at least two entries... */
    if (!ADDR_LT((ptr_t)(local_mark_stack + 1), (ptr_t)local_top))
      continue;
    /* ...and some helper is waiting for a refill. */
    if (!has_inactive_helpers())
      continue;

    /*
     * Share the load.  The entries near the bottom of the stack are
     * likely to require more work, so those are the ones returned,
     * even though it is harder (the rest has to be moved down).
     */
    split = local_mark_stack + (local_top - local_mark_stack) / 2;
    GC_ASSERT(ADDR_LT((ptr_t)local_mark_stack, (ptr_t)split)
              && ADDR_LT((ptr_t)split, (ptr_t)local_top));
    GC_return_mark_stack(local_mark_stack, split - 1);
    memmove(local_mark_stack, split, (local_top - split + 1) * sizeof(mse));
    local_top -= split - local_mark_stack;
  }
}

/*
 * The number of entries that `GC_mark_local` requests from
 * `GC_steal_mark_stack` at a time, provided at least twice as many
 * entries are counted on the global mark stack; otherwise a single
 * entry is requested.  May be overridden at compile time.
 */
#  ifndef ENTRIES_TO_GET
#    define ENTRIES_TO_GET 5
#  endif

/*
 * Mark using the local mark stack until the global mark stack is empty and
 * there are no active workers.  Update `GC_first_nonempty` to reflect the
 * progress.  Caller holds the mark lock.  Caller has already incremented
 * `GC_helper_count`; we decrement it, and maintain `GC_active_count`.
 * The mark lock is released while marking, but it is held again at the
 * point of return.  `id` is used only for the log messages.
 */
STATIC void
GC_mark_local(mse *local_mark_stack, int id)
{
  /* Our private view of the first global entry not yet stolen. */
  mse *my_first_nonempty;

  GC_active_count++;
  my_first_nonempty = (mse *)GC_cptr_load(&GC_first_nonempty);
  GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack));
  GC_ASSERT(
      ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top) + sizeof(mse),
              (ptr_t)my_first_nonempty));
  GC_VERBOSE_LOG_PRINTF("Starting mark helper %d\n", id);
  GC_release_mark_lock();
  for (;;) {
    size_t n_on_stack, n_to_get;
    mse *my_top, *local_top;
    mse *global_first_nonempty = (mse *)GC_cptr_load(&GC_first_nonempty);

    GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack)
              && ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
                             + sizeof(mse),
                         (ptr_t)my_first_nonempty));
    GC_ASSERT(ADDR_GE((ptr_t)global_first_nonempty, (ptr_t)GC_mark_stack));
    /* Reconcile the private and the shared views; the larger one wins. */
    if (ADDR_LT((ptr_t)my_first_nonempty, (ptr_t)global_first_nonempty)) {
      my_first_nonempty = global_first_nonempty;
    } else if (ADDR_LT((ptr_t)global_first_nonempty,
                       (ptr_t)my_first_nonempty)) {
      (void)GC_cptr_compare_and_swap(&GC_first_nonempty,
                                     (ptr_t)global_first_nonempty,
                                     (ptr_t)my_first_nonempty);
      /*
       * If this fails, then we just go ahead, without updating
       * `GC_first_nonempty`.
       */
    }
    /*
     * Perhaps we should also update `GC_first_nonempty`, if it is less.
     * But that would require usage of the atomic updates.
     */
    my_top = (mse *)GC_cptr_load_acquire((volatile ptr_t *)&GC_mark_stack_top);
    if (ADDR_LT((ptr_t)my_top, (ptr_t)my_first_nonempty)) {
      /* The global stack looks empty; recheck while holding the lock. */
      GC_acquire_mark_lock();
      /*
       * Note: asynchronous modification is impossible here, since
       * we hold the mark lock.
       */
      my_top = GC_mark_stack_top;
      n_on_stack = my_top - my_first_nonempty + 1;
      if (0 == n_on_stack) {
        GC_active_count--;
        GC_ASSERT(GC_active_count <= GC_helper_count);
        /* Other markers may redeposit objects on the stack. */
        if (0 == GC_active_count)
          GC_notify_all_marker();
        while (GC_active_count > 0
               && ADDR_LT((ptr_t)GC_mark_stack_top,
                          GC_cptr_load(&GC_first_nonempty))) {
          /*
           * We will be notified if either `GC_active_count` reaches zero,
           * or if more objects are pushed on the global mark stack.
           */
          GC_wait_marker();
        }
        if (0 == GC_active_count
            && ADDR_LT((ptr_t)GC_mark_stack_top,
                       GC_cptr_load(&GC_first_nonempty))) {
          GC_bool need_to_notify = FALSE;

          /*
           * The above conditions cannot be falsified while we hold
           * the mark lock, since neither `GC_active_count` nor
           * `GC_mark_stack_top` can change.  `GC_first_nonempty` can
           * only be incremented asynchronously.  Thus we know that
           * both conditions are actually held simultaneously.
           */
          GC_helper_count--;
          if (0 == GC_helper_count)
            need_to_notify = TRUE;
          GC_VERBOSE_LOG_PRINTF("Finished mark helper %d\n", id);
          if (need_to_notify)
            GC_notify_all_marker();
          /* Note: the mark lock is still held here. */
          return;
        }
        /*
         * Else there is something on the stack again, or another helper
         * may push something.
         */
        GC_active_count++;
        GC_ASSERT(GC_active_count > 0);
        GC_release_mark_lock();
        continue;
      } else {
        GC_release_mark_lock();
      }
    } else {
      n_on_stack = my_top - my_first_nonempty + 1;
    }
    /* Take just one entry if only a few are available. */
    n_to_get = ENTRIES_TO_GET;
    if (n_on_stack < 2 * ENTRIES_TO_GET)
      n_to_get = 1;
    local_top
        = GC_steal_mark_stack(my_first_nonempty, my_top, local_mark_stack,
                              n_to_get, &my_first_nonempty);
    GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack)
              && ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
                             + sizeof(mse),
                         (ptr_t)my_first_nonempty));
    GC_do_local_mark(local_mark_stack, local_top);
  }
}

/*
 * Perform parallel mark.  We hold the allocator lock, but not the mark lock.
 * Currently runs until the mark stack is empty.  The calling thread takes
 * part in the marking itself (as the marker with identifier 0), then
 * waits for all the helpers to finish and advances `GC_mark_no`.
 */
STATIC void
GC_do_parallel_mark(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_acquire_mark_lock();
  GC_ASSERT(!GC_help_wanted);
  GC_ASSERT(0 == GC_active_count && 0 == GC_helper_count);
  GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
                        (unsigned long)GC_mark_no);

  GC_cptr_store(&GC_first_nonempty, (ptr_t)GC_mark_stack);
  GC_active_count = 0;
  /* Count ourselves as a helper (`GC_mark_local` expects that). */
  GC_helper_count = 1;
  GC_help_wanted = TRUE;
  /* Wake up potential helpers. */
  GC_notify_all_marker();
  /* Note: the mark lock is held again when `GC_mark_local` returns. */
  GC_mark_local(GC_main_local_mark_stack, 0);
  GC_help_wanted = FALSE;
  /* Done; clean up. */
  while (GC_helper_count > 0) {
    GC_wait_marker();
  }
  /* `GC_helper_count` cannot be incremented while not `GC_help_wanted`. */
  GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
                        (unsigned long)GC_mark_no);
  GC_mark_no++;
  GC_release_mark_lock();
  GC_notify_all_marker();
}

/*
 * Try to help out the marker for the mark phase number `my_mark_no`.
 * Wait until that phase is reached and help is wanted; then, unless the
 * phase is already over or there are enough helpers, register as a helper
 * and mark using a mark stack local to this call.
 * NOTE(review): the caller presumably holds the mark lock, as
 * `GC_mark_local` expects that of its caller - confirm against callers.
 */
GC_INNER void
GC_help_marker(word my_mark_no)
{
#  define my_id my_id_mse.mse_descr
  /*
   * Put `my_id` inside the structure to keep `local_mark_stack` aligned
   * explicitly.
   */
  mse my_id_mse;
  mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
  /* Note: `local_mark_stack` is quite big (up to 128 KiB). */

  GC_ASSERT(I_DONT_HOLD_LOCK());
  GC_ASSERT(GC_parallel);
  /* Wait while our phase is not reached, or is reached but needs no help. */
  while (GC_mark_no < my_mark_no
         || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
    GC_wait_marker();
  }
  /* The current number of helpers serves as our identifier. */
  my_id = GC_helper_count;
  if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
    /*
     * The second test is useful only if the original threads can also
     * act as helpers.  Under Linux they cannot.
     */
    return;
  }
  GC_helper_count = (unsigned)my_id + 1;
  GC_mark_local(local_mark_stack, (int)my_id);
  /* `GC_mark_local` decrements `GC_helper_count`. */
#  undef my_id
}

#endif /* PARALLEL_MARK */

/*
 * Allocate or reallocate space for mark stack of size `n` entries.
 * May silently fail.
 */
static void
alloc_mark_stack(size_t n)
{
#ifdef GWW_VDB
  static GC_bool GC_incremental_at_stack_alloc = FALSE;

  GC_bool recycle_old;
#endif
  mse *new_stack;

  GC_ASSERT(I_HOLD_LOCK());
  new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
#ifdef GWW_VDB
  /*
   * Do not recycle a stack segment obtained with the wrong flags.
   * Win32 `GetWriteWatch` requires the right kind of memory.
   */
  recycle_old = !GC_auto_incremental || GC_incremental_at_stack_alloc;
  GC_incremental_at_stack_alloc = GC_auto_incremental;
#endif

  GC_mark_stack_too_small = FALSE;
  if (GC_mark_stack != NULL) {
    if (new_stack != 0) {
#ifdef GWW_VDB
      if (recycle_old)
#endif
      {
        /* Recycle old space. */
        GC_scratch_recycle_inner(
            GC_mark_stack, GC_mark_stack_size * sizeof(struct GC_ms_entry));
      }
      GC_mark_stack = new_stack;
      GC_mark_stack_size = n;
      /* FIXME: Do we need some way to reset `GC_mark_stack_size`? */
      GC_mark_stack_limit = new_stack + n;
      GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
                         (unsigned long)GC_mark_stack_size);
    } else {
      WARN("Failed to grow mark stack to %" WARN_PRIuPTR " frames\n", n);
    }
  } else if (NULL == new_stack) {
    GC_err_printf("No space for mark stack\n");
    EXIT();
  } else {
    GC_mark_stack = new_stack;
    GC_mark_stack_size = n;
    GC_mark_stack_limit = new_stack + n;
  }
  GC_mark_stack_top = GC_mark_stack - 1;
}

/* Allocate the mark stack of the initial size. */
GC_INNER void
GC_mark_init(void)
{
  alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}

/*
 * Push the memory range `bottom` .. `top` onto the global mark stack as
 * a single entry with a length descriptor.  The bounds are first aligned
 * inward; an empty range is ignored.  Aborts if the mark stack has no
 * room for the entry.
 */
GC_API void GC_CALL
GC_push_all(void *bottom, void *top)
{
  ptr_t lo = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  ptr_t hi = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT);
  mse *entry;
  word n_bytes;

  if (ADDR_GE(lo, hi))
    return;

  entry = GC_mark_stack_top + 1;
  if (ADDR_GE((ptr_t)entry, (ptr_t)GC_mark_stack_limit)) {
    ABORT("Unexpected mark stack overflow");
  }

  n_bytes = (word)(hi - lo);
#if GC_DS_TAGS > ALIGNMENT - 1
  /* Round up so that the tag bits of the descriptor are clear. */
  n_bytes = (n_bytes + GC_DS_TAGS) & ~(word)GC_DS_TAGS;
#endif
  entry->mse_start = lo;
  entry->mse_descr = n_bytes | GC_DS_LENGTH;
  GC_mark_stack_top = entry;
}

/*
 * Same as `GC_push_all` but the entry is pushed onto the mark stack
 * designated by `mark_stack_top` and `mark_stack_limit` (by means of
 * `GC_custom_push_proc`).  Returns the new top of that stack, which is
 * `mark_stack_top` itself if the aligned range is empty.
 */
GC_API struct GC_ms_entry *GC_CALL
GC_custom_push_range(void *bottom, void *top,
                     struct GC_ms_entry *mark_stack_top,
                     struct GC_ms_entry *mark_stack_limit)
{
  ptr_t lo = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  ptr_t hi = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT);
  word n_bytes;

  if (ADDR_GE(lo, hi))
    return mark_stack_top;

  n_bytes = (word)(hi - lo);
#if GC_DS_TAGS > ALIGNMENT - 1
  /* Round up so that the tag bits of the descriptor are clear. */
  n_bytes = (n_bytes + GC_DS_TAGS) & ~(word)GC_DS_TAGS;
#endif
  return GC_custom_push_proc(n_bytes | GC_DS_LENGTH, lo, mark_stack_top,
                             mark_stack_limit);
}

/*
 * Push the entry composed of `obj` and `descr` onto the mark stack
 * designated by `mark_stack_top` and `mark_stack_limit`, and return the
 * new top.  If the next slot is at or beyond the limit, then the slot to
 * use is obtained from `GC_signal_mark_stack_overflow` instead.
 */
GC_API struct GC_ms_entry *GC_CALL
GC_custom_push_proc(GC_word descr, void *obj,
                    struct GC_ms_entry *mark_stack_top,
                    struct GC_ms_entry *mark_stack_limit)
{
  struct GC_ms_entry *entry = mark_stack_top + 1;

  if (ADDR_GE((ptr_t)entry, (ptr_t)mark_stack_limit))
    entry = GC_signal_mark_stack_overflow(entry);

  entry->mse_start = (ptr_t)obj;
  entry->mse_descr = descr;
  return entry;
}

/*
 * Push the entry composed of `obj` and `descr` onto the global mark
 * stack (using `GC_custom_push_proc`) and update `GC_mark_stack_top`
 * accordingly.
 */
GC_API void GC_CALL
GC_push_proc(GC_word descr, void *obj)
{
  GC_mark_stack_top = GC_custom_push_proc(descr, obj, GC_mark_stack_top,
                                          GC_mark_stack_limit);
}

#ifndef GC_DISABLE_INCREMENTAL

/*
 * Analogous to `GC_push_all`, but push only those pages `h` with
 * `dirty_fn(h) != 0`.  `GC_push_all` is used to actually push each piece.
 * Serves both to selectively push dirty pages, and to push a block in
 * a piecemeal fashion, to allow for more marking concurrency.
 * Once the mark stack is more than three quarters full, the whole
 * remainder of the range is pushed as one entry to avoid an overflow.
 * Will not overflow mark stack if `GC_push_all` pushes a small fixed
 * number of entries.  (This is invoked only if `GC_push_all` pushes
 * a single entry, or if it marks each object before pushing it, thus
 * ensuring progress in the event of a stack overflow.)
 */
STATIC void
GC_push_selected(ptr_t bottom, ptr_t top, GC_bool (*dirty_fn)(struct hblk *))
{
  struct hblk *blk;

  bottom = PTR_ALIGN_UP(bottom, ALIGNMENT);
  top = PTR_ALIGN_DOWN(top, ALIGNMENT);
  if (ADDR_GE(bottom, top))
    return;

  /* `blk - 1` is tested below as the page which `bottom` belongs to. */
  blk = HBLKPTR(bottom + HBLKSIZE);
  if (ADDR_GE((ptr_t)blk, top)) {
    /* The range does not extend beyond its first page. */
    if (dirty_fn(blk - 1))
      GC_push_all(bottom, top);
    return;
  }

  /* The leading, possibly partial, page. */
  if (dirty_fn(blk - 1)) {
    if ((word)(GC_mark_stack_top - GC_mark_stack)
        > 3 * GC_mark_stack_size / 4) {
      GC_push_all(bottom, top);
      return;
    }
    GC_push_all(bottom, blk);
  }

  /* The pages lying entirely inside the range. */
  for (; ADDR_GE(top, (ptr_t)(blk + 1)); blk++) {
    if (!dirty_fn(blk))
      continue;
    if ((word)(GC_mark_stack_top - GC_mark_stack)
        > 3 * GC_mark_stack_size / 4) {
      /* Danger of mark stack overflow. */
      GC_push_all(blk, top);
      return;
    }
    GC_push_all(blk, blk + 1);
  }

  /* The trailing partial page, if any. */
  if ((ptr_t)blk != top && dirty_fn(blk))
    GC_push_all(blk, top);
}

/*
 * Push the range `bottom` .. `top`: the pages reported as dirty only if
 * `all` is zero, the whole range otherwise.  (In the latter case, if
 * `PROC_VDB` is in use and the incremental mode is on, the pages never
 * dirtied are still skipped.)
 */
GC_API void GC_CALL
GC_push_conditional(void *bottom, void *top, int all)
{
  if (all) {
#  ifdef PROC_VDB
    if (GC_auto_incremental) {
      /* Pages that were never dirtied cannot contain pointers. */
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
      return;
    }
#  endif
    GC_push_all(bottom, top);
    return;
  }
  GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
}

#  ifndef NO_VDB_FOR_STATIC_ROOTS
#    ifndef PROC_VDB
/*
 * Same as `GC_page_was_dirty` but `h` is allowed to point to some page
 * in the registered static roots only.  Not used if the manual VDB is on.
 * The result is the entry of `GC_grungy_pages` hash table that
 * corresponds to `h`.
 */
STATIC GC_bool
GC_static_page_was_dirty(struct hblk *h)
{
  return get_pht_entry_from_index(GC_grungy_pages, PHT_HASH(h));
}
#    endif

/*
 * A variant of `GC_push_conditional` for the ranges belonging to the
 * static roots.  The dirty pages are selected only if `all` is false
 * and the dirty bits are maintained for the static roots; otherwise
 * the whole range is pushed.
 */
GC_INNER void
GC_push_conditional_static(void *bottom, void *top, GC_bool all)
{
#    ifdef PROC_VDB
  /*
   * Just redirect to the generic routine because `PROC_VDB`
   * implementation gets the dirty bits map for the whole process memory.
   */
  GC_push_conditional(bottom, top, all);
#    else
  if (!all && GC_is_vdb_for_static_roots()) {
    GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_static_page_was_dirty);
  } else {
    GC_push_all(bottom, top);
  }
#    endif
}
#  endif /* !NO_VDB_FOR_STATIC_ROOTS */

#else
/*
 * The variant for the case of the incremental collection support
 * disabled at compile time: `all` is ignored, and the whole range is
 * always pushed.
 */
GC_API void GC_CALL
GC_push_conditional(void *bottom, void *top, int all)
{
  UNUSED_ARG(all);
  GC_push_all(bottom, top);
}
#endif /* GC_DISABLE_INCREMENTAL */

#if defined(DARWIN) && defined(THREADS)
/*
 * Process `p` as a candidate pointer by means of `GC_PUSH_ONE_STACK`,
 * with `MARKED_FROM_REGISTER` passed as its source.
 */
void
GC_push_one(word p)
{
  GC_PUSH_ONE_STACK((ptr_t)p, MARKED_FROM_REGISTER);
}
#endif /* DARWIN && THREADS */

#if defined(GC_WIN32_THREADS)
GC_INNER void
GC_push_many_regs(const word *regs, unsigned count)
{
  unsigned i;

  for (i = 0; i < count; i++)
    GC_PUSH_ONE_STACK((ptr_t)regs[i], MARKED_FROM_REGISTER);
}
#endif /* GC_WIN32_THREADS */

/*
 * Mark the object `obj` refers to and push its contents onto the mark
 * stack designated by `mark_stack_top` and `mark_stack_limit`; the new
 * top of that stack is returned.  `src` is the location where the pointer
 * was found.  If no valid header is found for `obj`, or the block is
 * free, then the pointer is black-listed and the stack is left unchanged.
 */
GC_API struct GC_ms_entry *GC_CALL
GC_mark_and_push(void *obj, mse *mark_stack_top, mse *mark_stack_limit,
                 void **src)
{
  hdr *hhdr;
  GC_bool is_bogus = FALSE;

  PREFETCH(obj);
  GET_HDR(obj, hhdr);
  if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr))) {
    if (GC_all_interior_pointers) {
      /* Retry with the header of the object base. */
      hhdr = GC_find_header(GC_base(obj));
      if (NULL == hhdr)
        is_bogus = TRUE;
    } else {
      is_bogus = TRUE;
    }
  }
  if (!is_bogus && UNLIKELY(HBLK_IS_FREE(hhdr)))
    is_bogus = TRUE;

  if (is_bogus) {
    GC_ADD_TO_BLACK_LIST_NORMAL((ptr_t)obj, (ptr_t)src);
    return mark_stack_top;
  }
  return GC_push_contents_hdr((ptr_t)obj, mark_stack_top, mark_stack_limit,
                              (ptr_t)src, hhdr, TRUE);
}

/*
 * Mark the object the candidate pointer `p` refers to and push its
 * contents onto the global mark stack.  If no object header is found for
 * `p` (even after resolving it to the object base), or the block is free,
 * then `p` is black-listed instead.  The `source` argument exists only
 * if `PRINT_BLACK_LIST` or `KEEP_BACK_PTRS` is defined; otherwise it is
 * a macro expanding to a null pointer within this function.
 */
GC_ATTR_NO_SANITIZE_ADDR
GC_INNER void
#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
GC_mark_and_push_stack(ptr_t p, ptr_t source)
#else
GC_mark_and_push_stack(ptr_t p)
#  define source ((ptr_t)0)
#endif
{
  hdr *hhdr;
  /* The address passed to `GC_push_contents_hdr`: `p` or its base. */
  ptr_t r = p;

  PREFETCH(p);
  GET_HDR(p, hhdr);
  if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr))) {
    /* Unless there is no header at all, retry with the object base. */
    if (NULL == hhdr || (r = (ptr_t)GC_base(p)) == NULL
        || (hhdr = HDR(r)) == NULL) {
      GC_ADD_TO_BLACK_LIST_STACK(p, source);
      return;
    }
  }
  if (UNLIKELY(HBLK_IS_FREE(hhdr))) {
    GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
    return;
  }
#ifdef THREADS
  /*
   * Pointer is on the stack.  We may have dirtied the object it points to,
   * but have not called `GC_dirty` yet.
   */
  GC_dirty(p); /*< entire object */
#endif
  GC_mark_stack_top = GC_push_contents_hdr(
      r, GC_mark_stack_top, GC_mark_stack_limit, source, hhdr, FALSE);
  /*
   * We silently ignore pointers to near the end of a block, which is
   * very mildly suboptimal.
   */
  /* FIXME: We should probably add a header word to address this. */
#undef source
}

#ifdef TRACE_BUF
#  ifndef TRACE_ENTRIES
#    define TRACE_ENTRIES 1000
#  endif

/*
 * An element of the circular trace buffer (`GC_trace_buf`), which is
 * filled in by `GC_add_trace_entry`.
 */
struct trace_entry {
  /* The name passed by the caller; `NULL` means the entry is unused. */
  const char *caller_fn_name;
  /* The value of `GC_gc_no` at the time the entry was added. */
  word gc_no;
  /* The value of `GC_bytes_allocd` at the time the entry was added. */
  word bytes_allocd;
  /* The caller-supplied pointers, stored in the hidden form. */
  GC_hidden_pointer arg1;
  GC_hidden_pointer arg2;
} GC_trace_buf[TRACE_ENTRIES] = { { (const char *)NULL, 0, 0, 0, 0 } };

/*
 * Record an entry in the circular trace buffer at the current position
 * (overwriting the oldest one once the buffer wraps around), then advance
 * the position.  The pointers `arg1` and `arg2` are stored hidden.
 */
void
GC_add_trace_entry(const char *caller_fn_name, ptr_t arg1, ptr_t arg2)
{
  size_t pos = GC_trace_buf_pos;
  struct trace_entry *entry = &GC_trace_buf[pos];

  entry->caller_fn_name = caller_fn_name;
  entry->gc_no = GC_gc_no;
  entry->bytes_allocd = GC_bytes_allocd;
  entry->arg1 = GC_HIDE_POINTER(arg1);
  entry->arg2 = GC_HIDE_POINTER(arg2);

  /* Advance the position, wrapping around at the end of the buffer. */
  pos++;
  GC_trace_buf_pos = pos < TRACE_ENTRIES ? pos : 0;
}

/*
 * Print the trace buffer entries starting from the most recent one and
 * going backwards.  Stop at the first entry which is unused or which
 * was recorded at a collection number less than `gc_no`.  If the whole
 * buffer has been printed without encountering such an entry, then
 * report that the trace is incomplete.
 */
GC_API void GC_CALL
GC_print_trace_inner(GC_word gc_no)
{
  size_t i = GC_trace_buf_pos;

  for (;;) {
    const struct trace_entry *p;

    /* Step back circularly: the entry to examine is at `i - 1`. */
    if (0 == i)
      i = TRACE_ENTRIES;
    p = &GC_trace_buf[i - 1];

    if (NULL == p->caller_fn_name)
      return;
    /*
     * Compare `gc_no` values (`p->gc_no` is less than given `gc_no`)
     * taking into account that the counter may overflow.
     */
    if (((p->gc_no - gc_no) & SIGNB) != 0)
      return;

    GC_printf("Trace:%s (gc:%lu, bytes:%lu) %p, %p\n", p->caller_fn_name,
              (unsigned long)p->gc_no, (unsigned long)p->bytes_allocd,
              GC_REVEAL_POINTER(p->arg1), GC_REVEAL_POINTER(p->arg2));
    if (i == GC_trace_buf_pos + 1) {
      /* All the entries of the buffer have been visited. */
      break;
    }
    i--;
  }
  GC_printf("Trace incomplete\n");
}

/*
 * Same as `GC_print_trace_inner` but the latter is called between
 * `READER_LOCK()` and `READER_UNLOCK()`.
 */
GC_API void GC_CALL
GC_print_trace(GC_word gc_no)
{
  READER_LOCK();
  GC_print_trace_inner(gc_no);
  READER_UNLOCK();
}
#endif /* TRACE_BUF */

/*
 * Scan the range `bottom` .. `top` eagerly: load a value at every
 * `ALIGNMENT`-aligned position of the range where a whole pointer fits,
 * and process it as a candidate pointer by means of `GC_PUSH_ONE_STACK`.
 * Does nothing if `top` is `NULL`.
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_API void GC_CALL
GC_push_all_eager(void *bottom, void *top)
{
  REGISTER ptr_t current_p;
  REGISTER word lim_addr;
  /*
   * The local copies of the plausible heap bounds; the global names are
   * redirected to these copies till the end of the function.
   */
  REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
#define GC_greatest_plausible_heap_addr greatest_ha
#define GC_least_plausible_heap_addr least_ha

  if (NULL == top)
    return;
  /* Check all pointers in range and push if they appear to be valid. */
  current_p = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  /* The last address where a whole pointer fits below the aligned `top`. */
  lim_addr = ADDR(PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT)) - sizeof(ptr_t);
#ifdef CHERI_PURECAP
  {
    /* Do not scan beyond the bounds of `current_p` capability. */
    word cap_limit = cheri_base_get(current_p) + cheri_length_get(current_p);

    if (lim_addr >= cap_limit)
      lim_addr = cap_limit - sizeof(ptr_t);
  }
#endif
  for (; ADDR(current_p) <= lim_addr; current_p += ALIGNMENT) {
    REGISTER ptr_t q;

    LOAD_PTR_OR_CONTINUE(q, current_p);
    GC_PUSH_ONE_STACK(q, current_p);
  }
#undef GC_greatest_plausible_heap_addr
#undef GC_least_plausible_heap_addr
}

/*
 * Push the stack section `bottom` .. `top`.  The section is pushed
 * lazily (as a single entry, by `GC_push_all`) if all interior pointers
 * are recognized and the mark stack has sufficient free space; otherwise
 * it is scanned immediately by `GC_push_all_eager`.  The lazy variant is
 * never used if `NEED_FIXUP_POINTER` is defined, and, in case of
 * a multi-threaded build with `MPROTECT_VDB`, if the incremental mode
 * is on.
 */
GC_INNER void
GC_push_all_stack(ptr_t bottom, ptr_t top)
{
  GC_ASSERT(I_HOLD_LOCK());
#ifndef NEED_FIXUP_POINTER
  if (GC_all_interior_pointers
#  if defined(THREADS) && defined(MPROTECT_VDB)
      && !GC_auto_incremental
#  endif
      && ADDR_LT((ptr_t)GC_mark_stack_top,
                 (ptr_t)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE / 8))) {
    GC_push_all(bottom, top);
    return;
  }
#endif
  GC_push_all_eager(bottom, top);
}

#if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
/*
 * Similar to `GC_push_all_eager` but every loaded value is processed by
 * `GC_PUSH_ONE_HEAP`.  Currently `all` is ignored, thus the whole range
 * is always scanned.  Does nothing if `top` is `NULL`.
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_INNER void
GC_push_conditional_eager(void *bottom, void *top, GC_bool all)
{
  REGISTER ptr_t current_p;
  REGISTER ptr_t lim;
  /*
   * The local copies of the plausible heap bounds; the global names are
   * redirected to these copies till the end of the function.
   */
  REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
#  define GC_greatest_plausible_heap_addr greatest_ha
#  define GC_least_plausible_heap_addr least_ha

  if (NULL == top)
    return;

  /* TODO: If not `all`, then scan only dirty pages. */
  (void)all;

  current_p = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  /* The last position where a whole pointer fits below the aligned `top`. */
  lim = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT) - sizeof(ptr_t);
  for (; ADDR_GE(lim, current_p); current_p += ALIGNMENT) {
    REGISTER ptr_t q;

    LOAD_PTR_OR_CONTINUE(q, current_p);
    GC_PUSH_ONE_HEAP(q, current_p, GC_mark_stack_top);
  }
#  undef GC_greatest_plausible_heap_addr
#  undef GC_least_plausible_heap_addr
}
#endif /* WRAP_MARK_SOME && PARALLEL_MARK */

#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) \
    && !defined(MARK_BIT_PER_OBJ) && GC_GRANULE_PTRS <= 4
#  define USE_PUSH_MARKED_ACCELERATORS
/*
 * `PUSH_GRANULE(q)` processes, by means of `GC_PUSH_ONE_HEAP`, each of
 * the pointer-sized words of the granule that starts at `q` (the latter
 * is of a pointer to `ptr_t` type).  The variants handle a granule
 * consisting of 1, 2 or 4 words (`GC_GRANULE_PTRS`), respectively.
 */
#  if GC_GRANULE_PTRS == 1
#    define PUSH_GRANULE(q)                                \
      do {                                                 \
        ptr_t qcontents = (q)[0];                          \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
      } while (0)
#  elif GC_GRANULE_PTRS == 2
#    define PUSH_GRANULE(q)                                      \
      do {                                                       \
        ptr_t qcontents = (q)[0];                                \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top);       \
        qcontents = (q)[1];                                      \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 1, GC_mark_stack_top); \
      } while (0)
#  else
#    define PUSH_GRANULE(q)                                      \
      do {                                                       \
        ptr_t qcontents = (q)[0];                                \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top);       \
        qcontents = (q)[1];                                      \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 1, GC_mark_stack_top); \
        qcontents = (q)[2];                                      \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 2, GC_mark_stack_top); \
        qcontents = (q)[3];                                      \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 3, GC_mark_stack_top); \
      } while (0)
#  endif

/*
 * Push all objects reachable from marked objects in the given block
 * containing objects of size 1 granule.  The mark bits are examined
 * a word at a time; each set bit designates a marked granule.
 */
GC_ATTR_NO_SANITIZE_THREAD
STATIC void
GC_push_marked1(struct hblk *h, const hdr *hhdr)
{
  const word *mark_word_addr
      = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
  ptr_t *p;
  ptr_t plim;

  /*
   * Allow registers to be used for some frequently accessed global variables.
   * Otherwise aliasing issues are likely to prevent that.
   */
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  mse *mark_stack_top = GC_mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  /* Redirect the global names to the local copies declared above. */
#  undef GC_mark_stack_top
#  undef GC_mark_stack_limit
#  define GC_mark_stack_top mark_stack_top
#  define GC_mark_stack_limit mark_stack_limit
#  define GC_greatest_plausible_heap_addr greatest_ha
#  define GC_least_plausible_heap_addr least_ha

  p = (ptr_t *)h->hb_body;
  plim = (ptr_t)h + HBLKSIZE;

  /* Go through all granules in block. */
  while (ADDR_LT((ptr_t)p, plim)) {
    word mark_word = *mark_word_addr++;
    ptr_t *q;

    /* Each bit of `mark_word` corresponds to one granule at `q`. */
    for (q = p; mark_word != 0; mark_word >>= 1) {
      if ((mark_word & 1) != 0)
        PUSH_GRANULE(q);
      q += GC_GRANULE_PTRS;
    }
    /* Advance by the number of granules covered by a single mark word. */
    p += CPP_WORDSZ * GC_GRANULE_PTRS;
  }

  /* Restore the global names and store the updated stack top back. */
#  undef GC_greatest_plausible_heap_addr
#  undef GC_least_plausible_heap_addr
#  undef GC_mark_stack_top
#  undef GC_mark_stack_limit
#  define GC_mark_stack_limit GC_arrays._mark_stack_limit
#  define GC_mark_stack_top GC_arrays._mark_stack_top
  GC_mark_stack_top = mark_stack_top;
}

#  ifndef UNALIGNED_PTRS
/*
 * Push all objects reachable from marked objects in the given block
 * of two-granule objects.  The mark bits are examined a word at a time,
 * two bits per object, and only the first bit of each pair is tested.
 */
GC_ATTR_NO_SANITIZE_THREAD
STATIC void
GC_push_marked2(struct hblk *h, const hdr *hhdr)
{
  const word *mark_word_addr
      = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
  ptr_t *p;
  ptr_t plim;
  /* The local copies of some frequently accessed global variables. */
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  mse *mark_stack_top = GC_mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  /* Redirect the global names to the local copies declared above. */
#    undef GC_mark_stack_top
#    undef GC_mark_stack_limit
#    define GC_mark_stack_top mark_stack_top
#    define GC_mark_stack_limit mark_stack_limit
#    define GC_greatest_plausible_heap_addr greatest_ha
#    define GC_least_plausible_heap_addr least_ha

  p = (ptr_t *)h->hb_body;
  plim = (ptr_t)h + HBLKSIZE;

  /* Go through all granules in block. */
  while (ADDR_LT((ptr_t)p, plim)) {
    word mark_word = *mark_word_addr++;
    ptr_t *q;

    /* Every second bit of `mark_word` corresponds to an object at `q`. */
    for (q = p; mark_word != 0; mark_word >>= 2) {
      if (mark_word & 1) {
        PUSH_GRANULE(q);
        PUSH_GRANULE(q + GC_GRANULE_PTRS);
      }
      q += 2 * GC_GRANULE_PTRS;
    }
    /* Advance by the number of granules covered by a single mark word. */
    p += CPP_WORDSZ * GC_GRANULE_PTRS;
  }

  /* Restore the global names and store the updated stack top back. */
#    undef GC_greatest_plausible_heap_addr
#    undef GC_least_plausible_heap_addr
#    undef GC_mark_stack_top
#    undef GC_mark_stack_limit
#    define GC_mark_stack_limit GC_arrays._mark_stack_limit
#    define GC_mark_stack_top GC_arrays._mark_stack_top
  GC_mark_stack_top = mark_stack_top;
}

#    if GC_GRANULE_PTRS < 4
/*
 * Push all objects reachable from marked objects in the given block of
 * four-granule objects.  There is a risk of mark stack overflow here.
 * But we handle that.  And only unmarked objects get pushed, so it is
 * not very likely.  The mark bits are examined a word at a time, four
 * bits per object, and only the first bit of each group is tested.
 */
GC_ATTR_NO_SANITIZE_THREAD
STATIC void
GC_push_marked4(struct hblk *h, const hdr *hhdr)
{
  const word *mark_word_addr
      = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
  ptr_t *p;
  ptr_t plim;
  /* The local copies of some frequently accessed global variables. */
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  mse *mark_stack_top = GC_mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  /* Redirect the global names to the local copies declared above. */
#      undef GC_mark_stack_top
#      undef GC_mark_stack_limit
#      define GC_mark_stack_top mark_stack_top
#      define GC_mark_stack_limit mark_stack_limit
#      define GC_greatest_plausible_heap_addr greatest_ha
#      define GC_least_plausible_heap_addr least_ha

  p = (ptr_t *)h->hb_body;
  plim = (ptr_t)h + HBLKSIZE;

  /* Go through all granules in block. */
  while (ADDR_LT((ptr_t)p, plim)) {
    word mark_word = *mark_word_addr++;
    ptr_t *q;

    /* Every fourth bit of `mark_word` corresponds to an object at `q`. */
    for (q = p; mark_word != 0; mark_word >>= 4) {
      if (mark_word & 1) {
        PUSH_GRANULE(q);
        PUSH_GRANULE(q + GC_GRANULE_PTRS);
        PUSH_GRANULE(q + 2 * GC_GRANULE_PTRS);
        PUSH_GRANULE(q + 3 * GC_GRANULE_PTRS);
      }
      q += 4 * GC_GRANULE_PTRS;
    }
    /* Advance by the number of granules covered by a single mark word. */
    p += CPP_WORDSZ * GC_GRANULE_PTRS;
  }
  /* Restore the global names and store the updated stack top back. */
#      undef GC_greatest_plausible_heap_addr
#      undef GC_least_plausible_heap_addr
#      undef GC_mark_stack_top
#      undef GC_mark_stack_limit
#      define GC_mark_stack_limit GC_arrays._mark_stack_limit
#      define GC_mark_stack_top GC_arrays._mark_stack_top
  GC_mark_stack_top = mark_stack_top;
}
#    endif
#  endif
#endif /* !USE_MARK_BYTES && !MARK_BIT_PER_OBJ && !SMALL_CONFIG */

/* Push all objects reachable from marked objects in the given block. */
STATIC void
GC_push_marked(struct hblk *h, const hdr *hhdr)
{
  size_t sz = hhdr->hb_sz;
  ptr_t p;
  size_t bit_no;
  ptr_t plim;
  mse *mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  /* Some quick shortcuts: */
  if ((/* `0 |` */ GC_DS_LENGTH) == hhdr->hb_descr)
    return;
  if (GC_block_empty(hhdr))
    return; /*< nothing marked */

#if !defined(GC_DISABLE_INCREMENTAL)
  GC_n_rescuing_pages++;
#endif
  GC_objects_are_marked = TRUE;
  switch (BYTES_TO_GRANULES(sz)) {
#ifdef USE_PUSH_MARKED_ACCELERATORS
  case 1:
    GC_push_marked1(h, hhdr);
    break;
#  ifndef UNALIGNED_PTRS
  case 2:
    GC_push_marked2(h, hhdr);
    break;
#    if GC_GRANULE_PTRS < 4
  case 4:
    GC_push_marked4(h, hhdr);
    break;
#    endif
#  endif /* !UNALIGNED_PTRS */
#else
  case 1: /*< to suppress "switch statement contains no case" warning */
#endif
  default:
    plim = sz > MAXOBJBYTES ? h->hb_body
                            : CAST_THRU_UINTPTR(ptr_t, (h + 1)->hb_body) - sz;
    mark_stack_top = GC_mark_stack_top;
    for (p = h->hb_body, bit_no = 0; ADDR_GE(plim, p);
         p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
      /* Mark from fields inside the object. */
      if (mark_bit_from_hdr(hhdr, bit_no)) {
        mark_stack_top
            = GC_push_obj(p, hhdr, mark_stack_top, mark_stack_limit);
      }
    }
    GC_mark_stack_top = mark_stack_top;
  }
}

#ifdef ENABLE_DISCLAIM
/*
 * Unconditionally mark from all objects that have not been reclaimed.
 * This is useful in order to retain pointers reachable from the disclaim
 * notifiers.  To determine whether an object has been reclaimed, we
 * require that any live object has a nonzero as one of the two least
 * significant bits of the first "pointer-sized" word.  On the other hand,
 * the reclaimed object is a member of free lists, and thus contains
 * a pointer-aligned next-pointer as the first "pointer-sized" word.
 */
GC_ATTR_NO_SANITIZE_THREAD
STATIC void
GC_push_unconditionally(struct hblk *h, const hdr *hhdr)
{
  size_t sz = hhdr->hb_sz;
  ptr_t p;
  ptr_t plim;
  mse *mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  if ((/* `0 |` */ GC_DS_LENGTH) == hhdr->hb_descr)
    return;

#  if !defined(GC_DISABLE_INCREMENTAL)
  GC_n_rescuing_pages++;
#  endif
  GC_objects_are_marked = TRUE;
  plim = sz > MAXOBJBYTES ? h->hb_body
                          : CAST_THRU_UINTPTR(ptr_t, (h + 1)->hb_body) - sz;
  mark_stack_top = GC_mark_stack_top;
  for (p = h->hb_body; ADDR_GE(plim, p); p += sz) {
    if ((ADDR(*(ptr_t *)p) & 0x3) != 0) {
      mark_stack_top = GC_push_obj(p, hhdr, mark_stack_top, mark_stack_limit);
    }
  }
  GC_mark_stack_top = mark_stack_top;
}
#endif /* ENABLE_DISCLAIM */

#ifndef GC_DISABLE_INCREMENTAL
/*
 * Test whether any page in the given block is dirty.  For a block of
 * small objects only the page at `h` is tested; for a large object
 * every page it occupies is tested.
 */
STATIC GC_bool
GC_block_was_dirty(struct hblk *h, const hdr *hhdr)
{
  size_t obj_sz;
  ptr_t page;
  ptr_t end;

#  ifdef AO_HAVE_load
  /* Atomic access is used to avoid racing with `GC_realloc`. */
  obj_sz = AO_load((volatile AO_t *)&hhdr->hb_sz);
#  else
  obj_sz = hhdr->hb_sz;
#  endif
  if (obj_sz <= MAXOBJBYTES)
    return GC_page_was_dirty(h);

  end = (ptr_t)h + obj_sz;
  page = (ptr_t)h;
  while (ADDR_LT(page, end)) {
    if (GC_page_was_dirty((struct hblk *)page))
      return TRUE;
    page += HBLKSIZE;
  }
  return FALSE;
}
#endif /* GC_DISABLE_INCREMENTAL */

/*
 * Similar to `GC_push_marked`, but skip over unallocated blocks first.
 * The marked objects of the first allocated block found at `h` or above
 * are pushed, and the address of the next plausible block is returned.
 * `NULL` is returned if there is no allocated block at `h` or above.
 */
STATIC struct hblk *
GC_push_next_marked(struct hblk *h)
{
  hdr *blk_hdr = HDR(h);

  if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(blk_hdr)
               || HBLK_IS_FREE(blk_hdr))) {
    /* Not the beginning of an allocated block: advance to the next one. */
    h = GC_next_block(h, FALSE);
    if (NULL == h)
      return NULL;
    blk_hdr = GC_find_header(h);
  }
#ifdef LINT2
  else if (NULL == h) {
    ABORT("Bad HDR() definition");
  }
#endif

  GC_push_marked(h, blk_hdr);
  return h + OBJ_SZ_TO_BLOCKS(blk_hdr->hb_sz);
}

#ifndef GC_DISABLE_INCREMENTAL
/*
 * Same as `GC_push_next_marked`, but the blocks for which
 * `GC_block_was_dirty` reports no dirty page are skipped over.
 * Aborts if the incremental mode is off.  Returns the address of the
 * block following the processed one, or `NULL` if none is left.
 */
STATIC struct hblk *
GC_push_next_marked_dirty(struct hblk *h)
{
  hdr *hhdr;

  GC_ASSERT(I_HOLD_LOCK());
  if (!GC_incremental)
    ABORT("Dirty bits not set up");

  /* Walk the blocks until a dirty one is encountered. */
  for (;;) {
    hhdr = HDR(h);
    if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr))) {
      h = GC_next_block(h, FALSE);
      if (NULL == h)
        return NULL;
      hhdr = GC_find_header(h);
    }
#  ifdef LINT2
    else if (NULL == h) {
      ABORT("Bad HDR() definition");
    }
#  endif
    if (GC_block_was_dirty(h, hhdr))
      break;
    h += OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
  }

  /*
   * One may ask why the `MARK_UNCONDITIONALLY` case is not handled in
   * `GC_push_next_marked` too, as that function is also applied to
   * uncollectible blocks.  It seems that function does not need to scan
   * uncollectible (and unconditionally marked) blocks since those are
   * already handled in the `MS_PUSH_UNCOLLECTABLE` phase.
   */
#  ifdef ENABLE_DISCLAIM
  if ((hhdr->hb_flags & MARK_UNCONDITIONALLY) == 0)
#  endif
  {
    GC_push_marked(h, hhdr);
  }
#  ifdef ENABLE_DISCLAIM
  else {
    GC_push_unconditionally(h, hhdr);
  }
#  endif
  return h + OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
}
#endif /* !GC_DISABLE_INCREMENTAL */

/*
 * A variant of `GC_push_next_marked` for uncollectible pages.  It is
 * needed because the marks of such pages are not cleared, even for
 * full collections.  Blocks are skipped until one of the
 * `UNCOLLECTABLE` kind (or, if `ENABLE_DISCLAIM`, one flagged with
 * `MARK_UNCONDITIONALLY`) is found and pushed.  Returns the address of
 * the block following the processed one, or `NULL` if none is left.
 */
STATIC struct hblk *
GC_push_next_marked_uncollectable(struct hblk *h)
{
  hdr *hhdr;

  for (;; h += OBJ_SZ_TO_BLOCKS(hhdr->hb_sz)) {
    hhdr = HDR(h);
    if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr))) {
      h = GC_next_block(h, FALSE);
      if (NULL == h)
        return NULL;
      hhdr = GC_find_header(h);
    }
#ifdef LINT2
    else if (NULL == h) {
      ABORT("Bad HDR() definition");
    }
#endif
    if (hhdr->hb_obj_kind == UNCOLLECTABLE) {
      GC_push_marked(h, hhdr);
      break;
    }
#ifdef ENABLE_DISCLAIM
    if ((hhdr->hb_flags & MARK_UNCONDITIONALLY) != 0) {
      GC_push_unconditionally(h, hhdr);
      break;
    }
#endif
    /* Neither case applies: proceed to the block after this one. */
  }
  return h + OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2009-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#if defined(E2K) && !defined(THREADS)
#  include <alloca.h>
#endif

/*
 * Data structure for list of root sets.
 * We keep a hash table, so that we can filter out duplicate additions.
 * Under Win32, we need to do a better job of filtering overlaps, so
 * we resort to sequential search, and pay the price.
 */

/*
 * If nonzero, the data segments of dynamic libraries are not registered
 * as roots (see `GC_cond_register_dynamic_libraries`).
 */
int GC_no_dls = 0;

#if !defined(NO_DEBUGGING) || defined(GC_ASSERTIONS)
/*
 * Recompute the total size (in bytes) of all registered static root
 * ranges by summing the length of every entry in `GC_static_roots`.
 */
GC_INNER word
GC_compute_root_size(void)
{
  word total = 0;
  size_t k = 0;

  while (k < n_root_sets) {
    const struct roots *r = &GC_static_roots[k++];

    total += (word)(r->r_end - r->r_start);
  }
  return total;
}
#endif /* !NO_DEBUGGING || GC_ASSERTIONS */

#if !defined(NO_DEBUGGING)
/* For the debugging purpose. */
void
GC_print_static_roots(void)
{
  size_t i;
  word size;

  for (i = 0; i < n_root_sets; i++) {
    GC_printf("From %p to %p%s\n", (void *)GC_static_roots[i].r_start,
              (void *)GC_static_roots[i].r_end,
              GC_static_roots[i].r_tmp ? " (temporary)" : "");
  }
  GC_printf("GC_root_size= %lu\n", (unsigned long)GC_root_size);

  if ((size = GC_compute_root_size()) != GC_root_size)
    GC_err_printf("GC_root_size incorrect!! Should be: %lu\n",
                  (unsigned long)size);
}
#endif /* !NO_DEBUGGING */

#ifndef ANY_MSWIN
/*
 * Hash an address to an index of the root index table: the address bits
 * are folded down by repeated xor-shifts and the result is truncated to
 * the `RT_SIZE - 1` mask.
 */
GC_INLINE size_t
rt_hash(ptr_t addr)
{
  word folded = ADDR(addr);

#  if CPP_WORDSZ > 4 * LOG_RT_SIZE
#    if CPP_WORDSZ > 8 * LOG_RT_SIZE
  folded ^= folded >> (8 * LOG_RT_SIZE);
#    endif
  folded ^= folded >> (4 * LOG_RT_SIZE);
#  endif
  folded ^= folded >> (2 * LOG_RT_SIZE);
  folded ^= folded >> LOG_RT_SIZE;
  return (size_t)folded & (RT_SIZE - 1);
}

/*
 * Look up the root entry whose start address is exactly `b`.  Returns
 * a pointer to the matching `struct roots` element, or `NULL` if there
 * is none in the corresponding hash chain.
 */
GC_INNER void *
GC_roots_present(ptr_t b)
{
  struct roots *r;

  GC_ASSERT(I_HOLD_READER_LOCK());
  r = GC_root_index[rt_hash(b)];
  while (r != NULL && r->r_start != (ptr_t)b)
    r = r->r_next;
  return r;
}

/*
 * Link the given root structure at the head of the hash chain selected
 * by its start address.
 */
GC_INLINE void
add_roots_to_index(struct roots *p)
{
  struct roots **bucket = &GC_root_index[rt_hash(p->r_start)];

  p->r_next = *bucket;
  *bucket = p;
}
#endif /* !ANY_MSWIN */

/*
 * Public entry point: register the range from `b` to `e` as
 * a non-temporary root.  Initializes the collector first if needed and
 * performs the registration with the allocator lock held.
 */
GC_API void GC_CALL
GC_add_roots(void *b, void *e)
{
  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  LOCK();
  GC_add_roots_inner((ptr_t)b, (ptr_t)e, FALSE);
  UNLOCK();
}

/*
 * Register the range from `b` to `e` as a static root; the allocator
 * lock must be held.  The boundaries are first rounded inwards to
 * `ALIGNMENT`; nothing is done if the resulting range is empty.
 * `tmp` tells whether the root is a temporary one.
 *
 * If `ANY_MSWIN`, the range is merged with every overlapping or adjacent
 * existing entry (the merged entry is temporary only if all merged parts
 * are).  Otherwise, an existing entry with the same start address is
 * reused or extended where possible.  Aborts if a new entry is required
 * but the table already holds `MAX_ROOT_SETS` elements.
 */
GC_INNER void
GC_add_roots_inner(ptr_t b, ptr_t e, GC_bool tmp)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(ADDR_GE(e, b));
  b = PTR_ALIGN_UP(b, ALIGNMENT);
  e = PTR_ALIGN_DOWN(e, ALIGNMENT);
  if (ADDR_GE(b, e)) {
    /* Nothing to do. */
    return;
  }

#ifdef ANY_MSWIN
  /*
   * Spend the time to ensure that there are no overlapping or adjacent
   * intervals.  This could be done faster with e.g. a balanced tree.
   * But the execution time here is virtually guaranteed to be dominated
   * by the time it takes to scan the roots.
   */
  {
    size_t i;
    struct roots *old = NULL; /*< initialized to prevent warning */

    /* Find the first entry touching the new range and grow it. */
    for (i = 0; i < n_root_sets; i++) {
      old = GC_static_roots + i;
      if (ADDR_GE(old->r_end, b) && ADDR_GE(e, old->r_start)) {
        if (ADDR_LT(b, old->r_start)) {
          GC_root_size += (word)(old->r_start - b);
          old->r_start = b;
        }
        if (ADDR_LT(old->r_end, e)) {
          GC_root_size += (word)(e - old->r_end);
          old->r_end = e;
        }
        old->r_tmp &= tmp;
        break;
      }
    }
    if (i < n_root_sets) {
      /* Merge other overlapping intervals. */
      struct roots *other;

      for (i++; i < n_root_sets; i++) {
        other = GC_static_roots + i;
        b = other->r_start;
        e = other->r_end;
        if (ADDR_GE(old->r_end, b) && ADDR_GE(e, old->r_start)) {
          if (ADDR_LT(b, old->r_start)) {
            GC_root_size += (word)(old->r_start - b);
            old->r_start = b;
          }
          if (ADDR_LT(old->r_end, e)) {
            GC_root_size += (word)(e - old->r_end);
            old->r_end = e;
          }
          old->r_tmp &= other->r_tmp;
          /*
           * Delete this entry by moving the last one into its place.
           * All the fields of the moved entry should be copied,
           * including `r_tmp`; otherwise the moved root would inherit
           * the temporary status of the deleted one.
           */
          GC_root_size -= (word)(other->r_end - other->r_start);
          other->r_start = GC_static_roots[n_root_sets - 1].r_start;
          other->r_end = GC_static_roots[n_root_sets - 1].r_end;
          other->r_tmp = GC_static_roots[n_root_sets - 1].r_tmp;
          n_root_sets--;
          /*
           * NOTE(review): the entry just moved to position `i` is not
           * examined by this loop (the index is advanced next).
           */
        }
      }
      return;
    }
  }
#else
  {
    struct roots *old = (struct roots *)GC_roots_present(b);

    if (old != NULL) {
      if (ADDR_GE(old->r_end, e)) {
        /* The entry becomes non-temporary if the new root is such. */
        old->r_tmp &= tmp;
        /* Already there. */
        return;
      }
      if (old->r_tmp == tmp || !tmp) {
        /* Extend the existing root. */
        GC_root_size += (word)(e - old->r_end);
        old->r_end = e;
        old->r_tmp = tmp;
        return;
      }
      /*
       * A temporary range extends beyond a non-temporary entry: only
       * the part past the end of the existing entry is added below.
       */
      b = old->r_end;
    }
  }
#endif
  if (n_root_sets == MAX_ROOT_SETS) {
    ABORT("Too many root sets");
  }

#ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("Adding data root section %u: %p .. %p%s\n",
                (unsigned)n_root_sets, (void *)b, (void *)e,
                tmp ? " (temporary)" : "");
#endif
  /* Append a new entry at the end of the table. */
  GC_static_roots[n_root_sets].r_start = (ptr_t)b;
  GC_static_roots[n_root_sets].r_end = (ptr_t)e;
  GC_static_roots[n_root_sets].r_tmp = tmp;
#ifndef ANY_MSWIN
  GC_static_roots[n_root_sets].r_next = 0;
  add_roots_to_index(GC_static_roots + n_root_sets);
#endif
  GC_root_size += (word)(e - b);
  n_root_sets++;
}

/*
 * Public entry point: forget all the registered static roots.
 * Initializes the collector first if needed.  With the allocator lock
 * held, the root table is emptied, the total root size is zeroed and
 * (unless `ANY_MSWIN`) the hash index is cleared.
 */
GC_API void GC_CALL
GC_clear_roots(void)
{
  if (UNLIKELY(!GC_is_initialized))
    GC_init();
  LOCK();
#ifdef THREADS
  /* Checked by `GC_push_roots` to push thread structures explicitly. */
  GC_roots_were_cleared = TRUE;
#endif
  n_root_sets = 0;
  GC_root_size = 0;
#ifndef ANY_MSWIN
  BZERO(GC_root_index, sizeof(GC_root_index));
#endif
#ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("Clear all data root sections\n");
#endif
  UNLOCK();
}

/*
 * Delete the root table entry at index `i` by overwriting it with the
 * last entry and shrinking the table.  The `r_next` links are not
 * touched, thus the caller is expected to rebuild the hash index
 * (where it exists) afterwards.
 */
STATIC void
GC_remove_root_at_pos(size_t i)
{
  struct roots *victim;
  const struct roots *last;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(i < n_root_sets);
  victim = GC_static_roots + i;
#ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("Remove data root section at %u: %p .. %p%s\n", (unsigned)i,
                (void *)victim->r_start, (void *)victim->r_end,
                victim->r_tmp ? " (temporary)" : "");
#endif
  GC_root_size -= (word)(victim->r_end - victim->r_start);

  n_root_sets--;
  last = GC_static_roots + n_root_sets;
  victim->r_start = last->r_start;
  victim->r_end = last->r_end;
  victim->r_tmp = last->r_tmp;
}

#ifndef ANY_MSWIN
/*
 * Recreate the hash index of the roots from scratch: clear all the
 * buckets and re-insert every entry of the root table.
 */
STATIC void
GC_rebuild_root_index(void)
{
  size_t k = 0;

  BZERO(GC_root_index, sizeof(GC_root_index));
  while (k < n_root_sets) {
    add_roots_to_index(&GC_static_roots[k]);
    k++;
  }
}
#endif /* !ANY_MSWIN */

#if defined(ANY_MSWIN) || defined(DYNAMIC_LOADING)
/*
 * Delete all the root table entries marked as temporary.  Unless
 * `ANY_MSWIN`, the hash index is rebuilt if anything has been removed.
 */
STATIC void
GC_remove_tmp_roots(void)
{
#  ifndef ANY_MSWIN
  size_t n_before = n_root_sets;
#  endif
  size_t k = 0;

  GC_ASSERT(I_HOLD_LOCK());
  while (k < n_root_sets) {
    if (!GC_static_roots[k].r_tmp) {
      k++;
      continue;
    }
    /* The last entry is moved to this position, so stay at `k`. */
    GC_remove_root_at_pos(k);
  }
#  ifndef ANY_MSWIN
  if (n_before > n_root_sets)
    GC_rebuild_root_index();
#  endif
}
#endif /* ANY_MSWIN || DYNAMIC_LOADING */

STATIC void GC_remove_roots_inner(ptr_t b, ptr_t e);

/*
 * Public entry point: remove the registered roots which lie entirely
 * within the range from `b` to `e`.  The removal itself is done by
 * `GC_remove_roots_inner` with the allocator lock held; note that the
 * original (unaligned) boundaries are passed to it.
 */
GC_API void GC_CALL
GC_remove_roots(void *b, void *e)
{
  /* A quick check whether there is nothing to do. */
  if (ADDR_GE(PTR_ALIGN_UP((ptr_t)b, ALIGNMENT),
              PTR_ALIGN_DOWN((ptr_t)e, ALIGNMENT)))
    return;

  LOCK();
  GC_remove_roots_inner((ptr_t)b, (ptr_t)e);
  UNLOCK();
}

/*
 * Delete every root table entry that is completely contained in the
 * range from `b` to `e`.  Unless `ANY_MSWIN`, the hash index is rebuilt
 * if anything has been removed.
 */
STATIC void
GC_remove_roots_inner(ptr_t b, ptr_t e)
{
#ifndef ANY_MSWIN
  size_t n_before = n_root_sets;
#endif
  size_t k = 0;

  GC_ASSERT(I_HOLD_LOCK());
  while (k < n_root_sets) {
    const struct roots *r = GC_static_roots + k;

    if (ADDR_GE(r->r_start, b) && ADDR_GE(e, r->r_end)) {
      /* The last entry is moved to this position, so stay at `k`. */
      GC_remove_root_at_pos(k);
      continue;
    }
    k++;
  }
#ifndef ANY_MSWIN
  if (n_before > n_root_sets)
    GC_rebuild_root_index();
#endif
}

#ifdef USE_PROC_FOR_LIBRARIES
/*
 * Swap the `i`-th and `j`-th elements of the roots table (all the
 * fields except for `r_next`).  The roots index table should be rebuilt
 * by the caller after the exchange.
 */
GC_INLINE void
swap_static_roots(size_t i, size_t j)
{
  struct roots *a = GC_static_roots + i;
  struct roots *b = GC_static_roots + j;
  ptr_t saved_start = a->r_start;
  ptr_t saved_end = a->r_end;
  GC_bool saved_tmp = a->r_tmp;

  /* No need to swap `r_next` values. */
  a->r_start = b->r_start;
  a->r_end = b->r_end;
  a->r_tmp = b->r_tmp;

  b->r_start = saved_start;
  b->r_end = saved_end;
  b->r_tmp = saved_tmp;
}

/*
 * Exclude the range from `b` to `e` (both should be aligned) from every
 * non-temporary root it intersects with: a root is shrunk, split into
 * two, or removed completely.  The temporary roots are expected to be
 * all at the end of the table and are left intact.  The roots index is
 * rebuilt whenever some `r_start` value or entry position has changed.
 */
GC_INNER void
GC_remove_roots_subregion(ptr_t b, ptr_t e)
{
  size_t i;
  GC_bool rebuild = FALSE;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(ADDR(b) % ALIGNMENT == 0 && ADDR(e) % ALIGNMENT == 0);
  for (i = 0; i < n_root_sets; i++) {
    ptr_t r_start, r_end;

    if (GC_static_roots[i].r_tmp) {
      /* The remaining roots are skipped as they are all temporary. */
#  ifdef GC_ASSERTIONS
      size_t j;

      for (j = i + 1; j < n_root_sets; j++) {
        GC_ASSERT(GC_static_roots[j].r_tmp);
      }
#  endif
      break;
    }
    r_start = GC_static_roots[i].r_start;
    r_end = GC_static_roots[i].r_end;
    /* Skip the root if it does not intersect with the given range. */
    if (ADDR_GE(r_start, e) || LIKELY(ADDR_GE(b, r_end)))
      continue;

#  ifdef DEBUG_ADD_DEL_ROOTS
    GC_log_printf("Removing %p .. %p from root section %u (%p .. %p)\n",
                  (void *)b, (void *)e, (unsigned)i, (void *)r_start,
                  (void *)r_end);
#  endif
    if (ADDR_LT(r_start, b)) {
      /* The head of the root is kept: truncate it at `b`. */
      GC_root_size -= (word)(r_end - b);
      GC_static_roots[i].r_end = b;
      /* No need to rebuild as hash does not use `r_end` value. */
      if (ADDR_LT(e, r_end)) {
        /* The tail of the root is kept too: add it as a new root. */
        size_t j;

        /*
         * The index should be up-to-date before adding a root, as
         * the latter performs a lookup in the index.
         */
        if (rebuild) {
          GC_rebuild_root_index();
          rebuild = FALSE;
        }
        /* Note: updates `n_root_sets` as well. */
        GC_add_roots_inner(e, r_end, FALSE);
        /* Find the first temporary root (if any) after this one. */
        for (j = i + 1; j < n_root_sets; j++)
          if (GC_static_roots[j].r_tmp)
            break;
        if (j < n_root_sets - 1 && !GC_static_roots[n_root_sets - 1].r_tmp) {
          /* Exchange the roots to have all temporary ones at the end. */
          swap_static_roots(j, n_root_sets - 1);
          rebuild = TRUE;
        }
      }
    } else {
      if (ADDR_LT(e, r_end)) {
        /* Only the tail of the root is kept: move its start to `e`. */
        GC_root_size -= (word)(e - r_start);
        GC_static_roots[i].r_start = e;
      } else {
        /* The root is covered completely by the given range. */
        GC_remove_root_at_pos(i);
        /*
         * The former last entry is now at position `i`; if it is
         * a temporary one followed by a non-temporary, then move it
         * past the last non-temporary root.
         */
        if (i + 1 < n_root_sets && GC_static_roots[i].r_tmp
            && !GC_static_roots[i + 1].r_tmp) {
          size_t j;

          for (j = i + 2; j < n_root_sets; j++)
            if (GC_static_roots[j].r_tmp)
              break;
          /* Exchange the roots to have all temporary ones at the end. */
          swap_static_roots(i, j - 1);
        }
        /*
         * Re-examine position `i` on the next iteration.  If `i` is
         * zero, then the unsigned value wraps around and is brought
         * back to zero by the loop increment.
         */
        i--;
      }
      rebuild = TRUE;
    }
  }
  if (rebuild)
    GC_rebuild_root_index();
}
#endif /* USE_PROC_FOR_LIBRARIES */

#if !defined(NO_DEBUGGING)
/*
 * Tell whether the address `p` belongs to a temporary static root.
 * Returns the `r_tmp` value of the containing root entry, or zero if
 * no registered root contains `p`.  The index of the last matched entry
 * is cached between the calls to speed up the lookup.
 */
GC_API int GC_CALL
GC_is_tmp_root(void *p)
{
#  ifndef HAS_REAL_READER_LOCK
  static size_t last_root_set; /*< initialized to 0; no shared access */
#  elif defined(AO_HAVE_load) || defined(AO_HAVE_store)
  static volatile AO_t last_root_set;
#  else
  /* Note: a race is acceptable, it is just a cached index. */
  static volatile size_t last_root_set;
#  endif
  size_t idx;
  int is_tmp = 0;

  READER_LOCK();
  /* Start with the entry matched by the previous lookup. */
#  if defined(AO_HAVE_load) && defined(HAS_REAL_READER_LOCK)
  idx = AO_load(&last_root_set);
#  else
  idx = last_root_set;
#  endif
  if (idx < n_root_sets
      && ADDR_INSIDE((ptr_t)p, GC_static_roots[idx].r_start,
                     GC_static_roots[idx].r_end)) {
    is_tmp = (int)GC_static_roots[idx].r_tmp;
  } else {
    /* The cache has missed: scan the whole table. */
    for (idx = 0; idx < n_root_sets; idx++) {
      if (!ADDR_INSIDE((ptr_t)p, GC_static_roots[idx].r_start,
                       GC_static_roots[idx].r_end))
        continue;

      is_tmp = (int)GC_static_roots[idx].r_tmp;
#  if defined(AO_HAVE_store) && defined(HAS_REAL_READER_LOCK)
      AO_store(&last_root_set, idx);
#  else
      last_root_set = idx;
#  endif
      break;
    }
  }
  READER_UNLOCK();
  return is_tmp;
}
#endif /* !NO_DEBUGGING */

/*
 * Return an approximate value of the current stack pointer, as stored
 * by `STORE_APPROX_SP_TO` to a local volatile variable.
 */
GC_INNER ptr_t
GC_approx_sp(void)
{
  volatile ptr_t sp;

  /*
   * This also forces stack to grow if necessary.  Otherwise the later
   * accesses might cause the kernel to think we are doing something wrong.
   */
  STORE_APPROX_SP_TO(sp);
  /* The cast drops the `volatile` qualifier from the returned value. */
  return (/* no volatile */ ptr_t)sp;
}

/*
 * Public entry point: forget all the static root exclusions by
 * resetting the number of the exclusion table entries to zero.
 * Note: no lock is acquired by this function itself.
 */
GC_API void GC_CALL
GC_clear_exclusion_table(void)
{
#ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("Clear static root exclusions (%u elements)\n",
                (unsigned)GC_excl_table_entries);
#endif
  GC_excl_table_entries = 0;
}

/*
 * Find, by binary search over the exclusion table, the first exclusion
 * range whose end is above `start_addr` (i.e. the first one including
 * an address not lower than `start_addr`).  Returns `NULL` if the table
 * is empty or no such range exists.
 */
STATIC struct exclusion *
GC_next_exclusion(ptr_t start_addr)
{
  size_t lo;
  size_t hi;

  if (UNLIKELY(0 == GC_excl_table_entries))
    return NULL;

  lo = 0;
  hi = GC_excl_table_entries - 1;
  while (lo < hi) {
    /* Here `lo` <= `mid` < `hi`. */
    size_t mid = (lo + hi) >> 1;

    if (ADDR_LT(start_addr, GC_excl_table[mid].e_end)) {
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }
  return ADDR_LT(start_addr, GC_excl_table[lo].e_end) ? GC_excl_table + lo
                                                      : NULL;
}

/*
 * Add the range from `start` to `finish` to the exclusion table,
 * keeping the table sorted; the allocator lock must be held.  If the
 * range ends exactly where the following exclusion begins, then the
 * latter is extended backwards instead.  Aborts if the range overlaps
 * the following exclusion or if the table is full.
 */
GC_INNER void
GC_exclude_static_roots_inner(ptr_t start, ptr_t finish)
{
  struct exclusion *succ;
  size_t pos;
  size_t k;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(ADDR(start) % ALIGNMENT == 0);
  GC_ASSERT(ADDR_LT(start, finish));

  succ = GC_next_exclusion(start);
  if (succ != NULL) {
    if (ADDR_LT(succ->e_start, finish)) {
      /* Incomplete error check. */
      ABORT("Exclusion ranges overlap");
    }
    if (ADDR(succ->e_start) == ADDR(finish)) {
      /* Extend old range backwards. */
      succ->e_start = start;
#ifdef DEBUG_ADD_DEL_ROOTS
      GC_log_printf("Updating static root exclusion to %p .. %p\n",
                    (void *)start, (void *)succ->e_end);
#endif
      return;
    }
  }

  if (GC_excl_table_entries >= MAX_EXCLUSIONS)
    ABORT("Too many exclusions");

  /* Insert before the following exclusion, or append if there is none. */
  pos = succ != NULL ? (size_t)(succ - GC_excl_table) : GC_excl_table_entries;
  /* Shift the entries at `pos` and above by one to make room. */
  for (k = GC_excl_table_entries; k > pos; k--)
    GC_excl_table[k] = GC_excl_table[k - 1];
#ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("Adding static root exclusion at %u: %p .. %p\n",
                (unsigned)pos, (void *)start, (void *)finish);
#endif
  GC_excl_table[pos].e_start = start;
  GC_excl_table[pos].e_end = finish;
  GC_excl_table_entries++;
}

/*
 * Public entry point: exclude the range from `b` to `e` from the
 * static roots.  Does nothing if the range is empty.  The boundaries
 * are rounded outwards to `ALIGNMENT`, and the exclusion is registered
 * with the allocator lock held.
 */
GC_API void GC_CALL
GC_exclude_static_roots(void *b, void *e)
{
  ptr_t start;
  ptr_t finish;

  if (b == e) {
    /* Nothing to exclude. */
    return;
  }

  start = (ptr_t)b;
  finish = (ptr_t)e;
  /* Round boundaries in direction reverse to that of `GC_add_roots`. */
#if ALIGNMENT > 1
  start = PTR_ALIGN_DOWN(start, ALIGNMENT);
  if (UNLIKELY(ADDR(finish) > ~(word)(ALIGNMENT - 1))) {
    /* Rounding up would overflow. */
    finish = PTR_ALIGN_DOWN(finish, ALIGNMENT);
  } else {
    finish = PTR_ALIGN_UP(finish, ALIGNMENT);
  }
#endif

  LOCK();
  GC_exclude_static_roots_inner(start, finish);
  UNLOCK();
}

/*
 * Push the range from `b` to `t` conditionally (`all` is passed through
 * to the callee).  If both `WRAP_MARK_SOME` and `PARALLEL_MARK` are
 * defined, then the eager variant is used when `GC_parallel` is set.
 */
#if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
#  define GC_PUSH_CONDITIONAL(b, t, all)                \
    (GC_parallel ? GC_push_conditional_eager(b, t, all) \
                 : GC_push_conditional_static(b, t, all))
#else
#  define GC_PUSH_CONDITIONAL(b, t, all) GC_push_conditional_static(b, t, all)
#endif

/*
 * Invoke `GC_PUSH_CONDITIONAL` on the parts of the range from `bottom`
 * to `top` which are not covered by the registered exclusions.
 */
STATIC void
GC_push_conditional_with_exclusions(ptr_t bottom, ptr_t top, GC_bool all)
{
  while (ADDR_LT(bottom, top)) {
    const struct exclusion *excl = GC_next_exclusion(bottom);

    if (NULL == excl || ADDR_GE(excl->e_start, top)) {
      /* No exclusion begins below `top`: push the rest and finish. */
      GC_PUSH_CONDITIONAL(bottom, top, all);
      return;
    }
    /* Push the gap (if any) before the exclusion, then skip the latter. */
    if (ADDR_LT(bottom, excl->e_start))
      GC_PUSH_CONDITIONAL(bottom, excl->e_start, all);
    bottom = excl->e_end;
  }
}

#ifdef IA64
/*
 * Push the register backing store between `bs_lo` and `bs_hi`, section
 * by section, following the chain of the traced stack sections.
 * `eager` selects `GC_push_all_eager` instead of `GC_push_all_stack`.
 */
GC_INNER void
GC_push_all_register_sections(ptr_t bs_lo, ptr_t bs_hi, GC_bool eager,
                              struct GC_traced_stack_sect_s *traced_stack_sect)
{
  struct GC_traced_stack_sect_s *sect;

  GC_ASSERT(I_HOLD_LOCK());
  for (sect = traced_stack_sect; sect != NULL; sect = sect->prev) {
    ptr_t frame_bs_lo = sect->backing_store_end;

    GC_ASSERT(ADDR_GE(bs_hi, frame_bs_lo));
    if (eager) {
      GC_push_all_eager(frame_bs_lo, bs_hi);
    } else {
      GC_push_all_stack(frame_bs_lo, bs_hi);
    }
    /* Continue from the pointer saved in this section. */
    bs_hi = sect->saved_backing_store_ptr;
  }

  /* Push the remaining (outermost) part. */
  GC_ASSERT(ADDR_GE(bs_hi, bs_lo));
  if (eager) {
    GC_push_all_eager(bs_lo, bs_hi);
  } else {
    GC_push_all_stack(bs_lo, bs_hi);
  }
}
#endif /* IA64 */

#ifdef THREADS

/*
 * Push the stack between `lo` (the hot end) and `hi` (the cold end),
 * section by section, following the chain of the traced stack sections;
 * the gaps between a section and the stack pointer saved in it are
 * skipped.
 */
GC_INNER void
GC_push_all_stack_sections(ptr_t lo /* top */, ptr_t hi /* bottom */,
                           struct GC_traced_stack_sect_s *traced_stack_sect)
{
  struct GC_traced_stack_sect_s *sect;

  GC_ASSERT(I_HOLD_LOCK());
  for (sect = traced_stack_sect; sect != NULL; sect = sect->prev) {
    ptr_t sect_addr = (ptr_t)sect;

    GC_ASSERT(HOTTER_THAN(lo, sect_addr));
#  ifdef STACK_GROWS_UP
    GC_push_all_stack(sect_addr, lo);
#  else
    GC_push_all_stack(lo, sect_addr);
#  endif
    lo = sect->saved_stack_ptr;
    GC_ASSERT(lo != NULL);
  }

  GC_ASSERT(!HOTTER_THAN(hi, lo));
#  ifdef STACK_GROWS_UP
  /* The boundaries are passed in the reverse order in this case. */
  GC_push_all_stack(hi, lo);
#  else
  GC_push_all_stack(lo, hi);
#  endif
}

#else /* !THREADS */

/*
 * Similar to `GC_push_all_eager`, but only the part hotter than
 * `cold_gc_frame` is scanned immediately.  Needed to ensure that
 * callee-save registers are not missed.  Treats all interior pointers
 * as valid and scans part of the area immediately, to make sure that
 * saved register values are not lost.  `cold_gc_frame` delimits the
 * stack section that must be scanned eagerly.  A zero value indicates
 * that no eager scanning is needed.  We do not need to worry about
 * the manual VDB case here, since this is only called in the
 * single-threaded case.  We assume that we cannot collect between
 * an assignment and the corresponding `GC_dirty()` call.
 *
 * `bottom` and `top` are the lowest and the highest addresses of the
 * stack area, respectively.  If `NEED_FIXUP_POINTER` is defined or
 * `GC_all_interior_pointers` is off, then the whole area is pushed
 * eagerly.
 */
STATIC void
GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top, ptr_t cold_gc_frame)
{
#  ifndef NEED_FIXUP_POINTER
  if (GC_all_interior_pointers) {
    /*
     * Push the hot end of the stack eagerly, so that register values saved
     * inside GC frames are marked before they disappear.  The rest of the
     * marking can be deferred until later.
     */
    if (0 == cold_gc_frame) {
      GC_push_all_stack(bottom, top);
      return;
    }
    GC_ASSERT(ADDR_GE(cold_gc_frame, bottom) && ADDR_GE(top, cold_gc_frame));
    /*
     * The deferred range is extended by one pointer size beyond
     * `cold_gc_frame`, thus it overlaps the eagerly pushed one.
     */
#    ifdef STACK_GROWS_UP
    GC_push_all(bottom, cold_gc_frame + sizeof(ptr_t));
    GC_push_all_eager(cold_gc_frame, top);
#    else
    GC_push_all(cold_gc_frame - sizeof(ptr_t), top);
    GC_push_all_eager(bottom, cold_gc_frame);
#    endif
  } else
#  endif
  /* else */ {
    GC_push_all_eager(bottom, top);
  }
#  ifdef TRACE_BUF
  GC_add_trace_entry("GC_push_all_stack", bottom, top);
#  endif
}

/*
 * A variant of `GC_push_all_stack_sections()` which pushes each section
 * with `GC_push_all_stack_partially_eager`.  `cold_gc_frame` is applied
 * only to the first pushed section (the hottest one).
 */
STATIC void
GC_push_all_stack_part_eager_sections(
    ptr_t lo /* top */, ptr_t hi /* bottom */, ptr_t cold_gc_frame,
    struct GC_traced_stack_sect_s *traced_stack_sect)
{
  struct GC_traced_stack_sect_s *sect;

  GC_ASSERT(traced_stack_sect == NULL || cold_gc_frame == NULL
            || HOTTER_THAN(cold_gc_frame, (ptr_t)traced_stack_sect));

  for (sect = traced_stack_sect; sect != NULL; sect = sect->prev) {
    ptr_t sect_addr = (ptr_t)sect;

    GC_ASSERT(HOTTER_THAN(lo, sect_addr));
#  ifdef STACK_GROWS_UP
    GC_push_all_stack_partially_eager(sect_addr, lo, cold_gc_frame);
#  else
    GC_push_all_stack_partially_eager(lo, sect_addr, cold_gc_frame);
#  endif
    lo = sect->saved_stack_ptr;
    GC_ASSERT(lo != NULL);
    /* Note: use at most once. */
    cold_gc_frame = NULL;
  }

  GC_ASSERT(!HOTTER_THAN(hi, lo));
#  ifdef STACK_GROWS_UP
  /* The boundaries are passed in the reverse order in this case. */
  GC_push_all_stack_partially_eager(hi, lo, cold_gc_frame);
#  else
  GC_push_all_stack_partially_eager(lo, hi, cold_gc_frame);
#  endif
}

#endif /* !THREADS */

/*
 * Push enough of the current stack eagerly to ensure that callee-save
 * registers saved in GC frames are scanned.  In the single-threaded case,
 * schedule the entire stack for scanning.  The 2nd argument (`context`)
 * is a pointer to the (possibly `NULL`) thread context, for (currently
 * hypothetical) more precise stack scanning.  In the presence of threads,
 * push enough of the current stack to ensure that callee-save registers
 * saved in collector frames have been seen.
 */
/* TODO: Merge it with per-thread stuff. */
STATIC void
GC_push_current_stack(ptr_t cold_gc_frame, void *context)
{
  UNUSED_ARG(context);
  GC_ASSERT(I_HOLD_LOCK());
#if defined(THREADS)
  /* `cold_gc_frame` is non-`NULL`. */
  /* Only the part between the current stack pointer and the frame. */
#  ifdef STACK_GROWS_UP
  GC_push_all_eager(cold_gc_frame, GC_approx_sp());
#  else
  GC_push_all_eager(GC_approx_sp(), cold_gc_frame);
  /*
   * For IA-64, the register stack backing store is handled in the
   * thread-specific code.
   */
#  endif
#else
  /* The whole stack, from the current stack pointer to its bottom. */
  GC_push_all_stack_part_eager_sections(GC_approx_sp(), GC_stackbottom,
                                        cold_gc_frame, GC_traced_stack_sect);
#  ifdef IA64
  /*
   * We also need to push the register stack backing store.
   * This should really be done in the same way as the regular stack.
   * For now we fudge it a bit.  Note that the backing store grows up,
   * so we cannot use `GC_push_all_stack_partially_eager`.
   */
  {
    ptr_t bsp = GC_save_regs_ret_val;
    /* The topmost 2048 bytes of the backing store are pushed eagerly. */
    ptr_t cold_gc_bs_pointer = bsp - 2048;
    if (GC_all_interior_pointers
        && ADDR_LT(GC_register_stackbottom, cold_gc_bs_pointer)) {
      /*
       * Adjust `cold_gc_bs_pointer` if below our innermost
       * "traced stack section" in backing store.
       */
      if (GC_traced_stack_sect != NULL
          && ADDR_LT(cold_gc_bs_pointer,
                     GC_traced_stack_sect->backing_store_end)) {
        cold_gc_bs_pointer = GC_traced_stack_sect->backing_store_end;
      }
      GC_push_all_register_sections(GC_register_stackbottom,
                                    cold_gc_bs_pointer, FALSE,
                                    GC_traced_stack_sect);
      GC_push_all_eager(cold_gc_bs_pointer, bsp);
    } else {
      GC_push_all_register_sections(GC_register_stackbottom, bsp,
                                    TRUE /* `eager` */, GC_traced_stack_sect);
    }
    /*
     * All values should be sufficiently aligned that we do not have to
     * worry about the boundary.
     */
  }
#  elif defined(E2K)
  /* We also need to push procedure stack store.  Procedure stack grows up. */
  {
    ptr_t bs_lo;
    size_t stack_size;

    /* TODO: Support `ps_ofs` here and in `GC_do_blocking_inner`. */
    GET_PROCEDURE_STACK_LOCAL(0, &bs_lo, &stack_size);
    GC_push_all_eager(bs_lo, bs_lo + stack_size);
  }
#  endif
#endif /* !THREADS */
}

/*
 * An optional hook invoked by `GC_push_roots` (if non-zero).
 * NOTE(review): presumably set by the typed allocation code to push
 * its internal structures - confirm against the assignment site.
 */
GC_INNER void (*GC_push_typed_structures)(void) = 0;

/*
 * Refresh the roots coming from dynamic libraries: remove all the
 * temporary roots and, unless `GC_no_dls` is set, register the
 * dynamic libraries again.  On the platforms where this is not
 * available, just set `GC_no_dls`.  The allocator lock must be held.
 */
GC_INNER void
GC_cond_register_dynamic_libraries(void)
{
  GC_ASSERT(I_HOLD_LOCK());
#if defined(DYNAMIC_LOADING) && !defined(MSWIN_XBOX1) || defined(ANY_MSWIN)
  GC_remove_tmp_roots();
  if (!GC_no_dls)
    GC_register_dynamic_libraries();
#else
  GC_no_dls = TRUE;
#endif
}

/*
 * Push the callee-save registers and the current stack by invoking
 * `GC_push_current_stack` via `GC_with_callee_saves_pushed`.
 * In the multi-threaded case, nothing is done here if `cold_gc_frame`
 * is `NULL`.  The allocator lock must be held.
 */
STATIC void
GC_push_regs_and_stack(ptr_t cold_gc_frame)
{
  GC_ASSERT(I_HOLD_LOCK());
#ifdef THREADS
  if (NULL == cold_gc_frame) {
    /* `GC_push_all_stacks` should push registers and stack. */
    return;
  }
#endif
  GC_with_callee_saves_pushed(GC_push_current_stack, cold_gc_frame);
}

/*
 * Push all the roots: the static data areas (minus the exclusions),
 * the collector internal structures, then the registers and stack(s),
 * and finally whatever `GC_push_other_roots` pushes.  `all` is passed
 * through to the conditional push of the static data; `cold_gc_frame`
 * is passed to `GC_push_regs_and_stack`.  The allocator lock must be
 * held and the collector must be initialized.
 */
GC_INNER void
GC_push_roots(GC_bool all, ptr_t cold_gc_frame)
{
  size_t i;
  unsigned kind;

  GC_ASSERT(I_HOLD_LOCK());

  /* The initialization is needed for `GC_push_all_stacks`. */
  GC_ASSERT(GC_is_initialized);

  /*
   * Next push static data.  This must happen early on, since it is not
   * robust against mark stack overflow.  Re-register dynamic libraries,
   * in case one got added.  There is some argument for doing this as late
   * as possible, especially on Win32, where it can change asynchronously.
   * In those cases, we do it here.  But on other platforms, it is not safe
   * with the world stopped, so we do it earlier.
   */
#if !defined(REGISTER_LIBRARIES_EARLY)
  GC_cond_register_dynamic_libraries();
#endif

  /* Mark everything in static data areas. */
  for (i = 0; i < n_root_sets; i++) {
    GC_push_conditional_with_exclusions(GC_static_roots[i].r_start,
                                        GC_static_roots[i].r_end, all);
  }

  /*
   * Mark all free-list header blocks, if those were allocated from
   * the garbage-collected heap.  This makes sure they do not disappear
   * if we are not marking from static data.  It also saves us the trouble
   * of scanning them, and possibly that of marking the free lists.
   */
  for (kind = 0; kind < GC_n_kinds; kind++) {
    const void *base = GC_base(GC_obj_kinds[kind].ok_freelist);

    if (base != NULL) {
      GC_set_mark_bit(base);
    }
  }

  /*
   * Mark from the collector internal roots if those might otherwise
   * have been excluded.
   */
#ifndef GC_NO_FINALIZATION
  GC_push_finalizer_structures();
#endif
#ifdef THREADS
  /* Pushed explicitly only if not expected to be among the roots. */
  if (GC_no_dls || GC_roots_were_cleared)
    GC_push_thread_structures();
#endif
  if (GC_push_typed_structures) {
    GC_push_typed_structures();
  }

#if defined(THREAD_LOCAL_ALLOC)
  /*
   * Mark thread-local free lists, even if their mark descriptor excludes
   * the link field.  If the world is not stopped, this is unsafe.
   * It is also unnecessary, since we will do this again with the world
   * stopped.
   */
  if (GC_world_stopped) {
    GC_mark_thread_local_free_lists();
  }
#endif

  /*
   * Now traverse stacks, and mark from register contents.
   * These must be done last, since they can legitimately overflow
   * the mark stack.  This is usually done by saving the current
   * context on the stack, and then just tracing from the stack.
   */
#ifdef STACK_NOT_SCANNED
  UNUSED_ARG(cold_gc_frame);
#else
  GC_push_regs_and_stack(cold_gc_frame);
#endif

  if (GC_push_other_roots != 0) {
    /*
     * In the multi-threaded case, this also pushes thread stacks.
     * Note that without the interior pointers recognition lots of stuff
     * may have already been pushed, and this should be careful about
     * mark stack overflows.
     */
    (*GC_push_other_roots)();
  }
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1996 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#ifdef ENABLE_DISCLAIM


#endif

#ifdef PARALLEL_MARK
/*
 * NOTE(review): presumably the number of threads currently building
 * free lists (without holding the allocator lock) - confirm against
 * the places where the counter is updated.
 */
GC_INNER GC_signed_word GC_fl_builder_count = 0;
#endif

/*
 * We defer printing of leaked objects until we are done with the
 * collection cycle, since the routine for printing objects needs
 * to run outside the collector, e.g. without the allocator lock.
 */

#ifndef NO_FIND_LEAK
/* The capacity of `GC_leaked` table; could be redefined by the client. */
#  ifndef MAX_LEAKED
#    define MAX_LEAKED 40
#  endif
/*
 * The table of at most `MAX_LEAKED` object addresses.
 * NOTE(review): presumably these are the leaked objects whose printing
 * is deferred until the end of the collection cycle - confirm.
 */
STATIC ptr_t GC_leaked[MAX_LEAKED] = { NULL };
#endif

#if !defined(EAGER_SWEEP) && defined(ENABLE_DISCLAIM)
STATIC void GC_reclaim_unconditionally_marked(void);
#endif

#ifndef SHORT_DBG_HDRS


#  ifndef MAX_SMASHED
/* Capacity of `GC_smashed`; may be overridden at build time. */
#    define MAX_SMASHED 20
#  endif

/*
 * List of smashed (clobbered) locations.  We defer printing these,
 * since we cannot always print them nicely with the allocator lock held.
 * We put them here instead of in `GC_arrays`, since it may be useful to
 * be able to look at them with the debugger.
 * Entries are added by `GC_add_smashed()` and consumed (then zeroed) by
 * `GC_print_all_smashed_proc()`.
 */
STATIC ptr_t GC_smashed[MAX_SMASHED] = { NULL };

/*
 * Remember `smashed` (the address of a clobbered location) in
 * `GC_smashed` for deferred printing and raise the "have errors" flag.
 * The allocator lock must be held, and the object containing the
 * location is expected to be marked.
 */
GC_INNER void
GC_add_smashed(ptr_t smashed)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_marked(GC_base(smashed)));
  /* FIXME: Prevent adding an object while printing smashed list. */
  /* Store first, then advance: the slot at `GC_n_smashed` is always free */
  /* or, once the table is full, the one that gets overwritten.           */
  GC_smashed[GC_n_smashed] = smashed;
  /*
   * In case of overflow, we keep the first `MAX_SMASHED - 1` entries
   * plus the last one.
   */
  if (GC_n_smashed < MAX_SMASHED - 1)
    ++GC_n_smashed;
  GC_SET_HAVE_ERRORS();
}

/*
 * Report a single clobbered location to the error stream.  `p` is the
 * object as seen by the client, `clobbered` is the damaged address and
 * `msg` is a prefix to print.  Must be called without the allocator
 * lock.  If the debug header itself looks damaged (the clobbered
 * address is not beyond its size field, or the file name is missing),
 * then only an approximate size is printed; otherwise the recorded
 * source location, size and call chain are printed.
 */
GC_INNER void
GC_print_smashed_obj(const char *msg, void *p, ptr_t clobbered)
{
  oh *ohdr = (oh *)GC_base(p);
  const char *src_name;

  GC_ASSERT(I_DONT_HOLD_LOCK());
#  ifdef LINT2
  if (!ohdr)
    ABORT("Invalid GC_print_smashed_obj argument");
#  endif
  if (ADDR_GE((ptr_t)(&ohdr->oh_sz), clobbered) || NULL == ohdr->oh_string) {
    /* The header cannot be trusted, so print as little of it as possible. */
    GC_err_printf("%s %p in or near object at %p(<smashed>, appr. sz= %lu)\n",
                  msg, (void *)clobbered, p,
                  (unsigned long)(GC_size(ohdr) - DEBUG_BYTES));
    return;
  }

  /* Choose what to show as the file name of the allocation site. */
  if (ADDR(ohdr->oh_string) < HBLKSIZE) {
    src_name = "(smashed string)";
  } else if (ohdr->oh_string[0] == '\0') {
    src_name = "EMPTY(smashed?)";
  } else {
    src_name = ohdr->oh_string;
  }
  GC_err_printf("%s %p in or near object at %p (%s:%d, sz= %lu)\n", msg,
                (void *)clobbered, p, src_name, GET_OH_LINENUM(ohdr),
                (unsigned long)ohdr->oh_sz);
  PRINT_CALL_CHAIN(ohdr);
}

/*
 * Print every location recorded in `GC_smashed` and then empty the
 * list.  Does nothing if the list is empty.  Must be called without
 * the allocator lock.
 */
GC_INNER void
GC_print_all_smashed_proc(void)
{
  unsigned idx = 0;

  GC_ASSERT(I_DONT_HOLD_LOCK());
  if (0 == GC_n_smashed)
    return;
  GC_err_printf("GC_check_heap_block: found %u smashed heap objects:\n",
                GC_n_smashed);
  while (idx < GC_n_smashed) {
    ptr_t base = (ptr_t)GC_base(GC_smashed[idx]);

#  ifdef LINT2
    if (!base)
      ABORT("Invalid GC_smashed element");
#  endif
    /* The client-visible object starts right after the debug header. */
    GC_print_smashed_obj("", base + sizeof(oh), GC_smashed[idx]);
    GC_smashed[idx] = 0;
    idx++;
  }
  GC_n_smashed = 0;
}

/*
 * Examine the object starting at `base` for a debug header.
 * Returns 0 if there is none (the header would not fit in the same heap
 * block, the object is too small, or neither the start flag nor the end
 * flag matches), -1 if the flags match but the stored size equals the
 * full object size (the object may have had debug info, but has been
 * deallocated), and 1 otherwise.
 */
GC_INNER int
GC_has_other_debug_info(ptr_t base)
{
  oh *ohdr = (oh *)base;
  ptr_t body = (ptr_t)(ohdr + 1);
  size_t sz = GC_size(base);

  if (HBLKPTR(base) != HBLKPTR(body) || sz < DEBUG_BYTES + EXTRA_BYTES)
    return 0;

  if (ohdr->oh_sf != (START_FLAG ^ (GC_uintptr_t)body)) {
    /* The start flag is wrong; accept the object only if the end one is OK. */
    const GC_uintptr_t *words = (GC_uintptr_t *)base;

    if (words[BYTES_TO_PTRS(sz) - 1] != (END_FLAG ^ (GC_uintptr_t)body))
      return 0;
  }
  return ohdr->oh_sz == (GC_uintptr_t)sz ? -1 : 1;
}
#endif /* !SHORT_DBG_HDRS */

/*
 * Default object printer: write the base address, the approximate size
 * and a coarse kind name ("atomic", "uncollectable" or "composite") of
 * the object containing `p` to the error stream.
 */
GC_INNER void
GC_default_print_heap_obj_proc(ptr_t p)
{
  ptr_t base = (ptr_t)GC_base(p);
  int kind = HDR(base)->hb_obj_kind;
  const char *kind_name;

  if (PTRFREE == kind) {
    kind_name = "atomic";
  } else if (IS_UNCOLLECTABLE(kind)) {
    kind_name = "uncollectable";
  } else {
    kind_name = "composite";
  }
  GC_err_printf("object at %p of appr. %lu bytes (%s)\n", (void *)base,
                (unsigned long)GC_size(base), kind_name);
}

/*
 * Hook used to print one heap object; `GC_print_all_errors()` calls it
 * for each leaked object (unless `SKIP_LEAKED_OBJECTS_PRINTING` is
 * defined).  Initially set to `GC_default_print_heap_obj_proc`.
 */
GC_INNER void (*GC_print_heap_obj)(ptr_t p) = GC_default_print_heap_obj_proc;

#if !defined(NO_FIND_LEAK) || !defined(SHORT_DBG_HDRS)
/*
 * Print the errors accumulated so far (smashed locations and, unless
 * `NO_FIND_LEAK` is defined, leaked objects), and abort if requested.
 * The allocator lock is acquired internally but is not held while
 * printing.  A call made while another call is still in progress
 * returns immediately.
 */
GC_INNER void
GC_print_all_errors(void)
{
  /* Set while some call is printing; accessed only with the lock held. */
  static GC_bool printing_errors = FALSE;
  GC_bool have_errors;
#  ifndef NO_FIND_LEAK
  unsigned i, n_leaked;
  /* Private snapshot of `GC_leaked`, so printing needs no lock. */
  ptr_t leaked[MAX_LEAKED];
#  endif

  LOCK();
  if (printing_errors) {
    UNLOCK();
    return;
  }
  have_errors = get_have_errors();
  printing_errors = TRUE;
#  ifndef NO_FIND_LEAK
  n_leaked = GC_n_leaked;
  if (n_leaked > 0) {
    GC_ASSERT(n_leaked <= MAX_LEAKED);
    /* Move the shared list to the local copy and reset the shared one. */
    BCOPY(GC_leaked, leaked, n_leaked * sizeof(ptr_t));
    GC_n_leaked = 0;
    BZERO(GC_leaked, n_leaked * sizeof(ptr_t));
  }
#  endif
  UNLOCK();

  if (GC_debugging_started) {
    GC_print_all_smashed();
  } else {
    /* The flag fetched above is disregarded if debugging is not started. */
    have_errors = FALSE;
  }

#  ifndef NO_FIND_LEAK
  if (n_leaked > 0) {
    GC_err_printf("Found %u leaked objects:\n", n_leaked);
    have_errors = TRUE;
  }
  for (i = 0; i < n_leaked; i++) {
    ptr_t p = leaked[i];

#    ifndef SKIP_LEAKED_OBJECTS_PRINTING
    GC_print_heap_obj(p);
#    endif
    /* Each reported object is explicitly deallocated. */
    GC_free(p);
  }
#  endif

  /*
   * Abort on any error if the collector is built with `GC_ABORT_ON_LEAK`
   * defined; otherwise abort only if the environment variable of the
   * same name is set.
   */
  if (have_errors
#  ifndef GC_ABORT_ON_LEAK
      && GETENV("GC_ABORT_ON_LEAK") != NULL
#  endif
  ) {
    ABORT("Leaked or smashed objects encountered");
  }

  LOCK();
  printing_errors = FALSE;
  UNLOCK();
}
#endif

/* The reclaim phase. */

/* Tell whether the block described by `hhdr` has a zero mark count. */
GC_INNER GC_bool
GC_block_empty(const hdr *hhdr)
{
  return hhdr->hb_n_marks == 0;
}

/*
 * Tell whether more than 7/8 of the object slots of a block holding
 * objects of `sz` bytes are marked, according to the mark count in
 * `hhdr`.
 */
STATIC GC_bool
GC_block_nearly_full(const hdr *hhdr, size_t sz)
{
  size_t threshold = HBLK_OBJS(sz) * 7 / 8;

  return hhdr->hb_n_marks > threshold;
}

/*
 * TODO: This should perhaps again be specialized for `USE_MARK_BYTES`
 * and `USE_MARK_BITS` cases.
 */

/*
 * Zero the object of `sz` bytes at `q` except for its first
 * (free-list link) field, add `sz` to `*pcount`, and return the address
 * where clearing stopped (i.e. the start of the next object).
 */
GC_INLINE ptr_t
GC_clear_block(ptr_t q, size_t sz, word *pcount)
{
  ptr_t *slot = (ptr_t *)q;
  ptr_t obj_end = q + sz;

#ifdef USE_MARK_BYTES
  GC_ASSERT((sz & 1) == 0);
  GC_ASSERT((ADDR(slot) & (2 * sizeof(ptr_t) - 1)) == 0);
  /* Clear the second field only, thus the link field is preserved. */
  slot[1] = NULL;
  slot += 2;
  /* The rest is cleared two fields at a time. */
  while (ADDR_LT((ptr_t)slot, obj_end)) {
    CLEAR_DOUBLE(slot);
    slot += 2;
  }
#else
  /* Start right after the link field. */
  for (++slot; ADDR_LT((ptr_t)slot, obj_end); ++slot) {
    *slot = NULL;
  }
#endif
  *pcount += sz;
  return (ptr_t)slot;
}

/*
 * Sweep the block `hbp` of small objects of `sz` bytes each: every
 * unmarked object is cleared (except for its link field) and prepended
 * to the free list `list`.  The number of reclaimed bytes is added to
 * `*pcount`.  Returns the new head of the list.
 */
STATIC ptr_t
GC_reclaim_clear(struct hblk *hbp, const hdr *hhdr, size_t sz, ptr_t list,
                 word *pcount)
{
  size_t bit_no = 0;
  ptr_t obj, last_obj;

  GC_ASSERT(hhdr == GC_find_header(hbp));
#ifndef THREADS
  GC_ASSERT(sz == hhdr->hb_sz);
#else
  /* Skip the assertion because of a potential race with `GC_realloc`. */
#endif
  GC_ASSERT((sz & (sizeof(ptr_t) - 1)) == 0);

  obj = hbp->hb_body;
  /* The highest address at which a whole object still fits. */
  last_obj = obj + HBLKSIZE - sz;
  while (ADDR_GE(last_obj, obj)) {
    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      /* Unreachable: link it in, then wipe the rest of the object. */
      obj_link(obj) = list;
      list = obj;
      FREE_PROFILER_HOOK(obj);
      obj = GC_clear_block(obj, sz, pcount);
    } else {
      /* Live object: just step over it. */
      obj += sz;
    }
    bit_no += MARK_BIT_OFFSET(sz);
  }
  return list;
}

/*
 * A variant of `GC_reclaim_clear` which leaves the content of the
 * reclaimed objects intact (only the link field is written).
 */
STATIC ptr_t
GC_reclaim_uninit(struct hblk *hbp, const hdr *hhdr, size_t sz, ptr_t list,
                  word *pcount)
{
  size_t bit_no = 0;
  word freed_bytes = 0;
  ptr_t obj, last_obj;

#ifndef THREADS
  GC_ASSERT(sz == hhdr->hb_sz);
#endif

  obj = hbp->hb_body;
  last_obj = (ptr_t)hbp + HBLKSIZE - sz;
  /* Visit every object slot of the block. */
  while (ADDR_GE(last_obj, obj)) {
    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      freed_bytes += sz;
      /* Unreachable object: prepend it to the free list. */
      obj_link(obj) = list;
      list = obj;
      FREE_PROFILER_HOOK(obj);
    }
    bit_no += MARK_BIT_OFFSET(sz);
    obj += sz;
  }
  *pcount += freed_bytes;
  return list;
}

#ifdef ENABLE_DISCLAIM
/*
 * Sweep the block `hbp` invoking the disclaim procedure of the block's
 * kind on each unmarked object.  An object for which the procedure
 * returns nonzero is kept (its mark bit is set and the mark count of
 * the block is incremented); otherwise the object is cleared and
 * prepended to `list`.  Returns the new head of the list; reclaimed
 * bytes are added to `*pcount`.
 */
STATIC ptr_t
GC_disclaim_and_reclaim(struct hblk *hbp, hdr *hhdr, size_t sz, ptr_t list,
                        word *pcount)
{
  size_t bit_no;
  ptr_t obj, last_obj;
  int(GC_CALLBACK * disclaim)(void *)
      = GC_obj_kinds[hhdr->hb_obj_kind].ok_disclaim_proc;

  GC_ASSERT(disclaim != 0);
#  ifndef THREADS
  GC_ASSERT(sz == hhdr->hb_sz);
#  endif
  obj = hbp->hb_body;
  last_obj = obj + HBLKSIZE - sz;

  for (bit_no = 0; ADDR_GE(last_obj, obj); bit_no += MARK_BIT_OFFSET(sz)) {
    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      if (!disclaim(obj)) {
        /* Disclaimed: put the object on the free list and wipe it. */
        obj_link(obj) = list;
        list = obj;
        FREE_PROFILER_HOOK(obj);
        obj = GC_clear_block(obj, sz, pcount);
        continue;
      }
      /* The procedure refused to release the object: keep it as marked. */
      set_mark_bit_from_hdr(hhdr, bit_no);
      INCR_MARKS(hhdr);
    }
    obj += sz;
  }
  return list;
}
#endif /* ENABLE_DISCLAIM */

#ifndef NO_FIND_LEAK

#  ifndef SHORT_DBG_HDRS
/*
 * Decide whether the unmarked object at `base` is a real leak.
 * Returns `TRUE` if the object should be reported as leaked, and
 * `FALSE` if it is treated as already freed by `GC_debug_free()`.
 * In the latter case the content after the debug header is checked
 * against `GC_FREED_MEM_MARKER`; on the first mismatch the object is
 * marked (so that it is not reclaimed in this cycle) and the offending
 * location is recorded with `GC_add_smashed()`.
 */
STATIC GC_bool
GC_check_leaked(ptr_t base)
{
  size_t i;
  size_t lpw;
  ptr_t *p;

  /*
   * If back pointers or the back graph are enabled, then additionally
   * the lowest bit of the first word of the object must be set for the
   * object to be treated as leaked.
   */
  if (
#    if defined(KEEP_BACK_PTRS) || defined(MAKE_BACK_GRAPH)
      (*(GC_uintptr_t *)base & 1) != 0 &&
#    endif
      GC_has_other_debug_info(base) >= 0)
    return TRUE; /*< object has leaked */

  /* Validate freed object's content. */
  p = (ptr_t *)(base + sizeof(oh));
  /* The number of pointer-sized words following the debug header. */
  lpw = BYTES_TO_PTRS(HDR(base)->hb_sz - sizeof(oh));
  for (i = 0; i < lpw; ++i)
    if ((GC_uintptr_t)p[i] != GC_FREED_MEM_MARKER) {
      /* Do not reclaim it in this cycle. */
      GC_set_mark_bit(base);
      /* Alter-after-free has been detected. */
      GC_add_smashed((ptr_t)(&p[i]));
      /* Do not report any other smashed locations in the object. */
      break;
    }

  return FALSE; /*< `GC_debug_free()` has been called */
}
#  endif /* !SHORT_DBG_HDRS */

/*
 * Register the unmarked object `leaked` as a leak.  The allocator lock
 * must be held.  If delayed freeing is on and `GC_check_leaked()` tells
 * the object is not a leak, then nothing is recorded.  Otherwise the
 * "have errors" flag is raised and, if `GC_leaked` has room, the object
 * is stored there and marked.
 */
GC_INLINE void
GC_add_leaked(ptr_t leaked)
{
  GC_ASSERT(I_HOLD_LOCK());
#  ifndef SHORT_DBG_HDRS
  if (GC_findleak_delay_free && !GC_check_leaked(leaked))
    return;
#  endif

  GC_SET_HAVE_ERRORS();
  if (GC_n_leaked >= MAX_LEAKED) {
    /* The table is full; the object is not recorded. */
    return;
  }
  GC_leaked[GC_n_leaked] = leaked;
  GC_n_leaked++;
  /* Make sure the object is not reclaimed in this cycle. */
  GC_set_mark_bit(leaked);
}

/*
 * Leak-detection counterpart of the sweep: nothing is reclaimed, each
 * unmarked object of the block is passed to `GC_add_leaked()` instead.
 */
STATIC void
GC_reclaim_check(struct hblk *hbp, const hdr *hhdr, size_t sz)
{
  size_t bit_no = 0;
  ptr_t obj, last_obj;

#  ifndef THREADS
  GC_ASSERT(sz == hhdr->hb_sz);
#  endif
  obj = hbp->hb_body;
  last_obj = obj + HBLKSIZE - sz;
  /* Visit every object slot of the block. */
  while (ADDR_GE(last_obj, obj)) {
    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      GC_add_leaked(obj);
    }
    bit_no += MARK_BIT_OFFSET(sz);
    obj += sz;
  }
}

#endif /* !NO_FIND_LEAK */

/*
 * Is a pointer-free block?  Same as `IS_PTRFREE()` macro but uses
 * unordered atomic access to avoid racing with `GC_realloc`.
 * `hhdr` is a pointer to the block header; the block is considered
 * pointer-free if its `hb_descr` field is zero.
 */
#ifdef AO_HAVE_load
#  define IS_PTRFREE_SAFE(hhdr) (AO_load((AO_t *)&(hhdr)->hb_descr) == 0)
#else
/*
 * No race as `GC_realloc` holds the allocator lock when updating
 * `hb_descr` field.
 */
#  define IS_PTRFREE_SAFE(hhdr) IS_PTRFREE(hhdr)
#endif

/*
 * Sweep the block `hbp` (with header `hhdr`) of small objects of `sz`
 * bytes each, prepending the reclaimed objects to the free list `list`.
 * The actual work is delegated to `GC_disclaim_and_reclaim()` (if the
 * block has `HAS_DISCLAIM` flag), `GC_reclaim_clear()` (if `init` is
 * true or debugging is started) or `GC_reclaim_uninit()`.
 * Returns the new head of the list; `pcount` is passed on to the chosen
 * routine to accumulate the reclaimed bytes.
 */
GC_INNER ptr_t
GC_reclaim_generic(struct hblk *hbp, hdr *hhdr, size_t sz, GC_bool init,
                   ptr_t list, word *pcount)
{
  ptr_t result;

#ifndef PARALLEL_MARK
  GC_ASSERT(I_HOLD_LOCK());
#endif
  GC_ASSERT(GC_find_header(hbp) == hhdr);
#ifndef GC_DISABLE_INCREMENTAL
  /* The block is about to be written to (the free-list links at least). */
  GC_remove_protection(hbp, 1, IS_PTRFREE_SAFE(hhdr));
#endif
#ifdef ENABLE_DISCLAIM
  if ((hhdr->hb_flags & HAS_DISCLAIM) != 0) {
    result = GC_disclaim_and_reclaim(hbp, hhdr, sz, list, pcount);
  } else
#endif
  /* else */ {
    if (init || GC_debugging_started) {
      result = GC_reclaim_clear(hbp, hhdr, sz, list, pcount);
    } else {
#ifndef AO_HAVE_load
      GC_ASSERT(IS_PTRFREE(hhdr));
#endif
      result = GC_reclaim_uninit(hbp, hhdr, sz, list, pcount);
    }
  }
  /* For uncollectable kinds, the mark bits are set again after the sweep. */
  if (IS_UNCOLLECTABLE(hhdr->hb_obj_kind))
    GC_set_hdr_marks(hhdr);
  return result;
}

/*
 * Process the block `hbp` of small objects of `sz` bytes each.
 * If `report_if_found`, then unmarked objects are only reported as
 * leaks (unless `NO_FIND_LEAK` is defined); otherwise they are moved
 * to the free list of the block's kind for this object size.
 * The caller is responsible for checking whether the block is entirely
 * empty if such blocks are to be deallocated as a whole.
 * The allocator lock must be held.
 */
STATIC void
GC_reclaim_small_nonempty_block(struct hblk *hbp, size_t sz,
                                GC_bool report_if_found)
{
  hdr *hhdr;
  struct obj_kind *ok;
  void **flh;

  GC_ASSERT(I_HOLD_LOCK());
  hhdr = HDR(hbp);
  /* Remember the collection number of this sweep (truncated to short). */
  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
  if (report_if_found) {
#ifndef NO_FIND_LEAK
    GC_reclaim_check(hbp, hhdr, sz);
#endif
    return;
  }

  ok = &GC_obj_kinds[hhdr->hb_obj_kind];
  flh = &ok->ok_freelist[BYTES_TO_GRANULES(sz)];
  *flh = GC_reclaim_generic(hbp, hhdr, sz, ok->ok_init, (ptr_t)(*flh),
                            (/* unsigned */ word *)&GC_bytes_found);
}

#ifdef ENABLE_DISCLAIM
/*
 * Sweep the block `hbp` with `GC_reclaim_generic()`.  If no marks
 * remain in the block afterwards, then the whole block is returned to
 * the heap block free list (and the free list of objects is left
 * untouched); otherwise the updated free list is stored.
 * The allocator lock must be held.
 */
STATIC void
GC_disclaim_and_reclaim_or_free_small_block(struct hblk *hbp)
{
  hdr *hhdr;
  size_t obj_sz;
  struct obj_kind *ok;
  void **flh;
  void *new_head;

  GC_ASSERT(I_HOLD_LOCK());
  hhdr = HDR(hbp);
  obj_sz = hhdr->hb_sz;
  ok = &GC_obj_kinds[hhdr->hb_obj_kind];
  flh = &ok->ok_freelist[BYTES_TO_GRANULES(obj_sz)];

  hhdr->hb_last_reclaimed = (unsigned short)GC_gc_no;
  new_head = GC_reclaim_generic(hbp, hhdr, obj_sz, ok->ok_init,
                                (ptr_t)(*flh),
                                (/* unsigned */ word *)&GC_bytes_found);
  if (0 == hhdr->hb_n_marks) {
    /* Nothing survived: release the entire block. */
    GC_ASSERT(hbp == hhdr->hb_block);
    GC_bytes_found += (GC_signed_word)HBLKSIZE;
    GC_freehblk(hbp);
  } else {
    *flh = new_head;
  }
}
#endif /* ENABLE_DISCLAIM */

/*
 * Restore an unmarked large object or an entirely empty block of
 * small objects to the heap block free list.  Otherwise enqueue the
 * block for later processing by `GC_reclaim_small_nonempty_block()`.
 * If `report_if_found` is `TRUE`, then process any block immediately,
 * and simply report free objects; do not actually reclaim them.
 * `report_if_found` is a boolean value passed as a pointer (to match
 * the callback signature expected by `GC_apply_to_all_blocks()`).
 * Also accumulates `GC_atomic_in_use` and `GC_composite_in_use`.
 * The allocator lock must be held.
 */
STATIC void GC_CALLBACK
GC_reclaim_block(struct hblk *hbp, void *report_if_found)
{
  hdr *hhdr;
  size_t sz; /*< size of objects in current block */
  struct obj_kind *ok;

  GC_ASSERT(I_HOLD_LOCK());
#if defined(CPPCHECK)
  GC_noop1_ptr(report_if_found);
#endif
  hhdr = HDR(hbp);
  ok = &GC_obj_kinds[hhdr->hb_obj_kind];
#ifdef AO_HAVE_load
  /* Atomic access is used to avoid racing with `GC_realloc`. */
  sz = AO_load((volatile AO_t *)&hhdr->hb_sz);
#else
  /*
   * No race as `GC_realloc` holds the allocator lock while
   * updating `hb_sz`.
   */
  sz = hhdr->hb_sz;
#endif
  if (sz > MAXOBJBYTES) {
    /* The case of 1 big object. */
    if (!mark_bit_from_hdr(hhdr, 0)) {
      if (report_if_found) {
        GC_ASSERT(hbp == hhdr->hb_block);
#ifndef NO_FIND_LEAK
        GC_add_leaked((ptr_t)hbp);
#endif
      } else {
#ifdef ENABLE_DISCLAIM
        if (UNLIKELY((hhdr->hb_flags & HAS_DISCLAIM) != 0)) {
          if (ok->ok_disclaim_proc(hbp)) {
            /* Not disclaimed, thus resurrect the object. */
            set_mark_bit_from_hdr(hhdr, 0);
            goto in_use;
          }
        }
#endif
        GC_ASSERT(hbp == hhdr->hb_block);
        if (sz > HBLKSIZE) {
          GC_large_allocd_bytes -= HBLKSIZE * OBJ_SZ_TO_BLOCKS(sz);
        }
        GC_bytes_found += (GC_signed_word)sz;
        GC_freehblk(hbp);
        FREE_PROFILER_HOOK(hbp);
      }
    } else {
#ifdef ENABLE_DISCLAIM
    in_use:
#endif
      /* The large object is alive: account for it in the in-use counters. */
      if (IS_PTRFREE_SAFE(hhdr)) {
        GC_atomic_in_use += sz;
      } else {
        GC_composite_in_use += sz;
      }
    }
  } else {
    /* The case of a block of small objects. */
    GC_bool empty = GC_block_empty(hhdr);

#ifdef PARALLEL_MARK
    /*
     * Count can be low or one too high because we sometimes have to
     * ignore decrements.  Objects can also potentially be repeatedly
     * marked by each marker.  Here we assume 3 markers at most, but
     * this is extremely unlikely to fail spuriously with more.
     * And if it does, it should be looked at.
     */
    GC_ASSERT(sz != 0
              && (GC_markers_m1 > 1 ? 3 : GC_markers_m1 + 1)
                             * (HBLKSIZE / sz + 1)
                         + 16
                     >= hhdr->hb_n_marks);
#else
    GC_ASSERT(sz * hhdr->hb_n_marks <= HBLKSIZE);
#endif
#ifdef VALGRIND_TRACKING
    /*
     * Call `GC_free_profiler_hook()` on freed objects so that
     * a profiling tool could track the allocations.
     */
    {
      ptr_t p = hbp->hb_body;
      ptr_t plim = p + HBLKSIZE - sz;
      size_t bit_no;

      for (bit_no = 0; ADDR_GE(plim, p);
           bit_no += MARK_BIT_OFFSET(sz), p += sz) {
        if (!mark_bit_from_hdr(hhdr, bit_no))
          FREE_PROFILER_HOOK(p);
      }
    }
#endif
    GC_ASSERT(hbp == hhdr->hb_block);
    if (report_if_found) {
      GC_reclaim_small_nonempty_block(hbp, sz, TRUE /* `report_if_found` */);
    } else if (empty) {
#ifdef ENABLE_DISCLAIM
      if ((hhdr->hb_flags & HAS_DISCLAIM) != 0) {
        GC_disclaim_and_reclaim_or_free_small_block(hbp);
      } else
#endif
      /* else */ {
        /* No marked objects at all: release the whole block right away. */
        GC_bytes_found += (GC_signed_word)HBLKSIZE;
        GC_freehblk(hbp);
        FREE_PROFILER_HOOK(hbp);
      }
    } else if (GC_find_leak_inner || !GC_block_nearly_full(hhdr, sz)) {
      /* Group of smaller objects, enqueue the real work. */
      struct hblk **rlh = ok->ok_reclaim_list;

      if (rlh != NULL) {
        /* Push the block onto the reclaim list for its size in granules. */
        rlh += BYTES_TO_GRANULES(sz);
        hhdr->hb_next = *rlh;
        *rlh = hbp;
      }
    } else {
      /* Not worth salvaging. */
    }
    /*
     * We used to do the `GC_block_nearly_full` check later, but we
     * already have the right cache context here.  Also doing it here
     * avoids some silly lock contention in `GC_malloc_many()`.
     */
    if (IS_PTRFREE_SAFE(hhdr)) {
      GC_atomic_in_use += (word)sz * hhdr->hb_n_marks;
    } else {
      GC_composite_in_use += (word)sz * hhdr->hb_n_marks;
    }
  }
}

#if !defined(NO_DEBUGGING)
/*
 * Routines to gather and print heap block info; these are intended for
 * debugging.  If used for any other purpose, they should be called with
 * the allocator lock held.
 */

/* Totals accumulated by `GC_print_block_descr()` over all heap blocks. */
struct Print_stats {
  /* The number of blocks visited. */
  size_t number_of_blocks;
  /* The sum of the object sizes, each rounded up to a `HBLKSIZE` multiple. */
  size_t total_bytes;
};

/* Prototype of `GC_n_set_marks()`, which is defined right below. */
EXTERN_C_BEGIN /*< to avoid "no previous prototype" clang warning */
    unsigned
    GC_n_set_marks(const hdr *);
EXTERN_C_END

#  ifdef USE_MARK_BYTES
/*
 * Count the marks set in the block header `hhdr` (the variant for the
 * case of one mark byte per potential object start).  The entry past
 * the last object is expected to be set and is not counted.
 * The function stays externally visible since GNU `gcj` uses it
 * currently.  A race with `GC_clear_hdr_marks` is possible, which is
 * tolerated as the function serves a debugging purpose only.
 */
GC_ATTR_NO_SANITIZE_THREAD
unsigned
GC_n_set_marks(const hdr *hhdr)
{
  size_t step = MARK_BIT_OFFSET(hhdr->hb_sz);
  size_t end_bit = FINAL_MARK_BIT(hhdr->hb_sz);
  unsigned n_set = 0;
  size_t bit = 0;

  while (bit < end_bit) {
    n_set += (unsigned)hhdr->hb_marks[bit];
    bit += step;
  }

  /* The one should be set past the end. */
  GC_ASSERT(hhdr->hb_marks[end_bit]);
  return n_set;
}

#  else
/*
 * Return the population count of `v`.  A plain bit-by-bit loop is used
 * since the function is not performance critical.
 */
static unsigned
count_ones(word v)
{
  unsigned n_ones = 0;

  while (v != 0) {
    if ((v & 1) != 0)
      n_ones++;
    v >>= 1;
  }
  return n_ones;
}

/*
 * Return the number of set mark bits in the given block header (the
 * variant for the case of mark bits packed into words).  The bit set
 * past the end is excluded from the result.
 */
unsigned
GC_n_set_marks(const hdr *hhdr)
{
  unsigned result = 0;
  size_t i;
#    ifdef MARK_BIT_PER_OBJ
  size_t n_objs = HBLK_OBJS(hhdr->hb_sz);
  size_t n_mark_words = divWORDSZ(n_objs > 0 ? n_objs : 1); /*< round down */

  /* Only the words which could hold a mark bit are examined (inclusive). */
  for (i = 0; i <= n_mark_words; i++) {
    result += count_ones(hhdr->hb_marks[i]);
  }
#    else

  /* Examine the whole mark bits array. */
  for (i = 0; i < HB_MARKS_SZ; i++) {
    result += count_ones(hhdr->hb_marks[i]);
  }
#    endif
  GC_ASSERT(result > 0);
  /* Exclude the one bit set past the end. */
  result--;

#    ifndef MARK_BIT_PER_OBJ
  if (IS_UNCOLLECTABLE(hhdr->hb_obj_kind)) {
    size_t lg = BYTES_TO_GRANULES(hhdr->hb_sz);

    /*
     * As mentioned in `GC_set_hdr_marks`, all the bits are set instead of
     * every `n`-th, thus the result should be adjusted.
     */
    GC_ASSERT((unsigned)lg != 0 && result % lg == 0);
    result /= (unsigned)lg;
  }
#    endif
  return result;
}
#  endif /* !USE_MARK_BYTES */

/*
 * Public wrapper of `GC_n_set_marks()`: look up the header of the heap
 * block containing `p` and return the number of marks set in it.
 */
GC_API unsigned GC_CALL
GC_count_set_marks_in_hblk(const void *p)
{
  const hdr *hhdr = HDR(p);

  return GC_n_set_marks(hhdr);
}

/*
 * Per-block callback of `GC_print_block_list()`: print one line with
 * the kind, the object size, the number of marks set and the number of
 * objects of the block `h`, and update the totals in `raw_ps` (which
 * points to `struct Print_stats`).
 */
STATIC void GC_CALLBACK
GC_print_block_descr(struct hblk *h, void *raw_ps)
{
  struct Print_stats *stats = (struct Print_stats *)raw_ps;
  const hdr *hhdr = HDR(h);
  size_t obj_sz = hhdr->hb_sz;
  size_t marks_cnt = (size_t)GC_n_set_marks(hhdr);
  size_t objs_cnt = HBLK_OBJS(obj_sz);

#  ifndef PARALLEL_MARK
  GC_ASSERT(hhdr->hb_n_marks == marks_cnt);
#  endif
#  if defined(CPPCHECK)
  GC_noop1_ptr(h);
#  endif
  /* A block of one large object has zero `objs_cnt` but may have a mark. */
  GC_ASSERT((objs_cnt > 0 ? objs_cnt : 1) >= marks_cnt);
  GC_printf("%u,%u,%u,%u\n", hhdr->hb_obj_kind, (unsigned)obj_sz,
            (unsigned)marks_cnt, (unsigned)objs_cnt);
  stats->number_of_blocks++;
  /* Add the object size rounded up to a multiple of `HBLKSIZE`. */
  stats->total_bytes += (obj_sz + HBLKSIZE - 1) & ~(HBLKSIZE - 1);
}

void
GC_print_block_list(void)
{
  struct Print_stats pstats;

  GC_printf("kind(0=ptrfree/1=normal/2=unc.),"
            "obj_sz,#marks_set,#objs_in_block\n");
  BZERO(&pstats, sizeof(pstats));
  GC_apply_to_all_blocks(GC_print_block_descr, &pstats);
  GC_printf("blocks= %lu, total_bytes= %lu\n",
            (unsigned long)pstats.number_of_blocks,
            (unsigned long)pstats.total_bytes);
  if (pstats.total_bytes + GC_large_free_bytes != GC_heapsize)
    GC_err_printf("LOST SOME BLOCKS!! Total bytes should be: %lu\n",
                  (unsigned long)(GC_heapsize - GC_large_free_bytes));
}

/*
 * Print every object of the free list of the given object kind and
 * size (`lg`, in granules), one per line, along with the address of
 * the heap block containing the object and the position in the list.
 */
GC_API void GC_CALL
GC_print_free_list(int kind, size_t lg)
{
  void *obj;
  int pos = 0;

  GC_ASSERT(kind < MAXOBJKINDS);
  GC_ASSERT(lg <= MAXOBJGRANULES);
  for (obj = GC_obj_kinds[kind].ok_freelist[lg]; obj != NULL;
       obj = obj_link(obj)) {
    GC_printf("Free object in heap block %p [%d]: %p\n",
              (void *)HBLKPTR(obj), pos, obj);
    pos++;
  }
}
#endif /* !NO_DEBUGGING */

/*
 * Walk the free list starting at `*flp` and reset every `obj_link`
 * pointer in it, as well as `*flp` itself.  This must be done before
 * dropping a list of free `gcj`-style objects, since otherwise we may
 * end up with dangling "descriptor" pointers.  It may help for other
 * pointer-containing objects as well.
 */
STATIC void
GC_clear_fl_links(void **flp)
{
  void *cur = *flp;

  while (cur != NULL) {
    /* Cut the link leading to `cur`, then move on to the link inside it. */
    *flp = NULL;
    flp = &obj_link(cur);
    cur = *flp;
  }
}

/*
 * Start the reclaim phase: reset the in-use counters, clear the reclaim
 * lists (and, unless `report_if_found`, the free lists) of all object
 * kinds in use, then apply `GC_reclaim_block()` to every heap block.
 * If `report_if_found`, then unmarked objects are reported instead of
 * being reclaimed.  The allocator lock must be held.
 */
GC_INNER void
GC_start_reclaim(GC_bool report_if_found)
{
  int kind;

  GC_ASSERT(I_HOLD_LOCK());
#if defined(PARALLEL_MARK)
  GC_ASSERT(0 == GC_fl_builder_count);
#endif
  /* Reset in-use counters.  `GC_reclaim_block` recomputes them. */
  GC_composite_in_use = 0;
  GC_atomic_in_use = 0;

  /* Clear reclaim- and free-lists. */
  for (kind = 0; kind < (int)GC_n_kinds; kind++) {
    struct hblk **rlist = GC_obj_kinds[kind].ok_reclaim_list;
    /* Links inside free objects are cleared only if the kind has */
    /* a nonzero descriptor.                                      */
    GC_bool should_clobber = GC_obj_kinds[kind].ok_descriptor != 0;

    if (NULL == rlist) {
      /* Means this object kind is not used. */
      continue;
    }

    if (!report_if_found) {
      void **fop;
      /* One past the last free-list head of the kind. */
      void **lim = &GC_obj_kinds[kind].ok_freelist[MAXOBJGRANULES + 1];

      for (fop = GC_obj_kinds[kind].ok_freelist;
           ADDR_LT((ptr_t)fop, (ptr_t)lim); fop++) {
        if (*fop != NULL) {
          if (should_clobber) {
            GC_clear_fl_links(fop);
          } else {
            *fop = NULL;
          }
        }
      }
    } else {
      /* Free-list objects are marked, and it is safe to leave them. */
    }
    BZERO(rlist, (MAXOBJGRANULES + 1) * sizeof(void *));
  }

  /*
   * Go through all heap blocks, and reclaim unmarked objects or enqueue
   * the block for later processing.
   */
  GC_apply_to_all_blocks(GC_reclaim_block, NUMERIC_TO_VPTR(report_if_found));

#ifdef EAGER_SWEEP
  /*
   * This is a very stupid thing to do.  We make it possible anyway.
   */
  GC_reclaim_all((GC_stop_func)0, FALSE);
#elif defined(ENABLE_DISCLAIM)
  /*
   * However, make sure to clear reclaimable objects of kinds with
   * unconditional marking enabled before we do any significant
   * marking work.
   */
  GC_reclaim_unconditionally_marked();
#endif
#if defined(PARALLEL_MARK)
  GC_ASSERT(0 == GC_fl_builder_count);
#endif
}

/*
 * Sweep the blocks enqueued on the reclaim list of the given `kind`
 * for objects of `lg` granules, one block at a time, until either the
 * corresponding free list becomes nonempty or the reclaim list is
 * exhausted.  The allocator lock must be held.
 */
GC_INNER void
GC_continue_reclaim(size_t lg, int kind)
{
  struct hblk *hbp;
  struct obj_kind *ok = &GC_obj_kinds[kind];
  struct hblk **rlh = ok->ok_reclaim_list;
  void **flh;

  GC_ASSERT(I_HOLD_LOCK());
  if (NULL == rlh) {
    /* No blocks of this kind. */
    return;
  }

  flh = &ok->ok_freelist[lg];
  rlh += lg;
  for (;;) {
    const hdr *hhdr;

    hbp = *rlh;
    if (NULL == hbp) {
      /* Nothing more to sweep for this size. */
      return;
    }
    hhdr = HDR(hbp);
    /* Unlink the block from the reclaim list before sweeping it. */
    *rlh = hhdr->hb_next;
    GC_reclaim_small_nonempty_block(hbp, hhdr->hb_sz, FALSE);
    if (*flh != NULL) {
      /* The appropriate free list is nonempty. */
      return;
    }
  }
}

/*
 * Process all the blocks remaining on the reclaim lists of all kinds.
 * `stop_func`, if nonzero, is polled before each block; the function
 * returns `FALSE` at once when it reports true.  If `ignore_old`, then
 * a block is unlinked without sweeping unless it was last reclaimed
 * during the previous collection.  Returns `TRUE` on completion.
 * The allocator lock must be held.
 */
GC_INNER GC_bool
GC_reclaim_all(GC_stop_func stop_func, GC_bool ignore_old)
{
  int kind;
#ifndef NO_CLOCK
  CLOCK_TYPE start_time = CLOCK_TYPE_INITIALIZER;

  if (GC_print_stats == VERBOSE)
    GET_TIME(start_time);
#endif
  GC_ASSERT(I_HOLD_LOCK());

  for (kind = 0; kind < (int)GC_n_kinds; kind++) {
    struct hblk **rlp = GC_obj_kinds[kind].ok_reclaim_list;
    size_t lg;

    if (NULL == rlp)
      continue;

    for (lg = 1; lg <= MAXOBJGRANULES; lg++) {
      struct hblk **rlh = rlp + lg;
      struct hblk *hbp;

      while ((hbp = *rlh) != NULL) {
        const hdr *hhdr;

        if (stop_func != (GC_stop_func)0 && (*stop_func)())
          return FALSE;

        hhdr = HDR(hbp);
        *rlh = hhdr->hb_next;
        if (ignore_old && (word)hhdr->hb_last_reclaimed != GC_gc_no - 1) {
          /* An old block: just drop it from the list. */
          continue;
        }
        /*
         * It is likely we will need it this time, too.  It has been
         * touched recently, so this should not trigger paging.
         */
        GC_reclaim_small_nonempty_block(hbp, hhdr->hb_sz, FALSE);
      }
    }
  }
#ifndef NO_CLOCK
  if (GC_print_stats == VERBOSE) {
    CLOCK_TYPE done_time;

    GET_TIME(done_time);
    GC_verbose_log_printf("Disposing of reclaim lists took %lu ms %lu ns\n",
                          MS_TIME_DIFF(done_time, start_time),
                          NS_FRAC_TIME_DIFF(done_time, start_time));
  }
#endif
  return TRUE;
}

#if !defined(EAGER_SWEEP) && defined(ENABLE_DISCLAIM)
/*
 * Eagerly sweep the enqueued heap blocks of the kinds which have
 * unconditional marking enabled, so that any reclaimable objects have
 * been reclaimed before we start marking.  This is a simplified
 * `GC_reclaim_all` restricted to kinds where `ok_mark_unconditionally`
 * is `TRUE`.  The allocator lock must be held.
 */
STATIC void
GC_reclaim_unconditionally_marked(void)
{
  int kind;

  GC_ASSERT(I_HOLD_LOCK());
  for (kind = 0; kind < (int)GC_n_kinds; kind++) {
    struct obj_kind *ok = &GC_obj_kinds[kind];
    struct hblk **rlp = ok->ok_reclaim_list;
    size_t lg;

    if (rlp != NULL && ok->ok_mark_unconditionally) {
      for (lg = 1; lg <= MAXOBJGRANULES; lg++) {
        struct hblk **rlh = rlp + lg;
        struct hblk *hbp;

        /* Drain the reclaim list for this object size. */
        for (hbp = *rlh; hbp != NULL; hbp = *rlh) {
          const hdr *hhdr = HDR(hbp);

          *rlh = hhdr->hb_next;
          GC_reclaim_small_nonempty_block(hbp, hhdr->hb_sz, FALSE);
        }
      }
    }
  }
}
#endif /* !EAGER_SWEEP && ENABLE_DISCLAIM */

/*
 * The context passed by `GC_enumerate_reachable_objects_inner()` to
 * `GC_do_enumerate_reachable_objects()` via `GC_apply_to_all_blocks()`.
 */
struct enumerate_reachable_s {
  /* The client callback invoked for every marked object. */
  GC_reachable_object_proc proc;
  /* An opaque value handed to `proc` as its last argument. */
  void *client_data;
};

/*
 * Per-block callback of `GC_enumerate_reachable_objects_inner()`:
 * invoke the client procedure (stored in the structure pointed to by
 * `ed_ptr`) for every marked object of the block `hbp`, passing the
 * object address, the object size and the client data.
 */
STATIC void GC_CALLBACK
GC_do_enumerate_reachable_objects(struct hblk *hbp, void *ed_ptr)
{
  const hdr *hhdr = HDR(hbp);
  const struct enumerate_reachable_s *ped
      = (struct enumerate_reachable_s *)ed_ptr;
  size_t sz = hhdr->hb_sz;
  size_t bit_no = 0;
  ptr_t obj, last_obj;

  if (GC_block_empty(hhdr))
    return;

  obj = hbp->hb_body;
  /* In the case of 1 big object, only the first position is examined. */
  last_obj = sz > MAXOBJBYTES ? obj : obj + HBLKSIZE - sz;

  /* Go through all objects in the block. */
  while (ADDR_GE(last_obj, obj)) {
    if (mark_bit_from_hdr(hhdr, bit_no)) {
      ped->proc(obj, sz, ped->client_data);
    }
    bit_no += MARK_BIT_OFFSET(sz);
    obj += sz;
  }
}

/*
 * Invoke `proc` for every marked object in the heap, passing
 * `client_data` through.  The caller should hold the allocator lock at
 * least in the reader mode.
 */
GC_API void GC_CALL
GC_enumerate_reachable_objects_inner(GC_reachable_object_proc proc,
                                     void *client_data)
{
  struct enumerate_reachable_s ctx;

  GC_ASSERT(I_HOLD_READER_LOCK());
  /* Bundle the callback and its argument for the per-block routine. */
  ctx.client_data = client_data;
  ctx.proc = proc;
  GC_apply_to_all_blocks(GC_do_enumerate_reachable_objects, &ctx);
}

/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1999-2000 by Hewlett-Packard Company.  All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


/*
 * Some simple primitives for allocation with explicit type information.
 * Simple objects are allocated such that they contain a `GC_descr` at the
 * end (in the last allocated word).  This descriptor may be a procedure
 * which then examines an extended descriptor passed as its environment.
 *
 * Arrays are treated as simple objects if they have sufficiently simple
 * structure.  Otherwise they are allocated from an array kind that supplies
 * a special mark procedure.  These arrays contain a pointer to a
 * `complex_descriptor` as their last "pointer-sized" word.
 * This is done because the environment field is too small, and the collector
 * must trace the `complex_descriptor`.
 *
 * Note that descriptors inside objects may appear cleared, if we encounter
 * a false reference to an object on a free list.  In the case of a simple
 * object, this is OK, since a zero descriptor corresponds to examining no
 * fields.  In the `complex_descriptor` case, we explicitly check for that
 * case.
 *
 * Note: major parts of this code have not been tested at all and are not
 * testable, since they are not accessible through the current interface.
 */

#include "gc/gc_typed.h"

/*
 * Object kind for objects with indirect (possibly extended) descriptors.
 * Set by `GC_init_explicit_typing()`.
 */
STATIC int GC_explicit_kind = 0;

/*
 * Object kind for objects with complex descriptors and
 * `GC_array_mark_proc`.  Set by `GC_init_explicit_typing()`.
 */
STATIC int GC_array_kind = 0;

/*
 * The number of entries allocated for `GC_ext_descriptors` table
 * initially (the table size is doubled on each subsequent growth).
 */
#define ED_INITIAL_SIZE 100

/* Indices of the typed mark procedures. */
STATIC unsigned GC_typed_mark_proc_index = 0;
STATIC unsigned GC_array_mark_proc_index = 0;

/*
 * Push `GC_ext_descriptors` variable.  Installed as
 * `GC_push_typed_structures` by `GC_add_ext_descriptor()` when the
 * table is about to be allocated for the first time.
 */
STATIC void
GC_push_typed_structures_proc(void)
{
  GC_PUSH_ALL_SYM(GC_ext_descriptors);
}

/*
 * Add a multi-word bitmap to `GC_ext_descriptors` arrays.
 * `bm` is the bitmap and `nbits` is the number of its meaningful bits;
 * the bits of the last word beyond `nbits` are cleared in the stored
 * copy.  Returns starting index on success, -1 otherwise (the table
 * would exceed `MAX_ENV` entries or the allocation has failed).
 * Acquires the allocator lock internally; the lock is released while
 * a bigger table is being allocated.
 */
STATIC GC_signed_word
GC_add_ext_descriptor(const word *bm, size_t nbits)
{
  GC_signed_word result;
  size_t i;
  /* The number of words needed to hold `nbits` bits (rounded up). */
  size_t nwords = divWORDSZ(nbits + CPP_WORDSZ - 1);

  LOCK();
  while (UNLIKELY(GC_avail_descr + nwords >= GC_ed_size)) {
    typed_ext_descr_t *newExtD;
    size_t new_size;
    /* The table size observed before the lock is released. */
    size_t ed_size = GC_ed_size;

    if (0 == ed_size) {
      GC_ASSERT(ADDR(&GC_ext_descriptors) % ALIGNMENT == 0);
      GC_push_typed_structures = GC_push_typed_structures_proc;
      UNLOCK();
      new_size = ED_INITIAL_SIZE;
    } else {
      UNLOCK();
      new_size = 2 * ed_size;
      if (new_size > MAX_ENV)
        return -1;
    }
    /* Note: the allocation is performed without the lock held. */
    newExtD = (typed_ext_descr_t *)GC_malloc_atomic(
        new_size * sizeof(typed_ext_descr_t));
    if (NULL == newExtD)
      return -1;
    LOCK();
    if (ed_size == GC_ed_size) {
      if (GC_avail_descr != 0) {
        BCOPY(GC_ext_descriptors, newExtD,
              GC_avail_descr * sizeof(typed_ext_descr_t));
      }
      GC_ed_size = new_size;
      GC_ext_descriptors = newExtD;
    } else {
      /*
       * Another thread has already resized it in the meantime; the new
       * table is not installed, and the loop condition is rechecked.
       */
    }
  }
  result = (GC_signed_word)GC_avail_descr;
  /* All the elements but the last one are flagged as continued. */
  for (i = 0; i < nwords - 1; i++) {
    GC_ext_descriptors[(size_t)result + i].ed_bitmap = bm[i];
    GC_ext_descriptors[(size_t)result + i].ed_continued = TRUE;
  }
  /* Clear irrelevant (highest) bits for the last element. */
  GC_ext_descriptors[(size_t)result + i].ed_bitmap
      = bm[i] & (GC_WORD_MAX >> (nwords * CPP_WORDSZ - nbits));
  GC_ext_descriptors[(size_t)result + i].ed_continued = FALSE;
  GC_avail_descr += nwords;
  GC_ASSERT(result >= 0);
  UNLOCK();
  return result;
}

/*
 * Table of bitmap descriptors for `n` pointer-long all-pointer objects.
 * Filled in by `GC_init_explicit_typing()`: the element at index `n`
 * has the `n` highest bits set plus `GC_DS_BITMAP` tag.
 */
STATIC GC_descr GC_bm_table[CPP_WORDSZ / 2] = { 0 };

/*
 * Build a descriptor for 2 objects placed one after another, where
 * each one is `lpw` pointers long and is described by `d`.  `d` is
 * either a `GC_DS_LENGTH` or a `GC_DS_BITMAP` descriptor; the former
 * is converted to the latter using `GC_bm_table`.  The caller ensures
 * that the result is short enough to fit into a bitmap descriptor.
 */
STATIC GC_descr
GC_double_descr(GC_descr d, size_t lpw)
{
  GC_descr bits;

  GC_ASSERT(GC_bm_table[0] == GC_DS_BITMAP); /*< `bm` table is initialized */
  if ((d & GC_DS_TAGS) == GC_DS_LENGTH) {
    d = GC_bm_table[BYTES_TO_PTRS(d)];
  }
  /* The bitmap without the tag, to be repeated `lpw` positions further. */
  bits = d & ~(GC_descr)GC_DS_TAGS;
  return d | (bits >> lpw);
}

/*
 * Forward declarations of the mark procedures which are registered by
 * `GC_init_explicit_typing()`.
 */
STATIC mse *GC_CALLBACK GC_typed_mark_proc(word *addr, mse *mark_stack_top,
                                           mse *mark_stack_limit, word env);

STATIC mse *GC_CALLBACK GC_array_mark_proc(word *addr, mse *mark_stack_top,
                                           mse *mark_stack_limit, word env);

/*
 * One-time setup of the typed allocation support: register both mark
 * procedures, create the two object kinds and fill in `GC_bm_table`.
 */
STATIC void
GC_init_explicit_typing(void)
{
  unsigned n_ptrs;

  /*
   * The kind with a simple indirect descriptor, which resides in
   * the last `word` of the object.
   */
  GC_typed_mark_proc_index = GC_new_proc_inner(GC_typed_mark_proc);
  GC_explicit_kind = (int)GC_new_kind_inner(
      GC_new_free_list_inner(),
      (PTRS_TO_BYTES(GC_WORD_MAX) | GC_DS_PER_OBJECT), TRUE, TRUE);

  /* The kind with an array descriptor. */
  GC_array_mark_proc_index = GC_new_proc_inner(GC_array_mark_proc);
  GC_array_kind = (int)GC_new_kind_inner(
      GC_new_free_list_inner(), GC_MAKE_PROC(GC_array_mark_proc_index, 0),
      FALSE, TRUE);

  /* The element at index `n` gets `n` highest bits set plus the tag. */
  GC_bm_table[0] = GC_DS_BITMAP;
  n_ptrs = 1;
  while (n_ptrs < CPP_WORDSZ / 2) {
    GC_bm_table[n_ptrs] = (GC_WORD_MAX << (CPP_WORDSZ - n_ptrs)) | GC_DS_BITMAP;
    n_ptrs++;
  }
}

/*
 * The mark procedure for objects with an extended descriptor.
 * `env` is an index in `GC_ext_descriptors`; a set bit `i` of the
 * bitmap at that index selects the `i`-th pointer-sized slot starting
 * at `addr` for examination.  The value of a selected slot is pushed
 * if it lies strictly between the least and the greatest plausible
 * heap addresses.  Returns the updated mark stack top.
 */
STATIC mse *GC_CALLBACK
GC_typed_mark_proc(word *addr, mse *mark_stack_top, mse *mark_stack_limit,
                   word env)
{
  word bm;
  ptr_t current_p = (ptr_t)addr;
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

  /* The allocator lock is held by the collection initiating thread. */
  GC_ASSERT(GC_get_parallel() || I_HOLD_LOCK());
  bm = GC_ext_descriptors[env].ed_bitmap;

  INIT_HDR_CACHE;
  /* Scan the bitmap from the lowest bit, one slot per bit. */
  for (; bm != 0; bm >>= 1, current_p += sizeof(ptr_t)) {
    if (bm & 1) {
      ptr_t q;

      /* NOTE(review): judging by its name, the macro may `continue`. */
      LOAD_PTR_OR_CONTINUE(q, current_p);
      FIXUP_POINTER(q);
      if (ADDR_LT(least_ha, q) && ADDR_LT(q, greatest_ha)) {
        PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, current_p);
      }
    }
  }
  if (GC_ext_descriptors[env].ed_continued) {
    /*
     * Push an entry with the rest of the descriptor back onto the stack.
     * Thus we never do too much work at once.  Note that we also cannot
     * overflow the mark stack unless we actually mark something.
     * The entry refers to the next table element and to the address
     * `CPP_WORDSZ` pointer-sized slots further.
     */
    mark_stack_top = GC_custom_push_proc(
        GC_MAKE_PROC(GC_typed_mark_proc_index, env + 1),
        (ptr_t *)addr + CPP_WORDSZ, mark_stack_top, mark_stack_limit);
  }
  return mark_stack_top;
}

/*
 * Build a descriptor from the bitmap `bm` of `len` bits.  Judging by
 * the cases below, a set bit denotes a pointer-sized word of the object
 * that may hold a pointer.  The typed allocation support is initialized
 * on the first call.  The result is one of:
 *   - zero if no bit is set;
 *   - a `GC_DS_LENGTH` descriptor if all the bits up to the last set
 *     one are set (only if `ALIGNMENT` matches the pointer size);
 *   - a `GC_DS_BITMAP` descriptor if the last set bit index is less
 *     than `BITMAP_BITS`;
 *   - otherwise, a procedure descriptor referring to a newly added
 *     element of the extended descriptors table, or a `GC_DS_LENGTH`
 *     one (as a conservative approximation) if the table cannot be
 *     extended.
 */
GC_API GC_descr GC_CALL
GC_make_descriptor(const GC_word *bm, size_t len)
{
  GC_signed_word last_set_bit = (GC_signed_word)len - 1;
  GC_descr d;

  /*
   * One-time initialization.  If the atomic primitives are available,
   * then the lock is acquired only if the flag is not observed as set.
   */
#if defined(AO_HAVE_load_acquire) && defined(AO_HAVE_store_release)
  if (UNLIKELY(!AO_load_acquire(&GC_explicit_typing_initialized))) {
    LOCK();
    if (!GC_explicit_typing_initialized) {
      GC_init_explicit_typing();
      AO_store_release(&GC_explicit_typing_initialized, TRUE);
    }
    UNLOCK();
  }
#else
  LOCK();
  if (UNLIKELY(!GC_explicit_typing_initialized)) {
    GC_init_explicit_typing();
    GC_explicit_typing_initialized = TRUE;
  }
  UNLOCK();
#endif

  /* Find the index of the highest set bit (-1 if none). */
  while (last_set_bit >= 0 && !GC_get_bit(bm, (word)last_set_bit))
    last_set_bit--;
  if (last_set_bit < 0) {
    /* No pointers. */
    return 0;
  }

#if ALIGNMENT == CPP_PTRSZ / 8
  {
    GC_signed_word i;

    /* Stop at the first clear bit below the last set one, if any. */
    for (i = 0; i < last_set_bit; i++) {
      if (!GC_get_bit(bm, (word)i))
        break;
    }
    if (i == last_set_bit) {
      /*
       * The initial section contains all pointers; use the length
       * descriptor.
       */
      return PTRS_TO_BYTES((word)last_set_bit + 1) | GC_DS_LENGTH;
    }
  }
#endif
  if (last_set_bit < BITMAP_BITS) {
    GC_signed_word i;

    /*
     * Hopefully the common case.  Build the bitmap descriptor (with the
     * bits reversed).
     */
    d = SIGNB;
    for (i = last_set_bit - 1; i >= 0; i--) {
      d >>= 1;
      if (GC_get_bit(bm, (word)i))
        d |= SIGNB;
    }
    d |= GC_DS_BITMAP;
  } else {
    /* The bitmap is too long, thus store it in the extended table. */
    GC_signed_word index = GC_add_ext_descriptor(bm, (size_t)last_set_bit + 1);

    if (UNLIKELY(index < 0)) {
      /* Out of memory: use a conservative approximation. */
      return PTRS_TO_BYTES((word)last_set_bit + 1) | GC_DS_LENGTH;
    }
#ifdef LINT2
    if ((word)index > MAX_ENV)
      ABORT("GC_add_ext_descriptor() result cannot exceed MAX_ENV");
#endif
    d = GC_MAKE_PROC(GC_typed_mark_proc_index, index);
  }
  return d;
}

/*
 * Store the descriptor `d` into the trailing `GC_descr`-sized slot of the
 * object `op`.  Nothing is done if `op` is `NULL`.
 */
static void
set_obj_descr(ptr_t op, GC_descr d)
{
  size_t obj_bytes;
  ptr_t descr_slot;

  if (LIKELY(op != NULL)) {
    /*
     * The size is queried from the object itself because `GC_size_map[]`
     * might be updated asynchronously, thus it is not safe to use here.
     */
    obj_bytes = GC_size(op);
    GC_ASSERT(obj_bytes > sizeof(GC_descr)
              && (obj_bytes & (GC_GRANULE_BYTES - 1)) == 0);

    descr_slot = op + (obj_bytes - sizeof(GC_descr));
#ifdef AO_HAVE_store_release
    AO_store_release((volatile AO_t *)descr_slot, d);
#else
    *(GC_descr *)descr_slot = d;
#endif
  }
}

/*
 * Allocate an object of at least `lb` bytes of the explicitly typed kind
 * and record the descriptor `d` in its trailing slot.  Returns whatever
 * the underlying allocation returned (the descriptor is not stored if
 * that is `NULL`).
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_explicitly_typed(size_t lb, GC_descr d)
{
  const size_t min_lb = sizeof(ptr_t) - sizeof(GC_descr) + 1;
  void *result;

  GC_ASSERT(GC_explicit_typing_initialized);
  /* Keep the descriptor out of the first pointer-sized slot. */
  if (UNLIKELY(lb < min_lb))
    lb = min_lb;

  result = GC_malloc_kind(
      SIZET_SAT_ADD(lb, sizeof(GC_descr) - EXTRA_BYTES), GC_explicit_kind);
  set_obj_descr((ptr_t)result, d);
  return result;
}

/*
 * The ignore-off-page variant of `GC_malloc_explicitly_typed()`.
 * Requests smaller than `HBLKSIZE - sizeof(GC_descr)` are simply
 * forwarded to the ordinary typed allocator.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_explicitly_typed_ignore_off_page(size_t lb, GC_descr d)
{
  if (lb >= HBLKSIZE - sizeof(GC_descr)) {
    void *result;

    GC_ASSERT(GC_explicit_typing_initialized);
    /*
     * `EXTRA_BYTES` is not subtracted here: for ignore-off-page objects
     * with the requested size of at least `HBLKSIZE`, it is not added
     * by `GC_generic_malloc_aligned()`.
     */
    result = GC_clear_stack(
        GC_generic_malloc_aligned(SIZET_SAT_ADD(lb, sizeof(GC_descr)),
                                  GC_explicit_kind, IGNORE_OFF_PAGE, 0));
    set_obj_descr((ptr_t)result, d);
    return result;
  }
  return GC_malloc_explicitly_typed(lb, d);
}

/*
 * Array descriptors.  `GC_array_mark_proc` understands these.
 * We may eventually need to add provisions for headers and trailers.
 * Hence we provide for tree structured descriptors, though we do not
 * really use them currently.
 */

/*
 * This type describes simple array: `ld_nelements` consecutive elements
 * of `ld_size` bytes each, every element having the same descriptor.
 */
struct LeafDescriptor {
  /* The variant tag; the first word in every descriptor variant. */
  word ld_tag;
#define LEAF_TAG 1
  /* Bytes per element; nonzero, multiple of `ALIGNMENT`. */
  size_t ld_size;
  /* Number of elements. */
  size_t ld_nelements;
  /* A simple length, bitmap, or procedure descriptor. */
  GC_descr ld_descriptor;
};

/* An array of `ad_nelements` elements, each described by a descriptor. */
struct ComplexArrayDescriptor {
  /* The variant tag. */
  word ad_tag;
#define ARRAY_TAG 2
  /* Number of elements. */
  size_t ad_nelements;
  /* The descriptor of a single element. */
  union ComplexDescriptor *ad_element_descr;
};

/* Two sub-objects laid out one after another. */
struct SequenceDescriptor {
  /* The variant tag. */
  word sd_tag;
#define SEQUENCE_TAG 3
  /* The descriptor of the leading part. */
  union ComplexDescriptor *sd_first;
  /* The descriptor of the part that follows `sd_first` one. */
  union ComplexDescriptor *sd_second;
};

/*
 * A tree-structured descriptor.  Each variant starts with a tag word,
 * thus the tag may be examined through any member of the union.
 */
typedef union ComplexDescriptor {
  struct LeafDescriptor ld;
  struct ComplexArrayDescriptor ad;
  struct SequenceDescriptor sd;
} complex_descriptor;

/*
 * Allocate (as a pointer-free object) and fill in a leaf descriptor for
 * `nelements` elements of `size` bytes each, every one described by `d`.
 * Returns `NULL` if the allocation failed.
 */
STATIC complex_descriptor *
GC_make_leaf_descriptor(size_t size, size_t nelements, GC_descr d)
{
  struct LeafDescriptor *leaf;

  leaf = (struct LeafDescriptor *)GC_malloc_atomic(
      sizeof(struct LeafDescriptor));
  GC_ASSERT(size != 0);
  if (LIKELY(leaf != NULL)) {
    leaf->ld_tag = LEAF_TAG;
    leaf->ld_size = size;
    leaf->ld_nelements = nelements;
    leaf->ld_descriptor = d;
  }
  return (complex_descriptor *)leaf;
}

/*
 * Allocate a descriptor for the object consisting of the part described
 * by `first` immediately followed by the part described by `second`.
 * Returns `NULL` if the allocation failed.
 */
STATIC complex_descriptor *
GC_make_sequence_descriptor(complex_descriptor *first,
                            complex_descriptor *second)
{
  /*
   * Note: the variable is deliberately of the `struct SequenceDescriptor`
   * pointer type.  For a reason, the sanitizer runtime complains of
   * insufficient space for `complex_descriptor` if the pointer type of
   * the variable is changed to `complex_descriptor *` one.
   */
  struct SequenceDescriptor *seq;

  seq = (struct SequenceDescriptor *)GC_malloc(
      sizeof(struct SequenceDescriptor));
  if (LIKELY(seq != NULL)) {
    /*
     * The object is scanned conservatively; the tag cannot cause overly
     * conservative marking since tags are very small integers.  This is
     * probably faster than maintaining the type information.
     */
    seq->sd_tag = SEQUENCE_TAG;
    seq->sd_first = first;
    seq->sd_second = second;
    GC_dirty(seq);
    REACHABLE_AFTER_DIRTY(first);
    REACHABLE_AFTER_DIRTY(second);
  }
  return (complex_descriptor *)seq;
}

/* The result codes of `GC_make_array_descriptor()`. */
#define NO_MEM (-1)
#define SIMPLE 0
#define LEAF 1
#define COMPLEX 2

/*
 * Construct a descriptor for an array of `nelements` elements of `size`
 * bytes each, where a single element is described by the simple
 * descriptor `d`.  Some common cases are optimized.  The result is:
 *   - `SIMPLE`: a `GC_descr` value has been stored to `*psimple_d`;
 *   - `LEAF`: the size, count and descriptor fields of `*pleaf` have been
 *     filled in (the tag is not set);
 *   - `COMPLEX`: a `complex_descriptor *` value has been stored to
 *     `*pcomplex_d`;
 *   - `NO_MEM`: a descriptor allocation failed.
 * The implementation assumes `GC_DS_LENGTH` is 0.  `*pleaf`, `*pcomplex_d`
 * and `*psimple_d` may be used as temporaries during the construction.
 */
STATIC int
GC_make_array_descriptor(size_t nelements, size_t size, GC_descr d,
                         GC_descr *psimple_d, complex_descriptor **pcomplex_d,
                         struct LeafDescriptor *pleaf)
{
  /*
   * For the arrays longer than this, the descriptors of adjacent elements
   * are combined to speed up marking and to reduce the amount of space
   * needed on the mark stack.
   */
#define OPT_THRESHOLD 50
  complex_descriptor *head, *tail;
  int half_kind;

  GC_ASSERT(size != 0);
  if ((d & GC_DS_TAGS) == GC_DS_LENGTH && (d == (GC_descr)size || 0 == d)) {
    /*
     * Either the elements consist of pointers entirely (the lengths just
     * add up; no overflow is guaranteed by caller) or contain no pointers
     * (the product is zero then).
     */
    *psimple_d = nelements * d;
    return SIMPLE;
  }

  if (nelements <= 1) {
    *psimple_d = 1 == nelements ? d : 0;
    return SIMPLE;
  }

  if (nelements <= OPT_THRESHOLD || size > BITMAP_BITS / 2
      || (d & GC_DS_TAGS) == GC_DS_PROC
      || (size & (sizeof(ptr_t) - 1)) != 0) {
    /* Not worth (or not possible) to combine the elements pairwise. */
    pleaf->ld_size = size;
    pleaf->ld_nelements = nelements;
    pleaf->ld_descriptor = d;
    return LEAF;
  }

  /* Describe the array as the one of pairs of the elements. */
  half_kind = GC_make_array_descriptor(
      nelements / 2, 2 * size, GC_double_descr(d, BYTES_TO_PTRS(size)),
      psimple_d, pcomplex_d, pleaf);
  if (UNLIKELY(NO_MEM == half_kind) || (nelements & 1) == 0)
    return half_kind;

  /* An odd element remains; append it as a one-element leaf. */
  tail = GC_make_leaf_descriptor(size, 1, d);
  if (UNLIKELY(NULL == tail))
    return NO_MEM;

  if (COMPLEX == half_kind) {
    head = *pcomplex_d;
  } else {
    if (SIMPLE == half_kind) {
      /*
       * NOTE(review): this leaf records `size` bytes rather than the
       * length covered by `*psimple_d`; presumably `SIMPLE` cannot be
       * the outcome of the recursive call above - confirm.
       */
      head = GC_make_leaf_descriptor(size, 1, *psimple_d);
    } else {
      head = GC_make_leaf_descriptor(pleaf->ld_size, pleaf->ld_nelements,
                                     pleaf->ld_descriptor);
    }
    if (UNLIKELY(NULL == head))
      return NO_MEM;
  }

  *pcomplex_d = GC_make_sequence_descriptor(head, tail);
  return UNLIKELY(NULL == *pcomplex_d) ? NO_MEM : COMPLEX;
}

/*
 * The outcome of `GC_calloc_prepare_explicitly_typed()` consumed later by
 * `GC_calloc_do_explicitly_typed()`.  Which of `complex_d`, `leaf` and
 * `simple_d` is meaningful is determined by `descr_type`.
 */
struct GC_calloc_typed_descr_s {
  complex_descriptor *complex_d; /*< the first field, the only pointer */
  struct LeafDescriptor leaf;
  GC_descr simple_d;
  word alloc_lb;             /*< of `size_t` type actually */
  GC_signed_word descr_type; /*< of `int` type actually */
};

/*
 * Compute the array descriptor and the allocation size for `n` elements
 * of `lb` bytes each (a single element is described by `d`), and store
 * them to `*pctd`.  Returns 0 if `n * lb` overflows (only `alloc_lb` and
 * `descr_type` fields are set in this case), 1 otherwise.  Note that 1 is
 * returned even if the descriptor construction has run out of memory
 * (this is recorded in `descr_type` field).
 */
GC_API int GC_CALL
GC_calloc_prepare_explicitly_typed(struct GC_calloc_typed_descr_s *pctd,
                                   size_t ctd_sz, size_t n, size_t lb,
                                   GC_descr d)
{
  int kind;
  size_t payload_lb;

  GC_STATIC_ASSERT(sizeof(struct GC_calloc_typed_descr_opaque_s)
                   == sizeof(struct GC_calloc_typed_descr_s));
  GC_ASSERT(GC_explicit_typing_initialized);
  GC_ASSERT(sizeof(struct GC_calloc_typed_descr_s) == ctd_sz);
  (void)ctd_sz; /*< unused currently */

  /* An empty request is treated as a single one-byte element. */
  if (UNLIKELY(0 == n || 0 == lb)) {
    n = 1;
    lb = 1;
  }

  /* The division is performed only if the fast initial check fails. */
  if (UNLIKELY((lb | n) > GC_SQRT_SIZE_MAX) && n > GC_SIZE_MAX / lb) {
    /* `n * lb` overflows; the rest of the fields are left unset. */
    pctd->descr_type = NO_MEM;
    pctd->alloc_lb = GC_SIZE_MAX;
    return 0; /*< failure */
  }

  kind = GC_make_array_descriptor(n, lb, d, &pctd->simple_d,
                                  &pctd->complex_d, &pctd->leaf);
  pctd->descr_type = kind;
  if (NO_MEM == kind || SIMPLE == kind) {
    pctd->alloc_lb = (word)lb * n;
  } else if (LEAF == kind) {
    /* Reserve room for the leaf descriptor and a pointer to it. */
    payload_lb = lb * n;
    pctd->alloc_lb = SIZET_SAT_ADD(
        payload_lb,
        (BYTES_TO_PTRS_ROUNDUP(sizeof(struct LeafDescriptor)) + 1)
                * sizeof(ptr_t)
            - EXTRA_BYTES);
  } else if (COMPLEX == kind) {
    /* Reserve room for a pointer to the complex descriptor. */
    payload_lb = lb * n;
    pctd->alloc_lb = SIZET_SAT_ADD(payload_lb, sizeof(ptr_t) - EXTRA_BYTES);
  }
  return 1; /*< success */
}

/*
 * Allocate the array object as prescribed by `*pctd` (filled in by
 * `GC_calloc_prepare_explicitly_typed()`).  For the `NO_MEM` case (and if
 * the disappearing link cannot be registered), the result of the
 * out-of-memory function is returned.  `NULL` is returned if the
 * allocation of an array-kind object itself failed.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_calloc_do_explicitly_typed(const struct GC_calloc_typed_descr_s *pctd,
                              size_t ctd_sz)
{
  void *op;
  size_t lpw_m1;

  GC_ASSERT(sizeof(struct GC_calloc_typed_descr_s) == ctd_sz);
  (void)ctd_sz; /*< unused currently */
  switch (pctd->descr_type) {
  case NO_MEM:
    return (*GC_get_oom_fn())((size_t)pctd->alloc_lb);
  case SIMPLE:
    /* An ordinary explicitly typed object is sufficient. */
    return GC_malloc_explicitly_typed((size_t)pctd->alloc_lb, pctd->simple_d);
  case LEAF:
  case COMPLEX:
    break;
  default:
    ABORT_RET("Bad descriptor type");
    return NULL;
  }
  op = GC_malloc_kind((size_t)pctd->alloc_lb, GC_array_kind);
  if (UNLIKELY(NULL == op))
    return NULL;

  /* The index of the last pointer-sized slot of the allocated object. */
  lpw_m1 = BYTES_TO_PTRS(GC_size(op)) - 1;
  if (pctd->descr_type == LEAF) {
    /*
     * Set up the descriptor inside the object itself.  It is placed
     * right before the last slot, and the latter is set to point to it.
     */
    struct LeafDescriptor *lp
        = (struct LeafDescriptor *)((ptr_t *)op + lpw_m1
                                    - BYTES_TO_PTRS_ROUNDUP(
                                        sizeof(struct LeafDescriptor)));

    lp->ld_tag = LEAF_TAG;
    lp->ld_size = pctd->leaf.ld_size;
    lp->ld_nelements = pctd->leaf.ld_nelements;
    lp->ld_descriptor = pctd->leaf.ld_descriptor;
    /*
     * Hold the allocator lock (in the reader mode which should be enough)
     * while writing the descriptor `word` to the object to ensure that
     * the descriptor contents are seen by `GC_array_mark_proc` as expected.
     */

    /*
     * TODO: It should be possible to replace locking with the atomic
     * operations (with the release barrier here) but, in this case,
     * avoiding the acquire barrier in `GC_array_mark_proc` seems to
     * be tricky as `GC_mark_some` might be invoked with the world running.
     */
    READER_LOCK();
    ((struct LeafDescriptor **)op)[lpw_m1] = lp;
    READER_UNLOCK_RELEASE();
  } else {
    /*
     * The `COMPLEX` case: the last slot refers to the separately
     * allocated descriptor.  Note that if `GC_NO_FINALIZATION` is defined,
     * then only the block below remains, i.e. the out-of-memory function
     * is invoked unconditionally.
     */
#ifndef GC_NO_FINALIZATION
    READER_LOCK();
    ((complex_descriptor **)op)[lpw_m1] = pctd->complex_d;
    READER_UNLOCK_RELEASE();

    GC_dirty((ptr_t *)op + lpw_m1);
    REACHABLE_AFTER_DIRTY(pctd->complex_d);

    /*
     * Make sure the descriptor is cleared once there is any danger
     * it may have been collected.
     */
    if (UNLIKELY(
            GC_general_register_disappearing_link((void **)op + lpw_m1, op)
            == GC_NO_MEMORY))
#endif
    {
      /* Could not register it due to lack of memory.  Punt. */
      return (*GC_get_oom_fn())((size_t)pctd->alloc_lb);
    }
  }
  return op;
}

/*
 * Allocate an array of `n` elements of `lb` bytes each, where a single
 * element is described by `d`.  This is just the preparation step
 * immediately followed by the allocation one.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_calloc_explicitly_typed(size_t n, size_t lb, GC_descr d)
{
  struct GC_calloc_typed_descr_s prepared;

  /*
   * The status is ignored because a failure is recorded in `prepared`
   * and is handled by the allocation step.
   */
  (void)GC_calloc_prepare_explicitly_typed(&prepared, sizeof(prepared), n, lb,
                                           d);
  return GC_calloc_do_explicitly_typed(&prepared, sizeof(prepared));
}

/*
 * Compute the size (in bytes) of the object described by `complex_d`.
 * Storing the size directly (or computing it as part of
 * `GC_push_complex_descriptor`) would be faster, but hopefully it does
 * not matter.
 */
STATIC size_t
GC_descr_obj_size(complex_descriptor *complex_d)
{
  word tag = complex_d->ad.ad_tag;

  if (LEAF_TAG == tag)
    return complex_d->ld.ld_nelements * complex_d->ld.ld_size;

  if (ARRAY_TAG == tag) {
    return complex_d->ad.ad_nelements
           * GC_descr_obj_size(complex_d->ad.ad_element_descr);
  }

  if (SEQUENCE_TAG == tag) {
    return GC_descr_obj_size(complex_d->sd.sd_first)
           + GC_descr_obj_size(complex_d->sd.sd_second);
  }

  ABORT_RET("Bad complex descriptor");
  return 0;
}

/*
 * Push the mark stack entries for the sub-objects of the object located
 * at `current` and described by `complex_d`.  `msp` is the mark stack top
 * and `msl` is its limit.  Returns the new mark stack top, or `NULL` if
 * the mark stack overflowed.
 */
STATIC mse *
GC_push_complex_descriptor(ptr_t current, complex_descriptor *complex_d,
                           mse *msp, mse *msl)
{
  size_t remaining;
  size_t stride;
  GC_descr elem_d;
  complex_descriptor *elem;

  switch (complex_d->ad.ad_tag) {
  case SEQUENCE_TAG:
    /* The second part starts right after the first one. */
    stride = GC_descr_obj_size(complex_d->sd.sd_first);
    msp = GC_push_complex_descriptor(current, complex_d->sd.sd_first, msp,
                                     msl);
    if (UNLIKELY(NULL == msp))
      return NULL;
    GC_ASSERT(stride != 0);
    return GC_push_complex_descriptor(current + stride,
                                      complex_d->sd.sd_second, msp, msl);

  case ARRAY_TAG:
    elem = complex_d->ad.ad_element_descr;
    remaining = complex_d->ad.ad_nelements;
    stride = GC_descr_obj_size(elem);
    GC_ASSERT(stride != 0 || 0 == remaining);
    while (remaining > 0) {
      msp = GC_push_complex_descriptor(current, elem, msp, msl);
      if (UNLIKELY(NULL == msp))
        return NULL;
      current += stride;
      remaining--;
    }
    return msp;

  case LEAF_TAG:
    elem_d = complex_d->ld.ld_descriptor;
    remaining = complex_d->ld.ld_nelements;
    stride = complex_d->ld.ld_size;

    /* One entry per element is needed; give up if they do not fit. */
    if (UNLIKELY(msl - msp <= (GC_signed_word)remaining))
      return NULL;
    GC_ASSERT(stride != 0);
    for (; remaining > 0; remaining--) {
      ++msp;
      msp->mse_start = current;
      msp->mse_descr = elem_d;
      current += stride;
    }
    return msp;

  default:
    ABORT("Bad complex descriptor");
  }
  return msp;
}

/*
 * Fetch the descriptor pointer stored in the last of `lpw` pointer-sized
 * slots of the object `p`.
 */
GC_ATTR_NO_SANITIZE_THREAD
static complex_descriptor *
get_complex_descr(ptr_t *p, size_t lpw)
{
  ptr_t last_slot_value = p[lpw - 1];

  return (complex_descriptor *)last_slot_value;
}

/*
 * The mark procedure of `GC_array_kind` (the objects of this kind are
 * created by `GC_calloc_do_explicitly_typed()`).  The last pointer-sized
 * slot of such an object refers to its complex descriptor.  `env` is not
 * used.  Returns the new mark stack top.
 */
STATIC mse *GC_CALLBACK
GC_array_mark_proc(word *addr, mse *mark_stack_top, mse *mark_stack_limit,
                   word env)
{
  size_t obj_bytes = HDR(addr)->hb_sz;
  size_t nptrs = BYTES_TO_PTRS(obj_bytes);
  complex_descriptor *descr = get_complex_descr((ptr_t *)addr, nptrs);
  mse *top;

  UNUSED_ARG(env);
  if (NULL == descr) {
    /* This is a reference to a free-list entry; nothing to push. */
    return mark_stack_top;
  }

  /*
   * The in-use counts have been updated already (when the array
   * descriptor was pushed); it is only replaced by the subobject
   * descriptors here, so no update is necessary.  One entry is held back
   * for the descriptor slot itself.
   */
  top = GC_push_complex_descriptor((ptr_t)addr, descr, mark_stack_top,
                                   mark_stack_limit - 1);
  if (top != NULL) {
    /* Push descriptor itself. */
    ++top;
    top->mse_start = (ptr_t)((ptr_t *)addr + nptrs - 1);
    top->mse_descr = sizeof(ptr_t) | GC_DS_LENGTH;
    return top;
  }

  /* Explicitly instruct Clang Static Analyzer that pointer is non-`NULL`. */
  if (NULL == mark_stack_top) {
    ABORT("Bad mark_stack_top");
  }

  /*
   * The entries do not fit.  The whole array is pushed conservatively as
   * a unit, and a mark stack expansion is requested.  This cannot cause
   * a mark stack overflow because the original array entry is replaced.
   */
#ifdef PARALLEL_MARK
  /* We might be using a `local_mark_stack` in the parallel collection. */
  if (GC_mark_stack + GC_mark_stack_size == mark_stack_limit)
#endif
  {
    GC_mark_stack_too_small = TRUE;
  }
  top = mark_stack_top + 1;
  top->mse_start = (ptr_t)addr;
  top->mse_descr = obj_bytes | GC_DS_LENGTH;
  return top;
}


/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1999-2001 by Hewlett-Packard Company. All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#include <limits.h>
#include <stdarg.h>

#if defined(SOLARIS) && defined(THREADS)
#  include <sys/syscall.h>
#endif

#if defined(UNIX_LIKE) || defined(CYGWIN32) || defined(SYMBIAN) \
    || (defined(CONSOLE_LOG) && defined(MSWIN32))
#  include <fcntl.h>
#  include <sys/stat.h>
#endif

#if defined(CONSOLE_LOG) && defined(MSWIN32) && !defined(__GNUC__)
#  include <io.h>
#endif

#ifdef NONSTOP
#  include <floss.h>
#endif

#ifdef THREADS
/* The allocator lock object, for the targets which define it here. */
#  if defined(SN_TARGET_PSP2)
GC_INNER WapiMutex GC_allocate_ml_PSP2 = { 0, NULL };
#  elif defined(GC_DEFN_ALLOCATE_ML) && !defined(USE_RWLOCK) \
      || defined(SN_TARGET_PS3)
#    include <pthread.h>
GC_INNER pthread_mutex_t GC_allocate_ml;
#  else
/*
 * For other platforms with threads, the allocator lock and, possibly,
 * `GC_lock_holder` are defined in the thread support code.
 */
#  endif
#endif /* THREADS */

#ifdef DYNAMIC_LOADING
/*
 * We need to register the main data segment.  Returns `TRUE` unless
 * this is done implicitly as part of dynamic library registration.
 */
#  define GC_REGISTER_MAIN_STATIC_DATA() GC_register_main_static_data()
#elif defined(GC_DONT_REGISTER_MAIN_STATIC_DATA)
/* The client has requested not to register the main data segment. */
#  define GC_REGISTER_MAIN_STATIC_DATA() FALSE
#else
/*
 * Do not unnecessarily call `GC_register_main_static_data()` in case
 * `dyn_load.c` file is not linked in.
 */
#  define GC_REGISTER_MAIN_STATIC_DATA() TRUE
#endif

#ifdef NEED_CANCEL_DISABLE_COUNT
/*
 * A per-thread counter.
 * NOTE(review): presumably the nesting depth of the regions where the
 * thread cancellation is disabled - confirm against the users.
 */
__thread unsigned char GC_cancel_disable_count = 0;
#endif

/* Relies on the implicit zero initialization of the static storage. */
struct _GC_arrays GC_arrays /* `= { 0 }` */;

/* Initially, only the reserved mark procedures are accounted for. */
GC_INNER unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;

/* The number of object kinds; starts with the predefined ones. */
GC_INNER unsigned GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;

/*
 * NOTE(review): presumably the cold end (bottom) of the main stack, to
 * be determined during the collector initialization - confirm.
 */
ptr_t GC_stackbottom = 0;

#if defined(E2K) && defined(THREADS) || defined(IA64)
GC_INNER ptr_t GC_register_stackbottom = NULL;
#endif

/*
 * The flags below are `FALSE` by default.
 * NOTE(review): the exact semantics of nonzero values should be checked
 * against the public header.
 */
int GC_dont_gc = FALSE;

int GC_dont_precollect = FALSE;

GC_bool GC_quiet = 0; /*< used also in `msvc_dbg.c` file */

#if !defined(NO_CLOCK) || !defined(SMALL_CONFIG)
/* The statistics printing is off by default. */
GC_INNER int GC_print_stats = 0;
#endif

#ifdef MAKE_BACK_GRAPH
/* The default is determined by `GC_PRINT_BACK_HEIGHT` macro. */
#  ifdef GC_PRINT_BACK_HEIGHT
GC_INNER GC_bool GC_print_back_height = TRUE;
#  else
GC_INNER GC_bool GC_print_back_height = FALSE;
#  endif
#endif

#ifndef NO_DEBUGGING
/* The default is determined by `GC_DUMP_REGULARLY` macro. */
#  ifdef GC_DUMP_REGULARLY
GC_INNER GC_bool GC_dump_regularly = TRUE;
#  else
GC_INNER GC_bool GC_dump_regularly = FALSE;
#  endif
#  ifndef NO_CLOCK
/* The time that the collector was initialized at. */
STATIC CLOCK_TYPE GC_init_time = CLOCK_TYPE_INITIALIZER;
#  endif
#endif /* !NO_DEBUGGING */

#ifdef KEEP_BACK_PTRS
GC_INNER long GC_backtraces = 0;
#endif

/* The leak-finding mode is on by default only if `FIND_LEAK` is defined. */
#ifdef FIND_LEAK
int GC_find_leak = 1;
#else
int GC_find_leak = 0;
#endif

#if !defined(NO_FIND_LEAK) && !defined(SHORT_DBG_HDRS)
/* The default is determined by `GC_FINDLEAK_DELAY_FREE` macro. */
#  ifdef GC_FINDLEAK_DELAY_FREE
GC_INNER GC_bool GC_findleak_delay_free = TRUE;
#  else
GC_INNER GC_bool GC_findleak_delay_free = FALSE;
#  endif
#endif /* !NO_FIND_LEAK && !SHORT_DBG_HDRS */

/* The default is determined by `ALL_INTERIOR_POINTERS` macro. */
#ifdef ALL_INTERIOR_POINTERS
int GC_all_interior_pointers = 1;
#else
int GC_all_interior_pointers = 0;
#endif

/* The default is determined by `FINALIZE_ON_DEMAND` macro. */
#ifdef FINALIZE_ON_DEMAND
int GC_finalize_on_demand = 1;
#else
int GC_finalize_on_demand = 0;
#endif

/* The default is determined by `JAVA_FINALIZATION` macro. */
#ifdef JAVA_FINALIZATION
int GC_java_finalization = 1;
#else
int GC_java_finalization = 0;
#endif

/* All accesses to it should be synchronized to avoid data race. */
GC_finalizer_notifier_proc GC_finalizer_notifier
    = (GC_finalizer_notifier_proc)0;

/* The default is determined by `GC_FORCE_UNMAP_ON_GCOLLECT` macro. */
#ifdef GC_FORCE_UNMAP_ON_GCOLLECT
GC_INNER GC_bool GC_force_unmap_on_gcollect = TRUE;
#else
GC_INNER GC_bool GC_force_unmap_on_gcollect = FALSE;
#endif

/* The default interval could be overridden at the build time. */
#ifndef GC_LARGE_ALLOC_WARN_INTERVAL
#  define GC_LARGE_ALLOC_WARN_INTERVAL 5
#endif

#ifndef NO_BLACK_LISTING
GC_INNER long GC_large_alloc_warn_interval = GC_LARGE_ALLOC_WARN_INTERVAL;
#endif

/*
 * The default out-of-memory handler: the request is ignored, and `NULL`
 * is reported to the caller of the allocation function.
 */
STATIC void *GC_CALLBACK
GC_default_oom_fn(size_t bytes_requested)
{
  void *no_memory = NULL;

  UNUSED_ARG(bytes_requested);
  return no_memory;
}

/*
 * The current out-of-memory handler.  All accesses to it should be
 * synchronized to avoid data race.
 */
GC_oom_func GC_oom_fn = GC_default_oom_fn;

#ifdef CAN_HANDLE_FORK
/* The fork handling is on by default only if `HANDLE_FORK` is defined. */
#  ifdef HANDLE_FORK
GC_INNER int GC_handle_fork = 1;
#  else
GC_INNER int GC_handle_fork = FALSE;
#  endif

#elif !defined(HAVE_NO_FORK)
/*
 * The fork handling is not available, so the at-fork entry points are
 * provided as stubs.  The prepare one aborts in the multi-threaded build.
 */
GC_API void GC_CALL
GC_atfork_prepare(void)
{
#  ifdef THREADS
  ABORT("fork() handling unsupported");
#  endif
}

GC_API void GC_CALL
GC_atfork_parent(void)
{
  /* Empty. */
}

GC_API void GC_CALL
GC_atfork_child(void)
{
  /* Empty. */
}
#endif /* !CAN_HANDLE_FORK && !HAVE_NO_FORK */

/*
 * Set the fork handling mode.  The request has an effect only if the
 * collector is not initialized yet.  If the fork handling is not
 * available in the build, then a nonzero `value` leads to an abort (for
 * the multi-threaded or Darwin `MPROTECT_VDB` builds) or is ignored
 * (otherwise).
 */
GC_API void GC_CALL
GC_set_handle_fork(int value)
{
#ifdef CAN_HANDLE_FORK
  if (!GC_is_initialized) {
    /* Map all negative values except for -1 to a positive one. */
    GC_handle_fork = value >= -1 ? value : 1;
  }
#elif defined(THREADS) || (defined(DARWIN) && defined(MPROTECT_VDB))
  if (!GC_is_initialized && value) {
#  ifndef SMALL_CONFIG
    /* Initialize `GC_manual_vdb` and `GC_stderr`. */
    GC_init();
#    ifndef THREADS
    /* In the single-threaded build, the manual VDB mode is tolerated. */
    if (GC_manual_vdb)
      return;
#    endif
#  endif
    ABORT("fork() handling unsupported");
  }
#else
  /* No at-fork handler is needed in the single-threaded mode. */
  UNUSED_ARG(value);
#endif
}

/*
 * Fill in the initial part of `GC_size_map[]` so that
 * `GC_size_map[i] >= granules(i)`, but not too much bigger, and so that
 * the table contains relatively few distinct entries.  This was
 * originally stolen from Russ Atkinson's Cedar quantization algorithm
 * (but we precompute it).
 */
STATIC void
GC_init_size_map(void)
{
  size_t lb;

  /* Size 0 is mapped to one granule to avoid problems at lower levels. */
  GC_size_map[0] = 1;

  for (lb = 1; lb <= GRANULES_TO_BYTES(GC_TINY_FREELISTS - 1) - EXTRA_BYTES;
       lb++) {
    GC_size_map[lb] = ALLOC_REQUEST_GRANS(lb);
#ifndef _MSC_VER
    /* Seems to tickle bug in VC++ 2008 for x86_64. */
    GC_ASSERT(GC_size_map[lb] < GC_TINY_FREELISTS);
#endif
  }
  /* The remaining entries are filled in on demand. */
}

/*
 * The following is a gross hack to deal with a problem that can occur
 * on machines that are sloppy about stack frame sizes, notably SPARC.
 * Bogus pointers may be written to the stack and not cleared for
 * a LONG time, because they always fall into holes in stack frames
 * that are not written.  We partially address this by clearing
 * sections of the stack whenever we get control.
 */

#ifndef SMALL_CLEAR_SIZE
/* Clear this many words of the stack every time. */
#  define SMALL_CLEAR_SIZE 256
#endif

#if defined(ALWAYS_SMALL_CLEAR_STACK) || defined(STACK_NOT_SCANNED)
/*
 * The simplified variant: zero a fixed-size local array each time (unless
 * the stack is not scanned at all).  Returns `arg` unchanged.
 */
GC_API void *GC_CALL
GC_clear_stack(void *arg)
{
#  ifndef STACK_NOT_SCANNED
  volatile ptr_t dummy[SMALL_CLEAR_SIZE];

  BZERO(CAST_AWAY_VOLATILE_PVOID(dummy), sizeof(dummy));
#  endif
  return arg;
}
#else

#  ifdef THREADS
/* Clear this much sometimes. */
#    define BIG_CLEAR_SIZE 2048
#  else
/* The amount (in pointers) `GC_high_water` is made cooler by each time. */
#    define DEGRADE_RATE 50

/* `GC_bytes_allocd` value when the clearing process was last restarted. */
STATIC word GC_bytes_allocd_at_reset = 0;

/* `GC_gc_no` value when we last did this. */
STATIC word GC_stack_last_cleared = 0;
#  endif

#  if defined(__APPLE_CC__) && !GC_CLANG_PREREQ(6, 0)
#    define CLEARSTACK_LIMIT_MODIFIER volatile /*< to workaround some bug */
#  else
#    define CLEARSTACK_LIMIT_MODIFIER /*< empty */
#  endif

/* The prototype is given the C linkage. */
EXTERN_C_BEGIN
void *GC_clear_stack_inner(void *, CLEARSTACK_LIMIT_MODIFIER ptr_t);
EXTERN_C_END

#  ifndef ASM_CLEAR_CODE
/*
 * Clear the stack up to about `limit`.  Return `arg`.  This function is
 * not `static` because it could also be erroneously defined in `.S` file,
 * so this error would be caught by the linker.
 * Each invocation zeroes a local array of `CLEAR_SIZE` pointers and
 * recurses while `limit` is hotter than the approximate stack pointer.
 */
void *
GC_clear_stack_inner(void *arg, CLEARSTACK_LIMIT_MODIFIER ptr_t limit)
{
#    define CLEAR_SIZE 213 /*< granularity */
  volatile ptr_t dummy[CLEAR_SIZE];

  BZERO(CAST_AWAY_VOLATILE_PVOID(dummy), sizeof(dummy));
  if (HOTTER_THAN((/* no volatile */ ptr_t)limit, GC_approx_sp())) {
    (void)GC_clear_stack_inner(arg, limit);
  }
  /*
   * Make sure the recursive call is not a tail call, and the `bzero` call
   * is not recognized as dead code.
   */
#    if defined(CPPCHECK)
  GC_noop1(ADDR(dummy[0]));
#    else
  GC_noop1(COVERT_DATAFLOW(ADDR(dummy)));
#    endif
  return arg;
}
#  endif /* !ASM_CLEAR_CODE */

#  ifdef THREADS
/*
 * Return the next value of a counter taken modulo 13; used to decide
 * when to clear a bigger chunk of the stack occasionally.
 */
/* TODO: Should be more random than it is... */
static unsigned
next_random_no(void)
{
  unsigned ticket;
#    ifdef AO_HAVE_fetch_and_add1
  static volatile AO_t counter;

  ticket = (unsigned)AO_fetch_and_add1(&counter);
#    else
  static unsigned counter = 0;

  ticket = counter++;
#    endif
  return ticket % 13;
}
#  endif /* THREADS */

/*
 * Clear some of the inactive part of the stack.  Returns `arg` (either
 * directly or via `GC_clear_stack_inner()`).
 */
GC_API void *GC_CALL
GC_clear_stack(void *arg)
{
  /* Note: this is hotter than the actual stack pointer. */
  ptr_t sp = GC_approx_sp();
#  ifdef THREADS
  volatile ptr_t dummy[SMALL_CLEAR_SIZE];
#  endif

  /*
   * Extra bytes we clear every time.  This clears our own activation
   * record, and should cause more frequent clearing near the cold end
   * of the stack, a good thing.
   */
#  define SLOP 400

  /*
   * We make `GC_high_water` this much hotter than we really saw it,
   * to cover for the GC noise above our current frame.
   */
#  define GC_SLOP 4000

  /*
   * We restart the clearing process after this many bytes of allocation.
   * Otherwise very heavily recursive programs with sparse stacks may
   * result in heaps that grow almost without bounds.  As the heap gets
   * larger, collection frequency decreases, thus clearing frequency
   * would decrease, thus more junk remains accessible, thus the heap
   * gets larger...
   */
#  define CLEAR_THRESHOLD 100000

#  ifdef THREADS
  /* Occasionally clear a big chunk; otherwise just a small local array. */
  if (next_random_no() == 0) {
    ptr_t limit = sp;

    MAKE_HOTTER(limit, BIG_CLEAR_SIZE * sizeof(ptr_t));
    /*
     * Make it sufficiently aligned for assembly implementations
     * of `GC_clear_stack_inner`.
     */
    limit = PTR_ALIGN_DOWN(limit, 0x10);
    return GC_clear_stack_inner(arg, limit);
  }
  BZERO(CAST_AWAY_VOLATILE_PVOID(dummy), sizeof(dummy));
#  else
  if (GC_gc_no != GC_stack_last_cleared) {
    /* Start things over, so we clear the entire stack again. */
    if (UNLIKELY(NULL == GC_high_water))
      GC_high_water = (ptr_t)GC_stackbottom;
    GC_min_sp = GC_high_water;
    GC_stack_last_cleared = GC_gc_no;
    GC_bytes_allocd_at_reset = GC_bytes_allocd;
  }
  /* Adjust `GC_high_water`. */
  GC_ASSERT(GC_high_water != NULL);
  MAKE_COOLER(GC_high_water, PTRS_TO_BYTES(DEGRADE_RATE) + GC_SLOP);
  if (HOTTER_THAN(sp, GC_high_water))
    GC_high_water = sp;
  MAKE_HOTTER(GC_high_water, GC_SLOP);
  {
    ptr_t limit = GC_min_sp;

    MAKE_HOTTER(limit, SLOP);
    /* Clear only if the region up to `limit` is beyond the current `sp`. */
    if (HOTTER_THAN(limit, sp)) {
      limit = PTR_ALIGN_DOWN(limit, 0x10);
      GC_min_sp = sp;
      return GC_clear_stack_inner(arg, limit);
    }
  }
  if (GC_bytes_allocd - GC_bytes_allocd_at_reset > CLEAR_THRESHOLD) {
    /* Restart clearing process, but limit how much clearing we do. */
    GC_min_sp = sp;
    MAKE_HOTTER(GC_min_sp, CLEAR_THRESHOLD / 4);
    if (HOTTER_THAN(GC_min_sp, GC_high_water))
      GC_min_sp = GC_high_water;
    GC_bytes_allocd_at_reset = GC_bytes_allocd;
  }
#  endif
  return arg;
}

#endif /* !ALWAYS_SMALL_CLEAR_STACK && !STACK_NOT_SCANNED */

/*
 * Return the address of the beginning of the heap object containing `p`,
 * or `NULL` if the collector is not initialized, or no block header is
 * found for `p`, or the block is free, or `p` does not fall inside
 * an object.
 */
GC_API void *GC_CALL
GC_base(void *p)
{
  ptr_t obj_start;
  ptr_t obj_end;
  struct hblk *blk;
  bottom_index *bi;
  hdr *blk_hdr;
  size_t obj_bytes;

  if (UNLIKELY(!GC_is_initialized))
    return NULL;

  obj_start = (ptr_t)p;
  blk = HBLKPTR(obj_start);
  GET_BI(obj_start, bi);
  blk_hdr = HDR_FROM_BI(bi, obj_start);
  if (NULL == blk_hdr)
    return NULL;

  if (IS_FORWARDING_ADDR_OR_NIL(blk_hdr)) {
    /*
     * This is a pointer to the middle of a large object; move to the
     * block where the object begins.
     */
    blk = GC_find_starting_hblk(blk, &blk_hdr);
    obj_start = (ptr_t)blk;
  }
  if (HBLK_IS_FREE(blk_hdr))
    return NULL;

  /* Round the address down to the beginning of the object. */
  obj_start = PTR_ALIGN_DOWN(obj_start, sizeof(ptr_t));
  obj_bytes = blk_hdr->hb_sz;
  obj_start -= HBLKDISPL(obj_start) % obj_bytes;
  obj_end = obj_start + obj_bytes;

  /* Reject a pointer past the end of the object. */
  if (ADDR_GE((ptr_t)p, obj_end))
    return NULL;
  /* Reject a small object slot extending beyond the end of its block. */
  if (obj_bytes <= HBLKSIZE && ADDR_LT((ptr_t)(blk + 1), obj_end))
    return NULL;

  return obj_start;
}

/*
 * Return a nonzero value if a block header is registered for the
 * address `p`, zero otherwise.  The collector should be initialized.
 */
GC_API int GC_CALL
GC_is_heap_ptr(const void *p)
{
  bottom_index *bi;

  GC_ASSERT(GC_is_initialized);
  GET_BI(p, bi);
  if (HDR_FROM_BI(bi, p) != 0)
    return 1;
  return 0;
}

/*
 * Return the size (in bytes) recorded in the header of the block that
 * contains `p`.  Zero is returned for `NULL` (for compatibility with
 * `malloc_usable_size()`).
 */
GC_API size_t GC_CALL
GC_size(const void *p)
{
  if (LIKELY(p != NULL)) {
    const hdr *blk_hdr = HDR(p);

    return blk_hdr->hb_sz;
  }
  return 0;
}

/*
 * These getters remain unsynchronized for compatibility (since some clients
 * could call some of them from a GC callback holding the allocator lock).
 */

/*
 * Return the heap size excluding the memory space returned to OS (i.e.
 * only the space owned by the garbage collector is counted).
 */
GC_API size_t GC_CALL
GC_get_heap_size(void)
{
  size_t owned_bytes = (size_t)(GC_heapsize - GC_unmapped_bytes);

  return owned_bytes;
}

/* Return the current value of `GC_our_mem_bytes` counter. */
GC_API size_t GC_CALL
GC_get_obtained_from_os_bytes(void)
{
  size_t obtained_bytes = (size_t)GC_our_mem_bytes;

  return obtained_bytes;
}

/*
 * Return the amount of the large free bytes excluding the memory space
 * returned to OS.
 */
GC_API size_t GC_CALL
GC_get_free_bytes(void)
{
  size_t free_bytes = (size_t)(GC_large_free_bytes - GC_unmapped_bytes);

  return free_bytes;
}

/* Return the current value of `GC_unmapped_bytes`. */
GC_API size_t GC_CALL
GC_get_unmapped_bytes(void)
{
  size_t unmapped_bytes = (size_t)GC_unmapped_bytes;

  return unmapped_bytes;
}

/* Return the current value of `GC_bytes_allocd` counter. */
GC_API size_t GC_CALL
GC_get_bytes_since_gc(void)
{
  size_t allocd_bytes = (size_t)GC_bytes_allocd;

  return allocd_bytes;
}

/*
 * Return the sum of `GC_bytes_allocd` and `GC_bytes_allocd_before_gc`
 * counters.
 */
GC_API size_t GC_CALL
GC_get_total_bytes(void)
{
  size_t total_bytes = (size_t)(GC_bytes_allocd + GC_bytes_allocd_before_gc);

  return total_bytes;
}

#ifndef GC_GET_HEAP_USAGE_NOT_NEEDED

/*
 * Return the `GC_size_map[]` entry at index `i` converted to bytes, or
 * `GC_SIZE_MAX` if `i` (treated as unsigned, so a negative value is out
 * of range too) is greater than `MAXOBJBYTES`.
 */
GC_API size_t GC_CALL
GC_get_size_map_at(int i)
{
  if ((unsigned)i <= MAXOBJBYTES)
    return GRANULES_TO_BYTES(GC_size_map[i]);

  return GC_SIZE_MAX;
}

/*
 * Store the heap usage counters to the provided locations while holding
 * the allocator lock in the reader mode.  Any of the arguments may be
 * `NULL`, in which case the corresponding value is not stored.
 */
GC_API void GC_CALL
GC_get_heap_usage_safe(GC_word *pheap_size, GC_word *pfree_bytes,
                       GC_word *punmapped_bytes, GC_word *pbytes_since_gc,
                       GC_word *ptotal_bytes)
{
  READER_LOCK();
  if (NULL != pheap_size) {
    /* The memory space returned to OS is not counted. */
    *pheap_size = GC_heapsize - GC_unmapped_bytes;
  }
  if (NULL != pfree_bytes) {
    *pfree_bytes = GC_large_free_bytes - GC_unmapped_bytes;
  }
  if (NULL != punmapped_bytes) {
    *punmapped_bytes = GC_unmapped_bytes;
  }
  if (NULL != pbytes_since_gc) {
    *pbytes_since_gc = GC_bytes_allocd;
  }
  if (NULL != ptotal_bytes) {
    *ptotal_bytes = GC_bytes_allocd + GC_bytes_allocd_before_gc;
  }
  READER_UNLOCK();
}

/* Fill in GC statistics provided the destination is of enough size. */
static void
fill_prof_stats(struct GC_prof_stats_s *pstats)
{
  pstats->heapsize_full = GC_heapsize;
  pstats->free_bytes_full = GC_large_free_bytes;
  pstats->unmapped_bytes = GC_unmapped_bytes;
  pstats->bytes_allocd_since_gc = GC_bytes_allocd;
  pstats->allocd_bytes_before_gc = GC_bytes_allocd_before_gc;
  pstats->non_gc_bytes = GC_non_gc_bytes;
  pstats->gc_no = GC_gc_no; /*< could be -1 */
#  ifdef PARALLEL_MARK
  pstats->markers_m1 = (word)((GC_signed_word)GC_markers_m1);
#  else
  /* A single marker. */
  pstats->markers_m1 = 0;
#  endif
  pstats->bytes_reclaimed_since_gc
      = GC_bytes_found > 0 ? (word)GC_bytes_found : 0;
  pstats->reclaimed_bytes_before_gc = GC_reclaimed_bytes_before_gc;
  pstats->expl_freed_bytes_since_gc = GC_bytes_freed; /*< since gc-7.7 */
  pstats->obtained_from_os_bytes = GC_our_mem_bytes;  /*< since gc-8.2 */
}

#  include <string.h> /*< for `memset()` */

GC_API size_t GC_CALL
GC_get_prof_stats(struct GC_prof_stats_s *pstats, size_t stats_sz)
{
  struct GC_prof_stats_s stats;
  struct GC_prof_stats_s *dest = &stats;

  /*
   * If the client buffer is big enough, then the statistics are stored
   * directly to it; otherwise they are gathered in the local structure.
   */
  if (stats_sz >= sizeof(stats))
    dest = pstats;

  READER_LOCK();
  fill_prof_stats(dest);
  READER_UNLOCK();

  if (stats_sz < sizeof(stats)) {
    /* Pass only the leading part which fits the client buffer. */
    if (LIKELY(stats_sz > 0))
      BCOPY(&stats, pstats, stats_sz);
    return stats_sz;
  }

  if (stats_sz > sizeof(stats)) {
    /* Set every byte of the tail, unknown to this routine, to 0xff. */
    memset((char *)pstats + sizeof(stats), 0xff, stats_sz - sizeof(stats));
  }
  return sizeof(stats);
}

#  ifdef THREADS
GC_API size_t GC_CALL
GC_get_prof_stats_unsafe(struct GC_prof_stats_s *pstats, size_t stats_sz)
{
  struct GC_prof_stats_s stats;

  /* Note: unlike `GC_get_prof_stats()`, no lock is acquired here. */
  if (stats_sz < sizeof(stats)) {
    /*
     * The client buffer is too small: gather the statistics in the
     * local structure and pass only the leading part of it (nothing
     * is done at all if the buffer size is zero).
     */
    if (LIKELY(stats_sz > 0)) {
      fill_prof_stats(&stats);
      BCOPY(&stats, pstats, stats_sz);
    }
    return stats_sz;
  }

  fill_prof_stats(pstats);
  if (stats_sz > sizeof(stats)) {
    /* Set every byte of the tail, unknown to this routine, to 0xff. */
    memset((char *)pstats + sizeof(stats), 0xff, stats_sz - sizeof(stats));
  }
  return sizeof(stats);
}
#  endif /* THREADS */

#endif /* !GC_GET_HEAP_USAGE_NOT_NEEDED */

#if defined(THREADS) && !defined(SIGNAL_BASED_STOP_WORLD)
/* The collector does not use signals to suspend and restart threads. */

/*
 * A stub for the configuration where signals are not used to stop
 * the world: the passed signal number is ignored.
 */
GC_API void GC_CALL
GC_set_suspend_signal(int sig)
{
  UNUSED_ARG(sig);
}

/*
 * A stub for the configuration where signals are not used to stop
 * the world: the passed signal number is ignored.
 */
GC_API void GC_CALL
GC_set_thr_restart_signal(int sig)
{
  UNUSED_ARG(sig);
}

/*
 * A stub for the configuration where signals are not used to stop
 * the world: always returns -1.
 */
GC_API int GC_CALL
GC_get_suspend_signal(void)
{
  return -1;
}

/*
 * A stub for the configuration where signals are not used to stop
 * the world: always returns -1.
 */
GC_API int GC_CALL
GC_get_thr_restart_signal(void)
{
  return -1;
}
#endif /* THREADS && !SIGNAL_BASED_STOP_WORLD */

#if !defined(_MAX_PATH) && defined(ANY_MSWIN)
#  define _MAX_PATH MAX_PATH
#endif

#ifdef GC_READ_ENV_FILE
/* This works for Win32/WinCE for now.  Really useful only for WinCE. */

#  ifndef GC_ENVFILE_MAXLEN
#    define GC_ENVFILE_MAXLEN 0x4000
#  endif

#  define GC_ENV_FILE_EXT ".gc.env"

/* The routine initializes `GC_envfile_content` from the `.gc.env` file. */
STATIC void
GC_envfile_init(void)
{
  /*
   * On any failure (no file, bad length, out of memory, read error)
   * the routine just returns leaving `GC_envfile_content` untouched.
   * The body is empty unless `ANY_MSWIN` is defined.
   */
#  ifdef ANY_MSWIN
  HANDLE hFile;
  char *content;
  size_t ofs, len;
  DWORD nBytesRead;
  TCHAR path[_MAX_PATH + 0x10]; /*< buffer for file path with extension */
  size_t bytes_to_get;

  GC_ASSERT(I_HOLD_LOCK());
  /* Compose the file name from the path of the executable file. */
  len = (size_t)GetModuleFileName(NULL /* `hModule` */, path, _MAX_PATH + 1);
  /* If `GetModuleFileName()` failed, then len is 0. */
  if (len > 4 && path[len - 4] == (TCHAR)'.') {
    /* Strip the executable file extension. */
    len -= 4;
  }
  /* Append the extension; the copied size includes the terminator. */
  BCOPY(TEXT(GC_ENV_FILE_EXT), &path[len], sizeof(TEXT(GC_ENV_FILE_EXT)));
  hFile = CreateFile(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
                     NULL /* `lpSecurityAttributes` */, OPEN_EXISTING,
                     FILE_ATTRIBUTE_NORMAL, NULL /* `hTemplateFile` */);
  if (hFile == INVALID_HANDLE_VALUE) {
    /* The file is absent or the operation failed. */
    return;
  }
  len = (size_t)GetFileSize(hFile, NULL);
  if (len <= 1 || len >= GC_ENVFILE_MAXLEN) {
    CloseHandle(hFile);
    /* Invalid file length - ignoring the file content. */
    return;
  }
  /*
   * At this execution point, `GC_setpagesize()` and `GC_init_win32()`
   * must already be called (for `GET_MEM()` to work correctly).
   */
  GC_ASSERT(GC_page_size != 0);
  /* One extra byte is reserved for the terminating NUL character. */
  bytes_to_get = ROUNDUP_PAGESIZE_IF_MMAP(len + 1);
  content = GC_os_get_mem(bytes_to_get);
  if (content == NULL) {
    CloseHandle(hFile);
    /* An allocation failure. */
    return;
  }
  ofs = 0;
  nBytesRead = (DWORD)-1L;
  /*
   * Each call requests one byte more than the remaining part of the
   * file, so reading stops either on a zero-length read or when more
   * than `len` bytes have been obtained (the latter is treated as
   * a failure below).
   */
  /* Last `ReadFile()` call should clear `nBytesRead` on success. */
  while (ReadFile(hFile, content + ofs, (DWORD)(len - ofs + 1), &nBytesRead,
                  NULL /* `lpOverlapped` */)
         && nBytesRead != 0) {
    if ((ofs += (size_t)nBytesRead) > len)
      break;
  }
  CloseHandle(hFile);
  if (ofs != len || nBytesRead != 0) {
    /* TODO: Recycle content. */
    /* Read operation has failed - ignoring the file content. */
    return;
  }
  content[ofs] = '\0';
  /*
   * Replace every line terminator with NUL, thus turning the content
   * into a sequence of NUL-terminated strings.
   */
  while (ofs-- > 0) {
    if (content[ofs] == '\r' || content[ofs] == '\n')
      content[ofs] = '\0';
  }
  GC_ASSERT(NULL == GC_envfile_content);
  /* The stored length includes the final terminator. */
  GC_envfile_length = len + 1;
  GC_envfile_content = content;
#  endif
}

GC_INNER char *
GC_envfile_getenv(const char *name)
{
  char *p;
  const char *end_of_content;
  size_t namelen;

#  ifndef NO_GETENV
  /* Try the standard `getenv()` first. */
  p = getenv(name);
  if (p != NULL)
    return *p != '\0' ? p : NULL;
#  endif
  p = GC_envfile_content;
  if (NULL == p) {
    /* The `.gc.env` file is absent (or empty). */
    return NULL;
  }
  namelen = strlen(name);
  if (0 == namelen) {
    /* A sanity check. */
    return NULL;
  }
  for (end_of_content = p + GC_envfile_length;
       ADDR_LT((ptr_t)p, (ptr_t)end_of_content); p += strlen(p) + 1) {
    if (strncmp(p, name, namelen) == 0 && *(p += namelen) == '=') {
      /* The match is found; skip "=". */
      p++;
      return *p != '\0' ? p : NULL;
    }
    /* If not matching then skip to the next line. */
  }
  GC_ASSERT(p == end_of_content);
  /* No match is found. */
  return NULL;
}
#endif /* GC_READ_ENV_FILE */

GC_API int GC_CALL
GC_is_init_called(void)
{
  /* The current value of `GC_is_initialized` flag, as `int`. */
  int is_inited = (int)GC_is_initialized;

  return is_inited;
}

#if defined(GC_WIN32_THREADS) \
    && ((defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE))
GC_INNER CRITICAL_SECTION GC_write_cs;
#endif

#ifndef DONT_USE_ATEXIT
STATIC void
GC_exit_check(void)
{
  /* Nothing to do unless in the leak-finding mode (and not skipped). */
  if (!GC_find_leak || GC_skip_collect_atexit)
    return;

#  ifdef THREADS
  /*
   * Proceed only if the thread executing the at-exit functions is
   * the main one and it is registered.  Otherwise, the thread that
   * performed the GC initialization might already be dead but still
   * registered, which might cause a signal delivery failure when
   * suspending the threads on platforms that do not guarantee `ESRCH`
   * returned if the signal is not delivered.  It should also prevent
   * "Collecting from unknown thread" abort in `GC_push_all_stacks()`.
   */
  if (!(GC_is_main_thread() && GC_thread_is_registered()))
    return;
#  endif

  GC_gcollect();
}
#endif /* !DONT_USE_ATEXIT */

#if defined(UNIX_LIKE) && !defined(NO_DEBUGGING)
/*
 * A signal handler which reports the caught signal number and then
 * spins forever (it never returns).
 */
static void
looping_handler(int sig)
{
  GC_err_printf("Caught signal %d: looping in handler\n", sig);
  for (;;) {
    /* Empty. */
  }
}

static GC_bool installed_looping_handler = FALSE;

static void
maybe_install_looping_handler(void)
{
  /* The handler is installed at most once. */
  if (installed_looping_handler)
    return;

  /* The feature is requested by `GC_LOOP_ON_ABORT` variable. */
  if (NULL == GETENV("GC_LOOP_ON_ABORT"))
    return;

  /*
   * The looping handler should be installed before the write fault
   * one, so that the write faults are handled correctly.
   */
  GC_set_and_save_fault_handler(looping_handler);
  installed_looping_handler = TRUE;
}

#else /* !UNIX_LIKE */
#  define maybe_install_looping_handler()
#endif

#define GC_DEFAULT_STDERR_FD 2
#ifdef KOS
#  define GC_DEFAULT_STDOUT_FD GC_DEFAULT_STDERR_FD
#else
#  define GC_DEFAULT_STDOUT_FD 1
#endif

#if !defined(OS2) && !defined(GC_ANDROID_LOG) && !defined(NN_PLATFORM_CTR) \
    && !defined(NINTENDO_SWITCH)                                           \
    && (!defined(MSWIN32) || defined(CONSOLE_LOG)) && !defined(MSWINCE)
STATIC int GC_stdout = GC_DEFAULT_STDOUT_FD;
STATIC int GC_stderr = GC_DEFAULT_STDERR_FD;
STATIC int GC_log = GC_DEFAULT_STDERR_FD;

#  ifndef MSWIN32
/*
 * Set the file descriptor stored in `GC_log`.  The value is stored
 * as is (no validation and no locking is performed here).
 */
GC_API void GC_CALL
GC_set_log_fd(int fd)
{
  GC_log = fd;
}
#  endif
#endif

#ifdef MSGBOX_ON_ERROR
/*
 * Show a message box with the given text, caption and flags.
 * The result of `MessageBoxA()` is ignored.  If `DONT_USE_USER32_DLL`
 * is defined, then the function is looked up in `user32.dll` file at
 * run-time, and nothing is shown if the library or the symbol is
 * unavailable.
 */
STATIC void
GC_win32_MessageBoxA(const char *msg, const char *caption, unsigned flags)
{
#  ifndef DONT_USE_USER32_DLL
  /* Use static binding to `user32.dll` file. */
  (void)MessageBoxA(NULL, msg, caption, flags);
#  else
  /* This simplifies linking - resolve `MessageBoxA()` at run-time. */
  HINSTANCE hU32 = LoadLibrary(TEXT("user32.dll"));
  if (hU32) {
    FARPROC pfn = GetProcAddress(hU32, "MessageBoxA");
    /* Cast the generic procedure address to the proper prototype. */
    if (pfn)
      (void)(*(int(WINAPI *)(HWND, LPCSTR, LPCSTR, UINT))(GC_funcptr_uint)pfn)(
          NULL /* `hWnd` */, msg, caption, flags);
    /* Release the library regardless of the look-up result. */
    (void)FreeLibrary(hU32);
  }
#  endif
}
#endif /* MSGBOX_ON_ERROR */

#if defined(THREADS) && defined(UNIX_LIKE) && !defined(NO_GETCONTEXT)
/*
 * A do-nothing callback (both arguments are ignored); it is passed
 * to `GC_with_callee_saves_pushed()` by `GC_init()`.
 */
static void
callee_saves_pushed_dummy_fn(ptr_t data, void *context)
{
  UNUSED_ARG(data);
  UNUSED_ARG(context);
}
#endif

#ifdef MANUAL_VDB
static GC_bool manual_vdb_allowed = TRUE;
#else
static GC_bool manual_vdb_allowed = FALSE;
#endif

/*
 * Set `manual_vdb_allowed` flag, which is consulted when the
 * incremental mode is turned on.  The value is just stored (converted
 * to `GC_bool`); no locking is performed here.
 */
GC_API void GC_CALL
GC_set_manual_vdb_allowed(int value)
{
  manual_vdb_allowed = (GC_bool)value;
}

GC_API int GC_CALL
GC_get_manual_vdb_allowed(void)
{
  /* The current value of `manual_vdb_allowed` flag, as `int`. */
  int is_allowed = (int)manual_vdb_allowed;

  return is_allowed;
}

GC_API unsigned GC_CALL
GC_get_supported_vdbs(void)
{
#ifdef GC_DISABLE_INCREMENTAL
  return GC_VDB_NONE;
#else
  /*
   * Accumulate the bit flags, one per each of the dirty bits
   * implementations selected at the compilation time.
   */
  unsigned vdbs = 0;

#  ifndef NO_MANUAL_VDB
  vdbs |= GC_VDB_MANUAL;
#  endif
#  ifdef DEFAULT_VDB
  vdbs |= GC_VDB_DEFAULT;
#  endif
#  ifdef MPROTECT_VDB
  vdbs |= GC_VDB_MPROTECT;
#  endif
#  ifdef GWW_VDB
  vdbs |= GC_VDB_GWW;
#  endif
#  ifdef PROC_VDB
  vdbs |= GC_VDB_PROC;
#  endif
#  ifdef SOFT_VDB
  vdbs |= GC_VDB_SOFT;
#  endif
  return vdbs;
#endif
}

#ifndef GC_DISABLE_INCREMENTAL
/*
 * Turn the incremental mode on.  The allocator lock should be held.
 * On return, `GC_incremental` is true either unconditionally (if
 * the manual mode is allowed) or if `GC_dirty_init()` has succeeded.
 */
static void
set_incremental_mode_on(void)
{
  GC_ASSERT(I_HOLD_LOCK());
#  ifndef NO_MANUAL_VDB
  if (manual_vdb_allowed) {
    /* The manual mode needs no dirty bits initialization. */
    GC_manual_vdb = TRUE;
    GC_incremental = TRUE;
    return;
  }
#  endif
  /*
   * For `GWW_VDB` on Win32, this needs to happen before any heap memory
   * is allocated.
   */
  GC_incremental = GC_dirty_init();
}
#endif /* !GC_DISABLE_INCREMENTAL */

/*
 * Parse a memory size given as a decimal number optionally followed by
 * a single-letter suffix: "k", "M" or "G" (in any case; the latter two
 * are accepted only if `CPP_WORDSZ` is at least 32), meaning the number
 * is multiplied by 2^10, 2^20 or 2^30, respectively.
 * Return `GC_WORD_MAX` to indicate a bad value: an empty string,
 * a string without digits, an unknown suffix, extra characters after
 * the suffix, or a value which does not fit `word` after the scaling.
 */
STATIC word
GC_parse_mem_size_arg(const char *str)
{
  word result;
  char *endptr;
  unsigned shift;

  if ('\0' == *str)
    return GC_WORD_MAX; /*< bad value */
  result = (word)STRTOULL(str, &endptr, 10);
  if (endptr == str) {
    /* No digits at all (e.g. just "k") - not a number. */
    return GC_WORD_MAX;
  }
  if ('\0' == *endptr)
    return result;

  /* At most one character is allowed after the number. */
  if (*(endptr + 1) != '\0')
    return GC_WORD_MAX;
  /* Allow "k", "M" or "G" suffix. */
  switch (*endptr) {
  case 'K':
  case 'k':
    shift = 10;
    break;
#if CPP_WORDSZ >= 32
  case 'M':
  case 'm':
    shift = 20;
    break;
  case 'G':
  case 'g':
    shift = 30;
    break;
#endif
  default:
    return GC_WORD_MAX;
  }
  /* Reject the value if the scaling would lose the high-order bits. */
  if (result > (GC_WORD_MAX >> shift))
    return GC_WORD_MAX;
  return result << shift;
}

#define GC_LOG_STD_NAME "gc.log"

/*
 * Initialize the collector.  This is a no-op if `GC_is_initialized` is
 * already set.  Otherwise, in order: the locks are set up (in
 * a multi-threaded build), the page size is determined, the `GC_*`
 * environment variables are processed, the main stack bottom is
 * determined (if not set yet), the static data roots are registered,
 * the heap is expanded to its initial size, the threads support is
 * initialized, and, unless `GC_dont_precollect` is set and
 * the incremental mode is off, a collection is performed.
 * If the initial heap cannot be obtained, then a message is printed
 * and `EXIT()` is invoked.
 */
GC_API void GC_CALL
GC_init(void)
{
  word initial_heap_sz;
  IF_CANCEL(int cancel_state;)

  if (LIKELY(GC_is_initialized))
    return;
#ifdef REDIRECT_MALLOC
  {
    /* Detect a recursive entry (through the redirected `malloc()`). */
    static GC_bool init_started = FALSE;

    if (init_started)
      ABORT("Redirected malloc() called during GC init");
    init_started = TRUE;
  }
#endif

  /*
   * The default initial heap size; it could be overridden below by
   * `GC_INITIAL_HEAP_SIZE` environment variable.
   */
#if defined(GC_INITIAL_HEAP_SIZE) && !defined(CPPCHECK)
  initial_heap_sz = GC_INITIAL_HEAP_SIZE;
#else
  initial_heap_sz = MINHINCR * HBLKSIZE;
#endif

  DISABLE_CANCEL(cancel_state);
  /*
   * Note that although we are nominally called with the allocator lock
   * held, now it is only really acquired once a second thread is created.
   * And the initialization code needs to run before then.  Thus we really
   * do not hold any locks, and can safely initialize them here.
   */
#ifdef THREADS
#  ifndef GC_ALWAYS_MULTITHREADED
  GC_ASSERT(!GC_need_to_lock);
#  endif
  {
#  if !defined(GC_BUILTIN_ATOMIC) && defined(HP_PA) \
      && (defined(USE_SPIN_LOCK) || defined(NEED_FAULT_HANDLER_LOCK))
    AO_TS_t ts_init = AO_TS_INITIALIZER;

    /* Arrays can only be initialized when declared. */
#    ifdef USE_SPIN_LOCK
    BCOPY(&ts_init, (/* no volatile */ void *)&GC_allocate_lock,
          sizeof(GC_allocate_lock));
#    endif
#    ifdef NEED_FAULT_HANDLER_LOCK
    BCOPY(&ts_init, (/* no volatile */ void *)&GC_fault_handler_lock,
          sizeof(GC_fault_handler_lock));
#    endif
#  else
#    ifdef USE_SPIN_LOCK
    GC_allocate_lock = AO_TS_INITIALIZER;
#    endif
#    ifdef NEED_FAULT_HANDLER_LOCK
    GC_fault_handler_lock = AO_TS_INITIALIZER;
#    endif
#  endif
  }
#  ifdef SN_TARGET_PS3
  {
    pthread_mutexattr_t mattr;

    if (pthread_mutexattr_init(&mattr) != 0)
      ABORT("pthread_mutexattr_init failed");
    if (pthread_mutex_init(&GC_allocate_ml, &mattr) != 0)
      ABORT("pthread_mutex_init failed");
    (void)pthread_mutexattr_destroy(&mattr);
  }
#  endif
#endif /* THREADS */
#if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
#  ifndef SPIN_COUNT
#    define SPIN_COUNT 4000
#  endif
#  ifdef USE_RWLOCK
  /* TODO: Probably use `SRWLOCK_INIT` instead. */
  InitializeSRWLock(&GC_allocate_ml);
#  elif defined(MSWINRT_FLAVOR)
  InitializeCriticalSectionAndSpinCount(&GC_allocate_ml, SPIN_COUNT);
#  else
  {
    /*
     * Use `InitializeCriticalSectionAndSpinCount()` if it is found in
     * `kernel32.dll` at run-time, otherwise fall back to
     * `InitializeCriticalSection()`.
     */
#    ifndef MSWINCE
    FARPROC pfn = 0;
    HMODULE hK32 = GetModuleHandle(TEXT("kernel32.dll"));
    if (hK32)
      pfn = GetProcAddress(hK32, "InitializeCriticalSectionAndSpinCount");
    if (pfn) {
      (*(BOOL(WINAPI *)(LPCRITICAL_SECTION, DWORD))(GC_funcptr_uint)pfn)(
          &GC_allocate_ml, SPIN_COUNT);
    } else
#    endif /* !MSWINCE */
      /* else */ InitializeCriticalSection(&GC_allocate_ml);
  }
#  endif
#endif /* GC_WIN32_THREADS && !GC_PTHREADS */
#if defined(GC_WIN32_THREADS) \
    && ((defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE))
  InitializeCriticalSection(&GC_write_cs);
#endif
#if defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED)
  /* Just to set `GC_lock_holder`. */
  LOCK();
#endif
#ifdef DYNAMIC_POINTER_MASK
  /* A zero mask is replaced with the all-ones one. */
  if (0 == GC_pointer_mask)
    GC_pointer_mask = GC_WORD_MAX;
#endif
  GC_setpagesize();
#ifdef MSWIN32
  GC_init_win32();
#endif
#ifdef GC_READ_ENV_FILE
  GC_envfile_init();
#endif
#if !defined(NO_CLOCK) || !defined(SMALL_CONFIG)
#  ifdef GC_PRINT_VERBOSE_STATS
  /*
   * This is useful for debugging and profiling on platforms with
   * missing `getenv()` (like WinCE).
   */
  GC_print_stats = VERBOSE;
#  else
  if (GETENV("GC_PRINT_VERBOSE_STATS") != NULL) {
    GC_print_stats = VERBOSE;
  } else if (GETENV("GC_PRINT_STATS") != NULL) {
    GC_print_stats = 1;
  }
#  endif
#endif
#if ((defined(UNIX_LIKE) && !defined(GC_ANDROID_LOG))                   \
     || (defined(CONSOLE_LOG) && defined(MSWIN32)) || defined(CYGWIN32) \
     || defined(SYMBIAN))                                               \
    && !defined(SMALL_CONFIG)
  {
    /*
     * Open the log file (in the append mode) if requested by
     * `GC_LOG_FILE` environment variable or, if the variable is unset,
     * by `GC_LOG_TO_FILE_ALWAYS` macro.
     */
    const char *fname = TRUSTED_STRING(GETENV("GC_LOG_FILE"));
#  ifdef GC_LOG_TO_FILE_ALWAYS
    if (NULL == fname)
      fname = GC_LOG_STD_NAME;
#  else
    if (fname != NULL)
#  endif
    {
#  if defined(_MSC_VER)
      int log_d = _open(fname, O_CREAT | O_WRONLY | O_APPEND);
#  else
      int log_d = open(fname, O_CREAT | O_WRONLY | O_APPEND, 0644);
#  endif
      if (log_d < 0) {
        GC_err_printf("Failed to open %s as log file\n", fname);
      } else {
        const char *str;
        GC_log = log_d;
        str = GETENV("GC_ONLY_LOG_TO_FILE");
#  ifdef GC_ONLY_LOG_TO_FILE
        /*
         * The similar environment variable set to "0"
         * overrides the effect of the macro defined.
         */
        if (str != NULL && str[0] == '0' && str[1] == '\0')
#  else
        /*
         * Otherwise setting the environment variable to anything other
         * than "0" will prevent from redirecting `stdout` and `stderr`
         * to the collector log file.
         */
        if (str == NULL || (str[0] == '0' && str[1] == '\0'))
#  endif
        {
          GC_stdout = log_d;
          GC_stderr = log_d;
        }
      }
    }
  }
#endif
#if !defined(NO_DEBUGGING) && !defined(GC_DUMP_REGULARLY)
  if (GETENV("GC_DUMP_REGULARLY") != NULL) {
    GC_dump_regularly = TRUE;
  }
#endif
#ifdef KEEP_BACK_PTRS
  {
    const char *str = GETENV("GC_BACKTRACES");

    if (str != NULL) {
      GC_backtraces = atol(str);
      /* The variable set to an empty string means 1. */
      if (str[0] == '\0')
        GC_backtraces = 1;
    }
  }
#endif
#ifndef NO_FIND_LEAK
  if (GETENV("GC_FIND_LEAK") != NULL) {
    GC_find_leak = 1;
  }
#  ifndef SHORT_DBG_HDRS
  if (GETENV("GC_FINDLEAK_DELAY_FREE") != NULL) {
    GC_findleak_delay_free = TRUE;
  }
#  endif
#endif
  if (GETENV("GC_ALL_INTERIOR_POINTERS") != NULL) {
    GC_all_interior_pointers = 1;
  }
  if (GETENV("GC_DONT_GC") != NULL) {
#if defined(LINT2) \
    && !(defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED))
    GC_disable();
#else
    GC_dont_gc = 1;
#endif
  }
#if !defined(SMALL_CONFIG) && !defined(GC_PRINT_BACK_HEIGHT)
  if (GETENV("GC_PRINT_BACK_HEIGHT") != NULL) {
#  ifdef MAKE_BACK_GRAPH
    GC_print_back_height = TRUE;
#  else
    GC_err_printf("Back height is not available!\n");
#  endif
  }
#endif
  {
    const char *str = GETENV("GC_TRACE");

    if (str != NULL) {
#ifndef ENABLE_TRACE
      WARN("Tracing not enabled: Ignoring GC_TRACE value\n", 0);
#else
      /* The value is parsed as a hexadecimal address. */
      ptr_t p = MAKE_CPTR(STRTOULL(str, NULL, 16));

      if (ADDR(p) < 0x1000)
        WARN("Unlikely trace address: %p\n", p);
      GC_trace_ptr = p;
#endif
    }
  }
#ifdef GC_COLLECT_AT_MALLOC
  {
    const char *str = GETENV("GC_COLLECT_AT_MALLOC");

    if (str != NULL) {
      size_t min_lb = (size_t)STRTOULL(str, NULL, 10);

      /* A zero (or non-numeric) value is ignored. */
      if (min_lb > 0)
        GC_dbg_collect_at_malloc_min_lb = min_lb;
    }
  }
#endif
#if !defined(GC_DISABLE_INCREMENTAL) && !defined(NO_CLOCK)
  {
    const char *str = GETENV("GC_PAUSE_TIME_TARGET");

    if (str != NULL) {
      long time_limit = atol(str);

      /* Only a positive value is accepted. */
      if (time_limit > 0) {
        GC_time_limit = (unsigned long)time_limit;
      }
    }
  }
#endif
#ifndef SMALL_CONFIG
  {
    const char *str = GETENV("GC_FULL_FREQUENCY");

    if (str != NULL) {
      int full_freq = atoi(str);

      /* Only a positive value is accepted. */
      if (full_freq > 0)
        GC_full_freq = full_freq;
    }
  }
#endif
#ifndef NO_BLACK_LISTING
  {
    char const *str = GETENV("GC_LARGE_ALLOC_WARN_INTERVAL");

    if (str != NULL) {
      long interval = atol(str);

      if (interval <= 0) {
        WARN("GC_LARGE_ALLOC_WARN_INTERVAL environment variable has"
             " bad value - ignoring\n",
             0);
      } else {
        GC_large_alloc_warn_interval = interval;
      }
    }
  }
#endif
  {
    const char *str = GETENV("GC_FREE_SPACE_DIVISOR");

    if (str != NULL) {
      int space_divisor = atoi(str);

      /* Only a positive value is accepted. */
      if (space_divisor > 0)
        GC_free_space_divisor = (unsigned)space_divisor;
    }
  }
#ifdef USE_MUNMAP
  {
    const char *str = GETENV("GC_UNMAP_THRESHOLD");

    if (str != NULL) {
      if (str[0] == '0' && str[1] == '\0') {
        /* "0" is used to disable unmapping. */
        GC_unmap_threshold = 0;
      } else {
        int unmap_threshold = atoi(str);

        if (unmap_threshold > 0)
          GC_unmap_threshold = (unsigned)unmap_threshold;
      }
    }
  }
  {
    const char *str = GETENV("GC_FORCE_UNMAP_ON_GCOLLECT");

    if (str != NULL) {
      if (str[0] == '0' && str[1] == '\0') {
        /* "0" is used to turn off the mode. */
        GC_force_unmap_on_gcollect = FALSE;
      } else {
        GC_force_unmap_on_gcollect = TRUE;
      }
    }
  }
  {
    const char *str = GETENV("GC_USE_ENTIRE_HEAP");

    if (str != NULL) {
      if (str[0] == '0' && str[1] == '\0') {
        /* "0" is used to turn off the mode. */
        GC_use_entire_heap = FALSE;
      } else {
        GC_use_entire_heap = TRUE;
      }
    }
  }
#endif
#if !defined(NO_DEBUGGING) && !defined(NO_CLOCK)
  GET_TIME(GC_init_time);
#endif
  maybe_install_looping_handler();
#if ALIGNMENT > GC_DS_TAGS
  /* Adjust normal object descriptor for extra allocation. */
  if (EXTRA_BYTES != 0)
    GC_obj_kinds[NORMAL].ok_descriptor
        = ((~(word)ALIGNMENT) + 1) | GC_DS_LENGTH;
#endif
  /* The collector own data structures are excluded from the roots. */
  GC_exclude_static_roots_inner(beginGC_arrays, endGC_arrays);
  GC_exclude_static_roots_inner(beginGC_obj_kinds, endGC_obj_kinds);
#if defined(USE_PROC_FOR_LIBRARIES) && defined(LINUX) && defined(THREADS)
  /*
   * TODO: `USE_PROC_FOR_LIBRARIES` with LinuxThreads performs poorly!
   * If thread stacks are cached, they tend to be scanned in entirety
   * as part of the root set.  This will grow them to maximum size, and
   * is generally not desirable.
   */
#endif
#if !defined(THREADS) || !(defined(SN_TARGET_PS3) || defined(SN_TARGET_PSP2))
  /* Determine the main stack bottom unless already set. */
  if (NULL == GC_stackbottom) {
    GC_stackbottom = GC_get_main_stack_base();
#  if (defined(LINUX) || defined(HPUX)) && defined(IA64)
    GC_register_stackbottom = GC_get_register_stack_base();
#  endif
  } else {
#  if (defined(LINUX) || defined(HPUX)) && defined(IA64)
    if (NULL == GC_register_stackbottom) {
      WARN("GC_register_stackbottom should be set with GC_stackbottom\n", 0);
      /*
       * The following may fail, since we may rely on alignment properties
       * that may not hold with `GC_stackbottom` value set by client.
       */
      GC_register_stackbottom = GC_get_register_stack_base();
    }
#  endif
  }
#endif
  /* The compile-time checks of the assumptions about the types. */
#if !defined(CPPCHECK)
  GC_STATIC_ASSERT(sizeof(size_t) <= sizeof(ptrdiff_t));
#  ifdef AO_HAVE_store
  /*
   * As of now, `hb_descr`, `mse_descr` and `hb_marks[i]` might be treated
   * as variables of `word` type but might be accessed atomically.
   */
  GC_STATIC_ASSERT(sizeof(AO_t) == sizeof(word));
#  endif
  GC_STATIC_ASSERT(sizeof(ptrdiff_t) == sizeof(word));
  GC_STATIC_ASSERT(sizeof(GC_signed_word) == sizeof(word));
  GC_STATIC_ASSERT(sizeof(word) * 8 == CPP_WORDSZ);
  GC_STATIC_ASSERT(sizeof(ptr_t) * 8 == CPP_PTRSZ);
  GC_STATIC_ASSERT(sizeof(ptr_t) == sizeof(GC_uintptr_t));
  GC_STATIC_ASSERT(sizeof(GC_oom_func) == sizeof(GC_funcptr_uint));
#  ifdef FUNCPTR_IS_DATAPTR
  GC_STATIC_ASSERT(sizeof(ptr_t) == sizeof(GC_funcptr_uint));
#  endif
  GC_STATIC_ASSERT((word)(-1) > (word)0); /*< `word` should be unsigned */
  /*
   * We no longer check for `(void *)-1 > NULL` since all pointers
   * are explicitly cast to `word` in every less/greater comparison.
   */
  GC_STATIC_ASSERT((GC_signed_word)(-1) < (GC_signed_word)0);
#endif
  GC_STATIC_ASSERT(sizeof(struct hblk) == HBLKSIZE);
#ifndef THREADS
  GC_ASSERT(!HOTTER_THAN(GC_stackbottom, GC_approx_sp()));
#endif
  GC_init_headers();
#ifdef SEARCH_FOR_DATA_START
  /*
   * For `MPROTECT_VDB`, the temporary fault handler should be installed
   * first, before the write fault one in `GC_dirty_init`.
   */
  if (GC_REGISTER_MAIN_STATIC_DATA())
    GC_init_linux_data_start();
#endif
#ifndef GC_DISABLE_INCREMENTAL
  /*
   * The incremental mode is turned on if requested either by the flag
   * (already set, e.g., by `GC_enable_incremental()`) or by
   * the environment variable.
   */
  if (GC_incremental || GETENV("GC_ENABLE_INCREMENTAL") != NULL) {
    set_incremental_mode_on();
    GC_ASSERT(0 == GC_bytes_allocd);
  }
#endif

  /*
   * Add the initial guess of root sets.  Do this first, since `sbrk(0)`
   * might be used.
   */
  if (GC_REGISTER_MAIN_STATIC_DATA())
    GC_register_data_segments();

  GC_bl_init();
  GC_mark_init();
  {
    const char *str = GETENV("GC_INITIAL_HEAP_SIZE");

    if (str != NULL) {
      word value = GC_parse_mem_size_arg(str);

      /* `GC_WORD_MAX` result means the value is bad. */
      if (GC_WORD_MAX == value) {
        WARN("Bad initial heap size %s - ignoring\n", str);
      } else {
        initial_heap_sz = value;
      }
    }
  }
  {
    const char *str = GETENV("GC_MAXIMUM_HEAP_SIZE");

    if (str != NULL) {
      word max_heap_sz = GC_parse_mem_size_arg(str);

      /* The limit smaller than the initial heap size is rejected too. */
      if (max_heap_sz < initial_heap_sz || GC_WORD_MAX == max_heap_sz) {
        WARN("Bad maximum heap size %s - ignoring\n", str);
      } else {
        if (0 == GC_max_retries)
          GC_max_retries = 2;
        GC_set_max_heap_size(max_heap_sz);
      }
    }
  }
  if (initial_heap_sz != 0) {
    /* The heap expansion amount is passed in heap blocks. */
    if (!GC_expand_hp_inner(divHBLKSZ(initial_heap_sz))) {
      GC_err_printf("Can't start up: not enough memory\n");
      EXIT();
    } else {
      GC_requested_heapsize += initial_heap_sz;
    }
  }
  if (GC_all_interior_pointers)
    GC_initialize_offsets();
  GC_register_displacement_inner(0);
#ifdef REDIR_MALLOC_AND_LINUXTHREADS
  if (!GC_all_interior_pointers) {
    /* TLS ABI uses "pointer-sized" offsets for `dtv`. */
    GC_register_displacement_inner(sizeof(void *));
  }
#endif
  GC_init_size_map();
  /* Note: the flag is set before the threads support initialization. */
  GC_is_initialized = TRUE;
#ifdef THREADS
#  if defined(LINT2) \
      && !(defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED))
  LOCK();
  GC_thr_init();
  UNLOCK();
#  else
  GC_thr_init();
#  endif
#endif
  COND_DUMP;
  /* Get black list set up and/or the incremental GC started. */
  if (!GC_dont_precollect || GC_incremental) {
#if defined(DYNAMIC_LOADING) && defined(DARWIN)
    GC_ASSERT(0 == GC_bytes_allocd);
#endif
    GC_gcollect_inner();
  }
#if defined(GC_ASSERTIONS) && defined(GC_ALWAYS_MULTITHREADED)
  /* Matches `LOCK()` call near the beginning of the routine. */
  UNLOCK();
#endif
#if defined(THREADS) && defined(UNIX_LIKE) && !defined(NO_GETCONTEXT)
  /* Ensure `getcontext_works` is set to avoid potential data race. */
  if (GC_dont_gc || GC_dont_precollect)
    GC_with_callee_saves_pushed(callee_saves_pushed_dummy_fn, NULL);
#endif
#ifndef DONT_USE_ATEXIT
  if (GC_find_leak) {
    /*
     * This is to give us at least one chance to detect leaks.
     * This may report some very benign leaks, but...
     */
    atexit(GC_exit_check);
  }
#endif
  /*
   * The rest of this again assumes we do not really hold the allocator
   * lock.
   */

#ifdef THREADS
  /* Initialize thread-local allocation. */
  GC_init_parallel();
#endif

#if defined(DYNAMIC_LOADING) && defined(DARWIN)
  /*
   * This must be called *without* the allocator lock held and before
   * any threads are created.
   */
  GC_init_dyld();
#endif
  RESTORE_CANCEL(cancel_state);
  /*
   * It is not safe to allocate any object till completion of `GC_init`
   * (in particular by `GC_thr_init`), i.e. before `GC_init_dyld()` call
   * and initialization of the incremental mode (if any).
   */
#if defined(GWW_VDB) && !defined(KEEP_BACK_PTRS)
  GC_ASSERT(GC_bytes_allocd + GC_bytes_allocd_before_gc == 0);
#endif
}

/*
 * Turn on the incremental collection mode, initializing the collector
 * if needed.  The mode is not enabled (only `GC_init()` is called) if
 * the build has `GC_DISABLE_INCREMENTAL` or `KEEP_BACK_PTRS` defined,
 * if the leak-finding mode is on, or if `GC_DISABLE_INCREMENTAL`
 * environment variable is set.
 */
GC_API void GC_CALL
GC_enable_incremental(void)
{
#if !defined(GC_DISABLE_INCREMENTAL) && !defined(KEEP_BACK_PTRS)
  /*
   * If we are keeping back pointers, the collector itself dirties all pages
   * on which objects have been marked, making the incremental collection
   * pointless.
   */
  if (!GC_find_leak_inner && NULL == GETENV("GC_DISABLE_INCREMENTAL")) {
    LOCK();
    /* Nothing to do if the mode is already on. */
    if (!GC_incremental) {
      GC_setpagesize();
      /* TODO: Should we skip enabling incremental if win32s? */

      /* Install the looping handler before write fault handler! */
      maybe_install_looping_handler();
      if (!GC_is_initialized) {
        /* Indicate the intention to turn it on. */
        GC_incremental = TRUE;
        /* The lock is released for the duration of `GC_init()` call. */
        UNLOCK();
        GC_init();
        LOCK();
      } else {
        set_incremental_mode_on();
      }
      /* Cannot easily do it if `GC_dont_gc`. */
      if (GC_incremental && !GC_dont_gc) {
        IF_CANCEL(int cancel_state;)

        DISABLE_CANCEL(cancel_state);
        if (GC_bytes_allocd > 0) {
          /* There may be unmarked reachable objects. */
          GC_gcollect_inner();
        } else {
          /*
           * We are OK in assuming everything is clean since nothing can
           * point to an unmarked object.
           */
#  ifdef CHECKSUMS
          GC_read_dirty(FALSE);
#  else
          GC_read_dirty(TRUE);
#  endif
        }
        RESTORE_CANCEL(cancel_state);
      }
    }
    UNLOCK();
    return;
  }
#endif
  /* The incremental mode is not available or not allowed. */
  GC_init();
}

/*
 * Start the marker threads by calling `GC_start_mark_threads_inner()`
 * with the allocator lock held and the cancellation disabled.
 * The body does nothing (except for an assertion) unless
 * `PARALLEL_MARK` is defined.
 */
GC_API void GC_CALL
GC_start_mark_threads(void)
{
#ifdef PARALLEL_MARK
  IF_CANCEL(int cancel_state;)

  DISABLE_CANCEL(cancel_state);
  LOCK();
  GC_start_mark_threads_inner();
  UNLOCK();
  RESTORE_CANCEL(cancel_state);
#else
  /* No action since parallel markers are disabled (or no POSIX `fork`). */
  GC_ASSERT(I_DONT_HOLD_LOCK());
#endif
}

#ifndef GC_NO_DEINIT
/*
 * Reset the collector state to the uninitialized one: `GC_arrays` is
 * zero-filled, a number of global variables and tables are reset, and,
 * on Win32, the critical sections created by `GC_init()` are deleted.
 * The call is a no-op if the collector is not initialized.
 */
GC_API void GC_CALL
GC_deinit(void)
{
  /* Prevent duplicate resource close. */
  if (!GC_is_initialized)
    return;

  BZERO(&GC_arrays, sizeof(GC_arrays)); /*< clears GC_is_initialized */
  /* Reset the state which is not cleared by the statement above. */
  GC_gc_no = 0;
  GC_dont_gc = FALSE;
  GC_non_gc_bytes = 0;
  GC_reset_obj_kinds();
  GC_n_mark_procs = GC_RESERVED_MARK_PROCS;
  GC_reset_freelist();
#  ifdef CHECKSUMS
  GC_reset_check_page();
#  endif
#  ifdef THREADS
  GC_reset_threads();
#  endif
#  ifdef THREAD_LOCAL_ALLOC
  GC_reset_thread_local_initialization();
#  endif
#  if defined(GC_WIN32_THREADS) && (defined(MSWIN32) || defined(MSWINCE))
  /* The conditions match those of the initialization in `GC_init()`. */
#    if !defined(CONSOLE_LOG) || defined(MSWINCE)
  DeleteCriticalSection(&GC_write_cs);
#    endif
#    if !defined(GC_PTHREADS) && !defined(USE_RWLOCK)
  DeleteCriticalSection(&GC_allocate_ml);
#    endif
#  endif
}
#endif

#if (defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE)

STATIC HANDLE GC_log = 0;

#  ifdef THREADS
#    if defined(PARALLEL_MARK) && !defined(GC_ALWAYS_MULTITHREADED)
#      define IF_NEED_TO_LOCK(x)            \
        if (GC_parallel || GC_need_to_lock) \
        x
#    else
#      define IF_NEED_TO_LOCK(x) \
        if (GC_need_to_lock)     \
        x
#    endif
#  else
#    define IF_NEED_TO_LOCK(x)
#  endif /* !THREADS */

#  ifdef MSWINRT_FLAVOR
#    include <windows.storage.h>

/*
 * This API function is defined in platform `roapi.h` file, but we cannot
 * include it here since it does not compile in C.
 */
DECLSPEC_IMPORT HRESULT WINAPI
RoGetActivationFactory(HSTRING activatableClassId, REFIID iid, void **factory);

/*
 * Compose the path of the log file located in the temporary folder of
 * the current application data (as reported by WinRT `ApplicationData`
 * class) and store it to `buf` as a NUL-terminated wide string of
 * the form: temporary folder path, backslash, `GC_LOG_STD_NAME`.
 * `bufLen` is the capacity of `buf` in wide characters (must be
 * non-zero).  Returns `TRUE` on success, and `FALSE` if any of the WinRT
 * calls or of the string concatenations has failed.
 */
static GC_bool
getWinRTLogPath(wchar_t *buf, size_t bufLen)
{
  /* The identifiers of the interfaces requested below. */
  static const GUID kIID_IApplicationDataStatics
      = { 0x5612147B, 0xE843, 0x45E3, 0x94, 0xD8, 0x06,
          0x16,       0x9E,   0x3C,   0x8E, 0x17 };
  static const GUID kIID_IStorageItem
      = { 0x4207A996, 0xCA2F, 0x42F7, 0xBD, 0xE8, 0x8B,
          0x10,       0x45,   0x7A,   0x7F, 0x30 };
  GC_bool result = FALSE;
  HSTRING_HEADER appDataClassNameHeader;
  HSTRING appDataClassName;
  __x_ABI_CWindows_CStorage_CIApplicationDataStatics *appDataStatics = 0;

  GC_ASSERT(bufLen > 0);
  /*
   * Wrap the class name into a string reference (the length is given in
   * characters, excluding the terminating NUL) and get the activation
   * factory of the class.
   */
  if (SUCCEEDED(WindowsCreateStringReference(
          RuntimeClass_Windows_Storage_ApplicationData,
          (sizeof(RuntimeClass_Windows_Storage_ApplicationData) - 1)
              / sizeof(wchar_t),
          &appDataClassNameHeader, &appDataClassName))
      && SUCCEEDED(RoGetActivationFactory(
          appDataClassName, &kIID_IApplicationDataStatics, &appDataStatics))) {
    __x_ABI_CWindows_CStorage_CIApplicationData *appData = NULL;
    __x_ABI_CWindows_CStorage_CIStorageFolder *tempFolder = NULL;
    __x_ABI_CWindows_CStorage_CIStorageItem *tempFolderItem = NULL;
    HSTRING tempPath = NULL;

    /*
     * Walk the chain: current application data -> its temporary folder
     * -> the storage item interface of the folder -> the folder path.
     * The chain stops at the first failed call.
     */
    if (SUCCEEDED(
            appDataStatics->lpVtbl->get_Current(appDataStatics, &appData))
        && SUCCEEDED(
            appData->lpVtbl->get_TemporaryFolder(appData, &tempFolder))
        && SUCCEEDED(tempFolder->lpVtbl->QueryInterface(
            tempFolder, &kIID_IStorageItem, &tempFolderItem))
        && SUCCEEDED(
            tempFolderItem->lpVtbl->get_Path(tempFolderItem, &tempPath))) {
      UINT32 tempPathLen;
      const wchar_t *tempPathBuf
          = WindowsGetStringRawBuffer(tempPath, &tempPathLen);

      /* Start with an empty string, then append all the path parts. */
      buf[0] = '\0';
      if (wcsncat_s(buf, bufLen, tempPathBuf, tempPathLen) == 0
          && wcscat_s(buf, bufLen, L"\\") == 0
          && wcscat_s(buf, bufLen, TEXT(GC_LOG_STD_NAME)) == 0)
        result = TRUE;
      WindowsDeleteString(tempPath);
    }

    /* Release only the interfaces which have been actually obtained. */
    if (tempFolderItem != NULL)
      tempFolderItem->lpVtbl->Release(tempFolderItem);
    if (tempFolder != NULL)
      tempFolder->lpVtbl->Release(tempFolder);
    if (appData != NULL)
      appData->lpVtbl->Release(appData);
    appDataStatics->lpVtbl->Release(appDataStatics);
  }
  return result;
}
#  endif /* MSWINRT_FLAVOR */

/*
 * Create (or open) the collector log file and return its handle; the
 * result is compared against `INVALID_HANDLE_VALUE` by the caller to
 * detect a failure.  The file path is chosen as follows:
 *   - in the WinRT flavor, it is the one composed by `getWinRTLogPath()`;
 *   - otherwise, unless `NO_GETENV_WIN32`, it is the value of
 *     `GC_LOG_FILE` environment variable (the file is appended to in
 *     this case);
 *   - otherwise (or if the variable is missing or its value is too
 *     long), it is either `GC_LOG_STD_NAME` (if `OLD_WIN32_LOG_FILE`) or
 *     the path of the executable module with "." `GC_LOG_STD_NAME`
 *     appended in place of the 3-character file extension (if any).
 * Write-through mode is requested unless `GC_print_stats` is `VERBOSE`.
 */
STATIC HANDLE
GC_CreateLogFile(void)
{
  HANDLE hFile;
#  ifdef MSWINRT_FLAVOR
  TCHAR pathBuf[_MAX_PATH + 0x10]; /*< buffer for file path plus extension */

  hFile = INVALID_HANDLE_VALUE;
  if (getWinRTLogPath(pathBuf, _MAX_PATH + 1)) {
    CREATEFILE2_EXTENDED_PARAMETERS extParams;

    BZERO(&extParams, sizeof(extParams));
    extParams.dwSize = sizeof(extParams);
    extParams.dwFileAttributes = FILE_ATTRIBUTE_NORMAL;
    extParams.dwFileFlags
        = GC_print_stats == VERBOSE ? 0 : FILE_FLAG_WRITE_THROUGH;
    hFile = CreateFile2(pathBuf, GENERIC_WRITE, FILE_SHARE_READ, CREATE_ALWAYS,
                        &extParams);
  }

#  else
  TCHAR *logPath;
#    if defined(NO_GETENV_WIN32) && defined(CPPCHECK)
#      define appendToFile FALSE
#    else
  BOOL appendToFile = FALSE;
#    endif
#    if !defined(NO_GETENV_WIN32) || !defined(OLD_WIN32_LOG_FILE)
  TCHAR pathBuf[_MAX_PATH + 0x10]; /*< buffer for file path plus extension */

  logPath = pathBuf;
#    endif

  /* Use `GetEnvironmentVariable` instead of `GETENV` for Unicode support. */
#    ifndef NO_GETENV_WIN32
  /*
   * The unsigned subtraction makes a zero result wrap around, thus only
   * a result in the range from 1 to `_MAX_PATH` is accepted.
   */
  if (GetEnvironmentVariable(TEXT("GC_LOG_FILE"), pathBuf, _MAX_PATH + 1) - 1U
      < (DWORD)_MAX_PATH) {
    appendToFile = TRUE;
  } else
#    endif
  /* else */ {
    /* Environment var not found or its value too long. */
#    ifdef OLD_WIN32_LOG_FILE
    logPath = TEXT(GC_LOG_STD_NAME);
#    else
    int len
        = (int)GetModuleFileName(NULL /* `hModule` */, pathBuf, _MAX_PATH + 1);
    /* If `GetModuleFileName()` has failed, then len is 0. */
    if (len > 4 && pathBuf[len - 4] == (TCHAR)'.') {
      /* Strip the executable file extension. */
      len -= 4;
    }
    /*
     * Append the log file suffix including its terminating NUL (the
     * extra 0x10 elements of `pathBuf` are reserved for this).
     */
    BCOPY(TEXT(".") TEXT(GC_LOG_STD_NAME), &pathBuf[len],
          sizeof(TEXT(".") TEXT(GC_LOG_STD_NAME)));
#    endif
  }

  hFile = CreateFile(logPath, GENERIC_WRITE, FILE_SHARE_READ,
                     NULL /* `lpSecurityAttributes` */,
                     appendToFile ? OPEN_ALWAYS : CREATE_ALWAYS,
                     GC_print_stats == VERBOSE
                         ? FILE_ATTRIBUTE_NORMAL
                         :
                         /* immediately flush writes unless very verbose */
                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH,
                     NULL /* `hTemplateFile` */);

#    ifndef NO_GETENV_WIN32
  if (appendToFile && hFile != INVALID_HANDLE_VALUE) {
    LONG posHigh = 0;
    /* Seek to the file end (ignoring any error). */
    (void)SetFilePointer(hFile, 0, &posHigh, FILE_END);
  }
#    endif
#    undef appendToFile
#  endif
  return hFile;
}

/*
 * Write `len` bytes starting at `buf` to the collector log file (which
 * is created on the first call).  In a debug MS VC build, the text is
 * duplicated to the debugger output as well.  The file access is
 * guarded by `GC_write_cs` critical section if locking is needed.
 * Returns the number of bytes written; 0 if `len` is zero (or, in
 * a `NO_DEBUGGING` build, if the log file cannot be opened); -1 on
 * a failure (including a recursive call during an abort).
 */
STATIC int
GC_write(const char *buf, size_t len)
{
  BOOL res;
  DWORD written;
#  if defined(THREADS) && defined(GC_ASSERTIONS)
  /* This is to prevent infinite recursion at abort. */
  static GC_bool inside_write = FALSE;

  if (inside_write)
    return -1;
#  endif

  if (0 == len)
    return 0;
  IF_NEED_TO_LOCK(EnterCriticalSection(&GC_write_cs));
#  if defined(THREADS) && defined(GC_ASSERTIONS)
  if (GC_write_disabled) {
    inside_write = TRUE;
    ABORT("Assertion failure: GC_write called with write_disabled");
  }
#  endif
  /* Open the log file lazily (zero means no attempt has been made yet). */
  if (0 == GC_log) {
    GC_log = GC_CreateLogFile();
  }
  if (GC_log == INVALID_HANDLE_VALUE) {
    IF_NEED_TO_LOCK(LeaveCriticalSection(&GC_write_cs));
#  ifdef NO_DEBUGGING
    /*
     * Ignore open log failure (e.g., it might be caused by read-only folder
     * of the client application).
     */
    return 0;
#  else
    return -1;
#  endif
  }
  res = WriteFile(GC_log, buf, (DWORD)len, &written, NULL);
#  if defined(_MSC_VER) && defined(_DEBUG) && !defined(NO_CRT) \
      && !defined(NO_CRTDBGREPORT)
#    ifdef MSWINCE
  /* There is no `CrtDbgReport()` in WinCE. */
  {
    WCHAR wbuf[1024];

    /*
     * Always use Unicode variant of `OutputDebugString()`.  Note: the
     * source length parameter is of `int` type.
     */
    wbuf[MultiByteToWideChar(CP_ACP, 0 /* `dwFlags` */, buf, (int)len, wbuf,
                             sizeof(wbuf) / sizeof(wbuf[0]) - 1)]
        = 0;
    OutputDebugStringW(wbuf);
  }
#    else
  /*
   * The precision passed for `*` must be of `int` type; passing `size_t`
   * through the ellipsis is undefined (the types differ in size on
   * 64-bit targets).
   */
  _CrtDbgReport(_CRT_WARN, NULL, 0, NULL, "%.*s", (int)len, buf);
#    endif
#  endif
  IF_NEED_TO_LOCK(LeaveCriticalSection(&GC_write_cs));
  return res ? (int)written : -1;
}

/*
 * Note: the first argument is ignored, thus everything is written to
 * the log file.
 * TODO: This is pretty ugly...
 */
#  define WRITE(f, buf, len) GC_write(buf, len)

#elif defined(OS2)
/*
 * The output streams of the collector.  These are `NULL` until set by
 * `GC_set_files()`.
 */
STATIC FILE *GC_stdout = NULL;
STATIC FILE *GC_stderr = NULL;
STATIC FILE *GC_log = NULL;

/*
 * Give the default value to each of the collector output streams which
 * is still unset: `GC_stdout` defaults to `stdout`, while both
 * `GC_stderr` and `GC_log` default to `stderr`.  A stream which is
 * already set is left intact.
 */
STATIC void
GC_set_files(void)
{
  if (NULL == GC_stdout)
    GC_stdout = stdout;
  if (NULL == GC_stderr)
    GC_stderr = stderr;
  if (NULL == GC_log)
    GC_log = stderr;
}

/*
 * Output `len` bytes of `buf` to stream `f` and flush the stream.
 * Returns the item count reported by `fwrite()` converted to `int`.
 */
GC_INLINE int
GC_write(FILE *f, const char *buf, size_t len)
{
  int n_written = fwrite(buf, 1, len, f);

  fflush(f);
  return n_written;
}

/* Make sure the output streams are set before each write. */
#  define WRITE(f, buf, len) (GC_set_files(), GC_write(f, buf, len))

#elif defined(GC_ANDROID_LOG)

#  include <android/log.h>

/* The tag passed to the Android logger (could be redefined by client). */
#  ifndef GC_ANDROID_LOG_TAG
#    define GC_ANDROID_LOG_TAG "BDWGC"
#  endif

/*
 * In this configuration, the "files" are the Android log priorities
 * (the standard output and the log are mapped to the same priority).
 */
#  define GC_stdout ANDROID_LOG_DEBUG
#  define GC_stderr ANDROID_LOG_ERROR
#  define GC_log GC_stdout

/* Note: the length argument is not used (not even evaluated). */
#  define WRITE(level, buf, unused_len) \
    __android_log_write(level, GC_ANDROID_LOG_TAG, buf)

#elif defined(NN_PLATFORM_CTR)
int n3ds_log_write(const char *text, int length);
#  define WRITE(level, buf, len) n3ds_log_write(buf, len)

#elif defined(NINTENDO_SWITCH)
int switch_log_write(const char *text, int length);
#  define WRITE(level, buf, len) switch_log_write(buf, len)

#else

#  if !defined(ECOS) && !defined(NOSYS) && !defined(PLATFORM_WRITE) \
      && !defined(SN_TARGET_PSP2)
#    include <errno.h>
#  endif

/*
 * Write `len` bytes starting at `buf` to file descriptor `fd`, repeating
 * the write call until all the bytes are written.  A failed write is
 * retried if `errno` is `EAGAIN`; any other failure causes -1 to be
 * returned immediately.  On success, the number of the bytes written
 * (i.e. `len`) is returned.  The thread cancellation is disabled while
 * writing (by means of `DISABLE_CANCEL()`).  On the targets with no
 * write facility (`ECOS`, `NOSYS`, `PLATFORM_WRITE`, `SN_TARGET_PSP2`),
 * nothing is written and `len` is returned.
 */
STATIC int
GC_write(int fd, const char *buf, size_t len)
{
#  if defined(ECOS) || defined(PLATFORM_WRITE) || defined(SN_TARGET_PSP2) \
      || defined(NOSYS)
  UNUSED_ARG(fd);
#    ifdef ECOS
  /* FIXME: This seems to be defined nowhere at present. */
  /* `_Jv_diag_write(buf, len);` */
#    else
  /* No writing. */
#    endif
  UNUSED_ARG(buf);
  return (int)len;
#  else
  size_t bytes_written = 0;
  IF_CANCEL(int cancel_state;)

  DISABLE_CANCEL(cancel_state);
  while (bytes_written < len) {
    int result;

    /* Write the rest of the buffer (a partial write is possible). */
#    if defined(SOLARIS) && defined(THREADS)
    result = syscall(SYS_write, fd, buf + bytes_written, len - bytes_written);
#    elif defined(_MSC_VER)
    result = _write(fd, buf + bytes_written, (unsigned)(len - bytes_written));
#    else
    result = (int)write(fd, buf + bytes_written, len - bytes_written);
#    endif
    if (result < 0) {
      if (EAGAIN == errno) {
        /* Resource is temporarily unavailable. */
        continue;
      }
      RESTORE_CANCEL(cancel_state);
      return -1;
    }
#    ifdef LINT2
    if ((unsigned)result > len - bytes_written)
      ABORT("write() result cannot be bigger than requested length");
#    endif
    bytes_written += (unsigned)result;
  }
  RESTORE_CANCEL(cancel_state);
  return (int)bytes_written;
#  endif
}

/* The generic variant: `f` is a file descriptor. */
#  define WRITE(f, buf, len) GC_write(f, buf, len)
#endif /* !MSWINCE && !OS2 && !GC_ANDROID_LOG */

#ifndef GC_DISABLE_SNPRINTF
/*
 * The capacity of the buffer used for formatting (one extra byte is
 * allocated after it for the guard value).
 */
#  define BUFSZ 1024

/*
 * `GC_VSNPRINTF(buf, bufsz, format, args)` is mapped to the formatting
 * function available on the target.
 */
#  if defined(DJGPP) || defined(__STRICT_ANSI__)
/*
 * `vsnprintf` is missing in DJGPP (v2.0.3).  Note that the buffer size
 * is not passed to the formatting function in this case.
 */
#    define GC_VSNPRINTF(buf, bufsz, format, args) vsprintf(buf, format, args)
#  elif defined(_MSC_VER)
#    ifdef MSWINCE
/* `_vsnprintf` is deprecated in WinCE. */
#      define GC_VSNPRINTF StringCchVPrintfA
#    else
#      define GC_VSNPRINTF _vsnprintf
#    endif
#  else
#    define GC_VSNPRINTF vsnprintf
#  endif

/*
 * A variant of `printf` that is unlikely to call `malloc`, and is thus
 * safer to call from the collector in case `malloc` has been bound to
 * `GC_malloc`.  Floating-point arguments and formats should be avoided,
 * since the conversion is more likely to allocate memory.
 * Assumes that no more than `BUFSZ - 1` characters are written at once.
 * The last byte of `buf` (which must be an array, not a pointer) holds
 * a guard value; the collector aborts if the guard is found overwritten
 * after the formatting.  The macro should be used only inside a variadic
 * function with `format` as its last named parameter.
 */
#  define GC_PRINTF_FILLBUF(buf, format)                      \
    do {                                                      \
      va_list args;                                           \
      va_start(args, format);                                 \
      (buf)[sizeof(buf) - 1] = 0x15; /*< guard */             \
      (void)GC_VSNPRINTF(buf, sizeof(buf) - 1, format, args); \
      va_end(args);                                           \
      if ((buf)[sizeof(buf) - 1] != 0x15)                     \
        ABORT("GC_printf clobbered stack");                   \
    } while (0)

/*
 * Declare a local character buffer named `buf` and fill it with the
 * formatted message.
 */
#  define DECL_BUF_AND_PRINTF_TO(buf, format) \
    char buf[BUFSZ + 1];                      \
    GC_PRINTF_FILLBUF(buf, format)
#else
/*
 * At most, when `vsnprintf()` is unavailable, we could only print the
 * format string as is, not handling the format specifiers (if any), thus
 * skipping the rest of the `printf` arguments.
 */
#  define DECL_BUF_AND_PRINTF_TO(buf, format) const char *buf = (format)
#endif /* GC_DISABLE_SNPRINTF */

/*
 * Format the message and write it to `GC_stdout`, unless `GC_quiet` is
 * set.  A write failure causes an abort, except for:
 *   - NaCl, where write errors are ignored;
 *   - Cygwin and Win32 console-log builds, where a failure is ignored
 *     if `GC_stdout` is `GC_DEFAULT_STDOUT_FD`.
 */
void
GC_printf(const char *format, ...)
{
  if (!GC_quiet) {
    DECL_BUF_AND_PRINTF_TO(buf, format);
#ifdef NACL
    (void)WRITE(GC_stdout, buf, strlen(buf));
    /* Ignore errors silently. */
#else
    if (WRITE(GC_stdout, buf, strlen(buf)) < 0
#  if defined(CYGWIN32) || (defined(CONSOLE_LOG) && defined(MSWIN32))
        && GC_stdout != GC_DEFAULT_STDOUT_FD
#  endif
    ) {
      ABORT("write to stdout failed");
    }
#endif
  }
}

void
GC_err_printf(const char *format, ...)
{
  DECL_BUF_AND_PRINTF_TO(buf, format);
  GC_err_puts(buf);
}

/*
 * Format the message and write it to `GC_log`.  A write failure causes
 * an abort, except for:
 *   - NaCl, where write errors are ignored;
 *   - Cygwin and Win32 console-log builds, where a failure is ignored
 *     if `GC_log` is `GC_DEFAULT_STDERR_FD`.
 */
void
GC_log_printf(const char *format, ...)
{
  DECL_BUF_AND_PRINTF_TO(buf, format);
#ifdef NACL
  (void)WRITE(GC_log, buf, strlen(buf));
#else
  if (WRITE(GC_log, buf, strlen(buf)) < 0
#  if defined(CYGWIN32) || (defined(CONSOLE_LOG) && defined(MSWIN32))
      && GC_log != GC_DEFAULT_STDERR_FD
#  endif
  ) {
    ABORT("write to GC log failed");
  }
#endif
}

#ifndef GC_ANDROID_LOG

#  define GC_warn_printf GC_err_printf

#else

GC_INNER void
GC_info_log_printf(const char *format, ...)
{
  DECL_BUF_AND_PRINTF_TO(buf, format);
  (void)WRITE(ANDROID_LOG_INFO, buf, 0 /* unused */);
}

GC_INNER void
GC_verbose_log_printf(const char *format, ...)
{
  DECL_BUF_AND_PRINTF_TO(buf, format);
  /* Note: write errors are ignored. */
  (void)WRITE(ANDROID_LOG_VERBOSE, buf, 0);
}

STATIC void
GC_warn_printf(const char *format, ...)
{
  DECL_BUF_AND_PRINTF_TO(buf, format);
  (void)WRITE(ANDROID_LOG_WARN, buf, 0);
}

#endif /* GC_ANDROID_LOG */

/*
 * Write the NUL-terminated string `s` as is (no formatting is done) to
 * `GC_stderr`.
 */
void
GC_err_puts(const char *s)
{
  /* Note: write errors are ignored. */
  (void)WRITE(GC_stderr, s, strlen(s));
}

/*
 * The default warning procedure: print the warning by passing `msg` as
 * the format string and `arg` as its only argument to
 * `GC_warn_printf()`.
 */
STATIC void GC_CALLBACK
GC_default_warn_proc(const char *msg, GC_uintptr_t arg)
{
  /* TODO: Add assertion on argument to comply with `msg` (format). */
  GC_warn_printf(msg, arg);
}

/*
 * The warning procedure currently in use; it is accessed by
 * `GC_set_warn_proc()` and `GC_get_warn_proc()`.
 */
GC_INNER GC_warn_proc GC_current_warn_proc = GC_default_warn_proc;

/*
 * A warning procedure which discards the warnings, unless the statistic
 * printing is turned on (`GC_print_stats` is non-zero); in the latter
 * case the warning is passed to `GC_default_warn_proc()`.
 */
GC_API void GC_CALLBACK
GC_ignore_warn_proc(const char *msg, GC_uintptr_t arg)
{
  if (!GC_print_stats) {
    /* The warning is silently dropped. */
    return;
  }
  GC_default_warn_proc(msg, arg);
}

/*
 * Set the procedure to be used for the collector warnings.  `p` must be
 * non-`NULL` (checked by an assertion).  The update is performed while
 * holding the allocator lock.
 */
GC_API void GC_CALL
GC_set_warn_proc(GC_warn_proc p)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(p));
  LOCK();
  GC_current_warn_proc = p;
  UNLOCK();
}

/*
 * Return the warning procedure currently in use.  The value is read
 * while holding the allocator lock in the reader mode.
 */
GC_API GC_warn_proc GC_CALL
GC_get_warn_proc(void)
{
  GC_warn_proc cur_proc;

  READER_LOCK();
  cur_proc = GC_current_warn_proc;
  READER_UNLOCK();
  return cur_proc;
}

/*
 * Print (or display) a message before abnormal exit (including abort).
 * Invoked from `ABORT(msg)` macro (where `msg` is non-`NULL`) and from
 * `EXIT()` macro (`msg` is `NULL` in that case).
 * In a `SMALL_CONFIG` build, the function does nothing.  Otherwise:
 *   - the collection at exit is disabled (unless `DONT_USE_ATEXIT`);
 *   - a non-`NULL` message is shown in a message box (if
 *     `MSGBOX_ON_ERROR`) and is written, followed by a newline, to
 *     `GC_stderr` (or is passed to the Android logger);
 *   - if `GC_LOOP_ON_ABORT` environment variable is set, the function
 *     loops forever (unless `NO_DEBUGGING` or `GC_ANDROID_LOG`).
 */
STATIC void GC_CALLBACK
GC_default_on_abort(const char *msg)
{
#if !defined(SMALL_CONFIG)
#  ifndef DONT_USE_ATEXIT
  /* Disable at-exit garbage collection. */
  GC_skip_collect_atexit = TRUE;
#  endif

  if (msg != NULL) {
#  ifdef MSGBOX_ON_ERROR
    GC_win32_MessageBoxA(msg, "Fatal error in GC", MB_ICONERROR | MB_OK);
    /* Also duplicate `msg` to the collector log file. */
#  endif

#  ifndef GC_ANDROID_LOG
    /*
     * Avoid calling `GC_err_printf()` here, as `GC_on_abort()` could
     * be called from it.  Note 1: this is not an atomic output.
     * Note 2: possible write errors are ignored.
     */
#    if defined(GC_WIN32_THREADS) && defined(GC_ASSERTIONS) \
        && ((defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE))
    if (!GC_write_disabled)
#    endif
    {
      /* The newline is output only if the message itself is written. */
      if (WRITE(GC_stderr, msg, strlen(msg)) >= 0)
        (void)WRITE(GC_stderr, "\n", 1);
    }
#  else
    __android_log_assert("*" /* `cond` */, GC_ANDROID_LOG_TAG, "%s\n", msg);
#  endif
#  if defined(HAIKU) && !defined(DONT_CALL_DEBUGGER)
    /*
     * This will cause the crash reason to appear in any debug reports
     * generated (by the default system application crash dialog).
     */
    debugger(msg);
#  endif
  }

#  if !defined(NO_DEBUGGING) && !defined(GC_ANDROID_LOG)
  if (GETENV("GC_LOOP_ON_ABORT") != NULL) {
    /*
     * In many cases it is easier to debug a running process.
     * It is arguably nicer to sleep, but that makes it harder to look
     * at the thread if the debugger does not know much about threads.
     */
    for (;;) {
      /* Empty. */
    }
  }
#  endif
#else
  UNUSED_ARG(msg);
#endif
}

#ifndef SMALL_CONFIG
/*
 * The abort notifier currently in use (not defined in a `SMALL_CONFIG`
 * build); it is accessed by `GC_set_abort_func()` and
 * `GC_get_abort_func()`.
 */
GC_abort_func GC_on_abort = GC_default_on_abort;
#endif

/*
 * Set the abort notifier.  `fn` must be non-`NULL` (checked by
 * an assertion).  The update is performed while holding the allocator
 * lock.  In a `SMALL_CONFIG` build, `fn` is ignored.
 */
GC_API void GC_CALL
GC_set_abort_func(GC_abort_func fn)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fn));
  LOCK();
#ifndef SMALL_CONFIG
  GC_on_abort = fn;
#else
  UNUSED_ARG(fn);
#endif
  UNLOCK();
}

/*
 * Return the abort notifier currently in use, which is always
 * `GC_default_on_abort` in a `SMALL_CONFIG` build.  The value is read
 * while holding the allocator lock in the reader mode.
 */
GC_API GC_abort_func GC_CALL
GC_get_abort_func(void)
{
  GC_abort_func cur_fn;

  READER_LOCK();
#ifdef SMALL_CONFIG
  cur_fn = GC_default_on_abort;
#else
  cur_fn = GC_on_abort;
  GC_ASSERT(cur_fn != 0);
#endif
  READER_UNLOCK();
  return cur_fn;
}

#if defined(NEED_SNPRINTF_SLDS) /* && GC_DISABLE_SNPRINTF */
/*
 * A minimal replacement of `snprintf(buf, buf_sz, "%s%ld%s", prefix, lv,
 * suffix)` which does not rely on the C library formatting functions.
 * `buf_sz` is the capacity of `buf` (must be non-zero).  Each of the
 * three parts is truncated, if needed, so that the result (including
 * the terminating NUL) always fits in `buf`.
 */
GC_INNER void
GC_snprintf_s_ld_s(char *buf, size_t buf_sz, const char *prefix, long lv,
                   const char *suffix)
{
  char digits[20];
  size_t first_digit = sizeof(digits);
  size_t n;

  GC_ASSERT(buf_sz > 0);

  /* Emit the prefix, always leaving the room for the terminating NUL. */
  n = strlen(prefix);
  if (UNLIKELY(n >= buf_sz))
    n = buf_sz - 1;
  BCOPY(prefix, buf, n);
  buf += n;
  buf_sz -= n;

  /*
   * Emit the minus sign (if there is the room for it).  The number is
   * processed in the negated (non-positive) form, thus the smallest
   * value of `long` type is handled without an overflow.
   */
  if (lv < 0) {
    if (LIKELY(buf_sz > 1)) {
      *buf = '-';
      buf++;
      buf_sz--;
    }
  } else {
    lv = -lv;
  }

  /* Produce the decimal digits, starting from the least significant. */
  for (;;) {
    long quot = lv / 10;

    if (UNLIKELY(0 == first_digit))
      break; /*< no more room in `digits` */
    first_digit--;
    /* Note: `quot * 10 - lv` is the absolute value of the remainder. */
    digits[first_digit] = (char)(quot * 10 - lv + '0');
    lv = quot;
    if (lv >= 0)
      break;
  }

  /* Emit the digits (the least significant ones are cut if no room). */
  n = sizeof(digits) - first_digit;
  if (UNLIKELY(n >= buf_sz))
    n = buf_sz - 1;
  BCOPY(&digits[first_digit], buf, n);
  buf += n;
  buf_sz -= n;

  /* Emit the suffix (if it is non-empty). */
  n = strlen(suffix);
  if (n > 0) {
    if (UNLIKELY(n >= buf_sz))
      n = buf_sz - 1;
    BCOPY(suffix, buf, n);
    buf += n;
  }

  /* Finally, terminate the resulting string. */
  *buf = '\0';
}
#endif /* NEED_SNPRINTF_SLDS */

/*
 * Decrement the collection-disabling counter (`GC_dont_gc`), which must
 * be non-zero on entry.  If the counter reaches zero and the heap is
 * found bigger than it was at the moment of the collection disabling,
 * then a warning about the heap growth is issued.  The allocator lock
 * is held during the operation.
 */
GC_API void GC_CALL
GC_enable(void)
{
  LOCK();
  /* Ensure no counter underflow. */
  GC_ASSERT(GC_dont_gc != 0);
  GC_dont_gc--;
  if (0 == GC_dont_gc && GC_heapsize_on_gc_disable < GC_heapsize) {
    WARN("Heap grown by %" WARN_PRIuPTR " KiB while GC was disabled\n",
         (GC_heapsize - GC_heapsize_on_gc_disable) >> 10);
  }
  UNLOCK();
}

/*
 * Increment the collection-disabling counter (`GC_dont_gc`).  If the
 * counter was zero, then the current heap size is remembered (to be
 * examined later by `GC_enable()`).  The allocator lock is held during
 * the operation.
 */
GC_API void GC_CALL
GC_disable(void)
{
  LOCK();
  if (0 == GC_dont_gc) {
    /* This is the outermost disabling request. */
    GC_heapsize_on_gc_disable = GC_heapsize;
  }
  GC_dont_gc++;
  UNLOCK();
}

/*
 * Return 1 if the collection-disabling counter (`GC_dont_gc`) is
 * non-zero, 0 otherwise.  No lock is acquired.
 */
GC_API int GC_CALL
GC_is_disabled(void)
{
  return GC_dont_gc ? 1 : 0;
}

/* Helper procedures for new kind creation. */

/*
 * Allocate (by `GC_INTERNAL_MALLOC()`, as a pointer-free object) and
 * clear an array of `MAXOBJGRANULES + 1` free-list heads.  The allocator
 * lock must be held by the caller.  The function aborts in case of
 * an allocation failure, thus the result is never `NULL`.
 */
GC_API void **GC_CALL
GC_new_free_list_inner(void)
{
  size_t fl_bytes = (MAXOBJGRANULES + 1) * sizeof(ptr_t);
  void *fl;

  GC_ASSERT(I_HOLD_LOCK());
  fl = GC_INTERNAL_MALLOC(fl_bytes, PTRFREE);
  if (NULL == fl)
    ABORT("Failed to allocate free list for new kind");
  BZERO(fl, fl_bytes);
  return (void **)fl;
}

/*
 * Same as `GC_new_free_list_inner()` but the allocator lock is acquired
 * (and released) by this function itself.
 */
GC_API void **GC_CALL
GC_new_free_list(void)
{
  void **fl;

  LOCK();
  fl = GC_new_free_list_inner();
  UNLOCK();
  return fl;
}

/*
 * Register a new object kind and return its index in `GC_obj_kinds`.
 * `fl` is the array of the free-list heads for the kind (must be
 * non-`NULL`), `descr` is the descriptor template, `adjust` and `clear`
 * are of boolean type (stored to `ok_relocate_descr` and `ok_init`
 * fields, respectively).  The function aborts if there is no room for
 * one more kind.  No lock is acquired here.
 */
GC_API unsigned GC_CALL
GC_new_kind_inner(void **fl, GC_word descr, int adjust, int clear)
{
  unsigned kind = GC_n_kinds;

  GC_ASSERT(NONNULL_ARG_NOT_NULL(fl));
  GC_ASSERT(!adjust || 1 == adjust);
  /*
   * An object which is not cleared (when it is moved to the free list)
   * should have a zero descriptor, denoting a pointer-free object (and,
   * consequently, the object size should not be added to the descriptor
   * template).
   */
  GC_ASSERT(1 == clear || (0 == descr && !adjust && !clear));
  if (kind >= MAXOBJKINDS) {
    ABORT("Too many kinds");
  } else {
    GC_ASSERT(kind > 0);
    GC_n_kinds++;
    /* Fill in the entry of the kind table. */
    GC_obj_kinds[kind].ok_freelist = fl;
    GC_obj_kinds[kind].ok_reclaim_list = 0;
    GC_obj_kinds[kind].ok_descriptor = descr;
    GC_obj_kinds[kind].ok_relocate_descr = (GC_bool)adjust;
    GC_obj_kinds[kind].ok_init = (GC_bool)clear;
#ifdef ENABLE_DISCLAIM
    GC_obj_kinds[kind].ok_mark_unconditionally = FALSE;
    GC_obj_kinds[kind].ok_disclaim_proc = 0;
#endif
  }
  return kind;
}

/*
 * Same as `GC_new_kind_inner()` but the allocator lock is acquired (and
 * released) by this function itself.
 */
GC_API unsigned GC_CALL
GC_new_kind(void **fl, GC_word descr, int adjust, int clear)
{
  unsigned kind;

  LOCK();
  kind = GC_new_kind_inner(fl, descr, adjust, clear);
  UNLOCK();
  return kind;
}

/*
 * Register mark procedure `proc` and return its index in
 * `GC_mark_procs`.  The function aborts if there is no room for one
 * more procedure.  No lock is acquired here.
 */
GC_API unsigned GC_CALL
GC_new_proc_inner(GC_mark_proc proc)
{
  unsigned proc_idx = GC_n_mark_procs;

  if (proc_idx >= GC_MAX_MARK_PROCS) {
    ABORT("Too many mark procedures");
  } else {
    GC_n_mark_procs++;
    GC_mark_procs[proc_idx] = proc;
  }
  return proc_idx;
}

/*
 * Same as `GC_new_proc_inner()` but the allocator lock is acquired (and
 * released) by this function itself.
 */
GC_API unsigned GC_CALL
GC_new_proc(GC_mark_proc proc)
{
  unsigned proc_idx;

  LOCK();
  proc_idx = GC_new_proc_inner(proc);
  UNLOCK();
  return proc_idx;
}

/*
 * Invoke `fn(client_data)` while holding the allocator lock and return
 * the value returned by `fn`.
 */
GC_API void *GC_CALL
GC_call_with_alloc_lock(GC_fn_type fn, void *client_data)
{
  void *fn_res;

  LOCK();
  fn_res = fn(client_data);
  UNLOCK();
  return fn_res;
}

#ifdef THREADS
/*
 * Acquire the allocator lock.  Defined only in a multi-threaded build.
 * The lock stays held on return.
 */
GC_API void GC_CALL
GC_alloc_lock(void)
{
  LOCK();
}

/*
 * Release the allocator lock.  Defined only in a multi-threaded build.
 */
GC_API void GC_CALL
GC_alloc_unlock(void)
{
  UNLOCK();
}

/*
 * Invoke `fn(client_data)` while holding the allocator lock in the
 * reader mode and return the value returned by `fn`.  If `release` is
 * non-zero and the real reader lock is available
 * (`HAS_REAL_READER_LOCK`), then the lock is released by means of
 * `READER_UNLOCK_RELEASE()` instead of `READER_UNLOCK()`; otherwise
 * `release` is ignored.
 */
GC_API void *GC_CALL
GC_call_with_reader_lock(GC_fn_type fn, void *client_data, int release)
{
  void *result;

  READER_LOCK();
  result = fn(client_data);
#  ifdef HAS_REAL_READER_LOCK
  if (release) {
    READER_UNLOCK_RELEASE();
#    ifdef LINT2
    GC_noop1((unsigned)release);
#    endif
    return result;
  }
#  else
  UNUSED_ARG(release);
#  endif
  READER_UNLOCK();
  return result;
}
#endif /* THREADS */

/*
 * Invoke `fn(&base, arg)`, where `base` is a local `GC_stack_base`
 * structure with `mem_base` field set to the approximate value of the
 * current stack pointer (`reg_base` field is set too on IA-64 and E2K).
 * Returns the value returned by `fn`.
 */
GC_ATTR_NOINLINE
GC_API void *GC_CALL
GC_call_with_stack_base(GC_stack_base_func fn, void *arg)
{
  struct GC_stack_base base;
  void *result;

  STORE_APPROX_SP_TO(*(volatile ptr_t *)&base.mem_base);
#ifdef IA64
  base.reg_base = GC_save_regs_in_stack();
  /*
   * TODO: Unnecessarily flushes register stack, but that probably
   * does not hurt.
   */
#elif defined(E2K)
  {
    unsigned long long sz_ull;

    GET_PROCEDURE_STACK_SIZE_INNER(&sz_ull);
    base.reg_base = NUMERIC_TO_VPTR(sz_ull);
  }
#endif
  /* The function is called through a volatile-qualified pointer. */
  result = (*(GC_stack_base_func volatile *)&fn)(&base, arg);
  /*
   * Strongly discourage the compiler from treating the above as
   * a tail call.
   */
  GC_noop1(COVERT_DATAFLOW(ADDR(&base)));
  return result;
}

#ifndef THREADS

/*
 * The stack pointer value saved by `GC_do_blocking_inner()`.  It is
 * non-`NULL` only while the function passed to `GC_do_blocking()` is
 * being executed (and `GC_call_with_gc_active()` is not in progress
 * inside it).
 */
GC_INNER ptr_t GC_blocked_sp = NULL;

#  ifdef IA64
/* The register stack counterpart of `GC_blocked_sp`. */
STATIC ptr_t GC_blocked_register_sp = NULL;
#  endif

/*
 * The head of the list of the "stack sections" set up by
 * `GC_call_with_gc_active()`.
 */
GC_INNER struct GC_traced_stack_sect_s *GC_traced_stack_sect = NULL;

/*
 * The single-threaded variant; this is nearly the same as in
 * `pthread_support.c` file.
 * Invoke `fn(client_data)` and return its result.  If the call occurs
 * inside `GC_do_blocking()` (i.e. `GC_blocked_sp` is non-`NULL`), then
 * a new "stack section" is pushed to `GC_traced_stack_sect` list and
 * `GC_blocked_sp` is cleared for the duration of the call; both are
 * restored afterwards.  Also, `GC_stackbottom` is adjusted if the
 * current stack pointer is found to be beyond it.
 */
GC_ATTR_NOINLINE
GC_API void *GC_CALL
GC_call_with_gc_active(GC_fn_type fn, void *client_data)
{
  struct GC_traced_stack_sect_s stacksect;
  GC_ASSERT(GC_is_initialized);

  /*
   * Adjust our stack bottom pointer (this could happen if
   * `GC_get_main_stack_base()` is unimplemented or broken for
   * the platform).  Note: `stacksect` variable is reused here.
   */
  STORE_APPROX_SP_TO(*(volatile ptr_t *)&stacksect.saved_stack_ptr);
  if (HOTTER_THAN(GC_stackbottom, stacksect.saved_stack_ptr))
    GC_stackbottom = stacksect.saved_stack_ptr;

  if (GC_blocked_sp == NULL) {
    /* We are not inside `GC_do_blocking()` - do nothing more. */
    client_data = (*(GC_fn_type volatile *)&fn)(client_data);
    /* Prevent treating the above as a tail call. */
    GC_noop1(COVERT_DATAFLOW(ADDR(&stacksect)));
    return client_data; /*< result */
  }

  /* Setup new "stack section". */
  stacksect.saved_stack_ptr = GC_blocked_sp;
#  ifdef IA64
  /* This is the same as in `GC_call_with_stack_base()`. */
  stacksect.backing_store_end = GC_save_regs_in_stack();
  /* Unnecessarily flushes register stack, but that probably does not hurt. */
  stacksect.saved_backing_store_ptr = GC_blocked_register_sp;
#  endif
  stacksect.prev = GC_traced_stack_sect;
  GC_blocked_sp = NULL;
  GC_traced_stack_sect = &stacksect;

  client_data = (*(GC_fn_type volatile *)&fn)(client_data);
  /* The state should be the same as before the call. */
  GC_ASSERT(GC_blocked_sp == NULL);
  GC_ASSERT(GC_traced_stack_sect == &stacksect);

#  if defined(CPPCHECK)
  GC_noop1_ptr(GC_traced_stack_sect);
  GC_noop1_ptr(GC_blocked_sp);
#  endif
  /* Restore original "stack section". */
  GC_traced_stack_sect = stacksect.prev;
#  ifdef IA64
  GC_blocked_register_sp = stacksect.saved_backing_store_ptr;
#  endif
  GC_blocked_sp = stacksect.saved_stack_ptr;

  return client_data; /*< result */
}

/*
 * The single-threaded variant; this is nearly the same as in
 * `pthread_support.c` file.
 * `data` points to a `blocking_data` structure.  The current stack
 * pointer is saved to `GC_blocked_sp`, then the client function is
 * invoked (its result is stored to `client_data` field of the same
 * structure), and, finally, `GC_blocked_sp` is cleared.  `context` is
 * not used.
 */
STATIC void
GC_do_blocking_inner(ptr_t data, void *context)
{
  struct blocking_data *d = (struct blocking_data *)data;

  UNUSED_ARG(context);
  GC_ASSERT(GC_is_initialized);
  GC_ASSERT(GC_blocked_sp == NULL);
#  ifdef SPARC
  GC_blocked_sp = GC_save_regs_in_stack();
#  else
  GC_blocked_sp = GC_approx_sp();
#    ifdef IA64
  GC_blocked_register_sp = GC_save_regs_in_stack();
#    endif
#  endif

  /* Run the client function; its argument is replaced by the result. */
  d->client_data = d->fn(d->client_data);

  GC_ASSERT(GC_blocked_sp != NULL);
#  if defined(CPPCHECK)
  GC_noop1_ptr(GC_blocked_sp);
#  endif
  GC_blocked_sp = NULL;
}

/*
 * The single-threaded variant.  Set `GC_stackbottom` (and, on IA-64,
 * `GC_register_stackbottom`) from `sb`.  `gc_thread_handle` should be
 * either `NULL` or the value returned by `GC_get_my_stackbottom()`
 * (checked by an assertion only).  The function should not be called
 * while `GC_do_blocking()` or `GC_call_with_gc_active()` is in progress
 * (as checked by the assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_stackbottom(void *gc_thread_handle, const struct GC_stack_base *sb)
{
  GC_ASSERT(sb->mem_base != NULL);
  GC_ASSERT(NULL == gc_thread_handle || &GC_stackbottom == gc_thread_handle);
  GC_ASSERT(NULL == GC_blocked_sp
            && NULL == GC_traced_stack_sect); /*< for now */
  UNUSED_ARG(gc_thread_handle);

  GC_stackbottom = (char *)sb->mem_base;
#  ifdef IA64
  GC_register_stackbottom = (ptr_t)sb->reg_base;
#  endif
}

/*
 * The single-threaded variant.  Store the current stack bottom to `sb`
 * (`reg_base` field is set to `GC_register_stackbottom` on IA-64, and to
 * `NULL` on E2K).  The returned value is the address of `GC_stackbottom`
 * variable, which serves as the "thread handle" accepted by
 * `GC_set_stackbottom()`.
 */
GC_API void *GC_CALL
GC_get_my_stackbottom(struct GC_stack_base *sb)
{
  GC_ASSERT(GC_is_initialized);
  sb->mem_base = GC_stackbottom;
#  ifdef IA64
  sb->reg_base = GC_register_stackbottom;
#  elif defined(E2K)
  sb->reg_base = NULL;
#  endif
  return &GC_stackbottom; /*< `gc_thread_handle` */
}

#endif /* !THREADS */

/*
 * Invoke `fn(client_data)` by means of `GC_do_blocking_inner()`, which
 * is called through `GC_with_callee_saves_pushed()`.  Returns the value
 * returned by `fn`.
 */
GC_API void *GC_CALL
GC_do_blocking(GC_fn_type fn, void *client_data)
{
  struct blocking_data blocking;

  blocking.client_data = client_data;
  blocking.fn = fn;
  GC_with_callee_saves_pushed(GC_do_blocking_inner, (ptr_t)(&blocking));
  /* The field holds the result of the client function now. */
  return blocking.client_data;
}

#if !defined(NO_DEBUGGING)
/*
 * Print the collector state (same as `GC_dump_named(NULL)`) while
 * holding the allocator lock in the reader mode.
 */
GC_API void GC_CALL
GC_dump(void)
{
  READER_LOCK();
  GC_dump_named(NULL);
  READER_UNLOCK();
}

/*
 * Print the collector state: the static roots, the heap sections, the
 * free blocks, the blocks in use, and the finalization information
 * (unless `GC_NO_FINALIZATION`).  The dump is entitled either with
 * `name` or, if `name` is `NULL`, with the current collection number.
 * The time elapsed since the collector initialization is printed too
 * (unless `NO_CLOCK`).  No lock is acquired here.
 */
GC_API void GC_CALL
GC_dump_named(const char *name)
{
#  ifndef NO_CLOCK
  CLOCK_TYPE now;

  GET_TIME(now);
#  endif
  if (NULL == name) {
    GC_printf("\n***GC Dump collection #%lu\n", (unsigned long)GC_gc_no);
  } else {
    GC_printf("\n***GC Dump %s\n", name);
  }
#  ifndef NO_CLOCK
  /* Note: the value wraps in about 49 days if `long` is of 32 bits. */
  GC_printf("Time since GC init: %lu ms\n", MS_TIME_DIFF(now, GC_init_time));
#  endif

  GC_printf("\n***Static roots:\n");
  GC_print_static_roots();

  GC_printf("\n***Heap sections:\n");
  GC_print_heap_sects();

  GC_printf("\n***Free blocks:\n");
  GC_print_hblkfreelist();

  GC_printf("\n***Blocks in use:\n");
  GC_print_block_list();

#  ifndef GC_NO_FINALIZATION
  GC_dump_finalization();
#  endif
}
#endif /* !NO_DEBUGGING */

/*
 * Return the heap size minus the size of the large free blocks
 * (`GC_heapsize - GC_large_free_bytes`).  The values are read while
 * holding the allocator lock in the reader mode.
 */
GC_API GC_word GC_CALL
GC_get_memory_use(void)
{
  word used_bytes;

  READER_LOCK();
  GC_ASSERT(GC_large_free_bytes <= GC_heapsize);
  used_bytes = GC_heapsize - GC_large_free_bytes;
  READER_UNLOCK();
  return used_bytes;
}

/* Getter functions for the public read-only variables. */

/* Return the current value of `GC_gc_no`.  No lock is acquired. */
GC_API GC_word GC_CALL
GC_get_gc_no(void)
{
  return GC_gc_no;
}

#ifndef PARALLEL_MARK
/*
 * The stub used if the parallel marker support is not compiled in: the
 * requested number of markers is ignored.
 */
GC_API void GC_CALL
GC_set_markers_count(unsigned markers)
{
  UNUSED_ARG(markers);
}
#endif

/*
 * Return the current value of `GC_parallel`, or zero in
 * a single-threaded build.  No lock is acquired.
 */
GC_API int GC_CALL
GC_get_parallel(void)
{
#ifdef THREADS
  return GC_parallel;
#else
  return 0;
#endif
}

/*
 * Setter and getter functions for the public R/W function variables.
 * These functions are synchronized (like `GC_set_warn_proc()` and
 * `GC_get_warn_proc()`).
 */

/*
 * Set `GC_oom_fn`.  `fn` must be non-`NULL` (checked by an assertion).
 * The update is performed while holding the allocator lock.
 */
GC_API void GC_CALL
GC_set_oom_fn(GC_oom_func fn)
{
  GC_ASSERT(NONNULL_ARG_NOT_NULL(fn));
  LOCK();
  GC_oom_fn = fn;
  UNLOCK();
}

/*
 * Return the current value of `GC_oom_fn`.  The value is read while
 * holding the allocator lock in the reader mode.
 */
GC_API GC_oom_func GC_CALL
GC_get_oom_fn(void)
{
  GC_oom_func cur_fn;

  READER_LOCK();
  cur_fn = GC_oom_fn;
  READER_UNLOCK();
  return cur_fn;
}

/*
 * Set `GC_on_heap_resize` event notifier.  The update is performed
 * while holding the allocator lock.
 */
GC_API void GC_CALL
GC_set_on_heap_resize(GC_on_heap_resize_proc fn)
{
  /* `fn` may be 0 (means no event notifier). */
  LOCK();
  GC_on_heap_resize = fn;
  UNLOCK();
}

/*
 * Return the current value of `GC_on_heap_resize` (zero means no event
 * notifier).  The value is read while holding the allocator lock in the
 * reader mode.
 */
GC_API GC_on_heap_resize_proc GC_CALL
GC_get_on_heap_resize(void)
{
  GC_on_heap_resize_proc cur_fn;

  READER_LOCK();
  cur_fn = GC_on_heap_resize;
  READER_UNLOCK();
  return cur_fn;
}

/*
 * Set `GC_finalizer_notifier`.  The update is performed while holding
 * the allocator lock.
 */
GC_API void GC_CALL
GC_set_finalizer_notifier(GC_finalizer_notifier_proc fn)
{
  /* `fn` may be 0 (means no finalizer notifier). */
  LOCK();
  GC_finalizer_notifier = fn;
  UNLOCK();
}

/*
 * Return the current value of `GC_finalizer_notifier` (zero means no
 * finalizer notifier).  The value is read while holding the allocator
 * lock in the reader mode.
 */
GC_API GC_finalizer_notifier_proc GC_CALL
GC_get_finalizer_notifier(void)
{
  GC_finalizer_notifier_proc cur_fn;

  READER_LOCK();
  cur_fn = GC_finalizer_notifier;
  READER_UNLOCK();
  return cur_fn;
}

/*
 * Setter and getter functions for the public numeric R/W variables.
 * It is safe to call these functions even before `GC_INIT()`.
 * These functions are unsynchronized and, if called after `GC_INIT()`,
 * should be typically invoked inside the context of
 * `GC_call_with_alloc_lock()` (or `GC_call_with_reader_lock()` in case
 * of the getters) to prevent data race (unless it is guaranteed the
 * collector is not multi-threaded at that execution point).
 */

/*
 * Store `value` to `GC_find_leak`.  In a `NO_FIND_LEAK` build, there is
 * nothing to set: the function aborts if `value` is non-zero, and does
 * nothing otherwise.  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_find_leak(int value)
{
  /* `value` is of boolean type. */
#ifdef NO_FIND_LEAK
  if (value)
    ABORT("Find-leak mode is unsupported");
#else
  GC_find_leak = value;
#endif
}

/*
 * Return the value of `GC_find_leak_inner`.  No lock is acquired.
 * NOTE(review): presumably `GC_find_leak_inner` is an alias of
 * `GC_find_leak` (or a constant in a `NO_FIND_LEAK` build) - confirm
 * against its definition.
 */
GC_API int GC_CALL
GC_get_find_leak(void)
{
  return GC_find_leak_inner;
}

/*
 * Set `GC_all_interior_pointers` to 1 if `value` is non-zero, and to
 * 0 otherwise.  If the collector is already initialized, then the
 * offsets table is re-initialized (and the black-listing is switched to
 * the no-interior-pointers mode if the new value is zero) while holding
 * the allocator lock.
 */
GC_API void GC_CALL
GC_set_all_interior_pointers(int value)
{
  GC_all_interior_pointers = (int)(value != 0);
  if (!GC_is_initialized) {
    /* Nothing else to do before the collector initialization. */
    return;
  }

  /*
   * Changing `GC_all_interior_pointers` value after the collector
   * initialization is not recommended, but it seems it could work
   * correctly even after switching the mode.
   */
  LOCK();
  /* Note: this resets manual offsets as well. */
  GC_initialize_offsets();
#ifndef NO_BLACK_LISTING
  if (0 == GC_all_interior_pointers)
    GC_bl_init_no_interiors();
#endif
  UNLOCK();
}

/* Return the value of `GC_all_interior_pointers`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_all_interior_pointers(void)
{
  return GC_all_interior_pointers;
}

/*
 * Store `value` to `GC_finalize_on_demand`.  `value` must not be -1
 * (checked by an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_finalize_on_demand(int value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT(value != -1);
  /* `value` is of boolean type. */
  GC_finalize_on_demand = value;
}

/* Return the value of `GC_finalize_on_demand`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_finalize_on_demand(void)
{
  return GC_finalize_on_demand;
}

/*
 * Store `value` to `GC_java_finalization`.  `value` must not be -1
 * (checked by an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_java_finalization(int value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT(value != -1);
  /* `value` is of boolean type. */
  GC_java_finalization = value;
}

/* Return the value of `GC_java_finalization`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_java_finalization(void)
{
  return GC_java_finalization;
}

/*
 * Store `value` to `GC_dont_expand`.  `value` must not be -1 (checked by
 * an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_dont_expand(int value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT(value != -1);
  /* `value` is of boolean type. */
  GC_dont_expand = value;
}

/* Return the value of `GC_dont_expand`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_dont_expand(void)
{
  return GC_dont_expand;
}

/*
 * Store `value` to `GC_no_dls`.  `value` must not be -1 (checked by
 * an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_no_dls(int value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT(value != -1);
  /* `value` is of boolean type. */
  GC_no_dls = value;
}

/* Return the value of `GC_no_dls`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_no_dls(void)
{
  return GC_no_dls;
}

/*
 * Store `value` to `GC_non_gc_bytes` (no validation of the value is
 * performed).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_non_gc_bytes(GC_word value)
{
  GC_non_gc_bytes = value;
}

/* Return the value of `GC_non_gc_bytes`.  No lock is acquired. */
GC_API GC_word GC_CALL
GC_get_non_gc_bytes(void)
{
  return GC_non_gc_bytes;
}

/*
 * Store `value` to `GC_free_space_divisor`.  `value` must be non-zero
 * (checked by an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_free_space_divisor(GC_word value)
{
  GC_ASSERT(value > 0);
  GC_free_space_divisor = value;
}

/* Return the value of `GC_free_space_divisor`.  No lock is acquired. */
GC_API GC_word GC_CALL
GC_get_free_space_divisor(void)
{
  return GC_free_space_divisor;
}

/*
 * Store `value` to `GC_max_retries`.  `value` must not be equal to -1
 * if treated as a signed number (checked by an assertion).  No lock is
 * acquired.
 */
GC_API void GC_CALL
GC_set_max_retries(GC_word value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT((GC_signed_word)value != -1);
  GC_max_retries = value;
}

/* Return the value of `GC_max_retries`.  No lock is acquired. */
GC_API GC_word GC_CALL
GC_get_max_retries(void)
{
  return GC_max_retries;
}

/*
 * Store `value` to `GC_dont_precollect`.  `value` must not be -1
 * (checked by an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_dont_precollect(int value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT(value != -1);
  /* `value` is of boolean type. */
  GC_dont_precollect = value;
}

/* Return the value of `GC_dont_precollect`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_dont_precollect(void)
{
  return GC_dont_precollect;
}

/*
 * Store `value` to `GC_full_freq`.  `value` must be non-negative
 * (checked by an assertion).  No lock is acquired.
 */
GC_API void GC_CALL
GC_set_full_freq(int value)
{
  GC_ASSERT(value >= 0);
  GC_full_freq = value;
}

/* Return the value of `GC_full_freq`.  No lock is acquired. */
GC_API int GC_CALL
GC_get_full_freq(void)
{
  return GC_full_freq;
}

/*
 * Store `value` to `GC_time_limit`.  `value` must not be equal to -1
 * if treated as a signed number (checked by an assertion).  No lock is
 * acquired.
 */
GC_API void GC_CALL
GC_set_time_limit(unsigned long value)
{
  /* Note: -1 was used to retrieve old value in gc-7.2. */
  GC_ASSERT((long)value != -1L);
  GC_time_limit = value;
}

/* Return the value of `GC_time_limit`.  No lock is acquired. */
GC_API unsigned long GC_CALL
GC_get_time_limit(void)
{
  return GC_time_limit;
}

/*
 * Store `value` (converted to `GC_bool`) to `GC_force_unmap_on_gcollect`.
 * No lock is acquired.
 */
GC_API void GC_CALL
GC_set_force_unmap_on_gcollect(int value)
{
  GC_force_unmap_on_gcollect = (GC_bool)value;
}

/*
 * Return the value of `GC_force_unmap_on_gcollect` (converted to `int`).
 * No lock is acquired.
 */
GC_API int GC_CALL
GC_get_force_unmap_on_gcollect(void)
{
  return (int)GC_force_unmap_on_gcollect;
}

/*
 * Report the out-of-memory condition by printing a message (by means of
 * `GC_err_printf()`) and terminate the program by `EXIT()`.
 */
GC_API GC_OOM_ABORT_THROW_ATTRIBUTE void GC_CALL
GC_abort_on_oom(void)
{
  GC_err_printf("Insufficient memory for the allocation\n");
  EXIT();
}

/* Return the value of `HBLKSIZE` (converted to `size_t`). */
GC_API size_t GC_CALL
GC_get_hblk_size(void)
{
  return (size_t)HBLKSIZE;
}

/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999 by Hewlett-Packard Company.  All rights reserved.
 * Copyright (c) 2008-2025 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */


#if (defined(MPROTECT_VDB) && !defined(MSWIN32) && !defined(MSWINCE)) \
    || (defined(SOLARIS) && defined(THREADS)) || defined(OPENBSD)
#  include <signal.h>
#endif

#if defined(UNIX_LIKE) || defined(CYGWIN32) || defined(NACL) \
    || defined(SYMBIAN)
#  include <fcntl.h>
#endif

#ifdef LINUX
#  include <ctype.h>
#endif

/*
 * Blatantly OS-dependent routines, except for those that are related
 * to dynamic loading.
 */

#ifdef IRIX5
#  include <malloc.h> /*< for locking */
#  include <sys/uio.h>
#endif

#if defined(MMAP_SUPPORTED) || defined(ADD_HEAP_GUARD_PAGES)
#  if defined(USE_MUNMAP) && !defined(USE_MMAP) && !defined(CPPCHECK)
#    error Invalid config: USE_MUNMAP requires USE_MMAP
#  endif
#  include <sys/mman.h>
#  include <sys/stat.h>
#endif

#if defined(LINUX) && defined(SPECIFIC_MAIN_STACKBOTTOM)        \
    || defined(ADD_HEAP_GUARD_PAGES) || defined(MMAP_SUPPORTED) \
    || defined(NEED_PROC_MAPS)
#  include <errno.h>
#endif

#if defined(DARWIN) && !defined(DYNAMIC_LOADING) \
    && !defined(GC_DONT_REGISTER_MAIN_STATIC_DATA)
#  include <mach-o/getsect.h> /*< for `get_etext` and friends */
#endif

#ifdef DJGPP
/*
 * Apparently necessary for djgpp 2.01.  May cause problems with
 * other versions.
 */
typedef long unsigned int caddr_t;
#endif

#ifdef NO_EXECUTE_PERMISSION
STATIC GC_bool GC_pages_executable = FALSE;
#else
STATIC GC_bool GC_pages_executable = TRUE;
#endif

/* Note: this macro is undefined later, where `GC_pages_executable` is really used. */
#define IGNORE_PAGES_EXECUTABLE 1

#if ((defined(LINUX) && defined(SPECIFIC_MAIN_STACKBOTTOM)                  \
      || defined(NEED_PROC_MAPS) || defined(PROC_VDB) || defined(SOFT_VDB)) \
     && !defined(PROC_READ))                                                \
    || defined(CPPCHECK)
/* Note: should probably call the real `read()`, if the latter is wrapped. */
#  define PROC_READ read
#endif

#if defined(LINUX) && defined(SPECIFIC_MAIN_STACKBOTTOM) \
    || defined(NEED_PROC_MAPS)
/*
 * Keep issuing `read()` calls on the descriptor `f` until `count` bytes
 * are stored to `buf`, or EOF (end of file) is reached, or an error occurs.
 * Returns the total number of bytes stored; in case of an error, the
 * (negative) result of the failed `read()` call is returned instead.
 */
STATIC ssize_t
GC_repeat_read(int f, char *buf, size_t count)
{
  size_t done;
  ssize_t chunk;

  ASSERT_CANCEL_DISABLED();
  for (done = 0; done < count; done += (size_t)chunk) {
    chunk = PROC_READ(f, buf + done, count - done);
    if (chunk <= 0) {
      if (chunk < 0)
        return chunk;
      /* Zero means EOF. */
      break;
    }
#  ifdef LINT2
    if ((size_t)chunk > count - done)
      ABORT("read() result cannot be bigger than requested length");
#  endif
  }
  return (ssize_t)done;
}
#endif /* LINUX && SPECIFIC_MAIN_STACKBOTTOM || NEED_PROC_MAPS */

#ifdef NEED_PROC_MAPS
/*
 * We need to parse `/proc/self/maps` pseudo-file, either to find
 * dynamic libraries, and/or to find the register backing store
 * base (the IA-64 case).  Do it once here.
 */

#  ifndef SINGLE_THREADED_PROCESS
/*
 * Compute the length of the file open as the descriptor `f` by reading it
 * piecewise (to a scratch buffer) up to EOF.  Doing so would be silly for
 * a file supporting `lseek`, but Linux `/proc` files usually do not; e.g.,
 * as of Linux 4.15.0, `lseek(SEEK_END)` fails for `/proc/self/maps` file.
 * Returns zero if a read error occurs.
 */
STATIC size_t
GC_get_file_len(int f)
{
#    define GET_FILE_LEN_BUF_SZ 500
  char scratch[GET_FILE_LEN_BUF_SZ];
  size_t len = 0;
  ssize_t chunk;

  ASSERT_CANCEL_DISABLED();
  while ((chunk = PROC_READ(f, scratch, sizeof(scratch))) > 0) {
#    ifdef LINT2
    if ((size_t)chunk >= GC_SIZE_MAX - len)
      ABORT("Too big file is passed to GC_get_file_len");
#    endif
    len += (size_t)chunk;
  }
  /* Here a negative value means a read error, and zero means EOF. */
  return chunk < 0 ? 0 : len;
}

/*
 * Return the current length of `/proc/self/maps` file as computed by
 * `GC_get_file_len()`, or zero if the file cannot be opened.
 */
STATIC size_t
GC_get_maps_len(void)
{
  size_t len = 0; /*< a missing file is treated as an empty one */
  int fd = open("/proc/self/maps", O_RDONLY);

  if (fd >= 0) {
    len = GC_get_file_len(fd);
    close(fd);
  }
  return len;
}
#  endif /* !SINGLE_THREADED_PROCESS */

/*
 * Read the whole content of `/proc/self/maps` file to an internal buffer
 * and return a pointer to it; the content is NUL-terminated.  The buffer
 * is referred to by `static` variables and is reused (and replaced with
 * a bigger one when needed) by subsequent calls, thus the result of
 * a call should be treated as invalidated by the next call.
 * The allocator lock should be held by the caller.  Aborts if the file
 * cannot be opened or read, if it turns out to be empty, or if the buffer
 * cannot be allocated.
 */
GC_INNER const char *
GC_get_maps(void)
{
  ssize_t result;
  static char *maps_buf = NULL;
  /* The initial value guarantees the buffer is allocated by the first call. */
  static size_t maps_buf_sz = 1;
  size_t maps_size;
#  ifndef SINGLE_THREADED_PROCESS
  size_t old_maps_size = 0;
#  endif

  /* The buffer is essentially `static`, so there must be a single client. */
  GC_ASSERT(I_HOLD_LOCK());

  /*
   * Note that in the presence of threads in the process (even if the
   * collector itself is built single-threaded), the `maps` file can
   * essentially shrink asynchronously and unexpectedly as threads
   * that we already think of as dead release their stacks.
   * And there is no easy way to read the entire file atomically.
   * This is arguably a misfeature of the `/proc/self/maps` interface.
   * Since we expect the file can grow asynchronously in rare cases,
   * it should suffice to first determine the size (using `read()`),
   * and then to reread the file.  If the size is inconsistent, then
   * we have to retry.  This only matters with threads enabled, and
   * if we use this to locate the data roots (not the default).
   */

#  ifndef SINGLE_THREADED_PROCESS
  /* Determine the initial size of `/proc/self/maps` file. */
  maps_size = GC_get_maps_len();
  if (0 == maps_size)
    ABORT("Cannot determine length of /proc/self/maps");
#  else
  maps_size = 4000; /*< guess */
#  endif

  /*
   * Read `/proc/self/maps` file, growing `maps_buf` as necessary.
   * Note that we may not allocate conventionally, and thus cannot
   * use `stdio` functionality.
   */
  do {
    int f;

    /* Ensure the buffer is big enough for the content and a trailing NUL. */
    while (maps_size >= maps_buf_sz) {
#  ifdef LINT2
      /* Workaround passing tainted `maps_buf` to a tainted sink. */
      GC_noop1_ptr(maps_buf);
#  else
      GC_scratch_recycle_no_gww(maps_buf, maps_buf_sz);
#  endif
      /* Grow only by powers of 2, since we leak "too small" buffers. */
      while (maps_size >= maps_buf_sz)
        maps_buf_sz *= 2;
      maps_buf = GC_scratch_alloc(maps_buf_sz);
      if (NULL == maps_buf)
        ABORT_ARG1("Insufficient space for /proc/self/maps buffer",
                   ", %lu bytes requested", (unsigned long)maps_buf_sz);
#  ifndef SINGLE_THREADED_PROCESS
      /*
       * Recompute initial length, since we allocated.
       * This can only happen a few times per program execution.
       */
      maps_size = GC_get_maps_len();
      if (0 == maps_size)
        ABORT("Cannot determine length of /proc/self/maps");
#  endif
    }
    GC_ASSERT(maps_buf_sz >= maps_size + 1);
    f = open("/proc/self/maps", O_RDONLY);
    if (-1 == f)
      ABORT_ARG1("Cannot open /proc/self/maps", ": errno= %d", errno);
#  ifndef SINGLE_THREADED_PROCESS
    /* Remember the expected size to detect a concurrent change of the file. */
    old_maps_size = maps_size;
#  endif
    maps_size = 0;
    /*
     * Every pass stores the data at the beginning of `maps_buf`; the passes
     * are repeated while the buffer gets filled completely, so `maps_size`
     * ends up as the total length of the file (it is checked against
     * the buffer size by the condition of the outer loop).
     */
    do {
      result = GC_repeat_read(f, maps_buf, maps_buf_sz - 1);
      if (result < 0) {
        ABORT_ARG1("Failed to read /proc/self/maps", ": errno= %d", errno);
      }
      maps_size += (size_t)result;
    } while ((size_t)result == maps_buf_sz - 1);
    close(f);
    if (0 == maps_size)
      ABORT("Empty /proc/self/maps");
#  ifndef SINGLE_THREADED_PROCESS
    if (maps_size > old_maps_size) {
      /* This might be caused by e.g. thread creation. */
      WARN("Unexpected asynchronous /proc/self/maps growth"
           " (to %" WARN_PRIuPTR " bytes)\n",
           maps_size);
    }
#  endif
    /*
     * Retry if the content has not fit the buffer or (unless the process
     * is single-threaded) if the file has shrunk since its size was
     * determined.
     */
  } while (maps_size >= maps_buf_sz
#  ifndef SINGLE_THREADED_PROCESS
           || maps_size < old_maps_size
#  endif
  );
  maps_buf[maps_size] = '\0';
  return maps_buf;
}

/*
 * `GC_parse_map_entry` parses an entry from `/proc/self/maps` file so we
 * can locate all writable data segments that belong to shared libraries.
 * The format of one of these entries and the fields we care about
 * is as follows:
 * ```
 * XXXXXXXX-XXXXXXXX r-xp 00000000 30:05 260537     name-of-mapping...\n
 * ^^^^^^^^ ^^^^^^^^ ^^^^          ^^
 * *p_start *p_end   *p_prot       *p_maj_dev
 * ```
 *
 * Note that since about August 2003 kernels, the columns no longer have
 * fixed offsets on 64-bit kernels.  Hence we no longer rely on fixed
 * offsets anywhere, which is safer anyway.
 */

#  if defined(DYNAMIC_LOADING) && defined(USE_PROC_FOR_LIBRARIES) \
      || defined(IA64) || defined(INCLUDE_LINUX_THREAD_DESCR)     \
      || (defined(CHECK_SOFT_VDB) && defined(MPROTECT_VDB))       \
      || defined(REDIR_MALLOC_AND_LINUXTHREADS)
/*
 * Parse the entry (line) of `/proc/self/maps` content that `maps_ptr`
 * points to.  The start and end addresses of the mapping are stored to
 * `*p_start` and `*p_end`, respectively; `*p_prot` is set to point to the
 * protection field (located inside the passed buffer, thus not terminated
 * by NUL on its own); the major device number is stored to `*p_maj_dev`.
 * If `p_mapping_name` is non-`NULL`, then `*p_mapping_name` is set to point
 * to the first '/' or '[' character following the major device number,
 * or to the end of the entry if there is no such character.
 * Returns a pointer just past the entry (i.e. past its newline character,
 * or to the terminating NUL if the latter is encountered first).
 * Returns `NULL` (storing nothing) if `maps_ptr` is `NULL` or points
 * to an empty string.
 * The entry is expected to be well-formed (this is checked only by the
 * assertions), but the terminating NUL of the buffer is never passed over
 * even if the entry is truncated.
 */
GC_INNER const char *
GC_parse_map_entry(const char *maps_ptr, ptr_t *p_start, ptr_t *p_end,
                   const char **p_prot, unsigned *p_maj_dev,
                   const char **p_mapping_name)
{
  const unsigned char *p; /*< unsigned for `isspace`, `isxdigit` */
  /*
   * A dedicated variable of the proper type is used for `strtoul` to
   * store the end pointer to (instead of writing to `p` through a pointer
   * of an incompatible type).
   */
  char *num_end;

  if (maps_ptr == NULL || *maps_ptr == '\0') {
    return NULL;
  }

  p = (const unsigned char *)maps_ptr;
  while (isspace(*p))
    ++p;
  GC_ASSERT(isxdigit(*p));
  *p_start = (ptr_t)strtoul((const char *)p, &num_end, 16);
  p = (const unsigned char *)num_end;
  GC_ASSERT(*p == '-');

  /* Skip the separator (unless the entry is truncated at this point). */
  if (*p != '\0')
    ++p;
  GC_ASSERT(isxdigit(*p));
  *p_end = (ptr_t)strtoul((const char *)p, &num_end, 16);
  p = (const unsigned char *)num_end;
  GC_ASSERT(isspace(*p));

  while (isspace(*p))
    ++p;
  GC_ASSERT(*p == 'r' || *p == '-');
  *p_prot = (const char *)p;
  /*
   * Skip past protection field to offset field.  Note that NUL is not
   * a space character, so it should be checked explicitly.
   */
  while (*p != '\0' && !isspace(*p))
    ++p;
  while (isspace(*p))
    ++p;
  GC_ASSERT(isxdigit(*p));
  /* Skip past offset field, which we ignore. */
  while (*p != '\0' && !isspace(*p))
    ++p;
  while (isspace(*p))
    ++p;
  GC_ASSERT(isxdigit(*p));
  *p_maj_dev = (unsigned)strtoul((const char *)p, NULL, 16);

  if (p_mapping_name != NULL) {
    while (*p && *p != '\n' && *p != '/' && *p != '[')
      p++;
    *p_mapping_name = (const char *)p;
  }
  /* Advance to the beginning of the next entry (if any). */
  while (*p && *p++ != '\n') {
    /* Empty. */
  }
  return (const char *)p;
}
#  endif /* REDIRECT_MALLOC || DYNAMIC_LOADING || IA64 || ... */

#  if defined(IA64) || defined(INCLUDE_LINUX_THREAD_DESCR) \
      || (defined(CHECK_SOFT_VDB) && defined(MPROTECT_VDB))
/*
 * Look up the first `/proc/self/maps` entry which address range contains
 * `addr`.  If such an entry exists, its second protection character is 'w'
 * and its major device number is zero, then the bounds of the mapping are
 * stored to `*startp` and `*endp`, and `TRUE` is returned.  Otherwise
 * `FALSE` is returned (and nothing is stored).  The allocator lock should
 * be held by the caller.
 */
GC_INNER GC_bool
GC_enclosing_writable_mapping(ptr_t addr, ptr_t *startp, ptr_t *endp)
{
  ptr_t seg_start, seg_end;
  const char *seg_prot;
  unsigned seg_maj_dev;
  const char *cursor;

  GC_ASSERT(I_HOLD_LOCK());
  cursor = GC_get_maps();
  while ((cursor = GC_parse_map_entry(cursor, &seg_start, &seg_end, &seg_prot,
                                      &seg_maj_dev, NULL))
         != NULL) {
    if (ADDR_INSIDE(addr, seg_start, seg_end)) {
      if (seg_prot[1] == 'w' && 0 == seg_maj_dev) {
        *startp = seg_start;
        *endp = seg_end;
        return TRUE;
      }
      /* The entry is not suitable; no other entries are examined. */
      break;
    }
  }
  return FALSE;
}
#  endif /* IA64 || INCLUDE_LINUX_THREAD_DESCR */

#  ifdef REDIR_MALLOC_AND_LINUXTHREADS
/*
 * Search `/proc/self/maps` for the first entry with the protection field
 * starting with "r-x" and with the mapped file name (the part of the
 * mapping name following its last slash, if any) starting with `nm`.
 * If found, the bounds of the mapping are stored to `*startp` and `*endp`,
 * and `TRUE` is returned; otherwise `FALSE` is returned.  The allocator
 * lock should be held by the caller.
 */
GC_INNER GC_bool
GC_text_mapping(const char *nm, ptr_t *startp, ptr_t *endp)
{
  const char *cursor, *seg_prot, *seg_path;
  ptr_t seg_start, seg_end;
  unsigned int seg_maj_dev;
  size_t nm_len;

  GC_ASSERT(I_HOLD_LOCK());
  cursor = GC_get_maps();
  nm_len = strlen(nm);
  while ((cursor = GC_parse_map_entry(cursor, &seg_start, &seg_end, &seg_prot,
                                      &seg_maj_dev, &seg_path))
         != NULL) {
    const char *base;

    if (seg_prot[0] != 'r' || seg_prot[1] != '-' || seg_prot[2] != 'x')
      continue;

    /* Move to the end of the mapping name. */
    for (base = seg_path;
         *base != '\0' && *base != '\n' && *base != ' ' && *base != '\t';
         ++base) {
      /* Empty. */
    }
    /*
     * Move backward to the last slash; if there is none, then stop just
     * before the beginning of the mapping name.
     */
    for (; ADDR_GE((ptr_t)base, (ptr_t)seg_path) && *base != '/'; --base) {
      /* Empty. */
    }
    /* Now, set `base` to point just past the last slash, if any. */
    ++base;

    if (strncmp(nm, base, nm_len) == 0) {
      *startp = seg_start;
      *endp = seg_end;
      return TRUE;
    }
  }
  return FALSE;
}
#  endif /* REDIR_MALLOC_AND_LINUXTHREADS */

#  ifdef IA64
/*
 * Return the start of the mapping reported by
 * `GC_enclosing_writable_mapping()` for the address returned by
 * `GC_save_regs_in_stack()`, or zero (logging the failure) if no suitable
 * mapping is found.  The allocator lock should be held by the caller.
 */
static ptr_t
backing_store_base_from_proc(void)
{
  ptr_t seg_start, seg_end;

  GC_ASSERT(I_HOLD_LOCK());
  if (GC_enclosing_writable_mapping(GC_save_regs_in_stack(), &seg_start,
                                    &seg_end)) {
    return seg_start;
  }

  GC_COND_LOG_PRINTF("Failed to find backing store base from /proc\n");
  return 0;
}
#  endif

#endif /* NEED_PROC_MAPS */

#if defined(SEARCH_FOR_DATA_START)
/*
 * The i686 case can be handled without a search.  The Alpha case used to
 * be handled differently as well, but the rules changed for recent Linux
 * versions.  This seems to be the easiest way to cover all versions.
 */

#  if defined(LINUX) || defined(HURD)
/*
 * Some Linux distributions arrange to define `__data_start`.
 * Some define `data_start` as a weak symbol.  The latter is technically
 * broken, since the user program may define `data_start`, in which
 * case we lose.  Nonetheless, we try both, preferring `__data_start`.
 * We assume gcc-compatible pragmas.
 */
EXTERN_C_BEGIN
#    pragma weak __data_start
#    pragma weak data_start
extern int __data_start[], data_start[];
EXTERN_C_END
#  elif defined(NETBSD)
EXTERN_C_BEGIN
extern char **environ;
EXTERN_C_END
#  endif

ptr_t GC_data_start = NULL;

/*
 * Set `GC_data_start`.  The following approaches are tried, in order:
 *   - if `USE_PROG_DATA_START` is defined (Linux and Hurd only), the
 *     address of `__data_start` or `data_start` weak symbol is used,
 *     provided it is non-zero;
 *   - if `GC_no_dls` is set, `DATAEND` is used (i.e. the data root is
 *     empty);
 *   - otherwise the value is searched for by `GC_find_limit()` downward
 *     from `DATAEND` (or from the address of `environ` on NetBSD).
 */
GC_INNER void
GC_init_linux_data_start(void)
{
  ptr_t data_end = DATAEND;

#  if (defined(LINUX) || defined(HURD)) && defined(USE_PROG_DATA_START)
  /*
   * Try the easy approaches first.  However, this may lead to wrong
   * data start value if the collector code is put into a shared library
   * (directly or indirectly) which is linked with `-Bsymbolic-functions`
   * option.  Thus, the following is not used by default.
   */
  if (COVERT_DATAFLOW(ADDR(__data_start)) != 0) {
    GC_data_start = (ptr_t)(__data_start);
  } else {
    GC_data_start = (ptr_t)(data_start);
  }
  /* A zero address means that none of the weak symbols is defined. */
  if (COVERT_DATAFLOW(ADDR(GC_data_start)) != 0) {
    if (ADDR_LT(data_end, GC_data_start))
      ABORT_ARG2("Wrong __data_start/_end pair", ": %p .. %p",
                 (void *)GC_data_start, (void *)data_end);
    return;
  }
#    ifdef DEBUG_ADD_DEL_ROOTS
  GC_log_printf("__data_start not provided\n");
#    endif
#  endif /* LINUX */

  if (GC_no_dls) {
    /*
     * Not needed, avoids the `SIGSEGV` caused by `GC_find_limit` which
     * complicates debugging.
     */
    GC_data_start = data_end; /*< set data root size to 0 */
    return;
  }

#  ifdef NETBSD
  /*
   * This may need to be `environ`, without the underscore, for
   * some versions.
   */
  GC_data_start = (ptr_t)GC_find_limit(&environ, FALSE);
#  else
  GC_data_start = (ptr_t)GC_find_limit(data_end, FALSE);
#  endif
}
#endif /* SEARCH_FOR_DATA_START */

#ifdef ECOS
/*
 * A minimal replacement of `sbrk()`: return the address of the next
 * `increment` bytes of `GC_ecos_memory` array (advancing `GC_ecos_brk_idx`
 * accordingly), or `NULL` if the rest of the array is smaller than
 * requested.  Note that a negative `increment` becomes a huge value when
 * converted to `size_t`, thus it results in `NULL` as well.
 *
 * TODO: This is a simple way of allocating memory which is compatible with
 * ECOS early releases.  Later releases use a more sophisticated means of
 * allocating memory than this simple static allocator, but this method is
 * at least bound to work.
 */
static void *
tiny_sbrk(ptrdiff_t increment)
{
  size_t req_sz = (size_t)increment;
  void *chunk;

  GC_ASSERT(GC_ecos_brk_idx <= sizeof(GC_ecos_memory));
  if (req_sz > sizeof(GC_ecos_memory) - GC_ecos_brk_idx)
    return NULL;

  chunk = &GC_ecos_memory[GC_ecos_brk_idx];
  GC_ecos_brk_idx += req_sz;
  return chunk;
}
#  define sbrk tiny_sbrk
#endif /* ECOS */

#if defined(ADDRESS_SANITIZER)                         \
    && (defined(UNIX_LIKE) || defined(NEED_FIND_LIMIT) \
        || defined(MPROTECT_VDB))                      \
    && !defined(CUSTOM_ASAN_DEF_OPTIONS)
EXTERN_C_BEGIN
GC_API const char *__asan_default_options(void);
EXTERN_C_END

/*
 * Tell ASan to allow the collector to use its own `SIGBUS` and `SIGSEGV`
 * handlers.  The function is exported just to be visible to ASan library.
 * Returns the options as a constant string.
 */
GC_API const char *
__asan_default_options(void)
{
  return "allow_user_segv_handler=1";
}
#endif

#ifdef OPENBSD
static struct sigaction old_segv_act;
STATIC JMP_BUF GC_jmp_buf_openbsd;

/*
 * The signal handler which transfers control back to the point where
 * `GC_jmp_buf_openbsd` has been set up; the signal number is ignored.
 */
STATIC void
GC_fault_handler_openbsd(int sig)
{
  UNUSED_ARG(sig);
  LONGJMP(GC_jmp_buf_openbsd, 1);
}

static volatile int firstpass;

/*
 * Return first addressable location that is greater than `p` or return
 * `bound`.  The locations are probed with a page-size step (starting from
 * `p` aligned down to a page boundary) while `SIGSEGV` is temporarily
 * handled by `GC_fault_handler_openbsd()`.  The allocator lock should be
 * held by the caller.
 */
STATIC ptr_t
GC_skip_hole_openbsd(ptr_t p, ptr_t bound)
{
  /* Not a local variable to keep the value across `longjmp`. */
  static volatile ptr_t result;
  struct sigaction act;
  size_t pgsz;

  GC_ASSERT(I_HOLD_LOCK());
  pgsz = (size_t)sysconf(_SC_PAGESIZE);
  GC_ASSERT(ADDR(bound) >= (word)pgsz);

  act.sa_handler = GC_fault_handler_openbsd;
  sigemptyset(&act.sa_mask);
  act.sa_flags = SA_NODEFER | SA_RESTART;
  /* `act.sa_restorer` is deprecated and should not be initialized. */
  sigaction(SIGSEGV, &act, &old_segv_act);

  firstpass = 1;
  result = PTR_ALIGN_DOWN(p, pgsz);
  /*
   * The body is executed once right after `setjmp` is set up (because
   * `firstpass` is set), and then once more each time the probe below
   * faults, as the handler jumps back here.
   */
  if (SETJMP(GC_jmp_buf_openbsd) != 0 || firstpass) {
    firstpass = 0;
    if (ADDR_GE(result, bound - pgsz)) {
      result = bound;
    } else {
      /*
       * Notes: no overflow is expected; do not use compound assignment
       * with `volatile`-qualified left operand.
       */
      result = result + pgsz;
      /* Probe the location; a fault here means it is not addressable. */
      GC_noop1((word)(unsigned char)(*result));
    }
  }

  /* Restore the previous `SIGSEGV` handler. */
  sigaction(SIGSEGV, &old_segv_act, 0);
  return result;
}
#endif /* OPENBSD */

#ifdef OS2

#  include <stddef.h>

#  if !defined(__IBMC__) && !defined(__WATCOMC__) /*< e.g. EMX */

/*
 * A local declaration used if the compiler is neither IBM C nor Watcom C
 * (otherwise the system headers are included instead).
 * NOTE(review): presumably this mirrors the layout of the old-style
 * executable header declared in `newexe.h` file - confirm.
 */
struct exe_hdr {
  unsigned short magic_number; /*< accessed with `E_MAGIC()` */
  unsigned short padding[29];
  long new_exe_offset; /*< accessed with `E_LFANEW()` */
};

#    define E_MAGIC(x) (x).magic_number
#    define EMAGIC 0x5A4D
#    define E_LFANEW(x) (x).new_exe_offset

/*
 * A local declaration of the executable header which magic number
 * consists of `E32MAGIC1` and `E32MAGIC2` characters.  Only the fields
 * having the accessor macros (`E32_MAGIC1()`, `E32_BORDER()`, etc.)
 * are named meaningfully, the rest is declared as padding.
 * NOTE(review): presumably this mirrors the layout declared in `exe386.h`
 * file - confirm.
 */
struct e32_exe {
  unsigned char magic_number[2];
  unsigned char byte_order; /*< accessed with `E32_BORDER()` */
  unsigned char word_order; /*< accessed with `E32_WORDER()` */
  unsigned long exe_format_level;
  unsigned short cpu; /*< accessed with `E32_CPU()` */
  unsigned short os;
  unsigned long padding1[13];
  unsigned long object_table_offset; /*< accessed with `E32_OBJTAB()` */
  unsigned long object_count;        /*< accessed with `E32_OBJCNT()` */
  unsigned long padding2[31];
};

#    define E32_MAGIC1(x) (x).magic_number[0]
#    define E32MAGIC1 'L'
#    define E32_MAGIC2(x) (x).magic_number[1]
#    define E32MAGIC2 'X'
#    define E32_BORDER(x) (x).byte_order
#    define E32LEBO 0
#    define E32_WORDER(x) (x).word_order
#    define E32LEWO 0
#    define E32_CPU(x) (x).cpu
#    define E32CPU286 1
#    define E32_OBJTAB(x) (x).object_table_offset
#    define E32_OBJCNT(x) (x).object_count

/*
 * A local declaration of an object descriptor.
 * NOTE(review): presumably this is an element of the table located by
 * `E32_OBJTAB()` and `E32_OBJCNT()` - confirm against the usage.
 */
struct o32_obj {
  unsigned long size;  /*< accessed with `O32_SIZE()` */
  unsigned long base;  /*< accessed with `O32_BASE()` */
  unsigned long flags; /*< accessed with `O32_FLAGS()` */
  unsigned long pagemap;
  unsigned long mapsize;
  unsigned long reserved;
};

#    define O32_FLAGS(x) (x).flags
#    define OBJREAD 0x0001L
#    define OBJWRITE 0x0002L
#    define OBJINVALID 0x0080L
#    define O32_SIZE(x) (x).size
#    define O32_BASE(x) (x).base

#  else /* IBM's compiler */

/* A kludge to get around what appears to be a header file bug. */
#    ifndef WORD
#      define WORD unsigned short
#    endif
#    ifndef DWORD
#      define DWORD unsigned long
#    endif

#    define EXE386 1
#    include <exe386.h>
#    include <newexe.h>

#  endif /* __IBMC__ */

#  define INCL_DOSERRORS
#  define INCL_DOSEXCEPTIONS
#  define INCL_DOSFILEMGR
#  define INCL_DOSMEMMGR
#  define INCL_DOSMISC
#  define INCL_DOSMODULEMGR
#  define INCL_DOSPROCESS
#  include <os2.h>

#endif /* OS2 */

GC_INNER size_t GC_page_size = 0;
#ifdef REAL_PAGESIZE_NEEDED
GC_INNER size_t GC_real_page_size = 0;
#endif

#ifdef SOFT_VDB
STATIC unsigned GC_log_pagesize = 0;
#endif

#ifdef ANY_MSWIN

#  ifndef VER_PLATFORM_WIN32_CE
#    define VER_PLATFORM_WIN32_CE 3
#  endif

#  if defined(MSWINCE) && defined(THREADS)
GC_INNER GC_bool GC_dont_query_stack_min = FALSE;
#  endif

GC_INNER SYSTEM_INFO GC_sysinfo;

#  ifndef CYGWIN32
#    define is_writable(prot)                               \
      ((prot) == PAGE_READWRITE || (prot) == PAGE_WRITECOPY \
       || (prot) == PAGE_EXECUTE_READWRITE                  \
       || (prot) == PAGE_EXECUTE_WRITECOPY)
/*
 * Query the memory region starting at `p` (the pointer is assumed to be
 * page-aligned) and return the size of the region, in bytes, if it is
 * committed and writable, or zero otherwise.  If `base` is not `NULL`,
 * then `*base` becomes the beginning of the allocation region
 * containing `p`.
 */
STATIC word
GC_get_writable_length(ptr_t p, ptr_t *base)
{
  MEMORY_BASIC_INFORMATION mem_info;
  word prot_flags;

  if (VirtualQuery(p, &mem_info, sizeof(mem_info)) != sizeof(mem_info))
    ABORT("Weird VirtualQuery result");
  if (base != NULL)
    *base = (ptr_t)mem_info.AllocationBase;

  /* The guard and no-cache modifiers do not matter here. */
  prot_flags = mem_info.Protect & ~(word)(PAGE_GUARD | PAGE_NOCACHE);
  if (mem_info.State != MEM_COMMIT || !is_writable(prot_flags))
    return 0;
  return mem_info.RegionSize;
}

/*
 * Store the stack base of the current thread to `sb->mem_base`.  The value
 * is computed as the end of the writable region containing the page of
 * the approximate stack pointer.  Always returns `GC_SUCCESS`.
 * Note: this function should not acquire the allocator lock as it is
 * used by `GC_DllMain`.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  ptr_t page_of_sp;
  word writable_len;

  /*
   * The client is allowed to call this function even before the collector
   * is initialized, thus the page size might be not set yet.
   */
  if (0 == GC_page_size) {
    GC_setpagesize();
  }

  page_of_sp = PTR_ALIGN_DOWN(GC_approx_sp(), GC_page_size);
  /*
   * FIXME: This will not work if called from a deeply recursive
   * client code (and the committed stack space has grown).
   */
  writable_len = GC_get_writable_length(page_of_sp, NULL);
  GC_ASSERT(writable_len != 0);
  sb->mem_base = page_of_sp + writable_len;
  return GC_SUCCESS;
}
#  else /* CYGWIN32 */
/*
 * Store the stack base of the current thread to `sb->mem_base`.
 * Always returns `GC_SUCCESS`.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  /*
   * An alternate variant for Cygwin (adapted from Dave Korn's gcc version
   * of boehm-gc).
   */
#    ifdef X86_64
  sb->mem_base = ((NT_TIB *)NtCurrentTeb())->StackBase;
#    else
  void *_tlsbase;

  /*
   * Load the 32-bit value located at offset 4 of the segment referred to
   * by `FS` register.
   * NOTE(review): presumably this is the same `StackBase` field which is
   * accessed in the `X86_64` branch above - confirm.
   */
  __asm__("movl %%fs:4, %0" : "=r"(_tlsbase));
  sb->mem_base = _tlsbase;
#    endif
  return GC_SUCCESS;
}
#  endif /* CYGWIN32 */
#  define HAVE_GET_STACK_BASE

#elif defined(OS2)

/*
 * Return the page size as reported by `DosQuerySysInfo()`; if the query
 * fails, then a warning is issued and 4096 is returned.
 */
static int
os2_getpagesize(void)
{
  ULONG page_sz;

  if (DosQuerySysInfo(QSV_PAGE_SIZE, QSV_PAGE_SIZE, (void *)&page_sz,
                      sizeof(ULONG))
      != NO_ERROR) {
    WARN("DosQuerySysInfo failed\n", 0);
    return 4096;
  }
  return (int)page_sz;
}

#endif /* !ANY_MSWIN && OS2 */

/*
 * Initialize `GC_page_size` and, depending on the configuration,
 * `GC_real_page_size`, `GC_sysinfo` and `GC_log_pagesize` (the latter is
 * the binary logarithm of `GC_page_size`, set if `SOFT_VDB` is defined).
 * Aborts if the obtained page size is zero or (in case of `SOFT_VDB`)
 * is not a power of two.
 */
GC_INNER void
GC_setpagesize(void)
{
#ifdef ANY_MSWIN
  GetSystemInfo(&GC_sysinfo);
#  ifdef ALT_PAGESIZE_USED
  /*
   * Allocations made with `mmap()` are aligned to the allocation
   * granularity, which (at least on Win64) is not the same as the
   * page size.  Probably we could distinguish the allocation
   * granularity from the actual page size, but in practice there
   * is no good reason to make allocations smaller than
   * `dwAllocationGranularity`, so we just use it instead of the
   * actual page size here (as Cygwin itself does in many cases).
   */
  GC_page_size = (size_t)GC_sysinfo.dwAllocationGranularity;
#    ifdef REAL_PAGESIZE_NEEDED
  GC_real_page_size = (size_t)GC_sysinfo.dwPageSize;
  GC_ASSERT(GC_page_size >= GC_real_page_size);
#    endif
#  else
  GC_page_size = (size_t)GC_sysinfo.dwPageSize;
#  endif
#  if defined(MSWINCE) && !defined(_WIN32_WCE_EMULATION)
  {
    OSVERSIONINFO verInfo;

    /* Check the current WinCE version. */
    verInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
    if (!GetVersionEx(&verInfo))
      ABORT("GetVersionEx failed");
    if (verInfo.dwPlatformId == VER_PLATFORM_WIN32_CE
        && verInfo.dwMajorVersion < 6) {
      /*
       * Only the first 32 MB of address space belongs to the
       * current process (unless WinCE 6.0+ or emulation).
       */
      GC_sysinfo.lpMaximumApplicationAddress = (LPVOID)((word)32 << 20);
#    ifdef THREADS
      /*
       * On some old WinCE versions, it is observed that
       * `VirtualQuery()` calls do not work properly when used to
       * get thread current stack committed minimum.
       */
      if (verInfo.dwMajorVersion < 5)
        GC_dont_query_stack_min = TRUE;
#    endif
    }
  }
#  endif
#else
#  ifdef ALT_PAGESIZE_USED
#    ifdef REAL_PAGESIZE_NEEDED
  GC_real_page_size = (size_t)GETPAGESIZE();
#    endif
  /* It is acceptable to fake it. */
  GC_page_size = HBLKSIZE;
#  else
  GC_page_size = (size_t)GETPAGESIZE();
#    if !defined(CPPCHECK)
  if (0 == GC_page_size)
    ABORT("getpagesize failed");
#    endif
#  endif
#endif /* !ANY_MSWIN */
#ifdef SOFT_VDB
  {
    size_t pgsize;
    unsigned log_pgsize = 0;

#  if !defined(CPPCHECK)
    if (((GC_page_size - 1) & GC_page_size) != 0) {
      /* Not a power of two. */
      ABORT("Invalid page size");
    }
#  endif
    /* Count how many times the page size could be halved until it is 1. */
    for (pgsize = GC_page_size; pgsize > 1; pgsize >>= 1)
      log_pgsize++;
    GC_log_pagesize = log_pgsize;
  }
#endif
}

#ifdef EMBOX
#  include <kernel/thread/thread_stack.h>
#  include <pthread.h>

/*
 * Store the stack base of the current thread to `sb->mem_base`.
 * The value is the address returned by `thread_stack_get()` if the stack
 * grows up, otherwise it is that address plus the stack size returned
 * by `thread_stack_get_size()`.  Always returns `GC_SUCCESS`.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  pthread_t self = pthread_self();
  void *stack_addr = thread_stack_get(self);

  /* TODO: Use `pthread_getattr_np`, `pthread_attr_getstack` alternatively. */
#  ifdef STACK_GROWS_UP
  sb->mem_base = stack_addr;
#  else
  sb->mem_base = (ptr_t)stack_addr + thread_stack_get_size(self);
#  endif
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* EMBOX */

#ifdef OS2
/*
 * Store the stack base of the current thread (taken from
 * `tib_pstacklimit` field of the thread information block) to
 * `sb->mem_base` and return `GC_SUCCESS`.  If `DosGetInfoBlocks()` fails,
 * then a warning is issued and `GC_UNIMPLEMENTED` is returned.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  PTIB tib; /*< thread information block */
  PPIB pib;

  if (DosGetInfoBlocks(&tib, &pib) == NO_ERROR) {
    sb->mem_base = tib->tib_pstacklimit;
    return GC_SUCCESS;
  }

  WARN("DosGetInfoBlocks failed\n", 0);
  return GC_UNIMPLEMENTED;
}
#  define HAVE_GET_STACK_BASE
#endif /* OS2 */

#ifdef SERENITY
#  include <serenity.h>

/*
 * Store the stack base of the current thread to `sb->mem_base` and return
 * `GC_SUCCESS`.  The value is computed as the sum of the base address and
 * the size reported by `get_stack_bounds()`.  If the latter fails, then
 * a warning is issued and `GC_UNIMPLEMENTED` is returned.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  uintptr_t base;
  size_t size;

  if (get_stack_bounds(&base, &size) < 0) {
    WARN("get_stack_bounds failed\n", 0);
    return GC_UNIMPLEMENTED;
  }
  /*
   * An explicit cast is required to convert an integer to a pointer
   * (the implicit conversion is a constraint violation in C).
   */
  sb->mem_base = (void *)(base + size);
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* SERENITY */

#if defined(NEED_FIND_LIMIT)                                 \
    || (defined(UNIX_LIKE) && !defined(NO_DEBUGGING))        \
    || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)) \
    || (defined(WRAP_MARK_SOME) && defined(NO_SEH_AVAILABLE))

#  include <signal.h>

#  ifdef USE_SEGV_SIGACT
#    ifndef OPENBSD
static struct sigaction old_segv_act;
#    endif
#    ifdef USE_BUS_SIGACT
static struct sigaction old_bus_act;
#    endif
#  else
static GC_fault_handler_t old_segv_hand;
#    ifdef HAVE_SIGBUS
static GC_fault_handler_t old_bus_hand;
#    endif
#  endif /* !USE_SEGV_SIGACT */

/*
 * Install `h` as the handler of `SIGSEGV` (and of `SIGBUS` where
 * configured), saving the previous handlers to the `static` variables
 * declared above (`old_segv_act`, `old_bus_act` or `old_segv_hand`,
 * `old_bus_hand`, depending on whether `USE_SEGV_SIGACT` is defined).
 */
GC_INNER void
GC_set_and_save_fault_handler(GC_fault_handler_t h)
{
#  ifdef USE_SEGV_SIGACT
  struct sigaction act;

  act.sa_handler = h;
#    ifdef SIGACTION_FLAGS_NODEFER_HACK
  /* Was necessary for Solaris 2.3 and very temporary NetBSD bugs. */
  act.sa_flags = SA_RESTART | SA_NODEFER;
#    else
  act.sa_flags = SA_RESTART;
#    endif

  (void)sigemptyset(&act.sa_mask);
  /* `act.sa_restorer` is deprecated and should not be initialized. */
#    if defined(IRIX5) && defined(THREADS)
  /*
   * Older versions have a bug related to retrieving and setting
   * a handler at the same time.
   */
  (void)sigaction(SIGSEGV, 0, &old_segv_act);
  (void)sigaction(SIGSEGV, &act, 0);
#    else
  (void)sigaction(SIGSEGV, &act, &old_segv_act);
#      ifdef USE_BUS_SIGACT
  /*
   * `pthreads` library does not exist under Irix 5.x, so we do not have
   * to worry about the multi-threaded case.
   */
  (void)sigaction(SIGBUS, &act, &old_bus_act);
#      endif
#    endif /* !IRIX5 || !THREADS */
#  else
  old_segv_hand = signal(SIGSEGV, h);
#    ifdef HAVE_SIGBUS
  old_bus_hand = signal(SIGBUS, h);
#    endif
#  endif /* !USE_SEGV_SIGACT */
#  if defined(CPPCHECK) && defined(ADDRESS_SANITIZER)
  /* Just to reference the function (for the static analysis purpose). */
  GC_noop1((word)(GC_funcptr_uint)(&__asan_default_options));
#  endif
}
#endif /* NEED_FIND_LIMIT || UNIX_LIKE || WRAP_MARK_SOME */

#if defined(NEED_FIND_LIMIT)                                 \
    || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)) \
    || (defined(WRAP_MARK_SOME) && defined(NO_SEH_AVAILABLE))
GC_INNER JMP_BUF GC_jmp_buf;

/*
 * The signal handler which transfers control back to the point where
 * `GC_jmp_buf` has been set up; the signal number is ignored.
 */
STATIC void
GC_fault_handler(int sig)
{
  UNUSED_ARG(sig);
  LONGJMP(GC_jmp_buf, 1);
}

/*
 * Install `GC_fault_handler()` as the fault handler, saving the previous
 * one.  The allocator lock should be held by the caller.
 */
GC_INNER void
GC_setup_temporary_fault_handler(void)
{
  /*
   * Handler is process-wide, so this should only happen in one thread
   * at a time.
   */
  GC_ASSERT(I_HOLD_LOCK());
  GC_set_and_save_fault_handler(GC_fault_handler);
}

/*
 * Restore the handler of `SIGSEGV` (and of `SIGBUS` where configured)
 * from the variables where `GC_set_and_save_fault_handler()` stores
 * the previous handlers.
 */
GC_INNER void
GC_reset_fault_handler(void)
{
#  ifdef USE_SEGV_SIGACT
  (void)sigaction(SIGSEGV, &old_segv_act, 0);
#    ifdef USE_BUS_SIGACT
  (void)sigaction(SIGBUS, &old_bus_act, 0);
#    endif
#  else
  (void)signal(SIGSEGV, old_segv_hand);
#    ifdef HAVE_SIGBUS
  (void)signal(SIGBUS, old_bus_hand);
#    endif
#  endif
}
#endif /* NEED_FIND_LIMIT || USE_PROC_FOR_LIBRARIES || WRAP_MARK_SOME */

#if defined(NEED_FIND_LIMIT) \
    || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS))
#  define MIN_PAGE_SIZE 256 /*< smallest conceivable page size, in bytes */

/*
 * Return the first non-addressable location greater than `p` (if `up`)
 * or the smallest location `q` such that [`q`,`p`) is addressable (if
 * not `up`).  We assume that `p` (if `up`) or `p - 1` (if not `up`) is
 * addressable.  The search is performed with `MIN_PAGE_SIZE` step and
 * does not go beyond `bound`: if no fault occurs before `bound` is
 * reached, then `bound` is returned.  The allocator lock should be held
 * by the caller.
 */
GC_ATTR_NO_SANITIZE_ADDR
STATIC ptr_t
GC_find_limit_with_bound(ptr_t p, GC_bool up, ptr_t bound)
{
  /*
   * This is safer if `static`, since otherwise it may not be preserved
   * across the `longjmp`.  Can safely be `static` since it is only called
   * with the allocator lock held.
   */
  static volatile ptr_t result;

  GC_ASSERT(up ? ADDR(bound) >= MIN_PAGE_SIZE
               : ADDR(bound) <= ~(word)MIN_PAGE_SIZE);
  GC_ASSERT(I_HOLD_LOCK());
  result = PTR_ALIGN_DOWN(p, MIN_PAGE_SIZE);
  GC_setup_temporary_fault_handler();
  /*
   * If the probe inside the loop faults, then the installed handler jumps
   * back here, and the loop is skipped, leaving `result` at the location
   * that has caused the fault.
   */
  if (SETJMP(GC_jmp_buf) == 0) {
    for (;;) {
      if (up) {
        if (ADDR_GE(result, bound - MIN_PAGE_SIZE)) {
          result = bound;
          break;
        }
        /*
         * Notes: no overflow is expected; do not use compound assignment
         * with `volatile`-qualified left operand.
         */
        result = result + MIN_PAGE_SIZE;
      } else {
        if (ADDR_GE(bound + MIN_PAGE_SIZE, result)) {
          /*
           * This is to compensate further result increment (we do not
           * modify `up` variable since it might be clobbered by `setjmp()`
           * otherwise).
           */
          result = bound - MIN_PAGE_SIZE;
          break;
        }
        /* See the notes for the case when `up` is `TRUE`. */
        result = result - MIN_PAGE_SIZE;
      }
      /* Probe the location by reading a byte from it. */
      GC_noop1((word)(unsigned char)(*result));
    }
  }
  GC_reset_fault_handler();
  /*
   * In case of the downward search, `result` refers to the location
   * preceding the lowest successfully probed one.
   */
  return up ? result : result + MIN_PAGE_SIZE;
}

/*
 * Same as `GC_find_limit_with_bound()` but with the bound chosen
 * automatically: the pointer made of `GC_WORD_MAX` value (if `up`) or
 * `NULL` (if not `up`); in case of `CHERI_PURECAP`, the bound is derived
 * from the base and the length of the capability `p`.
 */
void *
GC_find_limit(void *p, int up)
{
  ptr_t limit_bound;

#  ifdef CHERI_PURECAP
  limit_bound = (ptr_t)cheri_address_set(
      p, cheri_base_get(p) + (up ? cheri_length_get(p) : 0));
#  else
  if (up) {
    limit_bound = MAKE_CPTR(GC_WORD_MAX);
  } else {
    limit_bound = NULL;
  }
#  endif
  return GC_find_limit_with_bound((ptr_t)p, (GC_bool)up, limit_bound);
}
#endif /* NEED_FIND_LIMIT || USE_PROC_FOR_LIBRARIES */

#if defined(HPUX) && defined(IA64)
#  include <sys/param.h>
#  include <sys/pstat.h>

/*
 * Return the base of the register stack.  The address of the first region
 * of `PS_RSESTACK` type reported by `pstat_getprocvm()` is used; if there
 * is no such region, then the value is computed from `GC_stackbottom`
 * (which should be set in this case).
 */
GC_INNER ptr_t
GC_get_register_stack_base(void)
{
  struct pst_vm_status vm_status;
  int idx;

  /* Iterate over the regions until the query fails. */
  for (idx = 0;
       pstat_getprocvm(&vm_status, sizeof(vm_status), 0, idx) == 1; idx++) {
    if (PS_RSESTACK == vm_status.pst_type)
      return (ptr_t)vm_status.pst_vaddr;
  }

  /* Fall back to the old way to get the register stack bottom. */
  GC_ASSERT(GC_stackbottom != NULL);
  return PTR_ALIGN_DOWN(GC_stackbottom - BACKING_STORE_DISPLACEMENT - 1,
                        BACKING_STORE_ALIGNMENT);
}
#endif /* HPUX && IA64 */

#if defined(LINUX) && defined(IA64)
#  ifdef USE_LIBC_PRIVATES
EXTERN_C_BEGIN
#    pragma weak __libc_ia64_register_backing_store_base
extern ptr_t __libc_ia64_register_backing_store_base;
EXTERN_C_END
#  endif

/*
 * Return the base of the register backing store.  The following sources
 * are tried, in order: `__libc_ia64_register_backing_store_base` weak
 * symbol (only if `USE_LIBC_PRIVATES` is defined), the `/proc`-based
 * lookup by `backing_store_base_from_proc()`, and, finally, the downward
 * search by `GC_find_limit()` starting from the address returned by
 * `GC_save_regs_in_stack()`.  The allocator lock should be held by
 * the caller.
 */
GC_INNER ptr_t
GC_get_register_stack_base(void)
{
  ptr_t result;

  GC_ASSERT(I_HOLD_LOCK());
#  ifdef USE_LIBC_PRIVATES
  {
    /* The address of a weak symbol is null if the symbol is not defined. */
    ptr_t *p_libc_ia64_register_backing_store_base
        = &__libc_ia64_register_backing_store_base;

#    ifdef CPPCHECK
    /*
     * Workaround a warning that the address of the global symbol
     * (which is a weak one) cannot be null.
     */
    GC_noop1_ptr(&p_libc_ia64_register_backing_store_base);
#    endif
    if (p_libc_ia64_register_backing_store_base != NULL
        && __libc_ia64_register_backing_store_base != NULL) {
      /*
       * `glibc` 2.2.4 has a bug such that for dynamically linked
       * executables `__libc_ia64_register_backing_store_base` is
       * defined but uninitialized during constructor calls.
       * Hence we check for both nonzero address and value.
       */
      return __libc_ia64_register_backing_store_base;
    }
  }
#  endif
  result = backing_store_base_from_proc();
  if (0 == result) {
    /* This works better than a constant displacement heuristic. */
    result = (ptr_t)GC_find_limit(GC_save_regs_in_stack(), FALSE);
  }
  return result;
}
#endif /* LINUX && IA64 */

#ifdef SPECIFIC_MAIN_STACKBOTTOM

#  ifdef HPUX
#    include <sys/param.h>
#    include <sys/pstat.h>

static ptr_t
os_main_stackbottom(void)
{
  struct pst_vm_status vm_status;
  int i = 0;

  while (pstat_getprocvm(&vm_status, sizeof(vm_status), 0, i++) == 1) {
    if (vm_status.pst_type == PS_STACK)
      return (ptr_t)vm_status.pst_vaddr;
  }

  /* Old way to get the stack bottom. */
#    ifdef STACK_GROWS_UP
  return (ptr_t)GC_find_limit(GC_approx_sp(), FALSE);
#    else
  return (ptr_t)GC_find_limit(GC_approx_sp(), TRUE /* `up` */);
#    endif
}

#  elif defined(LINUX)
#    include <sys/stat.h>

/* Number of fields preceding `startstack` one in `/proc/self/stat` file. */
#    define STAT_SKIP 27

#    ifdef USE_LIBC_PRIVATES
EXTERN_C_BEGIN
#      pragma weak __libc_stack_end
extern ptr_t __libc_stack_end;
EXTERN_C_END
#    endif

/*
 * Return the main thread stack bottom.  If `USE_LIBC_PRIVATES` is
 * defined and the weak `__libc_stack_end` symbol holds a usable value,
 * then that value is returned; otherwise the `startstack` field is
 * parsed out of `/proc/self/stat` file.  Aborts if the file cannot be
 * opened, read or parsed, or if the parsed value looks absurd.
 */
static ptr_t
os_main_stackbottom(void)
{
  /*
   * We read the stack bottom value from `/proc/self/stat` file.
   * We do this using direct I/O system calls in order to avoid
   * calling `malloc` in case `REDIRECT_MALLOC` is defined.
   */
#    define STAT_BUF_SIZE 4096
  unsigned char stat_buf[STAT_BUF_SIZE];
  int f;
  word addr;
  ssize_t i, buf_offset = 0, len;
  ssize_t comm_end;
  /* Number of whitespace-separated fields to skip from `buf_offset`. */
  int skip_cnt = STAT_SKIP;

  /*
   * First try the easy way.  This should work for `glibc` 2.2.
   * This fails in a prelinked (`prelink` command) executable
   * since the correct value of `__libc_stack_end` never becomes
   * visible to us.  The second test is a workaround for this.
   */
#    ifdef USE_LIBC_PRIVATES
  ptr_t *p_libc_stack_end = &__libc_stack_end;

#      ifdef CPPCHECK
  GC_noop1_ptr(&p_libc_stack_end);
#      endif
  if (p_libc_stack_end != NULL && __libc_stack_end != NULL) {
#      ifdef IA64
    /*
     * Some versions of `glibc` set the address 16 bytes too low
     * while the initialization code is running.
     */
    if ((ADDR(__libc_stack_end) & 0xfff) + 0x10 < 0x1000) {
      return __libc_stack_end + 0x10;
    } else {
      /* It is not safe to add 16 bytes.  Thus, fall back to using `/proc`. */
    }
#      elif defined(SPARC)
    /*
     * Older versions of `glibc` for 64-bit SPARC do not set this
     * variable correctly, it gets set to either zero or one.
     */
    if (ADDR(__libc_stack_end) != 1)
      return __libc_stack_end;
#      else
    return __libc_stack_end;
#      endif
  }
#    endif

  f = open("/proc/self/stat", O_RDONLY);
  if (-1 == f)
    ABORT_ARG1("Could not open /proc/self/stat", ": errno= %d", errno);
  len = GC_repeat_read(f, (char *)stat_buf, sizeof(stat_buf));
  if (len < 0)
    ABORT_ARG1("Failed to read /proc/self/stat", ": errno= %d", errno);
  close(f);

  /*
   * The second field (`comm`) is the executable file name enclosed in
   * parentheses, and the name itself may contain spaces (and even
   * parentheses).  Counting whitespace-separated fields from the very
   * beginning of the buffer would then pick a wrong field.  None of the
   * fields following `comm` contains a parenthesis, so the last ')' in
   * the buffer terminates `comm`; resume the field counting right after
   * it, with the first two fields (`pid` and `comm`) already consumed.
   * If there is no ')' at all, then count from the buffer start.
   */
  for (comm_end = len; comm_end > 0; comm_end--) {
    if (stat_buf[comm_end - 1] == ')')
      break;
  }
  if (comm_end > 0) {
    buf_offset = comm_end;
    skip_cnt -= 2;
  }

  /*
   * Skip the required number of fields.  This number is hopefully constant
   * across all Linux implementations.
   */
  for (i = 0; i < skip_cnt; ++i) {
    while (buf_offset < len && isspace(stat_buf[buf_offset++])) {
      /* Empty. */
    }
    while (buf_offset < len && !isspace(stat_buf[buf_offset++])) {
      /* Empty. */
    }
  }
  /* Skip spaces. */
  while (buf_offset < len && isspace(stat_buf[buf_offset])) {
    buf_offset++;
  }
  /* Find the end of the number and cut the buffer there. */
  for (i = 0; buf_offset + i < len; i++) {
    if (!isdigit(stat_buf[buf_offset + i]))
      break;
  }
  if (buf_offset + i >= len)
    ABORT("Could not parse /proc/self/stat");
  stat_buf[buf_offset + i] = '\0';

  addr = (word)STRTOULL((char *)stat_buf + buf_offset, NULL, 10);
  if (addr < 0x100000 || addr % ALIGNMENT != 0)
    ABORT_ARG1("Absurd stack bottom value", ": 0x%lx", (unsigned long)addr);
  return MAKE_CPTR(addr);
}

#  elif defined(QNX)
static ptr_t
os_main_stackbottom(void)
{
  /*
   * TODO: This approach is not very exact but it works for the tests,
   * at least, unlike other available heuristics.
   */
  return (ptr_t)__builtin_frame_address(0);
}

#  elif defined(FREEBSD)
#    include <sys/sysctl.h>

/*
 * Obtain the main stack bottom by querying `KERN_USRSTACK` via `sysctl`.
 * This `sysctl` call is undocumented, but at least one expert believes
 * it will stay.  Aborts if the call fails.
 */
static ptr_t
os_main_stackbottom(void)
{
  int mib[2];
  ptr_t usrstack;
  size_t out_len = sizeof(ptr_t);

  mib[0] = CTL_KERN;
  mib[1] = KERN_USRSTACK;
  if (sysctl(mib, 2, &usrstack, &out_len, NULL, 0) != 0)
    ABORT("Error getting main stack base");
  return usrstack;
}
#  endif

#endif /* SPECIFIC_MAIN_STACKBOTTOM */

#if defined(ECOS) || defined(NOSYS)
/* The main stack bottom is the statically configured `STACKBOTTOM`. */
GC_INNER ptr_t
GC_get_main_stack_base(void)
{
  ptr_t bottom = STACKBOTTOM;

  return bottom;
}
#  define GET_MAIN_STACKBASE_SPECIAL

#elif defined(SYMBIAN)
EXTERN_C_BEGIN
extern int GC_get_main_symbian_stack_base(void);
EXTERN_C_END

/* Delegate to the Symbian-specific helper and convert its result. */
GC_INNER ptr_t
GC_get_main_stack_base(void)
{
  int base = GC_get_main_symbian_stack_base();

  return (ptr_t)base;
}
#  define GET_MAIN_STACKBASE_SPECIAL

#elif defined(EMSCRIPTEN)
#  include <emscripten/stack.h>

/* Return the stack base as reported by `emscripten_stack_get_base()`. */
GC_INNER ptr_t
GC_get_main_stack_base(void)
{
  ptr_t base = (ptr_t)emscripten_stack_get_base();

  return base;
}
#  define GET_MAIN_STACKBASE_SPECIAL

#elif !defined(ANY_MSWIN) && !defined(EMBOX) && !defined(OS2)        \
    && !(defined(OPENBSD) && defined(THREADS)) && !defined(SERENITY) \
    && (!(defined(SOLARIS) && defined(THREADS)) || defined(_STRICT_STDC))

#  if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
      && (defined(THREADS) || defined(USE_GET_STACKBASE_FOR_MAIN))
#    include <pthread.h>
#    ifdef HAVE_PTHREAD_NP_H
#      include <pthread_np.h> /*< for `pthread_attr_get_np()` */
#    endif
#  elif defined(DARWIN) && !defined(NO_PTHREAD_GET_STACKADDR_NP)
/*
 * We could use `pthread_get_stackaddr_np` even in case of a single-threaded
 * collector build (there is no `-lpthread` option on Darwin).
 */
#    include <pthread.h>
#    undef STACKBOTTOM
#    define STACKBOTTOM (ptr_t) pthread_get_stackaddr_np(pthread_self())
#  endif

/*
 * Compute the stack bottom of the main thread.  Depending on the
 * configuration, the pthread attributes of the current thread are
 * queried first; if that is not compiled in or fails, the result is
 * taken from `STACKBOTTOM` or, if it is not defined, from one of the
 * heuristics (`HEURISTIC1`, `os_main_stackbottom()`, `HEURISTIC2`).
 */
GC_INNER ptr_t
GC_get_main_stack_base(void)
{
  ptr_t result;
#  if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
      && (defined(USE_GET_STACKBASE_FOR_MAIN)                                 \
          || (defined(THREADS) && !defined(REDIRECT_MALLOC)))
  pthread_attr_t attr;
  void *stackaddr;
  size_t size;

  /*
   * Fetch the attributes of the current thread.  In the first variant
   * `attr` is destroyed right away if `pthread_attr_get_np()` fails.
   */
#    ifdef HAVE_PTHREAD_ATTR_GET_NP
  if (pthread_attr_init(&attr) == 0
      && (pthread_attr_get_np(pthread_self(), &attr) == 0
              ? TRUE
              : (pthread_attr_destroy(&attr), FALSE)))
#    else /* HAVE_PTHREAD_GETATTR_NP */
  if (pthread_getattr_np(pthread_self(), &attr) == 0)
#    endif
  {
    if (pthread_attr_getstack(&attr, &stackaddr, &size) == 0
        && stackaddr != NULL) {
      (void)pthread_attr_destroy(&attr);
#    ifndef STACK_GROWS_UP
      /* Advance from the reported address by the stack size. */
      stackaddr = (char *)stackaddr + size;
#    endif
      return (ptr_t)stackaddr;
    }
    (void)pthread_attr_destroy(&attr);
  }
  /* Reached only if the stack information was not obtained above. */
  WARN("pthread_getattr_np or pthread_attr_getstack failed"
       " for main thread\n",
       0);
#  endif
#  ifdef STACKBOTTOM
  result = STACKBOTTOM;
#  else
#    ifdef HEURISTIC1
  /* Round the approximate stack pointer to a `STACK_GRAN` boundary. */
#      define STACKBOTTOM_ALIGNMENT_M1 ((word)STACK_GRAN - 1)
#      ifdef STACK_GROWS_UP
  result = PTR_ALIGN_DOWN(GC_approx_sp(), STACKBOTTOM_ALIGNMENT_M1 + 1);
#      else
  result = PTR_ALIGN_UP(GC_approx_sp(), STACKBOTTOM_ALIGNMENT_M1 + 1);
#      endif
#    elif defined(SPECIFIC_MAIN_STACKBOTTOM)
  result = os_main_stackbottom();
#    elif defined(HEURISTIC2)
  {
    ptr_t sp = GC_approx_sp();

    /* Probe from the current stack pointer towards the stack bottom. */
#      ifdef STACK_GROWS_UP
    result = (ptr_t)GC_find_limit(sp, FALSE);
#      else
    result = (ptr_t)GC_find_limit(sp, TRUE /* `up` */);
#      endif
#      if defined(HEURISTIC2_LIMIT) && !defined(CPPCHECK)
    /*
     * Use `HEURISTIC2_LIMIT` instead if it lies between the current
     * stack pointer and the found limit.
     */
    if (HOTTER_THAN(HEURISTIC2_LIMIT, result)
        && HOTTER_THAN(sp, HEURISTIC2_LIMIT))
      result = HEURISTIC2_LIMIT;
#      endif
  }
#    elif defined(STACK_NOT_SCANNED) || defined(CPPCHECK)
  result = NULL;
#    else
#      error None of HEURISTIC* and *STACKBOTTOM defined!
#    endif
#    if !defined(STACK_GROWS_UP) && !defined(CPPCHECK)
  /* Replace a null result with an address near the top of address space. */
  if (NULL == result)
    result = MAKE_CPTR((GC_signed_word)(-sizeof(ptr_t)));
#    endif
#  endif
#  if !defined(CPPCHECK)
  GC_ASSERT(HOTTER_THAN(GC_approx_sp(), result));
#  endif
  return result;
}
#  define GET_MAIN_STACKBASE_SPECIAL
#endif /* !ANY_MSWIN && !EMBOX && !OS2 && !SERENITY */

#if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
    && defined(THREADS) && !defined(HAVE_GET_STACK_BASE)
#  include <pthread.h>
#  ifdef HAVE_PTHREAD_NP_H
#    include <pthread_np.h>
#  endif

/*
 * Fill in `b` with the stack base of the calling thread using its
 * pthread attributes.  Returns `GC_SUCCESS` on success, and
 * `GC_UNIMPLEMENTED` if the attributes cannot be retrieved.
 * Aborts if `pthread_attr_init()` or `pthread_attr_getstack()` fails.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *b)
{
  pthread_attr_t thr_attr;
  size_t stack_size;
  int res;

#  ifdef HAVE_PTHREAD_ATTR_GET_NP
  if (pthread_attr_init(&thr_attr) != 0)
    ABORT("pthread_attr_init failed");
  res = pthread_attr_get_np(pthread_self(), &thr_attr);
  if (res != 0) {
    WARN("pthread_attr_get_np failed\n", 0);
    (void)pthread_attr_destroy(&thr_attr);
    return GC_UNIMPLEMENTED;
  }
#  else /* HAVE_PTHREAD_GETATTR_NP */
  res = pthread_getattr_np(pthread_self(), &thr_attr);
  if (res != 0) {
    WARN("pthread_getattr_np failed\n", 0);
    return GC_UNIMPLEMENTED;
  }
#  endif
  res = pthread_attr_getstack(&thr_attr, &b->mem_base, &stack_size);
  if (res != 0)
    ABORT("pthread_attr_getstack failed");
  (void)pthread_attr_destroy(&thr_attr);
#  ifndef STACK_GROWS_UP
  /* Move from the reported address to the opposite end of the stack. */
  b->mem_base = (char *)b->mem_base + stack_size;
#  endif
#  ifdef IA64
  /*
   * We could try `backing_store_base_from_proc`, but that is safe only
   * if no mappings are being asynchronously created.  Subtracting the size
   * from the stack base does not work for at least the main thread.
   */
  LOCK();
  {
    IF_CANCEL(int cancel_state;)
    ptr_t reg_sp;
    ptr_t lower_stack;

    DISABLE_CANCEL(cancel_state);
    reg_sp = GC_save_regs_in_stack();
    lower_stack = GC_greatest_stack_base_below(reg_sp);
    if (lower_stack != NULL) {
      /*
       * Avoid walking backwards into preceding memory stack and
       * growing it.
       */
      b->reg_base = GC_find_limit_with_bound(reg_sp, FALSE, lower_stack);
    } else {
      b->reg_base = GC_find_limit(reg_sp, FALSE);
    }
    RESTORE_CANCEL(cancel_state);
  }
  UNLOCK();
#  elif defined(E2K)
  b->reg_base = NULL;
#  endif
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* THREADS && (HAVE_PTHREAD_ATTR_GET_NP || HAVE_PTHREAD_GETATTR_NP) */

#if defined(DARWIN) && defined(THREADS) \
    && !defined(NO_PTHREAD_GET_STACKADDR_NP)
#  include <pthread.h>

/*
 * Store the stack address of the calling thread, as reported by
 * `pthread_get_stackaddr_np()`, to `b`.  Always returns `GC_SUCCESS`.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *b)
{
  /*
   * `pthread_get_stackaddr_np()` should return stack bottom (highest
   * stack address plus 1).
   */
  void *stack_addr = pthread_get_stackaddr_np(pthread_self());

  b->mem_base = stack_addr;
  GC_ASSERT(HOTTER_THAN(GC_approx_sp(), (ptr_t)stack_addr));
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* DARWIN && THREADS && !NO_PTHREAD_GET_STACKADDR_NP */

#if defined(OPENBSD) && defined(THREADS)
#  include <pthread.h>
#  include <pthread_np.h>
#  include <sys/signal.h>

/*
 * Store the stack segment pointer of the calling thread, as reported by
 * `pthread_stackseg_np()`, to `sb`.  Aborts if the query fails.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  stack_t seg;
  int res = pthread_stackseg_np(pthread_self(), &seg);

  if (res != 0)
    ABORT("pthread_stackseg_np(self) failed");
  sb->mem_base = seg.ss_sp;
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* OPENBSD && THREADS */

#if defined(SOLARIS) && defined(THREADS) && !defined(_STRICT_STDC)

#  include <pthread.h>
#  include <thread.h>

/*
 * These variables are used to cache `ss_sp` value for the primordial
 * thread (it is better not to call `thr_stksegment()` twice for this
 * thread - see JDK bug #4352906).
 * Note: `stackbase_main_self` set to zero means `stackbase_main_ss_sp`
 * value is unset.
 */
static pthread_t stackbase_main_self = 0;
static void *stackbase_main_ss_sp = NULL;

#  ifdef CAN_HANDLE_FORK
/*
 * Adjust the cached primordial thread identifier in the child process:
 * it is replaced with the identifier of the current thread if the fork
 * was performed by the thread the cache belongs to, and is reset to zero
 * (meaning the cached value is unset) otherwise.
 */
GC_INNER void
GC_stackbase_info_update_after_fork(void)
{
  stackbase_main_self
      = stackbase_main_self == GC_parent_pthread_self ? pthread_self() : 0;
}
#  endif

/*
 * Store the stack bottom of the calling thread to `b`.  The value is
 * obtained by `thr_stksegment()` except for the thread whose value has
 * been cached already (the primordial one).  Aborts if
 * `thr_stksegment()` fails.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *b)
{
  stack_t seg;
  pthread_t cur = pthread_self();

  if (cur != stackbase_main_self) {
    if (thr_stksegment(&seg) != 0) {
      /*
       * According to the manual, the only failure error code returned is
       * `EAGAIN` meaning "the information is not available due to the
       * thread is not yet completely initialized or it is an internal
       * thread" - this should not happen here.
       */
      ABORT("thr_stksegment failed");
    }
    /* `seg.ss_sp` holds the pointer to the stack bottom. */
    GC_ASSERT(HOTTER_THAN(GC_approx_sp(), (ptr_t)seg.ss_sp));

    if (0 == stackbase_main_self && thr_main() != 0) {
      /*
       * Cache the stack bottom pointer for the primordial thread
       * (this is done during `GC_init`, so there is no race).
       */
      stackbase_main_ss_sp = seg.ss_sp;
      stackbase_main_self = cur;
    }
    b->mem_base = seg.ss_sp;
    return GC_SUCCESS;
  }

  /*
   * The client calls `GC_get_stack_base()` from the thread the cached
   * value belongs to, so just return the latter.
   */
  b->mem_base = stackbase_main_ss_sp;
  GC_ASSERT(b->mem_base != NULL);
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* SOLARIS && THREADS */

#if defined(RTEMS) && defined(THREADS)
/* Store the result of `rtems_get_stack_bottom()` to `sb`. */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *sb)
{
  void *bottom = rtems_get_stack_bottom();

  sb->mem_base = bottom;
  return GC_SUCCESS;
}
#  define HAVE_GET_STACK_BASE
#endif /* RTEMS && THREADS */

#ifndef HAVE_GET_STACK_BASE

#  ifdef NEED_FIND_LIMIT
/*
 * Generic variant based on `GC_find_limit()`: probe for the end of
 * the accessible memory starting from the current stack pointer (and,
 * on IA-64, from the current register backing store pointer) and store
 * the found limit(s) to `b`.  The probing is performed with the
 * allocator lock held.  Always returns `GC_SUCCESS`.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *b)
{
  IF_CANCEL(int cancel_state;)

  /*
   * Note: using the `GC_find_limit` variant is risky; in the IA-64 case,
   * e.g., there is no guard page between the stack of one thread and the
   * register backing store of the next; thus this is likely to identify way
   * too large a "stack" and thus at least result in disastrous performance.
   */
  /* TODO: Implement better strategies here. */
  LOCK();
  /* TODO: `DISABLE_CANCEL` may be unnecessary? */
  DISABLE_CANCEL(cancel_state);
  /* The search direction is opposite to the stack growth one. */
#    ifdef STACK_GROWS_UP
  b->mem_base = GC_find_limit(GC_approx_sp(), FALSE);
#    else
  b->mem_base = GC_find_limit(GC_approx_sp(), TRUE /* `up` */);
#    endif
#    ifdef IA64
  b->reg_base = GC_find_limit(GC_save_regs_in_stack(), FALSE);
#    elif defined(E2K)
  b->reg_base = NULL;
#    endif
  RESTORE_CANCEL(cancel_state);
  UNLOCK();
  return GC_SUCCESS;
}
#  else /* !NEED_FIND_LIMIT */
/*
 * Last-resort variant: report the main stack base if a special way of
 * obtaining it exists in a single-threaded non-IA-64 build; otherwise
 * report that the functionality is not implemented.
 */
GC_API int GC_CALL
GC_get_stack_base(struct GC_stack_base *b)
{
#    if !defined(GET_MAIN_STACKBASE_SPECIAL) || defined(THREADS) \
        || defined(IA64)
  UNUSED_ARG(b);
  return GC_UNIMPLEMENTED;
#    else
  b->mem_base = GC_get_main_stack_base();
  return GC_SUCCESS;
#    endif
}
#  endif

#endif /* !HAVE_GET_STACK_BASE */

#ifndef GET_MAIN_STACKBASE_SPECIAL
/*
 * Default implementation: the main stack base is whatever
 * `GC_get_stack_base()` reports for the calling thread.
 * Aborts if the latter does not succeed.
 */
GC_INNER ptr_t
GC_get_main_stack_base(void)
{
  struct GC_stack_base base_info;
  int status = GC_get_stack_base(&base_info);
  ptr_t bottom;

  if (status != GC_SUCCESS)
    ABORT("GC_get_stack_base failed");
  bottom = (ptr_t)base_info.mem_base;
  GC_ASSERT(HOTTER_THAN(GC_approx_sp(), bottom));
  return bottom;
}
#endif /* !GET_MAIN_STACKBASE_SPECIAL */

/*
 * Register static data segment(s) as roots.  If more data segments are
 * added later, then they need to be registered at that point (as we do
 * with SunOS dynamic loading), or `GC_mark_roots` needs to check for them.
 */

#ifdef ANY_MSWIN

#  if defined(GWW_VDB)
#    ifndef MEM_WRITE_WATCH
#      define MEM_WRITE_WATCH 0x200000
#    endif
#    ifndef WRITE_WATCH_FLAG_RESET
#      define WRITE_WATCH_FLAG_RESET 1
#    endif

/*
 * Since we cannot easily check whether `ULONG_PTR` and `SIZE_T` are
 * defined in Win32 `basetsd.h` file, we define own `ULONG_PTR`.
 */
#    define GC_ULONG_PTR word

typedef UINT(WINAPI *GetWriteWatch_type)(DWORD, PVOID,
                                         GC_ULONG_PTR /* `SIZE_T` */, PVOID *,
                                         GC_ULONG_PTR *, PULONG);
static FARPROC GetWriteWatch_func;
static DWORD GetWriteWatch_alloc_flag;

#    define GC_GWW_AVAILABLE() (GetWriteWatch_func != 0)

/*
 * Detect, once, whether `GetWriteWatch()` is usable.  On success,
 * `GetWriteWatch_func` is left non-null and `GetWriteWatch_alloc_flag`
 * is set to `MEM_WRITE_WATCH`; otherwise `GetWriteWatch_func` ends up
 * zero.  The outcome is remembered in the static `done` flag, so the
 * subsequent calls return immediately.
 */
static void
detect_GetWriteWatch(void)
{
  static GC_bool done;
  HMODULE hK32;
  if (done)
    return;

#    if defined(MPROTECT_VDB)
  {
    char *str = GETENV("GC_USE_GETWRITEWATCH");
#      if defined(GC_PREFER_MPROTECT_VDB)
    if (NULL == str || (*str == '0' && *(str + 1) == '\0')) {
      /*
       * `GC_USE_GETWRITEWATCH` environment variable is unset or set to "0".
       * Falling back to `MPROTECT_VDB` strategy.
       */
      done = TRUE;
      /* This should work as if `GWW_VDB` macro is not defined. */
      return;
    }
#      else
    if (str != NULL && *str == '0' && *(str + 1) == '\0') {
      /*
       * `GC_USE_GETWRITEWATCH` environment variable is set to "0".
       * Falling back to `MPROTECT_VDB` strategy.
       */
      done = TRUE;
      return;
    }
#      endif
  }
#    endif

  /* Obtain the handle of the module to look up `GetWriteWatch` in. */
#    if defined(MSWINRT_FLAVOR) && defined(FUNCPTR_IS_DATAPTR)
  {
    /*
     * Use the allocation base of the region containing
     * `GetProcAddress` as the module handle.
     */
    MEMORY_BASIC_INFORMATION memInfo;
    SIZE_T result = VirtualQuery(CAST_THRU_UINTPTR(void *, GetProcAddress),
                                 &memInfo, sizeof(memInfo));

    if (result != sizeof(memInfo))
      ABORT("Weird VirtualQuery result");
    hK32 = (HMODULE)memInfo.AllocationBase;
  }
#    else
  hK32 = GetModuleHandle(TEXT("kernel32.dll"));
#    endif
  if (hK32 != (HMODULE)0
      && (GetWriteWatch_func = GetProcAddress(hK32, "GetWriteWatch")) != 0) {
    void *page;

    GC_ASSERT(GC_page_size != 0);
    /*
     * Also check whether `VirtualAlloc()` accepts `MEM_WRITE_WATCH`,
     * as some versions of `kernel32.dll` library have one but not the other,
     * making the feature completely broken.
     */
    page = VirtualAlloc(NULL, GC_page_size, MEM_WRITE_WATCH | MEM_RESERVE,
                        PAGE_READWRITE);
    if (page != NULL) {
      PVOID pages[16];
      GC_ULONG_PTR count = sizeof(pages) / sizeof(PVOID);
      DWORD page_size;
      /*
       * Check that it actually works.  In spite of some documentation
       * it actually seems to exist on Win2K.
       * This test may be unnecessary, but...
       */
      if ((*(GetWriteWatch_type)(GC_funcptr_uint)GetWriteWatch_func)(
              WRITE_WATCH_FLAG_RESET, page, GC_page_size, pages, &count,
              &page_size)
          != 0) {
        /* `GetWriteWatch()` always fails. */
        GetWriteWatch_func = 0;
      } else {
        GetWriteWatch_alloc_flag = MEM_WRITE_WATCH;
      }
      /* The probe page is not needed anymore. */
      VirtualFree(page, 0 /* `dwSize` */, MEM_RELEASE);
    } else {
      /* `GetWriteWatch` will be useless. */
      GetWriteWatch_func = 0;
    }
  }
  done = TRUE;
}

#  else
#    define GetWriteWatch_alloc_flag 0
#  endif /* !GWW_VDB */

#  ifdef MSWIN32
/*
 * Unfortunately, we have to handle win32s very differently from Windows NT,
 * since `VirtualQuery()` has very different semantics.  In particular,
 * under win32s a `VirtualQuery()` call on an unmapped page returns
 * an invalid result.  Under Windows NT, `GC_register_data_segments()` is
 * a no-op and all real work is done by `GC_register_dynamic_libraries()`.
 * Under win32s, we cannot find the data segments associated with DLL files.
 * We register the main data segment here.
 */

GC_INNER GC_bool GC_no_win32_dlls = FALSE;

GC_INNER GC_bool GC_wnt = FALSE;

GC_INNER void
GC_init_win32(void)
{
#    if defined(_WIN64) || (defined(_MSC_VER) && _MSC_VER >= 1800)
  /*
   * MS Visual Studio 2013 deprecates `GetVersion`, but on the other hand
   * it cannot be used to target pre-Win2K.
   */
  GC_wnt = TRUE;
#    else
  /*
   * Set `GC_wnt`.  If we are running under win32s, assume that no DLL file
   * will be loaded.  I doubt anyone still runs win32s, but...
   */
  DWORD v = GetVersion();

  GC_wnt = !(v & (DWORD)0x80000000UL);
  GC_no_win32_dlls |= ((!GC_wnt) && (v & 0xff) <= 3);
#    endif
#    ifdef USE_MUNMAP
  if (GC_no_win32_dlls) {
    /*
     * Turn off unmapping for safety (since may not work well with
     * `GlobalAlloc()`).
     */
    GC_unmap_threshold = 0;
  }
#    endif
}

/*
 * Starting from the page containing `start`, walk towards lower addresses
 * one allocation at a time while `VirtualQuery()` keeps returning a valid
 * description, and return the lowest address reached.  The walk never
 * goes below the minimum application address.  It is assumed that
 * `VirtualQuery()` returns correct information for `start` itself.
 */
STATIC ptr_t
GC_least_described_address(ptr_t start)
{
  ptr_t lowest = (ptr_t)GC_sysinfo.lpMinimumApplicationAddress;
  ptr_t cur = PTR_ALIGN_DOWN(start, GC_page_size);

  GC_ASSERT(GC_page_size != 0);
  /* Note: the loop condition also prevents an underflow below. */
  while (!UNLIKELY(ADDR(cur) <= (word)GC_page_size)) {
    MEMORY_BASIC_INFORMATION info;
    ptr_t prev_page = cur - GC_page_size;

    if (ADDR_LT(prev_page, lowest))
      break;
    if (VirtualQuery((LPVOID)prev_page, &info, sizeof(info)) != sizeof(info))
      break;
    if (NULL == info.AllocationBase)
      break;
    cur = (ptr_t)info.AllocationBase;
  }
  return cur;
}

/*
 * Register, as roots, the committed writable regions found around
 * `static_root`; adjacent regions are merged into a single root range.
 * The scan stops at the first region which is not described properly or
 * which belongs to a heap.  Does nothing unless `GC_no_win32_dlls` is set.
 * The allocator lock should be held by the caller.
 */
STATIC void
GC_register_root_section(ptr_t static_root)
{
  MEMORY_BASIC_INFORMATION info;
  ptr_t cur, seg_start, seg_end;

  GC_ASSERT(I_HOLD_LOCK());
  if (!GC_no_win32_dlls)
    return;

  seg_start = GC_least_described_address(static_root);
  seg_end = seg_start;
  for (cur = seg_start;
       ADDR_LT(cur, (ptr_t)GC_sysinfo.lpMaximumApplicationAddress);
       cur += info.RegionSize) {
    if (VirtualQuery((LPVOID)cur, &info, sizeof(info)) != sizeof(info))
      break;
    if (NULL == info.AllocationBase || GC_is_heap_base(info.AllocationBase))
      break;
    /* Avoid overflow. */
    if (ADDR(cur) > GC_WORD_MAX - info.RegionSize)
      break;

    if (info.State != MEM_COMMIT || !is_writable(info.Protect))
      continue;
    if (cur != seg_end) {
      /* A gap: flush the accumulated range (if any) and start a new one. */
      if (seg_start != seg_end)
        GC_add_roots_inner(seg_start, seg_end, FALSE);
      seg_start = cur;
    }
    seg_end = cur + info.RegionSize;
  }
  if (seg_start != seg_end)
    GC_add_roots_inner(seg_start, seg_end, FALSE);
}
#  endif /* MSWIN32 */

#  if defined(USE_WINALLOC) && !defined(REDIRECT_MALLOC)
/*
 * We maintain a linked list of `AllocationBase` values that we know
 * correspond to `malloc` heap sections.  Currently this is only called
 * during a collection.  But there is some hope that for long-running
 * programs we will eventually see most heap sections.
 *
 * In the long run, it would be more reliable to occasionally walk
 * the `malloc` heap with `HeapWalk()` on the default heap.
 * But that apparently works only for NT-based Windows.
 */

/* Note: initialized to approximate largest root size. */
#    define GC_INITIAL_MAX_ROOT_SIZE 100000
STATIC size_t GC_max_root_size = GC_INITIAL_MAX_ROOT_SIZE;

/* In the long run, a better data structure would also be nice... */
STATIC struct GC_malloc_heap_list {
  void *allocation_base;
  struct GC_malloc_heap_list *next;
} *GC_malloc_heap_l = NULL;

/*
 * Check whether `p` equals the allocation base of some `malloc` heap
 * section recorded in the `GC_malloc_heap_l` list.
 */
STATIC GC_bool
GC_is_malloc_heap_base(const void *p)
{
  const struct GC_malloc_heap_list *node = GC_malloc_heap_l;

  while (node != NULL) {
    if (p == node->allocation_base)
      return TRUE;
    node = node->next;
  }
  return FALSE;
}

/*
 * Return the `AllocationBase` value reported by `VirtualQuery()` for
 * the region containing `p`.  Aborts on an unexpected query result.
 */
STATIC void *
GC_get_allocation_base(void *p)
{
  MEMORY_BASIC_INFORMATION info;

  if (VirtualQuery(p, &info, sizeof(info)) != sizeof(info))
    ABORT("Weird VirtualQuery result");
  return info.AllocationBase;
}

GC_INNER void
GC_add_current_malloc_heap(void)
{
  struct GC_malloc_heap_list *new_l = (struct GC_malloc_heap_list *)malloc(
      sizeof(struct GC_malloc_heap_list));
  void *candidate;

  if (NULL == new_l)
    return;
  /* Explicitly set to suppress "maybe-uninitialized" gcc warning. */
  new_l->allocation_base = NULL;

  candidate = GC_get_allocation_base(new_l);
  if (GC_is_malloc_heap_base(candidate)) {
    /* Try a little harder to find `malloc` heap. */
    size_t req_size = 10000;

    do {
      void *p = malloc(req_size);

      if (NULL == p) {
        free(new_l);
        return;
      }
      candidate = GC_get_allocation_base(p);
      free(p);
      req_size *= 2;
    } while (GC_is_malloc_heap_base(candidate)
             && req_size < GC_max_root_size / 10 && req_size < 500000);
    if (GC_is_malloc_heap_base(candidate)) {
      free(new_l);
      return;
    }
  }
  GC_COND_LOG_PRINTF("Found new system malloc AllocationBase at %p\n",
                     candidate);
  new_l->allocation_base = candidate;
  new_l->next = GC_malloc_heap_l;
  GC_malloc_heap_l = new_l;
}

#    ifndef GC_NO_DEINIT
/*
 * Release all the nodes of the `GC_malloc_heap_l` list and reset
 * `GC_max_root_size` to its initial value.  Could be invoked at process
 * exit to avoid memory leak complaints of a dynamic code analysis tool.
 */
STATIC void
GC_free_malloc_heap_list(void)
{
  struct GC_malloc_heap_list *node = GC_malloc_heap_l;
  struct GC_malloc_heap_list *following;

  GC_malloc_heap_l = NULL;
  GC_max_root_size = GC_INITIAL_MAX_ROOT_SIZE;
  for (; node != NULL; node = following) {
    /* Fetch the link before the node is gone. */
    following = node->next;
    free(node);
  }
}
#    endif
#  endif /* USE_WINALLOC && !REDIRECT_MALLOC */

/*
 * Check whether `p` is one of the recorded heap bases (`GC_heap_bases`)
 * or, if the `malloc` heap sections are tracked, the base of a known
 * `malloc` heap section.  In the latter configuration, `GC_max_root_size`
 * is also raised to `GC_root_size` if the latter is bigger.
 */
GC_INNER GC_bool
GC_is_heap_base(const void *p)
{
  size_t idx = 0;

#  if defined(USE_WINALLOC) && !defined(REDIRECT_MALLOC)
  if (GC_max_root_size < GC_root_size)
    GC_max_root_size = GC_root_size;
  if (GC_is_malloc_heap_base(p))
    return TRUE;
#  endif
  while (idx < GC_n_heap_bases) {
    if (p == GC_heap_bases[idx])
      return TRUE;
    idx++;
  }
  return FALSE;
}

/*
 * Register the section containing the collector static data as a root
 * (`MSWIN32` only; otherwise this is a no-op).
 */
GC_INNER void
GC_register_data_segments(void)
{
#  ifdef MSWIN32
  /* Note: any other GC global variable would fit too. */
  ptr_t some_static = (ptr_t)&GC_pages_executable;

  GC_register_root_section(some_static);
#  endif
}

#endif /* ANY_MSWIN */

#ifdef DATASTART_USES_XGETDATASTART
#  ifdef CHERI_PURECAP
#    include <link.h>

/*
 * The CheriBSD LLVM compiler declares `etext`, `edata` and `end` as
 * typeless variables.  If the collector library is statically linked
 * with the executable, these capabilities are compiled with the
 * read-only permissions and bounds that span the `.data` and `.bss`
 * sections.  If the collector is compiled as a shared library, these
 * symbols are compiled with zero bounds and cannot be dereferenced;
 * instead, the read-only capability returned by the loader is used.
 */

/*
 * The client data passed to `ld_cap_search()` through
 * `dl_iterate_phdr()`: the requested address range and the resulting
 * capability.
 */
struct scan_bounds_s {
  /* The address the resulting capability should start at. */
  word start_addr;
  /* The address the resulting capability should end at. */
  word end_addr;
  /* The derived capability; set by `ld_cap_search()` on success. */
  ptr_t ld_cap;
};

/*
 * A `dl_iterate_phdr()` callback.  If the capability of the load address
 * of the given object spans the range requested in `cd` (which points to
 * `struct scan_bounds_s`), then a capability bounded to that range is
 * stored to `ld_cap` field and the iteration is stopped.
 */
static int
ld_cap_search(struct dl_phdr_info *info, size_t size, void *cd)
{
  struct scan_bounds_s *bounds = (struct scan_bounds_s *)cd;
  ptr_t obj_base = (ptr_t)info->dlpi_addr;

  UNUSED_ARG(size);
  if (SPANNING_CAPABILITY(obj_base, bounds->start_addr, bounds->end_addr)) {
    bounds->ld_cap = (ptr_t)cheri_bounds_set(
        cheri_address_set(obj_base, bounds->start_addr),
        bounds->end_addr - bounds->start_addr);
    return 1; /*< stop */
  }
  return 0; /*< continue with the next object */
}

/*
 * Return a capability spanning the addresses from `range_start` to
 * `range_end`.  One of the arguments is returned if it spans the range
 * already; otherwise the capability is derived from the one provided by
 * the loader.  Aborts if no suitable capability is found.
 */
static ptr_t
derive_cap_from_ldr(ptr_t range_start, ptr_t range_end)
{
  struct scan_bounds_s wanted;

  wanted.start_addr = ADDR(range_start);
  wanted.end_addr = ADDR(range_end);

  /* If symbols already span the required range, return one of them. */
  if (SPANNING_CAPABILITY(range_start, wanted.start_addr, wanted.end_addr))
    return range_start;
  if (SPANNING_CAPABILITY(range_end, wanted.start_addr, wanted.end_addr))
    return range_end;

  /*
   * Fall-back option: derive `.data` plus `.bss` end pointer from the
   * read-only capability provided by loader.
   */
  wanted.ld_cap = NULL; /*< prevent a compiler warning */
  if (dl_iterate_phdr(ld_cap_search, &wanted) == 0)
    ABORT("Cannot find static roots for capability system");
  GC_ASSERT(wanted.ld_cap != NULL);
  return wanted.ld_cap;
}
#  endif /* CHERI_PURECAP */

/*
 * Compute the start of the data segment given the end of the text one
 * (`etext_ptr`) and the maximum page size (`max_page_size`, which should
 * be a multiple of `ALIGNMENT`).  The candidate address is probed under
 * a temporary fault handler; if the probe faults, then the result is
 * found by `GC_find_limit()` going downwards from `DATAEND`.
 */
GC_INNER ptr_t
GC_SysVGetDataStart(size_t max_page_size, ptr_t etext_ptr)
{
  /* Note: declared `volatile` as it is accessed after `longjmp`. */
  volatile ptr_t result;

  GC_ASSERT(max_page_size % ALIGNMENT == 0);
  result = PTR_ALIGN_UP(etext_ptr, ALIGNMENT);
#  ifdef CHERI_PURECAP
  result = derive_cap_from_ldr(result, DATAEND);
#  endif

  GC_setup_temporary_fault_handler();
  if (SETJMP(GC_jmp_buf) == 0) {
    /*
     * Note that this is not equivalent to just adding `max_page_size` to
     * `etext_ptr` because the latter is not guaranteed to be multiple of
     * the page size.
     */
    ptr_t next_page = PTR_ALIGN_UP(result, max_page_size);

#  ifdef FREEBSD
    /*
     * It is unclear whether this should be identical to the below, or
     * whether it should apply to non-x86 architectures.  For now we
     * do not assume that there is always an empty page after `etext`.
     * But in some cases there actually seems to be slightly more.
     * It also deals with holes between read-only and writable data.
     *
     * Try reading at the address.  This should happen before there is
     * another thread.
     */
    for (; ADDR_LT(next_page, DATAEND); next_page += max_page_size) {
      GC_noop1((word)(*(volatile unsigned char *)next_page));
    }
#  else
    /* Keep the offset within the page same as that of `result`. */
    result = next_page + (ADDR(result) & ((word)max_page_size - 1));
    /* Try writing to the address. */
    {
#    ifdef AO_HAVE_fetch_and_add
      volatile AO_t zero = 0;

      /* Adding zero leaves the stored value unchanged. */
      (void)AO_fetch_and_add((volatile AO_t *)result, zero);
#    else
      /* Fall back to non-atomic fetch-and-store. */
      char v = *result;

#      ifdef CPPCHECK
      GC_noop1_ptr(&v);
#      endif
      *result = v;
#    endif
    }
#  endif
    GC_reset_fault_handler();
  } else {
    GC_reset_fault_handler();
    /*
     * We got here via a `longjmp`.  The address is not readable.
     * This is known to happen under Solaris 2.4 with gcc, which places
     * string constants in the `text` segment, but after `etext`.
     * Use plan B.  Note that we now know there is a gap between `text`
     * and `data` segments, so plan A brought us something.
     */
#  ifdef CHERI_PURECAP
    result = (ptr_t)GC_find_limit(cheri_address_set(result, ADDR(DATAEND)),
                                  FALSE);
#  else
    result = (ptr_t)GC_find_limit(DATAEND, FALSE);
#  endif
  }
  return (ptr_t)CAST_AWAY_VOLATILE_PVOID(result);
}
#endif /* DATASTART_USES_XGETDATASTART */

#if defined(OS2)
/*
 * Register the data segments of the running executable as roots.
 * The executable file is located via `DosGetInfoBlocks()` and
 * `DosQueryModuleName()`, its MSDOS and OS/2 headers are read and
 * validated, and then every object table entry which is both readable
 * and writable (and not marked as invalid) is added to the root set.
 * Aborts if the file cannot be opened, read or has unexpected format.
 */
GC_INNER void
GC_register_data_segments(void)
{
  PTIB ptib;
  PPIB ppib;
  HMODULE module_handle;
#  define PBUFSIZ 512
  UCHAR path[PBUFSIZ];
  FILE *myexefile;
  struct exe_hdr hdrdos; /*< MSDOS header */
  struct e32_exe hdr386; /*< real header for my executable */
  struct o32_obj seg;    /*< current segment */
  int nsegs;

#  if defined(CPPCHECK)
  hdrdos.padding[0] = 0; /*< to prevent "field unused" warnings */
  hdr386.exe_format_level = 0;
  hdr386.os = 0;
  hdr386.padding1[0] = 0;
  hdr386.padding2[0] = 0;
  seg.pagemap = 0;
  seg.mapsize = 0;
  seg.reserved = 0;
#  endif
  if (DosGetInfoBlocks(&ptib, &ppib) != NO_ERROR) {
    ABORT("DosGetInfoBlocks failed");
  }
  module_handle = ppib->pib_hmte;
  if (DosQueryModuleName(module_handle, PBUFSIZ, path) != NO_ERROR) {
    ABORT("DosQueryModuleName failed");
  }
  myexefile = fopen(path, "rb");
  if (myexefile == 0) {
    ABORT_ARG1("Failed to open executable", ": %s", path);
  }
  if (fread((char *)&hdrdos, 1, sizeof(hdrdos), myexefile) < sizeof(hdrdos)) {
    ABORT_ARG1("Could not read MSDOS header", " from: %s", path);
  }
  if (E_MAGIC(hdrdos) != EMAGIC) {
    ABORT_ARG1("Bad DOS magic number", " in file: %s", path);
  }
  /* Position to the new (OS/2) header the MSDOS one refers to. */
  if (fseek(myexefile, E_LFANEW(hdrdos), SEEK_SET) != 0) {
    ABORT_ARG1("Seek to new header failed", " in file: %s", path);
  }
  if (fread((char *)&hdr386, 1, sizeof(hdr386), myexefile) < sizeof(hdr386)) {
    ABORT_ARG1("Could not read OS/2 header", " from: %s", path);
  }
  if (E32_MAGIC1(hdr386) != E32MAGIC1 || E32_MAGIC2(hdr386) != E32MAGIC2) {
    ABORT_ARG1("Bad OS/2 magic number", " in file: %s", path);
  }
  if (E32_BORDER(hdr386) != E32LEBO || E32_WORDER(hdr386) != E32LEWO) {
    ABORT_ARG1("Bad byte order in executable", " file: %s", path);
  }
  if (E32_CPU(hdr386) == E32CPU286) {
    ABORT_ARG1("GC cannot handle 80286 executables", ": %s", path);
  }
  /* The object table offset is relative to the new header. */
  if (fseek(myexefile, E_LFANEW(hdrdos) + E32_OBJTAB(hdr386), SEEK_SET) != 0) {
    ABORT_ARG1("Seek to object table failed", " in file: %s", path);
  }
  for (nsegs = E32_OBJCNT(hdr386); nsegs > 0; nsegs--) {
    int flags;
    if (fread((char *)&seg, 1, sizeof(seg), myexefile) < sizeof(seg)) {
      ABORT_ARG1("Could not read obj table entry", " from file: %s", path);
    }
    flags = O32_FLAGS(seg);
    /* Only the objects both writable and readable are of interest. */
    if (!(flags & OBJWRITE))
      continue;
    if (!(flags & OBJREAD))
      continue;
    if (flags & OBJINVALID) {
      GC_err_printf("Object with invalid pages?\n");
      continue;
    }
    GC_add_roots_inner((ptr_t)O32_BASE(seg),
                       (ptr_t)(O32_BASE(seg) + O32_SIZE(seg)), FALSE);
  }
  (void)fclose(myexefile);
}

#elif defined(OPENBSD)
GC_INNER void
GC_register_data_segments(void)
{
  /*
   * The span `DATASTART` .. `DATAEND` may contain several holes
   * (depending on arch alignment), so it is walked piecewise and each
   * region found is registered as a separate root set.
   */
  ptr_t seg_start = DATASTART;
  ptr_t seg_end;

  GC_ASSERT(I_HOLD_LOCK());
  /* Subtracting one makes a `NULL` start fail the same comparison. */
  if (ADDR(seg_start) - 1U >= ADDR(DATAEND)) {
    ABORT_ARG2("Wrong DATASTART/END pair", ": %p .. %p", (void *)seg_start,
               (void *)DATAEND);
  }

  seg_end = GC_find_limit_with_bound(seg_start, TRUE, DATAEND);
  GC_add_roots_inner(seg_start, seg_end, FALSE);
  while (!ADDR_GE(seg_end, DATAEND)) {
    /* Step over the hole and register the region that follows it. */
    seg_start = GC_skip_hole_openbsd(seg_end, DATAEND);
    seg_end = GC_find_limit_with_bound(seg_start, TRUE, DATAEND);
    GC_add_roots_inner(seg_start, seg_end, FALSE);
  }
}

#elif !defined(ANY_MSWIN)
GC_INNER void
GC_register_data_segments(void)
{
  GC_ASSERT(I_HOLD_LOCK());
#  if !defined(DYNAMIC_LOADING) && defined(GC_DONT_REGISTER_MAIN_STATIC_DATA)
  /*
   * Nothing is registered in this configuration (and this function is
   * not called anyway).  Even a mere reference to `DATASTART` or
   * `DATAEND` is avoided, as they are unnecessary and cause linker
   * errors when bitcode is enabled.
   */
#  elif defined(DYNAMIC_LOADING) && (defined(DARWIN) || defined(HAIKU))
  /*
   * Nothing to do here: `GC_register_main_static_data()` always
   * returns `FALSE`.
   */
#  elif defined(REDIRECT_MALLOC) && defined(SOLARIS) && defined(THREADS)
  /*
   * The Solaris threads implementation (as of Solaris 2.3) allocates
   * the data structure of the initial thread with `sbrk` at the process
   * startup.  That structure has to be scanned, otherwise some
   * `malloc`-allocated data structures hanging from it could be lost.
   * We are on thin ice here...
   */
  GC_ASSERT(DATASTART);
  {
    ptr_t cur_brk = (ptr_t)sbrk(0);

    if (ADDR_LT(DATASTART, cur_brk)) {
      GC_add_roots_inner(DATASTART, cur_brk, FALSE);
    }
  }
#  else
  /*
   * One is subtracted so that the same comparison also rejects `NULL`
   * (without a compiler warning).
   */
  if (ADDR(DATASTART) - 1U >= ADDR(DATAEND))
    ABORT_ARG2("Wrong DATASTART/END pair", ": %p .. %p", (void *)DATASTART,
               (void *)DATAEND);
  GC_add_roots_inner(DATASTART, DATAEND, FALSE);
#    ifdef GC_HAVE_DATAREGION2
  /* The second data region is validated and registered the same way. */
  if (ADDR(DATASTART2) - 1U >= ADDR(DATAEND2)) {
    ABORT_ARG2("Wrong DATASTART/END2 pair", ": %p .. %p", (void *)DATASTART2,
               (void *)DATAEND2);
  }
  GC_add_roots_inner(DATASTART2, DATAEND2, FALSE);
#    endif
#  endif
  /*
   * Dynamic libraries are not handled here: they are added at every
   * collection, since they may change.
   */
}
#endif /* !ANY_MSWIN && !OPENBSD && !OS2 */

/* Auxiliary routines for obtaining memory from OS. */

#ifdef NEED_UNIX_GET_MEM

#  define SBRK_ARG_T ptrdiff_t

#  if defined(MMAP_SUPPORTED)

#    ifdef USE_MMAP_FIXED
/*
 * Seems to yield better performance on Solaris 2, but can be unreliable
 * if something is already mapped at the address.
 */
#      define GC_MMAP_FLAGS MAP_FIXED | MAP_PRIVATE
#    else
#      define GC_MMAP_FLAGS MAP_PRIVATE
#    endif

#    ifdef USE_MMAP_ANON
#      define zero_fd -1
#      if defined(MAP_ANONYMOUS) && !defined(CPPCHECK)
#        define OPT_MAP_ANON MAP_ANONYMOUS
#      else
#        define OPT_MAP_ANON MAP_ANON
#      endif
#    else
static int zero_fd = -1;
#      define OPT_MAP_ANON 0
#    endif

#    ifndef MSWIN_XBOX1
#      if defined(SYMBIAN) && !defined(USE_MMAP_ANON)
EXTERN_C_BEGIN
extern char *GC_get_private_path_and_zero_file(void);
EXTERN_C_END
#      endif

/*
 * Obtain `bytes` of memory from the OS by means of `mmap()`.  The value
 * of `bytes` should be a multiple of `GC_page_size` (the function aborts
 * otherwise).  The address just past the previous successful mapping is
 * passed to `mmap()` as its first argument.  Returns `NULL` if `mmap()`
 * fails, except for the case when executable pages are denied while
 * `last_addr` still has its initial value (this causes abort).
 */
STATIC void *
GC_unix_mmap_get_mem(size_t bytes)
{
  void *result;
  /* Kept across calls; updated after every successful mapping. */
  static word last_addr = HEAP_START;

#      ifndef USE_MMAP_ANON
  /* Set once `zero_fd` has been opened (on the first call). */
  static GC_bool initialized = FALSE;

  if (UNLIKELY(!initialized)) {
#        ifdef SYMBIAN
    /* The path is released with `free()` right after the `open()` call. */
    char *path = GC_get_private_path_and_zero_file();
    if (path != NULL) {
      zero_fd = open(path, O_RDWR | O_CREAT, 0644);
      free(path);
    }
#        else
    zero_fd = open("/dev/zero", O_RDONLY);
#        endif
    if (zero_fd == -1)
      ABORT("Could not open /dev/zero");
    /* A failure to set the close-on-exec flag is only warned about. */
    if (fcntl(zero_fd, F_SETFD, FD_CLOEXEC) == -1)
      WARN("Could not set FD_CLOEXEC for /dev/zero\n", 0);

    initialized = TRUE;
  }
#      endif

  GC_ASSERT(GC_page_size != 0);
  /* Only requests of a whole number of pages are accepted. */
  if (bytes & (GC_page_size - 1))
    ABORT("Bad GET_MEM arg");
  /*
   * Note: it is essential for CHERI to have only address part in
   * `last_addr` without metadata (thus the variable is of `word` type
   * intentionally), otherwise `mmap()` fails setting `errno` to `EPROT`.
   */
  result
      = mmap(MAKE_CPTR(last_addr), bytes,
             (PROT_READ | PROT_WRITE) | (GC_pages_executable ? PROT_EXEC : 0),
             GC_MMAP_FLAGS | OPT_MAP_ANON, zero_fd, 0 /* `offset` */);
#      undef IGNORE_PAGES_EXECUTABLE

  if (UNLIKELY(MAP_FAILED == result)) {
    /*
     * Abort only if executable pages are requested, the failure is
     * a permission one, and `last_addr` is still at its initial value.
     */
    if (HEAP_START == last_addr && GC_pages_executable
        && (EACCES == errno || EPERM == errno))
      ABORT("Cannot allocate executable pages");
    return NULL;
  }
#      ifdef LINUX
  GC_ASSERT(ADDR(result) <= ~(word)(GC_page_size - 1) - bytes);
  /* The following `PTR_ALIGN_UP()` cannot overflow. */
#      else
  if (UNLIKELY(ADDR(result) > ~(word)(GC_page_size - 1) - bytes)) {
    /*
     * Oops.  We got the end of the address space.  This is not usable
     * by arbitrary C code, since one-past-end pointers do not work,
     * so we discard it and try again.  Leave the last page mapped,
     * so we cannot repeat.
     */
    (void)munmap(result, ~(GC_page_size - 1) - (size_t)ADDR(result));
    return GC_unix_mmap_get_mem(bytes);
  }
#      endif
  if ((ADDR(result) % HBLKSIZE) != 0)
    ABORT("Memory returned by mmap is not aligned to HBLKSIZE");
  /* Remember where the next mapping should preferably be placed. */
  last_addr = ADDR(result) + bytes;
  GC_ASSERT((last_addr & (GC_page_size - 1)) == 0);
  return result;
}
#    endif /* !MSWIN_XBOX1 */

#  endif /* MMAP_SUPPORTED */

#  if defined(USE_MMAP)

GC_INNER void *
GC_unix_get_mem(size_t bytes)
{
  /* With `USE_MMAP` defined, `mmap()` is the only source of memory. */
  void *mem = GC_unix_mmap_get_mem(bytes);

  return mem;
}

#  else /* !USE_MMAP */

/*
 * Obtain `bytes` of memory by moving the program break with `sbrk()`.
 * The break is first advanced to a page boundary if it is not aligned.
 * Returns `NULL` if `bytes` does not fit `SBRK_ARG_T` or `sbrk()` fails.
 */
STATIC void *
GC_unix_sbrk_get_mem(size_t bytes)
{
  void *result = NULL;

#    ifdef IRIX5
  /*
   * Bare `sbrk()` is not thread-safe, so play by `malloc` rules.
   * The equivalent may be needed on other systems as well.
   */
  __LOCK_MALLOC();
#    endif
  {
    ptr_t brk_now = (ptr_t)sbrk(0);
    SBRK_ARG_T misalign = ADDR(brk_now) & (GC_page_size - 1);

    GC_ASSERT(GC_page_size != 0);
    /*
     * Proceed only if `bytes` is not too big (i.e. it is non-negative
     * as `SBRK_ARG_T`) and the break is (or has just been made)
     * page-aligned.
     */
    if ((SBRK_ARG_T)bytes >= 0
        && (0 == misalign
            || (ptr_t)sbrk((SBRK_ARG_T)GC_page_size - misalign)
                   != (ptr_t)(-1))) {
#    ifdef ADD_HEAP_GUARD_PAGES
      /*
       * This is useful for catching severe memory overwrite problems
       * that span heap sections.  It should not otherwise be turned on.
       */
      {
        ptr_t guard = (ptr_t)sbrk((SBRK_ARG_T)GC_page_size);
        if (mprotect(guard, GC_page_size, PROT_NONE) != 0)
          ABORT("ADD_HEAP_GUARD_PAGES: mprotect failed");
      }
#    endif
      result = sbrk((SBRK_ARG_T)bytes);
      /* `sbrk()` reports a failure by returning `(void *)-1`. */
      if (ADDR(result) == GC_WORD_MAX)
        result = NULL;
    }
  }
#    ifdef IRIX5
  __UNLOCK_MALLOC();
#    endif
  return result;
}

GC_INNER void *
GC_unix_get_mem(size_t bytes)
{
#    if defined(MMAP_SUPPORTED)
  /*
   * By default both `sbrk` and `mmap` are tried, in that order.
   * Once `sbrk` has failed, `mmap` is preferred on subsequent calls.
   */
  static GC_bool sbrk_failed = FALSE;
  void *mem;

  /* `sbrk()` cannot provide memory with the execute permission. */
  if (GC_pages_executable)
    return GC_unix_mmap_get_mem(bytes);

  mem = sbrk_failed ? NULL : GC_unix_sbrk_get_mem(bytes);
  if (mem != NULL)
    return mem;

  sbrk_failed = TRUE;
  mem = GC_unix_mmap_get_mem(bytes);
  if (NULL == mem) {
    /* Give `sbrk()` one more chance, in case its memory became available. */
    mem = GC_unix_sbrk_get_mem(bytes);
  }
  return mem;
#    else /* !MMAP_SUPPORTED */
  return GC_unix_sbrk_get_mem(bytes);
#    endif
}

#  endif /* !USE_MMAP */

#endif /* NEED_UNIX_GET_MEM */

#if defined(OS2)
GC_INNER void *
GC_get_mem(size_t bytes)
{
  void *result = NULL;
  int retry;

  GC_ASSERT(GC_page_size != 0);
  bytes = SIZET_SAT_ADD(bytes, GC_page_size);
  for (retry = 0;; retry++) {
    if (DosAllocMem(&result, bytes,
                    (PAG_READ | PAG_WRITE | PAG_COMMIT)
                        | (GC_pages_executable ? PAG_EXECUTE : 0))
            == NO_ERROR
        && LIKELY(result != NULL))
      break;
    /*
     * TODO: Unclear the purpose of the retry.  (Probably, if `DosAllocMem`
     * returns memory at address zero, then just retry once.)
     */
    if (retry >= 1)
      return NULL;
  }
  return HBLKPTR((ptr_t)result + GC_page_size - 1);
}

#elif defined(MSWIN_XBOX1)
GC_INNER void *
GC_get_mem(size_t bytes)
{
  /* A zero-sized request yields `NULL` without calling the OS. */
  return UNLIKELY(0 == bytes)
             ? NULL
             : VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_TOP_DOWN,
                            PAGE_READWRITE);
}

#elif defined(MSWINCE)
/*
 * Allocate `bytes` (rounded up to the page size) of committed memory.
 * The address space is reserved in chunks sized to a multiple of the
 * allocation granularity; each chunk is recorded in `GC_heap_bases` and
 * the amount committed in it so far in `GC_heap_lengths`.  The remainder
 * of an already reserved chunk is reused if it is large enough.
 * Returns `NULL` if `VirtualAlloc()` fails.
 */
GC_INNER void *
GC_get_mem(size_t bytes)
{
  void *result = NULL; /*< initialized to prevent a compiler warning */
  size_t i;

  GC_ASSERT(GC_page_size != 0);
  bytes = ROUNDUP_PAGESIZE(bytes);

  /* Try to find reserved, uncommitted pages. */
  for (i = 0; i < GC_n_heap_bases; i++) {
    /*
     * The expression yields the distance from the committed length to
     * the next multiple of the allocation granularity.
     */
    if (((word)(-(GC_signed_word)GC_heap_lengths[i])
         & (GC_sysinfo.dwAllocationGranularity - 1))
        >= bytes) {
      result = GC_heap_bases[i] + GC_heap_lengths[i];
      break;
    }
  }

  if (i == GC_n_heap_bases) {
    /* Reserve more pages. */
    size_t res_bytes
        = SIZET_SAT_ADD(bytes, (size_t)GC_sysinfo.dwAllocationGranularity - 1)
          & ~((size_t)GC_sysinfo.dwAllocationGranularity - 1);
    /*
     * If we ever support `MPROTECT_VDB` here, we will probably need
     * to ensure that `res_bytes` is greater (strictly) than `bytes`,
     * so that `VirtualProtect()` never spans regions.  It seems to be
     * fine for a `VirtualFree()` argument to span regions, so we
     * should be OK for now.
     */
    result = VirtualAlloc(NULL, res_bytes, MEM_RESERVE | MEM_TOP_DOWN,
                          GC_pages_executable ? PAGE_EXECUTE_READWRITE
                                              : PAGE_READWRITE);
    if (HBLKDISPL(result) != 0) {
      /*
       * If I read the documentation correctly, this can only happen
       * if `HBLKSIZE` is greater than 64 KB or not a power of 2.
       */
      ABORT("Bad VirtualAlloc result");
    }
    if (GC_n_heap_bases >= MAX_HEAP_SECTS)
      ABORT("Too many heap sections");
    if (UNLIKELY(NULL == result))
      return NULL;
    /* Record the new chunk; nothing is committed in it yet. */
    GC_heap_bases[GC_n_heap_bases] = (ptr_t)result;
    GC_heap_lengths[GC_n_heap_bases] = 0;
    GC_n_heap_bases++;
  }

  /* Commit pages. */
  result = VirtualAlloc(result, bytes, MEM_COMMIT,
                        GC_pages_executable ? PAGE_EXECUTE_READWRITE
                                            : PAGE_READWRITE);
#  undef IGNORE_PAGES_EXECUTABLE

  if (HBLKDISPL(result) != 0)
    ABORT("Bad VirtualAlloc result");
  /*
   * Here `i` is the index of either the reused chunk or the chunk that
   * has just been appended above.
   */
  if (LIKELY(result != NULL))
    GC_heap_lengths[i] += bytes;
  return result;
}

#elif defined(CYGWIN32) || defined(MSWIN32)
#  ifdef USE_GLOBAL_ALLOC
#    define GLOBAL_ALLOC_TEST 1
#  else
#    define GLOBAL_ALLOC_TEST GC_no_win32_dlls
#  endif

#  if (defined(GC_USE_MEM_TOP_DOWN) && defined(USE_WINALLOC)) \
      || defined(CPPCHECK)
/*
 * Use `GC_USE_MEM_TOP_DOWN` for better 64-bit testing.
 * Otherwise all addresses tend to end up in the first 4 GB, hiding bugs.
 */
DWORD GC_mem_top_down = MEM_TOP_DOWN;
#  else
#    define GC_mem_top_down 0
#  endif /* !GC_USE_MEM_TOP_DOWN */

/*
 * Allocate `bytes` of heap memory.  Unless `USE_WINALLOC` is defined,
 * the request is passed to `GC_unix_get_mem()`; otherwise the memory is
 * obtained with `GlobalAlloc()` (if `GLOBAL_ALLOC_TEST` holds) or with
 * `VirtualAlloc()`.  A non-`NULL` result is appended to `GC_heap_bases`.
 * Returns `NULL` if the underlying allocation fails; aborts if the
 * result is not `HBLKSIZE`-aligned or `GC_heap_bases` is full.
 */
GC_INNER void *
GC_get_mem(size_t bytes)
{
  void *result;

#  ifndef USE_WINALLOC
  result = GC_unix_get_mem(bytes);
#  else
#    if defined(MSWIN32) && !defined(MSWINRT_FLAVOR)
  if (GLOBAL_ALLOC_TEST) {
    /*
     * `VirtualAlloc()` does not like `PAGE_EXECUTE_READWRITE`.
     * There are also unconfirmed rumors of other problems, so we
     * dodge the issue.
     */
    result = GlobalAlloc(0, SIZET_SAT_ADD(bytes, HBLKSIZE));
    /* Align it at `HBLKSIZE` boundary (`NULL` value remains unchanged). */
    /*
     * NOTE(review): the aligned address (not necessarily the value
     * returned by `GlobalAlloc()`) is what gets recorded in
     * `GC_heap_bases` below - confirm it is valid to pass to `GlobalFree()`.
     */
    result = PTR_ALIGN_UP((ptr_t)result, HBLKSIZE);
  } else
#    endif
  /* else */ {
    /*
     * `VirtualProtect()` only works on regions returned by a single
     * `VirtualAlloc()` call.  Thus we allocate one extra page, which will
     * prevent merging of blocks in separate regions, and eliminate any
     * temptation to call `VirtualProtect()` on a range spanning regions.
     * This wastes a small amount of memory, and risks increased
     * fragmentation.  But better alternatives would require effort.
     */
#    ifdef MPROTECT_VDB
    /*
     * We cannot check for `GC_incremental` here (because
     * `GC_enable_incremental()` might be called some time later after
     * the collector initialization).
     */
#      ifdef GWW_VDB
#        define VIRTUAL_ALLOC_PAD (GC_GWW_AVAILABLE() ? 0 : 1)
#      else
#        define VIRTUAL_ALLOC_PAD 1
#      endif
#    else
#      define VIRTUAL_ALLOC_PAD 0
#    endif
    /*
     * Pass `MEM_WRITE_WATCH` only if `GetWriteWatch`-based VDB is
     * enabled and `GetWriteWatch()` is available.  Otherwise we waste
     * resources or possibly cause `VirtualAlloc()` to fail (observed
     * in Windows 2000 SP2).
     */
    result = VirtualAlloc(
        NULL, SIZET_SAT_ADD(bytes, VIRTUAL_ALLOC_PAD),
        MEM_COMMIT | MEM_RESERVE | GetWriteWatch_alloc_flag | GC_mem_top_down,
        GC_pages_executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
#    undef IGNORE_PAGES_EXECUTABLE
  }
#  endif
  /* Note: these sanity checks are performed even if `result` is `NULL`. */
  if (HBLKDISPL(result) != 0)
    ABORT("Bad VirtualAlloc result");
  if (GC_n_heap_bases >= MAX_HEAP_SECTS)
    ABORT("Too many heap sections");
  /* Remember the base so that `GC_win32_free_heap()` can release it. */
  if (LIKELY(result != NULL))
    GC_heap_bases[GC_n_heap_bases++] = (ptr_t)result;
  return result;
}
#endif /* CYGWIN32 || MSWIN32 */

#if (defined(ANY_MSWIN) || defined(MSWIN_XBOX1)) && !defined(GC_NO_DEINIT)
/*
 * Release the heap memory recorded in `GC_heap_bases` (emptying that
 * array).  Depending on the configuration, the entries are passed to
 * `GlobalFree()` or to `VirtualFree()`; on Cygwin the entries are only
 * cleared.  If `USE_WINALLOC` is defined and `REDIRECT_MALLOC` is not,
 * `GC_free_malloc_heap_list()` is called first.
 */
GC_API void GC_CALL
GC_win32_free_heap(void)
{
#  if defined(USE_WINALLOC) && !defined(REDIRECT_MALLOC)
  GC_free_malloc_heap_list();
#  endif
#  if defined(CYGWIN32) || defined(MSWIN32)
#    ifndef MSWINRT_FLAVOR
  /* On Cygwin, the block below is executed unconditionally. */
#      ifdef MSWIN32
  if (GLOBAL_ALLOC_TEST)
#      endif
  {
    while (GC_n_heap_bases > 0) {
      GC_n_heap_bases--;
#      ifdef CYGWIN32
      /* FIXME: Is it OK to use non-GC `free()` here? */
#      else
      GlobalFree(GC_heap_bases[GC_n_heap_bases]);
#      endif
      GC_heap_bases[GC_n_heap_bases] = 0;
    }
    return;
  }
#    endif /* !MSWINRT_FLAVOR */
#    ifndef CYGWIN32
  /* Avoiding `VirtualAlloc` leak. */
  while (GC_n_heap_bases > 0) {
    VirtualFree(GC_heap_bases[--GC_n_heap_bases], 0, MEM_RELEASE);
    GC_heap_bases[GC_n_heap_bases] = 0;
  }
#    endif
#  endif
}
#endif

#if (defined(USE_MUNMAP) || defined(MPROTECT_VDB)) && !defined(USE_WINALLOC)
/*
 * Abort with a message composed of the `C_msg_prefix` string literal
 * followed by " failed"; the address, the length and the current
 * `errno` value are passed as the extra arguments to be reported.
 */
#  define ABORT_ON_REMAP_FAIL(C_msg_prefix, start_addr, len)             \
    ABORT_ARG3(C_msg_prefix " failed", " at %p (length %lu), errno= %d", \
               (void *)(start_addr), (unsigned long)(len), errno)
#endif

#ifdef USE_MUNMAP

#  if !defined(NN_PLATFORM_CTR) && !defined(MSWIN32) && !defined(MSWINCE) \
      && !defined(MSWIN_XBOX1)
#    ifdef SN_TARGET_PS3
#      include <sys/memory.h>
#    else
#      include <sys/mman.h>
#    endif
#    include <sys/stat.h>
#  endif

/*
 * Compute a page-aligned starting address for the memory unmap
 * operation on a block of size `bytes` starting at `start`.
 * Return `NULL` if the block is too small to make this feasible.
 */
STATIC ptr_t
GC_unmap_start(ptr_t start, size_t bytes)
{
  ptr_t aligned;

  GC_ASSERT(GC_page_size != 0);
  aligned = PTR_ALIGN_UP(start, GC_page_size);
  /* At least one whole page should fit between `aligned` and the end. */
  return ADDR_LT(start + bytes, aligned + GC_page_size) ? NULL : aligned;
}

/*
 * We assume that `GC_remap` is called on exactly the same range as
 * the previous call to `GC_unmap`.  It is safe to consistently round
 * the endpoints in both places.
 */

/*
 * Give the pages in the range of `len` bytes starting at `start_addr`
 * back to the OS (the way is platform-specific) and increase
 * `GC_unmapped_bytes` accordingly.  A no-op if `start_addr` is `NULL`
 * or `len` is zero.  Aborts if the underlying system call fails.
 */
static void
block_unmap_inner(ptr_t start_addr, size_t len)
{
  if (0 == start_addr)
    return;

#  ifdef USE_WINALLOC
  /*
   * Under Win32/WinCE we commit (map) and decommit (unmap) memory
   * using `VirtualAlloc()` and `VirtualFree()`.  These functions
   * work on individual allocations of virtual memory, made
   * previously using `VirtualAlloc()` with the `MEM_RESERVE` flag.
   * The ranges we need to (de)commit may span several of these
   * allocations; therefore we use `VirtualQuery()` to check
   * allocation lengths, and split up the range as necessary.
   */
  while (len != 0) {
    MEMORY_BASIC_INFORMATION mem_info;
    word free_len;

    if (VirtualQuery(start_addr, &mem_info, sizeof(mem_info))
        != sizeof(mem_info))
      ABORT("Weird VirtualQuery result");
    /* Decommit no more than the region reported by `VirtualQuery()`. */
    free_len = (len < mem_info.RegionSize) ? len : mem_info.RegionSize;
    if (!VirtualFree(start_addr, free_len, MEM_DECOMMIT))
      ABORT("VirtualFree failed");
    GC_unmapped_bytes += free_len;
    start_addr += free_len;
    len -= free_len;
  }
#  else
  if (len != 0) {
#    ifdef SN_TARGET_PS3
    ps3_free_mem(start_addr, len);
#    elif defined(AIX) || defined(COSMO) || defined(CYGWIN32) \
        || defined(HPUX)                                      \
        || (defined(LINUX) && !defined(PREFER_MMAP_PROT_NONE))
    /*
     * On AIX, `mmap(PROT_NONE)` fails with `ENOMEM` unless the
     * environment variable `XPG_SUS_ENV` is set to `ON`.
     * On Cygwin, calling `mmap()` with the new protection flags on
     * an existing memory map with `MAP_FIXED` is broken.
     * However, calling `mprotect()` on the given address range
     * with `PROT_NONE` seems to work fine.  On Linux, low `RLIMIT_AS`
     * value may lead to `mmap()` failure.
     */
#      if (defined(COSMO) || defined(LINUX)) \
          && !defined(FORCE_MPROTECT_BEFORE_MADVISE)
    /* On Linux, at least, `madvise()` should be sufficient. */
#      else
    if (mprotect(start_addr, len, PROT_NONE))
      ABORT_ON_REMAP_FAIL("unmap: mprotect", start_addr, len);
#      endif
#      if !defined(CYGWIN32)
    /*
     * On Linux (and some other platforms probably), `mprotect(PROT_NONE)`
     * is just disabling access to the pages but not returning them to OS.
     */
    if (madvise(start_addr, len, MADV_DONTNEED) == -1)
      ABORT_ON_REMAP_FAIL("unmap: madvise", start_addr, len);
#      endif
#    else
    /*
     * We immediately remap it to prevent an intervening `mmap()` from
     * accidentally grabbing the same address space.
     */
    void *result = mmap(start_addr, len, PROT_NONE,
                        MAP_PRIVATE | MAP_FIXED | OPT_MAP_ANON, zero_fd,
                        0 /* `offset` */);

    if (UNLIKELY(MAP_FAILED == result))
      ABORT_ON_REMAP_FAIL("unmap: mmap", start_addr, len);
    if (result != start_addr)
      ABORT("unmap: mmap() result differs from start_addr");
#      if defined(CPPCHECK) || defined(LINT2)
    /* Explicitly store the resource handle to a global variable. */
    GC_noop1_ptr(result);
#      endif
#    endif
    /* Account for the whole range at once. */
    GC_unmapped_bytes += len;
  }
#  endif
}

/* Compute end address for an unmap operation on the indicated block. */
GC_INLINE ptr_t
GC_unmap_end(ptr_t start, size_t bytes)
{
  /* The address just past the block, before the alignment is applied. */
  ptr_t block_end = start + bytes;

  return (ptr_t)HBLK_PAGE_ALIGNED(block_end);
}

/*
 * Unmap the page-aligned interior of the block of size `bytes`
 * starting at `start`.
 */
GC_INNER void
GC_unmap(ptr_t start, size_t bytes)
{
  ptr_t range_start = GC_unmap_start(start, bytes);
  size_t range_len = (size_t)(GC_unmap_end(start, bytes) - range_start);

  /* A `NULL` start (the block is too small) is handled by the callee. */
  block_unmap_inner(range_start, range_len);
}

/*
 * Make the page-aligned interior of the block (of size `bytes`, starting
 * at `start`) accessible again and decrease `GC_unmapped_bytes`
 * accordingly.  A no-op if `GC_unmap_start()` returns `NULL` for the
 * block (i.e. it is too small).  Aborts if the system call fails.
 */
GC_INNER void
GC_remap(ptr_t start, size_t bytes)
{
  /* The range is computed exactly the same way as in `GC_unmap()`. */
  ptr_t start_addr = GC_unmap_start(start, bytes);
  ptr_t end_addr = GC_unmap_end(start, bytes);
  word len = (word)(end_addr - start_addr);
  if (0 == start_addr) {
    return;
  }

  /* FIXME: Handle out-of-memory correctly (at least for Win32). */
#  ifdef USE_WINALLOC
  /*
   * The range is committed piecewise, no more than one region (as
   * reported by `VirtualQuery()`) at a time.
   */
  while (len != 0) {
    MEMORY_BASIC_INFORMATION mem_info;
    word alloc_len;
    ptr_t result;

    if (VirtualQuery(start_addr, &mem_info, sizeof(mem_info))
        != sizeof(mem_info))
      ABORT("Weird VirtualQuery result");
    alloc_len = (len < mem_info.RegionSize) ? len : mem_info.RegionSize;
    result = (ptr_t)VirtualAlloc(start_addr, alloc_len, MEM_COMMIT,
                                 GC_pages_executable ? PAGE_EXECUTE_READWRITE
                                                     : PAGE_READWRITE);
    if (result != start_addr) {
      /* The error code only selects the message to abort with. */
      if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY
          || GetLastError() == ERROR_OUTOFMEMORY) {
        ABORT("Not enough memory to process remapping");
      } else {
        ABORT("VirtualAlloc remapping failed");
      }
    }
#    ifdef LINT2
    GC_noop1_ptr(result);
#    endif
    GC_ASSERT(GC_unmapped_bytes >= alloc_len);
    GC_unmapped_bytes -= alloc_len;
    start_addr += alloc_len;
    len -= alloc_len;
  }
#    undef IGNORE_PAGES_EXECUTABLE
#  else
  /* It was already remapped with `PROT_NONE`. */
  {
#    if !defined(SN_TARGET_PS3) && !defined(FORCE_MPROTECT_BEFORE_MADVISE) \
        && (defined(LINUX) && !defined(PREFER_MMAP_PROT_NONE)              \
            || defined(COSMO))
    /* Nothing to unprotect as `madvise()` is just a hint. */
#    elif defined(COSMO) || defined(NACL) || defined(NETBSD)
    /*
     * NaCl does not expose `mprotect`, but `mmap` should work fine.
     * In case of NetBSD, `mprotect` fails (unlike `mmap`) even without
     * `PROT_EXEC` if PaX `MPROTECT` feature is enabled.
     */
    void *result = mmap(
        start_addr, len,
        (PROT_READ | PROT_WRITE) | (GC_pages_executable ? PROT_EXEC : 0),
        MAP_PRIVATE | MAP_FIXED | OPT_MAP_ANON, zero_fd, 0 /* `offset` */);
    if (UNLIKELY(MAP_FAILED == result))
      ABORT_ON_REMAP_FAIL("remap: mmap", start_addr, len);
    if (result != start_addr)
      ABORT("remap: mmap() result differs from start_addr");
#      if defined(CPPCHECK) || defined(LINT2)
    GC_noop1_ptr(result);
#      endif
#      undef IGNORE_PAGES_EXECUTABLE
#    else
    /* Restore the read/write (and, optionally, execute) permissions. */
    if (mprotect(start_addr, len,
                 (PROT_READ | PROT_WRITE)
                     | (GC_pages_executable ? PROT_EXEC : 0)))
      ABORT_ON_REMAP_FAIL("remap: mprotect", start_addr, len);
#      undef IGNORE_PAGES_EXECUTABLE
#    endif /* !NACL */
  }
  GC_ASSERT(GC_unmapped_bytes >= len);
  GC_unmapped_bytes -= len;
#  endif
}

/*
 * Unmap the pages lying between the unmappable ranges of two adjacent
 * blocks (the second block should start right where the first one ends).
 */
GC_INNER void
GC_unmap_gap(ptr_t start1, size_t bytes1, ptr_t start2, size_t bytes2)
{
  size_t total_bytes = bytes1 + bytes2;
  ptr_t gap_start;
  ptr_t gap_end;

  GC_ASSERT(start1 + bytes1 == start2);
  /*
   * The gap begins where the range of the first block ends; if the
   * first block has no such range, then the start is computed for the
   * combined block instead.
   */
  if (GC_unmap_start(start1, bytes1) != 0) {
    gap_start = GC_unmap_end(start1, bytes1);
  } else {
    gap_start = GC_unmap_start(start1, total_bytes);
  }
  /* Similarly for the end of the gap and the second block. */
  gap_end = GC_unmap_start(start2, bytes2);
  if (0 == gap_end) {
    gap_end = GC_unmap_end(start1, total_bytes);
  }
  block_unmap_inner(gap_start, (size_t)(gap_end - gap_start));
}

#endif /* USE_MUNMAP */

/*
 * Routine for pushing any additional roots.  In the multi-threaded
 * environment, this is also responsible for marking from thread stacks.
 */
#ifndef THREADS

#  if defined(EMSCRIPTEN) && defined(EMSCRIPTEN_ASYNCIFY)
#    include <emscripten.h>

static void
scan_regs_cb(void *begin, void *finish)
{
  GC_push_all_stack((ptr_t)begin, (ptr_t)finish);
}

/*
 * The default procedure for pushing additional roots in the
 * single-threaded Emscripten build with Asyncify: `scan_regs_cb` is
 * passed to `emscripten_scan_registers()`.
 */
STATIC void GC_CALLBACK
GC_default_push_other_roots(void)
{
  /* Note: this needs `-sASYNCIFY` linker flag. */
  emscripten_scan_registers(scan_regs_cb);
}

#  else
#    define GC_default_push_other_roots 0
#  endif

#else /* THREADS */

#  if defined(SN_TARGET_PS3)
/*
 * A stub of the default procedure for pushing additional roots in the
 * multi-threaded build for `SN_TARGET_PS3`: it unconditionally aborts.
 */
STATIC void GC_CALLBACK
GC_default_push_other_roots(void)
{
  ABORT("GC_default_push_other_roots is not implemented");
}

/*
 * A stub for the multi-threaded `SN_TARGET_PS3` build: it
 * unconditionally aborts.
 */
GC_INNER void
GC_push_thread_structures(void)
{
  ABORT("GC_push_thread_structures is not implemented");
}

#  else /* GC_PTHREADS, etc. */
/*
 * The default procedure for pushing additional roots in the
 * multi-threaded environment: it just calls `GC_push_all_stacks()`.
 */
STATIC void GC_CALLBACK
GC_default_push_other_roots(void)
{
  GC_push_all_stacks();
}
#  endif

#endif /* THREADS */

/*
 * The currently installed procedure for pushing additional roots.
 * Initially it is the default one, which is defined as 0 in the
 * single-threaded build unless Emscripten Asyncify is used.
 */
GC_push_other_roots_proc GC_push_other_roots = GC_default_push_other_roots;

/*
 * Install `fn` as the procedure for pushing additional roots.  The value
 * is stored as is (no check and no locking is performed here).
 */
GC_API void GC_CALL
GC_set_push_other_roots(GC_push_other_roots_proc fn)
{
  GC_push_other_roots = fn;
}

/*
 * Return the currently installed procedure for pushing additional
 * roots (i.e. the current value of `GC_push_other_roots`).
 */
GC_API GC_push_other_roots_proc GC_CALL
GC_get_push_other_roots(void)
{
  return GC_push_other_roots;
}

#if defined(SOFT_VDB) && !defined(NO_SOFT_VDB_LINUX_VER_RUNTIME_CHECK) \
    || (defined(GLIBC_2_19_TSX_BUG) && defined(GC_PTHREADS_PARAMARK))
/*
 * Parse a version string of the "<major>[.<minor>]..." form (decimal
 * numbers).  Returns the major number, or -1 on a parse error.
 * The minor number is stored to `*pminor`; it is set to -1 if the major
 * number is not followed by a dot.  In case of a parse error of the
 * major part, `*pminor` is left untouched.
 */
GC_INNER int
GC_parse_version(int *pminor, const char *pverstr)
{
  char *tail;
  unsigned long num = strtoul(pverstr, &tail, 10);
  int major = (int)num;

  /*
   * Fail if no digits have been consumed or the value does not
   * survive the round-trip through `int`.
   */
  if (tail == (char *)pverstr || major < 0 || (unsigned)major != num)
    return -1;

  if ('.' == *tail) {
    int minor;

    num = strtoul(tail + 1, &tail, 10);
    minor = (int)num;
    /* The value is stored before it is validated. */
    *pminor = minor;
    if (minor < 0 || (unsigned)minor != num)
      return -1;
  } else {
    /* No minor part. */
    *pminor = -1;
  }
  return major;
}
#endif

/*
 * Routines for accessing dirty bits on virtual pages.  There are 6 ways to
 * maintain this information, as of now:
 *
 *   - `DEFAULT_VDB`: A simple dummy implementation that treats every page
 *     as possibly dirty.  This makes incremental collection useless, but
 *     the implementation is still correct.
 *
 *   - `MANUAL_VDB`: Stacks and static data are always considered dirty.
 *     Heap pages are considered dirty if `GC_dirty(p)` has been called on
 *     some `p` pointing to somewhere inside an object on that page.
 *     A `GC_dirty()` call on a large object directly dirties only a single
 *     page, but for the manual VDB we are careful to treat an object with
 *     a dirty page as completely dirty.  In order to avoid races, an object
 *     must be marked dirty after it is written, and a reference to the
 *     object must be kept on a stack or in a register in the interim.
 *     With threads enabled, an object directly reachable from the stack at
 *     the time of a collection is treated as dirty.  In the single-threaded
 *     mode, it suffices to ensure that no collection can take place between
 *     the pointer assignment and the `GC_dirty()` call.
 *
 *   - `PROC_VDB`: Use the `/proc` facility for reading dirty bits.
 *     Only works under some SVR4 variants.  Even then, it may be too slow
 *     to be entirely satisfactory.  Requires reading dirty bits for entire
 *     address space.  Implementations tend to assume that the client is
 *     a (slow) debugger.
 *
 *   - `SOFT_VDB`: Use the `/proc` facility for reading soft-dirty PTEs
 *     (page table entries).  Works on Linux 3.18+ if the kernel is
 *     properly configured.  The proposed implementation iterates over
 *     `GC_heap_sects` and `GC_static_roots` examining the soft-dirty bit
 *     of the `word` elements in `/proc/self/pagemap` file corresponding to
 *     the pages of the sections; finally all soft-dirty bits of the process
 *     are cleared (by writing some special value to `/proc/self/clear_refs`
 *     file).  In case the soft-dirty bit is not supported by the kernel,
 *     `MPROTECT_VDB` may be defined as a fall back strategy.
 *
 *   - `MPROTECT_VDB`: Protect pages and then catch the faults to keep
 *     track of dirtied pages.  The implementation (and implementability)
 *     is highly system-dependent.  This usually fails when system calls
 *     write to a protected page.  We prevent the `read` system call from
 *     doing so.  It is the client's responsibility to make sure that other
 *     system calls are similarly protected or write only to the stack.
 *
 *   - `GWW_VDB`: Use the Win32 `GetWriteWatch` function, if available, to
 *     read dirty bits.  In case it is not available (because we are
 *     running on Windows 95, Windows 2000 or earlier), `MPROTECT_VDB` may
 *     be defined as a fall back strategy.
 */

#if (defined(CHECKSUMS) && defined(GWW_VDB)) || defined(PROC_VDB)
/* Add all pages in `pht2` to `pht1`. */
STATIC void
GC_or_pages(page_hash_table pht1, const word *pht2)
{
  size_t idx = 0;

  /* Merge the tables word by word (`PHT_SIZE` words in total). */
  while (idx < PHT_SIZE) {
    pht1[idx] |= pht2[idx];
    idx++;
  }
}
#endif /* CHECKSUMS && GWW_VDB || PROC_VDB */

#ifdef GWW_VDB

/*
 * Note: this is still susceptible to overflow, if there are very large
 * allocations, and everything is dirty.
 */
#  define GC_GWW_BUF_LEN (MAXHINCR * HBLKSIZE / 4096 /* x86 page size */)
static PVOID gww_buf[GC_GWW_BUF_LEN];

#  ifndef MPROTECT_VDB
#    define GC_gww_dirty_init GC_dirty_init
#  endif

/*
 * Call `detect_GetWriteWatch()` and return the resulting value of
 * `GC_GWW_AVAILABLE()`.  Note: if `MPROTECT_VDB` is not defined, this
 * function is compiled under the `GC_dirty_init` name (because of the
 * macro defined above).
 */
GC_INNER GC_bool
GC_gww_dirty_init(void)
{
  /* No assumption about the allocator lock. */
  detect_GetWriteWatch();
  return GC_GWW_AVAILABLE();
}

/*
 * Retrieve (and reset) the write-watch state of every heap section by
 * means of `GetWriteWatch()`.  Unless `output_unneeded`, `GC_grungy_pages`
 * is cleared first, and then an entry is set for each heap block of
 * every page reported as written.  If the call fails for a section, all
 * heap blocks of that section are marked dirty instead.  The allocator
 * lock should be held by the caller.
 */
GC_INLINE void
GC_gww_read_dirty(GC_bool output_unneeded)
{
  size_t i;

  GC_ASSERT(I_HOLD_LOCK());
  if (!output_unneeded)
    BZERO(GC_grungy_pages, sizeof(GC_grungy_pages));

  for (i = 0; i < GC_n_heap_sects; ++i) {
    GC_ULONG_PTR count;

    do {
      PVOID *pages = gww_buf;
      DWORD page_size;

      /* On input, `count` is the capacity of the buffer. */
      count = GC_GWW_BUF_LEN;
      /*
       * `GetWriteWatch()` is documented as returning nonzero when
       * it fails, but the documentation does not explicitly say why
       * it would fail or what its behavior will be if it fails.
       * It does appear to fail, at least on recent Win2K instances,
       * if the underlying memory was not allocated with the appropriate
       * flag.  This is common if `GC_enable_incremental` is called
       * shortly after the collector initialization.  To avoid modifying
       * the interface, we silently work around such a failure, it only
       * affects the initial (small) heap allocation.  If there are
       * more dirty pages than will fit in the buffer, this is not
       * treated as a failure; we must check the page count in the
       * loop condition.  Since each partial call will reset the
       * status of some pages, this should eventually terminate even
       * in the overflow case.
       */
      if ((*(GetWriteWatch_type)(GC_funcptr_uint)GetWriteWatch_func)(
              WRITE_WATCH_FLAG_RESET, GC_heap_sects[i].hs_start,
              GC_heap_sects[i].hs_bytes, pages, &count, &page_size)
          != 0) {
        static int warn_count = 0;
        static const struct hblk *last_warned = NULL;
        struct hblk *start = (struct hblk *)GC_heap_sects[i].hs_start;
        size_t nblocks = divHBLKSZ(GC_heap_sects[i].hs_bytes);

        /*
         * The warning is not issued for the first heap section, is not
         * repeated for the same section twice in a row, and is limited
         * to 5 occurrences.
         */
        if (i != 0 && last_warned != start && warn_count++ < 5) {
          last_warned = start;
          WARN("GC_gww_read_dirty unexpectedly failed at %p:"
               " Falling back to marking all pages dirty\n",
               start);
        }
        if (!output_unneeded) {
          size_t j;

          /* Mark every heap block of the section as dirty. */
          for (j = 0; j < nblocks; ++j) {
            size_t index = PHT_HASH(start + j);

            set_pht_entry_from_index(GC_grungy_pages, index);
          }
        }
        /* Done with this section. */
        count = 1;
      } else if (!output_unneeded) { /*< succeeded */
        const PVOID *pages_end = pages + count;

        while (pages != pages_end) {
          struct hblk *h = (struct hblk *)(*pages++);
          ptr_t h_end = (ptr_t)h + page_size;

          /* Set an entry for each heap block within the reported page. */
          do {
            set_pht_entry_from_index(GC_grungy_pages, PHT_HASH(h));
            h++;
          } while (ADDR_LT((ptr_t)h, h_end));
        }
      }
      /* A completely filled buffer means there could be more pages. */
    } while (count == GC_GWW_BUF_LEN);
    /*
     * FIXME: It is unclear from Microsoft's documentation if this loop
     * is useful.  We suspect the call just fails if the buffer fills up.
     * But that should still be handled correctly.
     */
  }

#  ifdef CHECKSUMS
  GC_ASSERT(!output_unneeded);
  GC_or_pages(GC_written_pages, GC_grungy_pages);
#  endif
}

#elif defined(SOFT_VDB)
static int clear_refs_fd = -1;
#  define GC_GWW_AVAILABLE() (clear_refs_fd != -1)
#else
#  define GC_GWW_AVAILABLE() FALSE
#endif /* !GWW_VDB && !SOFT_VDB */

#ifdef DEFAULT_VDB
/*
 * The client asserts that unallocated pages in the heap are never
 * written.
 */

/*
 * Initialize the `DEFAULT_VDB` implementation.  There is nothing to set
 * up, thus the function only logs a message (in the verbose mode) and
 * always returns `TRUE`.
 */
GC_INNER GC_bool
GC_dirty_init(void)
{
  GC_VERBOSE_LOG_PRINTF("Initializing DEFAULT_VDB...\n");
  /* `GC_dirty_pages` and `GC_grungy_pages` are already cleared. */
  return TRUE;
}
#endif /* DEFAULT_VDB */

#if !defined(NO_MANUAL_VDB) || defined(MPROTECT_VDB)
#  if !defined(THREADS) || defined(HAVE_LOCKFREE_AO_OR)
#    ifdef MPROTECT_VDB
#      define async_set_pht_entry_from_index(db, index) \
        set_pht_entry_from_index_concurrent_volatile(db, index)
#    else
#      define async_set_pht_entry_from_index(db, index) \
        set_pht_entry_from_index_concurrent(db, index)
#    endif
#  elif defined(NEED_FAULT_HANDLER_LOCK)
/*
 * We need to lock around the bitmap update (in the write fault
 * handler or `GC_dirty`) in order to avoid the risk of losing a bit.
 * We do this with a test-and-set spin lock if possible.
 */
/*
 * Set the entry of the page hash table `db` at the given `index`,
 * performing the update between acquiring and releasing the dirty lock.
 */
static void
async_set_pht_entry_from_index(volatile page_hash_table db, size_t index)
{
  GC_acquire_dirty_lock();
  set_pht_entry_from_index(db, index);
  GC_release_dirty_lock();
}
#  else /* THREADS && !NEED_FAULT_HANDLER_LOCK */
#    error No test_and_set operation: Introduces a race.
#  endif
#endif /* !NO_MANUAL_VDB || MPROTECT_VDB */

#ifdef MPROTECT_VDB
/*
 * This implementation maintains dirty bits itself by catching write
 * faults and keeping track of them.  We assume nobody else catches
 * `SIGBUS` or `SIGSEGV`.  We assume no write faults occur in system
 * calls.  This means that clients must ensure that system calls do
 * not write to the write-protected heap.  Probably the best way to
 * do this is to ensure that system calls write at most to
 * pointer-free objects in the heap, and do even that only if we are
 * on a platform on which those are not protected (or the collector
 * is built with `DONT_PROTECT_PTRFREE` defined).  We assume the page
 * size is a multiple of `HBLKSIZE`.
 */

#  ifdef DARWIN
/* `#define BROKEN_EXCEPTION_HANDLING` */

/*
 * Using `vm_protect` (a `mach` `syscall`) over `mprotect` (a BSD `syscall`)
 * seems to decrease the likelihood of some of the problems described below.
 */
#    include <mach/vm_map.h>
/* The task port passed to `vm_protect()`; zero unless set elsewhere. */
STATIC mach_port_t GC_task_self = 0;
/*
 * Change the protection of `len` bytes at `addr` to read-only or (if
 * `allow_write`) read-write, plus execute if `GC_pages_executable`.
 * Aborts with the message prefixed by `C_msg_prefix` (a string literal)
 * if `vm_protect()` does not return `KERN_SUCCESS`.  The macro expands
 * to an `if`-`else` statement lacking the trailing semicolon.
 */
#    define PROTECT_INNER(addr, len, allow_write, C_msg_prefix)            \
      if (vm_protect(GC_task_self, (vm_address_t)(addr), (vm_size_t)(len), \
                     FALSE,                                                \
                     VM_PROT_READ | ((allow_write) ? VM_PROT_WRITE : 0)    \
                         | (GC_pages_executable ? VM_PROT_EXECUTE : 0))    \
          == KERN_SUCCESS) {                                               \
      } else                                                               \
        ABORT(C_msg_prefix "vm_protect() failed")

#  elif !defined(USE_WINALLOC)
#    include <sys/mman.h>
#    if !defined(AIX) && !defined(CYGWIN32) && !defined(HAIKU)
#      include <sys/syscall.h>
#    endif

/*
 * Change the protection of `len` bytes at `addr` by `mprotect()` to
 * read-only or (if `allow_write`) read-write, plus execute if
 * `GC_pages_executable`.  On failure, `ABORT_ON_REMAP_FAIL()` is invoked
 * with a message starting with `C_msg_prefix` (a string literal); the
 * message differs depending on whether the pages are executable.
 * The macro expands to an `if`-`else` statement lacking the trailing
 * semicolon.
 */
#    define PROTECT_INNER(addr, len, allow_write, C_msg_prefix)           \
      if (mprotect((caddr_t)(addr), (size_t)(len),                        \
                   PROT_READ | ((allow_write) ? PROT_WRITE : 0)           \
                       | (GC_pages_executable ? PROT_EXEC : 0))           \
          >= 0) {                                                         \
      } else if (GC_pages_executable) {                                   \
        ABORT_ON_REMAP_FAIL(C_msg_prefix "mprotect vdb executable pages", \
                            addr, len);                                   \
      } else                                                              \
        ABORT_ON_REMAP_FAIL(C_msg_prefix "mprotect vdb", addr, len)
#    undef IGNORE_PAGES_EXECUTABLE

#  else /* USE_WINALLOC */
/* Receives the previous protection value, which is not used afterwards. */
static DWORD protect_junk;
/*
 * Change the protection of `len` bytes at `addr` by `VirtualProtect()`
 * to read-only or (if `allow_write`) read-write, choosing the executable
 * variants of the protection constants if `GC_pages_executable`.
 * On failure, aborts with the message prefixed by `C_msg_prefix`
 * (a string literal) and reports the `GetLastError()` value.
 * The macro expands to an `if`-`else` statement lacking the trailing
 * semicolon.
 */
#    define PROTECT_INNER(addr, len, allow_write, C_msg_prefix)             \
      if (VirtualProtect(addr, len,                                         \
                         GC_pages_executable                                \
                             ? ((allow_write) ? PAGE_EXECUTE_READWRITE      \
                                              : PAGE_EXECUTE_READ)          \
                         : (allow_write) ? PAGE_READWRITE                   \
                                         : PAGE_READONLY,                   \
                         &protect_junk)) {                                  \
      } else                                                                \
        ABORT_ARG1(C_msg_prefix "VirtualProtect failed", ": errcode= 0x%X", \
                   (unsigned)GetLastError())
#  endif /* USE_WINALLOC */

#  define PROTECT(addr, len) PROTECT_INNER(addr, len, FALSE, "")
#  define UNPROTECT(addr, len) PROTECT_INNER(addr, len, TRUE, "un-")

/*
 * `SIG_HNDLR_PTR` is the type used to store the previously installed
 * fault handler (or exception filter).  On the Windows targets `SIG_DFL`
 * is redefined to an all-ones function pointer value of that type.
 * `PLAIN_HNDLR_PTR` is the single-argument signal handler type.
 */
#  if defined(MSWIN32)
typedef LPTOP_LEVEL_EXCEPTION_FILTER SIG_HNDLR_PTR;
#    undef SIG_DFL
#    define SIG_DFL ((LPTOP_LEVEL_EXCEPTION_FILTER)(~(GC_funcptr_uint)0))
#  elif defined(MSWINCE)
typedef LONG(WINAPI *SIG_HNDLR_PTR)(struct _EXCEPTION_POINTERS *);
#    undef SIG_DFL
#    define SIG_DFL ((SIG_HNDLR_PTR)(~(GC_funcptr_uint)0))
#  elif defined(DARWIN)
#    ifdef BROKEN_EXCEPTION_HANDLING
typedef void (*SIG_HNDLR_PTR)();
#    endif
#  else
typedef void (*SIG_HNDLR_PTR)(int, siginfo_t *, void *);
typedef void (*PLAIN_HNDLR_PTR)(int);
#  endif /* !DARWIN && !MSWIN32 && !MSWINCE */

/*
 * The handlers that were installed before the collector's write fault
 * handler.  Each `_used_si` flag records whether the matching old handler
 * was registered with `SA_SIGINFO` (and, thus, expects three arguments).
 */
#  ifndef DARWIN
/* Also old `MSWIN32` `ACCESS_VIOLATION` filter. */
STATIC SIG_HNDLR_PTR GC_old_segv_handler = 0;
#    ifdef USE_BUS_SIGACT
STATIC SIG_HNDLR_PTR GC_old_bus_handler = 0;
STATIC GC_bool GC_old_bus_handler_used_si = FALSE;
#    endif
#    if !defined(MSWIN32) && !defined(MSWINCE)
STATIC GC_bool GC_old_segv_handler_used_si = FALSE;
#    endif
#  endif /* !DARWIN */

#  ifdef THREADS
/*
 * Tell whether a block header exists for the address `p`.  Intended
 * for the fault handler only.  A data race with `GC_install_header` or
 * `GC_remove_header` is possible here but it should be harmless, as
 * a header being added or removed should belong to the memory which is
 * already unprotected.
 */
GC_ATTR_NO_SANITIZE_THREAD
static GC_bool
is_header_found_async(const void *p)
{
#    ifdef HASH_TL
  hdr *hhdr;

  GET_HDR(p, hhdr);
  return NULL != hhdr;
#    else
  return NULL != HDR_INNER(p);
#    endif
}
#  else
#    define is_header_found_async(p) (HDR(p) != NULL)
#  endif /* !THREADS */

#  ifndef DARWIN

/*
 * The write fault handler (`GC_write_fault_handler`) and the predicates
 * it relies on.  `SIG_OK` and `CODE_OK` are platform-specific checks of
 * the received signal (or exception) and of its code, respectively.
 * If both hold and a block header is found for the faulting address
 * (on `SUNOS5SIGS` targets, for any heap block of the faulting page),
 * then the page is unprotected and every heap block of the page is
 * recorded in `GC_dirty_pages`.  If no header is found, the fault is
 * passed to the previously installed handler; if that one is `SIG_DFL`,
 * the process is aborted (or, on Windows, the exception search is
 * continued).  If the predicates do not hold, the process is aborted
 * (or, on Windows, the exception search is continued).
 */
#    if !defined(MSWIN32) && !defined(MSWINCE)
#      include <errno.h>
#      ifdef USE_BUS_SIGACT
#        define SIG_OK (sig == SIGBUS || sig == SIGSEGV)
#      else
/* Catch `SIGSEGV` but ignore `SIGBUS`. */
#        define SIG_OK (sig == SIGSEGV)
#      endif
#      if defined(FREEBSD) || defined(OPENBSD)
#        ifndef SEGV_ACCERR
#          define SEGV_ACCERR 2
#        endif
#        if defined(AARCH64) || defined(ARM32) || defined(MIPS) \
            || (__FreeBSD__ >= 7 || defined(OPENBSD))
#          define CODE_OK (si->si_code == SEGV_ACCERR)
#        elif defined(POWERPC)
/* Pretend that we are AIM. */
#          define AIM
#          include <machine/trap.h>
#          define CODE_OK \
            (si->si_code == EXC_DSI || si->si_code == SEGV_ACCERR)
#        else
#          define CODE_OK \
            (si->si_code == BUS_PAGE_FAULT || si->si_code == SEGV_ACCERR)
#        endif
#      elif defined(OSF1)
#        define CODE_OK (si->si_code == 2) /*< experimentally determined */
#      elif defined(IRIX5)
#        define CODE_OK (si->si_code == EACCES)
#      elif defined(AIX) || defined(COSMO) || defined(CYGWIN32) \
          || defined(HAIKU) || defined(HURD) || defined(LINUX)  \
          || defined(NETBSD)
/*
 * Linux/i686: Empirically `c.trapno == 14`, but is that useful?
 * Should probably consider alignment issues on other architectures.
 */
#        define CODE_OK TRUE
#      elif defined(HPUX)
#        define CODE_OK                                                 \
          (si->si_code == SEGV_ACCERR || si->si_code == BUS_ADRERR      \
           || si->si_code == BUS_UNKNOWN || si->si_code == SEGV_UNKNOWN \
           || si->si_code == BUS_OBJERR)
#      elif defined(SUNOS5SIGS)
#        define CODE_OK (si->si_code == SEGV_ACCERR)
#      endif
#      ifndef NO_GETCONTEXT
#        include <ucontext.h>
#      endif
STATIC void
GC_write_fault_handler(int sig, siginfo_t *si, void *raw_sc)
#    else /* MSWIN32 || MSWINCE */
#      define SIG_OK \
        (exc_info->ExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION)
#      define CODE_OK                                       \
        (exc_info->ExceptionRecord->ExceptionInformation[0] \
         == 1) /*< write fault */
STATIC LONG WINAPI
GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info)
#    endif
{
  /* The faulting address as reported by the system. */
#    if !defined(MSWIN32) && !defined(MSWINCE)
  char *addr = (char *)si->si_addr;
#    else
  char *addr = (char *)exc_info->ExceptionRecord->ExceptionInformation[1];
#    endif

  if (SIG_OK && CODE_OK) {
    struct hblk *h = HBLK_PAGE_ALIGNED(addr);
    GC_bool in_allocd_block;
    size_t i;

    GC_ASSERT(GC_page_size != 0);
#    ifdef CHECKSUMS
    GC_record_fault(h);
#    endif
#    ifdef SUNOS5SIGS
    /* Address is only within the correct physical page. */
    in_allocd_block = FALSE;
    for (i = 0; i < divHBLKSZ(GC_page_size); i++) {
      if (is_header_found_async(&h[i])) {
        in_allocd_block = TRUE;
        break;
      }
    }
#    else
    in_allocd_block = is_header_found_async(addr);
#    endif
    if (!in_allocd_block) {
      /*
       * FIXME: We should make sure that we invoke the old handler with the
       * appropriate calling sequence, which often depends on `SA_SIGINFO`.
       */

      /* Heap blocks now begin and end on page boundaries. */
      SIG_HNDLR_PTR old_handler;

#    if defined(MSWIN32) || defined(MSWINCE)
      old_handler = GC_old_segv_handler;
#    else
      GC_bool used_si;

      /* Select the saved handler that matches the received signal. */
#      ifdef USE_BUS_SIGACT
      if (sig == SIGBUS) {
        old_handler = GC_old_bus_handler;
        used_si = GC_old_bus_handler_used_si;
      } else
#      endif
      /* else */ {
        old_handler = GC_old_segv_handler;
        used_si = GC_old_segv_handler_used_si;
      }
#    endif

      if ((GC_funcptr_uint)old_handler == (GC_funcptr_uint)SIG_DFL) {
#    if !defined(MSWIN32) && !defined(MSWINCE)
        ABORT_ARG1("Unexpected segmentation fault outside heap", " at %p",
                   (void *)addr);
#    else
        return EXCEPTION_CONTINUE_SEARCH;
#    endif
      } else {
        /*
         * FIXME: This code should probably check if the old signal handler
         * used the traditional style and if so, call it using that style.
         */
#    if defined(MSWIN32) || defined(MSWINCE)
        return (*old_handler)(exc_info);
#    else
        if (used_si)
          ((SIG_HNDLR_PTR)old_handler)(sig, si, raw_sc);
        else
          /* FIXME: Should pass nonstandard arguments as well. */
          ((PLAIN_HNDLR_PTR)(GC_funcptr_uint)old_handler)(sig);
        return;
#    endif
      }
    }
    UNPROTECT(h, GC_page_size);
    /*
     * We need to make sure that no collection occurs between the
     * `UNPROTECT()` call and the setting of the dirty bit.
     * Otherwise a write by a third thread might go unnoticed.
     * Reversing the order is just as bad, since we would end up
     * unprotecting a page in a collection cycle during which it is not
     * marked.  Currently we do this by disabling the thread stopping
     * signals while this handler is running.  An alternative might be
     * to record the fact that we are about to unprotect, or have just
     * unprotected a page in the collector's thread structure, and then
     * to have the thread stopping code set the dirty flag, if necessary.
     */
    /* Mark every heap block of the just unprotected page as dirty. */
    for (i = 0; i < divHBLKSZ(GC_page_size); i++) {
      size_t index = PHT_HASH(h + i);

      async_set_pht_entry_from_index(GC_dirty_pages, index);
    }
    /*
     * The `write()` may not take place before dirty bits are read.
     * But then we will fault again...
     */
#    if defined(MSWIN32) || defined(MSWINCE)
    return EXCEPTION_CONTINUE_EXECUTION;
#    else
    return;
#    endif
  }
  /* Not a fault of the kind this handler deals with. */
#    if defined(MSWIN32) || defined(MSWINCE)
  return EXCEPTION_CONTINUE_SEARCH;
#    else
  ABORT_ARG1("Unexpected bus error or segmentation fault", " at %p",
             (void *)addr);
#    endif
}

#    if defined(GC_WIN32_THREADS) && !defined(CYGWIN32)
/*
 * Register `GC_write_fault_handler` as the unhandled exception filter.
 * The previously registered filter (the call result) is not recorded.
 */
GC_INNER void
GC_set_write_fault_handler(void)
{
  SetUnhandledExceptionFilter(GC_write_fault_handler);
}
#    endif

#    ifdef SOFT_VDB
static GC_bool soft_dirty_init(void);
#    endif

/*
 * Initialize the `mprotect`-based virtual dirty bits implementation.
 * The allocator lock should be held by the caller.  If `GWW_VDB` or
 * `SOFT_VDB` is configured and the corresponding facility initializes
 * successfully, then it is used and no fault handler is installed.
 * Otherwise `GC_write_fault_handler` is installed (for `SIGSEGV` and,
 * if `USE_BUS_SIGACT`, for `SIGBUS`; or as the unhandled exception filter
 * on Windows), and the previous handlers are saved to be chained to.
 * Returns `TRUE` on success; `FALSE` is returned only if
 * `COUNT_PROTECTED_REGIONS` is defined and the heap has too many pages.
 */
GC_INNER GC_bool
GC_dirty_init(void)
{
#    if !defined(MSWIN32) && !defined(MSWINCE)
  struct sigaction act, oldact;
#    endif

  GC_ASSERT(I_HOLD_LOCK());
#    ifdef COUNT_PROTECTED_REGIONS
  GC_ASSERT(GC_page_size != 0);
  if ((GC_signed_word)(GC_heapsize / (word)GC_page_size)
      >= ((GC_signed_word)GC_UNMAPPED_REGIONS_SOFT_LIMIT
          - GC_num_unmapped_regions)
             * 2) {
    GC_COND_LOG_PRINTF("Cannot turn on GC incremental mode"
                       " as heap contains too many pages\n");
    return FALSE;
  }
#    endif
#    if !defined(MSWIN32) && !defined(MSWINCE)
  act.sa_flags = SA_RESTART | SA_SIGINFO;
  act.sa_sigaction = GC_write_fault_handler;
  (void)sigemptyset(&act.sa_mask);
#      ifdef SIGNAL_BASED_STOP_WORLD
  /*
   * Arrange to postpone the signal while we are in a write fault handler.
   * This effectively makes the handler atomic w.r.t. stopping the world
   * for the collection.
   */
  (void)sigaddset(&act.sa_mask, GC_get_suspend_signal());
#      endif
#    endif /* !MSWIN32 */
  GC_VERBOSE_LOG_PRINTF(
      "Initializing mprotect virtual dirty bit implementation\n");
  if (GC_page_size % HBLKSIZE != 0) {
    ABORT("Page size not multiple of HBLKSIZE");
  }
#    ifdef GWW_VDB
  if (GC_gww_dirty_init()) {
    GC_COND_LOG_PRINTF("Using GetWriteWatch()\n");
    return TRUE;
  }
#    elif defined(SOFT_VDB)
#      ifdef CHECK_SOFT_VDB
  if (!soft_dirty_init())
    ABORT("Soft-dirty bit support is missing");
#      else
  if (soft_dirty_init()) {
    GC_COND_LOG_PRINTF("Using soft-dirty bit feature\n");
    return TRUE;
  }
#      endif
#    endif
#    ifdef MSWIN32
  GC_old_segv_handler = SetUnhandledExceptionFilter(GC_write_fault_handler);
  if (GC_old_segv_handler != NULL) {
    GC_COND_LOG_PRINTF("Replaced other UnhandledExceptionFilter\n");
  } else {
    GC_old_segv_handler = SIG_DFL;
  }
#    elif defined(MSWINCE)
  {
    /* `MPROTECT_VDB` is unsupported for WinCE at present. */
    /* FIXME: Implement (if possible). */
  }
#    else
  /* `act.sa_restorer` is deprecated and should not be initialized. */
#      if defined(IRIX5) && defined(THREADS)
  sigaction(SIGSEGV, 0, &oldact);
  sigaction(SIGSEGV, &act, 0);
#      else
  {
    int res = sigaction(SIGSEGV, &act, &oldact);
    if (res != 0)
      ABORT("Sigaction failed");
  }
#      endif
  /* Remember the old handler along with its calling convention. */
  if (oldact.sa_flags & SA_SIGINFO) {
    GC_old_segv_handler = oldact.sa_sigaction;
    GC_old_segv_handler_used_si = TRUE;
  } else {
    GC_old_segv_handler = (SIG_HNDLR_PTR)(GC_funcptr_uint)oldact.sa_handler;
    GC_old_segv_handler_used_si = FALSE;
  }
  if ((GC_funcptr_uint)GC_old_segv_handler == (GC_funcptr_uint)SIG_IGN) {
    WARN("Previously ignored segmentation violation!?\n", 0);
    GC_old_segv_handler = (SIG_HNDLR_PTR)(GC_funcptr_uint)SIG_DFL;
  }
  if ((GC_funcptr_uint)GC_old_segv_handler != (GC_funcptr_uint)SIG_DFL) {
    GC_VERBOSE_LOG_PRINTF("Replaced other SIGSEGV handler\n");
  }
#      ifdef USE_BUS_SIGACT
  sigaction(SIGBUS, &act, &oldact);
  if ((oldact.sa_flags & SA_SIGINFO) != 0) {
    GC_old_bus_handler = oldact.sa_sigaction;
    GC_old_bus_handler_used_si = TRUE;
  } else {
    GC_old_bus_handler = (SIG_HNDLR_PTR)(GC_funcptr_uint)oldact.sa_handler;
    /*
     * Reset the flag explicitly (as it is done for `SIGSEGV` above),
     * so that no stale value could remain if this function is invoked
     * more than once.
     */
    GC_old_bus_handler_used_si = FALSE;
  }
  if ((GC_funcptr_uint)GC_old_bus_handler == (GC_funcptr_uint)SIG_IGN) {
    WARN("Previously ignored bus error!?\n", 0);
    GC_old_bus_handler = (SIG_HNDLR_PTR)(GC_funcptr_uint)SIG_DFL;
  } else if ((GC_funcptr_uint)GC_old_bus_handler != (GC_funcptr_uint)SIG_DFL) {
    GC_VERBOSE_LOG_PRINTF("Replaced other SIGBUS handler\n");
  }
#      endif
#    endif /* !MSWIN32 && !MSWINCE */
#    if defined(CPPCHECK) && defined(ADDRESS_SANITIZER)
  GC_noop1((word)(GC_funcptr_uint)(&__asan_default_options));
#    endif
  return TRUE;
}
#  endif /* !DARWIN */

/*
 * Write-protect the heap.  Each heap section is either protected as
 * a whole (if `DONT_PROTECT_PTRFREE` is not defined and the page size
 * differs from `HBLKSIZE`), or it is scanned block by block so that only
 * the runs of consecutive blocks which are neither free nor pointer-free
 * are protected.
 */
STATIC void
GC_protect_heap(void)
{
  size_t i;

  GC_ASSERT(GC_page_size != 0);
  for (i = 0; i < GC_n_heap_sects; i++) {
    ptr_t start = GC_heap_sects[i].hs_start;
    size_t len = GC_heap_sects[i].hs_bytes;
    struct hblk *current;
    struct hblk *current_start; /*< start of block to be protected */
    ptr_t limit;

    GC_ASSERT((ADDR(start) & (GC_page_size - 1)) == 0);
    GC_ASSERT((len & (GC_page_size - 1)) == 0);
#  ifndef DONT_PROTECT_PTRFREE
    /*
     * We avoid protecting pointer-free objects unless the page size
     * differs from `HBLKSIZE`.
     */
    if (GC_page_size != HBLKSIZE) {
      PROTECT(start, len);
      continue;
    }
#  endif

    current_start = (struct hblk *)start;
    limit = start + len;
    for (current = current_start;;) {
      /*
       * These initial values are kept if `current` has reached `limit`,
       * which forces the pending run to be flushed and the loop to end.
       */
      size_t nblocks = 0;
      GC_bool is_ptrfree = TRUE;

      if (ADDR_LT((ptr_t)current, limit)) {
        hdr *hhdr;

        GET_HDR(current, hhdr);
        if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) {
          /*
           * This can happen only if we are at the beginning of a heap
           * segment, and a block spans heap segments.  We will handle
           * that block as part of the preceding segment.
           */
          GC_ASSERT(current_start == current);

          current_start = ++current;
          continue;
        }
        if (HBLK_IS_FREE(hhdr)) {
          /* A free block is treated the same way as a pointer-free one. */
          GC_ASSERT(modHBLKSZ(hhdr->hb_sz) == 0);
          nblocks = divHBLKSZ(hhdr->hb_sz);
        } else {
          nblocks = OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
          is_ptrfree = IS_PTRFREE(hhdr);
        }
      }
      if (is_ptrfree) {
        /* The run (if non-empty) ends here, so protect it now. */
        if (ADDR_LT((ptr_t)current_start, (ptr_t)current)) {
#  ifdef DONT_PROTECT_PTRFREE
          ptr_t cur_aligned = PTR_ALIGN_UP((ptr_t)current, GC_page_size);

          current_start = HBLK_PAGE_ALIGNED(current_start);
          /*
           * Adjacent free blocks might be protected too because
           * of the alignment by the page size.
           */
          PROTECT(current_start, cur_aligned - (ptr_t)current_start);
#  else
          PROTECT(current_start, (ptr_t)current - (ptr_t)current_start);
#  endif
        }
        if (ADDR_GE((ptr_t)current, limit))
          break;
      }
      /* Advance past the block; a new run starts after a skipped one. */
      current += nblocks;
      if (is_ptrfree)
        current_start = current;
    }
  }
}

#  if defined(CAN_HANDLE_FORK) && defined(DARWIN) && defined(THREADS) \
      || defined(COUNT_PROTECTED_REGIONS)
/*
 * Make every heap section writable again.  `GC_dirty_pages` is left
 * untouched.  The allocator lock should be held by the caller.
 */
STATIC void
GC_unprotect_all_heap(void)
{
  size_t sect_no = 0;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_auto_incremental);
  while (sect_no < GC_n_heap_sects) {
    UNPROTECT(GC_heap_sects[sect_no].hs_start,
              GC_heap_sects[sect_no].hs_bytes);
    sect_no++;
  }
}
#  endif

#  ifdef COUNT_PROTECTED_REGIONS
/*
 * Turn the incremental collection mode off (removing protection from
 * the whole heap) if the `mprotect`-based dirty bits are in use and
 * the number of the heap pages has reached the soft limit.
 */
GC_INNER void
GC_handle_protected_regions_limit(void)
{
  GC_ASSERT(GC_page_size != 0);
  if (!(GC_auto_incremental) || (GC_GWW_AVAILABLE())) {
    /* The heap is not protected by `mprotect`, nothing to do. */
    return;
  }

  /*
   * The most trivial (though highly restrictive) way not to exceed
   * the limit of `vm.max_map_count` is to give up on the incremental
   * collection mode (based on `mprotect`) once the number of pages
   * in the heap reaches that limit.
   */
  if ((GC_signed_word)(GC_heapsize / (word)GC_page_size)
      < ((GC_signed_word)GC_UNMAPPED_REGIONS_SOFT_LIMIT
         - GC_num_unmapped_regions)
            * 2) {
    /* Still below the limit. */
    return;
  }

  GC_unprotect_all_heap();
#    ifdef DARWIN
  GC_task_self = 0;
#    endif
  GC_incremental = FALSE;
  WARN("GC incremental mode is turned off"
       " to prevent hitting VM maps limit\n",
       0);
}
#  endif /* COUNT_PROTECTED_REGIONS */

#endif /* MPROTECT_VDB */

#if !defined(THREADS) && (defined(PROC_VDB) || defined(SOFT_VDB))
/*
 * Set each time the proc files are (re)opened successfully.  In case of
 * `PROC_VDB`, it is compared against `getpid()` when reading the dirty
 * bits to detect that the process is a forked child.
 */
static pid_t saved_proc_pid; /*< `pid` used to compose `/proc` file names */
#endif

#ifdef PROC_VDB
/*
 * This implementation assumes the Solaris new structured `/proc`
 * pseudo-file-system from which we can read page modified bits.
 * This facility is far from optimal (e.g. we would like to get the
 * info for only some of the address space), but it avoids intercepting
 * system calls.
 */

#  include <errno.h>
#  include <sys/signal.h>
#  include <sys/stat.h>
#  include <sys/syscall.h>

#  ifdef GC_NO_SYS_FAULT_H
/* This exists only to check `PROC_VDB` code compilation (on Linux). */
#    define PG_MODIFIED 1
/*
 * A stand-in for the header located at the beginning of the data read
 * from the `pagedata` file; `pr_nmap` is the number of `struct prasmap`
 * records that follow.
 */
struct prpageheader {
  long dummy[2]; /*< `pr_tstamp` */
  long pr_nmap;
  long pr_npage;
};
/*
 * A stand-in for the descriptor of a mapping.  In the data read, each
 * descriptor is followed by `pr_npage` per-page bytes (with the
 * `PG_MODIFIED` bit set for the modified pages).
 */
struct prasmap {
  GC_uintptr_t pr_vaddr;
  size_t pr_npage;
  char dummy1[64 + 8]; /*< `pr_mapname`, `pr_offset` */
  int pr_mflags;
  int pr_pagesize;
  int dummy2[2]; /*< `pr_shmid`, `pr_filler` */
};
#  else
/* Use the new structured `/proc` definitions. */
#    include <procfs.h>
#  endif

/* The initial size (in bytes) of the buffer for reading `pagedata`. */
#  define INITIAL_BUF_SZ 8192
/* The current size of `GC_proc_buf`; doubled when a read needs more. */
STATIC size_t GC_proc_buf_size = INITIAL_BUF_SZ;
/* The buffer for the `pagedata` content; allocated on initialization. */
STATIC char *GC_proc_buf = NULL;
/* The descriptor of the opened `pagedata` file; -1 if not opened. */
STATIC int GC_proc_fd = -1;

/*
 * Open the `pagedata` file of the current process storing the
 * descriptor to `GC_proc_fd` (which is set to -1 on a failure).
 * Returns `TRUE` on success.
 */
static GC_bool
proc_dirty_open_files(void)
{
  pid_t cur_pid = getpid();
  /* Room for "/proc/", a decimal `pid`, "/pagedata" and the NUL. */
  char path[6 + 20 + 9 + 1];

  GC_snprintf_s_ld_s(path, sizeof(path), "/proc/", (long)cur_pid,
                     "/pagedata");
  GC_proc_fd = open(path, O_RDONLY);
  if (GC_proc_fd != -1) {
    /* Failing to set the close-on-exec flag is not fatal. */
    if (syscall(SYS_fcntl, GC_proc_fd, F_SETFD, FD_CLOEXEC) == -1)
      WARN("Could not set FD_CLOEXEC for /proc\n", 0);
#  ifndef THREADS
    /* Remember the `pid` the opened file belongs to. */
    saved_proc_pid = cur_pid;
#  endif
    return TRUE;
  }

  WARN("/proc open failed; cannot enable GC incremental mode\n", 0);
  return FALSE;
}

#  ifdef CAN_HANDLE_FORK
/*
 * Reopen the `pagedata` file, provided one is currently opened, using
 * the `pid` of the calling process.  The incremental mode is turned off
 * if reopening fails.  The allocator lock should be held by the caller.
 */
GC_INNER void
GC_dirty_update_child(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (GC_proc_fd != -1) {
    close(GC_proc_fd);
    if (!proc_dirty_open_files()) {
      /* Should be safe to turn it off. */
      GC_incremental = FALSE;
    }
  }
  /* Otherwise, the GC incremental mode is off. */
}
#  endif /* CAN_HANDLE_FORK */

/*
 * Initialize the `/proc`-based virtual dirty bits implementation:
 * open the `pagedata` file and allocate the buffer to read it into.
 * Returns `FALSE` if the file cannot be opened; aborts if the buffer
 * cannot be allocated.  The allocator lock should be held by the caller.
 */
GC_INNER GC_bool
GC_dirty_init(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (GC_bytes_allocd != 0 || GC_bytes_allocd_before_gc != 0) {
    /* Something has been allocated already. */
    memset(GC_written_pages, 0xff, sizeof(page_hash_table));
    GC_VERBOSE_LOG_PRINTF(
        "Allocated %lu bytes: all pages may have been written\n",
        (unsigned long)(GC_bytes_allocd + GC_bytes_allocd_before_gc));
  }

  if (proc_dirty_open_files()) {
    GC_proc_buf = GC_scratch_alloc(GC_proc_buf_size);
    if (NULL == GC_proc_buf)
      ABORT("Insufficient space for /proc read");
    return TRUE;
  }
  return FALSE;
}

/*
 * Read the page modification info from the `pagedata` file and rebuild
 * `GC_grungy_pages` from it; the result is also or-ed into
 * `GC_written_pages`.  If the file cannot be read (or, in the
 * single-threaded case, cannot be reopened in a forked child), or the
 * read buffer cannot be enlarged, then all pages are conservatively
 * treated as written (`GC_grungy_pages` is filled only if
 * `output_unneeded` is false).  The allocator lock should be held by
 * the caller.
 */
GC_INLINE void
GC_proc_read_dirty(GC_bool output_unneeded)
{
  size_t i, nmaps;
  ssize_t pagedata_len;
  char *bufp = GC_proc_buf;

  GC_ASSERT(I_HOLD_LOCK());
#  ifndef THREADS
  /*
   * If the current `pid` differs from the saved one, then we are in
   * the forked (child) process, the current `/proc` file should be
   * closed, the new one should be opened with the updated path.
   * Note, this is not needed for the multi-threaded case because
   * `fork_child_proc()` reopens the file right after `fork()` call.
   */
  if (getpid() != saved_proc_pid
      && (-1 == GC_proc_fd /*< no need to retry */
          || (close(GC_proc_fd), !proc_dirty_open_files()))) {
    /* Failed to reopen the file.  Punt! */
    if (!output_unneeded)
      memset(GC_grungy_pages, 0xff, sizeof(page_hash_table));
    memset(GC_written_pages, 0xff, sizeof(page_hash_table));
    return;
  }
#  endif

  for (;;) {
    char *new_buf;
    size_t new_size;

    pagedata_len = PROC_READ(GC_proc_fd, bufp, GC_proc_buf_size);
    if (LIKELY(pagedata_len != -1))
      break;
    if (errno != E2BIG) {
      WARN("read /proc failed, errno= %" WARN_PRIdPTR "\n",
           (GC_signed_word)errno);
      /* Punt. */
      if (!output_unneeded)
        memset(GC_grungy_pages, 0xff, sizeof(page_hash_table));
      memset(GC_written_pages, 0xff, sizeof(page_hash_table));
      return;
    }
    /* Retry with larger buffer. */
    new_size = 2 * GC_proc_buf_size;
    /*
     * Alternatively, we could use `fstat()` to determine the required
     * buffer size.
     */
#  ifdef DEBUG_DIRTY_BITS
    GC_log_printf("Growing proc buf to %lu bytes at collection #%lu\n",
                  (unsigned long)new_size, (unsigned long)GC_gc_no + 1);
#  endif
    new_buf = GC_scratch_alloc(new_size);
    if (NULL == new_buf) {
      /*
       * The buffer cannot be enlarged, thus reading again would fail
       * the same way, and the loop would never terminate.  Punt.
       */
      WARN("Insufficient space for /proc read\n", 0);
      if (!output_unneeded)
        memset(GC_grungy_pages, 0xff, sizeof(page_hash_table));
      memset(GC_written_pages, 0xff, sizeof(page_hash_table));
      return;
    }
    GC_scratch_recycle_no_gww(bufp, GC_proc_buf_size);
    GC_proc_buf = bufp = new_buf;
    GC_proc_buf_size = new_size;
  }
  GC_ASSERT((size_t)pagedata_len <= GC_proc_buf_size);

  /* Copy dirty bits into `GC_grungy_pages`. */
  BZERO(GC_grungy_pages, sizeof(GC_grungy_pages));
  nmaps = (size_t)(((struct prpageheader *)bufp)->pr_nmap);
#  ifdef DEBUG_DIRTY_BITS
  GC_log_printf("Proc VDB read: pr_nmap= %u, pr_npage= %ld\n", (unsigned)nmaps,
                ((struct prpageheader *)bufp)->pr_npage);
#  endif
#  if defined(GC_NO_SYS_FAULT_H) && defined(CPPCHECK)
  GC_noop1(((struct prpageheader *)bufp)->dummy[0]);
#  endif
  bufp += sizeof(struct prpageheader);
  for (i = 0; i < nmaps; i++) {
    struct prasmap *map = (struct prasmap *)bufp;
    ptr_t vaddr, limit;
    unsigned long npages = 0;
    unsigned pagesize;

    bufp += sizeof(struct prasmap);
    /* Ensure no buffer overrun. */
    if (bufp - GC_proc_buf < pagedata_len)
      npages = (unsigned long)map->pr_npage;
    if (bufp - GC_proc_buf > pagedata_len - (ssize_t)npages)
      ABORT("Wrong pr_nmap or pr_npage read from /proc");

    vaddr = (ptr_t)map->pr_vaddr;
    pagesize = (unsigned)map->pr_pagesize;
#  if defined(GC_NO_SYS_FAULT_H) && defined(CPPCHECK)
    GC_noop1(map->dummy1[0] + map->dummy2[0]);
#  endif
#  ifdef DEBUG_DIRTY_BITS
    GC_log_printf("pr_vaddr= %p, npage= %lu, mflags= 0x%x, pagesize= 0x%x\n",
                  (void *)vaddr, npages, map->pr_mflags, pagesize);
#  endif
    /* The page size is expected to be a non-zero power of two. */
    if (0 == pagesize || ((pagesize - 1) & pagesize) != 0)
      ABORT("Wrong pagesize read from /proc");

    /* One byte of flags follows the mapping descriptor for each page. */
    limit = vaddr + pagesize * npages;
    for (; ADDR_LT(vaddr, limit); vaddr += pagesize) {
      if ((*bufp++) & PG_MODIFIED) {
        struct hblk *h;
        ptr_t next_vaddr = vaddr + pagesize;

#  ifdef DEBUG_DIRTY_BITS
        GC_log_printf("dirty page at: %p\n", (void *)vaddr);
#  endif
        /* Mark all heap blocks of the modified page. */
        for (h = (struct hblk *)vaddr; ADDR_LT((ptr_t)h, next_vaddr); h++) {
          size_t index = PHT_HASH(h);

          set_pht_entry_from_index(GC_grungy_pages, index);
        }
      }
    }
    /*
     * According to the new structured `pagedata` file format, an 8-byte
     * alignment is enforced (preceding the next `struct prasmap`)
     * regardless of the pointer size.
     */
    bufp = PTR_ALIGN_UP(bufp, 8);
  }
#  ifdef DEBUG_DIRTY_BITS
  GC_log_printf("Proc VDB read done\n");
#  endif

  /* Update `GC_written_pages` (even if `output_unneeded`). */
  GC_or_pages(GC_written_pages, GC_grungy_pages);
}

#endif /* PROC_VDB */

#ifdef SOFT_VDB
/*
 * The size (in bytes) of the buffer allocated for the data read from
 * the `pagemap` file; the value may be predefined by the client.
 */
#  ifndef VDB_BUF_SZ
#    define VDB_BUF_SZ 16384
#  endif

/*
 * Open the file `/proc/<pid><slash_filename>` in the given `mode` and
 * try to set the close-on-exec flag for it.  Returns the file descriptor,
 * or -1 (after issuing a warning) if the file cannot be opened.
 */
static int
open_proc_fd(pid_t pid, const char *slash_filename, int mode)
{
  /*
   * Room for "/proc/", a decimal `pid`, a file name of up to
   * 11 characters and the NUL.
   */
  char path[6 + 20 + 11 + 1];
  int fd;

  GC_snprintf_s_ld_s(path, sizeof(path), "/proc/", (long)pid,
                     slash_filename);
  fd = open(path, mode);
  if (fd != -1) {
    /* Failing to set the flag is reported but not treated as fatal. */
    if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
      WARN("Could not set FD_CLOEXEC for /proc\n", 0);
    return fd;
  }

  WARN("/proc/self%s open failed; cannot enable GC incremental mode\n",
       slash_filename);
  return -1;
}

#  include <stdint.h> /*< for `uint64_t` */

/* The type of an entry of the `pagemap` file. */
typedef uint64_t pagemap_elem_t;

/* The buffer (of `VDB_BUF_SZ` bytes) for the data read from `pagemap`. */
static pagemap_elem_t *soft_vdb_buf;
/* The descriptor of the opened `pagemap` file. */
static int pagemap_fd;

/*
 * Open both the `clear_refs` and `pagemap` files of the current process.
 * Returns `TRUE` on success; otherwise no file is left opened and
 * `clear_refs_fd` is -1.
 */
static GC_bool
soft_dirty_open_files(void)
{
  pid_t self_pid = getpid();

  clear_refs_fd = open_proc_fd(self_pid, "/clear_refs", O_WRONLY);
  if (clear_refs_fd != -1) {
    pagemap_fd = open_proc_fd(self_pid, "/pagemap", O_RDONLY);
    if (pagemap_fd != -1) {
#  ifndef THREADS
      /* Remember the `pid` the opened files belong to. */
      saved_proc_pid = self_pid;
#  endif
      return TRUE;
    }
    /* Undo opening of the first file. */
    close(clear_refs_fd);
    clear_refs_fd = -1;
  }
  return FALSE;
}

#  ifdef CAN_HANDLE_FORK
/*
 * Reopen the `clear_refs` and `pagemap` files, provided they are
 * currently opened, using the `pid` of the calling process.
 * The incremental mode is turned off if reopening fails.
 * The allocator lock should be held by the caller.
 */
GC_INNER void
GC_dirty_update_child(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  if (clear_refs_fd != -1) {
    close(clear_refs_fd);
    close(pagemap_fd);
    if (!soft_dirty_open_files())
      GC_incremental = FALSE;
  }
  /* Otherwise, the GC incremental mode is off. */
}
#  endif /* CAN_HANDLE_FORK */

/* Clear soft-dirty bits from the task's PTEs. */
static void
clear_soft_dirty_bits(void)
{
  /* Request clearing by writing "4" to the `clear_refs` file. */
  ssize_t written = write(clear_refs_fd, "4\n", 2);

  if (2 == written)
    return;

  /* `errno` is meaningful only if the call itself has failed. */
  ABORT_ARG1("Failed to write to /proc/self/clear_refs", ": errno= %d",
             written < 0 ? errno : 0);
}

/* Bit 55 of each 64-bit `pagemap` entry is the soft-dirty flag. */
#  define PM_SOFTDIRTY_MASK ((pagemap_elem_t)1 << 55)

/*
 * Check whether the soft-dirty bit is reported for the page containing
 * `vaddr` after a write to it: first after the initial write, and then
 * again after the bits are cleared and the location is written once
 * more.  The byte at `vaddr` is overwritten (zero is stored there by
 * the time of a successful return).  Returns `FALSE` if the `pagemap`
 * entry cannot be read or the bit is found unset.
 */
static GC_bool
detect_soft_dirty_supported(ptr_t vaddr)
{
  off_t fpos;
  pagemap_elem_t buf[1];

  GC_ASSERT(GC_log_pagesize != 0);
  /* Make it dirty. */
  *vaddr = 1;
  /* The file offset of the `pagemap` entry describing the page. */
  fpos = (off_t)((ADDR(vaddr) >> GC_log_pagesize) * sizeof(pagemap_elem_t));

  for (;;) {
    /* Read the relevant PTE from the `pagemap` file. */
    if (lseek(pagemap_fd, fpos, SEEK_SET) == (off_t)(-1))
      return FALSE;
    if (PROC_READ(pagemap_fd, buf, sizeof(buf)) != (int)sizeof(buf))
      return FALSE;

    /* Is the soft-dirty bit unset? */
    if ((buf[0] & PM_SOFTDIRTY_MASK) == 0)
      return FALSE;

    /* Zero is stored only by the second write, so both checks passed. */
    if (0 == *vaddr)
      break;
    /*
     * Retry to check that writing to `clear_refs` works as expected.
     * This malfunction of the soft-dirty bits implementation is
     * observed on some Linux kernels on Power9 (e.g. in Fedora 36).
     */
    clear_soft_dirty_bits();
    *vaddr = 0;
  }
  return TRUE; /*< success */
}

#  ifndef NO_SOFT_VDB_LINUX_VER_RUNTIME_CHECK
#    include <string.h> /*< for strcmp() */
#    include <sys/utsname.h>

/* Ensure the linux (kernel) major/minor version is as given or higher. */
static GC_bool
ensure_min_linux_ver(int major, int minor)
{
  struct utsname uts;
  int kernel_major;
  int kernel_minor = -1;

  if (uname(&uts) == -1) {
    /* This should not happen actually. */
    return FALSE;
  }
  if (strcmp(uts.sysname, "Linux") != 0) {
    WARN("Cannot ensure Linux version as running on other OS: %s\n",
         uts.sysname);
    return FALSE;
  }

  kernel_major = GC_parse_version(&kernel_minor, uts.release);
  if (kernel_major != major) {
    /* The major numbers alone decide the result. */
    return kernel_major > major;
  }
  return kernel_minor >= minor;
}
#  endif

/*
 * Initialize the soft-dirty bits based implementation.  The function is
 * named `soft_dirty_init` if `MPROTECT_VDB` is defined, otherwise it
 * serves as `GC_dirty_init`.  Returns `TRUE` on success.  `FALSE` is
 * returned if the feature is turned off by the environment variable
 * (checked only if `MPROTECT_VDB` is defined and `CHECK_SOFT_VDB` is
 * not), if the kernel version check fails, if the proc files cannot be
 * opened, or if the soft-dirty bit is detected to be unsupported; in
 * the last case the buffer is recycled and both files are closed.
 * The allocator lock should be held by the caller.
 */
#  ifdef MPROTECT_VDB
static GC_bool
soft_dirty_init(void)
#  else
GC_INNER GC_bool
GC_dirty_init(void)
#  endif
{
#  if defined(MPROTECT_VDB) && !defined(CHECK_SOFT_VDB)
  char *str = GETENV("GC_USE_GETWRITEWATCH");
#    ifdef GC_PREFER_MPROTECT_VDB
  if (NULL == str || (*str == '0' && *(str + 1) == '\0')) {
    /* The environment variable is unset or set to "0". */
    return FALSE;
  }
#    else
  if (str != NULL && *str == '0' && *(str + 1) == '\0') {
    /* The environment variable is set to "0". */
    return FALSE;
  }
#    endif
#  endif
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(NULL == soft_vdb_buf);
#  ifndef NO_SOFT_VDB_LINUX_VER_RUNTIME_CHECK
  if (!ensure_min_linux_ver(3, 18)) {
    GC_COND_LOG_PRINTF(
        "Running on old kernel lacking correct soft-dirty bit support\n");
    return FALSE;
  }
#  endif
  if (!soft_dirty_open_files())
    return FALSE;
  soft_vdb_buf = (pagemap_elem_t *)GC_scratch_alloc(VDB_BUF_SZ);
  if (NULL == soft_vdb_buf)
    ABORT("Insufficient space for /proc pagemap buffer");
  /* The just allocated buffer also serves as the page to probe. */
  if (!detect_soft_dirty_supported((ptr_t)soft_vdb_buf)) {
    GC_COND_LOG_PRINTF("Soft-dirty bit is not supported by kernel\n");
    /* Release the resources. */
    GC_scratch_recycle_no_gww(soft_vdb_buf, VDB_BUF_SZ);
    soft_vdb_buf = NULL;
    close(clear_refs_fd);
    clear_refs_fd = -1;
    close(pagemap_fd);
    return FALSE;
  }
  return TRUE;
}

/* The `pagemap` file position of the data held in `soft_vdb_buf`. */
static off_t pagemap_buf_fpos; /*< valid only if `pagemap_buf_len > 0` */

/* The number of bytes of the `pagemap` data held in `soft_vdb_buf`. */
static size_t pagemap_buf_len;

/*
 * Read bytes from `/proc/self/pagemap` file at given file position,
 * reusing the data already present in `soft_vdb_buf` where possible.
 * `len` is the maximum number of bytes to read; `*pres` is the amount
 * of bytes actually read (always bigger than 0 but never exceeds `len`);
 * `next_fpos_hint` is the file position of the next bytes block to read
 * ahead if possible (0 means no information provided).  Returns the
 * pointer (inside `soft_vdb_buf`) to the element corresponding to `fpos`.
 * Aborts if seeking or reading of the file fails.
 */
static const pagemap_elem_t *
pagemap_buffered_read(size_t *pres, off_t fpos, size_t len,
                      off_t next_fpos_hint)
{
  ssize_t res;
  size_t ofs;

  GC_ASSERT(GC_page_size != 0);
  GC_ASSERT(len > 0);
  if (pagemap_buf_fpos <= fpos
      && fpos < pagemap_buf_fpos + (off_t)pagemap_buf_len) {
    /* The requested data is already in the buffer. */
    ofs = (size_t)(fpos - pagemap_buf_fpos);
    res = (ssize_t)(pagemap_buf_fpos + pagemap_buf_len - fpos);
  } else {
    /*
     * Start reading at `fpos` rounded down using the mask computed from
     * the smaller of the page size and the buffer size.
     */
    off_t aligned_pos = fpos
                        & ~(off_t)(GC_page_size < VDB_BUF_SZ ? GC_page_size - 1
                                                             : VDB_BUF_SZ - 1);

    for (;;) {
      size_t count;

      /*
       * Seeking is skipped if the current file window ends exactly at
       * `aligned_pos` (presumably, the file offset is already there).
       */
      if ((0 == pagemap_buf_len
           || pagemap_buf_fpos + (off_t)pagemap_buf_len != aligned_pos)
          && lseek(pagemap_fd, aligned_pos, SEEK_SET) == (off_t)(-1))
        ABORT_ARG2("Failed to lseek /proc/self/pagemap",
                   ": offset= %lu, errno= %d", (unsigned long)fpos, errno);

      /* How much to read at once? */
      ofs = (size_t)(fpos - aligned_pos);
      GC_ASSERT(ofs < VDB_BUF_SZ);
      if (next_fpos_hint > aligned_pos
          && next_fpos_hint - aligned_pos < VDB_BUF_SZ) {
        /* The hinted position fits the buffer, so fill in all of it. */
        count = VDB_BUF_SZ;
      } else {
        /* Read only the requested amount (limited by the buffer size). */
        count = len + ofs;
        if (count > VDB_BUF_SZ)
          count = VDB_BUF_SZ;
      }

      GC_ASSERT(count % sizeof(pagemap_elem_t) == 0);
      res = PROC_READ(pagemap_fd, soft_vdb_buf, count);
      /* Stop once some data at `fpos` has been obtained. */
      if (res > (ssize_t)ofs)
        break;
      if (res <= 0)
        ABORT_ARG1("Failed to read /proc/self/pagemap", ": errno= %d",
                   res < 0 ? errno : 0);
      /* Retry (once) w/o page-alignment. */
      aligned_pos = fpos;
    }

    /* Save the buffer (file window) position and size. */
    pagemap_buf_fpos = aligned_pos;
    pagemap_buf_len = (size_t)res;
    /* Exclude the bytes preceding `fpos` from the result. */
    res -= (ssize_t)ofs;
  }

  GC_ASSERT(ofs % sizeof(pagemap_elem_t) == 0);
  *pres = (size_t)res < len ? (size_t)res : len;
  return &soft_vdb_buf[ofs / sizeof(pagemap_elem_t)];
}

/*
 * Set the entries of `GC_grungy_pages` for the blocks of the region
 * from `start` (should be `HBLKSIZE`-aligned) up to `limit` which reside
 * in the pages having the soft-dirty bit set in `/proc/self/pagemap`.
 * `next_start_hint` is the start of the region expected to be processed
 * next (or `NULL`); it is converted to a file position and passed to
 * `pagemap_buffered_read()` as the read-ahead hint.  `is_static_root`
 * distinguishes the static roots from the heap sections.  The allocator
 * lock should be held.
 */
static void
soft_set_grungy_pages(ptr_t start, ptr_t limit, ptr_t next_start_hint,
                      GC_bool is_static_root)
{
  ptr_t vaddr = (ptr_t)HBLK_PAGE_ALIGNED(start);
  off_t next_fpos_hint = (off_t)((ADDR(next_start_hint) >> GC_log_pagesize)
                                 * sizeof(pagemap_elem_t));

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(modHBLKSZ(ADDR(start)) == 0);
  GC_ASSERT(GC_log_pagesize != 0);
  while (ADDR_LT(vaddr, limit)) {
    size_t res;
    ptr_t limit_buf;
    /* The remaining length rounded up to a whole number of pages. */
    word vlen_p = ADDR(limit) - ADDR(vaddr) + GC_page_size - 1;
    /* One `pagemap` element is read per page, starting at `vaddr`. */
    const pagemap_elem_t *bufp = pagemap_buffered_read(
        &res,
        (off_t)((ADDR(vaddr) >> GC_log_pagesize) * sizeof(pagemap_elem_t)),
        (size_t)((vlen_p >> GC_log_pagesize) * sizeof(pagemap_elem_t)),
        next_fpos_hint);

    if (res % sizeof(pagemap_elem_t) != 0) {
      /* Punt. */
      memset(GC_grungy_pages, 0xff, sizeof(page_hash_table));
      WARN("Incomplete read of pagemap, not multiple of entry size\n", 0);
      break;
    }

    /* The end of the address range covered by the obtained elements. */
    limit_buf = vaddr + ((res / sizeof(pagemap_elem_t)) << GC_log_pagesize);
    for (; ADDR_LT(vaddr, limit_buf); vaddr += GC_page_size, bufp++) {
      if ((*bufp & PM_SOFTDIRTY_MASK) != 0) {
        struct hblk *h;
        ptr_t next_vaddr = vaddr + GC_page_size;

        /* Do not go beyond the end of the region. */
        if (UNLIKELY(ADDR_LT(limit, next_vaddr))) {
          next_vaddr = limit;
        }

        /*
         * If the bit is set, the respective PTE was written to
         * since clearing the soft-dirty bits.
         */
#  ifdef DEBUG_DIRTY_BITS
        if (is_static_root)
          GC_log_printf("static root dirty page at: %p\n", (void *)vaddr);
#  endif
        h = (struct hblk *)vaddr;
        /* Skip the part of the first page which precedes the region. */
        if (UNLIKELY(ADDR_LT(vaddr, start))) {
          h = (struct hblk *)start;
        }
        for (; ADDR_LT((ptr_t)h, next_vaddr); h++) {
          size_t index = PHT_HASH(h);

          /*
           * Filter out the blocks without pointers.  It might be worth
           * doing for the case when the heap is large enough for the hash
           * collisions to occur frequently.  Thus, off by default.
           */
#  if defined(FILTER_PTRFREE_HBLKS_IN_SOFT_VDB) || defined(CHECKSUMS) \
      || defined(DEBUG_DIRTY_BITS)
          if (!is_static_root) {
            hdr *hhdr;

#    ifdef CHECKSUMS
            set_pht_entry_from_index(GC_written_pages, index);
#    endif
            GET_HDR(h, hhdr);
            if (NULL == hhdr)
              continue;

            (void)GC_find_starting_hblk(h, &hhdr);
            if (HBLK_IS_FREE(hhdr) || IS_PTRFREE(hhdr))
              continue;
#    ifdef DEBUG_DIRTY_BITS
            GC_log_printf("dirty page (hblk) at: %p\n", (void *)h);
#    endif
          }
#  else
          UNUSED_ARG(is_static_root);
#  endif
          set_pht_entry_from_index(GC_grungy_pages, index);
        }
      } else {
#  if defined(CHECK_SOFT_VDB) /* `&& defined(MPROTECT_VDB)` */
        /*
         * Ensure that each clean page according to the soft-dirty VDB is
         * also identified such by the `mprotect`-based one.
         */
        if (!is_static_root
            && get_pht_entry_from_index(GC_dirty_pages, PHT_HASH(vaddr))) {
          ptr_t my_start, my_end; /*< the values are not used */

          /*
           * There could be a hash collision, thus we need to verify the
           * page is clean using slow `GC_get_maps()`.
           */
          if (GC_enclosing_writable_mapping(vaddr, &my_start, &my_end)) {
            ABORT("Inconsistent soft-dirty against mprotect dirty bits");
          }
        }
#  endif
      }
    }
    /* Read the next portion of `pagemap` file if incomplete. */
  }
}

/*
 * Fill in `GC_grungy_pages` (unless `output_unneeded`) according to the
 * soft-dirty bits of the heap sections and, unless
 * `NO_VDB_FOR_STATIC_ROOTS`, of the static roots; then clear the
 * soft-dirty bits.  The allocator lock should be held.
 */
GC_INLINE void
GC_soft_read_dirty(GC_bool output_unneeded)
{
  GC_ASSERT(I_HOLD_LOCK());
#  ifndef THREADS
  /*
   * Similar as for `GC_proc_read_dirty`: if the process ID differs
   * from the saved one, then the `/proc` files are reopened.
   */
  if (getpid() != saved_proc_pid) {
    GC_bool is_reopened = FALSE;

    if (clear_refs_fd != -1) {
      close(clear_refs_fd);
      close(pagemap_fd);
      is_reopened = soft_dirty_open_files();
    } /*< otherwise there is no need to retry */

    if (!is_reopened) {
      /* Failed to reopen the files. */
      if (!output_unneeded) {
        /* Punt: set all the entries. */
        memset(GC_grungy_pages, 0xff, sizeof(page_hash_table));
#    ifdef CHECKSUMS
        memset(GC_written_pages, 0xff, sizeof(page_hash_table));
#    endif
      }
      return;
    }
  }
#  endif

  if (!output_unneeded) {
    size_t k;

    BZERO(GC_grungy_pages, sizeof(GC_grungy_pages));
    /* Invalidate the content of `soft_vdb_buf`. */
    pagemap_buf_len = 0;

    /* Process the heap sections. */
    for (k = 0; k < GC_n_heap_sects; ++k) {
      ptr_t sect_start = GC_heap_sects[k].hs_start;
      ptr_t sect_end = sect_start + GC_heap_sects[k].hs_bytes;
      ptr_t next_hint = NULL;

      /* The start of the following section (if any) serves as a hint. */
      if (k + 1 < GC_n_heap_sects)
        next_hint = GC_heap_sects[k + 1].hs_start;
      soft_set_grungy_pages(sect_start, sect_end, next_hint, FALSE);
    }

#  ifndef NO_VDB_FOR_STATIC_ROOTS
    /* Process the static roots. */
    for (k = 0; k < n_root_sets; ++k) {
      ptr_t root_start = (ptr_t)HBLKPTR(GC_static_roots[k].r_start);
      ptr_t next_hint = NULL;

      if (k + 1 < n_root_sets)
        next_hint = GC_static_roots[k + 1].r_start;
      soft_set_grungy_pages(root_start, GC_static_roots[k].r_end, next_hint,
                            TRUE);
    }
#  endif
  }

  clear_soft_dirty_bits();
}
#endif /* SOFT_VDB */

#ifndef NO_MANUAL_VDB
/*
 * Whether the manual VDB mode is on (off initially).  If on, then
 * `GC_read_dirty()` takes the dirty bits from `GC_dirty_pages` which is
 * updated by `GC_dirty_inner()`.
 */
GC_INNER GC_bool GC_manual_vdb = FALSE;

/*
 * Set the entry of `GC_dirty_pages` which corresponds to the block
 * containing `p`.
 */
void
GC_dirty_inner(const void *p)
{
  const size_t pht_slot = PHT_HASH(p);

#  if defined(MPROTECT_VDB)
  /*
   * `GC_dirty_pages` should not be updated here if the update is to be
   * followed by the page unprotection, thus only the manual VDB mode
   * is expected.
   */
  GC_ASSERT(GC_manual_vdb);
#  endif
  async_set_pht_entry_from_index(GC_dirty_pages, pht_slot);
}
#endif /* !NO_MANUAL_VDB */

#ifndef GC_DISABLE_INCREMENTAL
/*
 * Retrieve the dirty bits.  In the manual VDB mode (or, if `MPROTECT_VDB`,
 * when `GC_GWW_AVAILABLE()` is false), `GC_dirty_pages` is copied to
 * `GC_grungy_pages` (unless `output_unneeded`) and then cleared.
 * Otherwise, the work is delegated to the routine of the VDB
 * implementation selected at compile time.  The allocator lock should
 * be held.
 */
GC_INNER void
GC_read_dirty(GC_bool output_unneeded)
{
  GC_ASSERT(I_HOLD_LOCK());
#  ifdef DEBUG_DIRTY_BITS
  GC_log_printf("read dirty begin\n");
#  endif
  if (GC_manual_vdb
#  if defined(MPROTECT_VDB)
      || !GC_GWW_AVAILABLE()
#  endif
  ) {
    if (!output_unneeded)
      BCOPY(CAST_AWAY_VOLATILE_PVOID(GC_dirty_pages), GC_grungy_pages,
            sizeof(GC_dirty_pages));
    BZERO(CAST_AWAY_VOLATILE_PVOID(GC_dirty_pages), sizeof(GC_dirty_pages));
#  ifdef MPROTECT_VDB
    /* Protect the heap again, unless the dirty bits are set manually. */
    if (!GC_manual_vdb)
      GC_protect_heap();
#  endif
    return;
  }

#  ifdef GWW_VDB
  GC_gww_read_dirty(output_unneeded);
#  elif defined(PROC_VDB)
  GC_proc_read_dirty(output_unneeded);
#  elif defined(SOFT_VDB)
  GC_soft_read_dirty(output_unneeded);
#  endif
#  if defined(CHECK_SOFT_VDB) /* `&& defined(MPROTECT_VDB)` */
  /* Restart the `mprotect`-based tracking used for the checking. */
  BZERO(CAST_AWAY_VOLATILE_PVOID(GC_dirty_pages), sizeof(GC_dirty_pages));
  GC_protect_heap();
#  endif
}

#  if !defined(NO_VDB_FOR_STATIC_ROOTS) && !defined(PROC_VDB)
/*
 * Tell whether the dirty bits are maintained for the static roots too.
 * The result is always `FALSE` in the manual VDB mode.
 */
GC_INNER GC_bool
GC_is_vdb_for_static_roots(void)
{
  if (!GC_manual_vdb) {
#    if defined(MPROTECT_VDB)
    /* Currently used only in conjunction with `SOFT_VDB`. */
    return GC_GWW_AVAILABLE();
#    else
#      ifndef LINT2
    GC_ASSERT(GC_incremental);
#      endif
    return TRUE;
#    endif
  }
  return FALSE;
}
#  endif

/*
 * Tell whether the entry of `GC_grungy_pages` corresponding to block `h`
 * is set.  The result is conservatively `TRUE` if `DEFAULT_VDB` is
 * defined and the manual VDB mode is off, or if the block header is
 * looked up but not found.
 */
GC_INNER GC_bool
GC_page_was_dirty(struct hblk *h)
{
  size_t index;

#  ifdef DEFAULT_VDB
  if (!GC_manual_vdb)
    return TRUE;
#  elif defined(PROC_VDB)
  /* Unless manual VDB is on, the bitmap covers all process memory. */
  if (GC_manual_vdb)
#  endif
  {
    /* Note: the header check is conditional only if `PROC_VDB`. */
    if (NULL == HDR(h))
      return TRUE;
  }
  index = PHT_HASH(h);
  return get_pht_entry_from_index(GC_grungy_pages, index);
}

#  if defined(CHECKSUMS) || defined(PROC_VDB)
/*
 * Tell whether the entry of `GC_written_pages` corresponding to block `h`
 * is set.  The result is conservatively `TRUE` if the table is not
 * consulted: no suitable VDB implementation is compiled in, or
 * `GC_GWW_AVAILABLE()` is false (if `MPROTECT_VDB`), or the block header
 * is looked up but not found.
 */
GC_INNER GC_bool
GC_page_was_ever_dirty(struct hblk *h)
{
#    if defined(GWW_VDB) || defined(PROC_VDB) || defined(SOFT_VDB)
  size_t index;

#      ifdef MPROTECT_VDB
  if (!GC_GWW_AVAILABLE())
    return TRUE;
#      endif
#      if defined(PROC_VDB)
  if (GC_manual_vdb)
#      endif
  {
    /* Note: the header check is conditional only if `PROC_VDB`. */
    if (NULL == HDR(h))
      return TRUE;
  }
  index = PHT_HASH(h);
  return get_pht_entry_from_index(GC_written_pages, index);
#    else
  /* TODO: Implement for `MANUAL_VDB` case. */
  UNUSED_ARG(h);
  return TRUE;
#    endif
}
#  endif /* CHECKSUMS || PROC_VDB */
#  endif /* CHECKSUMS || PROC_VDB */

/*
 * If `MPROTECT_VDB` is defined: set the entries of `GC_dirty_pages` for
 * the pages spanned by `nblocks` blocks starting at `h` and unprotect
 * these pages; nothing is done if `GC_auto_incremental` is off or
 * `GC_GWW_AVAILABLE()` is true (or, if `DONT_PROTECT_PTRFREE`, when
 * `is_ptrfree` is set).  Otherwise the arguments are ignored.
 */
GC_INNER void
GC_remove_protection(struct hblk *h, size_t nblocks, GC_bool is_ptrfree)
{
#  ifdef MPROTECT_VDB
  struct hblk *current;
  struct hblk *h_trunc; /*< truncated to page boundary */
  ptr_t h_end;          /*< page boundary following the block end */
#  endif

#  ifndef PARALLEL_MARK
  GC_ASSERT(I_HOLD_LOCK());
#  endif
#  ifdef MPROTECT_VDB
  /*
   * Note it is not allowed to call `GC_printf` (and the friends)
   * in this function, see Win32 `GC_stop_world` for the details.
   */
#    ifdef DONT_PROTECT_PTRFREE
  if (is_ptrfree)
    return;
#    endif
  if (!GC_auto_incremental || GC_GWW_AVAILABLE())
    return;
  GC_ASSERT(GC_page_size != 0);
  h_trunc = HBLK_PAGE_ALIGNED(h);
  h_end = PTR_ALIGN_UP((ptr_t)(h + nblocks), GC_page_size);
  /*
   * Note that we cannot examine `GC_dirty_pages` to check whether the
   * page at `h_trunc` has already been marked dirty as there could be
   * a hash collision.
   */
  for (current = h_trunc; ADDR_LT((ptr_t)current, h_end); ++current) {
    size_t index = PHT_HASH(current);

    /*
     * For a pointer-free object, only the blocks outside the object
     * itself (but sharing the pages with it) are marked as dirty.
     */
#    ifndef DONT_PROTECT_PTRFREE
    if (!is_ptrfree
        || !ADDR_INSIDE((ptr_t)current, (ptr_t)h, (ptr_t)(h + nblocks)))
#    endif
    {
      async_set_pht_entry_from_index(GC_dirty_pages, index);
    }
  }
  UNPROTECT(h_trunc, h_end - (ptr_t)h_trunc);
#  else
  /* Ignore write hints.  They do not help us here. */
  UNUSED_ARG(h);
  UNUSED_ARG(nblocks);
  UNUSED_ARG(is_ptrfree);
#  endif
}
#endif /* !GC_DISABLE_INCREMENTAL */

#if defined(MPROTECT_VDB) && defined(DARWIN)
/*
 * The following sources were used as a "reference" for this exception
 * handling code:
 *   - Apple's mach/xnu documentation;
 *   - Timothy J. Wood's "Mach Exception Handlers 101" post to the omnigroup's
 *     macosx-dev list;
 *   - macosx-nat.c from Apple's GDB source code.
 */

/*
 * The bug that caused all this trouble should now be fixed.
 * This should eventually be removed if all goes well.
 */

#  include <mach/exception.h>
#  include <mach/mach.h>
#  include <mach/mach_error.h>
#  include <mach/task.h>

EXTERN_C_BEGIN

/*
 * Some of the following prototypes are missing in any header, although
 * they are documented.  Some are in platform `mach/exc.h` file.
 */

/*
 * Processes a received exception message and composes the reply; it
 * calls `catch_exception_raise()`.
 */
extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *);

extern kern_return_t exception_raise(mach_port_t, mach_port_t, mach_port_t,
                                     exception_type_t, exception_data_t,
                                     mach_msg_type_number_t);

extern kern_return_t exception_raise_state(
    mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t,
    mach_msg_type_number_t, thread_state_flavor_t *, thread_state_t,
    mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);

extern kern_return_t exception_raise_state_identity(
    mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t,
    mach_msg_type_number_t, thread_state_flavor_t *, thread_state_t,
    mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);

/*
 * The exception handling callbacks provided by the collector.  Of these,
 * the "state" and "state identity" variants are not expected to be
 * called.
 */
GC_API_OSCALL kern_return_t catch_exception_raise(
    mach_port_t exception_port, mach_port_t thread, mach_port_t task,
    exception_type_t exception, exception_data_t code,
    mach_msg_type_number_t code_count);

GC_API_OSCALL kern_return_t catch_exception_raise_state(
    mach_port_name_t exception_port, int exception, exception_data_t code,
    mach_msg_type_number_t codeCnt, int flavor, thread_state_t old_state,
    int old_stateCnt, thread_state_t new_state, int new_stateCnt);

GC_API_OSCALL kern_return_t catch_exception_raise_state_identity(
    mach_port_name_t exception_port, mach_port_t thread, mach_port_t task,
    int exception, exception_data_t code, mach_msg_type_number_t codeCnt,
    int flavor, thread_state_t old_state, int old_stateCnt,
    thread_state_t new_state, int new_stateCnt);

EXTERN_C_END

/*
 * The "state" variant of the exception callback.  This should never be
 * called, but just in case: it invokes `ABORT_RET()` and then returns
 * `KERN_INVALID_ARGUMENT`.
 */
GC_API_OSCALL kern_return_t
catch_exception_raise_state(mach_port_name_t exception_port, int exception,
                            exception_data_t code,
                            mach_msg_type_number_t codeCnt, int flavor,
                            thread_state_t old_state, int old_stateCnt,
                            thread_state_t new_state, int new_stateCnt)
{
  /* The thread state arguments are not examined. */
  UNUSED_ARG(new_stateCnt);
  UNUSED_ARG(new_state);
  UNUSED_ARG(old_stateCnt);
  UNUSED_ARG(old_state);
  UNUSED_ARG(flavor);
  /* Neither are the ones describing the exception. */
  UNUSED_ARG(codeCnt);
  UNUSED_ARG(code);
  UNUSED_ARG(exception);
  UNUSED_ARG(exception_port);

  ABORT_RET("Unexpected catch_exception_raise_state invocation");
  return KERN_INVALID_ARGUMENT;
}

/*
 * The "state identity" variant of the exception callback.  It is not
 * expected to be called: it invokes `ABORT_RET()` and then returns
 * `KERN_INVALID_ARGUMENT`.
 */
GC_API_OSCALL kern_return_t
catch_exception_raise_state_identity(
    mach_port_name_t exception_port, mach_port_t thread, mach_port_t task,
    int exception, exception_data_t code, mach_msg_type_number_t codeCnt,
    int flavor, thread_state_t old_state, int old_stateCnt,
    thread_state_t new_state, int new_stateCnt)
{
  /* The thread state arguments are not examined. */
  UNUSED_ARG(new_stateCnt);
  UNUSED_ARG(new_state);
  UNUSED_ARG(old_stateCnt);
  UNUSED_ARG(old_state);
  UNUSED_ARG(flavor);
  /* Neither are the ones describing the exception and its origin. */
  UNUSED_ARG(codeCnt);
  UNUSED_ARG(code);
  UNUSED_ARG(exception);
  UNUSED_ARG(task);
  UNUSED_ARG(thread);
  UNUSED_ARG(exception_port);

  ABORT_RET("Unexpected catch_exception_raise_state_identity invocation");
  return KERN_INVALID_ARGUMENT;
}

/* The capacity of each array in `GC_old_exc_ports`. */
#  define MAX_EXCEPTION_PORTS 16

/*
 * The task exception ports information as obtained by
 * `task_get_exception_ports()` in `GC_dirty_init()` before setting the
 * collector's own exception port.  The first element is used by
 * `GC_dirty_update_child()` to restore the old port.
 */
static struct {
  mach_msg_type_number_t count;
  exception_mask_t masks[MAX_EXCEPTION_PORTS];
  exception_handler_t ports[MAX_EXCEPTION_PORTS];
  exception_behavior_t behaviors[MAX_EXCEPTION_PORTS];
  thread_state_flavor_t flavors[MAX_EXCEPTION_PORTS];
} GC_old_exc_ports;

/*
 * The Mach ports used by this implementation.  `exception` is the port
 * the `mprotect` thread receives the messages on.  `reply` (present only
 * if `THREADS`) is the port the acknowledgements of that thread are
 * sent to.  `os_callback` just holds the references to the exception
 * callbacks.
 */
STATIC struct ports_s {
  void (*volatile os_callback[3])(void);
  mach_port_t exception;
#  if defined(THREADS)
  mach_port_t reply;
#  endif
} GC_ports = { { /*< this is to prevent stripping these routines as dead */
                 (void (*)(void))catch_exception_raise,
                 (void (*)(void))catch_exception_raise_state,
                 (void (*)(void))catch_exception_raise_state_identity },
#  ifdef THREADS
               0 /* `exception` */,
#  endif
               0 };

/*
 * The message used for the stop/resume requests and the acknowledgements;
 * it consists of the header only.
 */
typedef struct {
  mach_msg_header_t head;
} GC_msg_t;

/*
 * The state of the `mprotect` thread.  As implemented by
 * `GC_mprotect_thread()`: a stop request moves it from `GC_MP_NORMAL`
 * to `GC_MP_DISCARDING`; once a receive times out in the latter state,
 * it becomes `GC_MP_STOPPED`; a resume request moves it from
 * `GC_MP_STOPPED` back to `GC_MP_NORMAL`.
 */
typedef enum {
  GC_MP_NORMAL,
  GC_MP_DISCARDING,
  GC_MP_STOPPED
} GC_mprotect_state_t;

#  ifdef THREADS
/*
 * The identifiers of the requests sent to the `mprotect` thread.
 * FIXME: 1 and 2 seem to be safe to use in the `msgh_id` field, but it
 * is not documented.  Use the source and see if they should be OK.
 */
#    define ID_STOP 1
#    define ID_RESUME 2

/* This value is only used on the reply port. */
#    define ID_ACK 3

/* The current state of the `mprotect` thread (updated by that thread). */
STATIC GC_mprotect_state_t GC_mprotect_state = GC_MP_NORMAL;

/*
 * Send the request identified by `id` to the exception port and wait
 * for the acknowledgement on the reply port.  Aborts on a failure or
 * an unexpected reply.  This should *only* be called when the world is
 * stopped.
 */
STATIC void
GC_mprotect_thread_notify(mach_msg_id_t id)
{
  /* The buffer for the request and for the received acknowledgement. */
  struct notify_buf_s {
    GC_msg_t msg;
    mach_msg_trailer_t trailer;
  } nbuf;
  mach_msg_header_t *hdr = &nbuf.msg.head;
  mach_msg_return_t rc;

  hdr->msgh_id = id;
  hdr->msgh_local_port = MACH_PORT_NULL;
  hdr->msgh_remote_port = GC_ports.exception;
  hdr->msgh_size = sizeof(nbuf.msg);
  /* The arguments are for the remote and the local ports, respectively. */
  hdr->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);

  /* Send the request and receive the reply by a single call. */
  rc = mach_msg(hdr, MACH_SEND_MSG | MACH_RCV_MSG | MACH_RCV_LARGE,
                sizeof(nbuf.msg), sizeof(nbuf), GC_ports.reply,
                MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
  if (rc != MACH_MSG_SUCCESS)
    ABORT("mach_msg failed in GC_mprotect_thread_notify");
  if (hdr->msgh_id != ID_ACK)
    ABORT("Invalid ack in GC_mprotect_thread_notify");
}

/* Should only be called by the `mprotect` thread. */
STATIC void
GC_mprotect_thread_reply(void)
{
  GC_msg_t msg;
  mach_msg_return_t r;

  /* remote, local */
  msg.head.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);
  msg.head.msgh_size = sizeof(msg);
  msg.head.msgh_remote_port = GC_ports.reply;
  msg.head.msgh_local_port = MACH_PORT_NULL;
  msg.head.msgh_id = ID_ACK;

  r = mach_msg(&msg.head, MACH_SEND_MSG, sizeof(msg), 0, MACH_PORT_NULL,
               MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
  if (r != MACH_MSG_SUCCESS)
    ABORT("mach_msg failed in GC_mprotect_thread_reply");
}

/*
 * Send the stop request to the `mprotect` thread and wait for its
 * acknowledgement.
 */
GC_INNER void
GC_mprotect_stop(void)
{
  GC_mprotect_thread_notify(ID_STOP);
}

/*
 * Send the resume request to the `mprotect` thread and wait for its
 * acknowledgement.
 */
GC_INNER void
GC_mprotect_resume(void)
{
  GC_mprotect_thread_notify(ID_RESUME);
}

#    ifdef CAN_HANDLE_FORK
/*
 * Unprotect all the heap, restore the old task exception port (if any
 * was saved) and turn the incremental mode off.  Nothing is done if
 * `GC_task_self` is zero.  The allocator lock should be held.
 */
GC_INNER void
GC_dirty_update_child(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  /* If `GC_task_self` is zero, then the GC incremental mode is off. */
  if (GC_task_self != 0) {
    GC_ASSERT(GC_mprotect_state == GC_MP_NORMAL);
    /* An up-to-date value is needed by `UNPROTECT()`. */
    GC_task_self = mach_task_self();
    GC_unprotect_all_heap();

    /*
     * Restore the old task exception ports.
     * TODO: Should we do it in `fork_prepare_proc`/`fork_parent_proc`?
     * TODO: Should we check `GC_old_exc_ports.count <= 1`?
     */
    if (GC_old_exc_ports.count > 0
        && task_set_exception_ports(
               GC_task_self, GC_old_exc_ports.masks[0],
               GC_old_exc_ports.ports[0], GC_old_exc_ports.behaviors[0],
               GC_old_exc_ports.flavors[0])
               != KERN_SUCCESS)
      ABORT("task_set_exception_ports failed (in child)");

    /* TODO: Re-enable incremental mode in child. */
    GC_task_self = 0;
    GC_incremental = FALSE;
  }
}
#    endif /* CAN_HANDLE_FORK */

#  else
/*
 * No stop/resume requests are defined without threads support, thus the
 * state is constant.  The compiler should optimize away any
 * `GC_mprotect_state` computations.
 */
#    define GC_mprotect_state GC_MP_NORMAL
#  endif /* !THREADS */

/*
 * The buffer for the reply composed by `exc_server()`.  The content
 * following the header is not accessed by the collector.
 */
struct mp_reply_s {
  mach_msg_header_t head;
  char data[256];
};

/*
 * The buffer for a message received on the exception port.  The content
 * following the header is not accessed by the collector.
 */
struct mp_msg_s {
  mach_msg_header_t head;
  mach_msg_body_t msgh_body;
  char data[1024];
};

/*
 * The body of the thread servicing the exception port.  In an endless
 * loop, it receives a message from `GC_ports.exception` and either
 * processes a stop/resume request (only if `THREADS`) or passes the
 * message to `exc_server()` and sends the composed reply.  `arg` is not
 * used (except for the dummy check at the beginning).
 */
STATIC void *
GC_mprotect_thread(void *arg)
{
  mach_msg_return_t r;
  /*
   * These two structures contain some private kernel data.  We do not need
   * to access any of it so we do not bother defining a proper structure.
   * The correct definitions are in the `xnu` source code.
   */
  struct mp_reply_s reply;
  struct mp_msg_s msg;
  mach_msg_id_t id;

  if (ADDR(arg) == GC_WORD_MAX)
    return 0; /*< to prevent a compiler warning */
#  if defined(CPPCHECK)
  reply.data[0] = 0; /*< to prevent "field unused" warnings */
  msg.data[0] = 0;
#  endif

#  if defined(HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID)
  (void)pthread_setname_np("GC-mprotect");
#  endif
#  if defined(THREADS) && !defined(GC_NO_THREADS_DISCOVERY)
  GC_darwin_register_self_mach_handler();
#  endif

  for (;;) {
    /*
     * Wait for a message.  In the discarding state, the receive is
     * performed with a zero timeout instead, so that the emptiness of
     * the port queue could be detected.
     */
    r = mach_msg(
        &msg.head,
        MACH_RCV_MSG | MACH_RCV_LARGE
            | (GC_mprotect_state == GC_MP_DISCARDING ? MACH_RCV_TIMEOUT : 0),
        0, sizeof(msg), GC_ports.exception,
        GC_mprotect_state == GC_MP_DISCARDING ? 0 : MACH_MSG_TIMEOUT_NONE,
        MACH_PORT_NULL);
    id = r == MACH_MSG_SUCCESS ? msg.head.msgh_id : -1;

#  if defined(THREADS)
    if (GC_mprotect_state == GC_MP_DISCARDING) {
      if (r == MACH_RCV_TIMED_OUT) {
        /* No more pending messages, so acknowledge the stop request. */
        GC_mprotect_state = GC_MP_STOPPED;
        GC_mprotect_thread_reply();
        continue;
      }
      if (r == MACH_MSG_SUCCESS && (id == ID_STOP || id == ID_RESUME))
        ABORT("Out of order mprotect thread request");
    }
#  endif /* THREADS */

    if (r != MACH_MSG_SUCCESS) {
      ABORT_ARG2("mach_msg failed", ": errcode= %d (%s)", (int)r,
                 mach_error_string(r));
    }

    switch (id) {
#  if defined(THREADS)
    case ID_STOP:
      if (GC_mprotect_state != GC_MP_NORMAL)
        ABORT("Called mprotect_stop when state wasn't normal");
      /* The acknowledgement is sent later, upon a receive timeout. */
      GC_mprotect_state = GC_MP_DISCARDING;
      break;
    case ID_RESUME:
      if (GC_mprotect_state != GC_MP_STOPPED)
        ABORT("Called mprotect_resume when state wasn't stopped");
      GC_mprotect_state = GC_MP_NORMAL;
      GC_mprotect_thread_reply();
      break;
#  endif /* THREADS */
    default:
      /* Handle the message (it calls `catch_exception_raise`). */
      if (!exc_server(&msg.head, &reply.head))
        ABORT("exc_server failed");
      /* Send the reply. */
      r = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0,
                   MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
      if (r != MACH_MSG_SUCCESS) {
        /*
         * This will fail if the thread dies, but the thread should
         * not die...
         */
#  ifdef BROKEN_EXCEPTION_HANDLING
        GC_err_printf("mach_msg failed with %d %s while sending "
                      "exc reply\n",
                      (int)r, mach_error_string(r));
#  else
        ABORT("mach_msg failed while sending exception reply");
#  endif
      }
    }
  }
}

/*
 * All this `SIGBUS` code should not be necessary.  All protection
 * faults should be going through the `mach` exception handler.
 * However, it seems a `SIGBUS` is occasionally sent for some unknown
 * reason; even more odd, it seems to be meaningless and safe to ignore.
 */
#  ifdef BROKEN_EXCEPTION_HANDLING

/*
 * The number of `SIGBUS` signals ignored in a row.  Updates to this are
 * not atomic, but the `SIGBUS` signals seem pretty rare.  Even if this
 * does not get updated properly, it is not really a problem.
 */
STATIC int GC_sigbus_count = 0;

/*
 * The handler of `SIGBUS` signal: the signal is ignored (with a warning)
 * unless too many of them have been received in a row.  `sip` and
 * `context` are not used.
 */
STATIC void
GC_darwin_sigbus(int num, siginfo_t *sip, void *context)
{
  if (SIGBUS != num)
    ABORT("Got a non-sigbus signal in the sigbus handler");

  /*
   * Some of the signals seem safe to ignore, but too many in a row
   * probably means trouble.  Note that `GC_sigbus_count` is reset for
   * each `mach` exception that is handled.
   */
  if (GC_sigbus_count > 7)
    ABORT("Got many SIGBUS signals in a row!");
  GC_sigbus_count += 1;
  WARN("Ignoring SIGBUS\n", 0);
}
#  endif /* BROKEN_EXCEPTION_HANDLING */

GC_INNER GC_bool
GC_dirty_init(void)
{
  kern_return_t r;
  mach_port_t me;
  pthread_t thread;
  pthread_attr_t attr;
  exception_mask_t mask;

  GC_ASSERT(I_HOLD_LOCK());
#  if defined(CAN_HANDLE_FORK) && !defined(THREADS)
  if (GC_handle_fork) {
    /*
     * To both support GC incremental mode and GC functions usage in
     * the forked child process, `pthread_atfork` should be used to
     * install handlers that switch off `GC_incremental` in the child
     * gracefully (unprotecting all pages and clearing
     * `GC_mach_handler_thread`).  For now, we just disable incremental
     * mode if `fork()` handling is requested by the client.
     */
    WARN("Can't turn on GC incremental mode as fork()"
         " handling requested\n",
         0);
    return FALSE;
  }
#  endif

  GC_VERBOSE_LOG_PRINTF("Initializing mach/darwin mprotect"
                        " virtual dirty bit implementation\n");
#  ifdef BROKEN_EXCEPTION_HANDLING
  WARN("Enabling workarounds for various darwin exception handling bugs\n", 0);
#  endif
  if (GC_page_size % HBLKSIZE != 0) {
    ABORT("Page size not multiple of HBLKSIZE");
  }

  GC_task_self = me = mach_task_self();
  GC_ASSERT(me != 0);

  r = mach_port_allocate(me, MACH_PORT_RIGHT_RECEIVE, &GC_ports.exception);
  /* TODO: Call `WARN()` and return `FALSE` in case of a failure. */
  if (r != KERN_SUCCESS)
    ABORT("mach_port_allocate failed (exception port)");

  r = mach_port_insert_right(me, GC_ports.exception, GC_ports.exception,
                             MACH_MSG_TYPE_MAKE_SEND);
  if (r != KERN_SUCCESS)
    ABORT("mach_port_insert_right failed (exception port)");

#  if defined(THREADS)
  r = mach_port_allocate(me, MACH_PORT_RIGHT_RECEIVE, &GC_ports.reply);
  if (r != KERN_SUCCESS)
    ABORT("mach_port_allocate failed (reply port)");
#  endif

  /* The exceptions we want to catch. */
  mask = EXC_MASK_BAD_ACCESS;
  r = task_get_exception_ports(me, mask, GC_old_exc_ports.masks,
                               &GC_old_exc_ports.count, GC_old_exc_ports.ports,
                               GC_old_exc_ports.behaviors,
                               GC_old_exc_ports.flavors);
  if (r != KERN_SUCCESS)
    ABORT("task_get_exception_ports failed");

  r = task_set_exception_ports(me, mask, GC_ports.exception, EXCEPTION_DEFAULT,
                               GC_MACH_THREAD_STATE);
  if (r != KERN_SUCCESS)
    ABORT("task_set_exception_ports failed");

  if (pthread_attr_init(&attr) != 0)
    ABORT("pthread_attr_init failed");
  if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
    ABORT("pthread_attr_setdetachedstate failed");
  /* This will call the real `pthreads` routine, not our wrapper. */
  if (GC_inner_pthread_create(&thread, &attr, GC_mprotect_thread, NULL) != 0)
    ABORT("pthread_create failed");
  (void)pthread_attr_destroy(&attr);

  /* Setup the handler for ignoring the meaningless `SIGBUS` signals. */
#  ifdef BROKEN_EXCEPTION_HANDLING
  {
    struct sigaction sa, oldsa;
    sa.sa_handler = (SIG_HNDLR_PTR)GC_darwin_sigbus;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART | SA_SIGINFO;
    /* `sa.sa_restorer` is deprecated and should not be initialized. */
    if (sigaction(SIGBUS, &sa, &oldsa) < 0)
      ABORT("sigaction failed");
    if ((GC_funcptr_uint)oldsa.sa_handler != (GC_funcptr_uint)SIG_DFL) {
      GC_VERBOSE_LOG_PRINTF("Repla