First Commit

2025-11-18 14:18:26 -07:00
parent 33eb6e3707
commit 27277ec342
6106 changed files with 3571167 additions and 0 deletions
--- a/3rdparty/cpuinfo/.gitignore
+++ b/3rdparty/cpuinfo/.gitignore
@@ -0,0 +1,21 @@
+# Ninja files
+build.ninja
+
+# Build objects and artifacts
+deps/
+build/
+bin/
+lib/
+libs/
+obj/
+*.pyc
+*.pyo
+
+# System files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
--- a/3rdparty/cpuinfo/CMakeLists.txt
+++ b/3rdparty/cpuinfo/CMakeLists.txt
@@ -0,0 +1,925 @@
+# Policy baseline: CMake 3.5 is the oldest release this script accepts.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
+
+# ---[ Setup project
+# Only C is enabled here; C++ is enabled further down, and only when tests or
+# benchmarks are requested on a supported platform.
+PROJECT(cpuinfo LANGUAGES C)
+
+# ---[ Options.
+# Enumerated settings: each one is a STRING cache entry followed by the list of
+# values it accepts (the STRINGS cache property).
+SET(CPUINFO_LIBRARY_TYPE "default" CACHE STRING
+  "Type of cpuinfo library (shared, static, or default) to build")
+SET_PROPERTY(CACHE CPUINFO_LIBRARY_TYPE PROPERTY STRINGS
+  default static shared)
+SET(CPUINFO_RUNTIME_TYPE "default" CACHE STRING
+  "Type of runtime library (shared, static, or default) to use")
+SET_PROPERTY(CACHE CPUINFO_RUNTIME_TYPE PROPERTY STRINGS
+  default static shared)
+SET(CPUINFO_LOG_LEVEL "default" CACHE STRING
+  "Minimum logging level (info with lower severity will be ignored)")
+SET_PROPERTY(CACHE CPUINFO_LOG_LEVEL PROPERTY STRINGS
+  default debug info warning error fatal none)
+
+# Logging to stdout/stderr defaults to ON everywhere except Android.
+IF(NOT ANDROID)
+  OPTION(CPUINFO_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON)
+ELSE()
+  OPTION(CPUINFO_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" OFF)
+ENDIF()
+
+# Optional build products; all are off unless explicitly requested.
+OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF)
+OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF)
+OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF)
+OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF)
+OPTION(CPUINFO_BUILD_PKG_CONFIG "Build pkg-config manifest" OFF)
+
+# USE_SYSTEM_LIBS provides the default for the per-dependency switches below.
+OPTION(USE_SYSTEM_LIBS
+  "Use system libraries instead of downloading and building them" OFF)
+OPTION(USE_SYSTEM_GOOGLEBENCHMARK
+  "Use system Google Benchmark library instead of downloading and building it"
+  ${USE_SYSTEM_LIBS})
+OPTION(USE_SYSTEM_GOOGLETEST
+  "Use system Google Test library instead of downloading and building it"
+  ${USE_SYSTEM_LIBS})
+
+# ---[ CMake options
+# Provides the CMAKE_INSTALL_* destination variables used by the install rules.
+INCLUDE(GNUInstallDirs)
+
+# CTest support is only switched on when some kind of test is being built.
+IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS)
+  ENABLE_TESTING()
+ENDIF()
+
+# Request the C99 standard for <target>, with compiler extensions turned off.
+MACRO(CPUINFO_TARGET_ENABLE_C99 target)
+  SET_PROPERTY(TARGET ${target} PROPERTY C_STANDARD 99)
+  SET_PROPERTY(TARGET ${target} PROPERTY C_EXTENSIONS NO)
+ENDMACRO()
+
+# Set the C++ standard for <target>, with compiler extensions turned off.
+# NOTE: despite the "CXX11" in the macro name, the standard requested is C++14.
+MACRO(CPUINFO_TARGET_ENABLE_CXX11 target)
+  SET_PROPERTY(TARGET ${target} PROPERTY CXX_STANDARD 14)
+  SET_PROPERTY(TARGET ${target} PROPERTY CXX_EXTENSIONS NO)
+ENDMACRO()
+
+# On MSVC, select the C runtime flag for <target> from CPUINFO_RUNTIME_TYPE:
+#   "static" -> /MT (/MTd in Debug), "shared" -> /MD (/MDd in Debug).
+# Any other value (including "default"), or a non-MSVC compiler, adds nothing.
+MACRO(CPUINFO_TARGET_RUNTIME_LIBRARY target)
+  IF(MSVC)
+    IF(CPUINFO_RUNTIME_TYPE STREQUAL "static")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE "/MT$<$<CONFIG:Debug>:d>")
+    ELSEIF(CPUINFO_RUNTIME_TYPE STREQUAL "shared")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE "/MD$<$<CONFIG:Debug>:d>")
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+# ---[ Determine whether building for Apple's desktop or mobile OSes
+# IS_APPLE_OS is TRUE exactly when the system name is Darwin, iOS, tvOS or watchOS.
+SET(IS_APPLE_OS FALSE)
+IF(CMAKE_SYSTEM_NAME MATCHES "^(Darwin|iOS|tvOS|watchOS)$")
+  SET(IS_APPLE_OS TRUE)
+ENDIF()
+
+# ---[ Determine target processor
+# Start from CMake's own idea of the target CPU, then apply platform overrides.
+SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}")
+
+# Normalize the lowercase "amd64" seen on FreeBSD to "AMD64", the spelling the
+# architecture patterns in this file accept.
+IF(CPUINFO_TARGET_PROCESSOR STREQUAL "amd64" AND CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
+  SET(CPUINFO_TARGET_PROCESSOR "AMD64")
+ENDIF()
+
+IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$")
+  # Apple builds: the requested OSX architecture takes precedence.
+  SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
+ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_VS_PLATFORM_NAME)
+  # Visual Studio generators: map the VS platform name to a processor name.
+  # Anything not listed here is a hard configuration error.
+  IF(CMAKE_VS_PLATFORM_NAME STREQUAL "x64")
+    SET(CPUINFO_TARGET_PROCESSOR "x86_64")
+  ELSEIF(CMAKE_VS_PLATFORM_NAME STREQUAL "Win32")
+    SET(CPUINFO_TARGET_PROCESSOR "x86")
+  ELSEIF(CMAKE_VS_PLATFORM_NAME MATCHES "^(ARM64EC|arm64ec|ARM64E|arm64e)")
+    SET(CPUINFO_TARGET_PROCESSOR "arm64")
+  ELSEIF(CMAKE_VS_PLATFORM_NAME STREQUAL "ARM64")
+    SET(CPUINFO_TARGET_PROCESSOR "arm64")
+  ELSE()
+    MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_VS_PLATFORM_NAME}\"")
+  ENDIF()
+ENDIF()
+
+# ---[ Build flags
+# CPUINFO_SUPPORTED_PLATFORM starts out TRUE and is cleared (with a warning)
+# when the target processor is unknown or not one cpuinfo recognizes.
+SET(CPUINFO_SUPPORTED_PLATFORM TRUE)
+IF(CMAKE_SYSTEM_PROCESSOR)
+  IF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64.*|ARM64.*|riscv(32|64))$")
+    MESSAGE(WARNING
+      "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. "
+      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+  ENDIF()
+ELSEIF(NOT IOS)
+  # No processor reported at all; iOS builds are exempt from this check.
+  MESSAGE(WARNING
+    "Target processor architecture is not specified. "
+    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+  SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+ENDIF()
+
+
+# Clear CPUINFO_SUPPORTED_PLATFORM (with a warning) when the target operating
+# system is unknown or not one cpuinfo recognizes.
+IF(NOT CMAKE_SYSTEM_NAME)
+  MESSAGE(WARNING
+    "Target operating system is not specified. "
+    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+  SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS|Darwin|Linux|Android|FreeBSD)$")
+  # An OS outside the list above is rejected only when CMake is 3.14 or newer
+  # and the target is not an Apple OS. The version test is written as
+  # NOT ... VERSION_LESS because VERSION_GREATER_EQUAL only exists since
+  # CMake 3.7, while this file declares 3.5 as its minimum version.
+  IF((NOT CMAKE_VERSION VERSION_LESS "3.14") AND NOT IS_APPLE_OS)
+    MESSAGE(WARNING
+      "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
+      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+  ENDIF()
+ENDIF()
+
+# C++ is needed only by the tests and benchmarks, and those are built only on
+# supported platforms.
+IF(CPUINFO_SUPPORTED_PLATFORM AND (CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS OR CPUINFO_BUILD_BENCHMARKS))
+  ENABLE_LANGUAGE(CXX)
+ENDIF()
+
+# ---[ Download deps
+# Where third-party dependencies are fetched to (source) and built (binary).
+# Both are cache entries, so they can be redirected from the command line.
+SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps CACHE PATH
+  "Confu-style dependencies source directory")
+SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps CACHE PATH
+  "Confu-style dependencies binary directory")
+
+# Google Test is needed for mock and unit tests. Use the system package when
+# requested; otherwise, unless GOOGLETEST_SOURCE_DIR is already defined,
+# generate a small download project, then configure and build it at configure
+# time to fetch the sources.
+IF(CPUINFO_SUPPORTED_PLATFORM AND (CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS))
+  IF(USE_SYSTEM_GOOGLETEST)
+    FIND_PACKAGE(GTest REQUIRED)
+  ELSEIF(NOT DEFINED GOOGLETEST_SOURCE_DIR)
+    MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)")
+    CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt")
+    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+      RESULT_VARIABLE CPUINFO_GOOGLETEST_CONFIGURE_RESULT
+      WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+    # Stop here on failure instead of failing later on a missing source tree.
+    IF(NOT "${CPUINFO_GOOGLETEST_CONFIGURE_RESULT}" EQUAL "0")
+      MESSAGE(FATAL_ERROR "Configuring the Google Test download project failed: ${CPUINFO_GOOGLETEST_CONFIGURE_RESULT}")
+    ENDIF()
+    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+      RESULT_VARIABLE CPUINFO_GOOGLETEST_BUILD_RESULT
+      WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+    IF(NOT "${CPUINFO_GOOGLETEST_BUILD_RESULT}" EQUAL "0")
+      MESSAGE(FATAL_ERROR "Building the Google Test download project failed: ${CPUINFO_GOOGLETEST_BUILD_RESULT}")
+    ENDIF()
+    SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory")
+  ENDIF()
+ENDIF()
+
+# Google Benchmark is needed for the micro-benchmarks. Use the system package
+# when requested; otherwise, unless GOOGLEBENCHMARK_SOURCE_DIR is already
+# defined, generate a small download project, then configure and build it at
+# configure time to fetch the sources.
+IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS)
+  IF(USE_SYSTEM_GOOGLEBENCHMARK)
+    FIND_PACKAGE(benchmark REQUIRED)
+  ELSEIF(NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR)
+    MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
+    CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
+    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+      RESULT_VARIABLE CPUINFO_GOOGLEBENCHMARK_CONFIGURE_RESULT
+      WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+    # Stop here on failure instead of failing later on a missing source tree.
+    IF(NOT "${CPUINFO_GOOGLEBENCHMARK_CONFIGURE_RESULT}" EQUAL "0")
+      MESSAGE(FATAL_ERROR "Configuring the Google Benchmark download project failed: ${CPUINFO_GOOGLEBENCHMARK_CONFIGURE_RESULT}")
+    ENDIF()
+    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+      RESULT_VARIABLE CPUINFO_GOOGLEBENCHMARK_BUILD_RESULT
+      WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+    IF(NOT "${CPUINFO_GOOGLEBENCHMARK_BUILD_RESULT}" EQUAL "0")
+      MESSAGE(FATAL_ERROR "Building the Google Benchmark download project failed: ${CPUINFO_GOOGLEBENCHMARK_BUILD_RESULT}")
+    ENDIF()
+    SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory")
+  ENDIF()
+ENDIF()
+
+# ---[ cpuinfo library
+# Sources compiled on every platform. Architecture- and OS-specific files are
+# appended below, but only when the platform is supported.
+SET(CPUINFO_SRCS src/api.c src/cache.c src/init.c src/log.c)
+
+IF(CPUINFO_SUPPORTED_PLATFORM)
+  # --- Architecture-specific sources (first matching branch wins) ---
+  # x86 / x86-64. Skipped for Emscripten, which gets its own init source below.
+  IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
+    LIST(APPEND CPUINFO_SRCS
+      src/x86/init.c
+      src/x86/info.c
+      src/x86/vendor.c
+      src/x86/uarch.c
+      src/x86/name.c
+      src/x86/topology.c
+      src/x86/isa.c
+      src/x86/cache/init.c
+      src/x86/cache/descriptor.c
+      src/x86/cache/deterministic.c)
+    # Per-OS x86 initialization.
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/x86/linux/init.c
+        src/x86/linux/cpuinfo.c)
+    ELSEIF(IS_APPLE_OS)
+      LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
+    ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS)$")
+      LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
+    ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
+      LIST(APPEND CPUINFO_SRCS src/x86/freebsd/init.c)
+    ENDIF()
+  # ARM64 on Windows: uses only the Windows-specific ARM sources. This branch is
+  # tested before the generic ARM branch so the generic ARM files are not added.
+  ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^Windows" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(ARM64|arm64)$")
+    LIST(APPEND CPUINFO_SRCS
+      src/arm/windows/init-by-logical-sys-info.c
+      src/arm/windows/init.c)
+  # Generic ARM (armv5 through armv8, aarch64, arm64*).
+  ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64.*)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
+    LIST(APPEND CPUINFO_SRCS
+      src/arm/uarch.c
+      src/arm/cache.c)
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/arm/linux/init.c
+        src/arm/linux/cpuinfo.c
+        src/arm/linux/clusters.c
+        src/arm/linux/chipset.c
+        src/arm/linux/midr.c
+        src/arm/linux/hwcap.c)
+      # The ISA source is picked from CMAKE_SYSTEM_PROCESSOR here, not from
+      # CPUINFO_TARGET_PROCESSOR.
+      IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]")
+        LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c)
+        # On the Android "armeabi" ABI this one file is compiled with -marm.
+        IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi")
+          SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm)
+        ENDIF()
+      ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
+        LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c)
+      ENDIF()
+    ELSEIF(IS_APPLE_OS AND CPUINFO_TARGET_PROCESSOR MATCHES "arm64.*")
+      LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c)
+    ENDIF()
+    # Android-only addition for ARM targets.
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/arm/android/properties.c)
+    ENDIF()
+  # RISC-V (32- and 64-bit); OS-specific sources exist only for Linux.
+  ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(riscv(32|64))$")
+    LIST(APPEND CPUINFO_SRCS
+      src/riscv/uarch.c)
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+      LIST(APPEND CPUINFO_SRCS
+        src/riscv/linux/init.c
+	src/riscv/linux/riscv-hw.c
+	src/riscv/linux/riscv-isa.c)
+    ENDIF()
+  ENDIF()
+
+  # Emscripten is handled independently of the processor branches above.
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    LIST(APPEND CPUINFO_SRCS
+      src/emscripten/init.c)
+  ENDIF()
+
+  # --- OS-specific sources shared by all architectures ---
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    LIST(APPEND CPUINFO_SRCS
+      src/linux/smallfile.c
+      src/linux/multiline.c
+      src/linux/cpulist.c
+      src/linux/processors.c)
+  ELSEIF(IS_APPLE_OS)
+    LIST(APPEND CPUINFO_SRCS src/mach/topology.c)
+  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
+    LIST(APPEND CPUINFO_SRCS src/freebsd/topology.c)
+  ENDIF()
+
+  # Locate the threading library (pthreads preferred) on Linux, Android and
+  # FreeBSD; it is linked into the library targets further down through
+  # CMAKE_THREAD_LIBS_INIT.
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
+    SET(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+    SET(THREADS_PREFER_PTHREAD_FLAG TRUE)
+    FIND_PACKAGE(Threads REQUIRED)
+  ENDIF()
+ENDIF()
+
+# Main library. With "default" the STATIC/SHARED choice is left to CMake
+# (i.e. to BUILD_SHARED_LIBS); any value other than default/shared/static is a
+# configuration error.
+IF(CPUINFO_LIBRARY_TYPE STREQUAL "default")
+  ADD_LIBRARY(cpuinfo ${CPUINFO_SRCS})
+ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "shared")
+  ADD_LIBRARY(cpuinfo SHARED ${CPUINFO_SRCS})
+ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "static")
+  ADD_LIBRARY(cpuinfo STATIC ${CPUINFO_SRCS})
+ELSE()
+  MESSAGE(FATAL_ERROR "Unsupported library type ${CPUINFO_LIBRARY_TYPE}")
+ENDIF()
+# Second, always-static build of the same sources; it additionally exposes the
+# src/ directory to its consumers (see its include directories below).
+ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})
+CPUINFO_TARGET_ENABLE_C99(cpuinfo)
+CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
+# Apply the MSVC runtime selection to both libraries, so that a non-default
+# CPUINFO_RUNTIME_TYPE does not leave cpuinfo_internals on a different CRT
+# than cpuinfo and cpuinfo_mock.
+CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)
+CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo_internals)
+# Windows-family targets (including Cygwin and MSYS).
+IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS)$")
+  # Target Windows 7+ API
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE
+    _WIN32_WINNT=0x0601
+    _CRT_SECURE_NO_WARNINGS)
+  TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE
+    _WIN32_WINNT=0x0601
+    _CRT_SECURE_NO_WARNINGS)
+  # Explicitly link Kernel32 for UWP build
+  IF(CMAKE_SYSTEM_NAME STREQUAL "WindowsStore")
+    TARGET_LINK_LIBRARIES(cpuinfo PUBLIC Kernel32)
+  ENDIF()
+ENDIF()
+# On Android, when not logging to stdio, link the "log" library.
+IF(ANDROID AND NOT CPUINFO_LOG_TO_STDIO)
+  TARGET_LINK_LIBRARIES(cpuinfo PRIVATE "log")
+ENDIF()
+
+# Public header, installed through the PUBLIC_HEADER install rule.
+SET_TARGET_PROPERTIES(cpuinfo PROPERTIES
+  PUBLIC_HEADER include/cpuinfo.h)
+
+# Consumers see include/ (source tree when building, install include dir once
+# installed); src/ stays private to cpuinfo itself.
+TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE
+  src)
+
+# cpuinfo_internals exposes both include/ and src/ to whatever links it.
+TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC
+  include
+  src)
+
+# CPUINFO_LOG_TO_STDIO is passed to the sources as 0 or 1.
+TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE
+  "CPUINFO_LOG_TO_STDIO=$<BOOL:${CPUINFO_LOG_TO_STDIO}>")
+# Map the CPUINFO_LOG_LEVEL option to the numeric CPUINFO_LOG_LEVEL macro:
+#   none=0, fatal=1, error=2, warning=3, info=4, debug=5.
+# "default" currently means "error" (subject to change); anything else is a
+# configuration error.
+IF(CPUINFO_LOG_LEVEL STREQUAL "none")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=0")
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=1")
+ELSEIF(CPUINFO_LOG_LEVEL MATCHES "^(default|error)$")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=2")
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=3")
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=4")
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=5")
+ELSE()
+  MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}")
+ENDIF()
+
+# cpuinfo_internals ignores the options: level 0, stdio logging enabled.
+TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE
+  "CPUINFO_LOG_LEVEL=0")
+TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE
+  "CPUINFO_LOG_TO_STDIO=1")
+
+# Tell consumers whether the platform is supported (CPUINFO_SUPPORTED_PLATFORM
+# is 1 or 0), and wire up threading on the platforms where it was located.
+IF(NOT CPUINFO_SUPPORTED_PLATFORM)
+  TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0)
+ELSE()
+  TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1)
+  # Threads library: Linux, Android and FreeBSD.
+  IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android|FreeBSD)$")
+    TARGET_LINK_LIBRARIES(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+    TARGET_LINK_LIBRARIES(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+  ENDIF()
+  # _GNU_SOURCE: Linux and Android only.
+  IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$")
+    TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _GNU_SOURCE=1)
+    TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _GNU_SOURCE=1)
+  ENDIF()
+ENDIF()
+
+# Namespaced alias, so in-tree consumers can use the same name as the
+# installed, imported target.
+ADD_LIBRARY(${PROJECT_NAME}::cpuinfo ALIAS cpuinfo)
+
+# support find_package(cpuinfo CONFIG)
+INCLUDE(CMakePackageConfigHelpers)
+GET_FILENAME_COMPONENT(CONFIG_FILE_PATH
+  ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo-config.cmake
+  ABSOLUTE)
+CONFIGURE_PACKAGE_CONFIG_FILE(
+  cmake/cpuinfo-config.cmake.in
+  ${CONFIG_FILE_PATH}
+  INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME})
+
+# The generated config file goes to ${prefix}/share/cpuinfo (cpuinfo_DIR).
+INSTALL(
+  FILES ${CONFIG_FILE_PATH}
+  DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME})
+
+# Library, archive and public header.
+INSTALL(
+  TARGETS cpuinfo
+  EXPORT cpuinfo-targets
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+
+# Exported targets file; the namespace yields the IMPORTED cpuinfo::cpuinfo.
+INSTALL(
+  EXPORT cpuinfo-targets
+  NAMESPACE ${PROJECT_NAME}::
+  DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME})
+
+# ---[ cpuinfo micro-benchmarks
+# Built only on supported platforms and only when CPUINFO_BUILD_BENCHMARKS is on.
+IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS)
+  # ---[ Build google benchmark
+  # Add the downloaded/bundled sources unless a "benchmark" target already
+  # exists or the system package was requested.
+  IF(NOT TARGET benchmark AND NOT USE_SYSTEM_GOOGLEBENCHMARK)
+    SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "")
+    ADD_SUBDIRECTORY(
+      "${GOOGLEBENCHMARK_SOURCE_DIR}"
+      "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark")
+  ENDIF()
+
+  # get-current-bench is built for Linux and Android only.
+  IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$")
+    ADD_EXECUTABLE(get-current-bench bench/get-current.cc)
+    # Keyword signature, consistent with every other link rule in this file.
+    TARGET_LINK_LIBRARIES(get-current-bench PRIVATE cpuinfo benchmark)
+  ENDIF()
+
+  ADD_EXECUTABLE(init-bench bench/init.cc)
+  TARGET_LINK_LIBRARIES(init-bench PRIVATE cpuinfo benchmark)
+ENDIF()
+
+# ---[ Build google test
+# Needed by mock and unit tests on supported platforms. The sources are added
+# unless a "gtest" target already exists or the system package was requested.
+IF(CPUINFO_SUPPORTED_PLATFORM AND (CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS))
+  IF(NOT TARGET gtest AND NOT USE_SYSTEM_GOOGLETEST)
+    # With MSVC, have Google Test use the shared CRT unless the static runtime
+    # was explicitly selected.
+    IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "static")
+      SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    ENDIF()
+    ADD_SUBDIRECTORY(
+      "${GOOGLETEST_SOURCE_DIR}"
+      "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest")
+  ENDIF()
+ENDIF()
+
+# ---[ cpuinfo mock library and mock tests
+IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS)
+  SET(CPUINFO_MOCK_SRCS "${CPUINFO_SRCS}")
+  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
+    LIST(APPEND CPUINFO_MOCK_SRCS src/x86/mockcpuid.c)
+  ENDIF()
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    LIST(APPEND CPUINFO_MOCK_SRCS src/linux/mockfile.c)
+  ENDIF()
+
+  ADD_LIBRARY(cpuinfo_mock STATIC ${CPUINFO_MOCK_SRCS})
+  CPUINFO_TARGET_ENABLE_C99(cpuinfo_mock)
+  CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo_mock)
+  SET_TARGET_PROPERTIES(cpuinfo_mock PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
+  TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PUBLIC include)
+  TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PRIVATE src)
+  TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PUBLIC "CPUINFO_MOCK=1")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE "CPUINFO_LOG_LEVEL=5")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE "CPUINFO_LOG_TO_STDIO=1")
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    TARGET_LINK_LIBRARIES(cpuinfo_mock PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+    TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$")
+    ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc)
+    TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME atm7029b-tablet-test COMMAND atm7029b-tablet-test)
+
+    ADD_EXECUTABLE(blu-r1-hd-test test/mock/blu-r1-hd.cc)
+    TARGET_INCLUDE_DIRECTORIES(blu-r1-hd-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(blu-r1-hd-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME blu-r1-hd-test COMMAND blu-r1-hd-test)
+
+    ADD_EXECUTABLE(galaxy-a3-2016-eu-test test/mock/galaxy-a3-2016-eu.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-a3-2016-eu-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-a3-2016-eu-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-a3-2016-eu-test COMMAND galaxy-a3-2016-eu-test)
+
+    ADD_EXECUTABLE(galaxy-a8-2016-duos-test test/mock/galaxy-a8-2016-duos.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2016-duos-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-a8-2016-duos-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-a8-2016-duos-test COMMAND galaxy-a8-2016-duos-test)
+
+    ADD_EXECUTABLE(galaxy-grand-prime-value-edition-test test/mock/galaxy-grand-prime-value-edition.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-grand-prime-value-edition-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-grand-prime-value-edition-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-grand-prime-value-edition-test COMMAND galaxy-grand-prime-value-edition-test)
+
+    ADD_EXECUTABLE(galaxy-j1-2016-test test/mock/galaxy-j1-2016.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-j1-2016-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-j1-2016-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-j1-2016-test COMMAND galaxy-j1-2016-test)
+
+    ADD_EXECUTABLE(galaxy-j5-test test/mock/galaxy-j5.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-j5-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-j5-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-j5-test COMMAND galaxy-j5-test)
+
+    ADD_EXECUTABLE(galaxy-j7-prime-test test/mock/galaxy-j7-prime.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-j7-prime-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-j7-prime-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-j7-prime-test COMMAND galaxy-j7-prime-test)
+
+    ADD_EXECUTABLE(galaxy-j7-tmobile-test test/mock/galaxy-j7-tmobile.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-j7-tmobile-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-j7-tmobile-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-j7-tmobile-test COMMAND galaxy-j7-tmobile-test)
+
+    ADD_EXECUTABLE(galaxy-j7-uae-test test/mock/galaxy-j7-uae.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-j7-uae-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-j7-uae-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-j7-uae-test COMMAND galaxy-j7-uae-test)
+
+    ADD_EXECUTABLE(galaxy-s3-us-test test/mock/galaxy-s3-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s3-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s3-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s3-us-test COMMAND galaxy-s3-us-test)
+
+    ADD_EXECUTABLE(galaxy-s4-us-test test/mock/galaxy-s4-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s4-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s4-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s4-us-test COMMAND galaxy-s4-us-test)
+
+    ADD_EXECUTABLE(galaxy-s5-global-test test/mock/galaxy-s5-global.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s5-global-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s5-global-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s5-global-test COMMAND galaxy-s5-global-test)
+
+    ADD_EXECUTABLE(galaxy-s5-us-test test/mock/galaxy-s5-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s5-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s5-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s5-us-test COMMAND galaxy-s5-us-test)
+
+    ADD_EXECUTABLE(galaxy-tab-3-7.0-test test/mock/galaxy-tab-3-7.0.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-7.0-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-tab-3-7.0-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-tab-3-7.0-test COMMAND galaxy-tab-3-7.0-test)
+
+    ADD_EXECUTABLE(galaxy-tab-3-lite-test test/mock/galaxy-tab-3-lite.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-lite-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-tab-3-lite-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-tab-3-lite-test COMMAND galaxy-tab-3-lite-test)
+
+    ADD_EXECUTABLE(galaxy-win-duos-test test/mock/galaxy-win-duos.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-win-duos-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-win-duos-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-win-duos-test COMMAND galaxy-win-duos-test)
+
+    ADD_EXECUTABLE(huawei-ascend-p7-test test/mock/huawei-ascend-p7.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-ascend-p7-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-ascend-p7-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-ascend-p7-test COMMAND huawei-ascend-p7-test)
+
+    ADD_EXECUTABLE(huawei-honor-6-test test/mock/huawei-honor-6.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-honor-6-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-honor-6-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-honor-6-test COMMAND huawei-honor-6-test)
+
+    ADD_EXECUTABLE(lenovo-a6600-plus-test test/mock/lenovo-a6600-plus.cc)
+    TARGET_INCLUDE_DIRECTORIES(lenovo-a6600-plus-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(lenovo-a6600-plus-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME lenovo-a6600-plus-test COMMAND lenovo-a6600-plus-test)
+
+    ADD_EXECUTABLE(lenovo-vibe-x2-test test/mock/lenovo-vibe-x2.cc)
+    TARGET_INCLUDE_DIRECTORIES(lenovo-vibe-x2-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(lenovo-vibe-x2-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME lenovo-vibe-x2-test COMMAND lenovo-vibe-x2-test)
+
+    ADD_EXECUTABLE(lg-k10-eu-test test/mock/lg-k10-eu.cc)
+    TARGET_INCLUDE_DIRECTORIES(lg-k10-eu-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(lg-k10-eu-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME lg-k10-eu-test COMMAND lg-k10-eu-test)
+
+    ADD_EXECUTABLE(lg-optimus-g-pro-test test/mock/lg-optimus-g-pro.cc)
+    TARGET_INCLUDE_DIRECTORIES(lg-optimus-g-pro-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(lg-optimus-g-pro-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME lg-optimus-g-pro-test COMMAND lg-optimus-g-pro-test)
+
+    ADD_EXECUTABLE(moto-e-gen1-test test/mock/moto-e-gen1.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-e-gen1-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-e-gen1-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-e-gen1-test COMMAND moto-e-gen1-test)
+
+    ADD_EXECUTABLE(moto-g-gen1-test test/mock/moto-g-gen1.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-g-gen1-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-g-gen1-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-g-gen1-test COMMAND moto-g-gen1-test)
+
+    ADD_EXECUTABLE(moto-g-gen2-test test/mock/moto-g-gen2.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-g-gen2-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-g-gen2-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-g-gen2-test COMMAND moto-g-gen2-test)
+
+    ADD_EXECUTABLE(moto-g-gen3-test test/mock/moto-g-gen3.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-g-gen3-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-g-gen3-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-g-gen3-test COMMAND moto-g-gen3-test)
+
+    ADD_EXECUTABLE(moto-g-gen4-test test/mock/moto-g-gen4.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-g-gen4-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-g-gen4-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-g-gen4-test COMMAND moto-g-gen4-test)
+
+    ADD_EXECUTABLE(moto-g-gen5-test test/mock/moto-g-gen5.cc)
+    TARGET_INCLUDE_DIRECTORIES(moto-g-gen5-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(moto-g-gen5-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME moto-g-gen5-test COMMAND moto-g-gen5-test)
+
+    ADD_EXECUTABLE(nexus-s-test test/mock/nexus-s.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus-s-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus-s-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus-s-test COMMAND nexus-s-test)
+
+    ADD_EXECUTABLE(nexus4-test test/mock/nexus4.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus4-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus4-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus4-test COMMAND nexus4-test)
+
+    ADD_EXECUTABLE(nexus6-test test/mock/nexus6.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus6-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus6-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus6-test COMMAND nexus6-test)
+
+    ADD_EXECUTABLE(nexus10-test test/mock/nexus10.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus10-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus10-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus10-test COMMAND nexus10-test)
+
+    ADD_EXECUTABLE(padcod-10.1-test test/mock/padcod-10.1.cc)
+    TARGET_INCLUDE_DIRECTORIES(padcod-10.1-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(padcod-10.1-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME padcod-10.1-test COMMAND padcod-10.1-test)
+
+    ADD_EXECUTABLE(xiaomi-redmi-2a-test test/mock/xiaomi-redmi-2a.cc)
+    TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-2a-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xiaomi-redmi-2a-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xiaomi-redmi-2a-test COMMAND xiaomi-redmi-2a-test)
+
+    ADD_EXECUTABLE(xperia-sl-test test/mock/xperia-sl.cc)
+    TARGET_INCLUDE_DIRECTORIES(xperia-sl-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xperia-sl-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xperia-sl-test COMMAND xperia-sl-test)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$")
+    ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc)
+    TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME alcatel-revvl-test COMMAND alcatel-revvl-test)
+
+    ADD_EXECUTABLE(galaxy-a8-2018-test test/mock/galaxy-a8-2018.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2018-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-a8-2018-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-a8-2018-test COMMAND galaxy-a8-2018-test)
+
+    ADD_EXECUTABLE(galaxy-c9-pro-test test/mock/galaxy-c9-pro.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-c9-pro-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-c9-pro-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-c9-pro-test COMMAND galaxy-c9-pro-test)
+
+    ADD_EXECUTABLE(galaxy-s6-test test/mock/galaxy-s6.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s6-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s6-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s6-test COMMAND galaxy-s6-test)
+
+    ADD_EXECUTABLE(galaxy-s7-us-test test/mock/galaxy-s7-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s7-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s7-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s7-us-test COMMAND galaxy-s7-us-test)
+
+    ADD_EXECUTABLE(galaxy-s7-global-test test/mock/galaxy-s7-global.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s7-global-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s7-global-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s7-global-test COMMAND galaxy-s7-global-test)
+
+    ADD_EXECUTABLE(galaxy-s8-us-test test/mock/galaxy-s8-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s8-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s8-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s8-us-test COMMAND galaxy-s8-us-test)
+
+    ADD_EXECUTABLE(galaxy-s8-global-test test/mock/galaxy-s8-global.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s8-global-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s8-global-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s8-global-test COMMAND galaxy-s8-global-test)
+
+    ADD_EXECUTABLE(galaxy-s9-us-test test/mock/galaxy-s9-us.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s9-us-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s9-us-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s9-us-test COMMAND galaxy-s9-us-test)
+
+    ADD_EXECUTABLE(galaxy-s9-global-test test/mock/galaxy-s9-global.cc)
+    TARGET_INCLUDE_DIRECTORIES(galaxy-s9-global-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(galaxy-s9-global-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME galaxy-s9-global-test COMMAND galaxy-s9-global-test)
+
+    ADD_EXECUTABLE(huawei-mate-8-test test/mock/huawei-mate-8.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-mate-8-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-mate-8-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-mate-8-test COMMAND huawei-mate-8-test)
+
+    ADD_EXECUTABLE(huawei-mate-9-test test/mock/huawei-mate-9.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-mate-9-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-mate-9-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-mate-9-test COMMAND huawei-mate-9-test)
+
+    ADD_EXECUTABLE(huawei-mate-10-test test/mock/huawei-mate-10.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-mate-10-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-mate-10-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-mate-10-test COMMAND huawei-mate-10-test)
+
+    ADD_EXECUTABLE(huawei-mate-20-test test/mock/huawei-mate-20.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-mate-20-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-mate-20-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-mate-20-test COMMAND huawei-mate-20-test)
+
+    ADD_EXECUTABLE(huawei-p8-lite-test test/mock/huawei-p8-lite.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-p8-lite-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-p8-lite-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-p8-lite-test COMMAND huawei-p8-lite-test)
+
+    ADD_EXECUTABLE(huawei-p9-lite-test test/mock/huawei-p9-lite.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-p9-lite-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-p9-lite-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-p9-lite-test COMMAND huawei-p9-lite-test)
+
+    ADD_EXECUTABLE(huawei-p20-pro-test test/mock/huawei-p20-pro.cc)
+    TARGET_INCLUDE_DIRECTORIES(huawei-p20-pro-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(huawei-p20-pro-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME huawei-p20-pro-test COMMAND huawei-p20-pro-test)
+
+    ADD_EXECUTABLE(iconia-one-10-test test/mock/iconia-one-10.cc)
+    TARGET_INCLUDE_DIRECTORIES(iconia-one-10-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(iconia-one-10-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME iconia-one-10-test COMMAND iconia-one-10-test)
+
+    ADD_EXECUTABLE(meizu-pro-6-test test/mock/meizu-pro-6.cc)
+    TARGET_INCLUDE_DIRECTORIES(meizu-pro-6-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(meizu-pro-6-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME meizu-pro-6-test COMMAND meizu-pro-6-test)
+
+    ADD_EXECUTABLE(meizu-pro-6s-test test/mock/meizu-pro-6s.cc)
+    TARGET_INCLUDE_DIRECTORIES(meizu-pro-6s-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(meizu-pro-6s-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME meizu-pro-6s-test COMMAND meizu-pro-6s-test)
+
+    ADD_EXECUTABLE(meizu-pro-7-plus-test test/mock/meizu-pro-7-plus.cc)
+    TARGET_INCLUDE_DIRECTORIES(meizu-pro-7-plus-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(meizu-pro-7-plus-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME meizu-pro-7-plus-test COMMAND meizu-pro-7-plus-test)
+
+    ADD_EXECUTABLE(nexus5x-test test/mock/nexus5x.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus5x-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus5x-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus5x-test COMMAND nexus5x-test)
+
+    ADD_EXECUTABLE(nexus6p-test test/mock/nexus6p.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus6p-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus6p-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus6p-test COMMAND nexus6p-test)
+
+    ADD_EXECUTABLE(nexus9-test test/mock/nexus9.cc)
+    TARGET_INCLUDE_DIRECTORIES(nexus9-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(nexus9-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME nexus9-test COMMAND nexus9-test)
+
+    ADD_EXECUTABLE(oneplus-3t-test test/mock/oneplus-3t.cc)
+    TARGET_INCLUDE_DIRECTORIES(oneplus-3t-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oneplus-3t-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oneplus-3t-test COMMAND oneplus-3t-test)
+
+    ADD_EXECUTABLE(oneplus-5-test test/mock/oneplus-5.cc)
+    TARGET_INCLUDE_DIRECTORIES(oneplus-5-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oneplus-5-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oneplus-5-test COMMAND oneplus-5-test)
+
+    ADD_EXECUTABLE(oneplus-5t-test test/mock/oneplus-5t.cc)
+    TARGET_INCLUDE_DIRECTORIES(oneplus-5t-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oneplus-5t-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oneplus-5t-test COMMAND oneplus-5t-test)
+
+    ADD_EXECUTABLE(oppo-a37-test test/mock/oppo-a37.cc)
+    TARGET_INCLUDE_DIRECTORIES(oppo-a37-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oppo-a37-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oppo-a37-test COMMAND oppo-a37-test)
+
+    ADD_EXECUTABLE(oppo-r9-test test/mock/oppo-r9.cc)
+    TARGET_INCLUDE_DIRECTORIES(oppo-r9-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oppo-r9-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oppo-r9-test COMMAND oppo-r9-test)
+
+    ADD_EXECUTABLE(oppo-r15-test test/mock/oppo-r15.cc)
+    TARGET_INCLUDE_DIRECTORIES(oppo-r15-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(oppo-r15-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME oppo-r15-test COMMAND oppo-r15-test)
+
+    ADD_EXECUTABLE(pixel-test test/mock/pixel.cc)
+    TARGET_INCLUDE_DIRECTORIES(pixel-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(pixel-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME pixel-test COMMAND pixel-test)
+
+    ADD_EXECUTABLE(pixel-c-test test/mock/pixel-c.cc)
+    TARGET_INCLUDE_DIRECTORIES(pixel-c-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(pixel-c-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME pixel-c-test COMMAND pixel-c-test)
+
+    ADD_EXECUTABLE(pixel-xl-test test/mock/pixel-xl.cc)
+    TARGET_INCLUDE_DIRECTORIES(pixel-xl-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(pixel-xl-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME pixel-xl-test COMMAND pixel-xl-test)
+
+    ADD_EXECUTABLE(pixel-2-xl-test test/mock/pixel-2-xl.cc)
+    TARGET_INCLUDE_DIRECTORIES(pixel-2-xl-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(pixel-2-xl-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME pixel-2-xl-test COMMAND pixel-2-xl-test)
+
+    ADD_EXECUTABLE(xiaomi-mi-5c-test test/mock/xiaomi-mi-5c.cc)
+    TARGET_INCLUDE_DIRECTORIES(xiaomi-mi-5c-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xiaomi-mi-5c-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xiaomi-mi-5c-test COMMAND xiaomi-mi-5c-test)
+
+    ADD_EXECUTABLE(xiaomi-redmi-note-3-test test/mock/xiaomi-redmi-note-3.cc)
+    TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-3-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xiaomi-redmi-note-3-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xiaomi-redmi-note-3-test COMMAND xiaomi-redmi-note-3-test)
+
+    ADD_EXECUTABLE(xiaomi-redmi-note-4-test test/mock/xiaomi-redmi-note-4.cc)
+    TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-4-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xiaomi-redmi-note-4-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xiaomi-redmi-note-4-test COMMAND xiaomi-redmi-note-4-test)
+
+    ADD_EXECUTABLE(xperia-c4-dual-test test/mock/xperia-c4-dual.cc)
+    TARGET_INCLUDE_DIRECTORIES(xperia-c4-dual-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(xperia-c4-dual-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME xperia-c4-dual-test COMMAND xperia-c4-dual-test)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") # mock tests built only for Android on i686/x86_64
+    ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) # every stanza below follows this pattern: one executable per test/mock/<device>.cc
+    TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) # BEFORE: test/mock is prepended to the target's include path
+    TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) # links gtest without gtest_main -- presumably each mock source provides its own main(); confirm
+    ADD_TEST(NAME alldocube-iwork8-test COMMAND alldocube-iwork8-test) # registers the executable with CTest under the same name
+
+    ADD_EXECUTABLE(leagoo-t5c-test test/mock/leagoo-t5c.cc)
+    TARGET_INCLUDE_DIRECTORIES(leagoo-t5c-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(leagoo-t5c-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME leagoo-t5c-test COMMAND leagoo-t5c-test)
+
+    ADD_EXECUTABLE(memo-pad-7-test test/mock/memo-pad-7.cc)
+    TARGET_INCLUDE_DIRECTORIES(memo-pad-7-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(memo-pad-7-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME memo-pad-7-test COMMAND memo-pad-7-test)
+
+    ADD_EXECUTABLE(zenfone-c-test test/mock/zenfone-c.cc)
+    TARGET_INCLUDE_DIRECTORIES(zenfone-c-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(zenfone-c-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME zenfone-c-test COMMAND zenfone-c-test)
+
+    ADD_EXECUTABLE(zenfone-2-test test/mock/zenfone-2.cc)
+    TARGET_INCLUDE_DIRECTORIES(zenfone-2-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(zenfone-2-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME zenfone-2-test COMMAND zenfone-2-test)
+
+    ADD_EXECUTABLE(zenfone-2e-test test/mock/zenfone-2e.cc)
+    TARGET_INCLUDE_DIRECTORIES(zenfone-2e-test BEFORE PRIVATE test/mock)
+    TARGET_LINK_LIBRARIES(zenfone-2e-test PRIVATE cpuinfo_mock gtest)
+    ADD_TEST(NAME zenfone-2e-test COMMAND zenfone-2e-test)
+  ENDIF()
+ENDIF()
+
+# ---[ cpuinfo unit tests (built only when CPUINFO_SUPPORTED_PLATFORM and CPUINFO_BUILD_UNIT_TESTS are both true)
+IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS)
+  ADD_EXECUTABLE(init-test test/init.cc) # built on every supported platform
+  CPUINFO_TARGET_ENABLE_CXX11(init-test)
+  CPUINFO_TARGET_RUNTIME_LIBRARY(init-test)
+  TARGET_LINK_LIBRARIES(init-test PRIVATE cpuinfo gtest gtest_main) # links the public cpuinfo target; gtest_main is linked alongside gtest
+  ADD_TEST(NAME init-test COMMAND init-test)
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") # get-current-test is limited to Linux and Android
+    ADD_EXECUTABLE(get-current-test test/get-current.cc)
+    CPUINFO_TARGET_ENABLE_CXX11(get-current-test)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(get-current-test)
+    TARGET_LINK_LIBRARIES(get-current-test PRIVATE cpuinfo gtest gtest_main)
+    ADD_TEST(NAME get-current-test COMMAND get-current-test)
+  ENDIF()
+
+  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") # x86 / x86-64 targets only
+    ADD_EXECUTABLE(brand-string-test test/name/brand-string.cc)
+    CPUINFO_TARGET_ENABLE_CXX11(brand-string-test)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(brand-string-test)
+    TARGET_LINK_LIBRARIES(brand-string-test PRIVATE cpuinfo_internals gtest gtest_main) # links cpuinfo_internals instead of the public cpuinfo target
+    ADD_TEST(NAME brand-string-test COMMAND brand-string-test)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") # NOTE(review): tests CMAKE_SYSTEM_PROCESSOR while the x86 check above tests CPUINFO_TARGET_PROCESSOR -- confirm intentional
+    ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) # static C helper library consumed by chipset-test below
+    CPUINFO_TARGET_ENABLE_C99(android_properties_interface)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface)
+    TARGET_LINK_LIBRARIES(android_properties_interface PRIVATE cpuinfo_internals)
+
+    ADD_EXECUTABLE(chipset-test # single executable aggregating all test/name/*.cc sources listed here
+      test/name/proc-cpuinfo-hardware.cc
+      test/name/ro-product-board.cc
+      test/name/ro-board-platform.cc
+      test/name/ro-mediatek-platform.cc
+      test/name/ro-arch.cc
+      test/name/ro-chipname.cc
+      test/name/android-properties.cc)
+    CPUINFO_TARGET_ENABLE_CXX11(chipset-test)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(chipset-test)
+    TARGET_LINK_LIBRARIES(chipset-test PRIVATE android_properties_interface gtest gtest_main) # reaches cpuinfo_internals only through android_properties_interface
+    ADD_TEST(NAME chipset-test COMMAND chipset-test)
+
+    ADD_EXECUTABLE(cache-test test/arm-cache.cc)
+    CPUINFO_TARGET_ENABLE_CXX11(cache-test)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(cache-test)
+    TARGET_COMPILE_DEFINITIONS(cache-test PRIVATE __STDC_LIMIT_MACROS=1 __STDC_CONSTANT_MACROS=1) # presumably exposes the <stdint.h> limit/constant macros to C++ -- confirm
+    TARGET_LINK_LIBRARIES(cache-test PRIVATE cpuinfo_internals gtest gtest_main)
+    ADD_TEST(NAME cache-test COMMAND cache-test)
+  ENDIF()
+ENDIF()
+
+# ---[ Helper and debug tools (built only when CPUINFO_SUPPORTED_PLATFORM and CPUINFO_BUILD_TOOLS are both true)
+IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS)
+  ADD_EXECUTABLE(isa-info tools/isa-info.c) # isa-info, cpu-info and cache-info are built on every supported platform
+  CPUINFO_TARGET_ENABLE_C99(isa-info)
+  CPUINFO_TARGET_RUNTIME_LIBRARY(isa-info)
+  TARGET_LINK_LIBRARIES(isa-info PRIVATE cpuinfo)
+  INSTALL(TARGETS isa-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) # installed into CMAKE_INSTALL_BINDIR
+
+  ADD_EXECUTABLE(cpu-info tools/cpu-info.c)
+  CPUINFO_TARGET_ENABLE_C99(cpu-info)
+  CPUINFO_TARGET_RUNTIME_LIBRARY(cpu-info)
+  TARGET_LINK_LIBRARIES(cpu-info PRIVATE cpuinfo)
+  INSTALL(TARGETS cpu-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  ADD_EXECUTABLE(cache-info tools/cache-info.c)
+  CPUINFO_TARGET_ENABLE_C99(cache-info)
+  CPUINFO_TARGET_RUNTIME_LIBRARY(cache-info)
+  TARGET_LINK_LIBRARIES(cache-info PRIVATE cpuinfo)
+  INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") # ARM/ARM64 Linux and Android only; neither tool below has an INSTALL rule
+    ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c)
+    CPUINFO_TARGET_ENABLE_C99(auxv-dump)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump)
+    TARGET_LINK_LIBRARIES(auxv-dump PRIVATE ${CMAKE_DL_LIBS} cpuinfo) # links the platform dl libraries in addition to cpuinfo
+
+    ADD_EXECUTABLE(cpuinfo-dump tools/cpuinfo-dump.c) # no TARGET_LINK_LIBRARIES call: built without linking cpuinfo
+    CPUINFO_TARGET_ENABLE_C99(cpuinfo-dump)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo-dump)
+  ENDIF()
+
+  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") # x86 / x86-64 targets only
+    ADD_EXECUTABLE(cpuid-dump tools/cpuid-dump.c) # not linked against cpuinfo; gets src/ and include/ on its include path instead
+    CPUINFO_TARGET_ENABLE_C99(cpuid-dump)
+    CPUINFO_TARGET_RUNTIME_LIBRARY(cpuid-dump)
+    TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE src)
+    TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE include)
+    INSTALL(TARGETS cpuid-dump RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  ENDIF()
+ENDIF()
+
+# ---[ pkg-config manifest (generated and installed only when CPUINFO_BUILD_PKG_CONFIG is true). This is mostly from JsonCpp...
+IF(CPUINFO_BUILD_PKG_CONFIG)
+
+  FUNCTION(JOIN_PATHS joined_path first_path_segment) # JOIN_PATHS(<out-var> <first-segment> [<segment>...]): joins segments with "/"
+    SET(temp_path "${first_path_segment}")
+    FOREACH(current_segment IN LISTS ARGN)
+      IF(NOT ("${current_segment}" STREQUAL "")) # empty segments are skipped
+        IF(IS_ABSOLUTE "${current_segment}") # an absolute segment discards everything accumulated so far
+          SET(temp_path "${current_segment}")
+        ELSE()
+          SET(temp_path "${temp_path}/${current_segment}")
+        ENDIF()
+      ENDIF()
+    ENDFOREACH()
+    SET(${joined_path} "${temp_path}" PARENT_SCOPE) # result is returned through <out-var> in the caller's scope
+  ENDFUNCTION()
+
+  JOIN_PATHS(libdir_for_pc_file "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}") # "\$" keeps ${exec_prefix} literal so pkg-config, not CMake, expands it
+  JOIN_PATHS(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") # both results are presumably consumed as @...@ placeholders in libcpuinfo.pc.in -- confirm
+
+  CONFIGURE_FILE(
+    "libcpuinfo.pc.in" # relative input path: resolved against the current source directory
+    "libcpuinfo.pc" # relative output path: resolved against the current binary directory
+    @ONLY) # substitute only @VAR@ references, leaving ${...} text untouched
+
+  INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/libcpuinfo.pc" # installs the generated manifest
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+
+ENDIF()
--- a/3rdparty/cpuinfo/LICENSE
+++ b/3rdparty/cpuinfo/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2019 Google LLC
+Copyright (c) 2017-2018 Facebook Inc.
+Copyright (C) 2012-2017 Georgia Institute of Technology
+Copyright (C) 2010-2012 Marat Dukhan
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/3rdparty/cpuinfo/README.md
+++ b/3rdparty/cpuinfo/README.md
@@ -0,0 +1,318 @@
+# CPU INFOrmation library
+
+[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/LICENSE)
+[![Linux/Mac build status](https://img.shields.io/travis/pytorch/cpuinfo.svg)](https://travis-ci.org/pytorch/cpuinfo)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/g5khy9nr0xm458t7/branch/master?svg=true)](https://ci.appveyor.com/project/MaratDukhan/cpuinfo/branch/master)
+
+cpuinfo is a library to detect information about the host CPU that is essential for performance optimization.
+
+## Features
+
+- **Cross-platform** availability:
+  - Linux, Windows, macOS, Android, and iOS operating systems
+  - x86, x86-64, ARM, and ARM64 architectures
+- Modern **C/C++ interface**
+  - Thread-safe
+  - No memory allocation after initialization
+  - No exceptions thrown
+- Detection of **supported instruction sets**, up to AVX512 (x86) and ARMv8.3 extensions
+- Detection of SoC and core information:
+  - **Processor (SoC) name**
+  - Vendor and **microarchitecture** for each CPU core
+  - ID (**MIDR** on ARM, **CPUID** leaf 1 EAX value on x86) for each CPU core
+- Detection of **cache information**:
+  - Cache type (instruction/data/unified), size and line size
+  - Cache associativity
+  - Cores and logical processors (hyper-threads) sharing the cache
+- Detection of **topology information** (relative between logical processors, cores, and processor packages)
+- Well-tested **production-quality** code:
+  - 60+ mock tests based on data from real devices
+  - Includes work-arounds for common bugs in hardware and OS kernels
+  - Supports systems with heterogeneous cores, such as **big.LITTLE** and Max.Med.Min
+- Permissive **open-source** license (Simplified BSD)
+
+## Examples
+
+Log processor name:
+
+```c
+cpuinfo_initialize();
+printf("Running on %s CPU\n", cpuinfo_get_package(0)->name);
+```
+
+Detect if target is a 32-bit or 64-bit ARM system:
+
+```c
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+    /* 32-bit or 64-bit ARM-specific code here */
+#endif
+```
+
+Check if the host CPU supports ARM NEON
+
+```c
+cpuinfo_initialize();
+if (cpuinfo_has_arm_neon()) {
+    neon_implementation(arguments);
+}
+```
+
+Check if the host CPU supports x86 AVX
+
+```c
+cpuinfo_initialize();
+if (cpuinfo_has_x86_avx()) {
+    avx_implementation(arguments);
+}
+```
+
+Check if the thread runs on a Cortex-A53 core
+
+```c
+cpuinfo_initialize();
+switch (cpuinfo_get_current_core()->uarch) {
+    case cpuinfo_uarch_cortex_a53:
+        cortex_a53_implementation(arguments);
+        break;
+    default:
+        generic_implementation(arguments);
+        break;
+}
+```
+
+Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
+
+```c
+cpuinfo_initialize();
+const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
+```
+
+Pin thread to cores sharing L2 cache with the current core (Linux or Android)
+
+```c
+cpuinfo_initialize();
+cpu_set_t cpu_set;
+CPU_ZERO(&cpu_set);
+const struct cpuinfo_cache* current_l2 = cpuinfo_get_current_processor()->cache.l2;
+for (uint32_t i = 0; i < current_l2->processor_count; i++) {
+    CPU_SET(cpuinfo_get_processor(current_l2->processor_start + i)->linux_id, &cpu_set);
+}
+pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set);
+```
+
+## Use via pkg-config
+
+If you would like to provide your project's build environment with the necessary compiler and linker flags in a portable manner, build the library with the `CPUINFO_BUILD_PKG_CONFIG` option enabled (it defaults to `OFF` in this copy of the build files) and it will generate a [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/) manifest (_libcpuinfo.pc_). Here are several examples of how to use it:
+
+### Command Line
+
+If you used your distro's package manager to install the library, you can verify that it is available to your build environment like so:
+
+```console
+$ pkg-config --cflags --libs libcpuinfo
+-I/usr/include/x86_64-linux-gnu/ -L/lib/x86_64-linux-gnu/ -lcpuinfo
+```
+
+If you have installed the library from source into a non-standard prefix, pkg-config may need help finding it:
+
+```console
+$ PKG_CONFIG_PATH="/home/me/projects/cpuinfo/prefix/lib/pkgconfig/:$PKG_CONFIG_PATH" pkg-config --cflags --libs libcpuinfo
+-I/home/me/projects/cpuinfo/prefix/include -L/home/me/projects/cpuinfo/prefix/lib -lcpuinfo
+```
+
+### GNU Autotools
+
+To [use](https://autotools.io/pkgconfig/pkg_check_modules.html) with the GNU Autotools include the following snippet in your project's `configure.ac`:
+
+```makefile
+# CPU INFOrmation library...
+PKG_CHECK_MODULES(
+    [libcpuinfo], [libcpuinfo], [],
+    [AC_MSG_ERROR([libcpuinfo missing...])])
+YOURPROJECT_CXXFLAGS="$YOURPROJECT_CXXFLAGS $libcpuinfo_CFLAGS"
+YOURPROJECT_LIBS="$YOURPROJECT_LIBS $libcpuinfo_LIBS"
+```
+
+### Meson
+
+To use with Meson you just need to add `dependency('libcpuinfo')` as a dependency for your executable.
+
+```meson
+project(
+    'MyCpuInfoProject',
+    'cpp',
+    meson_version: '>=0.55.0'
+)
+
+executable(
+    'MyCpuInfoExecutable',
+    sources: 'main.cpp',
+    dependencies: dependency('libcpuinfo')
+)
+```
+
+### Bazel
+
+This project can be built using [Bazel](https://bazel.build/install). 
+
+You can also use this library as a dependency to your Bazel project. Add to the `WORKSPACE` file:
+
+```python
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
+
+git_repository(
+    name = "org_pytorch_cpuinfo",
+    branch = "master",
+    remote = "https://github.com/Vertexwahn/cpuinfo.git",
+)
+```
+
+And to your `BUILD` file:
+
+```python
+cc_binary(
+    name = "cpuinfo_test",
+    srcs = [
+        # ...
+    ],
+    deps = [
+        "@org_pytorch_cpuinfo//:cpuinfo",
+    ],
+)
+```
+
+### CMake
+
+To use with CMake use the [FindPkgConfig](https://cmake.org/cmake/help/latest/module/FindPkgConfig.html) module. Here is an example:
+
+```cmake
+cmake_minimum_required(VERSION 3.6)
+project("MyCpuInfoProject")
+
+find_package(PkgConfig)
+pkg_check_modules(CpuInfo REQUIRED IMPORTED_TARGET libcpuinfo)
+
+add_executable(${PROJECT_NAME} main.cpp)
+target_link_libraries(${PROJECT_NAME} PkgConfig::CpuInfo)
+```
+
+### Makefile
+
+To use within a vanilla makefile, you can call pkg-config directly to supply compiler and linker flags using shell substitution.
+
+```makefile
+CFLAGS=-g3 -Wall -Wextra -Werror ...
+LDFLAGS=-lfoo ...
+...
+CFLAGS+= $(shell pkg-config --cflags libcpuinfo)
+LDFLAGS+= $(shell pkg-config --libs libcpuinfo)
+```
+
+## Exposed information
+- [x] Processor (SoC) name
+- [x] Microarchitecture
+- [x] Usable instruction sets
+- [ ] CPU frequency
+- [x] Cache
+  - [x] Size
+  - [x] Associativity
+  - [x] Line size
+  - [x] Number of partitions
+  - [x] Flags (unified, inclusive, complex hash function)
+  - [x] Topology (logical processors that share this cache level)
+- [ ] TLB
+  - [ ] Number of entries
+  - [ ] Associativity
+  - [ ] Covered page types (instruction, data)
+  - [ ] Covered page sizes
+- [x] Topology information
+  - [x] Logical processors
+  - [x] Cores
+  - [x] Packages (sockets)
+
+## Supported environments:
+- [x] Android
+  - [x] x86 ABI
+  - [x] x86_64 ABI
+  - [x] armeabi ABI
+  - [x] armeabi-v7a ABI
+  - [x] arm64-v8a ABI
+  - [ ] ~~mips ABI~~
+  - [ ] ~~mips64 ABI~~
+- [x] Linux
+  - [x] x86
+  - [x] x86-64
+  - [x] 32-bit ARM (ARMv5T and later)
+  - [x] ARM64
+  - [ ] PowerPC64
+- [x] iOS
+  - [x] x86 (iPhone simulator)
+  - [x] x86-64 (iPhone simulator)
+  - [x] ARMv7
+  - [x] ARM64
+- [x] macOS
+  - [x] x86
+  - [x] x86-64
+  - [x] ARM64 (Apple silicon)
+- [x] Windows
+  - [x] x86
+  - [x] x86-64
+  - [x] arm64
+
+## Methods
+
+- Processor (SoC) name detection
+  - [x] Using CPUID leaves 0x80000002–0x80000004 on x86/x86-64
+  - [x] Using `/proc/cpuinfo` on ARM
+  - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android)
+  - [ ] Using kernel log (`dmesg`) on ARM Linux
+  - [x] Using Windows registry on ARM64 Windows
+- Vendor and microarchitecture detection
+  - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill)
+  - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2)
+  - [ ] VIA-designed x86/x86-64 cores
+  - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise)
+  - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/V1/N2/V2)
+  - [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo)
+  - [x] Nvidia-designed ARM cores (Denver and Carmel)
+  - [x] Samsung-designed ARM cores (Exynos)
+  - [x] Intel-designed ARM cores (XScale up to 3rd-gen)
+  - [x] Apple-designed ARM cores (up to Lightning and Thunder)
+  - [x] Cavium-designed ARM cores (ThunderX)
+  - [x] AppliedMicro-designed ARM cores (X-Gene)
+- Instruction set detection
+  - [x] Using CPUID (x86/x86-64)
+  - [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux)
+  - [x] Using microarchitecture heuristics (32-bit ARM)
+  - [x] Using `FPSID` and `WCID` registers (32-bit ARM)
+  - [x] Using `getauxval` (Linux/ARM)
+  - [x] Using `/proc/self/auxv` (Android/ARM)
+  - [ ] Using instruction probing on ARM (Linux)
+  - [ ] Using CPUID registers on ARM64 (Linux)
+  - [x] Using IsProcessorFeaturePresent on ARM64 Windows
+- Cache detection
+  - [x] Using CPUID leaf 0x00000002 (x86/x86-64)
+  - [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64)
+  - [ ] Using CPUID leaves 0x80000005-0x80000006 (AMD x86/x86-64)
+  - [x] Using CPUID leaf 0x8000001D (AMD x86/x86-64)
+  - [x] Using `/proc/cpuinfo` (Linux/pre-ARMv7)
+  - [x] Using microarchitecture heuristics (ARM)
+  - [x] Using chipset name (ARM)
+  - [x] Using `sysctlbyname` (Mach)
+  - [x] Using sysfs `topology` directories (ARM/Linux)
+  - [ ] Using sysfs `cache` directories (Linux)
+  - [x] Using `GetLogicalProcessorInformationEx` on ARM64 Windows
+- TLB detection
+  - [x] Using CPUID leaf 0x00000002 (x86/x86-64)
+  - [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64)
+  - [x] Using microarchitecture heuristics (ARM)
+- Topology detection
+  - [x] Using CPUID leaf 0x00000001 on x86/x86-64 (legacy APIC ID)
+  - [x] Using CPUID leaf 0x0000000B on x86/x86-64 (Intel APIC ID)
+  - [ ] Using CPUID leaf 0x8000001E on x86/x86-64 (AMD APIC ID)
+  - [x] Using `/proc/cpuinfo` (Linux)
+  - [x] Using `host_info` (Mach)
+  - [x] Using `GetLogicalProcessorInformationEx` (Windows)
+  - [x] Using sysfs (Linux)
+  - [x] Using chipset name (ARM/Linux)
+
--- a/3rdparty/cpuinfo/cmake/DownloadGoogleBenchmark.cmake
+++ b/3rdparty/cpuinfo/cmake/DownloadGoogleBenchmark.cmake
@@ -0,0 +1,15 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR) # same floor as the parent cpuinfo project; CMake >= 4.0 refuses minimum versions below 3.5
+
+PROJECT(googlebenchmark-download NONE) # helper project with no languages enabled
+
+INCLUDE(ExternalProject)
+ExternalProject_Add(googlebenchmark
+	URL https://github.com/google/benchmark/archive/v1.6.1.zip
+	URL_HASH SHA256=367e963b8620080aff8c831e24751852cffd1f74ea40f25d9cc1b667a9dd5e45
+	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
+	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
+	CONFIGURE_COMMAND "" # configure/build/install/test steps are all disabled: this project only fetches and unpacks the archive
+	BUILD_COMMAND ""
+	INSTALL_COMMAND ""
+	TEST_COMMAND ""
+)
--- a/3rdparty/cpuinfo/cmake/DownloadGoogleTest.cmake
+++ b/3rdparty/cpuinfo/cmake/DownloadGoogleTest.cmake
@@ -0,0 +1,15 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR) # same floor as the parent cpuinfo project; CMake >= 4.0 refuses minimum versions below 3.5
+
+PROJECT(googletest-download NONE) # helper project with no languages enabled
+
+INCLUDE(ExternalProject)
+ExternalProject_Add(googletest
+	URL https://github.com/google/googletest/archive/release-1.11.0.zip
+	URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a
+	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
+	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
+	CONFIGURE_COMMAND "" # configure/build/install/test steps are all disabled: this project only fetches and unpacks the archive
+	BUILD_COMMAND ""
+	INSTALL_COMMAND ""
+	TEST_COMMAND ""
+)
--- a/3rdparty/cpuinfo/cmake/cpuinfo-config.cmake.in
+++ b/3rdparty/cpuinfo/cmake/cpuinfo-config.cmake.in
@@ -0,0 +1,12 @@
+@PACKAGE_INIT@
+
+set(_DIR "${CMAKE_CURRENT_LIST_DIR}") # directory containing this package config file
+file(GLOB CONFIG_FILES "${_DIR}/cpuinfo-config-*.cmake")
+foreach(f IN LISTS CONFIG_FILES)
+  include("${f}")
+endforeach()
+
+# cpuinfo-targets.cmake is where the ${_DIR}/cpuinfo-targets-*.cmake files get included
+include("${_DIR}/cpuinfo-targets.cmake")
+
+check_required_components(@PROJECT_NAME@)
--- a/3rdparty/cpuinfo/cpuinfo.vcxproj
+++ b/3rdparty/cpuinfo/cpuinfo.vcxproj
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(SolutionDir)common\vsprops\BaseProjectConfig.props" />
+  <Import Project="$(SolutionDir)common\vsprops\WinSDK.props" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{7E183337-A7E9-460C-9D3D-568BC9F9BCC1}</ProjectGuid>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset Condition="!$(Configuration.Contains(Clang))">$(DefaultPlatformToolset)</PlatformToolset>
+    <PlatformToolset Condition="$(Configuration.Contains(Clang))">ClangCL</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization Condition="$(Configuration.Contains(Release))">true</WholeProgramOptimization>
+    <UseDebugLibraries Condition="$(Configuration.Contains(Debug))">true</UseDebugLibraries>
+    <UseDebugLibraries Condition="!$(Configuration.Contains(Debug))">false</UseDebugLibraries>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings" />
+  <ImportGroup Label="PropertySheets">
+    <Import Project="..\DefaultProjectRootDir.props" />
+    <Import Project="..\3rdparty.props" />
+    <Import Condition="$(Configuration.Contains(Debug))" Project="..\..\common\vsprops\CodeGen_Debug.props" />
+    <Import Condition="$(Configuration.Contains(Devel))" Project="..\..\common\vsprops\CodeGen_Devel.props" />
+    <Import Condition="$(Configuration.Contains(Release))" Project="..\..\common\vsprops\CodeGen_Release.props" />
+    <Import Condition="!$(Configuration.Contains(Release))" Project="..\..\common\vsprops\IncrementalLinking.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+  </PropertyGroup>
+  <ItemGroup>
+    <ClCompile Include="src\arm\cache.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\arm\uarch.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init-by-logical-sys-info.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="deps\clog\src\clog.c" />
+    <ClCompile Include="src\api.c" />
+    <ClCompile Include="src\cache.c" />
+    <ClCompile Include="src\init.c" />
+    <ClCompile Include="src\x86\cache\descriptor.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\deterministic.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\info.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\isa.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\name.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\topology.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\uarch.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\vendor.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\windows\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="src\arm\api.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="src\arm\midr.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="src\arm\windows\windows-arm-init.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="deps\clog\include\clog.h" />
+    <ClInclude Include="include\cpuinfo.h" />
+    <ClInclude Include="src\cpuinfo\common.h" />
+    <ClInclude Include="src\cpuinfo\internal-api.h" />
+    <ClInclude Include="src\cpuinfo\log.h" />
+    <ClInclude Include="src\cpuinfo\utils.h" />
+    <ClInclude Include="src\x86\api.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="src\x86\cpuid.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="src\x86\windows\api.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
+    </ClInclude>
+  </ItemGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <PreprocessorDefinitions>CPUINFO_LOG_LEVEL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <WarningLevel>TurnOffAllWarnings</WarningLevel>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)deps\clog\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <ObjectFileName>$(IntDir)%(RelativeDir)</ObjectFileName>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets" />
+</Project>
--- a/3rdparty/cpuinfo/cpuinfo.vcxproj.filters
+++ b/3rdparty/cpuinfo/cpuinfo.vcxproj.filters
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- Filter (virtual folder) definitions referenced by the ClCompile and
+       ClInclude items in this file. Each filter has its own UniqueIdentifier. -->
+  <ItemGroup>
+    <Filter Include="x86">
+      <UniqueIdentifier>{8fc9f543-ff04-48fb-ae1a-7c575a8aed13}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="x86\windows">
+      <UniqueIdentifier>{0b540baa-aafb-4e51-8cbf-b7e7c00d9a4d}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="x86\descriptor">
+      <UniqueIdentifier>{53ef3c40-8e03-46d1-aeb3-6446c40469da}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="cpuinfo">
+      <UniqueIdentifier>{26002d26-399a-41bb-93cb-42fb9be21c1f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="clog">
+      <UniqueIdentifier>{7f0aba4c-ca06-4a7b-aed1-4f1e6976e839}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="arm">
+      <UniqueIdentifier>{ac4549d3-f60f-4e60-bf43-86d1c253cf3f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="arm\windows">
+      <UniqueIdentifier>{41fcb23a-e77b-4b5c-8238-e9b92bf1f3c6}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <!-- Assigns each C source file to a filter. src\api.c, src\cache.c and
+       src\init.c have no Filter element. Note that the files under
+       src\x86\cache are grouped under the filter named x86\descriptor. -->
+  <ItemGroup>
+    <ClCompile Include="src\x86\isa.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\name.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\topology.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\uarch.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\vendor.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\info.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\init.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\windows\init.c">
+      <Filter>x86\windows</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\deterministic.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\init.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\descriptor.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\api.c" />
+    <ClCompile Include="src\cache.c" />
+    <ClCompile Include="src\init.c" />
+    <ClCompile Include="deps\clog\src\clog.c">
+      <Filter>clog</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\cache.c">
+      <Filter>arm</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\uarch.c">
+      <Filter>arm</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init.c">
+      <Filter>arm\windows</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init-by-logical-sys-info.c">
+      <Filter>arm\windows</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <!-- Assigns each header file to a filter. include\cpuinfo.h has no Filter
+       element. -->
+  <ItemGroup>
+    <ClInclude Include="src\x86\api.h">
+      <Filter>x86</Filter>
+    </ClInclude>
+    <ClInclude Include="src\x86\cpuid.h">
+      <Filter>x86</Filter>
+    </ClInclude>
+    <ClInclude Include="src\x86\windows\api.h">
+      <Filter>x86\windows</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\internal-api.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\log.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\utils.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\common.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="include\cpuinfo.h" />
+    <ClInclude Include="deps\clog\include\clog.h">
+      <Filter>clog</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\api.h">
+      <Filter>arm</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\midr.h">
+      <Filter>arm</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\windows\windows-arm-init.h">
+      <Filter>arm\windows</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
--- a/3rdparty/cpuinfo/include/cpuinfo-mock.h
+++ b/3rdparty/cpuinfo/include/cpuinfo-mock.h
@@ -0,0 +1,76 @@
+#pragma once
+#ifndef CPUINFO_MOCK_H
+#define CPUINFO_MOCK_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#if defined(__linux__)
+#include <sys/types.h>
+#endif
+
+#if !defined(CPUINFO_MOCK) || !(CPUINFO_MOCK)
+#error This header is intended only for test use
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if CPUINFO_ARCH_ARM
+/*
+ * Mock setters declared only when CPUINFO_ARCH_ARM is non-zero. Each takes a
+ * 32-bit value to record for later use by the library.
+ * NOTE(review): presumably these stand in for the FPSID and WCID register
+ * contents during ISA detection -- confirm against the implementation.
+ */
+void CPUINFO_ABI cpuinfo_set_fpsid(uint32_t fpsid);
+void CPUINFO_ABI cpuinfo_set_wcid(uint32_t wcid);
+#endif /* CPUINFO_ARCH_ARM */
+
+#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+/*
+ * One entry of a mocked CPUID dump.
+ * NOTE(review): presumably input_eax/input_ecx identify the query and
+ * eax/ebx/ecx/edx hold the register values to report for it -- confirm
+ * against the mock implementation.
+ */
+struct cpuinfo_mock_cpuid {
+	uint32_t input_eax;
+	uint32_t input_ecx;
+	uint32_t eax;
+	uint32_t ebx;
+	uint32_t ecx;
+	uint32_t edx;
+};
+
+/* Installs a CPUID dump made of `entries` cpuinfo_mock_cpuid records. */
+void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries);
+/* Mocked CPUID queries; results are written to the four-element `regs` array. */
+void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]);
+void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]);
+#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
+
+/*
+ * Describes one file handed to cpuinfo_mock_filesystem().
+ * NOTE(review): presumably `content` holds `size` bytes for `path` and
+ * `offset` tracks the current read position -- confirm against the mock
+ * filesystem implementation.
+ */
+struct cpuinfo_mock_file {
+	const char* path;
+	size_t size;
+	const char* content;
+	size_t offset;
+};
+
+/*
+ * One key/value pair of a mocked Android system property table. The mock
+ * property lookup walks the table until it reaches an entry whose `key` is
+ * NULL, so tables must end with such an entry.
+ */
+struct cpuinfo_mock_property {
+	const char* key;
+	const char* value;
+};
+
+#if defined(__linux__)
+/*
+ * Linux-only mocks: a mocked filesystem plus open/close/read entry points
+ * with POSIX-like signatures.
+ */
+void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files);
+int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag);
+int CPUINFO_ABI cpuinfo_mock_close(int fd);
+ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity);
+
+/* Mock setters for hardware capability words; hwcap2 is declared for 32-bit ARM only. */
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap);
+#endif
+#if CPUINFO_ARCH_ARM
+void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2);
+#endif
+#endif
+
+#if defined(__ANDROID__)
+/* Android-only mocks: install a mocked system property table and a GL renderer string. */
+void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties);
+void CPUINFO_ABI cpuinfo_mock_gl_renderer(const char* renderer);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* CPUINFO_MOCK_H */
--- a/3rdparty/cpuinfo/include/cpuinfo.h
+++ b/3rdparty/cpuinfo/include/cpuinfo.h
--- a/3rdparty/cpuinfo/src/api.c
+++ b/3rdparty/cpuinfo/src/api.c
@@ -0,0 +1,416 @@
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#ifdef __linux__
+#include <linux/api.h>
+
+#include <sys/syscall.h>
+#include <unistd.h>
+#if !defined(__NR_getcpu)
+#include <asm-generic/unistd.h>
+#endif
+#endif
+
+/* Checked by every accessor in this file before it returns data. */
+bool cpuinfo_is_initialized = false;
+
+/* Global tables returned by the cpuinfo_get_* accessors below. */
+struct cpuinfo_processor* cpuinfo_processors = NULL;
+struct cpuinfo_core* cpuinfo_cores = NULL;
+struct cpuinfo_cluster* cpuinfo_clusters = NULL;
+struct cpuinfo_package* cpuinfo_packages = NULL;
+/* One cache table per level, indexed by the cpuinfo_cache_level_* constants. */
+struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max] = {NULL};
+
+/* Element counts used to bounds-check the indexed accessors. */
+uint32_t cpuinfo_processors_count = 0;
+uint32_t cpuinfo_cores_count = 0;
+uint32_t cpuinfo_clusters_count = 0;
+uint32_t cpuinfo_packages_count = 0;
+uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = {0};
+uint32_t cpuinfo_max_cache_size = 0;
+
+/*
+ * ARM and RISC-V builds keep a table of microarchitectures; every other
+ * architecture uses the single cpuinfo_global_uarch entry.
+ */
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL;
+uint32_t cpuinfo_uarchs_count = 0;
+#else
+struct cpuinfo_uarch_info cpuinfo_global_uarch = {cpuinfo_uarch_unknown};
+#endif
+
+/*
+ * Linux-only lookup tables indexed by the CPU number reported by the getcpu
+ * syscall; cpuinfo_linux_cpu_max is the exclusive upper bound for that index.
+ */
+#ifdef __linux__
+uint32_t cpuinfo_linux_cpu_max = 0;
+const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL;
+const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL;
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL;
+#endif
+#endif
+
+/*
+ * Returns the global processor table. Logs a fatal message first when
+ * cpuinfo has not been initialized.
+ */
+const struct cpuinfo_processor* cpuinfo_get_processors(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"processors");
+	}
+	return cpuinfo_processors;
+}
+
+/*
+ * Returns the global core table. Logs a fatal message first when cpuinfo has
+ * not been initialized.
+ */
+const struct cpuinfo_core* cpuinfo_get_cores(void) {
+	if CPUINFO_UNLIKELY (!cpuinfo_is_initialized) {
+		/* Use "cores" so the message names this function, not cpuinfo_get_core. */
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores");
+	}
+	return cpuinfo_cores;
+}
+
+/*
+ * Returns the global cluster table. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+const struct cpuinfo_cluster* cpuinfo_get_clusters(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"clusters");
+	}
+	return cpuinfo_clusters;
+}
+
+/*
+ * Returns the global package table. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+const struct cpuinfo_package* cpuinfo_get_packages(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"packages");
+	}
+	return cpuinfo_packages;
+}
+
+/*
+ * Returns the microarchitecture table: cpuinfo_uarchs on ARM and RISC-V
+ * builds, otherwise the address of the single cpuinfo_global_uarch entry.
+ * Logs a fatal message first when cpuinfo has not been initialized.
+ *
+ * Declared with (void) so the definition is a real prototype, and the
+ * initialization check carries the same branch hint as the other accessors.
+ */
+const struct cpuinfo_uarch_info* cpuinfo_get_uarchs(void) {
+	if CPUINFO_UNLIKELY (!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs");
+	}
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+	return cpuinfo_uarchs;
+#else
+	return &cpuinfo_global_uarch;
+#endif
+}
+
+/*
+ * Returns the processor at `index`, or NULL when `index` is not below
+ * cpuinfo_processors_count. Logs a fatal message first when cpuinfo has not
+ * been initialized.
+ */
+const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"processor");
+	}
+	const bool out_of_range = index >= cpuinfo_processors_count;
+	if CPUINFO_UNLIKELY (out_of_range) {
+		return NULL;
+	}
+	return cpuinfo_processors + index;
+}
+
+/*
+ * Returns the core at `index`, or NULL when `index` is not below
+ * cpuinfo_cores_count. Logs a fatal message first when cpuinfo has not been
+ * initialized.
+ */
+const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"core");
+	}
+	const bool out_of_range = index >= cpuinfo_cores_count;
+	if CPUINFO_UNLIKELY (out_of_range) {
+		return NULL;
+	}
+	return cpuinfo_cores + index;
+}
+
+/*
+ * Returns the cluster at `index`, or NULL when `index` is not below
+ * cpuinfo_clusters_count. Logs a fatal message first when cpuinfo has not
+ * been initialized.
+ */
+const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"cluster");
+	}
+	const bool out_of_range = index >= cpuinfo_clusters_count;
+	if CPUINFO_UNLIKELY (out_of_range) {
+		return NULL;
+	}
+	return cpuinfo_clusters + index;
+}
+
+/*
+ * Returns the package at `index`, or NULL when `index` is not below
+ * cpuinfo_packages_count. Logs a fatal message first when cpuinfo has not
+ * been initialized.
+ */
+const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"package");
+	}
+	const bool out_of_range = index >= cpuinfo_packages_count;
+	if CPUINFO_UNLIKELY (out_of_range) {
+		return NULL;
+	}
+	return cpuinfo_packages + index;
+}
+
+/*
+ * Returns the microarchitecture entry at `index`, or NULL when `index` is out
+ * of range. On ARM and RISC-V builds the range is cpuinfo_uarchs_count; on
+ * every other architecture only index 0 is valid and maps to
+ * cpuinfo_global_uarch. Logs a fatal message first when cpuinfo has not been
+ * initialized.
+ *
+ * The initialization check carries the same CPUINFO_UNLIKELY hint as the
+ * other accessors in this file.
+ */
+const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) {
+	if CPUINFO_UNLIKELY (!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch");
+	}
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+	if CPUINFO_UNLIKELY (index >= cpuinfo_uarchs_count) {
+		return NULL;
+	}
+	return &cpuinfo_uarchs[index];
+#else
+	if CPUINFO_UNLIKELY (index != 0) {
+		return NULL;
+	}
+	return &cpuinfo_global_uarch;
+#endif
+}
+
+/*
+ * Returns cpuinfo_processors_count. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+uint32_t cpuinfo_get_processors_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"processors_count");
+	}
+	return cpuinfo_processors_count;
+}
+
+/*
+ * Returns cpuinfo_cores_count. Logs a fatal message first when cpuinfo has
+ * not been initialized.
+ */
+uint32_t cpuinfo_get_cores_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"cores_count");
+	}
+	return cpuinfo_cores_count;
+}
+
+/*
+ * Returns cpuinfo_clusters_count. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+uint32_t cpuinfo_get_clusters_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"clusters_count");
+	}
+	return cpuinfo_clusters_count;
+}
+
+/*
+ * Returns cpuinfo_packages_count. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+uint32_t cpuinfo_get_packages_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"packages_count");
+	}
+	return cpuinfo_packages_count;
+}
+
+/*
+ * Returns the number of microarchitecture entries: cpuinfo_uarchs_count on
+ * ARM and RISC-V builds, otherwise 1. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ *
+ * The initialization check carries the same CPUINFO_UNLIKELY hint as the
+ * other accessors in this file.
+ */
+uint32_t cpuinfo_get_uarchs_count(void) {
+	if CPUINFO_UNLIKELY (!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count");
+	}
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+	return cpuinfo_uarchs_count;
+#else
+	return 1;
+#endif
+}
+
+/*
+ * Returns the cache table stored for cpuinfo_cache_level_1i. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1i_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1i];
+}
+
+/*
+ * Returns the cache table stored for cpuinfo_cache_level_1d. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1d_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1d];
+}
+
+/*
+ * Returns the cache table stored for cpuinfo_cache_level_2. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l2_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_2];
+}
+
+/*
+ * Returns the cache table stored for cpuinfo_cache_level_3. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l3_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_3];
+}
+
+/*
+ * Returns the cache table stored for cpuinfo_cache_level_4. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l4_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_4];
+}
+
+/*
+ * Returns the cpuinfo_cache_level_1i entry at `index`, or NULL when `index`
+ * is not below the count stored for that level. Logs a fatal message first
+ * when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1i_cache");
+	}
+	const uint32_t available = cpuinfo_cache_count[cpuinfo_cache_level_1i];
+	if CPUINFO_UNLIKELY (index >= available) {
+		return NULL;
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1i] + index;
+}
+
+/*
+ * Returns the cpuinfo_cache_level_1d entry at `index`, or NULL when `index`
+ * is not below the count stored for that level. Logs a fatal message first
+ * when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1d_cache");
+	}
+	const uint32_t available = cpuinfo_cache_count[cpuinfo_cache_level_1d];
+	if CPUINFO_UNLIKELY (index >= available) {
+		return NULL;
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1d] + index;
+}
+
+/*
+ * Returns the cpuinfo_cache_level_2 entry at `index`, or NULL when `index`
+ * is not below the count stored for that level. Logs a fatal message first
+ * when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l2_cache");
+	}
+	const uint32_t available = cpuinfo_cache_count[cpuinfo_cache_level_2];
+	if CPUINFO_UNLIKELY (index >= available) {
+		return NULL;
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_2] + index;
+}
+
+/*
+ * Returns the cpuinfo_cache_level_3 entry at `index`, or NULL when `index`
+ * is not below the count stored for that level. Logs a fatal message first
+ * when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l3_cache");
+	}
+	const uint32_t available = cpuinfo_cache_count[cpuinfo_cache_level_3];
+	if CPUINFO_UNLIKELY (index >= available) {
+		return NULL;
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_3] + index;
+}
+
+/*
+ * Returns the cpuinfo_cache_level_4 entry at `index`, or NULL when `index`
+ * is not below the count stored for that level. Logs a fatal message first
+ * when cpuinfo has not been initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l4_cache");
+	}
+	const uint32_t available = cpuinfo_cache_count[cpuinfo_cache_level_4];
+	if CPUINFO_UNLIKELY (index >= available) {
+		return NULL;
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_4] + index;
+}
+
+/*
+ * Returns the entry count stored for cpuinfo_cache_level_1i. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1i_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_1i];
+}
+
+/*
+ * Returns the entry count stored for cpuinfo_cache_level_1d. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l1d_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_1d];
+}
+
+/*
+ * Returns the entry count stored for cpuinfo_cache_level_2. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l2_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_2];
+}
+
+/*
+ * Returns the entry count stored for cpuinfo_cache_level_3. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l3_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_3];
+}
+
+/*
+ * Returns the entry count stored for cpuinfo_cache_level_4. Logs a fatal
+ * message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"l4_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_4];
+}
+
+/*
+ * Returns cpuinfo_max_cache_size. Logs a fatal message first when cpuinfo
+ * has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"max_cache_size");
+	}
+	return cpuinfo_max_cache_size;
+}
+
+/*
+ * Returns the processor entry for the CPU the caller is running on.
+ *
+ * On Linux the CPU number comes from the getcpu syscall and is looked up in
+ * cpuinfo_linux_cpu_to_processor_map; NULL is returned when the syscall fails
+ * or the number is not below cpuinfo_linux_cpu_max. On other systems the
+ * result is always NULL. Logs a fatal message first when cpuinfo has not been
+ * initialized.
+ */
+const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"current_processor");
+	}
+#ifdef __linux__
+	/* Zero-initialized to silence a MemorySanitizer error. */
+	unsigned current_cpu = 0;
+	const long status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+	if CPUINFO_UNLIKELY (status != 0) {
+		return NULL;
+	}
+	if CPUINFO_UNLIKELY ((uint32_t)current_cpu >= cpuinfo_linux_cpu_max) {
+		return NULL;
+	}
+	return cpuinfo_linux_cpu_to_processor_map[current_cpu];
+#else
+	return NULL;
+#endif
+}
+
+/*
+ * Returns the core entry for the CPU the caller is running on.
+ *
+ * On Linux the CPU number comes from the getcpu syscall and is looked up in
+ * cpuinfo_linux_cpu_to_core_map; NULL is returned when the syscall fails or
+ * the number is not below cpuinfo_linux_cpu_max. On other systems the result
+ * is always NULL. Logs a fatal message first when cpuinfo has not been
+ * initialized.
+ */
+const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"current_core");
+	}
+#ifdef __linux__
+	/* Zero-initialized to silence a MemorySanitizer error. */
+	unsigned current_cpu = 0;
+	const long status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+	if CPUINFO_UNLIKELY (status != 0) {
+		return NULL;
+	}
+	if CPUINFO_UNLIKELY ((uint32_t)current_cpu >= cpuinfo_linux_cpu_max) {
+		return NULL;
+	}
+	return cpuinfo_linux_cpu_to_core_map[current_cpu];
+#else
+	return NULL;
+#endif
+}
+
+/*
+ * Returns the microarchitecture index of the CPU the caller is running on.
+ *
+ * Only ARM and RISC-V builds on Linux perform a lookup: the CPU number from
+ * the getcpu syscall indexes cpuinfo_linux_cpu_to_uarch_index_map. The result
+ * is 0 when that map is NULL, when the syscall fails, when the CPU number is
+ * not below cpuinfo_linux_cpu_max, and on every other platform or
+ * architecture. Logs a fatal message first when cpuinfo has not been
+ * initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"current_uarch_index");
+	}
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+#ifdef __linux__
+	/* No map means a single type of cores: skip the syscall entirely. */
+	if (NULL == cpuinfo_linux_cpu_to_uarch_index_map) {
+		return 0;
+	}
+
+	/* General case. Zero-initialized to silence a MemorySanitizer error. */
+	unsigned current_cpu = 0;
+	const long status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+	if CPUINFO_UNLIKELY (status != 0) {
+		return 0;
+	}
+	if CPUINFO_UNLIKELY ((uint32_t)current_cpu >= cpuinfo_linux_cpu_max) {
+		return 0;
+	}
+	return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
+#else
+	/* Fallback: pretend to be on the big core. */
+	return 0;
+#endif
+#else
+	/* Only ARM/ARM64/RISCV processors may include cores of different types
+	 * in the same package. */
+	return 0;
+#endif
+}
+
+/*
+ * Same lookup as cpuinfo_get_current_uarch_index, but failures report
+ * `default_uarch_index` instead of 0.
+ *
+ * On ARM and RISC-V Linux builds the result is 0 when
+ * cpuinfo_linux_cpu_to_uarch_index_map is NULL, and `default_uarch_index`
+ * when the getcpu syscall fails or the CPU number is not below
+ * cpuinfo_linux_cpu_max. On ARM and RISC-V builds without Linux it is always
+ * `default_uarch_index`; on every other architecture it is always 0. Logs a
+ * fatal message first when cpuinfo has not been initialized.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
+	if CPUINFO_UNLIKELY (cpuinfo_is_initialized == false) {
+		cpuinfo_log_fatal(
+			"cpuinfo_get_%s called before cpuinfo is initialized",
+			"current_uarch_index_with_default");
+	}
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+#ifdef __linux__
+	/* No map means a single type of cores: skip the syscall entirely. */
+	if (NULL == cpuinfo_linux_cpu_to_uarch_index_map) {
+		return 0;
+	}
+
+	/* General case. Zero-initialized to silence a MemorySanitizer error. */
+	unsigned current_cpu = 0;
+	const long status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+	if CPUINFO_UNLIKELY (status != 0) {
+		return default_uarch_index;
+	}
+	if CPUINFO_UNLIKELY ((uint32_t)current_cpu >= cpuinfo_linux_cpu_max) {
+		return default_uarch_index;
+	}
+	return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
+#else
+	/* Fallback: no API to query current core, use default uarch index. */
+	return default_uarch_index;
+#endif
+#else
+	/* Only ARM/ARM64/RISCV processors may include cores of different types
+	 * in the same package. */
+	return 0;
+#endif
+}
--- a/3rdparty/cpuinfo/src/arm/android/api.h
+++ b/3rdparty/cpuinfo/src/arm/android/api.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <arm/api.h>
+#include <arm/linux/api.h>
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/*
+ * Identifies a source of chipset information. The first value refers to the
+ * Hardware field of /proc/cpuinfo and the rest to Android system properties;
+ * cpuinfo_android_chipset_property_max is the number of sources.
+ * NOTE(review): the source meanings are read off the enumerator names --
+ * confirm against the code that uses this enum.
+ */
+enum cpuinfo_android_chipset_property {
+	cpuinfo_android_chipset_property_proc_cpuinfo_hardware = 0,
+	cpuinfo_android_chipset_property_ro_product_board,
+	cpuinfo_android_chipset_property_ro_board_platform,
+	cpuinfo_android_chipset_property_ro_mediatek_platform,
+	cpuinfo_android_chipset_property_ro_arch,
+	cpuinfo_android_chipset_property_ro_chipname,
+	cpuinfo_android_chipset_property_ro_hardware_chipname,
+	cpuinfo_android_chipset_property_max,
+};
+
+/*
+ * Fills `properties` with Android system property values. The parameter is
+ * declared `restrict static 1`, so it must point to at least one valid,
+ * non-aliased structure.
+ */
+CPUINFO_INTERNAL void cpuinfo_arm_android_parse_properties(
+	struct cpuinfo_android_properties properties[restrict static 1]);
--- a/3rdparty/cpuinfo/src/arm/android/properties.c
+++ b/3rdparty/cpuinfo/src/arm/android/properties.c
@@ -0,0 +1,66 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/system_properties.h>
+
+#include <arm/android/api.h>
+#include <arm/linux/api.h>
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+
+/* Mock property table consulted by cpuinfo_android_property_get; NULL until a table is installed. */
+static struct cpuinfo_mock_property* cpuinfo_mock_properties = NULL;
+
+/*
+ * Installs `properties` as the mock property table and logs that mocking is
+ * enabled. The pointer is stored as-is, not copied.
+ */
+void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties) {
+	cpuinfo_log_info("Android properties mocking enabled");
+	cpuinfo_mock_properties = properties;
+}
+
+/*
+ * Mock counterpart of the __system_property_get wrapper used in non-mock
+ * builds.
+ *
+ * Searches the installed mock table (terminated by an entry whose key is
+ * NULL) for `key`, comparing at most CPUINFO_BUILD_PROP_NAME_MAX characters.
+ * On a match the value is copied into `value` and its length is returned;
+ * otherwise `value` is set to the empty string and 0 is returned.
+ *
+ * `value` must have room for CPUINFO_BUILD_PROP_VALUE_MAX bytes. The copy is
+ * always NUL-terminated: mocked values that do not fit are truncated to
+ * CPUINFO_BUILD_PROP_VALUE_MAX - 1 characters.
+ */
+static int cpuinfo_android_property_get(const char* key, char* value) {
+	if (cpuinfo_mock_properties != NULL) {
+		for (const struct cpuinfo_mock_property* prop = cpuinfo_mock_properties; prop->key != NULL; prop++) {
+			if (strncmp(key, prop->key, CPUINFO_BUILD_PROP_NAME_MAX) == 0) {
+				/*
+				 * strncpy does not terminate the destination when the
+				 * source fills it, so copy one byte less and terminate
+				 * explicitly.
+				 */
+				strncpy(value, prop->value, CPUINFO_BUILD_PROP_VALUE_MAX - 1);
+				value[CPUINFO_BUILD_PROP_VALUE_MAX - 1] = '\0';
+				return (int)strnlen(value, CPUINFO_BUILD_PROP_VALUE_MAX);
+			}
+		}
+	}
+	*value = '\0';
+	return 0;
+}
+#else
+/* Non-mock build: forwards directly to __system_property_get and returns its result. */
+static inline int cpuinfo_android_property_get(const char* key, char* value) {
+	return __system_property_get(key, value);
+}
+#endif
+
+/*
+ * Reads six Android system properties into the matching fields of
+ * `properties` and writes each value to the debug log:
+ * ro.product.board, ro.board.platform, ro.mediatek.platform, ro.arch,
+ * ro.chipname and ro.hardware.chipname, in that order.
+ */
+void cpuinfo_arm_android_parse_properties(struct cpuinfo_android_properties properties[restrict static 1]) {
+	/* Length of the most recently read property value. */
+	int length;
+
+	length = cpuinfo_android_property_get("ro.product.board", properties->ro_product_board);
+	cpuinfo_log_debug("read ro.product.board = \"%.*s\"", length, properties->ro_product_board);
+
+	length = cpuinfo_android_property_get("ro.board.platform", properties->ro_board_platform);
+	cpuinfo_log_debug("read ro.board.platform = \"%.*s\"", length, properties->ro_board_platform);
+
+	length = cpuinfo_android_property_get("ro.mediatek.platform", properties->ro_mediatek_platform);
+	cpuinfo_log_debug("read ro.mediatek.platform = \"%.*s\"", length, properties->ro_mediatek_platform);
+
+	length = cpuinfo_android_property_get("ro.arch", properties->ro_arch);
+	cpuinfo_log_debug("read ro.arch = \"%.*s\"", length, properties->ro_arch);
+
+	length = cpuinfo_android_property_get("ro.chipname", properties->ro_chipname);
+	cpuinfo_log_debug("read ro.chipname = \"%.*s\"", length, properties->ro_chipname);
+
+	length = cpuinfo_android_property_get("ro.hardware.chipname", properties->ro_hardware_chipname);
+	cpuinfo_log_debug("read ro.hardware.chipname = \"%.*s\"", length, properties->ro_hardware_chipname);
+}
--- a/3rdparty/cpuinfo/src/arm/api.h
+++ b/3rdparty/cpuinfo/src/arm/api.h
@@ -0,0 +1,133 @@
+#pragma once
+
+/*
+ * RESTRICT_STATIC is placed inside array parameter declarators in the
+ * prototypes below. It expands to `restrict static`, except under MSVC where
+ * it expands to nothing.
+ */
+#ifdef _MSC_VER
+#define RESTRICT_STATIC /* nothing for MSVC */
+#else
+#define RESTRICT_STATIC restrict static
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/*
+ * Chipset vendors. 0 is reserved for "unknown"; cpuinfo_arm_chipset_vendor_max
+ * is the number of enumerators and not a vendor itself.
+ */
+enum cpuinfo_arm_chipset_vendor {
+	cpuinfo_arm_chipset_vendor_unknown = 0,
+	cpuinfo_arm_chipset_vendor_qualcomm,
+	cpuinfo_arm_chipset_vendor_mediatek,
+	cpuinfo_arm_chipset_vendor_samsung,
+	cpuinfo_arm_chipset_vendor_hisilicon,
+	cpuinfo_arm_chipset_vendor_actions,
+	cpuinfo_arm_chipset_vendor_allwinner,
+	cpuinfo_arm_chipset_vendor_amlogic,
+	cpuinfo_arm_chipset_vendor_broadcom,
+	cpuinfo_arm_chipset_vendor_lg,
+	cpuinfo_arm_chipset_vendor_leadcore,
+	cpuinfo_arm_chipset_vendor_marvell,
+	cpuinfo_arm_chipset_vendor_mstar,
+	cpuinfo_arm_chipset_vendor_novathor,
+	cpuinfo_arm_chipset_vendor_nvidia,
+	cpuinfo_arm_chipset_vendor_pinecone,
+	cpuinfo_arm_chipset_vendor_renesas,
+	cpuinfo_arm_chipset_vendor_rockchip,
+	cpuinfo_arm_chipset_vendor_spreadtrum,
+	cpuinfo_arm_chipset_vendor_telechips,
+	cpuinfo_arm_chipset_vendor_texas_instruments,
+	cpuinfo_arm_chipset_vendor_unisoc,
+	cpuinfo_arm_chipset_vendor_wondermedia,
+	cpuinfo_arm_chipset_vendor_max,
+};
+
+/*
+ * Chipset product series; each enumerator name starts with the vendor it
+ * belongs to. 0 is reserved for "unknown"; cpuinfo_arm_chipset_series_max is
+ * the number of enumerators and not a series itself.
+ */
+enum cpuinfo_arm_chipset_series {
+	cpuinfo_arm_chipset_series_unknown = 0,
+	cpuinfo_arm_chipset_series_qualcomm_qsd,
+	cpuinfo_arm_chipset_series_qualcomm_msm,
+	cpuinfo_arm_chipset_series_qualcomm_apq,
+	cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	cpuinfo_arm_chipset_series_mediatek_mt,
+	cpuinfo_arm_chipset_series_samsung_exynos,
+	cpuinfo_arm_chipset_series_hisilicon_k3v,
+	cpuinfo_arm_chipset_series_hisilicon_hi,
+	cpuinfo_arm_chipset_series_hisilicon_kirin,
+	cpuinfo_arm_chipset_series_actions_atm,
+	cpuinfo_arm_chipset_series_allwinner_a,
+	cpuinfo_arm_chipset_series_amlogic_aml,
+	cpuinfo_arm_chipset_series_amlogic_s,
+	cpuinfo_arm_chipset_series_broadcom_bcm,
+	cpuinfo_arm_chipset_series_lg_nuclun,
+	cpuinfo_arm_chipset_series_leadcore_lc,
+	cpuinfo_arm_chipset_series_marvell_pxa,
+	cpuinfo_arm_chipset_series_mstar_6a,
+	cpuinfo_arm_chipset_series_novathor_u,
+	cpuinfo_arm_chipset_series_nvidia_tegra_t,
+	cpuinfo_arm_chipset_series_nvidia_tegra_ap,
+	cpuinfo_arm_chipset_series_nvidia_tegra_sl,
+	cpuinfo_arm_chipset_series_pinecone_surge_s,
+	cpuinfo_arm_chipset_series_renesas_mp,
+	cpuinfo_arm_chipset_series_rockchip_rk,
+	cpuinfo_arm_chipset_series_spreadtrum_sc,
+	cpuinfo_arm_chipset_series_telechips_tcc,
+	cpuinfo_arm_chipset_series_texas_instruments_omap,
+	cpuinfo_arm_chipset_series_unisoc_t,
+	cpuinfo_arm_chipset_series_unisoc_ums,
+	cpuinfo_arm_chipset_series_wondermedia_wm,
+	cpuinfo_arm_chipset_series_max,
+};
+
+/* Size in bytes of the `suffix` buffer in struct cpuinfo_arm_chipset. */
+#define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8
+
+/*
+ * Decoded chipset identity: vendor, series, a numeric model and a short
+ * textual suffix.
+ * NOTE(review): whether `suffix` is always NUL-terminated is not visible
+ * here -- confirm before treating it as a C string.
+ */
+struct cpuinfo_arm_chipset {
+	enum cpuinfo_arm_chipset_vendor vendor;
+	enum cpuinfo_arm_chipset_series series;
+	uint32_t model;
+	char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX];
+};
+
+/* Size of the `name` buffer taken by cpuinfo_arm_chipset_to_string. */
+#define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
+
+/*
+ * C translation units see the full set of prototypes, whose array parameters
+ * use RESTRICT_STATIC. C++ translation units see only
+ * cpuinfo_arm_decode_cache, declared with plain array parameters.
+ */
+#ifndef __cplusplus
+/* Writes a textual name for `chipset` into `name`. */
+CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
+	const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
+	char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]);
+
+/* Adjusts `chipset` in place, given the core count and the maximum CPU frequency. */
+CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
+	struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max);
+
+/*
+ * Decodes `midr` into a vendor and a microarchitecture. The has_vfpv4
+ * parameter exists only on 32-bit ARM builds (CPUINFO_ARCH_ARM).
+ */
+CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
+	uint32_t midr,
+#if CPUINFO_ARCH_ARM
+	bool has_vfpv4,
+#endif
+	enum cpuinfo_vendor vendor[RESTRICT_STATIC 1],
+	enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]);
+
+/* Fills the l1i, l1d, l2 and l3 cache descriptions from the given core and chipset details. */
+CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+	enum cpuinfo_uarch uarch,
+	uint32_t cluster_cores,
+	uint32_t midr,
+	const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
+	uint32_t cluster_id,
+	uint32_t arch_version,
+	struct cpuinfo_cache l1i[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l1d[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l2[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l3[RESTRICT_STATIC 1]);
+
+/* Returns a maximum cache size computed for `processor`. */
+CPUINFO_INTERNAL uint32_t
+cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[RESTRICT_STATIC 1]);
+#else /* defined(__cplusplus) */
+/* C++ view of cpuinfo_arm_decode_cache: same parameters, without RESTRICT_STATIC. */
+CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+	enum cpuinfo_uarch uarch,
+	uint32_t cluster_cores,
+	uint32_t midr,
+	const struct cpuinfo_arm_chipset chipset[1],
+	uint32_t cluster_id,
+	uint32_t arch_version,
+	struct cpuinfo_cache l1i[1],
+	struct cpuinfo_cache l1d[1],
+	struct cpuinfo_cache l2[1],
+	struct cpuinfo_cache l3[1]);
+#endif
--- a/3rdparty/cpuinfo/src/arm/cache.c
+++ b/3rdparty/cpuinfo/src/arm/cache.c
--- a/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c
+++ b/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c
@@ -0,0 +1,364 @@
+#include <stdint.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <arm/linux/api.h>
+#include <arm/linux/cp.h>
+#include <arm/midr.h>
+#include <cpuinfo/log.h>
+
+#if CPUINFO_MOCK
+/*
+ * Mock-build storage for coprocessor register values, all initialised to 0.
+ * NOTE(review): presumably returned by read_fpsid() / read_wcid() in mock
+ * builds instead of reading real registers - confirm in arm/linux/cp.h.
+ * This file defines no setter for cpuinfo_arm_mvfr0.
+ */
+uint32_t cpuinfo_arm_fpsid = 0;
+uint32_t cpuinfo_arm_mvfr0 = 0;
+uint32_t cpuinfo_arm_wcid = 0;
+
+/* Stores `fpsid` as the mocked FPSID register value */
+void cpuinfo_set_fpsid(uint32_t fpsid) {
+	cpuinfo_arm_fpsid = fpsid;
+}
+
+/* Stores `wcid` as the mocked WCID register value */
+void cpuinfo_set_wcid(uint32_t wcid) {
+	cpuinfo_arm_wcid = wcid;
+}
+#endif
+
+/*
+ * Derives the ISA extension flags of a 32-bit ARM processor from the feature
+ * bits and identification values reported by Linux.
+ *
+ * The function only ever sets flags in `isa` (the single exception is `dot`,
+ * which is assigned true or false for Cortex-A55/A75 depending on the MIDR
+ * variant). NOTE(review): the caller presumably passes a zero-initialised
+ * `isa` - confirm.
+ *
+ * @param features - mask of CPUINFO_ARM_LINUX_FEATURE_* bits.
+ * @param features2 - mask of CPUINFO_ARM_LINUX_FEATURE2_* bits (AES, PMULL,
+ *                    SHA1, SHA2, CRC32).
+ * @param midr - Main ID Register value; drives the per-core whitelists and
+ *               quirks.
+ * @param architecture_version - architecture version reported by the kernel.
+ *                               It is adjusted locally when the feature bits
+ *                               or the MIDR contradict it.
+ * @param architecture_flags - mask of CPUINFO_ARM_LINUX_ARCH_* flags
+ *                             (T, E, J).
+ * @param chipset - decoded chipset; used to disable extensions on SoCs with
+ *                  known problems.
+ * @param[out] isa - ISA description to update.
+ */
+void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint64_t features2,
+	uint32_t midr,
+	uint32_t architecture_version,
+	uint32_t architecture_flags,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1]) {
+	/*
+	 * Any of the ARMv8-only feature2 bits overrides a lower reported
+	 * architecture version.
+	 */
+	if (architecture_version < 8) {
+		const uint32_t armv8_features2_mask = CPUINFO_ARM_LINUX_FEATURE2_AES |
+			CPUINFO_ARM_LINUX_FEATURE2_PMULL | CPUINFO_ARM_LINUX_FEATURE2_SHA1 |
+			CPUINFO_ARM_LINUX_FEATURE2_SHA2 | CPUINFO_ARM_LINUX_FEATURE2_CRC32;
+		if (features2 & armv8_features2_mask) {
+			architecture_version = 8;
+		}
+	}
+	if (architecture_version >= 8) {
+		/*
+		 * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always
+		 * supported, but may not be reported in /proc/cpuinfo features.
+		 */
+		isa->armv5e = true;
+		isa->armv6 = true;
+		isa->armv6k = true;
+		isa->armv7 = true;
+		isa->armv7mp = true;
+		isa->armv8 = true;
+		isa->thumb = true;
+		isa->thumb2 = true;
+		isa->idiv = true;
+		isa->vfpv3 = true;
+		isa->d32 = true;
+		isa->fp16 = true;
+		isa->fma = true;
+		isa->neon = true;
+
+		/*
+		 * NEON FP16 compute extension and VQRDMLAH/VQRDMLSH
+		 * instructions are not indicated in /proc/cpuinfo. Use a
+		 * MIDR-based heuristic to whitelist processors known to support
+		 * it:
+		 * - Processors with Cortex-A55 cores
+		 * - Processors with Cortex-A75 cores
+		 * - Processors with Cortex-A76 cores
+		 * - Processors with Cortex-A77 cores
+		 * - Processors with Cortex-A78 cores
+		 * - Processors with Cortex-A510 cores
+		 * - Processors with Cortex-A710 cores
+		 * - Processors with Cortex-A715 cores
+		 * - Processors with Cortex-X1 cores
+		 * - Processors with Cortex-X2 cores
+		 * - Processors with Cortex-X3 cores
+		 * - Processors with Exynos M4 cores
+		 * - Processors with Exynos M5 cores
+		 * - Neoverse N1 cores
+		 * - Neoverse N2 cores
+		 * - Neoverse V1 cores
+		 * - Neoverse V2 cores
+		 */
+		if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
+			/* Only little cores of Exynos 9810 support FP16 & RDM
+			 */
+			cpuinfo_log_warning(
+				"FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
+		} else {
+			/* The switch key keeps only the implementer and part
+			 * fields of the MIDR */
+			switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case UINT32_C(0x4100D050): /* Cortex-A55 */
+				case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+				case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+				case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+				case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+				case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+				case UINT32_C(0x4100D400): /* Neoverse V1 */
+				case UINT32_C(0x4100D410): /* Cortex-A78 */
+				case UINT32_C(0x4100D440): /* Cortex-X1 */
+				case UINT32_C(0x4100D460): /* Cortex-A510 */
+				case UINT32_C(0x4100D470): /* Cortex-A710 */
+				case UINT32_C(0x4100D480): /* Cortex-X2 */
+				case UINT32_C(0x4100D490): /* Neoverse N2 */
+				case UINT32_C(0x4100D4D0): /* Cortex-A715 */
+				case UINT32_C(0x4100D4E0): /* Cortex-X3 */
+				case UINT32_C(0x4100D4F0): /* Neoverse V2 */
+				case UINT32_C(0x4800D400): /* Cortex-A76
+							      (HiSilicon) */
+				case UINT32_C(0x51008020): /* Kryo 385 Gold
+							      (Cortex-A75) */
+				case UINT32_C(0x51008030): /* Kryo 385 Silver
+							      (Cortex-A55) */
+				case UINT32_C(0x51008040): /* Kryo 485 Gold
+							      (Cortex-A76) */
+				case UINT32_C(0x51008050): /* Kryo 485 Silver
+							      (Cortex-A55) */
+				case UINT32_C(0x53000030): /* Exynos M4 */
+				case UINT32_C(0x53000040): /* Exynos M5 */
+					isa->fp16arith = true;
+					isa->rdm = true;
+					break;
+			}
+		}
+
+		/*
+		 * NEON VDOT instructions are not indicated in /proc/cpuinfo.
+		 * Use a MIDR-based heuristic to whitelist processors known to
+		 * support it:
+		 * - Processors with Cortex-A76 cores
+		 * - Processors with Cortex-A77 cores
+		 * - Processors with Cortex-A78 cores
+		 * - Processors with Cortex-A510 cores
+		 * - Processors with Cortex-A710 cores
+		 * - Processors with Cortex-A715 cores
+		 * - Processors with Cortex-X1 cores
+		 * - Processors with Cortex-X2 cores
+		 * - Processors with Cortex-X3 cores
+		 * - Processors with Exynos M4 cores
+		 * - Processors with Exynos M5 cores
+		 * - Neoverse N1 cores
+		 * - Neoverse N2 cores
+		 * - Neoverse V1 cores
+		 * - Neoverse V2 cores
+		 */
+		if (chipset->series == cpuinfo_arm_chipset_series_spreadtrum_sc && chipset->model == 9863) {
+			cpuinfo_log_warning(
+				"VDOT instructions disabled: cause occasional SIGILL on Spreadtrum SC9863A");
+		} else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_t && chipset->model == 310) {
+			cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc T310");
+		} else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_ums && chipset->model == 312) {
+			cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc UMS312");
+		} else {
+			switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+				case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+				case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+				case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+				case UINT32_C(0x4100D400): /* Neoverse V1 */
+				case UINT32_C(0x4100D410): /* Cortex-A78 */
+				case UINT32_C(0x4100D440): /* Cortex-X1 */
+				case UINT32_C(0x4100D460): /* Cortex-A510 */
+				case UINT32_C(0x4100D470): /* Cortex-A710 */
+				case UINT32_C(0x4100D480): /* Cortex-X2 */
+				case UINT32_C(0x4100D490): /* Neoverse N2 */
+				case UINT32_C(0x4100D4D0): /* Cortex-A715 */
+				case UINT32_C(0x4100D4E0): /* Cortex-X3 */
+				case UINT32_C(0x4100D4F0): /* Neoverse V2 */
+				case UINT32_C(0x4800D400): /* Cortex-A76
+							      (HiSilicon) */
+				case UINT32_C(0x51008040): /* Kryo 485 Gold
+							      (Cortex-A76) */
+				case UINT32_C(0x51008050): /* Kryo 485 Silver
+							      (Cortex-A55) */
+				case UINT32_C(0x53000030): /* Exynos M4 */
+				case UINT32_C(0x53000040): /* Exynos M5 */
+					isa->dot = true;
+					break;
+				case UINT32_C(0x4100D050): /* Cortex A55: revision 1
+							      or later only */
+					/* The check is made on the MIDR
+					 * variant field */
+					isa->dot = !!(midr_get_variant(midr) >= 1);
+					break;
+				case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2
+							      or later only */
+					isa->dot = !!(midr_get_variant(midr) >= 2);
+					break;
+			}
+		}
+	} else {
+		/* ARMv7 or lower: use feature flags to detect optional features
+		 */
+
+		/*
+		 * ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7
+		 * architecture even though they support only ARMv6 instruction
+		 * set.
+		 */
+		if (architecture_version == 7 && midr_is_arm11(midr)) {
+			cpuinfo_log_warning(
+				"kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)");
+			architecture_version = 6;
+		}
+
+		/*
+		 * Any of the feature bits below raises a lower architecture
+		 * version to 7. This also applies to the ARM11 downgrade
+		 * performed just above.
+		 */
+		if (architecture_version < 7) {
+			const uint32_t armv7_features_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 |
+				CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 |
+				CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON |
+				CPUINFO_ARM_LINUX_FEATURE_IDIVT | CPUINFO_ARM_LINUX_FEATURE_IDIVA;
+			if (features & armv7_features_mask) {
+				architecture_version = 7;
+			}
+		}
+		if ((architecture_version >= 6) || (features & CPUINFO_ARM_LINUX_FEATURE_EDSP) ||
+		    (architecture_flags & CPUINFO_ARM_LINUX_ARCH_E)) {
+			isa->armv5e = true;
+		}
+		if (architecture_version >= 6) {
+			isa->armv6 = true;
+		}
+		if (architecture_version >= 7) {
+			isa->armv6k = true;
+			isa->armv7 = true;
+
+			/*
+			 * ARMv7 MP extension (PLDW instruction) is not
+			 * indicated in /proc/cpuinfo. Use heuristic list of
+			 * supporting processors:
+			 * - Processors supporting UDIV/SDIV instructions
+			 * ("idiva" + "idivt" features in /proc/cpuinfo)
+			 * - Cortex-A5
+			 * - Cortex-A9
+			 * - Dual-Core Scorpion
+			 * - Krait (supports UDIV/SDIV, but kernels may not
+			 * report it in /proc/cpuinfo)
+			 *
+			 * TODO: check single-core Qualcomm Scorpion.
+			 */
+			switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case UINT32_C(0x4100C050): /* Cortex-A5 */
+				case UINT32_C(0x4100C090): /* Cortex-A9 */
+				case UINT32_C(0x510002D0): /* Scorpion (dual-core) */
+				case UINT32_C(0x510004D0): /* Krait (dual-core) */
+				case UINT32_C(0x510006F0): /* Krait (quad-core) */
+					isa->armv7mp = true;
+					break;
+				default:
+					/* In practice IDIV instruction implies
+					 * ARMv7+MP ISA. The comparison requires
+					 * every bit of the IDIV mask to be set,
+					 * not just one of them. */
+					isa->armv7mp = (features & CPUINFO_ARM_LINUX_FEATURE_IDIV) ==
+						CPUINFO_ARM_LINUX_FEATURE_IDIV;
+					break;
+			}
+		}
+
+		if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) {
+			/* WCID is read only when the code is not compiled for
+			 * ARMv8 or later */
+#if !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 8))
+			const uint32_t wcid = read_wcid();
+			cpuinfo_log_debug("WCID = 0x%08" PRIx32, wcid);
+			/* Bits 8-15 of WCID hold the coprocessor type */
+			const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF);
+			if (coprocessor_type >= 0x10) {
+				isa->wmmx = true;
+				if (coprocessor_type >= 0x20) {
+					isa->wmmx2 = true;
+				}
+			} else {
+				cpuinfo_log_warning(
+					"WMMX ISA disabled: OS reported iwmmxt feature, "
+					"but WCID coprocessor type 0x%" PRIx32 " indicates no WMMX support",
+					coprocessor_type);
+			}
+#else
+			cpuinfo_log_warning(
+				"WMMX ISA disabled: OS reported iwmmxt feature, "
+				"but there is no iWMMXt coprocessor");
+#endif
+		}
+
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) {
+			isa->thumb = true;
+
+			/*
+			 * There is no separate feature flag for Thumb 2.
+			 * All ARMv7 processors and ARM 1156 support Thumb 2.
+			 */
+			if (architecture_version >= 7 || midr_is_arm1156(midr)) {
+				isa->thumb2 = true;
+			}
+		}
+		if (features & CPUINFO_ARM_LINUX_FEATURE_THUMBEE) {
+			isa->thumbee = true;
+		}
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_JAVA) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_J)) {
+			isa->jazelle = true;
+		}
+
+		/* Qualcomm Krait may have buggy kernel configuration that
+		 * doesn't report IDIV */
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV ||
+		    midr_is_krait(midr)) {
+			isa->idiv = true;
+		}
+
+		const uint32_t vfp_mask = CPUINFO_ARM_LINUX_FEATURE_VFP | CPUINFO_ARM_LINUX_FEATURE_VFPV3 |
+			CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 |
+			CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+		if (features & vfp_mask) {
+			/* Same as vfp_mask minus the plain VFP bit */
+			const uint32_t vfpv3_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 |
+				CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 |
+				CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+			if ((architecture_version >= 7) || (features & vfpv3_mask)) {
+				isa->vfpv3 = true;
+
+				const uint32_t d32_mask =
+					CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+				if (features & d32_mask) {
+					isa->d32 = true;
+				}
+			} else {
+				/* Only the plain VFP bit is set on a pre-ARMv7
+				 * processor. A binary compiled for ARMv7+
+				 * assumes VFPv3; otherwise FPSID decides
+				 * whether VFPv2 is reported. */
+#if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
+				isa->vfpv3 = true;
+#else
+				const uint32_t fpsid = read_fpsid();
+				cpuinfo_log_debug("FPSID = 0x%08" PRIx32, fpsid);
+				/* Bits 16-22 of FPSID hold the subarchitecture */
+				const uint32_t subarchitecture = (fpsid >> 16) & UINT32_C(0x7F);
+				if (subarchitecture >= 0x01) {
+					isa->vfpv2 = true;
+				}
+#endif
+			}
+		}
+		if (features & CPUINFO_ARM_LINUX_FEATURE_NEON) {
+			isa->neon = true;
+		}
+
+		/*
+		 * There is no separate feature flag for FP16 support.
+		 * VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as
+		 * well). Additionally, ARM Cortex-A9 and Qualcomm Scorpion
+		 * support FP16.
+		 */
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) || midr_is_cortex_a9(midr) || midr_is_scorpion(midr)) {
+			isa->fp16 = true;
+		}
+
+		if (features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) {
+			isa->fma = true;
+		}
+	}
+
+	/* The feature2 extensions are reported for every architecture version */
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_AES) {
+		isa->aes = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_PMULL) {
+		isa->pmull = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA1) {
+		isa->sha1 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA2) {
+		isa->sha2 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_CRC32) {
+		isa->crc32 = true;
+	}
+}
--- a/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c
+++ b/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c
@@ -0,0 +1,194 @@
+#include <stdint.h>
+
+#include <arm/linux/api.h>
+#include <cpuinfo/log.h>
+
+#include <sys/prctl.h>
+
+/*
+ * Derives the ISA extension flags of a 64-bit ARM processor from the feature
+ * bits reported by Linux, complemented by MIDR-based whitelists for kernels
+ * that do not report some extensions.
+ *
+ * Flags in `isa` are only ever set to true, with two exceptions: `dot` is
+ * assigned true or false for Cortex-A55/A75 depending on the MIDR variant,
+ * and `svelen` is always written. NOTE(review): the caller presumably passes
+ * a zero-initialised `isa` - confirm.
+ *
+ * @param features - mask of CPUINFO_ARM_LINUX_FEATURE_* bits.
+ * @param features2 - mask of CPUINFO_ARM_LINUX_FEATURE2_* bits.
+ * @param midr - Main ID Register value; drives the per-core whitelists.
+ * @param chipset - decoded chipset; used to disable extensions on SoCs with
+ *                  known problems.
+ * @param[out] isa - ISA description to update.
+ */
+void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint64_t features2,
+	uint32_t midr,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1]) {
+	if (features & CPUINFO_ARM_LINUX_FEATURE_AES) {
+		isa->aes = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_PMULL) {
+		isa->pmull = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA1) {
+		isa->sha1 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA2) {
+		isa->sha2 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_CRC32) {
+		isa->crc32 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_ATOMICS) {
+		isa->atomics = true;
+	}
+
+	/*
+	 * Some phones ship with an old kernel configuration that doesn't report
+	 * NEON FP16 compute extension and SQRDMLAH/SQRDMLSH/UQRDMLAH/UQRDMLSH
+	 * instructions. Use a MIDR-based heuristic to whitelist processors
+	 * known to support it:
+	 * - Processors with Cortex-A55 cores
+	 * - Processors with Cortex-A65 cores
+	 * - Processors with Cortex-A75 cores
+	 * - Processors with Cortex-A76 cores
+	 * - Processors with Cortex-A77 cores
+	 * - Processors with Exynos M4 cores
+	 * - Processors with Exynos M5 cores
+	 * - Neoverse N1 cores
+	 * - Neoverse V1 cores
+	 * - Neoverse N2 cores
+	 * - Neoverse V2 cores
+	 * Processors outside the whitelist fall back to the reported feature
+	 * bits.
+	 */
+	if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
+		/* Exynos 9810 reports that it supports FP16 compute, but in
+		 * fact only little cores do. Neither fp16arith nor rdm is set
+		 * on this chipset. */
+		cpuinfo_log_warning(
+			"FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
+	} else {
+		/* FP16 arithmetic requires both the scalar (FPHP) and the SIMD
+		 * (ASIMDHP) half-precision bits */
+		const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
+		/* The switch key keeps only the implementer and part fields of
+		 * the MIDR */
+		switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+			case UINT32_C(0x4100D050): /* Cortex-A55 */
+			case UINT32_C(0x4100D060): /* Cortex-A65 */
+			case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+			case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+			case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+			case UINT32_C(0x4100D400): /* Neoverse V1 */
+			case UINT32_C(0x4100D490): /* Neoverse N2 */
+			case UINT32_C(0x4100D4F0): /* Neoverse V2 */
+			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+			case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
+			case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
+			case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+			case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+			case UINT32_C(0x53000030): /* Exynos M4 */
+			case UINT32_C(0x53000040): /* Exynos M5 */
+				isa->fp16arith = true;
+				isa->rdm = true;
+				break;
+			default:
+				if ((features & fp16arith_mask) == fp16arith_mask) {
+					isa->fp16arith = true;
+				} else if (features & CPUINFO_ARM_LINUX_FEATURE_FPHP) {
+					cpuinfo_log_warning(
+						"FP16 arithmetics disabled: detected support only for scalar operations");
+				} else if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDHP) {
+					cpuinfo_log_warning(
+						"FP16 arithmetics disabled: detected support only for SIMD operations");
+				}
+				if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) {
+					isa->rdm = true;
+				}
+				break;
+		}
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_I8MM) {
+		isa->i8mm = true;
+	}
+
+	/*
+	 * Many phones ship with an old kernel configuration that doesn't report
+	 * UDOT/SDOT instructions. Use a MIDR-based heuristic to whitelist
+	 * processors known to support it.
+	 */
+	switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+		case UINT32_C(0x4100D060): /* Cortex-A65 */
+		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+		case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D400): /* Neoverse V1 */
+		case UINT32_C(0x4100D490): /* Neoverse N2 */
+		case UINT32_C(0x4100D4A0): /* Neoverse E1 */
+		case UINT32_C(0x4100D4F0): /* Neoverse V2 */
+		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+		case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+		case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+		case UINT32_C(0x53000030): /* Exynos-M4 */
+		case UINT32_C(0x53000040): /* Exynos-M5 */
+			isa->dot = true;
+			break;
+		case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
+			/* The check is made on the MIDR variant field */
+			isa->dot = !!(midr_get_variant(midr) >= 1);
+			break;
+		case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
+			isa->dot = !!(midr_get_variant(midr) >= 2);
+			break;
+		default:
+			if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) {
+				isa->dot = true;
+			}
+			break;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) {
+		isa->jscvt = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_FCMA) {
+		isa->fcma = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SVE) {
+		isa->sve = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
+		isa->sve2 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME) {
+		isa->sme = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2) {
+		isa->sme2 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2P1) {
+		isa->sme2p1 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32) {
+		isa->sme_i16i32 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32) {
+		isa->sme_bi32i32 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16) {
+		isa->sme_b16b16 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16) {
+		isa->sme_f16f16 = true;
+	}
+	// SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16
+	// feature flag was added in Linux kernel before the BF16 feature flag,
+	// so we check for either.
+	if (features2 & (CPUINFO_ARM_LINUX_FEATURE2_BF16 | CPUINFO_ARM_LINUX_FEATURE2_SVEBF16)) {
+		isa->bf16 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) {
+		isa->fhm = true;
+	}
+
+	// Fallback definitions for toolchains whose <sys/prctl.h> does not
+	// provide the SVE prctl constants.
+#ifndef PR_SVE_GET_VL
+#define PR_SVE_GET_VL 51
+#endif
+
+#ifndef PR_SVE_VL_LEN_MASK
+#define PR_SVE_VL_LEN_MASK 0xffff
+#endif
+
+	// The vector length is queried unconditionally, whether or not the SVE
+	// feature bit was reported above.
+	int ret = prctl(PR_SVE_GET_VL);
+	if (ret < 0) {
+		cpuinfo_log_warning("No SVE support on this machine");
+		isa->svelen = 0; // Assume no SVE support if the call fails
+	} else {
+		// Mask out the SVE vector length bits
+		isa->svelen = ret & PR_SVE_VL_LEN_MASK;
+	}
+}
--- a/3rdparty/cpuinfo/src/arm/linux/api.h
+++ b/3rdparty/cpuinfo/src/arm/linux/api.h
@@ -0,0 +1,392 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <linux/api.h>
+
+/* No hard limit in the kernel, maximum length observed on non-rogue kernels is
+ * 64 */
+#define CPUINFO_HARDWARE_VALUE_MAX 64
+/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. Add 1
+ * symbol to detect overly large revision strings */
+#define CPUINFO_REVISION_VALUE_MAX 9
+
+#ifdef __ANDROID__
+/* As per include/sys/system_properties.h in Android NDK */
+#define CPUINFO_BUILD_PROP_NAME_MAX 32
+#define CPUINFO_BUILD_PROP_VALUE_MAX 92
+
+/*
+ * Android-only collection of strings used to identify the chipset (consumed
+ * by cpuinfo_arm_android_decode_chipset). The first field is sized for a
+ * /proc/cpuinfo Hardware value; the remaining fields are sized for build
+ * property values.
+ * NOTE(review): each ro_* field presumably mirrors the system property with
+ * the matching dotted name (e.g. ro.product.board) - confirm in the code that
+ * fills this structure.
+ */
+struct cpuinfo_android_properties {
+	char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
+	char ro_product_board[CPUINFO_BUILD_PROP_VALUE_MAX];
+	char ro_board_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
+	char ro_mediatek_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
+	char ro_arch[CPUINFO_BUILD_PROP_VALUE_MAX];
+	char ro_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
+	char ro_hardware_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
+};
+#endif
+
+/*
+ * Single-bit architecture flags (T, E, J).
+ * NOTE(review): presumably parsed from the letter suffix of the architecture
+ * string in /proc/cpuinfo - confirm in the parser.
+ */
+#define CPUINFO_ARM_LINUX_ARCH_T UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_ARCH_E UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_ARCH_J UINT32_C(0x00000004)
+
+/* Combined masks: TE = T | E, TEJ = T | E | J */
+#define CPUINFO_ARM_LINUX_ARCH_TE UINT32_C(0x00000003)
+#define CPUINFO_ARM_LINUX_ARCH_TEJ UINT32_C(0x00000007)
+
+/*
+ * Cache parameters in two parallel groups of four fields (size,
+ * associativity, line length, sets), prefixed i_ and d_.
+ * NOTE(review): presumably the instruction and data cache descriptions parsed
+ * from /proc/cpuinfo, with validity tracked by the
+ * CPUINFO_ARM_LINUX_VALID_ICACHE_* / _DCACHE_* flags - confirm in the parser.
+ * Units are not visible here.
+ */
+struct cpuinfo_arm_linux_proc_cpuinfo_cache {
+	uint32_t i_size;
+	uint32_t i_assoc;
+	uint32_t i_line_length;
+	uint32_t i_sets;
+	uint32_t d_size;
+	uint32_t d_assoc;
+	uint32_t d_line_length;
+	uint32_t d_sets;
+};
+
+/*
+ * Feature bit masks. The same CPUINFO_ARM_LINUX_FEATURE_* / _FEATURE2_* names
+ * carry different values on 32-bit and 64-bit ARM, so the two tables are
+ * mutually exclusive.
+ */
+#if CPUINFO_ARCH_ARM
+/* arch/arm/include/uapi/asm/hwcap.h */
+
+#define CPUINFO_ARM_LINUX_FEATURE_SWP UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_FEATURE_THUMB UINT32_C(0x00000004)
+#define CPUINFO_ARM_LINUX_FEATURE_26BIT UINT32_C(0x00000008)
+#define CPUINFO_ARM_LINUX_FEATURE_FASTMULT UINT32_C(0x00000010)
+#define CPUINFO_ARM_LINUX_FEATURE_FPA UINT32_C(0x00000020)
+#define CPUINFO_ARM_LINUX_FEATURE_VFP UINT32_C(0x00000040)
+#define CPUINFO_ARM_LINUX_FEATURE_EDSP UINT32_C(0x00000080)
+#define CPUINFO_ARM_LINUX_FEATURE_JAVA UINT32_C(0x00000100)
+#define CPUINFO_ARM_LINUX_FEATURE_IWMMXT UINT32_C(0x00000200)
+#define CPUINFO_ARM_LINUX_FEATURE_CRUNCH UINT32_C(0x00000400)
+#define CPUINFO_ARM_LINUX_FEATURE_THUMBEE UINT32_C(0x00000800)
+#define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000)
+#define CPUINFO_ARM_LINUX_FEATURE_VFPV3 UINT32_C(0x00002000)
+#define CPUINFO_ARM_LINUX_FEATURE_VFPV3D16                                  \
+	UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision \
+				registers */
+#define CPUINFO_ARM_LINUX_FEATURE_TLS UINT32_C(0x00008000)
+#define CPUINFO_ARM_LINUX_FEATURE_VFPV4 UINT32_C(0x00010000)
+#define CPUINFO_ARM_LINUX_FEATURE_IDIVA UINT32_C(0x00020000)
+#define CPUINFO_ARM_LINUX_FEATURE_IDIVT UINT32_C(0x00040000)
+/* Combined mask: IDIV = IDIVA | IDIVT */
+#define CPUINFO_ARM_LINUX_FEATURE_IDIV UINT32_C(0x00060000)
+#define CPUINFO_ARM_LINUX_FEATURE_VFPD32 UINT32_C(0x00080000)
+#define CPUINFO_ARM_LINUX_FEATURE_LPAE UINT32_C(0x00100000)
+#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00200000)
+
+/* Second feature word (32-bit ARM) */
+#define CPUINFO_ARM_LINUX_FEATURE2_AES UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_FEATURE2_PMULL UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_FEATURE2_SHA1 UINT32_C(0x00000004)
+#define CPUINFO_ARM_LINUX_FEATURE2_SHA2 UINT32_C(0x00000008)
+#define CPUINFO_ARM_LINUX_FEATURE2_CRC32 UINT32_C(0x00000010)
+#elif CPUINFO_ARCH_ARM64
+/* arch/arm64/include/uapi/asm/hwcap.h */
+#define CPUINFO_ARM_LINUX_FEATURE_FP UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_FEATURE_ASIMD UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00000004)
+#define CPUINFO_ARM_LINUX_FEATURE_AES UINT32_C(0x00000008)
+#define CPUINFO_ARM_LINUX_FEATURE_PMULL UINT32_C(0x00000010)
+#define CPUINFO_ARM_LINUX_FEATURE_SHA1 UINT32_C(0x00000020)
+#define CPUINFO_ARM_LINUX_FEATURE_SHA2 UINT32_C(0x00000040)
+#define CPUINFO_ARM_LINUX_FEATURE_CRC32 UINT32_C(0x00000080)
+#define CPUINFO_ARM_LINUX_FEATURE_ATOMICS UINT32_C(0x00000100)
+#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200)
+#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400)
+#define CPUINFO_ARM_LINUX_FEATURE_CPUID UINT32_C(0x00000800)
+#define CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM UINT32_C(0x00001000)
+#define CPUINFO_ARM_LINUX_FEATURE_JSCVT UINT32_C(0x00002000)
+#define CPUINFO_ARM_LINUX_FEATURE_FCMA UINT32_C(0x00004000)
+#define CPUINFO_ARM_LINUX_FEATURE_LRCPC UINT32_C(0x00008000)
+#define CPUINFO_ARM_LINUX_FEATURE_DCPOP UINT32_C(0x00010000)
+#define CPUINFO_ARM_LINUX_FEATURE_SHA3 UINT32_C(0x00020000)
+#define CPUINFO_ARM_LINUX_FEATURE_SM3 UINT32_C(0x00040000)
+#define CPUINFO_ARM_LINUX_FEATURE_SM4 UINT32_C(0x00080000)
+#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000)
+#define CPUINFO_ARM_LINUX_FEATURE_SHA512 UINT32_C(0x00200000)
+#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000)
+#define CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM UINT32_C(0x00800000)
+#define CPUINFO_ARM_LINUX_FEATURE_DIT UINT32_C(0x01000000)
+#define CPUINFO_ARM_LINUX_FEATURE_USCAT UINT32_C(0x02000000)
+#define CPUINFO_ARM_LINUX_FEATURE_ILRCPC UINT32_C(0x04000000)
+#define CPUINFO_ARM_LINUX_FEATURE_FLAGM UINT32_C(0x08000000)
+#define CPUINFO_ARM_LINUX_FEATURE_SSBS UINT32_C(0x10000000)
+#define CPUINFO_ARM_LINUX_FEATURE_SB UINT32_C(0x20000000)
+#define CPUINFO_ARM_LINUX_FEATURE_PACA UINT32_C(0x40000000)
+#define CPUINFO_ARM_LINUX_FEATURE_PACG UINT32_C(0x80000000)
+
+/*
+ * Second feature word (64-bit ARM). The list is not contiguous: no masks are
+ * defined here for bits 18-22 and 24-36. Masks above bit 31 are 64-bit
+ * constants, so they must be tested against a 64-bit value.
+ */
+#define CPUINFO_ARM_LINUX_FEATURE2_DCPODP UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVE2 UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEAES UINT32_C(0x00000004)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEPMULL UINT32_C(0x00000008)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEBITPERM UINT32_C(0x00000010)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVESHA3 UINT32_C(0x00000020)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVESM4 UINT32_C(0x00000040)
+#define CPUINFO_ARM_LINUX_FEATURE2_FLAGM2 UINT32_C(0x00000080)
+#define CPUINFO_ARM_LINUX_FEATURE2_FRINT UINT32_C(0x00000100)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEI8MM UINT32_C(0x00000200)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEF32MM UINT32_C(0x00000400)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEF64MM UINT32_C(0x00000800)
+#define CPUINFO_ARM_LINUX_FEATURE2_SVEBF16 UINT32_C(0x00001000)
+#define CPUINFO_ARM_LINUX_FEATURE2_I8MM UINT32_C(0x00002000)
+#define CPUINFO_ARM_LINUX_FEATURE2_BF16 UINT32_C(0x00004000)
+#define CPUINFO_ARM_LINUX_FEATURE2_DGH UINT32_C(0x00008000)
+#define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000)
+#define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME UINT32_C(0x00800000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME2 UINT64_C(0x0000002000000000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME2P1 UINT64_C(0x0000004000000000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32 UINT64_C(0x0000008000000000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32 UINT64_C(0x0000010000000000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16 UINT64_C(0x0000020000000000)
+#define CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16 UINT64_C(0x0000040000000000)
+#endif
+
+/*
+ * Validity bits, one per piece of per-processor information. They occupy
+ * bits 16 and up of a 32-bit flags word.
+ * NOTE(review): presumably stored in cpuinfo_arm_linux_processor.flags next to
+ * the generic CPUINFO_LINUX_FLAG_* bits - confirm in the parser.
+ */
+#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000)
+#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER UINT32_C(0x00020000)
+#define CPUINFO_ARM_LINUX_VALID_VARIANT UINT32_C(0x00040000)
+#define CPUINFO_ARM_LINUX_VALID_PART UINT32_C(0x00080000)
+#define CPUINFO_ARM_LINUX_VALID_REVISION UINT32_C(0x00100000)
+#define CPUINFO_ARM_LINUX_VALID_PROCESSOR UINT32_C(0x00200000)
+#define CPUINFO_ARM_LINUX_VALID_FEATURES UINT32_C(0x00400000)
+#if CPUINFO_ARCH_ARM
+/* Cache validity bits exist only on 32-bit ARM */
+#define CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE UINT32_C(0x01000000)
+#define CPUINFO_ARM_LINUX_VALID_ICACHE_SETS UINT32_C(0x02000000)
+#define CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS UINT32_C(0x04000000)
+#define CPUINFO_ARM_LINUX_VALID_ICACHE_LINE UINT32_C(0x08000000)
+#define CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE UINT32_C(0x10000000)
+#define CPUINFO_ARM_LINUX_VALID_DCACHE_SETS UINT32_C(0x20000000)
+#define CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS UINT32_C(0x40000000)
+#define CPUINFO_ARM_LINUX_VALID_DCACHE_LINE UINT32_C(0x80000000)
+#endif
+
+/*
+ * Combined masks:
+ * - VALID_INFO = all seven non-cache bits (ARCHITECTURE through FEATURES)
+ * - VALID_MIDR = ARCHITECTURE | IMPLEMENTER | VARIANT | PART | REVISION |
+ *   PROCESSOR, i.e. VALID_INFO without FEATURES
+ */
+#define CPUINFO_ARM_LINUX_VALID_INFO UINT32_C(0x007F0000)
+#define CPUINFO_ARM_LINUX_VALID_MIDR UINT32_C(0x003F0000)
+#if CPUINFO_ARCH_ARM
+/*
+ * - VALID_ICACHE = all four ICACHE_* bits
+ * - VALID_DCACHE = all four DCACHE_* bits
+ * - VALID_CACHE_LINE = ICACHE_LINE | DCACHE_LINE
+ */
+#define CPUINFO_ARM_LINUX_VALID_ICACHE UINT32_C(0x0F000000)
+#define CPUINFO_ARM_LINUX_VALID_DCACHE UINT32_C(0xF0000000)
+#define CPUINFO_ARM_LINUX_VALID_CACHE_LINE UINT32_C(0x88000000)
+#endif
+
+/*
+ * Per-logical-processor record built while detecting ARM processors on Linux.
+ */
+struct cpuinfo_arm_linux_processor {
+	/** Architecture version number (e.g. 7 or 8) */
+	uint32_t architecture_version;
+#if CPUINFO_ARCH_ARM
+	/** Mask of CPUINFO_ARM_LINUX_ARCH_* flags (32-bit ARM only) */
+	uint32_t architecture_flags;
+	/** Cache parameters (32-bit ARM only) */
+	struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache;
+#endif
+	/** Mask of CPUINFO_ARM_LINUX_FEATURE_* bits */
+	uint32_t features;
+	/** Mask of CPUINFO_ARM_LINUX_FEATURE2_* bits */
+	uint64_t features2;
+	/**
+	 * Main ID Register value.
+	 */
+	uint32_t midr;
+	enum cpuinfo_vendor vendor;
+	enum cpuinfo_uarch uarch;
+	uint32_t uarch_index;
+	/**
+	 * ID of the physical package which includes this logical processor.
+	 * The value is parsed from
+	 * /sys/devices/system/cpu/cpu<N>/topology/physical_package_id
+	 */
+	uint32_t package_id;
+	/**
+	 * Minimum processor ID on the package which includes this logical
+	 * processor. This value can serve as an ID for the cluster of logical
+	 * processors: it is the same for all logical processors on the same
+	 * package.
+	 */
+	uint32_t package_leader_id;
+	/**
+	 * Number of logical processors in the package.
+	 */
+	uint32_t package_processor_count;
+	/**
+	 * Maximum frequency, in kHz.
+	 * The value is parsed from
+	 * /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq.
+	 * If the file could not be read or parsed, the value is 0.
+	 */
+	uint32_t max_frequency;
+	/**
+	 * Minimum frequency, in kHz.
+	 * The value is parsed from
+	 * /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq.
+	 * If the file could not be read or parsed, the value is 0.
+	 */
+	uint32_t min_frequency;
+	/** Linux processor ID */
+	uint32_t system_processor_id;
+	/**
+	 * Bit mask describing which of the fields above hold valid data. It
+	 * is tested against CPUINFO_LINUX_FLAG_MAX_FREQUENCY,
+	 * CPUINFO_LINUX_FLAG_MIN_FREQUENCY and CPUINFO_ARM_LINUX_VALID_MIDR
+	 * by the cluster comparison helpers in this header.
+	 */
+	uint32_t flags;
+};
+
+/*
+ * Range of processor IDs forming one cluster.
+ * NOTE(review): presumably both bounds are inclusive Linux processor IDs -
+ * confirm against the code that fills this structure.
+ */
+struct cpuinfo_arm_linux_cluster {
+	uint32_t processor_id_min;
+	uint32_t processor_id_max;
+};
+
+/*
+ * Returns true if the two processors do belong to the same cluster.
+ *
+ * Only attributes that are valid for BOTH processors are compared:
+ * - a differing maximum or minimum frequency rules the match out;
+ * - an identical MIDR confirms the match, except for Cortex-A53, where both
+ *   frequencies must additionally be known and equal;
+ * - otherwise the match holds only if both frequencies are known and equal.
+ */
+static inline bool cpuinfo_arm_linux_processor_equals(
+	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
+	struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) {
+	const uint32_t shared_flags = processor_i->flags & processor_j->flags;
+
+	const bool max_frequency_known = (shared_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0;
+	if (max_frequency_known && processor_i->max_frequency != processor_j->max_frequency) {
+		return false;
+	}
+
+	const bool min_frequency_known = (shared_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0;
+	if (min_frequency_known && processor_i->min_frequency != processor_j->min_frequency) {
+		return false;
+	}
+
+	/* Past the guards above, a known frequency is also an equal one */
+	const bool frequencies_match = max_frequency_known && min_frequency_known;
+
+	const bool midr_known =
+		(shared_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR;
+	if (midr_known && processor_i->midr == processor_j->midr && !midr_is_cortex_a53(processor_i->midr)) {
+		/* Same MIDR on a core other than Cortex-A53 is conclusive */
+		return true;
+	}
+
+	/* Cortex-A53, differing MIDR, or MIDR unknown: rely on frequencies */
+	return frequencies_match;
+}
+
+/*
+ * Returns true if the two processors certainly don't belong to the same
+ * cluster, i.e. if any attribute that is valid for BOTH processors (maximum
+ * frequency, minimum frequency, MIDR) differs between them.
+ */
+static inline bool cpuinfo_arm_linux_processor_not_equals(
+	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
+	struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) {
+	const uint32_t shared_flags = processor_i->flags & processor_j->flags;
+
+	const bool max_frequency_differs = (shared_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0 &&
+		processor_i->max_frequency != processor_j->max_frequency;
+
+	const bool min_frequency_differs = (shared_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0 &&
+		processor_i->min_frequency != processor_j->min_frequency;
+
+	const bool midr_differs =
+		(shared_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR &&
+		processor_i->midr != processor_j->midr;
+
+	return max_frequency_differs || min_frequency_differs || midr_differs;
+}
+
+/*
+ * Declaration only; the definition is not part of this header.
+ * NOTE(review): judging by the name and parameters, this parses /proc/cpuinfo
+ * into the hardware and revision buffers and into the processors array (which
+ * holds at least max_processors_count elements) - confirm against the
+ * implementation.
+ */
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo(
+	char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+	char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
+	uint32_t max_processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]);
+
+/*
+ * HWCAP queries and ISA decoding. The set of functions and their signatures
+ * differ between 32-bit ARM and ARM64 builds.
+ */
+#if CPUINFO_ARCH_ARM
+/*
+ * 32-bit ARM: both HWCAP queries return bool and write their results through
+ * the hwcap / hwcap2 output pointers.
+ * NOTE(review): the bool presumably signals whether the values could be
+ * obtained - confirm against the definitions.
+ */
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval(
+	uint32_t hwcap[restrict static 1],
+	uint64_t hwcap2[restrict static 1]);
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs(
+	uint32_t hwcap[restrict static 1],
+	uint64_t hwcap2[restrict static 1]);
+
+/* Fills in isa; the 32-bit variant additionally takes architecture version and flags. */
+CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint64_t features2,
+	uint32_t midr,
+	uint32_t architecture_version,
+	uint32_t architecture_flags,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1]);
+#elif CPUINFO_ARCH_ARM64
+/* ARM64: same name as the 32-bit query above, but declared with a void result. */
+CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval(
+	uint32_t hwcap[restrict static 1],
+	uint64_t hwcap2[restrict static 1]);
+
+/* Fills in isa; note the arm64-specific function name. */
+CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint64_t features2,
+	uint32_t midr,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1]);
+#endif
+
+/*
+ * Chipset decoding entry point. Android builds decode from Android system
+ * properties; other Linux builds decode from the hardware and revision strings.
+ * Both variants also take the core count and a maximum CPU frequency.
+ * NOTE(review): units and exact meaning of max_cpu_freq_max are not visible
+ * here - confirm against the implementation.
+ */
+#if defined(__ANDROID__)
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset(
+	const struct cpuinfo_android_properties properties[restrict static 1],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max);
+#else
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset(
+	const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+	const char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max);
+#endif
+
+/* Declared for both Android and non-Android builds. */
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware(
+	const char proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max,
+	bool is_tegra);
+
+/*
+ * Per-source decoders: one function per input string. The Android-only ones
+ * are named after a system property (presumably the property the string is
+ * read from - confirm against callers); the non-Android one takes the
+ * revision string.
+ */
+#ifdef __ANDROID__
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_product_board(
+	const char ro_product_board[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max);
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_board_platform(
+	const char ro_board_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
+	uint32_t cores,
+	uint32_t max_cpu_freq_max);
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(
+	const char ro_mediatek_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_arch(
+	const char ro_arch[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_chipname(
+	const char ro_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_hardware_chipname(
+	const char ro_hardware_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+#else
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_revision(
+	const char proc_cpuinfo_revision[restrict static CPUINFO_REVISION_VALUE_MAX]);
+#endif
+
+/*
+ * Assigns processors to the pre-defined 4+1, 4+2, 4+4, or 4+4+2 cluster
+ * layouts. Returns false, leaving clusters unchanged, when the processor count
+ * is unsupported or the layout contradicts known processor details.
+ */
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+	uint32_t usable_processors,
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+/*
+ * Clusters the valid processors that have no pre-assigned cluster in one
+ * sequential scan, starting a new cluster whenever known details differ.
+ */
+CPUINFO_INTERNAL void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+/*
+ * Stores in package_processor_count of each valid processor the number of
+ * valid processors in its cluster.
+ */
+CPUINFO_INTERNAL void cpuinfo_arm_linux_count_cluster_processors(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+/*
+ * Declaration only; the definition is not part of this header.
+ * NOTE(review): presumably fills in missing MIDR information per cluster and
+ * returns a count - confirm the meaning of the return value.
+ */
+CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	uint32_t max_processors,
+	uint32_t usable_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+/*
+ * NOTE(review): judging by the names, a table indexed by Linux CPU number that
+ * yields a uarch index, plus its number of entries - confirm against the
+ * defining translation unit.
+ */
+extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries;
--- a/3rdparty/cpuinfo/src/arm/linux/chipset.c
+++ b/3rdparty/cpuinfo/src/arm/linux/chipset.c
--- a/3rdparty/cpuinfo/src/arm/linux/clusters.c
+++ b/3rdparty/cpuinfo/src/arm/linux/clusters.c
@@ -0,0 +1,632 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <arm/linux/api.h>
+#include <cpuinfo.h>
+#if defined(__ANDROID__)
+#include <arm/android/api.h>
+#endif
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+/* Tests whether every bit set in mask is also set in bitfield. */
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	const uint32_t missing_bits = mask & ~bitfield;
+	return missing_bits == 0;
+}
+
+/*
+ * Assigns logical processors to clusters of cores using heuristic based on the
+ * typical configuration of clusters for 5, 6, 8, and 10 cores:
+ * - 5 cores (ARM32 Android only): 2 clusters of 4+1 cores
+ * - 6 cores: 2 clusters of 4+2 cores
+ * - 8 cores: 2 clusters of 4+4 cores
+ * - 10 cores: 3 clusters of 4+4+2 cores
+ *
+ * The function must be called after parsing OS-provided information on core
+ * clusters. Its purpose is to detect clusters of cores when OS-provided
+ * information is lacking or incomplete, i.e.
+ * - Linux kernel is not configured to report information in sysfs topology
+ * leaf.
+ * - Linux kernel reports topology information only for online cores, and only
+ * cores on one cluster are online, e.g.:
+ *   - Exynos 8890 has 8 cores in 4+4 clusters, but only the first cluster of 4
+ * cores is reported, and cluster configuration of logical processors 4-7 is not
+ * reported (all remaining processors 4-7 form cluster 1)
+ *   - MT6797 has 10 cores in 4+4+2, but only the first cluster of 4 cores is
+ * reported, and cluster configuration of logical processors 4-9 is not reported
+ * (processors 4-7 form cluster 1, and processors 8-9 form cluster 2).
+ *
+ * Heuristic assignment of processors to the above pre-defined clusters fails if
+ * such assignment would contradict information provided by the operating
+ * system:
+ * - Any of the OS-reported processor clusters is different than the
+ * corresponding heuristic cluster.
+ * - Processors in a heuristic cluster have no OS-provided cluster siblings
+ * information, but have known and different minimum/maximum frequency.
+ * - Processors in a heuristic cluster have no OS-provided cluster siblings
+ * information, but have known and different MIDR components.
+ *
+ * If the heuristic assignment of processors to clusters of cores fails, all
+ * processors' clusters are unchanged.
+ *
+ * @param usable_processors - number of processors in the @p processors array
+ * with CPUINFO_LINUX_FLAG_VALID flags.
+ * @param max_processors - number of elements in the @p processors array.
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE
+ * and PRESENT flags, minimum/maximum frequency, MIDR information, and core
+ * cluster (package siblings list) information.
+ *
+ * @retval true if the heuristic successfully assigned all processors into
+ * clusters of cores.
+ * @retval false if known details about processors contradict the heuristic
+ * configuration of core clusters.
+ */
+bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+	uint32_t usable_processors,
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) {
+	/* Expected size of each heuristic cluster, in processor order */
+	uint32_t cluster_processors[3] = {0, 0, 0};
+	/* Number of meaningful entries in cluster_processors */
+	uint32_t cluster_count;
+	switch (usable_processors) {
+		case 10:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 4;
+			cluster_processors[2] = 2;
+			cluster_count = 3;
+			break;
+		case 8:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 4;
+			cluster_count = 2;
+			break;
+		case 6:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 2;
+			cluster_count = 2;
+			break;
+#if defined(__ANDROID__) && CPUINFO_ARCH_ARM
+		case 5:
+			/*
+			 * The only processor with 5 cores is Leadcore L1860C
+			 * (ARMv7, mobile), but this configuration is not too
+			 * unreasonable for a virtualized ARM server.
+			 */
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 1;
+			cluster_count = 2;
+			break;
+#endif
+		default:
+			return false;
+	}
+
+	/*
+	 * Assignment of processors to core clusters is done in two passes:
+	 * 1. Verify that the clusters proposed by heuristic are compatible with
+	 * known details about processors.
+	 * 2. If verification passed, update core clusters for the processors.
+	 */
+
+	uint32_t cluster = 0;
+	uint32_t expected_cluster_processors = 0;
+	/*
+	 * Known details of the cluster being verified. Each value is meaningful
+	 * only while the matching bit is set in cluster_flags; everything is
+	 * zero-initialized so that midr_copy_* never reads an indeterminate
+	 * cluster_midr.
+	 */
+	uint32_t cluster_start = 0, cluster_flags = 0, cluster_midr = 0;
+	uint32_t cluster_max_frequency = 0, cluster_min_frequency = 0;
+	bool expected_cluster_exists = false;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (expected_cluster_processors == 0) {
+				/* Expect this processor to start a new cluster
+				 */
+
+				if (cluster >= cluster_count) {
+					/*
+					 * More valid processors than
+					 * usable_processors promised: do not
+					 * index past the configured clusters.
+					 */
+					cpuinfo_log_debug(
+						"heuristic detection of core clusters failed: "
+						"processor %" PRIu32 " does not fit into any of the %" PRIu32
+						" expected clusters",
+						i,
+						cluster_count);
+					return false;
+				}
+
+				expected_cluster_exists = !!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER);
+				if (expected_cluster_exists) {
+					if (processors[i].package_leader_id != i) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %" PRIu32
+							" is expected to start a new cluster #%" PRIu32 " with %" PRIu32
+							" cores, "
+							"but system siblings lists reported it as a sibling of processor %" PRIu32,
+							i,
+							cluster,
+							cluster_processors[cluster],
+							processors[i].package_leader_id);
+						return false;
+					}
+				} else {
+					/* Nothing is known about the new cluster yet */
+					cluster_flags = 0;
+				}
+
+				cluster_start = i;
+				expected_cluster_processors = cluster_processors[cluster++];
+			} else {
+				/* Expect this processor to belong to the same
+				 * cluster as processor cluster_start */
+
+				if (expected_cluster_exists) {
+					/*
+					 * The cluster suggested by the
+					 * heuristic was already parsed from
+					 * system siblings lists. For all
+					 * processors we expect in the cluster,
+					 * check that:
+					 * - They have pre-assigned cluster from
+					 * siblings lists
+					 * (CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER
+					 * flag).
+					 * - They were assigned to the same
+					 * cluster based on siblings lists
+					 *   (package_leader_id points to the
+					 * first processor in the cluster).
+					 */
+
+					if ((processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) == 0) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %" PRIu32
+							" is expected to belong to the cluster of processor %" PRIu32
+							", "
+							"but system siblings lists did not report it as a sibling of processor %" PRIu32,
+							i,
+							cluster_start,
+							cluster_start);
+						return false;
+					}
+					if (processors[i].package_leader_id != cluster_start) {
+						/* Report the leader the OS actually
+						 * assigned, not the expected one */
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %" PRIu32
+							" is expected to belong to the cluster of processor %" PRIu32
+							", "
+							"but system siblings lists reported it to belong to the cluster of processor %" PRIu32,
+							i,
+							cluster_start,
+							processors[i].package_leader_id);
+						return false;
+					}
+				} else {
+					/*
+					 * The cluster suggested by the
+					 * heuristic was not parsed from system
+					 * siblings lists. For all processors we
+					 * expect in the cluster, check that:
+					 * - They have no pre-assigned cluster
+					 * from siblings lists.
+					 * - If their min/max CPU frequency is
+					 * known, it is the same.
+					 * - If any part of their MIDR
+					 * (Implementer, Variant, Part,
+					 * Revision) is known, it is the same.
+					 */
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %" PRIu32
+							" is expected to be unassigned to any cluster, "
+							"but system siblings lists reported it to belong to the cluster of processor %" PRIu32,
+							i,
+							processors[i].package_leader_id);
+						return false;
+					}
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+						if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+							if (cluster_min_frequency != processors[i].min_frequency) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"minimum frequency of processor %" PRIu32
+									" (%" PRIu32
+									" KHz) is different than of its expected cluster (%" PRIu32
+									" KHz)",
+									i,
+									processors[i].min_frequency,
+									cluster_min_frequency);
+								return false;
+							}
+						} else {
+							cluster_min_frequency = processors[i].min_frequency;
+							cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+						if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+							if (cluster_max_frequency != processors[i].max_frequency) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"maximum frequency of processor %" PRIu32
+									" (%" PRIu32
+									" KHz) is different than of its expected cluster (%" PRIu32
+									" KHz)",
+									i,
+									processors[i].max_frequency,
+									cluster_max_frequency);
+								return false;
+							}
+						} else {
+							cluster_max_frequency = processors[i].max_frequency;
+							cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) !=
+							    (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Implementer of processor %" PRIu32
+									" (0x%02" PRIx32
+									") is different than of its expected cluster (0x%02" PRIx32
+									")",
+									i,
+									midr_get_implementer(processors[i].midr),
+									midr_get_implementer(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr =
+								midr_copy_implementer(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) !=
+							    (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Variant of processor %" PRIu32
+									" (0x%" PRIx32
+									") is different than of its expected cluster (0x%" PRIx32
+									")",
+									i,
+									midr_get_variant(processors[i].midr),
+									midr_get_variant(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr =
+								midr_copy_variant(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) !=
+							    (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Part of processor %" PRIu32
+									" (0x%03" PRIx32
+									") is different than of its expected cluster (0x%03" PRIx32
+									")",
+									i,
+									midr_get_part(processors[i].midr),
+									midr_get_part(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) !=
+							    (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
+								/* Processor value first, cluster
+								 * value second, as the message
+								 * states */
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Revision of processor %" PRIu32
+									" (0x%" PRIx32
+									") is different than of its expected cluster (0x%" PRIx32
+									")",
+									i,
+									midr_get_revision(processors[i].midr),
+									midr_get_revision(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr =
+								midr_copy_revision(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+						}
+					}
+				}
+			}
+			expected_cluster_processors--;
+		}
+	}
+
+	/* Verification passed, assign all processors to new clusters */
+	cluster = 0;
+	expected_cluster_processors = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (expected_cluster_processors == 0) {
+				/* Expect this processor to start a new cluster
+				 */
+
+				cluster_start = i;
+				expected_cluster_processors = cluster_processors[cluster++];
+			} else {
+				/* Expect this processor to belong to the same
+				 * cluster as processor cluster_start */
+
+				if (!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
+					cpuinfo_log_debug(
+						"assigned processor %" PRIu32 " to cluster of processor %" PRIu32
+						" based on heuristic",
+						i,
+						cluster_start);
+				}
+
+				processors[i].package_leader_id = cluster_start;
+				processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			}
+			expected_cluster_processors--;
+		}
+	}
+	return true;
+}
+
+/*
+ * Assigns logical processors to clusters of cores in sequential manner:
+ * - Clusters detected from OS-provided information are unchanged:
+ *   - Processors assigned to these clusters stay assigned to the same clusters
+ *   - No new processors are added to these clusters
+ * - Processors without pre-assigned cluster are clustered in one sequential
+ * scan:
+ *   - If known details (min/max frequency, MIDR components) of a processor are
+ * compatible with a preceding processor, without pre-assigned cluster, the
+ * processor is assigned to the cluster of the preceding processor.
+ *   - If known details (min/max frequency, MIDR components) of a processor are
+ * not compatible with a preceding processor, the processor is assigned to a
+ * newly created cluster.
+ *
+ * The function must be called after parsing OS-provided information on core
+ * clusters, and usually is called only if heuristic assignment of processors to
+ * clusters (cpuinfo_arm_linux_detect_core_clusters_by_heuristic) failed.
+ *
+ * Its purpose is to detect clusters of cores when OS-provided information is
+ * lacking or incomplete, i.e.
+ * - Linux kernel is not configured to report information in sysfs topology
+ * leaf.
+ * - Linux kernel reports topology information only for online cores, and all
+ * cores on some of the clusters are offline.
+ *
+ * Sequential assignment of processors to clusters always succeeds, and upon
+ * exit, all usable processors in the
+ * @p processors array have cluster information.
+ *
+ * @param max_processors - number of elements in the @p processors array.
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE
+ * and PRESENT flags, minimum/maximum frequency, MIDR information, and core
+ * cluster (package siblings list) information.
+ *
+ * The function does not return a value: sequential assignment of processors to
+ * clusters cannot fail.
+ */
+void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) {
+	uint32_t cluster_flags = 0;
+	/* Number of processors in the cluster currently being built; zero means
+	 * no cluster has been started by this scan yet */
+	uint32_t cluster_processors = 0;
+	/*
+	 * Known details of the current cluster. Each value is meaningful only
+	 * while the matching bit is set in cluster_flags; everything is
+	 * zero-initialized so that midr_copy_* never reads an indeterminate
+	 * cluster_midr.
+	 */
+	uint32_t cluster_start = 0, cluster_midr = 0;
+	uint32_t cluster_max_frequency = 0, cluster_min_frequency = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		/* Only valid processors without a pre-assigned cluster are
+		 * considered */
+		if ((processors[i].flags & (CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) ==
+		    CPUINFO_LINUX_FLAG_VALID) {
+			if (cluster_processors == 0) {
+				goto new_cluster;
+			}
+
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+				if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+					if (cluster_min_frequency != processors[i].min_frequency) {
+						cpuinfo_log_info(
+							"minimum frequency of processor %" PRIu32 " (%" PRIu32
+							" KHz) is different than of preceding cluster (%" PRIu32
+							" KHz); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							processors[i].min_frequency,
+							cluster_min_frequency,
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_min_frequency = processors[i].min_frequency;
+					cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+				}
+			}
+
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+				if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+					if (cluster_max_frequency != processors[i].max_frequency) {
+						cpuinfo_log_debug(
+							"maximum frequency of processor %" PRIu32 " (%" PRIu32
+							" KHz) is different than of preceding cluster (%" PRIu32
+							" KHz); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							processors[i].max_frequency,
+							cluster_max_frequency,
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_max_frequency = processors[i].max_frequency;
+					cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+				}
+			}
+
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) !=
+					    (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Implementer of processor %" PRIu32 " (0x%02" PRIx32
+							") is different than of preceding cluster (0x%02" PRIx32
+							"); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							midr_get_implementer(processors[i].midr),
+							midr_get_implementer(cluster_midr),
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+				}
+			}
+
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) !=
+					    (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Variant of processor %" PRIu32 " (0x%" PRIx32
+							") is different than of its expected cluster (0x%" PRIx32
+							"); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							midr_get_variant(processors[i].midr),
+							midr_get_variant(cluster_midr),
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+				}
+			}
+
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) !=
+					    (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Part of processor %" PRIu32 " (0x%03" PRIx32
+							") is different than of its expected cluster (0x%03" PRIx32
+							"); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							midr_get_part(processors[i].midr),
+							midr_get_part(cluster_midr),
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+				}
+			}
+
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) !=
+					    (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
+						/* Processor value first, cluster value
+						 * second, as the message states */
+						cpuinfo_log_debug(
+							"CPU Revision of processor %" PRIu32 " (0x%" PRIx32
+							") is different than of its expected cluster (0x%" PRIx32
+							"); "
+							"processor %" PRIu32 " starts a new cluster",
+							i,
+							midr_get_revision(processors[i].midr),
+							midr_get_revision(cluster_midr),
+							i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+				}
+			}
+
+			/* All checks passed, attach processor to the preceding
+			 * cluster */
+			cluster_processors++;
+			processors[i].package_leader_id = cluster_start;
+			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			cpuinfo_log_debug(
+				"assigned processor %" PRIu32 " to preceding cluster of processor %" PRIu32,
+				i,
+				cluster_start);
+			continue;
+
+		new_cluster:
+			/* Create a new cluster starting with processor i */
+			cluster_start = i;
+			processors[i].package_leader_id = i;
+			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			cluster_processors = 1;
+
+			/* Copy known information from processor to cluster, and
+			 * set the flags accordingly */
+			cluster_flags = 0;
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+				cluster_min_frequency = processors[i].min_frequency;
+				cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+			}
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+				cluster_max_frequency = processors[i].max_frequency;
+				cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+			}
+		}
+	}
+}
+
+/*
+ * Fills in package_processor_count for every valid logical processor.
+ * This function should be called once all processors have been assigned to
+ * core clusters.
+ *
+ * @param max_processors - number of elements in the @p processors array.
+ * @param[in,out] processors - processor descriptors with decoded core cluster
+ * (package_leader_id) information. package_processor_count is expected to be
+ * zero on entry; on return it holds, for each processor with
+ * CPUINFO_LINUX_FLAG_VALID flags, the number of such processors in its core
+ * cluster.
+ */
+void cpuinfo_arm_linux_count_cluster_processors(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) {
+	/* Pass 1: tally cluster members on the leader's package_processor_count */
+	for (uint32_t member = 0; member < max_processors; member++) {
+		if (!bitmask_all(processors[member].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		processors[processors[member].package_leader_id].package_processor_count++;
+	}
+	/* Pass 2: propagate the leader's tally to every member of its cluster */
+	for (uint32_t member = 0; member < max_processors; member++) {
+		if (!bitmask_all(processors[member].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		const uint32_t leader = processors[member].package_leader_id;
+		processors[member].package_processor_count = processors[leader].package_processor_count;
+	}
+}
--- a/3rdparty/cpuinfo/src/arm/linux/cp.h
+++ b/3rdparty/cpuinfo/src/arm/linux/cp.h
@@ -0,0 +1,49 @@
+#include <stdint.h>
+
+#if CPUINFO_MOCK
+/*
+ * Mock build: register values come from variables defined elsewhere instead of
+ * being read from the coprocessors.
+ */
+extern uint32_t cpuinfo_arm_fpsid;
+extern uint32_t cpuinfo_arm_mvfr0;
+extern uint32_t cpuinfo_arm_wcid;
+
+/* Returns the mocked FPSID value (cpuinfo_arm_fpsid). */
+static inline uint32_t read_fpsid(void) {
+	return cpuinfo_arm_fpsid;
+}
+
+/* Returns the mocked MVFR0 value (cpuinfo_arm_mvfr0). */
+static inline uint32_t read_mvfr0(void) {
+	return cpuinfo_arm_mvfr0;
+}
+
+/* Returns the mocked WCID value (cpuinfo_arm_wcid). */
+static inline uint32_t read_wcid(void) {
+	return cpuinfo_arm_wcid;
+}
+#else
+/* read_fpsid / read_mvfr0 are only defined when not targeting ARMv7 or newer */
+#if !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
+/*
+ * CoProcessor 10 is inaccessible from user mode since ARMv7,
+ * and clang refuses to compile inline assembly when targeting ARMv7+
+ */
+/* Reads FPSID with an MRC from coprocessor p10 (cr0, cr0). */
+static inline uint32_t read_fpsid(void) {
+	uint32_t fpsid;
+	__asm__ __volatile__("MRC p10, 0x7, %[fpsid], cr0, cr0, 0" : [fpsid] "=r"(fpsid));
+	return fpsid;
+}
+
+/* Reads MVFR0 with an MRC from coprocessor p10 (cr7, cr0). */
+static inline uint32_t read_mvfr0(void) {
+	uint32_t mvfr0;
+	__asm__ __volatile__("MRC p10, 0x7, %[mvfr0], cr7, cr0, 0" : [mvfr0] "=r"(mvfr0));
+	return mvfr0;
+}
+#endif
+/* read_wcid is only defined when not targeting ARMv8 or newer */
+#if !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 8))
+/*
+ * In ARMv8, AArch32 state supports only conceptual coprocessors CP10, CP11,
+ * CP14, and CP15. AArch64 does not support the concept of coprocessors, and
+ * clang refuses to compile inline assembly when targeting ARMv8+
+ */
+/* Reads WCID with an MRC from coprocessor p1 (c0, c0). */
+static inline uint32_t read_wcid(void) {
+	uint32_t wcid;
+	__asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r"(wcid));
+	return wcid;
+}
+#endif
+#endif
--- a/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c
+++ b/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c
--- a/3rdparty/cpuinfo/src/arm/linux/hwcap.c
+++ b/3rdparty/cpuinfo/src/arm/linux/hwcap.c
@@ -0,0 +1,154 @@
+#include <limits.h>
+#include <string.h>
+
+#include <dlfcn.h>
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <arm/linux/api.h>
+#include <cpuinfo.h>
+#include <cpuinfo/log.h>
+
+#if CPUINFO_ARCH_ARM64 ||                                                     \
+	CPUINFO_ARCH_ARM && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
+		(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 16)
+#include <sys/auxv.h>
+#else
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+#endif
+
+#if CPUINFO_MOCK
+static uint32_t mock_hwcap = 0;
+/* Mock-only setter: stores the HWCAP value that the hwcap query functions in this file report under CPUINFO_MOCK. */
+void cpuinfo_set_hwcap(uint32_t hwcap) {
+	mock_hwcap = hwcap;
+}
+
+static uint64_t mock_hwcap2 = 0;
+/* Mock-only setter: stores the HWCAP2 value that the hwcap query functions in this file report under CPUINFO_MOCK. */
+void cpuinfo_set_hwcap2(uint64_t hwcap2) {
+	mock_hwcap2 = hwcap2;
+}
+#endif
+
+#if CPUINFO_ARCH_ARM
+typedef unsigned long (*getauxval_function_t)(unsigned long);
+
+/*
+ * Queries AT_HWCAP and AT_HWCAP2 through getauxval on 32-bit ARM.
+ *
+ * - CPUINFO_MOCK: reports the mocked values and returns true.
+ * - Android: looks getauxval up in libc.so at run time; returns false (and
+ *   leaves the outputs untouched) when libc.so or the symbol is unavailable.
+ * - glibc >= 2.16: calls getauxval directly and returns true.
+ * - Anything else: returns false without touching the outputs.
+ */
+bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
+#if CPUINFO_MOCK
+	hwcap[0] = mock_hwcap;
+	hwcap2[0] = mock_hwcap2;
+	return true;
+#elif defined(__ANDROID__)
+	/* Android: dynamically check if getauxval is supported */
+
+	/* Discard any pending dynamic-loader error before probing libc */
+	dlerror();
+	void* const libc_handle = dlopen("libc.so", RTLD_LAZY);
+	if (libc_handle == NULL) {
+		cpuinfo_log_warning("failed to load libc.so: %s", dlerror());
+		return false;
+	}
+
+	const getauxval_function_t getauxval_fn = (getauxval_function_t)dlsym(libc_handle, "getauxval");
+	const bool has_getauxval = getauxval_fn != NULL;
+	if (has_getauxval) {
+		hwcap[0] = getauxval_fn(AT_HWCAP);
+		hwcap2[0] = getauxval_fn(AT_HWCAP2);
+	} else {
+		cpuinfo_log_info("failed to locate getauxval in libc.so: %s", dlerror());
+	}
+
+	/* The handle is released on both the success and the lookup-failure path */
+	dlclose(libc_handle);
+	return has_getauxval;
+#elif defined(__GLIBC__) && defined(__GLIBC_MINOR__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 16)
+	/* GNU/Linux: getauxval is supported since glibc-2.16 */
+	hwcap[0] = getauxval(AT_HWCAP);
+	hwcap2[0] = getauxval(AT_HWCAP2);
+	return true;
+#else
+	return false;
+#endif
+}
+
+#ifdef __ANDROID__
+/*
+ * Android-only fallback: scans /proc/self/auxv for the AT_HWCAP and AT_HWCAP2
+ * entries. The outputs are written only after the whole file was consumed
+ * without a read error or a truncated entry; the function returns true in
+ * that case and false otherwise. An entry that does not occur in the file is
+ * reported as 0. Under CPUINFO_MOCK the mocked values are reported instead.
+ */
+bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
+#if CPUINFO_MOCK
+	hwcap[0] = mock_hwcap;
+	hwcap2[0] = mock_hwcap2;
+	return true;
+#else
+	uint64_t found_hwcap = 0;
+	uint64_t found_hwcap2 = 0;
+	bool success = false;
+
+	const int auxv_fd = open("/proc/self/auxv", O_RDONLY);
+	if (auxv_fd == -1) {
+		cpuinfo_log_warning("failed to open /proc/self/auxv: %s", strerror(errno));
+		return false;
+	}
+
+	for (;;) {
+		Elf32_auxv_t entry;
+		const ssize_t bytes_read = read(auxv_fd, &entry, sizeof(Elf32_auxv_t));
+		if (bytes_read < 0) {
+			cpuinfo_log_warning("failed to read /proc/self/auxv: %s", strerror(errno));
+			goto cleanup;
+		}
+		if (bytes_read == 0) {
+			/* End of file: every entry has been consumed */
+			break;
+		}
+		if ((size_t)bytes_read != sizeof(entry)) {
+			/* A truncated entry invalidates the whole scan */
+			cpuinfo_log_warning(
+				"failed to read %zu bytes from /proc/self/auxv: %zu bytes available",
+				sizeof(entry),
+				(size_t)bytes_read);
+			goto cleanup;
+		}
+
+		if (entry.a_type == AT_HWCAP) {
+			found_hwcap = (uint32_t)entry.a_un.a_val;
+		} else if (entry.a_type == AT_HWCAP2) {
+			found_hwcap2 = (uint64_t)entry.a_un.a_val;
+		}
+	}
+
+	/* Success, commit results */
+	hwcap[0] = found_hwcap;
+	hwcap2[0] = found_hwcap2;
+	success = true;
+
+cleanup:
+	close(auxv_fd);
+	return success;
+#endif
+}
+#endif /* __ANDROID__ */
+#elif CPUINFO_ARCH_ARM64
+/*
+ * ARM64 variant: reports AT_HWCAP (truncated to 32 bits) and AT_HWCAP2 as
+ * returned by getauxval, or the mocked values under CPUINFO_MOCK.
+ */
+void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
+#if CPUINFO_MOCK
+	hwcap[0] = mock_hwcap;
+	hwcap2[0] = mock_hwcap2;
+#else
+	const unsigned long raw_hwcap = getauxval(AT_HWCAP);
+	hwcap[0] = (uint32_t)raw_hwcap;
+	const unsigned long raw_hwcap2 = getauxval(AT_HWCAP2);
+	hwcap2[0] = (uint64_t)raw_hwcap2;
+#endif
+}
+#endif
--- a/3rdparty/cpuinfo/src/arm/linux/init.c
+++ b/3rdparty/cpuinfo/src/arm/linux/init.c
@@ -0,0 +1,888 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <arm/linux/api.h>
+#include <cpuinfo.h>
+#if defined(__ANDROID__)
+#include <arm/android/api.h>
+#endif
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+struct cpuinfo_arm_isa cpuinfo_isa = {0};
+
+static struct cpuinfo_package package = {{0}};
+
+/* Returns true when every bit set in mask is also set in bitfield (trivially true for an empty mask). */
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	const uint32_t missing_bits = mask & ~bitfield;
+	return missing_bits == 0;
+}
+
+/* Returns the smaller of two unsigned 32-bit values. */
+static inline uint32_t min(uint32_t a, uint32_t b) {
+	if (b < a) {
+		return b;
+	}
+	return a;
+}
+
+/* Three-way comparison of unsigned values: -1 when a < b, 0 when equal, 1 when a > b. */
+static inline int cmp(uint32_t a, uint32_t b) {
+	if (a < b) {
+		return -1;
+	}
+	return a > b ? 1 : 0;
+}
+
+/*
+ * Siblings-list callback: merges `processor` with the processors in
+ * [siblings_start, siblings_end) into one package cluster.
+ *
+ * Every valid sibling is flagged with CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER and
+ * receives the smallest package leader id seen so far; invalid siblings are
+ * logged and skipped. The final minimum is stored back into `processor`.
+ * Always returns true.
+ */
+static bool cluster_siblings_parser(
+	uint32_t processor,
+	uint32_t siblings_start,
+	uint32_t siblings_end,
+	struct cpuinfo_arm_linux_processor* processors) {
+	struct cpuinfo_arm_linux_processor* const self = &processors[processor];
+	self->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+
+	/* Running minimum of the leader ids encountered so far */
+	uint32_t lowest_leader = self->package_leader_id;
+
+	uint32_t index = siblings_start;
+	while (index < siblings_end) {
+		struct cpuinfo_arm_linux_processor* const peer = &processors[index];
+		if (bitmask_all(peer->flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (peer->package_leader_id < lowest_leader) {
+				lowest_leader = peer->package_leader_id;
+			}
+			peer->package_leader_id = lowest_leader;
+			peer->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+		} else {
+			cpuinfo_log_info(
+				"invalid processor %" PRIu32 " reported as a sibling for processor %" PRIu32,
+				index,
+				processor);
+		}
+		index++;
+	}
+
+	self->package_leader_id = lowest_leader;
+
+	return true;
+}
+
+/*
+ * qsort comparator for struct cpuinfo_arm_linux_processor entries.
+ *
+ * Ordering keys, in decreasing priority:
+ *  1. entries flagged CPUINFO_LINUX_FLAG_VALID come first;
+ *  2. higher midr_score_core() first (only consulted when the MIDRs differ);
+ *  3. higher max_frequency first;
+ *  4. higher package_leader_id first;
+ *  5. lower system_processor_id first.
+ */
+static int cmp_arm_linux_processor(const void* ptr_a, const void* ptr_b) {
+	const struct cpuinfo_arm_linux_processor* lhs = ptr_a;
+	const struct cpuinfo_arm_linux_processor* rhs = ptr_b;
+
+	/* Move usable processors towards the start of the array */
+	const bool lhs_usable = bitmask_all(lhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	const bool rhs_usable = bitmask_all(rhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	if (lhs_usable != rhs_usable) {
+		return lhs_usable ? -1 : 1;
+	}
+
+	/* Compare based on core type (e.g. Cortex-A57 < Cortex-A53) */
+	if (lhs->midr != rhs->midr) {
+		const uint32_t lhs_score = midr_score_core(lhs->midr);
+		const uint32_t rhs_score = midr_score_core(rhs->midr);
+		if (lhs_score > rhs_score) {
+			return -1;
+		}
+		if (lhs_score < rhs_score) {
+			return 1;
+		}
+	}
+
+	/* Compare based on core frequency (e.g. 2.0 GHz < 1.2 GHz) */
+	if (lhs->max_frequency > rhs->max_frequency) {
+		return -1;
+	}
+	if (lhs->max_frequency < rhs->max_frequency) {
+		return 1;
+	}
+
+	/* Compare based on cluster leader id (i.e. cluster 1 < cluster 0) */
+	if (lhs->package_leader_id > rhs->package_leader_id) {
+		return -1;
+	}
+	if (lhs->package_leader_id < rhs->package_leader_id) {
+		return 1;
+	}
+
+	/* Compare based on system processor id (i.e. processor 0 < processor 1) */
+	return cmp(lhs->system_processor_id, rhs->system_processor_id);
+}
+
+/*
+ * Initializes the cpuinfo global tables for ARM/ARM64 Linux (and Android).
+ *
+ * Stages, in order:
+ *  1. Determine how many logical processors to describe from the kernel's
+ *     possible/present lists and allocate a scratch array for them.
+ *  2. Parse /proc/cpuinfo, mark valid processors, decode the chipset and the
+ *     ISA features (getauxval, with Android fallbacks on 32-bit ARM).
+ *  3. Read frequencies and package ids, group processors into clusters
+ *     (sysfs siblings lists, then heuristics, then a sequential scan).
+ *  4. Sort the scratch array, then build the processor, core, cluster, uarch
+ *     and cache tables.
+ *  5. Publish the tables through the cpuinfo_* globals and set
+ *     cpuinfo_is_initialized.
+ *
+ * On any failure after the scratch array was allocated, control goes to the
+ * cleanup label, which frees everything that was not published; in that case
+ * cpuinfo_is_initialized is not set by this function.
+ */
+void cpuinfo_arm_linux_init(void) {
+	struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL;
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+	const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+	uint32_t* linux_cpu_to_uarch_index_map = NULL;
+
+	const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
+	cpuinfo_log_debug("system maximum processors count: %" PRIu32, max_processors_count);
+
+	const uint32_t max_possible_processors_count =
+		1 + cpuinfo_linux_get_max_possible_processor(max_processors_count);
+	cpuinfo_log_debug("maximum possible processors count: %" PRIu32, max_possible_processors_count);
+	const uint32_t max_present_processors_count = 1 + cpuinfo_linux_get_max_present_processor(max_processors_count);
+	cpuinfo_log_debug("maximum present processors count: %" PRIu32, max_present_processors_count);
+
+	/* A count of 0 is treated below as "this list could not be parsed" */
+	uint32_t valid_processor_mask = 0;
+	uint32_t arm_linux_processors_count = max_processors_count;
+	if (max_present_processors_count != 0) {
+		arm_linux_processors_count = min(arm_linux_processors_count, max_present_processors_count);
+		valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
+	}
+	if (max_possible_processors_count != 0) {
+		arm_linux_processors_count = min(arm_linux_processors_count, max_possible_processors_count);
+		valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
+	}
+	if ((max_present_processors_count | max_possible_processors_count) == 0) {
+		cpuinfo_log_error("failed to parse both lists of possible and present processors");
+		return;
+	}
+
+	arm_linux_processors = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_arm_linux_processor));
+	if (arm_linux_processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " ARM logical processors",
+			arm_linux_processors_count * sizeof(struct cpuinfo_arm_linux_processor),
+			arm_linux_processors_count);
+		return;
+	}
+
+	if (max_possible_processors_count) {
+		cpuinfo_linux_detect_possible_processors(
+			arm_linux_processors_count,
+			&arm_linux_processors->flags,
+			sizeof(struct cpuinfo_arm_linux_processor),
+			CPUINFO_LINUX_FLAG_POSSIBLE);
+	}
+
+	if (max_present_processors_count) {
+		cpuinfo_linux_detect_present_processors(
+			arm_linux_processors_count,
+			&arm_linux_processors->flags,
+			sizeof(struct cpuinfo_arm_linux_processor),
+			CPUINFO_LINUX_FLAG_PRESENT);
+	}
+
+#if defined(__ANDROID__)
+	struct cpuinfo_android_properties android_properties;
+	cpuinfo_arm_android_parse_properties(&android_properties);
+#else
+	char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
+#endif
+	char proc_cpuinfo_revision[CPUINFO_REVISION_VALUE_MAX];
+
+	if (!cpuinfo_arm_linux_parse_proc_cpuinfo(
+#if defined(__ANDROID__)
+		    android_properties.proc_cpuinfo_hardware,
+#else
+		    proc_cpuinfo_hardware,
+#endif
+		    proc_cpuinfo_revision,
+		    arm_linux_processors_count,
+		    arm_linux_processors)) {
+		cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
+		/* The scratch array is already allocated: release it via cleanup
+		 * instead of returning directly */
+		goto cleanup;
+	}
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, valid_processor_mask)) {
+			arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
+			cpuinfo_log_debug(
+				"parsed processor %" PRIu32 " MIDR 0x%08" PRIx32, i, arm_linux_processors[i].midr);
+		}
+	}
+
+	uint32_t valid_processors = 0, last_midr = 0;
+#if CPUINFO_ARCH_ARM
+	uint32_t last_architecture_version = 0, last_architecture_flags = 0;
+#endif
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		arm_linux_processors[i].system_processor_id = i;
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) {
+				/*
+				 * Processor is in possible and present lists,
+				 * and also reported in /proc/cpuinfo. This
+				 * processor is available for compute.
+				 */
+				valid_processors += 1;
+			} else {
+				/*
+				 * Processor is in possible and present lists,
+				 * but not reported in /proc/cpuinfo. This is
+				 * fairly common: high-index processors can be
+				 * not reported if they are offline.
+				 */
+				cpuinfo_log_info("processor %" PRIu32 " is not listed in /proc/cpuinfo", i);
+			}
+
+			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+				last_midr = arm_linux_processors[i].midr;
+			}
+#if CPUINFO_ARCH_ARM
+			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ARCHITECTURE)) {
+				last_architecture_version = arm_linux_processors[i].architecture_version;
+				last_architecture_flags = arm_linux_processors[i].architecture_flags;
+			}
+#endif
+		} else {
+			/* Processor reported in /proc/cpuinfo, but not in
+			 * possible and/or present lists: log and ignore.
+			 * Only warn when the processor really was listed in
+			 * /proc/cpuinfo (CPUINFO_ARM_LINUX_VALID_PROCESSOR). */
+			if (arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) {
+				cpuinfo_log_warning("invalid processor %" PRIu32 " reported in /proc/cpuinfo", i);
+			}
+		}
+	}
+
+#if defined(__ANDROID__)
+	const struct cpuinfo_arm_chipset chipset =
+		cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0);
+#else
+	const struct cpuinfo_arm_chipset chipset =
+		cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0);
+#endif
+
+#if CPUINFO_ARCH_ARM
+	uint32_t isa_features = 0;
+	uint64_t isa_features2 = 0;
+#ifdef __ANDROID__
+	/*
+	 * On Android before API 20, libc.so does not provide getauxval
+	 * function. Thus, we try to dynamically find it, or use two fallback
+	 * mechanisms:
+	 * 1. dlopen libc.so, and try to find getauxval
+	 * 2. Parse /proc/self/auxv procfs file
+	 * 3. Use features reported in /proc/cpuinfo
+	 */
+	if (!cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2)) {
+		/* getauxval can't be used, fall back to parsing /proc/self/auxv
+		 */
+		if (!cpuinfo_arm_linux_hwcap_from_procfs(&isa_features, &isa_features2)) {
+			/*
+			 * Reading /proc/self/auxv failed, probably due to file
+			 * permissions. Use information from /proc/cpuinfo to
+			 * detect ISA.
+			 *
+			 * If different processors report different ISA
+			 * features, take the intersection.
+			 */
+			uint32_t processors_with_features = 0;
+			for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+				if (bitmask_all(
+					    arm_linux_processors[i].flags,
+					    CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_FEATURES)) {
+					if (processors_with_features == 0) {
+						isa_features = arm_linux_processors[i].features;
+						isa_features2 = arm_linux_processors[i].features2;
+					} else {
+						isa_features &= arm_linux_processors[i].features;
+						isa_features2 &= arm_linux_processors[i].features2;
+					}
+					processors_with_features += 1;
+				}
+			}
+		}
+	}
+#else
+	/* On GNU/Linux getauxval is always available */
+	cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
+#endif
+	cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+		isa_features,
+		isa_features2,
+		last_midr,
+		last_architecture_version,
+		last_architecture_flags,
+		&chipset,
+		&cpuinfo_isa);
+#elif CPUINFO_ARCH_ARM64
+	uint32_t isa_features = 0;
+	uint64_t isa_features2 = 0;
+	/* getauxval is always available on ARM64 Android */
+	cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
+	cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+		isa_features, isa_features2, last_midr, &chipset, &cpuinfo_isa);
+#endif
+
+	/* Detect min/max frequency and package ID */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			const uint32_t max_frequency = cpuinfo_linux_get_processor_max_frequency(i);
+			if (max_frequency != 0) {
+				arm_linux_processors[i].max_frequency = max_frequency;
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+			}
+
+			const uint32_t min_frequency = cpuinfo_linux_get_processor_min_frequency(i);
+			if (min_frequency != 0) {
+				arm_linux_processors[i].min_frequency = min_frequency;
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+			}
+
+			if (cpuinfo_linux_get_processor_package_id(i, &arm_linux_processors[i].package_id)) {
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID;
+			}
+		}
+	}
+
+	/* Initialize topology group IDs */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		arm_linux_processors[i].package_leader_id = i;
+	}
+
+	/* Propagate topology group IDs among siblings */
+	bool detected_core_siblings_list_node = false;
+	bool detected_cluster_cpus_list_node = false;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+
+		if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_PACKAGE_ID)) {
+			continue;
+		}
+
+		/* Use the cluster_cpus_list topology node if available. If not
+		 * found, cache the result to avoid repeatedly attempting to
+		 * read the non-existent paths.
+		 * */
+		if (!detected_core_siblings_list_node && !detected_cluster_cpus_list_node) {
+			if (cpuinfo_linux_detect_cluster_cpus(
+				    arm_linux_processors_count,
+				    i,
+				    (cpuinfo_siblings_callback)cluster_siblings_parser,
+				    arm_linux_processors)) {
+				detected_cluster_cpus_list_node = true;
+				continue;
+			} else {
+				detected_core_siblings_list_node = true;
+			}
+		}
+
+		/* The cached result above will guarantee only one of the blocks
+		 * below will execute, with a bias towards cluster_cpus_list.
+		 **/
+		if (detected_core_siblings_list_node) {
+			cpuinfo_linux_detect_core_siblings(
+				arm_linux_processors_count,
+				i,
+				(cpuinfo_siblings_callback)cluster_siblings_parser,
+				arm_linux_processors);
+		}
+
+		if (detected_cluster_cpus_list_node) {
+			cpuinfo_linux_detect_cluster_cpus(
+				arm_linux_processors_count,
+				i,
+				(cpuinfo_siblings_callback)cluster_siblings_parser,
+				arm_linux_processors);
+		}
+	}
+
+	/* Propagate all cluster IDs */
+	uint32_t clustered_processors = 0;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(
+			    arm_linux_processors[i].flags,
+			    CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
+			clustered_processors += 1;
+
+			const uint32_t package_leader_id = arm_linux_processors[i].package_leader_id;
+			if (package_leader_id < i) {
+				arm_linux_processors[i].package_leader_id =
+					arm_linux_processors[package_leader_id].package_leader_id;
+			}
+
+			cpuinfo_log_debug(
+				"processor %" PRIu32 " clustered with processor %" PRIu32
+				" as inferred from system siblings lists",
+				i,
+				arm_linux_processors[i].package_leader_id);
+		}
+	}
+
+	if (clustered_processors != valid_processors) {
+		/*
+		 * Topology information about some or all logical processors may
+		 * be unavailable, for the following reasons:
+		 * - Linux kernel is too old, or configured without support for
+		 * topology information in sysfs.
+		 * - Core is offline, and Linux kernel is configured to not
+		 * report topology for offline cores.
+		 *
+		 * In this case, we assign processors to clusters using two
+		 * methods:
+		 * - Try heuristic cluster configurations (e.g. 6-core SoC
+		 * usually has 4+2 big.LITTLE configuration).
+		 * - If heuristic failed, assign processors to core clusters in
+		 * a sequential scan.
+		 */
+		if (!cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+			    valid_processors, arm_linux_processors_count, arm_linux_processors)) {
+			cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
+				arm_linux_processors_count, arm_linux_processors);
+		}
+	}
+
+	cpuinfo_arm_linux_count_cluster_processors(arm_linux_processors_count, arm_linux_processors);
+
+	const uint32_t cluster_count = cpuinfo_arm_linux_detect_cluster_midr(
+		&chipset, arm_linux_processors_count, valid_processors, arm_linux_processors);
+
+	/* Initialize core vendor, uarch, MIDR, and frequency for every logical
+	 * processor */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			const uint32_t cluster_leader = arm_linux_processors[i].package_leader_id;
+			if (cluster_leader == i) {
+				/* Cluster leader: decode core vendor and uarch
+				 */
+				cpuinfo_arm_decode_vendor_uarch(
+					arm_linux_processors[cluster_leader].midr,
+#if CPUINFO_ARCH_ARM
+					!!(arm_linux_processors[cluster_leader].features &
+					   CPUINFO_ARM_LINUX_FEATURE_VFPV4),
+#endif
+					&arm_linux_processors[cluster_leader].vendor,
+					&arm_linux_processors[cluster_leader].uarch);
+			} else {
+				/* Cluster non-leader: copy vendor, uarch, MIDR,
+				 * and frequency from cluster leader */
+				arm_linux_processors[i].flags |= arm_linux_processors[cluster_leader].flags &
+					(CPUINFO_ARM_LINUX_VALID_MIDR | CPUINFO_LINUX_FLAG_MAX_FREQUENCY);
+				arm_linux_processors[i].midr = arm_linux_processors[cluster_leader].midr;
+				arm_linux_processors[i].vendor = arm_linux_processors[cluster_leader].vendor;
+				arm_linux_processors[i].uarch = arm_linux_processors[cluster_leader].uarch;
+				arm_linux_processors[i].max_frequency =
+					arm_linux_processors[cluster_leader].max_frequency;
+			}
+		}
+	}
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			cpuinfo_log_debug(
+				"post-analysis processor %" PRIu32 ": MIDR %08" PRIx32 " frequency %" PRIu32,
+				i,
+				arm_linux_processors[i].midr,
+				arm_linux_processors[i].max_frequency);
+		}
+	}
+
+	/* After sorting, array position no longer equals the Linux CPU number;
+	 * system_processor_id keeps the original index */
+	qsort(arm_linux_processors,
+	      arm_linux_processors_count,
+	      sizeof(struct cpuinfo_arm_linux_processor),
+	      cmp_arm_linux_processor);
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			cpuinfo_log_debug(
+				"post-sort processor %" PRIu32 ": system id %" PRIu32 " MIDR %08" PRIx32
+				" frequency %" PRIu32,
+				i,
+				arm_linux_processors[i].system_processor_id,
+				arm_linux_processors[i].midr,
+				arm_linux_processors[i].max_frequency);
+		}
+	}
+
+	/* Count runs of identical uarch among the sorted valid processors */
+	uint32_t uarchs_count = 0;
+	enum cpuinfo_uarch last_uarch;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) {
+				last_uarch = arm_linux_processors[i].uarch;
+				uarchs_count += 1;
+			}
+			arm_linux_processors[i].uarch_index = uarchs_count - 1;
+		}
+	}
+
+	/*
+	 * Assumptions:
+	 * - No SMP (i.e. each core supports only one hardware thread).
+	 * - Level 1 instruction and data caches are private to the core
+	 * clusters.
+	 * - Level 2 and level 3 cache is shared between cores in the same
+	 * cluster.
+	 */
+	cpuinfo_arm_chipset_to_string(&chipset, package.name);
+	package.processor_count = valid_processors;
+	package.core_count = valid_processors;
+	package.cluster_count = cluster_count;
+
+	processors = calloc(valid_processors, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			valid_processors * sizeof(struct cpuinfo_processor),
+			valid_processors);
+		goto cleanup;
+	}
+
+	cores = calloc(valid_processors, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			valid_processors * sizeof(struct cpuinfo_core),
+			valid_processors);
+		goto cleanup;
+	}
+
+	clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			cluster_count * sizeof(struct cpuinfo_cluster),
+			cluster_count);
+		goto cleanup;
+	}
+
+	uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " microarchitectures",
+			uarchs_count * sizeof(struct cpuinfo_uarch_info),
+			uarchs_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*));
+	if (linux_cpu_to_processor_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " logical processor mapping entries",
+			arm_linux_processors_count * sizeof(struct cpuinfo_processor*),
+			arm_linux_processors_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_core_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_core*));
+	if (linux_cpu_to_core_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " core mapping entries",
+			arm_linux_processors_count * sizeof(struct cpuinfo_core*),
+			arm_linux_processors_count);
+		goto cleanup;
+	}
+
+	/* The uarch index map is only built when more than one uarch exists */
+	if (uarchs_count > 1) {
+		linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t));
+		if (linux_cpu_to_uarch_index_map == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for %" PRIu32 " uarch index mapping entries",
+				arm_linux_processors_count * sizeof(uint32_t),
+				arm_linux_processors_count);
+			goto cleanup;
+		}
+	}
+
+	l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache));
+	if (l1i == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+			valid_processors * sizeof(struct cpuinfo_cache),
+			valid_processors);
+		goto cleanup;
+	}
+
+	l1d = calloc(valid_processors, sizeof(struct cpuinfo_cache));
+	if (l1d == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+			valid_processors * sizeof(struct cpuinfo_cache),
+			valid_processors);
+		goto cleanup;
+	}
+
+	uint32_t uarchs_index = 0;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) {
+				last_uarch = arm_linux_processors[i].uarch;
+				uarchs[uarchs_index] = (struct cpuinfo_uarch_info){
+					.uarch = arm_linux_processors[i].uarch,
+					.midr = arm_linux_processors[i].midr,
+				};
+				uarchs_index += 1;
+			}
+			uarchs[uarchs_index - 1].processor_count += 1;
+			uarchs[uarchs_index - 1].core_count += 1;
+		}
+	}
+
+	/* cluster_id starts at UINT32_MAX so the first increment wraps to 0 */
+	uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
+	/* Indication whether L3 (if it exists) is shared between all cores */
+	bool shared_l3 = true;
+	/* Populate cache information structures in l1i, l1d */
+	for (uint32_t i = 0; i < valid_processors; i++) {
+		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+			cluster_id += 1;
+			clusters[cluster_id] = (struct cpuinfo_cluster){
+				.processor_start = i,
+				.processor_count = arm_linux_processors[i].package_processor_count,
+				.core_start = i,
+				.core_count = arm_linux_processors[i].package_processor_count,
+				.cluster_id = cluster_id,
+				.package = &package,
+				.vendor = arm_linux_processors[i].vendor,
+				.uarch = arm_linux_processors[i].uarch,
+				.midr = arm_linux_processors[i].midr,
+			};
+		}
+
+		processors[i].smt_id = 0;
+		processors[i].core = cores + i;
+		processors[i].cluster = clusters + cluster_id;
+		processors[i].package = &package;
+		processors[i].linux_id = (int)arm_linux_processors[i].system_processor_id;
+		processors[i].cache.l1i = l1i + i;
+		processors[i].cache.l1d = l1d + i;
+		linux_cpu_to_processor_map[arm_linux_processors[i].system_processor_id] = &processors[i];
+
+		cores[i].processor_start = i;
+		cores[i].processor_count = 1;
+		cores[i].core_id = i;
+		cores[i].cluster = clusters + cluster_id;
+		cores[i].package = &package;
+		cores[i].vendor = arm_linux_processors[i].vendor;
+		cores[i].uarch = arm_linux_processors[i].uarch;
+		cores[i].midr = arm_linux_processors[i].midr;
+		linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i];
+
+		if (linux_cpu_to_uarch_index_map != NULL) {
+			linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] =
+				arm_linux_processors[i].uarch_index;
+		}
+
+		struct cpuinfo_cache temp_l2 = {0}, temp_l3 = {0};
+		cpuinfo_arm_decode_cache(
+			arm_linux_processors[i].uarch,
+			arm_linux_processors[i].package_processor_count,
+			arm_linux_processors[i].midr,
+			&chipset,
+			cluster_id,
+			arm_linux_processors[i].architecture_version,
+			&l1i[i],
+			&l1d[i],
+			&temp_l2,
+			&temp_l3);
+		l1i[i].processor_start = l1d[i].processor_start = i;
+		l1i[i].processor_count = l1d[i].processor_count = 1;
+#if CPUINFO_ARCH_ARM
+		/* L1I reported in /proc/cpuinfo overrides defaults */
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ICACHE)) {
+			l1i[i] = (struct cpuinfo_cache){
+				.size = arm_linux_processors[i].proc_cpuinfo_cache.i_size,
+				.associativity = arm_linux_processors[i].proc_cpuinfo_cache.i_assoc,
+				.sets = arm_linux_processors[i].proc_cpuinfo_cache.i_sets,
+				.partitions = 1,
+				.line_size = arm_linux_processors[i].proc_cpuinfo_cache.i_line_length};
+		}
+		/* L1D reported in /proc/cpuinfo overrides defaults */
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_DCACHE)) {
+			l1d[i] = (struct cpuinfo_cache){
+				.size = arm_linux_processors[i].proc_cpuinfo_cache.d_size,
+				.associativity = arm_linux_processors[i].proc_cpuinfo_cache.d_assoc,
+				.sets = arm_linux_processors[i].proc_cpuinfo_cache.d_sets,
+				.partitions = 1,
+				.line_size = arm_linux_processors[i].proc_cpuinfo_cache.d_line_length};
+		}
+#endif
+
+		if (temp_l3.size != 0) {
+			/*
+			 * Assumptions:
+			 * - L2 is private to each core
+			 * - L3 is shared by cores in the same cluster
+			 * - If cores in different clusters report the same L3,
+			 * it is shared between all cores.
+			 */
+			l2_count += 1;
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				if (cluster_id == 0) {
+					big_l3_size = temp_l3.size;
+					l3_count = 1;
+				} else if (temp_l3.size != big_l3_size) {
+					/* If some cores have different L3 size,
+					 * L3 is not shared between all cores */
+					shared_l3 = false;
+					l3_count += 1;
+				}
+			}
+		} else {
+			/* If some cores don't have L3 cache, L3 is not shared
+			 * between all cores
+			 */
+			shared_l3 = false;
+			if (temp_l2.size != 0) {
+				/* Assume L2 is shared by cores in the same
+				 * cluster */
+				if (arm_linux_processors[i].package_leader_id ==
+				    arm_linux_processors[i].system_processor_id) {
+					l2_count += 1;
+				}
+			}
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+
+		if (l3_count != 0) {
+			l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+			if (l3 == NULL) {
+				cpuinfo_log_error(
+					"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
+					l3_count * sizeof(struct cpuinfo_cache),
+					l3_count);
+				goto cleanup;
+			}
+		}
+	}
+
+	/* Second pass over the same processors: fill the L2/L3 tables that
+	 * were sized by the first pass */
+	cluster_id = UINT32_MAX;
+	uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX;
+	for (uint32_t i = 0; i < valid_processors; i++) {
+		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+			cluster_id++;
+		}
+
+		struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = {0}, temp_l3 = {0};
+		cpuinfo_arm_decode_cache(
+			arm_linux_processors[i].uarch,
+			arm_linux_processors[i].package_processor_count,
+			arm_linux_processors[i].midr,
+			&chipset,
+			cluster_id,
+			arm_linux_processors[i].architecture_version,
+			&dummy_l1i,
+			&dummy_l1d,
+			&temp_l2,
+			&temp_l3);
+
+		if (temp_l3.size != 0) {
+			/*
+			 * Assumptions:
+			 * - L2 is private to each core
+			 * - L3 is shared by cores in the same cluster
+			 * - If cores in different clusters report the same L3,
+			 * it is shared between all cores.
+			 */
+			l2_index += 1;
+			l2[l2_index] = (struct cpuinfo_cache){
+				.size = temp_l2.size,
+				.associativity = temp_l2.associativity,
+				.sets = temp_l2.sets,
+				.partitions = 1,
+				.line_size = temp_l2.line_size,
+				.flags = temp_l2.flags,
+				.processor_start = i,
+				.processor_count = 1,
+			};
+			processors[i].cache.l2 = l2 + l2_index;
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				l3_index += 1;
+				if (l3_index < l3_count) {
+					l3[l3_index] = (struct cpuinfo_cache){
+						.size = temp_l3.size,
+						.associativity = temp_l3.associativity,
+						.sets = temp_l3.sets,
+						.partitions = 1,
+						.line_size = temp_l3.line_size,
+						.flags = temp_l3.flags,
+						.processor_start = i,
+						.processor_count = shared_l3
+							? valid_processors
+							: arm_linux_processors[i].package_processor_count,
+					};
+				}
+			}
+			if (shared_l3) {
+				processors[i].cache.l3 = l3;
+			} else if (l3_index < l3_count) {
+				processors[i].cache.l3 = l3 + l3_index;
+			}
+		} else if (temp_l2.size != 0) {
+			/* Assume L2 is shared by cores in the same cluster */
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				l2_index += 1;
+				l2[l2_index] = (struct cpuinfo_cache){
+					.size = temp_l2.size,
+					.associativity = temp_l2.associativity,
+					.sets = temp_l2.sets,
+					.partitions = 1,
+					.line_size = temp_l2.line_size,
+					.flags = temp_l2.flags,
+					.processor_start = i,
+					.processor_count = arm_linux_processors[i].package_processor_count,
+				};
+			}
+			processors[i].cache.l2 = l2 + l2_index;
+		}
+	}
+
+	/* Commit */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = &package;
+	cpuinfo_uarchs = uarchs;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+
+	cpuinfo_processors_count = valid_processors;
+	cpuinfo_cores_count = valid_processors;
+	cpuinfo_clusters_count = cluster_count;
+	cpuinfo_packages_count = 1;
+	cpuinfo_uarchs_count = uarchs_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_linux_cpu_max = arm_linux_processors_count;
+	cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+	cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+	cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;
+
+	/* Full barrier between publishing the tables and raising the flag */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership has moved to the globals: clear the locals so the cleanup
+	 * code below only frees the scratch array */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	uarchs = NULL;
+	l1i = l1d = l2 = l3 = NULL;
+	linux_cpu_to_processor_map = NULL;
+	linux_cpu_to_core_map = NULL;
+	linux_cpu_to_uarch_index_map = NULL;
+
+cleanup:
+	free(arm_linux_processors);
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(uarchs);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(linux_cpu_to_processor_map);
+	free(linux_cpu_to_core_map);
+	free(linux_cpu_to_uarch_index_map);
+}
--- a/3rdparty/cpuinfo/src/arm/linux/midr.c
+++ b/3rdparty/cpuinfo/src/arm/linux/midr.c
--- a/3rdparty/cpuinfo/src/arm/mach/init.c
+++ b/3rdparty/cpuinfo/src/arm/mach/init.c
@@ -0,0 +1,692 @@
+#include <alloca.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <errno.h>
+#include <mach/machine.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <mach/api.h>
+
+/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
+#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
+#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
+#endif
+#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
+#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+#endif
+#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
+#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
+#endif
+#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
+#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
+#endif
+
+/*
+ * ISA feature flags reported for this platform. The fields listed here are
+ * enabled unconditionally at load time; cpuinfo_arm_mach_init() switches on
+ * further features after querying sysctl and the CPU family.
+ */
+struct cpuinfo_arm_isa cpuinfo_isa = {
+	.aes = true,
+	.sha1 = true,
+	.sha2 = true,
+	.pmull = true,
+	.crc32 = true,
+};
+
+/*
+ * Reads a 32-bit integer from the CTL_HW sysctl tree.
+ *
+ * @param type_specifier - second-level MIB identifier below CTL_HW.
+ * @param name - human-readable name of the value, used only in log messages.
+ *
+ * @returns the value reported by the kernel, or 0 when the query fails or the
+ *          value is not exactly 4 bytes wide.
+ */
+static uint32_t get_sys_info(int type_specifier, const char* name) {
+	size_t size = 0;
+	uint32_t result = 0;
+	int mib[2] = {CTL_HW, type_specifier};
+
+	/* A NULL output buffer makes sysctl report only the size of the value */
+	if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0) {
+		cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
+		return 0;
+	}
+	if (size != sizeof(uint32_t)) {
+		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", name);
+		return 0;
+	}
+	/* The read itself can still fail; do not report a stale zero as success */
+	if (sysctl(mib, 2, &result, &size, NULL, 0) != 0) {
+		cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
+		return 0;
+	}
+	cpuinfo_log_debug("%s: %" PRIu32 ", size = %zu", name, result, size);
+	return result;
+}
+
+/*
+ * Reads a 32-bit integer sysctl value identified by its string name.
+ *
+ * @param type_specifier - sysctl name, e.g. "hw.cpufamily".
+ *
+ * @returns the value reported by the kernel, or 0 when the name is unknown,
+ *          the query fails, or the value is not exactly 4 bytes wide.
+ */
+static uint32_t get_sys_info_by_name(const char* type_specifier) {
+	size_t size = 0;
+	uint32_t result = 0;
+
+	/* A NULL output buffer makes sysctlbyname report only the value size */
+	if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) {
+		cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
+		return 0;
+	}
+	if (size != sizeof(uint32_t)) {
+		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", type_specifier);
+		return 0;
+	}
+	/* The read itself can still fail; do not report a stale zero as success */
+	if (sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0) {
+		cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
+		return 0;
+	}
+	cpuinfo_log_debug("%s: %" PRIu32 ", size = %zu", type_specifier, result, size);
+	return result;
+}
+
+/*
+ * Maps an Apple CPU family identifier and a core position to a
+ * microarchitecture.
+ *
+ * @param cpu_family - value of the "hw.cpufamily" sysctl.
+ * @param core_index - zero-based index of the core being classified.
+ * @param core_count - total number of cores.
+ *
+ * @returns the matching cpuinfo_uarch value, or cpuinfo_uarch_unknown for
+ *          families not listed here.
+ */
+static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) {
+	/*
+	 * From Vortex/Tempest onwards the last four cores are treated as the
+	 * efficiency cores; every core before them is a performance core.
+	 */
+	const bool outside_last_four = core_index + 4 < core_count;
+	enum cpuinfo_uarch uarch = cpuinfo_uarch_unknown;
+
+	switch (cpu_family) {
+		case CPUFAMILY_ARM_CYCLONE:
+			uarch = cpuinfo_uarch_cyclone;
+			break;
+		case CPUFAMILY_ARM_TYPHOON:
+			uarch = cpuinfo_uarch_typhoon;
+			break;
+		case CPUFAMILY_ARM_TWISTER:
+			uarch = cpuinfo_uarch_twister;
+			break;
+		case CPUFAMILY_ARM_HURRICANE:
+			uarch = cpuinfo_uarch_hurricane;
+			break;
+		case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			/* The first two cores are Monsoon, the rest are Mistral */
+			if (core_index < 2) {
+				uarch = cpuinfo_uarch_monsoon;
+			} else {
+				uarch = cpuinfo_uarch_mistral;
+			}
+			break;
+		case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			/* 2x or 4x Vortex followed by 4x Tempest */
+			if (outside_last_four) {
+				uarch = cpuinfo_uarch_vortex;
+			} else {
+				uarch = cpuinfo_uarch_tempest;
+			}
+			break;
+		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			/* 2x (or presumably 4x) Lightning followed by 4x Thunder */
+			if (outside_last_four) {
+				uarch = cpuinfo_uarch_lightning;
+			} else {
+				uarch = cpuinfo_uarch_thunder;
+			}
+			break;
+		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+			/* 2x or 4x Firestorm followed by 4x Icestorm */
+			if (outside_last_four) {
+				uarch = cpuinfo_uarch_firestorm;
+			} else {
+				uarch = cpuinfo_uarch_icestorm;
+			}
+			break;
+		case CPUFAMILY_ARM_AVALANCHE_BLIZZARD:
+			/* 2x Avalanche followed by 4x Blizzard */
+			if (outside_last_four) {
+				uarch = cpuinfo_uarch_avalanche;
+			} else {
+				uarch = cpuinfo_uarch_blizzard;
+			}
+			break;
+		default:
+			/* Family not recognized: leave the result as unknown */
+			break;
+	}
+
+	return uarch;
+}
+
+/*
+ * Copies the "machdep.cpu.brand_string" sysctl value into package_name.
+ *
+ * @param package_name - destination buffer of at least
+ *                       CPUINFO_PACKAGE_NAME_MAX bytes.
+ *
+ * @returns true when a non-empty brand string was copied, false when the
+ *          sysctl query failed or produced an empty string.
+ */
+static int read_package_name_from_brand_string(char* package_name) {
+	size_t size = 0;
+	if (sysctlbyname("machdep.cpu.brand_string", NULL, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"machdep.cpu.brand_string\") failed: %s", strerror(errno));
+		return false;
+	}
+
+	/* One extra byte so the buffer can always be NUL-terminated below */
+	char* brand_string = alloca(size + 1);
+	if (sysctlbyname("machdep.cpu.brand_string", brand_string, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"machdep.cpu.brand_string\") failed: %s", strerror(errno));
+		return false;
+	}
+	/* size now holds the number of bytes actually written by the kernel */
+	brand_string[size] = '\0';
+	cpuinfo_log_debug("machdep.cpu.brand_string: %s", brand_string);
+
+	if (brand_string[0] == '\0') {
+		/* Nothing useful to report; let the caller try another source */
+		cpuinfo_log_info("sysctlbyname(\"machdep.cpu.brand_string\") returned an empty string");
+		return false;
+	}
+
+	strlcpy(package_name, brand_string, CPUINFO_PACKAGE_NAME_MAX);
+	return true;
+}
+
+/*
+ * Derives an "Apple A<N>[X]" package name from the "hw.machine" sysctl value
+ * (e.g. "iPhone10,3"), using hard-coded knowledge of iPhone, iPad and iPod
+ * model identifiers.
+ *
+ * @param package_name - destination buffer of at least
+ *                       CPUINFO_PACKAGE_NAME_MAX bytes.
+ *
+ * @returns true when a chip name was written to package_name, false when the
+ *          sysctl query failed, the identifier could not be parsed, or the
+ *          device is not recognized.
+ */
+static int decode_package_name_from_hw_machine(char* package_name) {
+	size_t size = 0;
+	if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
+		return false;
+	}
+
+	/* One extra byte so the buffer can always be NUL-terminated below */
+	char* machine_name = alloca(size + 1);
+	if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
+		return false;
+	}
+	/* size now holds the number of bytes actually written by the kernel */
+	machine_name[size] = '\0';
+	cpuinfo_log_debug("hw.machine: %s", machine_name);
+
+	/* Expected form: <letters><major>,<minor>, e.g. "iPad6,11" */
+	char name[10];
+	uint32_t major = 0, minor = 0;
+	if (sscanf(machine_name, "%9[^,0123456789]%" SCNu32 ",%" SCNu32, name, &major, &minor) != 3) {
+		/* A pattern mismatch does not set errno, so report the input instead */
+		cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", machine_name);
+		return false;
+	}
+
+	uint32_t chip_model = 0;
+	char suffix = '\0';
+	if (strcmp(name, "iPhone") == 0) {
+		/*
+		 * iPhone 4 and up are supported:
+		 *  - iPhone 4       [A4]:  iPhone3,1, iPhone3,2, iPhone3,3
+		 *  - iPhone 4S      [A5]:  iPhone4,1
+		 *  - iPhone 5       [A6]:  iPhone5,1, iPhone5,2
+		 *  - iPhone 5c      [A6]:  iPhone5,3, iPhone5,4
+		 *  - iPhone 5s      [A7]:  iPhone6,1, iPhone6,2
+		 *  - iPhone 6       [A8]:  iPhone7,2
+		 *  - iPhone 6 Plus  [A8]:  iPhone7,1
+		 *  - iPhone 6s      [A9]:  iPhone8,1
+		 *  - iPhone 6s Plus [A9]:  iPhone8,2
+		 *  - iPhone SE      [A9]:  iPhone8,4
+		 *  - iPhone 7       [A10]: iPhone9,1, iPhone9,3
+		 *  - iPhone 7 Plus  [A10]: iPhone9,2, iPhone9,4
+		 *  - iPhone 8       [A11]: iPhone10,1, iPhone10,4
+		 *  - iPhone 8 Plus  [A11]: iPhone10,2, iPhone10,5
+		 *  - iPhone X       [A11]: iPhone10,3, iPhone10,6
+		 *  - iPhone XS      [A12]: iPhone11,2
+		 *  - iPhone XS Max  [A12]: iPhone11,4, iPhone11,6
+		 *  - iPhone XR      [A12]: iPhone11,8
+		 */
+		if (major >= 3) {
+			chip_model = major + 1;
+		} else {
+			/* Models before iPhone 4 do not follow the A<major + 1> rule */
+			cpuinfo_log_info("unknown iPhone: %s", machine_name);
+		}
+	} else if (strcmp(name, "iPad") == 0) {
+		switch (major) {
+			/* iPad 2 and up are supported */
+			case 2:
+				/*
+				 * iPad 2    [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4
+				 * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7
+				 */
+				chip_model = major + 3;
+				break;
+			case 3:
+				/*
+				 * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3
+				 * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6
+				 */
+				chip_model = (minor <= 3) ? 5 : 6;
+				suffix = 'X';
+				break;
+			case 4:
+				/*
+				 * iPad Air         [A7]: iPad4,1, iPad4,2, iPad4,3
+				 * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6
+				 * iPad mini 3      [A7]: iPad4,7, iPad4,8, iPad4,9
+				 */
+				chip_model = major + 3;
+				break;
+			case 5:
+				/*
+				 * iPad mini 4 [A8]:  iPad5,1, iPad5,2
+				 * iPad Air 2  [A8X]: iPad5,3, iPad5,4
+				 */
+				chip_model = major + 3;
+				suffix = (minor <= 2) ? '\0' : 'X';
+				break;
+			case 6:
+				/*
+				 * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4
+				 * iPad Pro      [A9X]: iPad6,7, iPad6,8
+				 * iPad 5th Gen  [A9]:  iPad6,11, iPad6,12
+				 */
+				chip_model = major + 3;
+				suffix = minor <= 8 ? 'X' : '\0';
+				break;
+			case 7:
+				/*
+				 * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2
+				 * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4
+				 * iPad 6th Gen   [A10]:  iPad7,5, iPad7,6
+				 */
+				chip_model = major + 3;
+				suffix = minor <= 4 ? 'X' : '\0';
+				break;
+			default:
+				cpuinfo_log_info("unknown iPad: %s", machine_name);
+				break;
+		}
+	} else if (strcmp(name, "iPod") == 0) {
+		switch (major) {
+			case 5:
+				/* iPod touch (5th Gen) [A5]: iPod5,1 */
+				chip_model = 5;
+				break;
+			case 7:
+				/* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */
+				chip_model = 8;
+				break;
+			default:
+				cpuinfo_log_info("unknown iPod: %s", machine_name);
+				break;
+		}
+	} else {
+		cpuinfo_log_info("unknown device: %s", machine_name);
+	}
+	if (chip_model != 0) {
+		/* A '\0' suffix simply ends the string right after the model number */
+		snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%" PRIu32 "%c", chip_model, suffix);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Number of sets in a cache of cache_size bytes with the given associativity
+ * and line size. Returns 0 rather than dividing by zero when the line size
+ * (or associativity) is reported as 0.
+ */
+static uint32_t compute_cache_sets(uint32_t cache_size, uint32_t associativity, uint32_t line_size) {
+	const uint32_t bytes_per_set = associativity * line_size;
+	return bytes_per_set != 0 ? cache_size / bytes_per_set : 0;
+}
+
+/*
+ * Initializes cpuinfo on Mach-based ARM systems.
+ *
+ * Detects the thread/core/package topology, fills in cpuinfo_isa from sysctl
+ * feature flags (with per-family fallbacks), builds the processor, core,
+ * cluster, uarch, package and cache tables, and publishes them through the
+ * cpuinfo_* globals before setting cpuinfo_is_initialized.
+ *
+ * On an invalid topology or an allocation failure everything allocated so far
+ * is freed and the function returns without setting cpuinfo_is_initialized.
+ */
+void cpuinfo_arm_mach_init(void) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+
+	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
+	/*
+	 * The per-core and per-package ratios below are used as divisors, so
+	 * reject any topology that would make one of them zero.
+	 */
+	if (mach_topology.packages == 0 || mach_topology.cores < mach_topology.packages ||
+	    mach_topology.threads < mach_topology.cores) {
+		cpuinfo_log_error(
+			"invalid topology: %" PRIu32 " threads, %" PRIu32 " cores, %" PRIu32 " packages",
+			mach_topology.threads,
+			mach_topology.cores,
+			mach_topology.packages);
+		goto cleanup;
+	}
+	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			mach_topology.threads * sizeof(struct cpuinfo_processor),
+			mach_topology.threads);
+		goto cleanup;
+	}
+	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			mach_topology.cores * sizeof(struct cpuinfo_core),
+			mach_topology.cores);
+		goto cleanup;
+	}
+	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " packages",
+			mach_topology.packages * sizeof(struct cpuinfo_package),
+			mach_topology.packages);
+		goto cleanup;
+	}
+
+	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
+	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
+	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
+
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		packages[i] = (struct cpuinfo_package){
+			.processor_start = i * threads_per_package,
+			.processor_count = threads_per_package,
+			.core_start = i * cores_per_package,
+			.core_count = cores_per_package,
+		};
+		/* Prefer the brand string; fall back to decoding the model identifier */
+		if (!read_package_name_from_brand_string(packages[i].name))
+			decode_package_name_from_hw_machine(packages[i].name);
+	}
+
+	const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
+
+	/*
+	 * iOS 15 and macOS 12 added sysctls for ARM features, use them where
+	 * possible. Otherwise, fallback to hardcoded set of CPUs with known
+	 * support.
+	 */
+	const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE");
+	if (has_feat_lse != 0) {
+		cpuinfo_isa.atomics = true;
+	} else {
+		// Mandatory in ARMv8.1-A, list only cores released before iOS
+		// 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.atomics = true;
+		}
+	}
+
+	const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM");
+	if (has_feat_rdm != 0) {
+		cpuinfo_isa.rdm = true;
+	} else {
+		// Optional in ARMv8.2-A (implemented in Apple cores),
+		// list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.rdm = true;
+		}
+	}
+
+	const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16");
+	if (has_feat_fp16 != 0) {
+		cpuinfo_isa.fp16arith = true;
+	} else {
+		// Optional in ARMv8.2-A (implemented in Apple cores),
+		// list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.fp16arith = true;
+		}
+	}
+
+	const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM");
+	if (has_feat_fhm != 0) {
+		cpuinfo_isa.fhm = true;
+	} else {
+		// Prior to iOS 15, use 'hw.optional.armv8_2_fhm'
+		const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm");
+		if (has_feat_fhm_legacy != 0) {
+			cpuinfo_isa.fhm = true;
+		} else {
+			// Mandatory in ARMv8.4-A when FP16 arithmetics is
+			// implemented, list only cores released before iOS 15 /
+			// macOS 12
+			switch (cpu_family) {
+				case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+				case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+					cpuinfo_isa.fhm = true;
+			}
+		}
+	}
+
+	const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16");
+	if (has_feat_bf16 != 0) {
+		cpuinfo_isa.bf16 = true;
+	}
+
+	const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA");
+	if (has_feat_fcma != 0) {
+		cpuinfo_isa.fcma = true;
+	} else {
+		// Mandatory in ARMv8.3-A, list only cores released before iOS
+		// 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.fcma = true;
+		}
+	}
+
+	const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT");
+	if (has_feat_jscvt != 0) {
+		cpuinfo_isa.jscvt = true;
+	} else {
+		// Mandatory in ARMv8.3-A, list only cores released before iOS
+		// 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.jscvt = true;
+		}
+	}
+
+	const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd");
+	if (has_feat_dotprod != 0) {
+		cpuinfo_isa.dot = true;
+	} else {
+		// Mandatory in ARMv8.4-A, list only cores released before iOS
+		// 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.dot = true;
+		}
+	}
+
+	const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM");
+	if (has_feat_i8mm != 0) {
+		cpuinfo_isa.i8mm = true;
+	}
+
+	const uint32_t has_feat_sme = get_sys_info_by_name("hw.optional.arm.FEAT_SME");
+	if (has_feat_sme != 0) {
+		cpuinfo_isa.sme = true;
+	}
+
+	const uint32_t has_feat_sme2 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2");
+	if (has_feat_sme2 != 0) {
+		cpuinfo_isa.sme2 = true;
+	}
+
+	/* A new cluster starts wherever the uarch changes between adjacent cores */
+	uint32_t num_clusters = 1;
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		cores[i] = (struct cpuinfo_core){
+			.processor_start = i * threads_per_core,
+			.processor_count = threads_per_core,
+			.core_id = i % cores_per_package,
+			.package = packages + i / cores_per_package,
+			.vendor = cpuinfo_vendor_apple,
+			.uarch = decode_uarch(cpu_family, i, mach_topology.cores),
+		};
+		if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
+			num_clusters++;
+		}
+	}
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t smt_id = i % threads_per_core;
+		const uint32_t core_id = i / threads_per_core;
+		const uint32_t package_id = i / threads_per_package;
+
+		processors[i].smt_id = smt_id;
+		processors[i].core = &cores[core_id];
+		processors[i].package = &packages[package_id];
+	}
+
+	clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " clusters",
+			num_clusters * sizeof(struct cpuinfo_cluster),
+			num_clusters);
+		goto cleanup;
+	}
+	uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " uarchs",
+			num_clusters * sizeof(struct cpuinfo_uarch_info),
+			num_clusters);
+		goto cleanup;
+	}
+	/* Starts at UINT32_MAX so that the first increment wraps around to 0 */
+	uint32_t cluster_idx = UINT32_MAX;
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
+			cluster_idx++;
+			uarchs[cluster_idx] = (struct cpuinfo_uarch_info){
+				.uarch = cores[i].uarch,
+				.processor_count = threads_per_core,
+				.core_count = 1,
+			};
+			clusters[cluster_idx] = (struct cpuinfo_cluster){
+				.processor_start = i * threads_per_core,
+				.processor_count = threads_per_core,
+				.core_start = i,
+				.core_count = 1,
+				.cluster_id = cluster_idx,
+				.package = cores[i].package,
+				.vendor = cores[i].vendor,
+				.uarch = cores[i].uarch,
+			};
+		} else {
+			/* Each additional core contributes all of its threads */
+			uarchs[cluster_idx].processor_count += threads_per_core;
+			uarchs[cluster_idx].core_count++;
+			clusters[cluster_idx].processor_count += threads_per_core;
+			clusters[cluster_idx].core_count++;
+		}
+		cores[i].cluster = &clusters[cluster_idx];
+	}
+
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t core_id = i / threads_per_core;
+		processors[i].cluster = cores[core_id].cluster;
+	}
+
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		packages[i].cluster_start = 0;
+		packages[i].cluster_count = num_clusters;
+	}
+
+	const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE");
+	const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE");
+	const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE");
+	const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE");
+	const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE");
+	/* Associativity is not queried; these are fixed assumptions */
+	const uint32_t l1_cache_associativity = 4;
+	const uint32_t l2_cache_associativity = 8;
+	const uint32_t l3_cache_associativity = 16;
+	const uint32_t cache_partitions = 1;
+	const uint32_t cache_flags = 0;
+
+	uint32_t threads_per_l1 = 0, l1_count = 0;
+	if (l1i_cache_size != 0 || l1d_cache_size != 0) {
+		/* Assume L1 caches are private to each core */
+		threads_per_l1 = 1;
+		l1_count = mach_topology.threads / threads_per_l1;
+		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
+	}
+
+	uint32_t threads_per_l2 = 0, l2_count = 0;
+	if (l2_cache_size != 0) {
+		/* Assume L2 cache is shared between all cores */
+		threads_per_l2 = mach_topology.threads;
+		l2_count = 1;
+		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
+	}
+
+	uint32_t threads_per_l3 = 0, l3_count = 0;
+	if (l3_cache_size != 0) {
+		/* Assume L3 cache is shared between all cores */
+		threads_per_l3 = mach_topology.threads;
+		l3_count = 1;
+		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
+	}
+
+	if (l1i_cache_size != 0) {
+		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1i[c] = (struct cpuinfo_cache){
+				.size = l1i_cache_size,
+				.associativity = l1_cache_associativity,
+				.sets = compute_cache_sets(l1i_cache_size, l1_cache_associativity, cacheline_size),
+				.partitions = cache_partitions,
+				.line_size = cacheline_size,
+				.flags = cache_flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1i = &l1i[t / threads_per_l1];
+		}
+	}
+
+	if (l1d_cache_size != 0) {
+		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1d[c] = (struct cpuinfo_cache){
+				.size = l1d_cache_size,
+				.associativity = l1_cache_associativity,
+				.sets = compute_cache_sets(l1d_cache_size, l1_cache_associativity, cacheline_size),
+				.partitions = cache_partitions,
+				.line_size = cacheline_size,
+				.flags = cache_flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1d = &l1d[t / threads_per_l1];
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l2_count; c++) {
+			l2[c] = (struct cpuinfo_cache){
+				.size = l2_cache_size,
+				.associativity = l2_cache_associativity,
+				.sets = compute_cache_sets(l2_cache_size, l2_cache_associativity, cacheline_size),
+				.partitions = cache_partitions,
+				.line_size = cacheline_size,
+				.flags = cache_flags,
+				.processor_start = c * threads_per_l2,
+				.processor_count = threads_per_l2,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l2 = &l2[0];
+		}
+	}
+
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache),
+				l3_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l3_count; c++) {
+			l3[c] = (struct cpuinfo_cache){
+				.size = l3_cache_size,
+				.associativity = l3_cache_associativity,
+				.sets = compute_cache_sets(l3_cache_size, l3_cache_associativity, cacheline_size),
+				.partitions = cache_partitions,
+				.line_size = cacheline_size,
+				.flags = cache_flags,
+				.processor_start = c * threads_per_l3,
+				.processor_count = threads_per_l3,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l3 = &l3[0];
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_uarchs = uarchs;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+
+	cpuinfo_processors_count = mach_topology.threads;
+	cpuinfo_cores_count = mach_topology.cores;
+	cpuinfo_clusters_count = num_clusters;
+	cpuinfo_packages_count = mach_topology.packages;
+	cpuinfo_uarchs_count = num_clusters;
+	/* Only one of L1I/L1D may have been detected; never pair a count with a NULL array */
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i != NULL ? l1_count : 0;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d != NULL ? l1_count : 0;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	/* Make all of the stores above visible before the initialized flag is set */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership has moved to the globals; clear locals so cleanup frees nothing */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	uarchs = NULL;
+	l1i = l1d = l2 = l3 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(uarchs);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+}
--- a/3rdparty/cpuinfo/src/arm/midr.h
+++ b/3rdparty/cpuinfo/src/arm/midr.h
@@ -0,0 +1,273 @@
+#pragma once
+#include <stdint.h>
+
+/* Bit masks selecting each field of a 32-bit MIDR value */
+#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
+#define CPUINFO_ARM_MIDR_VARIANT_MASK UINT32_C(0x00F00000)
+#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000)
+#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
+#define CPUINFO_ARM_MIDR_REVISION_MASK UINT32_C(0x0000000F)
+
+/* Position of the least significant bit of each field */
+#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
+#define CPUINFO_ARM_MIDR_VARIANT_OFFSET 20
+#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16
+#define CPUINFO_ARM_MIDR_PART_OFFSET 4
+#define CPUINFO_ARM_MIDR_REVISION_OFFSET 0
+
+/*
+ * Reference MIDR values for specific cores. Some of them carry non-zero
+ * variant or revision bits, so mask them before comparing against a masked
+ * MIDR value.
+ */
+#define CPUINFO_ARM_MIDR_ARM1156 UINT32_C(0x410FB560)
+#define CPUINFO_ARM_MIDR_CORTEX_A7 UINT32_C(0x410FC070)
+#define CPUINFO_ARM_MIDR_CORTEX_A9 UINT32_C(0x410FC090)
+#define CPUINFO_ARM_MIDR_CORTEX_A15 UINT32_C(0x410FC0F0)
+#define CPUINFO_ARM_MIDR_CORTEX_A17 UINT32_C(0x410FC0E0)
+#define CPUINFO_ARM_MIDR_CORTEX_A35 UINT32_C(0x410FD040)
+#define CPUINFO_ARM_MIDR_CORTEX_A53 UINT32_C(0x410FD030)
+#define CPUINFO_ARM_MIDR_CORTEX_A55 UINT32_C(0x410FD050)
+#define CPUINFO_ARM_MIDR_CORTEX_A57 UINT32_C(0x410FD070)
+#define CPUINFO_ARM_MIDR_CORTEX_A72 UINT32_C(0x410FD080)
+#define CPUINFO_ARM_MIDR_CORTEX_A73 UINT32_C(0x410FD090)
+#define CPUINFO_ARM_MIDR_CORTEX_A75 UINT32_C(0x410FD0A0)
+#define CPUINFO_ARM_MIDR_KRYO280_GOLD UINT32_C(0x51AF8001)
+#define CPUINFO_ARM_MIDR_KRYO280_SILVER UINT32_C(0x51AF8014)
+#define CPUINFO_ARM_MIDR_KRYO385_GOLD UINT32_C(0x518F802D)
+#define CPUINFO_ARM_MIDR_KRYO385_SILVER UINT32_C(0x518F803C)
+#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
+#define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050)
+#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
+#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010)
+#define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030)
+#define CPUINFO_ARM_MIDR_AMPERE_ALTRA UINT32_C(0x413fd0c1)
+
+/* Returns midr with its implementer field replaced by implementer. */
+inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
+	const uint32_t others = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	/* Bits of the new value that do not fit the field are discarded */
+	const uint32_t field = (implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return others | field;
+}
+
+/* Returns midr with its variant field replaced by variant. */
+inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
+	const uint32_t others = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
+	/* Bits of the new value that do not fit the field are discarded */
+	const uint32_t field = (variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return others | field;
+}
+
+/* Returns midr with its architecture field replaced by architecture. */
+inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
+	const uint32_t others = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	/* Bits of the new value that do not fit the field are discarded */
+	const uint32_t field =
+		(architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	return others | field;
+}
+
+/* Returns midr with its part field replaced by part. */
+inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
+	const uint32_t others = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
+	/* Bits of the new value that do not fit the field are discarded */
+	const uint32_t field = (part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK;
+	return others | field;
+}
+
+/* Returns midr with its revision field replaced by revision. */
+inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
+	const uint32_t others = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
+	/* Bits of the new value that do not fit the field are discarded */
+	const uint32_t field = (revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK;
+	return others | field;
+}
+
+/* Extracts the variant field of midr as a right-aligned value. */
+inline static uint32_t midr_get_variant(uint32_t midr) {
+	const uint32_t field_bits = midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return field_bits >> CPUINFO_ARM_MIDR_VARIANT_OFFSET;
+}
+
+/* Extracts the implementer field of midr as a right-aligned value. */
+inline static uint32_t midr_get_implementer(uint32_t midr) {
+	const uint32_t field_bits = midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return field_bits >> CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET;
+}
+
+/* Extracts the part field of midr as a right-aligned value. */
+inline static uint32_t midr_get_part(uint32_t midr) {
+	const uint32_t field_bits = midr & CPUINFO_ARM_MIDR_PART_MASK;
+	return field_bits >> CPUINFO_ARM_MIDR_PART_OFFSET;
+}
+
+/* Extracts the revision field of midr as a right-aligned value. */
+inline static uint32_t midr_get_revision(uint32_t midr) {
+	const uint32_t field_bits = midr & CPUINFO_ARM_MIDR_REVISION_MASK;
+	return field_bits >> CPUINFO_ARM_MIDR_REVISION_OFFSET;
+}
+
+/* Returns midr with its implementer field taken from other_midr. */
+inline static uint32_t midr_copy_implementer(uint32_t midr, uint32_t other_midr) {
+	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	const uint32_t taken = other_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return kept | taken;
+}
+
+/* Returns midr with its variant field taken from other_midr. */
+inline static uint32_t midr_copy_variant(uint32_t midr, uint32_t other_midr) {
+	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
+	const uint32_t taken = other_midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return kept | taken;
+}
+
+/* Returns midr with its architecture field taken from other_midr. */
+inline static uint32_t midr_copy_architecture(uint32_t midr, uint32_t other_midr) {
+	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	const uint32_t taken = other_midr & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	return kept | taken;
+}
+
+/* Returns midr with its part field taken from other_midr. */
+inline static uint32_t midr_copy_part(uint32_t midr, uint32_t other_midr) {
+	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t taken = other_midr & CPUINFO_ARM_MIDR_PART_MASK;
+	return kept | taken;
+}
+
+/* Returns midr with its revision field taken from other_midr. */
+inline static uint32_t midr_copy_revision(uint32_t midr, uint32_t other_midr) {
+	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
+	const uint32_t taken = other_midr & CPUINFO_ARM_MIDR_REVISION_MASK;
+	return kept | taken;
+}
+
+/* True when midr has the implementer and part fields of ARM1156. */
+inline static bool midr_is_arm1156(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_ARM1156;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/*
+ * True when midr has implementer 0x41 and the top four bits of its part
+ * field equal to 0xB.
+ */
+inline static bool midr_is_arm11(uint32_t midr) {
+	const uint32_t family_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | 0x0000F000;
+	const uint32_t family_bits = midr & family_mask;
+	return family_bits == UINT32_C(0x4100B000);
+}
+
+/* True when midr has the implementer and part fields of Cortex-A9. */
+inline static bool midr_is_cortex_a9(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_CORTEX_A9;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/*
+ * True when the implementer and part fields of midr match one of the two
+ * Scorpion identifiers.
+ */
+inline static bool midr_is_scorpion(uint32_t midr) {
+	const uint32_t core_id = midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return core_id == UINT32_C(0x510000F0) || core_id == UINT32_C(0x510002D0);
+}
+
+/*
+ * True when the implementer and part fields of midr match one of the two
+ * Krait identifiers.
+ */
+inline static bool midr_is_krait(uint32_t midr) {
+	const uint32_t core_id = midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return core_id == UINT32_C(0x510004D0) || core_id == UINT32_C(0x510006F0);
+}
+
+/* True when midr has the implementer and part fields of Cortex-A53. */
+inline static bool midr_is_cortex_a53(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_CORTEX_A53;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/* True when midr has the implementer and part fields of Kryo 280 Silver. */
+inline static bool midr_is_qualcomm_cortex_a53_silver(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_KRYO280_SILVER;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/* True when midr has the implementer and part fields of Kryo 385 Silver. */
+inline static bool midr_is_qualcomm_cortex_a55_silver(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_KRYO385_SILVER;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/* True when midr has the implementer and part fields of Kryo 280 Gold. */
+inline static bool midr_is_kryo280_gold(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_KRYO280_GOLD;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/*
+ * True when the implementer, architecture and part fields of midr match the
+ * Kryo Silver identifier of either Snapdragon 820 or Snapdragon 821.
+ */
+inline static bool midr_is_kryo_silver(uint32_t midr) {
+	const uint32_t core_id = midr &
+		(CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return core_id == CPUINFO_ARM_MIDR_KRYO_SILVER_820 || core_id == CPUINFO_ARM_MIDR_KRYO_SILVER_821;
+}
+
+/* True when midr has the implementer and part fields of Kryo Gold. */
+inline static bool midr_is_kryo_gold(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_KRYO_GOLD;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/* True when midr has the implementer and part fields of Ampere Altra. */
+inline static bool midr_is_ampere_altra(uint32_t midr) {
+	/* XOR leaves a 1 wherever the two values disagree; only masked bits count */
+	const uint32_t differing = midr ^ CPUINFO_ARM_MIDR_AMPERE_ALTRA;
+	return (differing & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) == 0;
+}
+
+/*
+ * Scores a core by the implementer and part fields of its MIDR. A higher
+ * score means the core takes the big role relative to lower-scored cores;
+ * scores range from 1 (always LITTLE) to 6, and unrecognized cores get 3.
+ */
+inline static uint32_t midr_score_core(uint32_t midr) {
+	/* Variant, architecture and revision bits are ignored for scoring */
+	const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	switch (midr & core_mask) {
+		case UINT32_C(0x53000030): /* Exynos M4 */
+		case UINT32_C(0x53000040): /* Exynos M5 */
+		case UINT32_C(0x4100D440): /* Cortex-X1 */
+		case UINT32_C(0x4100D480): /* Cortex-X2 */
+		case UINT32_C(0x4100D4E0): /* Cortex-X3 */
+			/* These cores are in big role w.r.t
+			 * Cortex-A75/-A76/-A77/-A78/-A710/-A715
+			 */
+			return 6;
+		case UINT32_C(0x4100D080): /* Cortex-A72 */
+		case UINT32_C(0x4100D090): /* Cortex-A73 */
+		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D410): /* Cortex-A78 */
+		case UINT32_C(0x4100D470): /* Cortex-A710 */
+		case UINT32_C(0x4100D4D0): /* Cortex-A715 */
+		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+		case UINT32_C(0x4E000030): /* Denver 2 */
+		case UINT32_C(0x51002050): /* Kryo Gold */
+		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+		case UINT32_C(0x51008020): /* Kryo 385 Gold */
+		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
+		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
+		case UINT32_C(0x53000020): /* Exynos M3 */
+#if CPUINFO_ARCH_ARM
+		case UINT32_C(0x4100C0F0): /* Cortex-A15 */
+		case UINT32_C(0x4100C0E0): /* Cortex-A17 */
+		case UINT32_C(0x4100C0D0): /* Rockchip RK3288 cores */
+		case UINT32_C(0x4100C0C0): /* Cortex-A12 */
+#endif /* CPUINFO_ARCH_ARM */
+			/* These cores are always in big role */
+			return 5;
+		case UINT32_C(0x4100D070): /* Cortex-A57 */
+			/* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or
+			 * in big role w.r.t. Cortex-A53 */
+			return 4;
+#if CPUINFO_ARCH_ARM64
+		case UINT32_C(0x4100D060): /* Cortex-A65 */
+#endif /* CPUINFO_ARCH_ARM64 */
+		case UINT32_C(0x4100D030): /* Cortex-A53 */
+		case UINT32_C(0x4100D050): /* Cortex-A55 */
+		case UINT32_C(0x4100D460): /* Cortex-A510 */
+			/* Cortex-A53 is usually in LITTLE role, but can be in
+			 * big role w.r.t. Cortex-A35 */
+			return 2;
+		case UINT32_C(0x4100D040): /* Cortex-A35 */
+#if CPUINFO_ARCH_ARM
+		case UINT32_C(0x4100C070): /* Cortex-A7 */
+#endif /* CPUINFO_ARCH_ARM */
+		case UINT32_C(0x51008050): /* Kryo 485 Silver */
+		case UINT32_C(0x51008030): /* Kryo 385 Silver */
+		case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
+		case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
+		case UINT32_C(0x51002010): /* Kryo Silver (Snapdragon 821) */
+			/* These cores are always in LITTLE role */
+			return 1;
+		default:
+			/*
+			 * Unknown cores, or cores which do not have big/LITTLE
+			 * roles. To be future-proof w.r.t. cores not yet
+			 * recognized in cpuinfo, assume position between
+			 * Cortex-A57/A72/A73/A75 and Cortex-A53/A55. Then at
+			 * least future cores paired with one of these known
+			 * cores will be properly scored.
+			 */
+			return 3;
+	}
+}
+
+/*
+ * Returns the MIDR of the LITTLE core that is normally paired with the given
+ * big core. Only the implementer, architecture and part fields of midr are
+ * examined. When no pairing is known, midr is returned unchanged.
+ */
+inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+	const uint32_t core_mask =
+		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	switch (midr & core_mask) {
+		case CPUINFO_ARM_MIDR_CORTEX_A75:
+			return CPUINFO_ARM_MIDR_CORTEX_A55;
+		case CPUINFO_ARM_MIDR_CORTEX_A73:
+		case CPUINFO_ARM_MIDR_CORTEX_A72:
+		case CPUINFO_ARM_MIDR_CORTEX_A57:
+		case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
+			return CPUINFO_ARM_MIDR_CORTEX_A53;
+		case CPUINFO_ARM_MIDR_CORTEX_A17:
+		case CPUINFO_ARM_MIDR_CORTEX_A15:
+			return CPUINFO_ARM_MIDR_CORTEX_A7;
+		/*
+		 * The Kryo 280 Gold constant carries variant and revision bits
+		 * that the switch value never has, so the label must be masked
+		 * the same way or it can never match. The mask is spelled out
+		 * because a case label requires a constant expression.
+		 */
+		case CPUINFO_ARM_MIDR_KRYO280_GOLD &
+			(CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK |
+			 CPUINFO_ARM_MIDR_PART_MASK):
+			return CPUINFO_ARM_MIDR_KRYO280_SILVER;
+		case CPUINFO_ARM_MIDR_KRYO_GOLD:
+			return CPUINFO_ARM_MIDR_KRYO_SILVER_820;
+		case CPUINFO_ARM_MIDR_DENVER2:
+			return CPUINFO_ARM_MIDR_CORTEX_A57;
+		default:
+			return midr;
+	}
+}
--- a/3rdparty/cpuinfo/src/arm/tlb.c
+++ b/3rdparty/cpuinfo/src/arm/tlb.c
@@ -0,0 +1,154 @@
+
+
+/*
+ * Per-microarchitecture TLB notes.
+ *
+ * Every case below only quotes the TLB description from the corresponding
+ * Technical Reference Manual and then breaks; no value is read or written.
+ *
+ * NOTE(review): as shown here the switch is not enclosed in a function -
+ * presumably the surrounding definition was dropped; confirm against the
+ * upstream file before building.
+ */
+switch (uarch) {
+	case cpuinfo_uarch_cortex_a5:
+		/*
+		 * Cortex-A5 Technical Reference Manual:
+		 * 6.3.1. Micro TLB
+		 *   The first level of caching for the page table information
+		 *   is a micro TLB of 10 entries that is implemented on each of
+		 *   the instruction and data sides.
+		 * 6.3.2. Main TLB
+		 *   Misses from the instruction and data micro TLBs are handled
+		 *   by a unified main TLB. The main TLB is 128-entry two-way
+		 *   set-associative.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a7:
+		/*
+		 * Cortex-A7 MPCore Technical Reference Manual:
+		 * 5.3.1. Micro TLB
+		 *   The first level of caching for the page table information
+		 *   is a micro TLB of 10 entries that is implemented on each of
+		 *   the instruction and data sides.
+		 * 5.3.2. Main TLB
+		 *   Misses from the micro TLBs are handled by a unified main
+		 *   TLB. This is a 256-entry 2-way set-associative structure.
+		 *   The main TLB supports all the VMSAv7 page sizes of 4KB,
+		 *   64KB, 1MB and 16MB in addition to the LPAE page sizes of
+		 *   2MB and 1G.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a8:
+		/*
+		 * Cortex-A8 Technical Reference Manual:
+		 * 6.1. About the MMU
+		 *    The MMU features include the following:
+		 *     - separate, fully-associative, 32-entry data and
+		 *       instruction TLBs
+		 *     - TLB entries that support 4KB, 64KB, 1MB, and 16MB pages
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a9:
+		/*
+		 * ARM Cortex-A9 Technical Reference Manual:
+		 * 6.2.1 Micro TLB
+		 *    The first level of caching for the page table information
+		 *    is a micro TLB of 32 entries on the data side, and
+		 *    configurable 32 or 64 entries on the instruction side.
+		 * 6.2.2 Main TLB
+		 *    The main TLB is implemented as a combination of:
+		 *     - A fully-associative, lockable array of four elements.
+		 *     - A 2-way associative structure of 2x32, 2x64, 2x128 or
+		 *       2x256 entries.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a15:
+		/*
+		 * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. L1 instruction TLB
+		 *    The L1 instruction TLB is a 32-entry fully-associative
+		 *    structure. This TLB caches entries at the 4KB granularity
+		 *    of Virtual Address (VA) to Physical Address (PA) mapping
+		 *    only. If the page tables map the memory region to a larger
+		 *    granularity than 4K, it only allocates one mapping for the
+		 *    particular 4K region to which the current access
+		 *    corresponds.
+		 * 5.2.2. L1 data TLB
+		 *    There are two separate 32-entry fully-associative TLBs
+		 *    that are used for data loads and stores, respectively.
+		 *    Similar to the L1 instruction TLB, both of these cache
+		 *    entries at the 4KB granularity of VA to PA mappings only.
+		 *    At implementation time, the Cortex-A15 MPCore processor
+		 *    can be configured with the -l1tlb_1m option, to have the
+		 *    L1 data TLB cache entries at both the 4KB and 1MB
+		 *    granularity. With this configuration, any translation that
+		 *    results in a 1MB or larger page is cached in the L1 data
+		 *    TLB as a 1MB entry. Any translation that results in a page
+		 *    smaller than 1MB is cached in the L1 data TLB as a 4KB
+		 *    entry. By default, all translations are cached in the L1
+		 *    data TLB as a 4KB entry.
+		 * 5.2.3. L2 TLB
+		 *    Misses from the L1 instruction and data TLBs are handled
+		 *    by a unified L2 TLB. This is a 512-entry 4-way
+		 *    set-associative structure. The L2 TLB supports all the
+		 *    VMSAv7 page sizes of 4K, 64K, 1MB and 16MB in addition to
+		 *    the LPAE page sizes of 2MB and 1GB.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a17:
+		/*
+		 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. Instruction micro TLB
+		 *    The instruction micro TLB is implemented as a 32, 48 or 64
+		 *    entry, fully-associative structure. This TLB caches
+		 *    entries at the 4KB and 1MB granularity of Virtual Address
+		 *    (VA) to Physical Address (PA) mapping only. If the
+		 *    translation tables map the memory region to a larger
+		 *    granularity than 4KB or 1MB, it only allocates one mapping
+		 *    for the particular 4KB region to which the current access
+		 *    corresponds.
+		 * 5.2.2. Data micro TLB
+		 *    The data micro TLB is a 32 entry fully-associative TLB
+		 *    that is used for data loads and stores. The cache entries
+		 *    have a 4KB and 1MB granularity of VA to PA mappings only.
+		 * 5.2.3. Unified main TLB
+		 *    Misses from the instruction and data micro TLBs are
+		 *    handled by a unified main TLB. This is a 1024 entry 4-way
+		 *    set-associative structure. The main TLB supports all the
+		 *    VMSAv7 page sizes of 4K, 64K, 1MB and 16MB in addition to
+		 *    the LPAE page sizes of 2MB and 1GB.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a35:
+		/*
+		 * ARM Cortex-A35 Processor Technical Reference Manual:
+		 * A6.2 TLB Organization
+		 *   Micro TLB
+		 *     The first level of caching for the translation table
+		 *     information is a micro TLB of ten entries that is
+		 *     implemented on each of the instruction and data sides.
+		 *   Main TLB
+		 *     A unified main TLB handles misses from the micro TLBs. It
+		 *     has a 512-entry, 2-way, set-associative structure and
+		 *     supports all VMSAv8 block sizes, except 1GB. If it
+		 *     fetches a 1GB block, the TLB splits it into 512MB blocks
+		 *     and stores the appropriate block for the lookup.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a53:
+		/*
+		 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. Micro TLB
+		 *    The first level of caching for the translation table
+		 *    information is a micro TLB of ten entries that is
+		 *    implemented on each of the instruction and data sides.
+		 * 5.2.2. Main TLB
+		 *    A unified main TLB handles misses from the micro TLBs.
+		 *    This is a 512-entry, 4-way, set-associative structure. The
+		 *    main TLB supports all VMSAv8 block sizes, except 1GB. If a
+		 *    1GB block is fetched, it is split into 512MB blocks and
+		 *    the appropriate block for the lookup stored.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a57:
+		/*
+		 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
+		 * 5.2.1 L1 instruction TLB
+		 *    The L1 instruction TLB is a 48-entry fully-associative
+		 *    structure. This TLB caches entries of three different page
+		 *    sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings.
+		 *    If the page tables map the memory region to a larger
+		 *    granularity than 1MB, it only allocates one mapping for
+		 *    the particular 1MB region to which the current access
+		 *    corresponds.
+		 * 5.2.2 L1 data TLB
+		 *    The L1 data TLB is a 32-entry fully-associative TLB that
+		 *    is used for data loads and stores. This TLB caches entries
+		 *    of three different page sizes, natively 4KB, 64KB, and
+		 *    1MB, of VA to PA mappings.
+		 * 5.2.3 L2 TLB
+		 *    Misses from the L1 instruction and data TLBs are handled
+		 *    by a unified L2 TLB. This is a 1024-entry 4-way
+		 *    set-associative structure. The L2 TLB supports the page
+		 *    sizes of 4K, 64K, 1MB and 16MB. It also supports page
+		 *    sizes of 2MB and 1GB for the long descriptor format
+		 *    translation in AArch32 state and in AArch64 state when
+		 *    using the 4KB translation granule. In addition, the L2 TLB
+		 *    supports the 512MB page map size defined for the AArch64
+		 *    translations that use a 64KB translation granule.
+		 */
+		break;
+}
--- a/3rdparty/cpuinfo/src/arm/uarch.c
+++ b/3rdparty/cpuinfo/src/arm/uarch.c
@@ -0,0 +1,429 @@
+#include <stdint.h>
+
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <cpuinfo/log.h>
+
+/*
+ * Decodes a MIDR value into a cpuinfo vendor and microarchitecture.
+ *
+ * The implementer code selects the vendor table; the part number (and, for
+ * Samsung, the variant field as well) selects the microarchitecture.
+ *
+ * @param midr      MIDR value to decode.
+ * @param has_vfpv4 (32-bit ARM builds only) used solely to tell a Cortex-A5
+ *                  from a Scorpion when a Qualcomm core reports part 0x00F.
+ * @param vendor    Written for every implementer handled below. Some
+ *                  vendor-branded parts overwrite it again with the vendor
+ *                  of the underlying design (ARM or Cavium).
+ * @param uarch     Written only when the part is recognized.
+ *
+ * For an unrecognized part a warning is logged and *uarch is left as the
+ * caller set it. For an unrecognized implementer a warning is logged and
+ * neither output is written. Callers therefore have to initialize both
+ * outputs before the call.
+ */
+void cpuinfo_arm_decode_vendor_uarch(
+	uint32_t midr,
+#if CPUINFO_ARCH_ARM
+	bool has_vfpv4,
+#endif /* CPUINFO_ARCH_ARM */
+	enum cpuinfo_vendor vendor[RESTRICT_STATIC 1],
+	enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]) {
+	switch (midr_get_implementer(midr)) {
+		case 'A':
+			*vendor = cpuinfo_vendor_arm;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM
+				case 0xC05:
+					*uarch = cpuinfo_uarch_cortex_a5;
+					break;
+				case 0xC07:
+					*uarch = cpuinfo_uarch_cortex_a7;
+					break;
+				case 0xC08:
+					*uarch = cpuinfo_uarch_cortex_a8;
+					break;
+				case 0xC09:
+					*uarch = cpuinfo_uarch_cortex_a9;
+					break;
+				case 0xC0C:
+					*uarch = cpuinfo_uarch_cortex_a12;
+					break;
+				case 0xC0E:
+					*uarch = cpuinfo_uarch_cortex_a17;
+					break;
+				case 0xC0D:
+					/*
+					 * Rockchip RK3288 only.
+					 * Core information is ambiguous: some
+					 * sources specify Cortex-A12, others -
+					 * Cortex-A17. Assume it is Cortex-A12.
+					 */
+					*uarch = cpuinfo_uarch_cortex_a12;
+					break;
+				case 0xC0F:
+					*uarch = cpuinfo_uarch_cortex_a15;
+					break;
+#endif /* CPUINFO_ARCH_ARM */
+				case 0xD01:
+					*uarch = cpuinfo_uarch_cortex_a32;
+					break;
+				case 0xD03:
+					*uarch = cpuinfo_uarch_cortex_a53;
+					break;
+				case 0xD04:
+					*uarch = cpuinfo_uarch_cortex_a35;
+					break;
+				case 0xD05:
+					// Note: use Variant, not Revision,
+					// field
+					*uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0
+						? cpuinfo_uarch_cortex_a55r0
+						: cpuinfo_uarch_cortex_a55;
+					break;
+				case 0xD06:
+					*uarch = cpuinfo_uarch_cortex_a65;
+					break;
+				case 0xD07:
+					*uarch = cpuinfo_uarch_cortex_a57;
+					break;
+				case 0xD08:
+					*uarch = cpuinfo_uarch_cortex_a72;
+					break;
+				case 0xD09:
+					*uarch = cpuinfo_uarch_cortex_a73;
+					break;
+				case 0xD0A:
+					*uarch = cpuinfo_uarch_cortex_a75;
+					break;
+				case 0xD0B:
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				case 0xD0C:
+					*uarch = cpuinfo_uarch_neoverse_n1;
+					break;
+				case 0xD0D:
+					*uarch = cpuinfo_uarch_cortex_a77;
+					break;
+				case 0xD0E: /* Cortex-A76AE */
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				case 0xD40: /* Neoverse V1 */
+					*uarch = cpuinfo_uarch_neoverse_v1;
+					break;
+				case 0xD41: /* Cortex-A78 */
+					*uarch = cpuinfo_uarch_cortex_a78;
+					break;
+				case 0xD44: /* Cortex-X1 */
+					*uarch = cpuinfo_uarch_cortex_x1;
+					break;
+				case 0xD46: /* Cortex-A510 */
+					*uarch = cpuinfo_uarch_cortex_a510;
+					break;
+				case 0xD47: /* Cortex-A710 */
+					*uarch = cpuinfo_uarch_cortex_a710;
+					break;
+				case 0xD48: /* Cortex-X2 */
+					*uarch = cpuinfo_uarch_cortex_x2;
+					break;
+				case 0xD49: /* Neoverse N2 */
+					*uarch = cpuinfo_uarch_neoverse_n2;
+					break;
+#if CPUINFO_ARCH_ARM64
+				case 0xD4A:
+					*uarch = cpuinfo_uarch_neoverse_e1;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 */
+				case 0xD4D: /* Cortex-A715 */
+					*uarch = cpuinfo_uarch_cortex_a715;
+					break;
+				case 0xD4E: /* Cortex-X3 */
+					*uarch = cpuinfo_uarch_cortex_x3;
+					break;
+				case 0xD4F: /* Neoverse V2 */
+					*uarch = cpuinfo_uarch_neoverse_v2;
+					break;
+				default:
+					/*
+					 * No exact part match: fall back to
+					 * classifying by the part number
+					 * shifted right by 8 bits, which
+					 * distinguishes the ARM7 / ARM9 /
+					 * ARM11 families on 32-bit builds.
+					 */
+					switch (midr_get_part(midr) >> 8) {
+#if CPUINFO_ARCH_ARM
+						case 7:
+							*uarch = cpuinfo_uarch_arm7;
+							break;
+						case 9:
+							*uarch = cpuinfo_uarch_arm9;
+							break;
+						case 11:
+							*uarch = cpuinfo_uarch_arm11;
+							break;
+#endif /* CPUINFO_ARCH_ARM */
+						default:
+							cpuinfo_log_warning(
+								"unknown ARM CPU part 0x%03" PRIx32 " ignored",
+								midr_get_part(midr));
+					}
+			}
+			break;
+		case 'B':
+			*vendor = cpuinfo_vendor_broadcom;
+			switch (midr_get_part(midr)) {
+				case 0x00F:
+					*uarch = cpuinfo_uarch_brahma_b15;
+					break;
+				case 0x100:
+					*uarch = cpuinfo_uarch_brahma_b53;
+					break;
+#if CPUINFO_ARCH_ARM64
+				case 0x516:
+					/* Broadcom Vulkan was sold to Cavium
+					 * before it reached the market, so we
+					 * identify it as Cavium ThunderX2 */
+					*vendor = cpuinfo_vendor_cavium;
+					*uarch = cpuinfo_uarch_thunderx2;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 */
+				default:
+					cpuinfo_log_warning(
+						"unknown Broadcom CPU part 0x%03" PRIx32 " ignored",
+						midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM64
+		case 'C':
+			*vendor = cpuinfo_vendor_cavium;
+			switch (midr_get_part(midr)) {
+				case 0x0A0: /* ThunderX */
+				case 0x0A1: /* ThunderX 88XX */
+				case 0x0A2: /* ThunderX 81XX */
+				case 0x0A3: /* ThunderX 83XX */
+					*uarch = cpuinfo_uarch_thunderx;
+					break;
+				case 0x0AF: /* ThunderX2 99XX */
+					*uarch = cpuinfo_uarch_thunderx2;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Cavium CPU part 0x%03" PRIx32 " ignored", midr_get_part(midr));
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM64 */
+		case 'H':
+			*vendor = cpuinfo_vendor_huawei;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM64
+				case 0xD01: /* Kunpeng 920 series */
+					*uarch = cpuinfo_uarch_taishan_v110;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 */
+				case 0xD40: /* Kirin 980 Big/Medium cores ->
+					       Cortex-A76 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Huawei CPU part 0x%03" PRIx32 " ignored", midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 'i':
+			*vendor = cpuinfo_vendor_intel;
+			/* Only the part number shifted right by 8 bits is
+			 * inspected for Intel cores */
+			switch (midr_get_part(midr) >> 8) {
+				case 2: /* PXA 210/25X/26X */
+				case 4: /* PXA 27X */
+				case 6: /* PXA 3XX */
+					*uarch = cpuinfo_uarch_xscale;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Intel CPU part 0x%03" PRIx32 " ignored", midr_get_part(midr));
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		case 'N':
+			*vendor = cpuinfo_vendor_nvidia;
+			switch (midr_get_part(midr)) {
+				case 0x000:
+					*uarch = cpuinfo_uarch_denver;
+					break;
+				case 0x003:
+					*uarch = cpuinfo_uarch_denver2;
+					break;
+				case 0x004:
+					*uarch = cpuinfo_uarch_carmel;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Nvidia CPU part 0x%03" PRIx32 " ignored", midr_get_part(midr));
+			}
+			break;
+		case 'P':
+			*vendor = cpuinfo_vendor_apm;
+			switch (midr_get_part(midr)) {
+				case 0x000:
+					*uarch = cpuinfo_uarch_xgene;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Applied Micro CPU part 0x%03" PRIx32 " ignored",
+						midr_get_part(midr));
+			}
+			break;
+		case 'Q':
+			*vendor = cpuinfo_vendor_qualcomm;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM
+				case 0x00F:
+					/* Mostly Scorpions, but some Cortex A5
+					 * may report this value as well
+					 */
+					if (has_vfpv4) {
+						/* Unlike Scorpion, Cortex-A5
+						 * comes with VFPv4 */
+						*vendor = cpuinfo_vendor_arm;
+						*uarch = cpuinfo_uarch_cortex_a5;
+					} else {
+						*uarch = cpuinfo_uarch_scorpion;
+					}
+					break;
+				case 0x02D: /* Dual-core Scorpions */
+					*uarch = cpuinfo_uarch_scorpion;
+					break;
+				case 0x04D:
+					/*
+					 * Dual-core Krait:
+					 * - r1p0 -> Krait 200
+					 * - r1p4 -> Krait 200
+					 * - r2p0 -> Krait 300
+					 *
+					 * Intentionally falls through to the
+					 * quad-core case: both report Krait.
+					 */
+				case 0x06F:
+					/*
+					 * Quad-core Krait:
+					 * - r0p1 -> Krait 200
+					 * - r0p2 -> Krait 200
+					 * - r1p0 -> Krait 300
+					 * - r2p0 -> Krait 400 (Snapdragon 800
+					 * MSMxxxx)
+					 * - r2p1 -> Krait 400 (Snapdragon 801
+					 * MSMxxxxPRO)
+					 * - r3p1 -> Krait 450
+					 */
+					*uarch = cpuinfo_uarch_krait;
+					break;
+#endif /* CPUINFO_ARCH_ARM */
+				case 0x201: /* Qualcomm Snapdragon 821:
+					       Low-power Kryo "Silver" */
+				case 0x205: /* Qualcomm Snapdragon 820 & 821:
+					       High-performance Kryo "Gold" */
+				case 0x211: /* Qualcomm Snapdragon 820:
+					       Low-power Kryo "Silver" */
+					*uarch = cpuinfo_uarch_kryo;
+					break;
+				case 0x800: /* High-performance Kryo 260 (r10p2)
+					       / Kryo 280 (r10p1) "Gold" ->
+					       Cortex-A73 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a73;
+					break;
+				case 0x801: /* Low-power Kryo 260 / 280 "Silver"
+					       -> Cortex-A53 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a53;
+					break;
+				case 0x802: /* High-performance Kryo 385 "Gold"
+					       -> Cortex-A75 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a75;
+					break;
+				case 0x803: /* Low-power Kryo 385 "Silver" ->
+					       Cortex-A55r0 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a55r0;
+					break;
+				case 0x804: /* High-performance Kryo 485 "Gold"
+					       / "Gold Prime" -> Cortex-A76 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				case 0x805: /* Low-performance Kryo 485 "Silver"
+					       -> Cortex-A55 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a55;
+					break;
+#if CPUINFO_ARCH_ARM64
+				case 0x001:
+					*uarch = cpuinfo_uarch_oryon;
+					break;
+				case 0xC00:
+					*uarch = cpuinfo_uarch_falkor;
+					break;
+				case 0xC01:
+					*uarch = cpuinfo_uarch_saphira;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 */
+				default:
+					cpuinfo_log_warning(
+						"unknown Qualcomm CPU part 0x%03" PRIx32 " ignored",
+						midr_get_part(midr));
+			}
+			break;
+		case 'S':
+			*vendor = cpuinfo_vendor_samsung;
+			/* Samsung cores are told apart by the variant and part
+			 * fields together, so both are kept in the switch
+			 * value */
+			switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case 0x00100010:
+					/*
+					 * Exynos 8890 MIDR = 0x531F0011, assume
+					 * Exynos M1 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x001
+					 */
+					*uarch = cpuinfo_uarch_exynos_m1;
+					break;
+				case 0x00400010:
+					/*
+					 * Exynos 8895 MIDR = 0x534F0010, assume
+					 * Exynos M2 has:
+					 * - CPU variant 0x4
+					 * - CPU part 0x001
+					 */
+					*uarch = cpuinfo_uarch_exynos_m2;
+					break;
+				case 0x00100020:
+					/*
+					 * Exynos 9810 MIDR = 0x531F0020, assume
+					 * Exynos M3 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x002
+					 */
+					*uarch = cpuinfo_uarch_exynos_m3;
+					break;
+				case 0x00100030:
+					/*
+					 * Exynos 9820 MIDR = 0x531F0030, assume
+					 * Exynos M4 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x003
+					 */
+					*uarch = cpuinfo_uarch_exynos_m4;
+					break;
+				case 0x00100040:
+					/*
+					 * Exynos 9820 MIDR = 0x531F0040, assume
+					 * Exynos M5 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x004
+					 *
+					 * NOTE(review): the SoC name repeats
+					 * the one given for Exynos M4 above -
+					 * looks like a copy-paste; confirm
+					 * which SoC reports this MIDR.
+					 */
+					*uarch = cpuinfo_uarch_exynos_m5;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Samsung CPU variant 0x%01" PRIx32 " part 0x%03" PRIx32
+						" ignored",
+						midr_get_variant(midr),
+						midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 'V':
+			*vendor = cpuinfo_vendor_marvell;
+			switch (midr_get_part(midr)) {
+				case 0x581: /* PJ4 / PJ4B */
+				case 0x584: /* PJ4B-MP / PJ4C */
+					*uarch = cpuinfo_uarch_pj4;
+					break;
+				default:
+					cpuinfo_log_warning(
+						"unknown Marvell CPU part 0x%03" PRIx32 " ignored",
+						midr_get_part(midr));
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		default:
+			cpuinfo_log_warning(
+				"unknown CPU implementer '%c' (0x%02" PRIx32 ") with CPU part 0x%03" PRIx32 " ignored",
+				(char)midr_get_implementer(midr),
+				midr_get_implementer(midr),
+				midr_get_part(midr));
+	}
+}
--- a/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c
+++ b/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c
@@ -0,0 +1,912 @@
+#include <errno.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1)
+
+/* Call chain:
+ * cpu_info_init_by_logical_sys_info
+ * 		read_packages_for_processors
+ * 		read_cores_for_processors
+ * 		read_caches_for_processors
+ * 			read_all_logical_processor_info_of_relation
+ * 				parse_relation_processor_info
+ * 					store_package_info_per_processor
+ * 					store_core_info_per_processor
+ * 				parse_relation_cache_info
+ * 					store_cache_info_per_processor
+ */
+
+static uint32_t count_logical_processors(const uint32_t max_group_count, uint32_t* global_proc_index_per_group);
+
+static uint32_t read_packages_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info);
+
+static uint32_t read_cores_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info);
+
+static uint32_t read_caches_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info);
+
+static uint32_t read_all_logical_processor_info_of_relation(
+	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	struct cpuinfo_core* cores,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info);
+
+static bool parse_relation_processor_info(
+	struct cpuinfo_processor* processors,
+	uint32_t nr_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	const uint32_t info_id,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info);
+
+static bool parse_relation_cache_info(
+	struct cpuinfo_processor* processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);
+
+static void store_package_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t package_id,
+	const uint32_t group_id,
+	const uint32_t processor_id_in_group);
+
+static void store_core_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t core_id,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info);
+
+static void store_cache_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	struct cpuinfo_cache* current_cache);
+
+static bool connect_packages_cores_clusters_by_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t nr_of_processors,
+	struct cpuinfo_package* packages,
+	const uint32_t nr_of_packages,
+	struct cpuinfo_cluster* clusters,
+	struct cpuinfo_core* cores,
+	const uint32_t nr_of_cores,
+	const struct woa_chip_info* chip_info,
+	enum cpuinfo_vendor vendor);
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);
+
+/*
+ * Builds the global cpuinfo tables (processors, packages, clusters, cores,
+ * uarchs and caches) from the Windows logical processor information.
+ *
+ * The work is done in numbered steps: count groups and processors, run a
+ * counting pass over packages / cores / caches, allocate the tables, run a
+ * second pass that fills cores and caches, connect the structures, derive
+ * the uarch table, and finally publish everything to the cpuinfo globals.
+ *
+ * @param chip_info Chip description forwarded to the reader and connect
+ *                  helpers.
+ * @param vendor    Vendor forwarded to the connect helper.
+ * @return true when all tables were built and published; false on any
+ *         failure, in which case every table allocated here is freed and
+ *         the cpuinfo globals are left untouched.
+ */
+bool cpu_info_init_by_logical_sys_info(const struct woa_chip_info* chip_info, const enum cpuinfo_vendor vendor) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cache* caches = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+
+	uint32_t nr_of_packages = 0;
+	uint32_t nr_of_cores = 0;
+	uint32_t nr_of_all_caches = 0;
+	uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};
+
+	uint32_t nr_of_uarchs = 0;
+	bool result = false;
+
+	HANDLE heap = GetProcessHeap();
+
+	/* 1. Count available logical processor groups and processors */
+	const uint32_t max_group_count = (uint32_t)GetMaximumProcessorGroupCount();
+	cpuinfo_log_debug("detected %" PRIu32 " processor group(s)", max_group_count);
+	/* We need to store the absolute processor ID offsets for every groups,
+	 * because
+	 *  1. We can't assume every processor groups include the same number of
+	 *     logical processors.
+	 *  2. Every processor groups know its group number and processor IDs
+	 * within the group, but not the global processor IDs.
+	 *  3. We need to list every logical processors by global IDs.
+	 */
+	uint32_t* global_proc_index_per_group = (uint32_t*)HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
+	if (global_proc_index_per_group == NULL) {
+		/* Report the size that was actually requested above */
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " processor groups",
+			max_group_count * sizeof(uint32_t),
+			max_group_count);
+		goto clean_up;
+	}
+
+	uint32_t nr_of_processors = count_logical_processors(max_group_count, global_proc_index_per_group);
+	processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			nr_of_processors * sizeof(struct cpuinfo_processor),
+			nr_of_processors);
+		goto clean_up;
+	}
+
+	/* 2. Read topology information via MSDN API: packages, cores and
+	 * caches*/
+	nr_of_packages =
+		read_packages_for_processors(processors, nr_of_processors, global_proc_index_per_group, chip_info);
+	if (!nr_of_packages) {
+		cpuinfo_log_error("error in reading package information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %" PRIu32 " processor package(s)", nr_of_packages);
+
+	/* We need the EfficiencyClass to parse uarch from the core information,
+	 * but we need to iterate first to count cores and allocate memory then
+	 * we will iterate again to read and store data to cpuinfo_core
+	 * structures.
+	 */
+	nr_of_cores =
+		read_cores_for_processors(processors, nr_of_processors, global_proc_index_per_group, NULL, chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %" PRIu32 " processor core(s)", nr_of_cores);
+
+	/* There is no API to read number of caches, so we need to iterate twice
+	   on caches:
+		1. Count all type of caches -> allocate memory
+		2. Read out cache data and store to allocated memory
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+		processors, nr_of_processors, caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %" PRIu32 " processor cache(s)", nr_of_all_caches);
+
+	/* 3. Allocate memory for package, cluster, core and cache structures */
+	packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+			nr_of_packages * sizeof(struct cpuinfo_package),
+			nr_of_packages);
+		goto clean_up;
+	}
+
+	/* We don't have cluster information so we explicitly set clusters to
+	 * equal to cores. */
+	clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			nr_of_cores * sizeof(struct cpuinfo_cluster),
+			nr_of_cores);
+		goto clean_up;
+	}
+
+	cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			nr_of_cores * sizeof(struct cpuinfo_core),
+			nr_of_cores);
+		goto clean_up;
+	}
+
+	/* We allocate one contiguous cache array for all caches, then use
+	 * offsets per cache type. */
+	caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
+	if (caches == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " caches",
+			nr_of_all_caches * sizeof(struct cpuinfo_cache),
+			nr_of_all_caches);
+		goto clean_up;
+	}
+
+	/* 4.Read missing topology information that can't be saved without
+	 * counted allocate structures in the first round.
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+		processors, nr_of_processors, caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+
+	nr_of_cores =
+		read_cores_for_processors(processors, nr_of_processors, global_proc_index_per_group, cores, chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+
+	/* 5. Now that we read out everything from the system we can, fill the
+	 * package, cluster and core structures respectively.
+	 */
+	result = connect_packages_cores_clusters_by_processors(
+		processors,
+		nr_of_processors,
+		packages,
+		nr_of_packages,
+		clusters,
+		cores,
+		nr_of_cores,
+		chip_info,
+		vendor);
+	if (!result) {
+		cpuinfo_log_error("error in connecting information");
+		goto clean_up;
+	}
+	/* Initialization is not complete yet: keep reporting failure until
+	 * the tables are committed in step 7, so that the error paths below
+	 * do not return true after freeing everything. */
+	result = false;
+
+	/* 6. Count and store uarchs of cores, assuming same uarchs are
+	 * neighbors */
+	enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
+	for (uint32_t i = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			nr_of_uarchs++;
+			prev_uarch = cores[i].uarch;
+		}
+	}
+	uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " uarchs",
+			nr_of_uarchs * sizeof(struct cpuinfo_uarch_info),
+			nr_of_uarchs);
+		goto clean_up;
+	}
+	prev_uarch = cpuinfo_uarch_unknown;
+	/* Slots are handed out in the same order the counting loop above
+	 * counted them (one per uarch change), so the first change always
+	 * lands in slot 0 even when the leading cores have an unknown uarch.
+	 */
+	for (uint32_t i = 0, uarch_index = 0, next_uarch_index = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			if (next_uarch_index >= nr_of_uarchs) {
+				/* Never write past the end of the table */
+				cpuinfo_log_error("more uarchs detected than reported");
+				goto clean_up;
+			}
+			uarch_index = next_uarch_index++;
+			prev_uarch = cores[i].uarch;
+			uarchs[uarch_index].uarch = cores[i].uarch;
+			uarchs[uarch_index].core_count = 1;
+			uarchs[uarch_index].processor_count = cores[i].processor_count;
+		} else if (prev_uarch != cpuinfo_uarch_unknown) {
+			uarchs[uarch_index].core_count++;
+			uarchs[uarch_index].processor_count += cores[i].processor_count;
+		}
+	}
+
+	/* 7. Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_packages = packages;
+	cpuinfo_clusters = clusters;
+	cpuinfo_cores = cores;
+	cpuinfo_uarchs = uarchs;
+
+	cpuinfo_processors_count = nr_of_processors;
+	cpuinfo_packages_count = nr_of_packages;
+	cpuinfo_clusters_count = nr_of_cores;
+	cpuinfo_cores_count = nr_of_cores;
+	cpuinfo_uarchs_count = nr_of_uarchs;
+
+	for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+		cpuinfo_cache_count[i] = numbers_of_caches[i];
+	}
+	/* All cache levels share one allocation; each level starts where the
+	 * previous one ends */
+	cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
+	cpuinfo_cache[cpuinfo_cache_level_1d] =
+		cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
+	cpuinfo_cache[cpuinfo_cache_level_2] =
+		cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
+	cpuinfo_cache[cpuinfo_cache_level_3] =
+		cpuinfo_cache[cpuinfo_cache_level_2] + cpuinfo_cache_count[cpuinfo_cache_level_2];
+	cpuinfo_cache[cpuinfo_cache_level_4] =
+		cpuinfo_cache[cpuinfo_cache_level_3] + cpuinfo_cache_count[cpuinfo_cache_level_3];
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	result = true;
+	MemoryBarrier();
+
+	/* Ownership moved to the cpuinfo globals: clear the locals so the
+	 * clean-up code below does not free the published tables */
+	processors = NULL;
+	packages = NULL;
+	clusters = NULL;
+	cores = NULL;
+	caches = NULL;
+	uarchs = NULL;
+
+clean_up:
+	/* The propagated pointers, shouldn't be freed, only in case of error
+	 * and unfinished init.
+	 */
+	if (processors != NULL) {
+		HeapFree(heap, 0, processors);
+	}
+	if (packages != NULL) {
+		HeapFree(heap, 0, packages);
+	}
+	if (clusters != NULL) {
+		HeapFree(heap, 0, clusters);
+	}
+	if (cores != NULL) {
+		HeapFree(heap, 0, cores);
+	}
+	if (caches != NULL) {
+		HeapFree(heap, 0, caches);
+	}
+	if (uarchs != NULL) {
+		HeapFree(heap, 0, uarchs);
+	}
+
+	/* Free the locally used temporary pointers */
+	HeapFree(heap, 0, global_proc_index_per_group);
+	global_proc_index_per_group = NULL;
+	return result;
+}
+
+/*
+ * Sums the maximum processor count of the first max_group_count processor
+ * groups. As a side product, global_proc_index_per_group[g] receives the
+ * number of processors counted in all groups before g, i.e. the global index
+ * of the first processor of group g.
+ *
+ * Returns the total number of logical processors over all visited groups.
+ */
+static uint32_t count_logical_processors(const uint32_t max_group_count, uint32_t* global_proc_index_per_group) {
+	uint32_t running_total = 0;
+	uint32_t group = 0;
+
+	while (group < max_group_count) {
+		const uint32_t group_size = GetMaximumProcessorCount((WORD)group);
+		cpuinfo_log_debug("detected %" PRIu32 " processor(s) in group %" PRIu32 "", group_size, group);
+		/* Everything counted so far precedes this group. */
+		global_proc_index_per_group[group] = running_total;
+		running_total += group_size;
+		group++;
+	}
+	return running_total;
+}
+
+/*
+ * Runs the generic relation reader for RelationProcessorPackage records.
+ * No cache storage and no core array are passed for this relation.
+ *
+ * Returns whatever the generic reader returns: the number of records it
+ * parsed, or 0 when querying or parsing failed.
+ */
+static uint32_t read_packages_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info) {
+	struct cpuinfo_cache* const no_caches = NULL;
+	uint32_t* const no_cache_counts = NULL;
+	struct cpuinfo_core* const no_cores = NULL;
+
+	const uint32_t nr_of_packages = read_all_logical_processor_info_of_relation(
+		RelationProcessorPackage,
+		processors,
+		number_of_processors,
+		no_caches,
+		no_cache_counts,
+		no_cores,
+		global_proc_index_per_group,
+		chip_info);
+	return nr_of_packages;
+}
+
+/*
+ * Runs the generic relation reader for RelationProcessorCore records.
+ * The cores array is forwarded unchanged (it may be NULL; the per-processor
+ * store step skips its work in that case); no cache storage is passed.
+ *
+ * Returns whatever the generic reader returns: the number of records it
+ * parsed, or 0 when querying or parsing failed.
+ */
+uint32_t read_cores_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info) {
+	struct cpuinfo_cache* const no_caches = NULL;
+	uint32_t* const no_cache_counts = NULL;
+
+	const uint32_t nr_of_cores = read_all_logical_processor_info_of_relation(
+		RelationProcessorCore,
+		processors,
+		number_of_processors,
+		no_caches,
+		no_cache_counts,
+		cores,
+		global_proc_index_per_group,
+		chip_info);
+	return nr_of_cores;
+}
+
+/*
+ * Runs the generic relation reader for RelationCache records.
+ *
+ * When a cache array is supplied, the processor_start field of every entry
+ * (numbers_of_caches[level] entries per level, laid out back to back) is
+ * first set to UINT32_MAX so that the later "keep the smallest index" update
+ * works on a known starting value.
+ *
+ * Returns whatever the generic reader returns: the number of records it
+ * parsed, or 0 when querying or parsing failed.
+ */
+static uint32_t read_caches_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info) {
+	if (caches != NULL) {
+		/* Walk the contiguous array level by level. */
+		struct cpuinfo_cache* slot = caches;
+		for (uint32_t level = 0; level < MAX_NR_OF_CACHES; level++) {
+			struct cpuinfo_cache* const level_end = slot + numbers_of_caches[level];
+			for (; slot != level_end; slot++) {
+				slot->processor_start = UINT32_MAX;
+			}
+		}
+	}
+
+	return read_all_logical_processor_info_of_relation(
+		RelationCache,
+		processors,
+		number_of_processors,
+		caches,
+		numbers_of_caches,
+		NULL,
+		global_proc_index_per_group,
+		chip_info);
+}
+
+/*
+ * Queries GetLogicalProcessorInformationEx for one relationship type and
+ * hands every returned record to the matching parser.
+ *
+ * info_type selects the relationship (package, core or cache). The remaining
+ * arguments are forwarded to the parsers; caches, numbers_of_caches and cores
+ * may be NULL when the selected relation does not use them.
+ *
+ * Returns the number of records of the requested type that were parsed.
+ * Returns 0 when the query fails, the buffer cannot be allocated, or any
+ * parser reports failure.
+ */
+static uint32_t read_all_logical_processor_info_of_relation(
+	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	struct cpuinfo_core* cores,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info) {
+	HANDLE heap = GetProcessHeap();
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL;
+	DWORD info_size = 0;
+	uint32_t nr_of_structs = 0;
+
+	/* 1. Size query: a failure other than "buffer too small" is fatal. */
+	if (!GetLogicalProcessorInformationEx(info_type, NULL, &info_size)) {
+		const DWORD last_error = GetLastError();
+		if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+			cpuinfo_log_error(
+				"failed to query size of processor %" PRIu32 " information information: error %" PRIu32 "",
+				(uint32_t)info_type,
+				(uint32_t)last_error);
+			goto clean_up;
+		}
+	}
+
+	/* 2. Buffer for the variable-length record list. */
+	infos = HeapAlloc(heap, 0, info_size);
+	if (infos == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %" PRIu32 " bytes for logical processor information", (uint32_t)info_size);
+		goto clean_up;
+	}
+
+	/* 3. Actual query. */
+	if (!GetLogicalProcessorInformationEx(info_type, infos, &info_size)) {
+		cpuinfo_log_error(
+			"failed to query processor %" PRIu32 " information: error %" PRIu32 "",
+			(uint32_t)info_type,
+			(uint32_t)GetLastError());
+		goto clean_up;
+	}
+
+	/* 4. Walk the records; each one states its own byte size. */
+	{
+		const uintptr_t end_address = (uintptr_t)infos + info_size;
+		uintptr_t cursor = (uintptr_t)infos;
+
+		while (cursor < end_address) {
+			PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)cursor;
+			bool parsed = false;
+
+			/* Step to the next record up front so `continue` cannot stall. */
+			cursor += info->Size;
+
+			if (info->Relationship != info_type) {
+				cpuinfo_log_warning(
+					"unexpected processor info type (%" PRIu32 ") for processor information",
+					(uint32_t)info->Relationship);
+				continue;
+			}
+
+			/* Records of the requested type are numbered in arrival order. */
+			const uint32_t info_id = nr_of_structs++;
+
+			switch (info_type) {
+				case RelationProcessorPackage:
+				case RelationProcessorCore:
+					/* Both relations share one parser, which
+					 * dispatches on info->Relationship. */
+					parsed = parse_relation_processor_info(
+						processors,
+						number_of_processors,
+						global_proc_index_per_group,
+						info,
+						info_id,
+						cores,
+						chip_info);
+					break;
+				case RelationCache:
+					parsed = parse_relation_cache_info(
+						processors, caches, numbers_of_caches, global_proc_index_per_group, info);
+					break;
+				default:
+					cpuinfo_log_error(
+						"unexpected processor info type (%" PRIu32 ") for processor information",
+						(uint32_t)info->Relationship);
+					break;
+			}
+
+			if (!parsed) {
+				/* A failed record invalidates the whole enumeration. */
+				nr_of_structs = 0;
+				break;
+			}
+		}
+	}
+
+clean_up:
+	/* 5. Release dynamically allocated info structure. */
+	HeapFree(heap, 0, infos);
+	infos = NULL;
+	return nr_of_structs;
+}
+
+/*
+ * Processes one package or core record: for every processor bit set in every
+ * group mask of the record, computes the processor's global index and stores
+ * the package or core association for it.
+ *
+ * info_id is the sequence number of this record among records of its type and
+ * is used as the package / core id.
+ *
+ * Returns false when a computed global index is not below nr_of_processors,
+ * true otherwise. A record whose relationship is neither package nor core is
+ * logged as an error for each set bit but does not fail the call.
+ */
+static bool parse_relation_processor_info(
+	struct cpuinfo_processor* processors,
+	uint32_t nr_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	const uint32_t info_id,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info) {
+	for (uint32_t group_slot = 0; group_slot < info->Processor.GroupCount; group_slot++) {
+		const uint32_t group_id = info->Processor.GroupMask[group_slot].Group;
+
+		/* Visit the set bits from lowest to highest; `x & (x - 1)`
+		 * drops the lowest set bit on every step. */
+		for (KAFFINITY pending = info->Processor.GroupMask[group_slot].Mask; pending != 0;
+		     pending &= (pending - 1)) {
+			const uint32_t processor_id_in_group = low_index_from_kaffinity(pending);
+			const uint32_t processor_global_index =
+				global_proc_index_per_group[group_id] + processor_id_in_group;
+
+			if (processor_global_index >= nr_of_processors) {
+				cpuinfo_log_error("unexpected processor index %" PRIu32 "", processor_global_index);
+				return false;
+			}
+
+			if (info->Relationship == RelationProcessorPackage) {
+				store_package_info_per_processor(
+					processors, processor_global_index, info_id, group_id, processor_id_in_group);
+			} else if (info->Relationship == RelationProcessorCore) {
+				store_core_info_per_processor(
+					processors, processor_global_index, info_id, info, cores, chip_info);
+			} else {
+				cpuinfo_log_error(
+					"unexpected processor info type (%" PRIu32 ") for processor information",
+					(uint32_t)info->Relationship);
+			}
+		}
+	}
+	return true;
+}
+
+/*
+ * Processes one RelationCache record in one of two modes:
+ *
+ *  - caches == NULL (counting pass): increments numbers_of_caches[] for the
+ *    record's cache kind so the caller can size the cache array.
+ *  - caches != NULL (filling pass): fills the next free entry of the record's
+ *    cache kind and links it to every processor named in the record.
+ *
+ * Only L1 instruction, L1 data, L2 and L3 caches are tracked. Any other record
+ * (L1 unified/trace, level 4 and above) is ignored in BOTH passes; since no
+ * entry was counted for it there is no slot to fill.
+ *
+ * The cache array is expected to hold the L1I, L1D, L2 and L3 entries back to
+ * back, with numbers_of_caches[] giving the length of each section.
+ *
+ * Returns true on success (including ignored records), false when more
+ * records of a kind arrive than entries were counted for that kind.
+ *
+ * NOTE(review): the slot counters are function-static and never reset, so the
+ * filling pass is only valid once per process.
+ * NOTE(review): processor indexes derived from the affinity masks are not
+ * range-checked here because the processor count is not passed in.
+ */
+static bool parse_relation_cache_info(
+	struct cpuinfo_processor* processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info) {
+	static uint32_t l1i_counter = 0;
+	static uint32_t l1d_counter = 0;
+	static uint32_t l2_counter = 0;
+	static uint32_t l3_counter = 0;
+
+	/* Count cache types for allocation at first. */
+	if (caches == NULL) {
+		switch (info->Cache.Level) {
+			case 1:
+				switch (info->Cache.Type) {
+					case CacheInstruction:
+						numbers_of_caches[cpuinfo_cache_level_1i]++;
+						break;
+					case CacheData:
+						numbers_of_caches[cpuinfo_cache_level_1d]++;
+						break;
+					default:
+						/* Unified / trace L1: not tracked. */
+						break;
+				}
+				break;
+			case 2:
+				numbers_of_caches[cpuinfo_cache_level_2]++;
+				break;
+			case 3:
+				numbers_of_caches[cpuinfo_cache_level_3]++;
+				break;
+			default:
+				/* Other levels: not tracked. */
+				break;
+		}
+		return true;
+	}
+
+	struct cpuinfo_cache* l1i_base = caches;
+	struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
+	struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
+	struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2];
+
+	cpuinfo_log_debug(
+		"info->Cache.GroupCount:%" PRIu32 ", info->Cache.GroupMask:%" PRIu32
+		","
+		"info->Cache.Level:%" PRIu32 ", info->Cache.Associativity:%" PRIu32
+		","
+		"info->Cache.LineSize:%" PRIu32
+		","
+		"info->Cache.CacheSize:%" PRIu32 ", info->Cache.Type:%" PRIu32 "",
+		info->Cache.GroupCount,
+		(unsigned int)info->Cache.GroupMask.Mask,
+		info->Cache.Level,
+		info->Cache.Associativity,
+		info->Cache.LineSize,
+		info->Cache.CacheSize,
+		info->Cache.Type);
+
+	/* Select the section, its slot counter and its capacity for this
+	 * record. slot_counter stays NULL for kinds that are not tracked. */
+	struct cpuinfo_cache* section_base = NULL;
+	uint32_t* slot_counter = NULL;
+	uint32_t section_capacity = 0;
+	switch (info->Cache.Level) {
+		case 1:
+			switch (info->Cache.Type) {
+				case CacheInstruction:
+					section_base = l1i_base;
+					slot_counter = &l1i_counter;
+					section_capacity = numbers_of_caches[cpuinfo_cache_level_1i];
+					break;
+				case CacheData:
+					section_base = l1d_base;
+					slot_counter = &l1d_counter;
+					section_capacity = numbers_of_caches[cpuinfo_cache_level_1d];
+					break;
+				default:
+					break;
+			}
+			break;
+		case 2:
+			section_base = l2_base;
+			slot_counter = &l2_counter;
+			section_capacity = numbers_of_caches[cpuinfo_cache_level_2];
+			break;
+		case 3:
+			section_base = l3_base;
+			slot_counter = &l3_counter;
+			section_capacity = numbers_of_caches[cpuinfo_cache_level_3];
+			break;
+		default:
+			break;
+	}
+
+	if (slot_counter == NULL) {
+		/* The counting pass reserved nothing for this kind of cache,
+		 * so skip the record instead of dereferencing a NULL entry. */
+		cpuinfo_log_debug(
+			"skipping untracked cache record (level %" PRIu32 ", type %" PRIu32 ")",
+			(uint32_t)info->Cache.Level,
+			(uint32_t)info->Cache.Type);
+		return true;
+	}
+	if (*slot_counter >= section_capacity) {
+		/* Writing further would run into the next section or past the
+		 * end of the allocation. */
+		cpuinfo_log_error(
+			"more level %" PRIu32 " cache records than allocated cache entries",
+			(uint32_t)info->Cache.Level);
+		return false;
+	}
+
+	struct cpuinfo_cache* current_cache = section_base + *slot_counter;
+	(*slot_counter)++;
+
+	current_cache->size = info->Cache.CacheSize;
+	current_cache->line_size = info->Cache.LineSize;
+	current_cache->associativity = info->Cache.Associativity;
+	/* We don't have partition and set information of caches on Windows,
+	 * so we set partitions to 1 and calculate the expected sets.
+	 */
+	current_cache->partitions = 1;
+	if (current_cache->line_size != 0 && current_cache->associativity != 0) {
+		current_cache->sets = current_cache->size / current_cache->line_size / current_cache->associativity;
+	} else {
+		/* Avoid dividing by zero on incomplete cache descriptions. */
+		current_cache->sets = 0;
+	}
+	if (info->Cache.Type == CacheUnified) {
+		current_cache->flags = CPUINFO_CACHE_UNIFIED;
+	}
+
+	/* Zero GroupCount is valid, GroupMask still can store bits set.
+	 * GroupMask shares storage with GroupMasks[0], so a zero count is
+	 * handled as a single group.
+	 * NOTE(review): assumes a zero GroupCount only comes from systems that
+	 * fill the single GroupMask field - confirm against the SDK docs.
+	 */
+	const uint32_t group_count = info->Cache.GroupCount != 0 ? info->Cache.GroupCount : 1;
+	for (uint32_t i = 0; i < group_count; i++) {
+		const uint32_t group_id = info->Cache.GroupMasks[i].Group;
+		/* Bitmask representing processors in this group sharing this
+		 * cache
+		 */
+		KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
+		while (group_processors_mask != 0) {
+			const uint32_t processor_id_in_group = low_index_from_kaffinity(group_processors_mask);
+			const uint32_t processor_global_index =
+				global_proc_index_per_group[group_id] + processor_id_in_group;
+
+			store_cache_info_per_processor(processors, processor_global_index, info, current_cache);
+
+			/* Clear the bits in affinity mask, lower the least set
+			 * bit. */
+			group_processors_mask &= (group_processors_mask - 1);
+		}
+	}
+	return true;
+}
+
+/*
+ * Records, for one logical processor, its Windows group id, its index within
+ * that group, and the package it belongs to.
+ *
+ * The package array does not exist yet at this point, so the package field
+ * temporarily holds an offset from a NULL base (package_id elements); the
+ * real base address is added later when the topology is connected.
+ */
+static void store_package_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t package_id,
+	const uint32_t group_id,
+	const uint32_t processor_id_in_group) {
+	struct cpuinfo_processor* const entry = &processors[processor_global_index];
+
+	entry->windows_group_id = (uint16_t)group_id;
+	entry->windows_processor_id = (uint16_t)processor_id_in_group;
+	/* Offset only; rebased once the package array is allocated. */
+	entry->package = (const struct cpuinfo_package*)NULL + package_id;
+}
+
+/*
+ * Links one logical processor to its core and fills the core's id, uarch and
+ * frequency from the first uarch slot of chip_info.
+ *
+ * Does nothing when cores is NULL (the core array has not been allocated
+ * yet). When chip_info is NULL the processor/core link and the core id are
+ * still stored, an error is logged, and uarch, frequency and the cluster
+ * offset are left untouched.
+ *
+ * core_info is accepted for signature compatibility and is not read.
+ */
+void store_core_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t core_id,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info* chip_info) {
+	if (cores) {
+		processors[processor_global_index].core = cores + core_id;
+		cores[core_id].core_id = core_id;
+
+		/* chip_info->uarchs is an array member and can never compare
+		 * equal to NULL; the pointer that can be missing is chip_info
+		 * itself. */
+		if (chip_info == NULL) {
+			cpuinfo_log_error("chip info is NULL for core %" PRIu32, core_id);
+			return;
+		}
+
+		cores[core_id].uarch = chip_info->uarchs[0].uarch;
+		cores[core_id].frequency = chip_info->uarchs[0].frequency;
+
+		/* We don't have cluster information, so we handle it as
+		 * fixed 1 to (cluster / cores).
+		 * Set the cluster offset ID now, as soon as we have the
+		 * cluster base address, we'll set the absolute address.
+		 */
+		processors[processor_global_index].cluster = (const struct cpuinfo_cluster*)NULL + core_id;
+	}
+}
+
+/*
+ * Attaches one cache entry to one logical processor.
+ *
+ * The cache's processor_start is lowered to this processor's index when that
+ * index is smaller, its processor_count is incremented, and the processor's
+ * l1i / l1d / l2 / l3 pointer matching the record's level and type is set.
+ * Records of any other level or L1 type only update the cache-side counters.
+ */
+static void store_cache_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	struct cpuinfo_cache* current_cache) {
+	struct cpuinfo_processor* const entry = processors + processor_global_index;
+
+	/* Track the lowest processor index sharing this cache. */
+	if (processor_global_index < current_cache->processor_start) {
+		current_cache->processor_start = processor_global_index;
+	}
+	current_cache->processor_count++;
+
+	if (info->Cache.Level == 1) {
+		if (info->Cache.Type == CacheInstruction) {
+			entry->cache.l1i = current_cache;
+		} else if (info->Cache.Type == CacheData) {
+			entry->cache.l1d = current_cache;
+		}
+		/* Unified, trace and unknown L1 types are not linked. */
+	} else if (info->Cache.Level == 2) {
+		entry->cache.l2 = current_cache;
+	} else if (info->Cache.Level == 3) {
+		entry->cache.l3 = current_cache;
+	}
+}
+
+/*
+ * Turns the per-processor associations collected earlier into a fully linked
+ * topology.
+ *
+ * First loop (processors, highest index first): converts the package and
+ * cluster offsets stored in each processor into real pointers, copies the chip
+ * name into the package (UTF-8) when chip_info is given, and accumulates
+ * processor_start / processor_count on the package, cluster and core. Walking
+ * backwards leaves the lowest processor index in each *_start field.
+ *
+ * Second loop (cores, highest index first): links every core to its package
+ * and cluster, sets the vendor, and accumulates core/cluster start and count
+ * values. Clusters are handled 1:1 with cores, which is why cluster bounds are
+ * checked against nr_of_cores.
+ *
+ * Returns false when a processor refers to a core, package or cluster outside
+ * the supplied arrays, or when the chip name cannot be converted; true
+ * otherwise.
+ */
+static bool connect_packages_cores_clusters_by_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t nr_of_processors,
+	struct cpuinfo_package* packages,
+	const uint32_t nr_of_packages,
+	struct cpuinfo_cluster* clusters,
+	struct cpuinfo_core* cores,
+	const uint32_t nr_of_cores,
+	const struct woa_chip_info* chip_info,
+	enum cpuinfo_vendor vendor) {
+	/* Adjust core and package pointers for all logical processors. */
+	for (uint32_t i = nr_of_processors; i != 0; i--) {
+		const uint32_t processor_id = i - 1;
+		struct cpuinfo_processor* processor = processors + processor_id;
+
+		/* The core pointer is absolute already, but a processor that
+		 * was not named by any core record would be dereferenced
+		 * blindly below, so validate it like package and cluster. */
+		struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;
+		if (core == NULL || core < cores || core >= (cores + nr_of_cores)) {
+			cpuinfo_log_error("invalid core indexing");
+			return false;
+		}
+
+		/* We stored the offset of pointers when we haven't allocated
+		 * memory for packages and clusters, so now add offsets to base
+		 * addresses.
+		 */
+		struct cpuinfo_package* package =
+			(struct cpuinfo_package*)((uintptr_t)packages + (uintptr_t)processor->package);
+		if (package < packages || package >= (packages + nr_of_packages)) {
+			cpuinfo_log_error("invalid package indexing");
+			return false;
+		}
+		processor->package = package;
+
+		struct cpuinfo_cluster* cluster =
+			(struct cpuinfo_cluster*)((uintptr_t)clusters + (uintptr_t)processor->cluster);
+		if (cluster < clusters || cluster >= (clusters + nr_of_cores)) {
+			cpuinfo_log_error("invalid cluster indexing");
+			return false;
+		}
+		processor->cluster = cluster;
+
+		if (chip_info) {
+			/* Wide registry string -> UTF-8 package name; -1 means
+			 * the source is NUL-terminated. */
+			if (!WideCharToMultiByte(
+				    CP_UTF8,
+				    WC_ERR_INVALID_CHARS,
+				    chip_info->chip_name_string,
+				    -1,
+				    package->name,
+				    CPUINFO_PACKAGE_NAME_MAX,
+				    NULL,
+				    NULL)) {
+				cpuinfo_log_error("cpu name character conversion error");
+				return false;
+			}
+		}
+
+		/* Set start indexes and counts per packages / clusters / cores
+		 * - going backwards */
+
+		/* This can be overwritten by lower-index processors on the same
+		 * package. */
+		package->processor_start = processor_id;
+		package->processor_count++;
+
+		/* This can be overwritten by lower-index processors on the same
+		 * cluster. */
+		cluster->processor_start = processor_id;
+		cluster->processor_count++;
+
+		/* This can be overwritten by lower-index processors on the same
+		 * core. */
+		core->processor_start = processor_id;
+		core->processor_count++;
+	}
+	/* Fill cores */
+	for (uint32_t i = nr_of_cores; i != 0; i--) {
+		const uint32_t global_core_id = i - 1;
+		struct cpuinfo_core* core = cores + global_core_id;
+		const struct cpuinfo_processor* processor = processors + core->processor_start;
+		struct cpuinfo_package* package = (struct cpuinfo_package*)processor->package;
+		struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*)processor->cluster;
+
+		core->package = package;
+		core->cluster = cluster;
+		core->vendor = vendor;
+
+		/* This can be overwritten by lower-index cores on the same
+		 * cluster/package.
+		 */
+		cluster->core_start = global_core_id;
+		cluster->core_count++;
+		package->core_start = global_core_id;
+		package->core_count++;
+		/* One cluster per core, so cluster numbers mirror core numbers. */
+		package->cluster_start = global_core_id;
+		package->cluster_count = package->core_count;
+
+		cluster->package = package;
+		cluster->vendor = cores[cluster->core_start].vendor;
+		cluster->uarch = cores[cluster->core_start].uarch;
+		cluster->frequency = cores[cluster->core_start].frequency;
+	}
+	return true;
+}
+
+/*
+ * Returns the bit position of the lowest set bit of an affinity mask.
+ * The scan result is not checked, so the caller must pass a non-zero mask.
+ */
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
+	const unsigned __int64 mask = (unsigned __int64)kaffinity;
+	unsigned long lowest_set_bit;
+
+	_BitScanForward64(&lowest_set_bit, mask);
+	return (uint32_t)lowest_set_bit;
+}
--- a/3rdparty/cpuinfo/src/arm/windows/init.c
+++ b/3rdparty/cpuinfo/src/arm/windows/init.c
@@ -0,0 +1,222 @@
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include <arm/api.h>
+#include <arm/midr.h>
+
+#include "windows-arm-init.h"
+
+struct cpuinfo_arm_isa cpuinfo_isa;
+
+static void set_cpuinfo_isa_fields(void);
+static struct woa_chip_info* get_system_info_from_registry(void);
+
+/* Fallback chip description used when the registry lookup yields nothing:
+ * the name is "Unknown" and the first uarch slot carries an unknown vendor,
+ * an unknown uarch and a frequency of 0. */
+static struct woa_chip_info woa_chip_unknown = {L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}};
+
+/*
+ * One-time initialization callback (INIT_ONCE signature) for Windows on ARM.
+ *
+ * Fills the ISA feature flags, reads the chip description from the registry
+ * (falling back to the "Unknown" chip when that fails) and builds the
+ * topology from the logical processor information. The outcome of the
+ * topology step is stored in cpuinfo_is_initialized.
+ *
+ * The three callback parameters are not used. The callback itself always
+ * reports success; callers must consult cpuinfo_is_initialized.
+ *
+ * NOTE(review): a chip_info obtained from the registry is heap-allocated and
+ * is not released here - confirm whether the topology step keeps a reference
+ * before adding a free.
+ */
+BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
+	struct woa_chip_info* chip_info = NULL;
+
+	set_cpuinfo_isa_fields();
+
+	chip_info = get_system_info_from_registry();
+	if (chip_info == NULL) {
+		chip_info = &woa_chip_unknown;
+	}
+
+	/* The vendor of the first uarch slot is used for every core. */
+	cpuinfo_is_initialized = cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor);
+
+	return true;
+}
+
+/* Static helper functions */
+
+/*
+ * Reads a REG_SZ value below HKEY_LOCAL_MACHINE.
+ *
+ * The value is queried twice: once with a NULL buffer to learn its size in
+ * bytes, then again into a zero-initialized buffer from the process heap.
+ *
+ * Returns the buffer on success; the caller owns it and must release it with
+ * HeapFree on the process heap. Returns NULL (after logging) when the value
+ * is missing, empty, not a string, or memory cannot be allocated.
+ */
+static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) {
+	const DWORD string_only = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */
+	HANDLE heap = GetProcessHeap();
+	DWORD value_type = 0;
+	DWORD byte_count = 0;
+
+	/* Pass 1: NULL destination requests the required buffer size. */
+	LSTATUS status =
+		RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, string_only, &value_type, NULL, &byte_count);
+	if (status != ERROR_SUCCESS || byte_count == 0) {
+		cpuinfo_log_error("Registry entry size read error");
+		return NULL;
+	}
+
+	wchar_t* text = HeapAlloc(heap, HEAP_ZERO_MEMORY, byte_count);
+	if (text == NULL) {
+		cpuinfo_log_error("Registry textbuffer allocation error");
+		return NULL;
+	}
+
+	/* Pass 2: copy the string into the freshly allocated buffer. */
+	status = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, string_only, NULL, text, &byte_count);
+	if (status == ERROR_SUCCESS) {
+		return text;
+	}
+
+	cpuinfo_log_error("Registry read error");
+	HeapFree(heap, 0, text);
+	return NULL;
+}
+
+/*
+ * Reads a REG_QWORD value below HKEY_LOCAL_MACHINE.
+ *
+ * Returns the 64-bit value, or 0 (after logging) when the read fails or the
+ * reported size is not eight bytes. A stored value of 0 is therefore
+ * indistinguishable from an error.
+ */
+static uint64_t read_registry_qword(LPCWSTR subkey, LPCWSTR value) {
+	uint64_t qword_value = 0;
+	DWORD value_type = 0;
+	DWORD byte_count = sizeof(uint64_t);
+
+	/* Only read QWORD (REG_QWORD) values */
+	const LSTATUS status = RegGetValueW(
+		HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_QWORD, &value_type, &qword_value, &byte_count);
+	if (status == ERROR_SUCCESS && byte_count == sizeof(uint64_t)) {
+		return qword_value;
+	}
+
+	cpuinfo_log_error("Registry QWORD read error");
+	return 0;
+}
+
+/*
+ * Reads a REG_DWORD value below HKEY_LOCAL_MACHINE and widens it to 64 bits.
+ *
+ * Returns the value, or 0 (after logging) when the read fails or the reported
+ * size is not that of a DWORD. A stored value of 0 is therefore
+ * indistinguishable from an error.
+ */
+static uint64_t read_registry_dword(LPCWSTR subkey, LPCWSTR value) {
+	DWORD dword_value = 0;
+	DWORD value_type = 0;
+	DWORD byte_count = sizeof(DWORD);
+
+	const LSTATUS status = RegGetValueW(
+		HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_DWORD, &value_type, &dword_value, &byte_count);
+	if (status == ERROR_SUCCESS && byte_count == sizeof(DWORD)) {
+		return (uint64_t)dword_value;
+	}
+
+	cpuinfo_log_error("Registry DWORD read error");
+	return 0;
+}
+
+/*
+ * Duplicates at most n wide characters of src into a new, NUL-terminated
+ * buffer taken from the process heap.
+ *
+ * Returns the copy (to be released with HeapFree on the process heap), or
+ * NULL when the allocation fails.
+ */
+static wchar_t* wcsndup(const wchar_t* src, size_t n) {
+	const size_t copy_len = wcsnlen(src, n);
+	wchar_t* copy = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (copy_len + 1) * sizeof(wchar_t));
+
+	if (copy == NULL) {
+		return NULL;
+	}
+	wcsncpy_s(copy, copy_len + 1, src, copy_len);
+	copy[copy_len] = L'\0';
+	return copy;
+}
+
+/*
+ * Builds a core description from a MIDR value and a clock frequency.
+ *
+ * Vendor and uarch come from cpuinfo_arm_decode_vendor_uarch; frequency is
+ * stored unchanged. On 32-bit ARM builds the decoder takes an extra VFPv4
+ * flag, which is passed as false.
+ */
+static struct core_info_by_chip_name get_core_info_from_midr(uint32_t midr, uint64_t frequency) {
+	struct core_info_by_chip_name info;
+
+	/* Let the decoder write straight into the result. */
+#if CPUINFO_ARCH_ARM
+	const bool has_vfpv4 = false;
+	cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &info.vendor, &info.uarch);
+#else
+	cpuinfo_arm_decode_vendor_uarch(midr, &info.vendor, &info.uarch);
+#endif
+	info.frequency = frequency;
+	return info;
+}
+
+/*
+ * Builds a chip description from the registry entries of processor 0:
+ * the processor name string, the MIDR value ("CP 4000") and the clock
+ * frequency in MHz ("~MHz").
+ *
+ * Returns a heap-allocated woa_chip_info whose name is a separate heap
+ * allocation and whose first uarch slot is decoded from the MIDR, or NULL
+ * (after logging) when any registry value cannot be read, reads as 0, or
+ * memory cannot be allocated. The temporary name buffer obtained from
+ * read_registry is released on every path.
+ */
+static struct woa_chip_info* get_system_info_from_registry(void) {
+	LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
+	LPCWSTR chip_name_value = L"ProcessorNameString";
+	LPCWSTR chip_midr_value = L"CP 4000";
+	LPCWSTR chip_mhz_value = L"~MHz";
+	HANDLE heap = GetProcessHeap();
+	wchar_t* text_buffer = NULL;
+	struct woa_chip_info* chip_info = NULL;
+	uint32_t midr_value = 0;
+	uint64_t frequency_mhz = 0;
+
+	/* Read processor model name from registry. */
+	text_buffer = read_registry(cpu0_subkey, chip_name_value);
+	if (text_buffer == NULL) {
+		cpuinfo_log_error("Registry read error for processor name");
+		return NULL;
+	}
+
+	/*
+	 *  https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register
+	 *	Regedit for MIDR :
+	 *HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000
+	 */
+	/* MIDR is only 32 bits, so the upper half of the QWORD is dropped. */
+	midr_value = (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value);
+	if (midr_value == 0) {
+		cpuinfo_log_error("Registry read error for MIDR value");
+		goto clean_up;
+	}
+
+	/* Read the frequency from the registry
+	 * The value is in MHz, so we need to convert it to Hz */
+	frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value);
+	if (frequency_mhz == 0) {
+		cpuinfo_log_error("Registry read error for frequency value");
+		goto clean_up;
+	}
+
+	/* Allocate chip_info before using it. */
+	chip_info = HeapAlloc(heap, HEAP_ZERO_MEMORY, sizeof(struct woa_chip_info));
+	if (chip_info == NULL) {
+		cpuinfo_log_error("Heap allocation error for chip_info");
+		goto clean_up;
+	}
+
+	/* Keep a private, length-limited copy of the name so the registry
+	 * buffer can be released below. */
+	chip_info->chip_name_string = wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1);
+	if (chip_info->chip_name_string == NULL) {
+		cpuinfo_log_error("Heap allocation error for chip name");
+		HeapFree(heap, 0, chip_info);
+		chip_info = NULL;
+		goto clean_up;
+	}
+	chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_mhz * 1000000);
+
+	cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string);
+
+clean_up:
+	/* The registry buffer is only needed while building chip_info. */
+	HeapFree(heap, 0, text_buffer);
+	return chip_info;
+}
+
+/*
+ * Fills the global cpuinfo_isa flags from IsProcessorFeaturePresent.
+ *
+ * atomics, dot and crc32 map to one feature query each. The four
+ * cryptography flags share a single query, and fp16arith and rdm are derived
+ * from dot-product support.
+ */
+static void set_cpuinfo_isa_fields(void) {
+	const bool has_atomics = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0;
+	const bool dotprod = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0;
+
+	cpuinfo_isa.atomics = has_atomics;
+	cpuinfo_isa.dot = dotprod;
+
+	SYSTEM_INFO system_info;
+	GetSystemInfo(&system_info);
+	switch (system_info.wProcessorLevel) {
+		case 0x803: // Kryo 385 Silver (Snapdragon 850)
+		default:
+			// Every processor level currently takes this path.
+			// Assume that Dot Product support implies FP16
+			// arithmetics and RDM support. ARM manuals don't
+			// guarantee that, but it holds in practice.
+			cpuinfo_isa.fp16arith = dotprod;
+			cpuinfo_isa.rdm = dotprod;
+			break;
+	}
+
+	/* Windows API reports all or nothing for cryptographic instructions. */
+	const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
+	cpuinfo_isa.aes = crypto;
+	cpuinfo_isa.sha1 = crypto;
+	cpuinfo_isa.sha2 = crypto;
+	cpuinfo_isa.pmull = crypto;
+
+	const bool has_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
+	cpuinfo_isa.crc32 = has_crc32;
+}
--- a/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h
+++ b/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h
@@ -0,0 +1,21 @@
+#pragma once
+
+/* Efficiency class = 0 means little core, while 1 means big core for now. */
+#define MAX_WOA_VALID_EFFICIENCY_CLASSES 2
+
+/* Topology information hard-coded by SoC/chip name.
+ * Describes one kind of core: who designed it, which microarchitecture it
+ * implements, and its clock frequency.
+ */
+struct core_info_by_chip_name {
+	enum cpuinfo_vendor vendor; /* core vendor */
+	enum cpuinfo_uarch uarch; /* core microarchitecture */
+	uint64_t frequency; /* Hz */
+};
+
+/* SoC/chip info that's currently not readable by logical system information,
+ * but can be read from registry.
+ */
+struct woa_chip_info {
+	/* Chip name as a NUL-terminated wide string. */
+	wchar_t* chip_name_string;
+	/* One slot per efficiency class (see MAX_WOA_VALID_EFFICIENCY_CLASSES). */
+	struct core_info_by_chip_name uarchs[MAX_WOA_VALID_EFFICIENCY_CLASSES];
+};
+
+bool cpu_info_init_by_logical_sys_info(const struct woa_chip_info* chip_info, enum cpuinfo_vendor vendor);
--- a/3rdparty/cpuinfo/src/cache.c
+++ b/3rdparty/cpuinfo/src/cache.c
@@ -0,0 +1,17 @@
+#include <stddef.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+
+/*
+ * Returns the size of the highest-level cache attached to the processor:
+ * L4 if present, otherwise L3, then L2, then L1 data. Returns 0 when none of
+ * those caches is attached.
+ */
+uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) {
+	/* Candidates ordered from the outermost level inwards. */
+	const struct cpuinfo_cache* const candidates[] = {
+		processor->cache.l4,
+		processor->cache.l3,
+		processor->cache.l2,
+		processor->cache.l1d,
+	};
+
+	for (size_t i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
+		if (candidates[i] != NULL) {
+			return candidates[i]->size;
+		}
+	}
+	return 0;
+}
--- a/3rdparty/cpuinfo/src/cpuinfo/common.h
+++ b/3rdparty/cpuinfo/src/cpuinfo/common.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+/* Number of elements of a true array (not a pointer): total size divided by
+ * the size of one element. `0 [array]` is `array[0]` with the operands of the
+ * subscript swapped. */
+#define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0 [array]))
+
+/* Branch prediction hints. With GNU-compatible compilers they expand to
+ * __builtin_expect; elsewhere they only normalize the condition to 0 or 1. */
+#if defined(__GNUC__)
+#define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1))
+#define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0))
+#else
+#define CPUINFO_LIKELY(condition) (!!(condition))
+#define CPUINFO_UNLIKELY(condition) (!!(condition))
+#endif
+
+/* Symbol visibility attribute for CPUINFO_INTERNAL declarations: "internal"
+ * on ELF, "hidden" on Mach-O, nothing elsewhere. May be pre-defined by the
+ * build to override this choice. */
+#ifndef CPUINFO_INTERNAL
+#if defined(__ELF__)
+#define CPUINFO_INTERNAL __attribute__((__visibility__("internal")))
+#elif defined(__MACH__)
+#define CPUINFO_INTERNAL __attribute__((__visibility__("hidden")))
+#else
+#define CPUINFO_INTERNAL
+#endif
+#endif
+
+/* Symbol visibility attribute for CPUINFO_PRIVATE declarations: "hidden" on
+ * both ELF and Mach-O, nothing elsewhere. May be pre-defined by the build to
+ * override this choice. */
+#ifndef CPUINFO_PRIVATE
+#if defined(__ELF__)
+#define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
+#elif defined(__MACH__)
+#define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
+#else
+#define CPUINFO_PRIVATE
+#endif
+#endif
--- a/3rdparty/cpuinfo/src/cpuinfo/internal-api.h
+++ b/3rdparty/cpuinfo/src/cpuinfo/internal-api.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#include <windows.h>
+#endif
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/* Index of each tracked cache kind in the cpuinfo_cache[] and
+ * cpuinfo_cache_count[] arrays; cpuinfo_cache_level_max is the array length,
+ * not a cache kind. */
+enum cpuinfo_cache_level {
+	cpuinfo_cache_level_1i = 0, /* level 1 instruction cache */
+	cpuinfo_cache_level_1d = 1, /* level 1 data cache */
+	cpuinfo_cache_level_2 = 2,
+	cpuinfo_cache_level_3 = 3,
+	cpuinfo_cache_level_4 = 4,
+	cpuinfo_cache_level_max = 5,
+};
+
+extern CPUINFO_INTERNAL bool cpuinfo_is_initialized;
+
+extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors;
+extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores;
+extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters;
+extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages;
+extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max];
+
+extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
+extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
+
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count;
+#else
+extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch;
+#endif
+
+#ifdef __linux__
+extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max;
+extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map;
+extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map;
+#endif
+
+CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
+CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+CPUINFO_PRIVATE void cpuinfo_x86_freebsd_init(void);
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if CPUINFO_ARCH_ARM64
+CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+#else
+CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+#endif
+#endif
+CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
+CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
+CPUINFO_PRIVATE void cpuinfo_riscv_linux_init(void);
+CPUINFO_PRIVATE void cpuinfo_emscripten_init(void);
+
+CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor);
+
+typedef void (*cpuinfo_processor_callback)(uint32_t);
--- a/3rdparty/cpuinfo/src/cpuinfo/log.h
+++ b/3rdparty/cpuinfo/src/cpuinfo/log.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#ifndef CPUINFO_LOG_LEVEL
+#error "Undefined CPUINFO_LOG_LEVEL"
+#endif
+
+#define CPUINFO_LOG_NONE 0
+#define CPUINFO_LOG_FATAL 1
+#define CPUINFO_LOG_ERROR 2
+#define CPUINFO_LOG_WARNING 3
+#define CPUINFO_LOG_INFO 4
+#define CPUINFO_LOG_DEBUG 5
+
+#ifndef CPUINFO_LOG_DEBUG_PARSERS
+#define CPUINFO_LOG_DEBUG_PARSERS 0
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG
+void cpuinfo_vlog_debug(const char* format, va_list args);
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO
+void cpuinfo_vlog_info(const char* format, va_list args);
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING
+void cpuinfo_vlog_warning(const char* format, va_list args);
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR
+void cpuinfo_vlog_error(const char* format, va_list args);
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL
+void cpuinfo_vlog_fatal(const char* format, va_list args);
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#ifndef CPUINFO_LOG_ARGUMENTS_FORMAT
+#ifdef __GNUC__
+#define CPUINFO_LOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2)))
+#else
+#define CPUINFO_LOG_ARGUMENTS_FORMAT
+#endif
+#endif
+
+/* printf-style debug message. Forwards to cpuinfo_vlog_debug when
+ * CPUINFO_LOG_LEVEL is at least CPUINFO_LOG_DEBUG; otherwise the body is
+ * empty and the call does nothing. */
+CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_debug(const char* format, ...) {
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG
+	va_list args;
+	va_start(args, format);
+	cpuinfo_vlog_debug(format, args);
+	va_end(args);
+#endif
+}
+
+/* printf-style informational message. Forwards to cpuinfo_vlog_info when
+ * CPUINFO_LOG_LEVEL is at least CPUINFO_LOG_INFO; otherwise the body is empty
+ * and the call does nothing. */
+CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_info(const char* format, ...) {
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO
+	va_list args;
+	va_start(args, format);
+	cpuinfo_vlog_info(format, args);
+	va_end(args);
+#endif
+}
+
+/* printf-style warning message. Forwards to cpuinfo_vlog_warning when
+ * CPUINFO_LOG_LEVEL is at least CPUINFO_LOG_WARNING; otherwise the body is
+ * empty and the call does nothing. */
+CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_warning(const char* format, ...) {
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING
+	va_list args;
+	va_start(args, format);
+	cpuinfo_vlog_warning(format, args);
+	va_end(args);
+#endif
+}
+
+/* printf-style error message. Forwards to cpuinfo_vlog_error when
+ * CPUINFO_LOG_LEVEL is at least CPUINFO_LOG_ERROR; otherwise the body is
+ * empty and the call does nothing. */
+CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_error(const char* format, ...) {
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR
+	va_list args;
+	va_start(args, format);
+	cpuinfo_vlog_error(format, args);
+	va_end(args);
+#endif
+}
+
+/* printf-style fatal message. Forwards to cpuinfo_vlog_fatal when
+ * CPUINFO_LOG_LEVEL is at least CPUINFO_LOG_FATAL. Unlike the other log
+ * helpers this one never returns: abort() is called regardless of the log
+ * level. */
+CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_fatal(const char* format, ...) {
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL
+	va_list args;
+	va_start(args, format);
+	cpuinfo_vlog_fatal(format, args);
+	va_end(args);
+#endif
+	/* Terminate even when fatal logging is compiled out. */
+	abort();
+}
--- a/3rdparty/cpuinfo/src/cpuinfo/utils.h
+++ b/3rdparty/cpuinfo/src/cpuinfo/utils.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <stdint.h>
+
+/*
+ * Returns the number of bits needed to represent (n - 1), i.e. the position
+ * of the highest set bit of (n - 1) plus one.
+ * Yields 0 for n == 1; for n == 0 the subtraction wraps to UINT32_MAX and the
+ * result is 32.
+ */
+inline static uint32_t bit_length(uint32_t n) {
+	const uint32_t value = n - 1;
+	/* Both bit-scan primitives below are undefined for a zero operand */
+	if (value == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	unsigned long highest_bit;
+	_BitScanReverse(&highest_bit, value);
+	return highest_bit + 1;
+#else
+	return 32 - __builtin_clz(value);
+#endif
+}
--- a/3rdparty/cpuinfo/src/emscripten/init.c
+++ b/3rdparty/cpuinfo/src/emscripten/init.c
@@ -0,0 +1,288 @@
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <emscripten/threading.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+/*
+ * Infinity stored in a volatile object; cpuinfo_emscripten_init() computes
+ * (infinity - infinity) from it and inspects the sign bit of the result.
+ * NOTE(review): volatile presumably prevents the compiler from folding that
+ * expression at build time - confirm.
+ */
+static const volatile float infinity = INFINITY;
+
+/* The single package description shared by every processor, core and cluster */
+static struct cpuinfo_package static_package = {};
+
+/*
+ * The single L3 cache description used when the host is classified as x86.
+ * processor_count is filled in by cpuinfo_emscripten_init().
+ */
+static struct cpuinfo_cache static_x86_l3 = {
+	.size = 2 * 1024 * 1024,
+	.associativity = 16,
+	.sets = 2048,
+	.partitions = 1,
+	.line_size = 64,
+};
+
+/*
+ * Builds a synthetic processor/core/cluster/cache topology for Emscripten
+ * builds and publishes it through the cpuinfo_* globals.
+ *
+ * The only real input is emscripten_num_logical_cores(); everything else is
+ * assumed:
+ *  - hosts classified as x86 with an even logical processor count get two
+ *    logical processors per core, one cluster, per-core L1/L2 caches and one
+ *    shared L3;
+ *  - all other hosts are treated as ARM/ARM64 with one logical processor per
+ *    core, and with a two-cluster big.LITTLE layout when there are more than
+ *    4 processors; L2 is per-cluster and there is no L3.
+ *
+ * On allocation failure an error is logged, everything allocated so far is
+ * released and the globals (including cpuinfo_is_initialized) are left
+ * untouched.
+ */
+void cpuinfo_emscripten_init(void) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+
+	/*
+	 * Heuristic host detection: the sign bit of the NaN produced by
+	 * (inf - inf) decides between the x86 and the ARM model.
+	 */
+	const bool is_x86 = signbit(infinity - infinity);
+
+	int logical_cores_count = emscripten_num_logical_cores();
+	if (logical_cores_count <= 0) {
+		/* Fall back to a single processor when the count is unavailable */
+		logical_cores_count = 1;
+	}
+	uint32_t processor_count = (uint32_t)logical_cores_count;
+	uint32_t core_count = processor_count;
+	uint32_t cluster_count = 1;
+	uint32_t big_cluster_core_count = core_count;
+	uint32_t processors_per_core = 1;
+	if (is_x86) {
+		if (processor_count % 2 == 0) {
+			/* Assume 2-way SMT when the processor count is even */
+			processors_per_core = 2;
+			core_count = processor_count / 2;
+			big_cluster_core_count = core_count;
+		}
+	} else {
+		/* Assume ARM/ARM64 */
+		if (processor_count > 4) {
+			/* Assume big.LITTLE architecture */
+			cluster_count = 2;
+			big_cluster_core_count = processor_count >= 8 ? 4 : 2;
+		}
+	}
+	uint32_t l2_count = is_x86 ? core_count : cluster_count;
+
+	processors = calloc(processor_count, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			processor_count * sizeof(struct cpuinfo_processor),
+			processor_count);
+		goto cleanup;
+	}
+	/* Only core_count entries are ever initialized and published */
+	cores = calloc(core_count, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			core_count * sizeof(struct cpuinfo_core),
+			core_count);
+		goto cleanup;
+	}
+	clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " clusters",
+			cluster_count * sizeof(struct cpuinfo_cluster),
+			cluster_count);
+		goto cleanup;
+	}
+
+	l1i = calloc(core_count, sizeof(struct cpuinfo_cache));
+	if (l1i == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+			core_count * sizeof(struct cpuinfo_cache),
+			core_count);
+		goto cleanup;
+	}
+
+	l1d = calloc(core_count, sizeof(struct cpuinfo_cache));
+	if (l1d == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+			core_count * sizeof(struct cpuinfo_cache),
+			core_count);
+		goto cleanup;
+	}
+
+	l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+	if (l2 == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+			l2_count * sizeof(struct cpuinfo_cache),
+			l2_count);
+		goto cleanup;
+	}
+
+	static_package.processor_count = processor_count;
+	static_package.core_count = core_count;
+	static_package.cluster_count = cluster_count;
+	if (is_x86) {
+		strncpy(static_package.name, "x86 vCPU", CPUINFO_PACKAGE_NAME_MAX);
+	} else {
+		strncpy(static_package.name, "ARM vCPU", CPUINFO_PACKAGE_NAME_MAX);
+	}
+
+	/* Describe every core together with its logical processors and L1 caches */
+	for (uint32_t i = 0; i < core_count; i++) {
+		for (uint32_t j = 0; j < processors_per_core; j++) {
+			processors[i * processors_per_core + j] = (struct cpuinfo_processor){
+				.smt_id = j,
+				.core = cores + i,
+				.cluster = clusters + (uint32_t)(i >= big_cluster_core_count),
+				.package = &static_package,
+				.cache.l1i = l1i + i,
+				.cache.l1d = l1d + i,
+				.cache.l2 = is_x86 ? l2 + i : l2 + (uint32_t)(i >= big_cluster_core_count),
+				.cache.l3 = is_x86 ? &static_x86_l3 : NULL,
+			};
+		}
+
+		cores[i] = (struct cpuinfo_core){
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+			.core_id = i,
+			.cluster = clusters + (uint32_t)(i >= big_cluster_core_count),
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		l1i[i] = (struct cpuinfo_cache){
+			.size = 32 * 1024,
+			.associativity = 4,
+			.sets = 128,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+		};
+
+		l1d[i] = (struct cpuinfo_cache){
+			.size = 32 * 1024,
+			.associativity = 4,
+			.sets = 128,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+		};
+
+		if (is_x86) {
+			/* x86 model: one private L2 per core */
+			l2[i] = (struct cpuinfo_cache){
+				.size = 256 * 1024,
+				.associativity = 8,
+				.sets = 512,
+				.partitions = 1,
+				.line_size = 64,
+				.processor_start = i * processors_per_core,
+				.processor_count = processors_per_core,
+			};
+		}
+	}
+
+	if (is_x86) {
+		clusters[0] = (struct cpuinfo_cluster){
+			.processor_start = 0,
+			.processor_count = processor_count,
+			.core_start = 0,
+			.core_count = core_count,
+			.cluster_id = 0,
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		static_x86_l3.processor_count = processor_count;
+	} else {
+		/* ARM model: one L2 per cluster; cluster 0 is the "big" cluster */
+		clusters[0] = (struct cpuinfo_cluster){
+			.processor_start = 0,
+			.processor_count = big_cluster_core_count,
+			.core_start = 0,
+			.core_count = big_cluster_core_count,
+			.cluster_id = 0,
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		l2[0] = (struct cpuinfo_cache){
+			.size = 1024 * 1024,
+			.associativity = 8,
+			.sets = 2048,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = 0,
+			.processor_count = big_cluster_core_count,
+		};
+
+		if (cluster_count > 1) {
+			l2[1] = (struct cpuinfo_cache){
+				.size = 256 * 1024,
+				.associativity = 8,
+				.sets = 512,
+				.partitions = 1,
+				.line_size = 64,
+				.processor_start = big_cluster_core_count,
+				.processor_count = processor_count - big_cluster_core_count,
+			};
+
+			clusters[1] = (struct cpuinfo_cluster){
+				.processor_start = big_cluster_core_count,
+				.processor_count = processor_count - big_cluster_core_count,
+				.core_start = big_cluster_core_count,
+				.core_count = processor_count - big_cluster_core_count,
+				.cluster_id = 1,
+				.package = &static_package,
+				.vendor = cpuinfo_vendor_unknown,
+				.uarch = cpuinfo_uarch_unknown,
+				.frequency = 0,
+			};
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	if (is_x86) {
+		cpuinfo_cache[cpuinfo_cache_level_3] = &static_x86_l3;
+	}
+
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = &static_package;
+
+	/*
+	 * The L1 arrays hold one entry per core, so the published counts must
+	 * be core_count; a larger count would let index-based lookups run past
+	 * the end of the arrays when processors_per_core is 2.
+	 */
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = core_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = core_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	if (is_x86) {
+		cpuinfo_cache_count[cpuinfo_cache_level_3] = 1;
+	}
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+		.uarch = cpuinfo_uarch_unknown,
+		.processor_count = processor_count,
+		.core_count = core_count,
+	};
+
+	cpuinfo_processors_count = processor_count;
+	/* Only core_count entries of the cores array are populated */
+	cpuinfo_cores_count = core_count;
+	cpuinfo_clusters_count = cluster_count;
+	cpuinfo_packages_count = 1;
+
+	cpuinfo_max_cache_size = is_x86 ? 128 * 1024 * 1024 : 8 * 1024 * 1024;
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership moved to the globals: keep the cleanup below from freeing them */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	l1i = l1d = l2 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(l1i);
+	free(l1d);
+	free(l2);
+}
--- a/3rdparty/cpuinfo/src/freebsd/api.h
+++ b/3rdparty/cpuinfo/src/freebsd/api.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <stdint.h>
+
+/* Processor topology counts produced by cpuinfo_freebsd_detect_topology(). */
+struct cpuinfo_freebsd_topology {
+	/* Number of matching <group> tags found in kern.sched.topology_spec; 0 indicates that detection failed */
+	uint32_t packages;
+	/* Value of the kern.smp.cores sysctl */
+	uint32_t cores;
+	/* Computed as threads_per_core * cores */
+	uint32_t threads;
+	/* Value of the kern.smp.threads_per_core sysctl */
+	uint32_t threads_per_core;
+};
+
+/*
+ * Queries the FreeBSD sysctl interface for the processor topology.
+ * The returned structure has packages == 0 when detection failed.
+ */
+struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void);
--- a/3rdparty/cpuinfo/src/freebsd/topology.c
+++ b/3rdparty/cpuinfo/src/freebsd/topology.c
@@ -0,0 +1,100 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <cpuinfo/log.h>
+#include <freebsd/api.h>
+
+/*
+ * Reads an integer sysctl by name.
+ * Logs an error when the query fails or when the reported value is not
+ * positive; a non-positive value is replaced by 0.
+ */
+static int sysctl_int(const char* name) {
+	int result = 0;
+	size_t result_size = sizeof(result);
+	const int rc = sysctlbyname(name, &result, &result_size, NULL, 0);
+	if (rc != 0) {
+		cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", name, strerror(errno));
+		return result;
+	}
+	if (result <= 0) {
+		cpuinfo_log_error("sysctlbyname(\"%s\") returned invalid value %d %zu", name, result, result_size);
+		return 0;
+	}
+	return result;
+}
+
+/*
+ * Reads a sysctl by name into a freshly allocated, zero-initialized buffer
+ * that is one byte larger than the size reported by the first query.
+ * Returns NULL (after logging an error) when either query fails, the reported
+ * size is zero, or the allocation fails. The caller owns the returned buffer
+ * and releases it with free().
+ */
+static char* sysctl_str(const char* name) {
+	/* First pass: ask only for the required size */
+	size_t buffer_size = 0;
+	if (sysctlbyname(name, NULL, &buffer_size, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", name, strerror(errno));
+		return NULL;
+	}
+	if (buffer_size == 0) {
+		cpuinfo_log_error("sysctlbyname(\"%s\") returned invalid value size %zu", name, buffer_size);
+		return NULL;
+	}
+
+	/* One extra zeroed byte beyond the reported size */
+	buffer_size += 1;
+	char* buffer = calloc(buffer_size, 1);
+	if (buffer == NULL) {
+		cpuinfo_log_error("calloc %zu bytes failed", buffer_size);
+		return NULL;
+	}
+
+	/* Second pass: fetch the actual contents */
+	if (sysctlbyname(name, buffer, &buffer_size, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", name, strerror(errno));
+		free(buffer);
+		return NULL;
+	}
+	return buffer;
+}
+
+/*
+ * Detects the processor topology from FreeBSD sysctls:
+ *  - packages: occurrences of a <group> tag in kern.sched.topology_spec (the
+ *    first tag pattern that matches at least once is used);
+ *  - cores: kern.smp.cores;
+ *  - threads_per_core: kern.smp.threads_per_core;
+ *  - threads: threads_per_core * cores.
+ * Any failure is reported by returning a structure with packages == 0.
+ */
+struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void) {
+	struct cpuinfo_freebsd_topology topology = {
+		.packages = 0,
+		.cores = 0,
+		.threads = 0,
+		.threads_per_core = 0,
+	};
+	char* topology_spec = sysctl_str("kern.sched.topology_spec");
+	if (topology_spec == NULL) {
+		return topology;
+	}
+
+	/* Tag patterns are tried in order until one of them matches */
+	const char* group_tags[] = {"<group level=\"2\" cache-level=\"0\">", "<group level=\"1\" "};
+	const size_t group_tag_count = sizeof(group_tags) / sizeof(group_tags[0]);
+	for (size_t tag_index = 0; tag_index < group_tag_count && topology.packages == 0; tag_index++) {
+		const char* group_tag = group_tags[tag_index];
+		for (const char* match = strstr(topology_spec, group_tag); match != NULL;
+		     match = strstr(match + 1, group_tag)) {
+			topology.packages += 1;
+		}
+	}
+
+	if (topology.packages == 0) {
+		cpuinfo_log_error("failed to parse topology_spec: %s", topology_spec);
+	}
+	free(topology_spec);
+	if (topology.packages == 0) {
+		goto fail;
+	}
+
+	topology.cores = sysctl_int("kern.smp.cores");
+	if (topology.cores == 0) {
+		goto fail;
+	}
+	if (topology.cores < topology.packages) {
+		cpuinfo_log_error("invalid numbers of package and core: %d %d", topology.packages, topology.cores);
+		goto fail;
+	}
+
+	topology.threads_per_core = sysctl_int("kern.smp.threads_per_core");
+	if (topology.threads_per_core == 0) {
+		goto fail;
+	}
+	topology.threads = topology.threads_per_core * topology.cores;
+	cpuinfo_log_debug(
+		"freebsd topology: packages = %d, cores = %d, "
+		"threads_per_core = %d",
+		topology.packages,
+		topology.cores,
+		topology.threads_per_core);
+	return topology;
+
+fail:
+	/* A zero package count is the failure signal */
+	topology.packages = 0;
+	return topology;
+}
--- a/3rdparty/cpuinfo/src/init.c
+++ b/3rdparty/cpuinfo/src/init.c
@@ -0,0 +1,67 @@
+#if defined(_WIN32) || defined(__CYGWIN__)
+#include <windows.h>
+#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
+#include <pthread.h>
+#endif
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#ifdef __APPLE__
+#include "TargetConditionals.h"
+#endif
+
+/*
+ * One-time initialization guard used by cpuinfo_initialize(): an INIT_ONCE on
+ * Windows/Cygwin, a pthread_once_t wherever pthreads are used, and a plain
+ * boolean flag for Emscripten builds without pthreads.
+ */
+#if defined(_WIN32) || defined(__CYGWIN__)
+static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
+#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
+static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
+#else
+static bool init_guard = false;
+#endif
+
+/*
+ * Runs the architecture- and OS-specific initialization routine at most once
+ * (guarded by init_guard) and reports whether the library is initialized.
+ *
+ * The routine is selected at compile time from the CPUINFO_ARCH_* macros and
+ * the operating-system macros. For an unsupported operating system or
+ * architecture only an error is logged and no routine is run.
+ *
+ * Returns the current value of cpuinfo_is_initialized.
+ */
+bool CPUINFO_ABI cpuinfo_initialize(void) {
+#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if defined(__MACH__) && defined(__APPLE__)
+	pthread_once(&init_guard, &cpuinfo_x86_mach_init);
+#elif defined(__FreeBSD__)
+	pthread_once(&init_guard, &cpuinfo_x86_freebsd_init);
+#elif defined(__linux__)
+	pthread_once(&init_guard, &cpuinfo_x86_linux_init);
+#elif defined(_WIN32) || defined(__CYGWIN__)
+	InitOnceExecuteOnce(&init_guard, &cpuinfo_x86_windows_init, NULL, NULL);
+#else
+	cpuinfo_log_error("operating system is not supported in cpuinfo");
+#endif
+#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if defined(__linux__)
+	pthread_once(&init_guard, &cpuinfo_arm_linux_init);
+#elif defined(__MACH__) && defined(__APPLE__)
+	pthread_once(&init_guard, &cpuinfo_arm_mach_init);
+#elif defined(_WIN32)
+	InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL);
+#else
+	cpuinfo_log_error("operating system is not supported in cpuinfo");
+#endif
+#elif CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
+#if defined(__linux__)
+	pthread_once(&init_guard, &cpuinfo_riscv_linux_init);
+#else
+	cpuinfo_log_error("operating system is not supported in cpuinfo");
+#endif
+#elif CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if defined(__EMSCRIPTEN_PTHREADS__)
+	pthread_once(&init_guard, &cpuinfo_emscripten_init);
+#else
+	/* No pthreads available: a plain flag ensures the routine runs only once */
+	if (!init_guard) {
+		cpuinfo_emscripten_init();
+	}
+	init_guard = true;
+#endif
+#else
+	cpuinfo_log_error("processor architecture is not supported in cpuinfo");
+#endif
+	return cpuinfo_is_initialized;
+}
+
+/* Counterpart of cpuinfo_initialize(); currently a no-op. */
+void CPUINFO_ABI cpuinfo_deinitialize(void) {}
--- a/3rdparty/cpuinfo/src/linux/api.h
+++ b/3rdparty/cpuinfo/src/linux/api.h
@@ -0,0 +1,94 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+#define CPUINFO_LINUX_FLAG_PRESENT UINT32_C(0x00000001)
+#define CPUINFO_LINUX_FLAG_POSSIBLE UINT32_C(0x00000002)
+#define CPUINFO_LINUX_FLAG_MAX_FREQUENCY UINT32_C(0x00000004)
+#define CPUINFO_LINUX_FLAG_MIN_FREQUENCY UINT32_C(0x00000008)
+#define CPUINFO_LINUX_FLAG_SMT_ID UINT32_C(0x00000010)
+#define CPUINFO_LINUX_FLAG_CORE_ID UINT32_C(0x00000020)
+#define CPUINFO_LINUX_FLAG_PACKAGE_ID UINT32_C(0x00000040)
+#define CPUINFO_LINUX_FLAG_APIC_ID UINT32_C(0x00000080)
+#define CPUINFO_LINUX_FLAG_SMT_CLUSTER UINT32_C(0x00000100)
+#define CPUINFO_LINUX_FLAG_CORE_CLUSTER UINT32_C(0x00000200)
+#define CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER UINT32_C(0x00000400)
+#define CPUINFO_LINUX_FLAG_PROC_CPUINFO UINT32_C(0x00000800)
+#define CPUINFO_LINUX_FLAG_VALID UINT32_C(0x00001000)
+#define CPUINFO_LINUX_FLAG_CUR_FREQUENCY UINT32_C(0x00002000)
+#define CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER UINT32_C(0x00004000)
+
+/*
+ * Callback invoked for every entry of a cpu list. Arguments are the first cpu
+ * of the entry, one past the last cpu of the entry (a single-cpu entry N is
+ * reported as N, N + 1), and the caller-supplied context.
+ */
+typedef bool (*cpuinfo_cpulist_callback)(uint32_t, uint32_t, void*);
+/*
+ * Parses a comma-separated cpu list (entries of the form <cpu> or
+ * <first>-<last>) from the given file and calls the callback for each entry.
+ * Returns false when the file cannot be opened or read, or when any entry is
+ * rejected or its callback returns false.
+ */
+CPUINFO_INTERNAL bool cpuinfo_linux_parse_cpulist(
+	const char* filename,
+	cpuinfo_cpulist_callback callback,
+	void* context);
+typedef bool (*cpuinfo_smallfile_callback)(const char*, const char*, const char*, void*);
+CPUINFO_INTERNAL bool cpuinfo_linux_parse_small_file(
+	const char* filename,
+	size_t buffer_size,
+	cpuinfo_smallfile_callback,
+	void* context);
+/*
+ * Callback invoked for every line of a file. Arguments are a pointer to the
+ * first character of the line, a pointer one past its last character (the
+ * newline is not included), the caller-supplied context, and the line number
+ * (starting at 1). Returning false stops the parsing.
+ */
+typedef bool (*cpuinfo_line_callback)(const char*, const char*, void*, uint64_t);
+/*
+ * Reads the file through a buffer of buffer_size bytes and calls the callback
+ * for each newline-terminated line and for the text left at end of file.
+ * Returns true only when the whole file was read and no callback returned
+ * false.
+ */
+CPUINFO_INTERNAL bool cpuinfo_linux_parse_multiline_file(
+	const char* filename,
+	size_t buffer_size,
+	cpuinfo_line_callback,
+	void* context);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_processors_count(void);
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count);
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count);
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_cur_frequency(uint32_t processor);
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor);
+CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor);
+CPUINFO_INTERNAL bool cpuinfo_linux_get_processor_package_id(
+	uint32_t processor,
+	uint32_t package_id[restrict static 1]);
+CPUINFO_INTERNAL bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id[restrict static 1]);
+
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_possible_processors(
+	uint32_t max_processors_count,
+	uint32_t* processor0_flags,
+	uint32_t processor_struct_size,
+	uint32_t possible_flag);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_present_processors(
+	uint32_t max_processors_count,
+	uint32_t* processor0_flags,
+	uint32_t processor_struct_size,
+	uint32_t present_flag);
+
+typedef bool (*cpuinfo_siblings_callback)(uint32_t, uint32_t, uint32_t, void*);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_core_siblings(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_thread_siblings(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_cluster_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_core_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context);
+CPUINFO_INTERNAL bool cpuinfo_linux_detect_package_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context);
+
+extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map;
+extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map;
--- a/3rdparty/cpuinfo/src/linux/cpulist.c
+++ b/3rdparty/cpuinfo/src/linux/cpulist.c
@@ -0,0 +1,242 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+/*
+ * Size, in chars, of the on-stack buffer used for parsing cpu lists.
+ * This is also the limit on the length of a single entry
+ * (<cpu-number> or <cpu-number-start>-<cpu-number-end>)
+ * in the cpu list.
+ */
+#define BUFFER_SIZE 256
+
+/* Locale-independent test for space, tab, line feed and carriage return */
+inline static bool is_whitespace(char c) {
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+/*
+ * Parses a run of decimal digits from [string, end) into *number_ptr.
+ * Returns a pointer to the first character that was not consumed; it equals
+ * `string` (and *number_ptr is 0) when no digit was found. No overflow check
+ * is performed on the accumulated value.
+ */
+inline static const char* parse_number(const char* string, const char* end, uint32_t number_ptr[restrict static 1]) {
+	uint32_t value = 0;
+	const char* cursor = string;
+	for (; cursor != end; cursor++) {
+		/* Any non-digit character maps to a value above 9 after the unsigned subtraction */
+		const uint32_t digit = (uint32_t)(*cursor) - (uint32_t)'0';
+		if (digit > 9) {
+			break;
+		}
+		value = value * UINT32_C(10) + digit;
+	}
+	*number_ptr = value;
+	return cursor;
+}
+
+/*
+ * Parses one cpu list entry from [entry_start, entry_end) and reports it to
+ * the callback.
+ *
+ * Accepted forms, after trimming surrounding whitespace, are "<cpu>" and
+ * "<first>-<last>". The callback receives the range as (first, last + 1,
+ * context). Trailing characters after a valid "<first>-<last>" are ignored
+ * with a warning.
+ *
+ * Returns false (after logging a warning) for an empty entry, an entry that
+ * does not start with a digit, a separator other than '-', a missing second
+ * number, or a range whose end is below its start; otherwise returns the
+ * callback's result.
+ */
+inline static bool parse_entry(
+	const char* entry_start,
+	const char* entry_end,
+	cpuinfo_cpulist_callback callback,
+	void* context) {
+	/* Skip whitespace at the beginning of an entry */
+	for (; entry_start != entry_end; entry_start++) {
+		if (!is_whitespace(*entry_start)) {
+			break;
+		}
+	}
+	/* Skip whitespace at the end of an entry */
+	for (; entry_end != entry_start; entry_end--) {
+		if (!is_whitespace(entry_end[-1])) {
+			break;
+		}
+	}
+
+	const size_t entry_length = (size_t)(entry_end - entry_start);
+	if (entry_length == 0) {
+		cpuinfo_log_warning("unexpected zero-length cpu list entry ignored");
+		return false;
+	}
+
+#if CPUINFO_LOG_DEBUG_PARSERS
+	cpuinfo_log_debug("parse cpu list entry \"%.*s\" (%zu chars)", (int)entry_length, entry_start, entry_length);
+#endif
+	uint32_t first_cpu, last_cpu;
+
+	const char* number_end = parse_number(entry_start, entry_end, &first_cpu);
+	if (number_end == entry_start) {
+		/* Failed to parse the number; ignore the entry */
+		cpuinfo_log_warning(
+			"invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored",
+			entry_start[0],
+			(int)entry_length,
+			entry_start);
+		return false;
+	} else if (number_end == entry_end) {
+		/* Completely parsed the entry */
+#if CPUINFO_LOG_DEBUG_PARSERS
+		cpuinfo_log_debug(
+			"cpulist: call callback with list_start = %" PRIu32 ", list_end = %" PRIu32,
+			first_cpu,
+			first_cpu + 1);
+#endif
+		return callback(first_cpu, first_cpu + 1, context);
+	}
+
+	/* Parse the second part of the entry */
+	if (*number_end != '-') {
+		cpuinfo_log_warning(
+			"invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored",
+			*number_end,
+			(int)entry_length,
+			entry_start);
+		return false;
+	}
+
+	const char* number_start = number_end + 1;
+	number_end = parse_number(number_start, entry_end, &last_cpu);
+	if (number_end == number_start) {
+		/* Failed to parse the second number; ignore the entry */
+		cpuinfo_log_warning(
+			"invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored",
+			*number_start,
+			(int)entry_length,
+			entry_start);
+		return false;
+	}
+
+	if (number_end != entry_end) {
+		/* Partially parsed the entry; ignore unparsed characters and
+		 * continue with the parsed part. The unparsed characters are
+		 * the ones in [number_end, entry_end). */
+		cpuinfo_log_warning(
+			"ignored invalid characters \"%.*s\" at the end of cpu list entry \"%.*s\"",
+			(int)(entry_end - number_end),
+			number_end,
+			(int)entry_length,
+			entry_start);
+	}
+
+	if (last_cpu < first_cpu) {
+		cpuinfo_log_warning(
+			"ignored cpu list entry \"%.*s\": invalid range %" PRIu32 "-%" PRIu32,
+			(int)entry_length,
+			entry_start,
+			first_cpu,
+			last_cpu);
+		return false;
+	}
+
+	/* Parsed both parts of the entry; update CPU set */
+#if CPUINFO_LOG_DEBUG_PARSERS
+	cpuinfo_log_debug(
+		"cpulist: call callback with list_start = %" PRIu32 ", list_end = %" PRIu32, first_cpu, last_cpu + 1);
+#endif
+	return callback(first_cpu, last_cpu + 1, context);
+}
+
+/*
+ * Parses a comma-separated cpu list from `filename`, handing every entry to
+ * parse_entry(), which in turn reports it to `callback` with `context`.
+ *
+ * The file is read in chunks into a BUFFER_SIZE-byte stack buffer. Each entry
+ * terminated by ',' is parsed as soon as it is complete; an unterminated tail
+ * is moved to the front of the buffer and completed by the next read. When
+ * read reports end of file, whatever remains in the buffer is parsed as the
+ * final entry.
+ *
+ * Returns true only when the file was opened and read successfully and every
+ * parse_entry() call returned true. In CPUINFO_MOCK builds the mock file
+ * functions are used instead of open/read/close.
+ */
+bool cpuinfo_linux_parse_cpulist(const char* filename, cpuinfo_cpulist_callback callback, void* context) {
+	bool status = true;
+	int file = -1;
+	char buffer[BUFFER_SIZE];
+#if CPUINFO_LOG_DEBUG_PARSERS
+	cpuinfo_log_debug("parsing cpu list from file %s", filename);
+#endif
+
+#if CPUINFO_MOCK
+	file = cpuinfo_mock_open(filename, O_RDONLY);
+#else
+	file = open(filename, O_RDONLY);
+#endif
+	if (file == -1) {
+		cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno));
+		status = false;
+		goto cleanup;
+	}
+
+	/* Total bytes read so far; only used in the read-failure message */
+	size_t position = 0;
+	const char* buffer_end = &buffer[BUFFER_SIZE];
+	/* Where the next read stores data: just past any carried-over partial entry */
+	char* data_start = buffer;
+	ssize_t bytes_read;
+	do {
+		/*
+		 * NOTE(review): if a partial entry fills the whole buffer, the
+		 * requested size below is 0; that read presumably returns 0 and
+		 * is then handled as end of file - confirm.
+		 */
+#if CPUINFO_MOCK
+		bytes_read = cpuinfo_mock_read(file, data_start, (size_t)(buffer_end - data_start));
+#else
+		bytes_read = read(file, data_start, (size_t)(buffer_end - data_start));
+#endif
+		if (bytes_read < 0) {
+			cpuinfo_log_info(
+				"failed to read file %s at position %zu: %s", filename, position, strerror(errno));
+			status = false;
+			goto cleanup;
+		}
+
+		position += (size_t)bytes_read;
+		const char* data_end = data_start + (size_t)bytes_read;
+		/* Scanning restarts at the buffer start so the carried-over tail is included */
+		const char* entry_start = buffer;
+
+		if (bytes_read == 0) {
+			/* No more data in the file: process the remaining text
+			 * in the buffer as a single entry */
+			const char* entry_end = data_end;
+			const bool entry_status = parse_entry(entry_start, entry_end, callback, context);
+			status &= entry_status;
+		} else {
+			const char* entry_end;
+			do {
+				/* Find the end of the entry, as indicated by a
+				 * comma (',') */
+				for (entry_end = entry_start; entry_end != data_end; entry_end++) {
+					if (*entry_end == ',') {
+						break;
+					}
+				}
+
+				/*
+				 * If we located separator at the end of the
+				 * entry, parse it. Otherwise, there may be more
+				 * data at the end; read the file once again.
+				 */
+				if (entry_end != data_end) {
+					const bool entry_status =
+						parse_entry(entry_start, entry_end, callback, context);
+					status &= entry_status;
+					/* Continue right after the comma */
+					entry_start = entry_end + 1;
+				}
+			} while (entry_end != data_end);
+
+			/* Move remaining partial entry data at the end to the
+			 * beginning of the buffer */
+			const size_t entry_length = (size_t)(entry_end - entry_start);
+			memmove(buffer, entry_start, entry_length);
+			data_start = &buffer[entry_length];
+		}
+	} while (bytes_read != 0);
+
+cleanup:
+	if (file != -1) {
+#if CPUINFO_MOCK
+		cpuinfo_mock_close(file);
+#else
+		close(file);
+#endif
+		file = -1;
+	}
+	return status;
+}
--- a/3rdparty/cpuinfo/src/linux/mockfile.c
+++ b/3rdparty/cpuinfo/src/linux/mockfile.c
@@ -0,0 +1,103 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if !CPUINFO_MOCK
+#error This file should be built only in mock mode
+#endif
+
+#include <arm/linux/api.h>
+#include <arm/midr.h>
+#include <cpuinfo-mock.h>
+#include <cpuinfo/log.h>
+
+static struct cpuinfo_mock_file* cpuinfo_mock_files = NULL;
+static uint32_t cpuinfo_mock_file_count = 0;
+
+/*
+ * Installs `files` as the mock filesystem.
+ * The array is terminated by an entry whose path is NULL; every entry before
+ * it is marked as not opened (offset == SIZE_MAX). The array is referenced,
+ * not copied.
+ */
+void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files) {
+	cpuinfo_log_info("filesystem mocking enabled");
+	uint32_t entries = 0;
+	for (struct cpuinfo_mock_file* entry = files; entry->path != NULL; entry++) {
+		/* Indicate that file is not opened */
+		entry->offset = SIZE_MAX;
+		entries++;
+	}
+	cpuinfo_mock_files = files;
+	cpuinfo_mock_file_count = entries;
+}
+
+/*
+ * Mock replacement for open().
+ * Without an installed mock filesystem the call is forwarded to open().
+ * Otherwise the path is looked up among the mock files and the index of the
+ * match is returned as the descriptor. Fails with -1 and errno set to
+ * EACCES (flags other than O_RDONLY), ENFILE (file is already open) or
+ * ENOENT (no mock file with this path).
+ */
+int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag) {
+	if (cpuinfo_mock_files == NULL) {
+		cpuinfo_log_warning("cpuinfo_mock_open called without mock filesystem; redictering to open");
+		return open(path, oflag);
+	}
+
+	for (uint32_t index = 0; index < cpuinfo_mock_file_count; index++) {
+		struct cpuinfo_mock_file* entry = &cpuinfo_mock_files[index];
+		if (strcmp(entry->path, path) != 0) {
+			continue;
+		}
+		if (oflag != O_RDONLY) {
+			errno = EACCES;
+			return -1;
+		}
+		/* Each mock file can be open only once at a time */
+		if (entry->offset != SIZE_MAX) {
+			errno = ENFILE;
+			return -1;
+		}
+		entry->offset = 0;
+		return (int)index;
+	}
+
+	errno = ENOENT;
+	return -1;
+}
+
+/*
+ * Mock replacement for close().
+ * Without an installed mock filesystem the call is forwarded to close().
+ * Otherwise marks the mock file as not opened and returns 0, or fails with
+ * -1 and errno == EBADF when the descriptor is out of range or not open.
+ */
+int CPUINFO_ABI cpuinfo_mock_close(int fd) {
+	if (cpuinfo_mock_files == NULL) {
+		cpuinfo_log_warning("cpuinfo_mock_close called without mock filesystem; redictering to close");
+		return close(fd);
+	}
+
+	/* The range check must come first: it guards the array access */
+	if ((unsigned int)fd >= cpuinfo_mock_file_count || cpuinfo_mock_files[fd].offset == SIZE_MAX) {
+		errno = EBADF;
+		return -1;
+	}
+
+	cpuinfo_mock_files[fd].offset = SIZE_MAX;
+	return 0;
+}
+
+/*
+ * Mock replacement for read().
+ * Without an installed mock filesystem the call is forwarded to read().
+ * Otherwise copies up to `capacity` bytes from the mock file's content,
+ * starting at its current offset, into `buffer`, advances the offset and
+ * returns the number of bytes copied (0 once the whole content was consumed).
+ * Fails with -1 and errno == EBADF when the descriptor is out of range or the
+ * file is not open.
+ */
+ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity) {
+	if (cpuinfo_mock_files == NULL) {
+		cpuinfo_log_warning("cpuinfo_mock_read called without mock filesystem; redictering to read");
+		return read(fd, buffer, capacity);
+	}
+
+	if ((unsigned int)fd >= cpuinfo_mock_file_count) {
+		errno = EBADF;
+		return -1;
+	}
+	if (cpuinfo_mock_files[fd].offset == SIZE_MAX) {
+		errno = EBADF;
+		return -1;
+	}
+
+	const size_t offset = cpuinfo_mock_files[fd].offset;
+	size_t count = cpuinfo_mock_files[fd].size - offset;
+	if (count > capacity) {
+		count = capacity;
+	}
+	/* Byte pointer for the offset: arithmetic on void* is not valid ISO C */
+	memcpy(buffer, (const char*)cpuinfo_mock_files[fd].content + offset, count);
+	cpuinfo_mock_files[fd].offset += count;
+	return (ssize_t)count;
+}
--- a/3rdparty/cpuinfo/src/linux/multiline.c
+++ b/3rdparty/cpuinfo/src/linux/multiline.c
@@ -0,0 +1,113 @@
+#include <alloca.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+/*
+ * Reads `filename` line by line and passes each line to `callback` as
+ * (line_start, line_end, context, line_number), where line_end points at the
+ * terminating newline (or at the end of the data for the last piece) and line
+ * numbers start at 1.
+ *
+ * The file is read in chunks into a stack buffer of `buffer_size` bytes
+ * obtained with alloca(). Complete lines are reported as soon as their '\n'
+ * is seen; an unterminated tail is moved to the front of the buffer and
+ * completed by the next read. When read reports end of file, the remaining
+ * buffer content (possibly empty) is reported as one more line.
+ *
+ * Returns true when the whole file was processed; false when the file cannot
+ * be opened or read, or as soon as a callback returns false. In CPUINFO_MOCK
+ * builds the mock file functions are used instead of open/read/close.
+ */
+bool cpuinfo_linux_parse_multiline_file(
+	const char* filename,
+	size_t buffer_size,
+	cpuinfo_line_callback callback,
+	void* context) {
+	int file = -1;
+	bool status = false;
+	/* Stack allocation: released automatically when the function returns */
+	char* buffer = (char*)alloca(buffer_size);
+
+#if CPUINFO_MOCK
+	file = cpuinfo_mock_open(filename, O_RDONLY);
+#else
+	file = open(filename, O_RDONLY);
+#endif
+	if (file == -1) {
+		cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno));
+		goto cleanup;
+	}
+
+	/* Only used for error reporting */
+	size_t position = 0;
+	uint64_t line_number = 1;
+	const char* buffer_end = &buffer[buffer_size];
+	/* Where the next read stores data: just past any carried-over partial line */
+	char* data_start = buffer;
+	ssize_t bytes_read;
+	do {
+		/*
+		 * NOTE(review): if a partial line fills the whole buffer, the
+		 * requested size below is 0; that read presumably returns 0 and
+		 * is then handled as end of file - confirm.
+		 */
+#if CPUINFO_MOCK
+		bytes_read = cpuinfo_mock_read(file, data_start, (size_t)(buffer_end - data_start));
+#else
+		bytes_read = read(file, data_start, (size_t)(buffer_end - data_start));
+#endif
+		if (bytes_read < 0) {
+			cpuinfo_log_info(
+				"failed to read file %s at position %zu: %s", filename, position, strerror(errno));
+			goto cleanup;
+		}
+
+		position += (size_t)bytes_read;
+		const char* data_end = data_start + (size_t)bytes_read;
+		/* Scanning restarts at the buffer start so the carried-over tail is included */
+		const char* line_start = buffer;
+
+		if (bytes_read == 0) {
+			/* No more data in the file: process the remaining text
+			 * in the buffer as a single entry */
+			const char* line_end = data_end;
+			if (!callback(line_start, line_end, context, line_number)) {
+				goto cleanup;
+			}
+		} else {
+			const char* line_end;
+			do {
+				/* Find the end of the entry, as indicated by
+				 * newline character ('\n')
+				 */
+				for (line_end = line_start; line_end != data_end; line_end++) {
+					if (*line_end == '\n') {
+						break;
+					}
+				}
+
+				/*
+				 * If we located separator at the end of the
+				 * entry, parse it. Otherwise, there may be more
+				 * data at the end; read the file once again.
+				 */
+				if (line_end != data_end) {
+					if (!callback(line_start, line_end, context, line_number++)) {
+						goto cleanup;
+					}
+					/* Continue right after the newline */
+					line_start = line_end + 1;
+				}
+			} while (line_end != data_end);
+
+			/* Move remaining partial line data at the end to the
+			 * beginning of the buffer */
+			const size_t line_length = (size_t)(line_end - line_start);
+			memmove(buffer, line_start, line_length);
+			data_start = &buffer[line_length];
+		}
+	} while (bytes_read != 0);
+
+	/* Commit */
+	status = true;
+
+cleanup:
+	if (file != -1) {
+#if CPUINFO_MOCK
+		cpuinfo_mock_close(file);
+#else
+		close(file);
+#endif
+		file = -1;
+	}
+	return status;
+}
--- a/3rdparty/cpuinfo/src/linux/processors.c
+++ b/3rdparty/cpuinfo/src/linux/processors.c
@@ -0,0 +1,580 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if !defined(__ANDROID__)
+/*
+ * sched.h is only used for CPU_SETSIZE constant.
+ * Android NDK headers before platform 21 do not have this constant in sched.h
+ */
+#include <sched.h>
+#endif
+
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+/*
+ * Turns its argument into a string literal. The '#' operator does not
+ * macro-expand its operand, so STRINGIFY(UINT32_MAX) yields the text
+ * "UINT32_MAX" (10 characters) rather than the numeric value. That is the same
+ * length as the longest decimal uint32_t ("4294967295"), so the
+ * *_FILENAME_SIZE macros below still reserve enough room for any processor
+ * number.
+ */
+#define STRINGIFY(token) #token
+
+#define KERNEL_MAX_FILENAME "/sys/devices/system/cpu/kernel_max"
+#define KERNEL_MAX_FILESIZE 32
+#define FREQUENCY_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/cpufreq/cpuinfo_max_freq"))
+#define CUR_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_cur_freq"
+#define MAX_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_max_freq"
+#define MIN_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_min_freq"
+#define FREQUENCY_FILESIZE 32
+#define PACKAGE_ID_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/physical_package_id"))
+#define PACKAGE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/physical_package_id"
+#define PACKAGE_ID_FILESIZE 32
+#define CORE_ID_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_id"))
+#define CORE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id"
+#define CORE_ID_FILESIZE 32
+
+#define CORE_CPUS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_cpus_list"))
+#define CORE_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_cpus_list"
+#define CORE_SIBLINGS_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_siblings_list"))
+#define CORE_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_siblings_list"
+#define CLUSTER_CPUS_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/cluster_cpus_list"))
+#define CLUSTER_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/cluster_cpus_list"
+#define PACKAGE_CPUS_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/package_cpus_list"))
+#define PACKAGE_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/package_cpus_list"
+#define THREAD_SIBLINGS_FILENAME_SIZE \
+	(sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/thread_siblings_list"))
+#define THREAD_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/thread_siblings_list"
+
+#define POSSIBLE_CPULIST_FILENAME "/sys/devices/system/cpu/possible"
+#define PRESENT_CPULIST_FILENAME "/sys/devices/system/cpu/present"
+
+/*
+ * Parses a run of decimal digits at the beginning of [start, end).
+ *
+ * The accumulated value is always stored through number_ptr (0 when no digit
+ * was consumed). Returns a pointer to the first character that was not
+ * consumed, which equals start when the text does not begin with a digit.
+ */
+inline static const char* parse_number(const char* start, const char* end, uint32_t number_ptr[restrict static 1]) {
+	uint32_t value = 0;
+	const char* cursor = start;
+	while (cursor != end) {
+		/* Unsigned subtraction maps every non-digit byte to a value above 9 */
+		const uint32_t digit = (uint32_t)(uint8_t)(*cursor) - (uint32_t)'0';
+		if (digit > 9) {
+			break;
+		}
+		value = value * UINT32_C(10) + digit;
+		cursor += 1;
+	}
+	*number_ptr = value;
+	return cursor;
+}
+
+/* Locale-independent */
+inline static bool is_whitespace(char c) {
+	/* Only space, tab, line feed and carriage return count as whitespace */
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+/*
+ * Fallback limit on the number of logical processors. It is returned by
+ * cpuinfo_linux_get_max_processors_count() when the kernel_max file cannot be
+ * parsed, and is the threshold for its "exceeds platform-default limit"
+ * warning.
+ */
+#if defined(__ANDROID__) && !defined(CPU_SETSIZE)
+/*
+ * Android NDK headers before platform 21 do not define CPU_SETSIZE,
+ * so we hard-code its value, as defined in platform 21 headers
+ */
+#if defined(__LP64__)
+static const uint32_t default_max_processors_count = 1024;
+#else
+static const uint32_t default_max_processors_count = 32;
+#endif
+#else
+static const uint32_t default_max_processors_count = CPU_SETSIZE;
+#endif
+
+/*
+ * Small-file callback that parses the file contents [text_start, text_end) as
+ * one unsigned decimal number.
+ *
+ * @param filename - name of the file being parsed; used in log messages only.
+ * @param text_start - first character of the file contents.
+ * @param text_end - one past the last character of the file contents.
+ * @param context - pointer to the uint32_t that receives the parsed value.
+ *
+ * @returns true and stores the value on success; false (leaving *context
+ *          untouched) when the file is empty or does not start with a digit.
+ */
+static bool uint32_parser(const char* filename, const char* text_start, const char* text_end, void* context) {
+	if (text_start == text_end) {
+		/* Report the file actually being parsed: this callback is shared
+		 * by every single-number sysfs file, not just kernel_max */
+		cpuinfo_log_error("failed to parse file %s: file is empty", filename);
+		return false;
+	}
+
+	uint32_t number = 0;
+	const char* parsed_end = parse_number(text_start, text_end, &number);
+	if (parsed_end == text_start) {
+		cpuinfo_log_error(
+			"failed to parse file %s: \"%.*s\" is not an unsigned number",
+			filename,
+			(int)(text_end - text_start),
+			text_start);
+		return false;
+	}
+
+	/* Trailing whitespace is expected; anything else is reported once and
+	 * otherwise ignored */
+	for (const char* char_ptr = parsed_end; char_ptr != text_end; char_ptr++) {
+		if (!is_whitespace(*char_ptr)) {
+			cpuinfo_log_warning(
+				"non-whitespace characters \"%.*s\" following number in file %s are ignored",
+				(int)(text_end - char_ptr),
+				char_ptr,
+				filename);
+			break;
+		}
+	}
+
+	uint32_t* number_ptr = (uint32_t*)context;
+	*number_ptr = number;
+	return true;
+}
+
+/*
+ * Returns the number of processor slots to plan for: kernel_max + 1 when the
+ * kernel_max file can be parsed, otherwise the platform default.
+ */
+uint32_t cpuinfo_linux_get_max_processors_count(void) {
+	uint32_t kernel_max;
+	if (!cpuinfo_linux_parse_small_file(KERNEL_MAX_FILENAME, KERNEL_MAX_FILESIZE, uint32_parser, &kernel_max)) {
+		cpuinfo_log_warning(
+			"using platform-default max processors count = %" PRIu32, default_max_processors_count);
+		return default_max_processors_count;
+	}
+
+	cpuinfo_log_debug("parsed kernel_max value of %" PRIu32 " from %s", kernel_max, KERNEL_MAX_FILENAME);
+
+	/* A larger-than-default value is only reported, not clamped */
+	if (kernel_max >= default_max_processors_count) {
+		cpuinfo_log_warning(
+			"kernel_max value of %" PRIu32
+			" parsed from %s exceeds platform-default limit %" PRIu32,
+			kernel_max,
+			KERNEL_MAX_FILENAME,
+			default_max_processors_count - 1);
+	}
+
+	/* kernel_max is the highest processor index, so the count is one more */
+	return kernel_max + 1;
+}
+
+/*
+ * Reads the current frequency of a logical processor from its
+ * cpufreq/cpuinfo_cur_freq sysfs file.
+ *
+ * @param processor - index of the logical processor.
+ *
+ * @returns the parsed value (logged as KHz), or 0 when the filename cannot be
+ *          formatted or the file cannot be parsed.
+ */
+uint32_t cpuinfo_linux_get_processor_cur_frequency(uint32_t processor) {
+	char cur_frequency_filename[FREQUENCY_FILENAME_SIZE];
+	const int chars_formatted =
+		snprintf(cur_frequency_filename, FREQUENCY_FILENAME_SIZE, CUR_FREQUENCY_FILENAME_FORMAT, processor);
+	/* The unsigned cast turns a negative snprintf result into a huge value,
+	 * so formatting errors are rejected together with truncation */
+	if ((unsigned int)chars_formatted >= FREQUENCY_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for current frequency of processor %" PRIu32, processor);
+		return 0;
+	}
+
+	uint32_t cur_frequency;
+	if (cpuinfo_linux_parse_small_file(cur_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &cur_frequency)) {
+		cpuinfo_log_debug(
+			"parsed current frequency value of %" PRIu32 " KHz for logical processor %" PRIu32 " from %s",
+			cur_frequency,
+			processor,
+			cur_frequency_filename);
+		return cur_frequency;
+	} else {
+		cpuinfo_log_warning(
+			"failed to parse current frequency for processor %" PRIu32 " from %s",
+			processor,
+			cur_frequency_filename);
+		return 0;
+	}
+}
+
+/*
+ * Reads the maximum frequency of a logical processor from its
+ * cpufreq/cpuinfo_max_freq sysfs file. Returns the parsed value (logged as
+ * KHz), or 0 on any failure.
+ */
+uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor) {
+	char path[FREQUENCY_FILENAME_SIZE];
+	const int path_length = snprintf(path, FREQUENCY_FILENAME_SIZE, MAX_FREQUENCY_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= FREQUENCY_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for max frequency of processor %" PRIu32, processor);
+		return 0;
+	}
+
+	uint32_t frequency;
+	if (!cpuinfo_linux_parse_small_file(path, FREQUENCY_FILESIZE, uint32_parser, &frequency)) {
+		cpuinfo_log_warning("failed to parse max frequency for processor %" PRIu32 " from %s", processor, path);
+		return 0;
+	}
+
+	cpuinfo_log_debug(
+		"parsed max frequency value of %" PRIu32 " KHz for logical processor %" PRIu32 " from %s",
+		frequency,
+		processor,
+		path);
+	return frequency;
+}
+
+/*
+ * Reads the minimum frequency of a logical processor from its
+ * cpufreq/cpuinfo_min_freq sysfs file. Returns the parsed value (logged as
+ * KHz), or 0 on any failure.
+ */
+uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor) {
+	char path[FREQUENCY_FILENAME_SIZE];
+	const int path_length = snprintf(path, FREQUENCY_FILENAME_SIZE, MIN_FREQUENCY_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= FREQUENCY_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for min frequency of processor %" PRIu32, processor);
+		return 0;
+	}
+
+	uint32_t frequency;
+	if (!cpuinfo_linux_parse_small_file(path, FREQUENCY_FILESIZE, uint32_parser, &frequency)) {
+		/*
+		 * Logged at info level only: a missing min frequency matters
+		 * less than a missing max frequency, because min frequency is
+		 * only useful for clustering, while max frequency is also
+		 * needed for peak FLOPS calculation.
+		 */
+		cpuinfo_log_info("failed to parse min frequency for processor %" PRIu32 " from %s", processor, path);
+		return 0;
+	}
+
+	cpuinfo_log_debug(
+		"parsed min frequency value of %" PRIu32 " KHz for logical processor %" PRIu32 " from %s",
+		frequency,
+		processor,
+		path);
+	return frequency;
+}
+
+/*
+ * Reads the core id of a logical processor from its topology/core_id sysfs
+ * file.
+ *
+ * @param processor - index of the logical processor.
+ * @param core_id_ptr - receives the core id; written only on success.
+ *
+ * @returns true on success, false when the filename cannot be formatted or the
+ *          file cannot be parsed.
+ */
+bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id_ptr[restrict static 1]) {
+	/* Size the buffer with the same constant that bounds snprintf below */
+	char core_id_filename[CORE_ID_FILENAME_SIZE];
+	const int chars_formatted =
+		snprintf(core_id_filename, CORE_ID_FILENAME_SIZE, CORE_ID_FILENAME_FORMAT, processor);
+	/* The unsigned cast turns a negative snprintf result into a huge value,
+	 * so formatting errors are rejected together with truncation */
+	if ((unsigned int)chars_formatted >= CORE_ID_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for core id of processor %" PRIu32, processor);
+		return false;
+	}
+
+	uint32_t core_id;
+	if (cpuinfo_linux_parse_small_file(core_id_filename, CORE_ID_FILESIZE, uint32_parser, &core_id)) {
+		cpuinfo_log_debug(
+			"parsed core id value of %" PRIu32 " for logical processor %" PRIu32 " from %s",
+			core_id,
+			processor,
+			core_id_filename);
+		*core_id_ptr = core_id;
+		return true;
+	} else {
+		cpuinfo_log_info(
+			"failed to parse core id for processor %" PRIu32 " from %s", processor, core_id_filename);
+		return false;
+	}
+}
+
+/*
+ * Reads the package id of a logical processor from its
+ * topology/physical_package_id sysfs file.
+ *
+ * @param processor - index of the logical processor.
+ * @param package_id_ptr - receives the package id; written only on success.
+ *
+ * @returns true on success, false when the filename cannot be formatted or the
+ *          file cannot be parsed.
+ */
+bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id_ptr[restrict static 1]) {
+	char package_id_filename[PACKAGE_ID_FILENAME_SIZE];
+	const int chars_formatted =
+		snprintf(package_id_filename, PACKAGE_ID_FILENAME_SIZE, PACKAGE_ID_FILENAME_FORMAT, processor);
+	/* The unsigned cast turns a negative snprintf result into a huge value,
+	 * so formatting errors are rejected together with truncation */
+	if ((unsigned int)chars_formatted >= PACKAGE_ID_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for package id of processor %" PRIu32, processor);
+		return false;
+	}
+
+	uint32_t package_id;
+	if (cpuinfo_linux_parse_small_file(package_id_filename, PACKAGE_ID_FILESIZE, uint32_parser, &package_id)) {
+		cpuinfo_log_debug(
+			"parsed package id value of %" PRIu32 " for logical processor %" PRIu32 " from %s",
+			package_id,
+			processor,
+			package_id_filename);
+		*package_id_ptr = package_id;
+		return true;
+	} else {
+		cpuinfo_log_info(
+			"failed to parse package id for processor %" PRIu32 " from %s", processor, package_id_filename);
+		return false;
+	}
+}
+
+/*
+ * Cpulist callback that keeps a running maximum: raises the uint32_t behind
+ * context to the last processor of the range [processor_list_start,
+ * processor_list_end) when that is larger. Always returns true.
+ */
+static bool max_processor_number_parser(uint32_t processor_list_start, uint32_t processor_list_end, void* context) {
+	uint32_t* highest_seen = (uint32_t*)context;
+	/* The range end is exclusive, so the last listed processor is end - 1 */
+	const uint32_t last_in_range = processor_list_end - 1;
+	*highest_seen = (last_in_range > *highest_seen) ? last_in_range : *highest_seen;
+	return true;
+}
+
+/*
+ * Returns the highest processor number found in the "possible" cpulist,
+ * truncated to max_processors_count - 1, or UINT32_MAX when the list cannot
+ * be parsed.
+ */
+uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count) {
+	uint32_t highest_possible = 0;
+	const bool parsed =
+		cpuinfo_linux_parse_cpulist(POSSIBLE_CPULIST_FILENAME, max_processor_number_parser, &highest_possible);
+	if (!parsed) {
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		cpuinfo_log_error("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
+#else
+		cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
+#endif
+		return UINT32_MAX;
+	}
+
+	if (highest_possible < max_processors_count) {
+		return highest_possible;
+	}
+
+	/* The list reaches past the system limit: report and truncate */
+	const uint32_t highest_allowed = max_processors_count - 1;
+	cpuinfo_log_warning(
+		"maximum possible processor number %" PRIu32 " exceeds system limit %" PRIu32
+		": truncating to the latter",
+		highest_possible,
+		highest_allowed);
+	return highest_allowed;
+}
+
+/*
+ * Returns the highest processor number found in the "present" cpulist,
+ * truncated to max_processors_count - 1, or UINT32_MAX when the list cannot
+ * be parsed.
+ */
+uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count) {
+	uint32_t highest_present = 0;
+	const bool parsed =
+		cpuinfo_linux_parse_cpulist(PRESENT_CPULIST_FILENAME, max_processor_number_parser, &highest_present);
+	if (!parsed) {
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		cpuinfo_log_error("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
+#else
+		cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
+#endif
+		return UINT32_MAX;
+	}
+
+	if (highest_present < max_processors_count) {
+		return highest_present;
+	}
+
+	/* The list reaches past the system limit: report and truncate */
+	const uint32_t highest_allowed = max_processors_count - 1;
+	cpuinfo_log_warning(
+		"maximum present processor number %" PRIu32 " exceeds system limit %" PRIu32
+		": truncating to the latter",
+		highest_present,
+		highest_allowed);
+	return highest_allowed;
+}
+
+/* Context handed to detect_processor_parser through the cpulist parser. */
+struct detect_processors_context {
+	/* Processor numbers at or above this value are skipped */
+	uint32_t max_processors_count;
+	/* Address of the flags word belonging to processor 0 */
+	uint32_t* processor0_flags;
+	/* Distance, in bytes, between the flags words of consecutive processors */
+	uint32_t processor_struct_size;
+	/* Bit(s) OR-ed into the flags word of every listed processor */
+	uint32_t detected_flag;
+};
+
+/*
+ * Cpulist callback that ORs detected_flag into the flags word of every
+ * processor in [processor_list_start, processor_list_end) that lies below
+ * max_processors_count. context points to a struct detect_processors_context.
+ * Always returns true.
+ */
+static bool detect_processor_parser(uint32_t processor_list_start, uint32_t processor_list_end, void* context) {
+	const struct detect_processors_context* state = (const struct detect_processors_context*)context;
+	const uintptr_t flags_base = (uintptr_t)state->processor0_flags;
+	const uint32_t stride = state->processor_struct_size;
+	const uint32_t flag = state->detected_flag;
+
+	/* Processors at or beyond the limit are ignored */
+	const uint32_t stop = (processor_list_end < state->max_processors_count) ? processor_list_end
+										: state->max_processors_count;
+
+	for (uint32_t index = processor_list_start; index < stop; index++) {
+		/* Flags words are stride bytes apart, starting at processor 0 */
+		uint32_t* flags = (uint32_t*)(flags_base + stride * index);
+		*flags |= flag;
+	}
+	return true;
+}
+
+/*
+ * Sets possible_flag in the flags word of every processor named in the
+ * "possible" cpulist. Flags words start at processor0_flags and are
+ * processor_struct_size bytes apart; processors at or above
+ * max_processors_count are skipped. Returns false when the list cannot be
+ * parsed.
+ */
+bool cpuinfo_linux_detect_possible_processors(
+	uint32_t max_processors_count,
+	uint32_t* processor0_flags,
+	uint32_t processor_struct_size,
+	uint32_t possible_flag) {
+	struct detect_processors_context parser_state = {
+		.max_processors_count = max_processors_count,
+		.processor0_flags = processor0_flags,
+		.processor_struct_size = processor_struct_size,
+		.detected_flag = possible_flag,
+	};
+	const bool parsed =
+		cpuinfo_linux_parse_cpulist(POSSIBLE_CPULIST_FILENAME, detect_processor_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
+	}
+	return parsed;
+}
+
+/*
+ * Sets present_flag in the flags word of every processor named in the
+ * "present" cpulist. Flags words start at processor0_flags and are
+ * processor_struct_size bytes apart; processors at or above
+ * max_processors_count are skipped. Returns false when the list cannot be
+ * parsed.
+ */
+bool cpuinfo_linux_detect_present_processors(
+	uint32_t max_processors_count,
+	uint32_t* processor0_flags,
+	uint32_t processor_struct_size,
+	uint32_t present_flag) {
+	struct detect_processors_context parser_state = {
+		.max_processors_count = max_processors_count,
+		.processor0_flags = processor0_flags,
+		.processor_struct_size = processor_struct_size,
+		.detected_flag = present_flag,
+	};
+	const bool parsed =
+		cpuinfo_linux_parse_cpulist(PRESENT_CPULIST_FILENAME, detect_processor_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
+	}
+	return parsed;
+}
+
+/* Context handed to siblings_parser through the cpulist parser. */
+struct siblings_context {
+	/* Name of the sibling group; used only in the warning message */
+	const char* group_name;
+	/* Exclusive upper bound applied to the end of each sibling range */
+	uint32_t max_processors_count;
+	/* Processor whose siblings are being listed; passed on to callback */
+	uint32_t processor;
+	/* Invoked for every (possibly clamped) sibling range */
+	cpuinfo_siblings_callback callback;
+	/* Opaque pointer forwarded to callback */
+	void* callback_context;
+};
+
+/*
+ * Cpulist callback that forwards one sibling range to the user callback.
+ *
+ * The context parameter is declared as void* so that this function has the
+ * same signature as the other cpulist callbacks in this file. It is always
+ * invoked through that callback type, and calling a function through a
+ * pointer of an incompatible type is undefined behaviour.
+ *
+ * @param sibling_list_start - first processor of the range.
+ * @param sibling_list_end - one past the last processor of the range; clamped
+ *                           to max_processors_count (with a warning) when
+ *                           larger.
+ * @param context - pointer to a struct siblings_context.
+ *
+ * @returns whatever the user callback returns.
+ */
+static bool siblings_parser(uint32_t sibling_list_start, uint32_t sibling_list_end, void* context) {
+	const struct siblings_context* siblings = (const struct siblings_context*)context;
+	const char* group_name = siblings->group_name;
+	const uint32_t max_processors_count = siblings->max_processors_count;
+	const uint32_t processor = siblings->processor;
+
+	if (sibling_list_end > max_processors_count) {
+		cpuinfo_log_warning(
+			"ignore %s siblings %" PRIu32 "-%" PRIu32 " of processor %" PRIu32,
+			group_name,
+			max_processors_count,
+			sibling_list_end - 1,
+			processor);
+		sibling_list_end = max_processors_count;
+	}
+
+	return siblings->callback(processor, sibling_list_start, sibling_list_end, siblings->callback_context);
+}
+
+/*
+ * Reports the processors listed in topology/core_cpus_list of the given
+ * processor.
+ *
+ * @param max_processors_count - exclusive upper bound on reported processor
+ *                               numbers; ranges reaching past it are clamped.
+ * @param processor - logical processor whose list is read.
+ * @param callback - invoked for every range in the list.
+ * @param context - opaque pointer forwarded to callback.
+ *
+ * @returns true when the list was parsed, false when the filename could not
+ *          be formatted or parsing failed.
+ */
+bool cpuinfo_linux_detect_core_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context) {
+	char core_cpus_filename[CORE_CPUS_FILENAME_SIZE];
+	const int chars_formatted =
+		snprintf(core_cpus_filename, CORE_CPUS_FILENAME_SIZE, CORE_CPUS_FILENAME_FORMAT, processor);
+	/* The unsigned cast turns a negative snprintf result into a huge value,
+	 * so formatting errors are rejected together with truncation */
+	if ((unsigned int)chars_formatted >= CORE_CPUS_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for core cpus of processor %" PRIu32, processor);
+		return false;
+	}
+
+	struct siblings_context siblings_context = {
+		/* Same group as thread siblings: the warning then reads
+		 * "ignore core siblings ..." like the other detectors */
+		.group_name = "core",
+		.max_processors_count = max_processors_count,
+		.processor = processor,
+		.callback = callback,
+		.callback_context = context,
+	};
+	if (cpuinfo_linux_parse_cpulist(
+		    core_cpus_filename, (cpuinfo_cpulist_callback)siblings_parser, &siblings_context)) {
+		return true;
+	} else {
+		cpuinfo_log_info(
+			"failed to parse the list of core cpus for processor %" PRIu32 " from %s",
+			processor,
+			core_cpus_filename);
+		return false;
+	}
+}
+
+/*
+ * Reports the processors listed in topology/core_siblings_list of the given
+ * processor to callback, one range at a time. Returns false when the filename
+ * cannot be formatted or the list cannot be parsed.
+ */
+bool cpuinfo_linux_detect_core_siblings(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context) {
+	char path[CORE_SIBLINGS_FILENAME_SIZE];
+	const int path_length = snprintf(path, CORE_SIBLINGS_FILENAME_SIZE, CORE_SIBLINGS_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= CORE_SIBLINGS_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for core siblings of processor %" PRIu32, processor);
+		return false;
+	}
+
+	struct siblings_context parser_state = {
+		.group_name = "package",
+		.max_processors_count = max_processors_count,
+		.processor = processor,
+		.callback = callback,
+		.callback_context = context,
+	};
+	const bool parsed = cpuinfo_linux_parse_cpulist(path, (cpuinfo_cpulist_callback)siblings_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_info(
+			"failed to parse the list of core siblings for processor %" PRIu32 " from %s", processor, path);
+	}
+	return parsed;
+}
+
+/*
+ * Reports the processors listed in topology/thread_siblings_list of the given
+ * processor to callback, one range at a time. Returns false when the filename
+ * cannot be formatted or the list cannot be parsed.
+ */
+bool cpuinfo_linux_detect_thread_siblings(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context) {
+	char path[THREAD_SIBLINGS_FILENAME_SIZE];
+	const int path_length =
+		snprintf(path, THREAD_SIBLINGS_FILENAME_SIZE, THREAD_SIBLINGS_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= THREAD_SIBLINGS_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for thread siblings of processor %" PRIu32, processor);
+		return false;
+	}
+
+	struct siblings_context parser_state = {
+		.group_name = "core",
+		.max_processors_count = max_processors_count,
+		.processor = processor,
+		.callback = callback,
+		.callback_context = context,
+	};
+	const bool parsed = cpuinfo_linux_parse_cpulist(path, (cpuinfo_cpulist_callback)siblings_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_info(
+			"failed to parse the list of thread siblings for processor %" PRIu32 " from %s", processor, path);
+	}
+	return parsed;
+}
+
+/*
+ * Reports the processors listed in topology/cluster_cpus_list of the given
+ * processor to callback, one range at a time. Returns false when the filename
+ * cannot be formatted or the list cannot be parsed.
+ */
+bool cpuinfo_linux_detect_cluster_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context) {
+	char path[CLUSTER_CPUS_FILENAME_SIZE];
+	const int path_length = snprintf(path, CLUSTER_CPUS_FILENAME_SIZE, CLUSTER_CPUS_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= CLUSTER_CPUS_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for cluster cpus of processor %" PRIu32, processor);
+		return false;
+	}
+
+	struct siblings_context parser_state = {
+		.group_name = "cluster",
+		.max_processors_count = max_processors_count,
+		.processor = processor,
+		.callback = callback,
+		.callback_context = context,
+	};
+	const bool parsed = cpuinfo_linux_parse_cpulist(path, (cpuinfo_cpulist_callback)siblings_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_info(
+			"failed to parse the list of cluster cpus for processor %" PRIu32 " from %s", processor, path);
+	}
+	return parsed;
+}
+
+/*
+ * Reports the processors listed in topology/package_cpus_list of the given
+ * processor to callback, one range at a time. Returns false when the filename
+ * cannot be formatted or the list cannot be parsed.
+ */
+bool cpuinfo_linux_detect_package_cpus(
+	uint32_t max_processors_count,
+	uint32_t processor,
+	cpuinfo_siblings_callback callback,
+	void* context) {
+	char path[PACKAGE_CPUS_FILENAME_SIZE];
+	const int path_length = snprintf(path, PACKAGE_CPUS_FILENAME_SIZE, PACKAGE_CPUS_FILENAME_FORMAT, processor);
+	/* Rejects both truncation and a negative (error) snprintf result */
+	if ((unsigned int)path_length >= PACKAGE_CPUS_FILENAME_SIZE) {
+		cpuinfo_log_warning("failed to format filename for package cpus of processor %" PRIu32, processor);
+		return false;
+	}
+
+	struct siblings_context parser_state = {
+		.group_name = "package",
+		.max_processors_count = max_processors_count,
+		.processor = processor,
+		.callback = callback,
+		.callback_context = context,
+	};
+	const bool parsed = cpuinfo_linux_parse_cpulist(path, (cpuinfo_cpulist_callback)siblings_parser, &parser_state);
+	if (!parsed) {
+		cpuinfo_log_info(
+			"failed to parse the list of package cpus for processor %" PRIu32 " from %s", processor, path);
+	}
+	return parsed;
+}
--- a/3rdparty/cpuinfo/src/linux/smallfile.c
+++ b/3rdparty/cpuinfo/src/linux/smallfile.c
@@ -0,0 +1,78 @@
+#include <alloca.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+
+/*
+ * Reads an entire small file into a temporary buffer and hands its contents
+ * to a callback.
+ *
+ * @param filename - path of the file to read.
+ * @param buffer_size - size, in bytes, of the temporary buffer; the file
+ *                      contents must be strictly smaller than this.
+ * @param callback - invoked once with the filename, the start and the end of
+ *                   the data that was read, and context.
+ * @param context - opaque pointer forwarded to callback.
+ *
+ * @returns the callback's return value, or false when the file could not be
+ *          opened, could not be read, or filled the whole buffer.
+ */
+bool cpuinfo_linux_parse_small_file(
+	const char* filename,
+	size_t buffer_size,
+	cpuinfo_smallfile_callback callback,
+	void* context) {
+	int file = -1;
+	bool status = false;
+	/* Temporary buffer obtained from alloca(), sized by the caller */
+	char* buffer = (char*)alloca(buffer_size);
+
+#if CPUINFO_LOG_DEBUG_PARSERS
+	cpuinfo_log_debug("parsing small file %s", filename);
+#endif
+
+#if CPUINFO_MOCK
+	file = cpuinfo_mock_open(filename, O_RDONLY);
+#else
+	file = open(filename, O_RDONLY);
+#endif
+	if (file == -1) {
+		cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno));
+		goto cleanup;
+	}
+
+	/* Keep reading until read() reports end-of-file (0 bytes) */
+	size_t buffer_position = 0;
+	ssize_t bytes_read;
+	do {
+#if CPUINFO_MOCK
+		bytes_read = cpuinfo_mock_read(file, &buffer[buffer_position], buffer_size - buffer_position);
+#else
+		bytes_read = read(file, &buffer[buffer_position], buffer_size - buffer_position);
+#endif
+		if (bytes_read < 0) {
+			cpuinfo_log_info(
+				"failed to read file %s at position %zu: %s",
+				filename,
+				buffer_position,
+				strerror(errno));
+			goto cleanup;
+		}
+		buffer_position += (size_t)bytes_read;
+		/* A completely full buffer is treated as "file too large" */
+		if (buffer_position >= buffer_size) {
+			cpuinfo_log_error(
+				"failed to read file %s: insufficient buffer of size %zu", filename, buffer_size);
+			goto cleanup;
+		}
+	} while (bytes_read != 0);
+
+	/* The callback receives the data as the range [buffer, buffer + length) */
+	status = callback(filename, buffer, &buffer[buffer_position], context);
+
+cleanup:
+	/* Close the descriptor on every exit path that managed to open it */
+	if (file != -1) {
+#if CPUINFO_MOCK
+		cpuinfo_mock_close(file);
+#else
+		close(file);
+#endif
+		file = -1;
+	}
+	return status;
+}
--- a/3rdparty/cpuinfo/src/log.c
+++ b/3rdparty/cpuinfo/src/log.c
@@ -0,0 +1,203 @@
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+#if defined(__ANDROID__)
+#include <android/log.h>
+#endif
+#if defined(__hexagon__)
+#include <qurt_printf.h>
+#endif
+
+#ifndef CPUINFO_LOG_TO_STDIO
+#if defined(__ANDROID__)
+#define CPUINFO_LOG_TO_STDIO 0
+#else
+#define CPUINFO_LOG_TO_STDIO 1
+#endif
+#endif
+
+#include <cpuinfo/log.h>
+
+/* Messages up to this size are formatted entirely on-stack, and don't allocate
+ * heap memory */
+#define CPUINFO_LOG_STACK_BUFFER_SIZE 1024
+
+#ifdef _WIN32
+#define CPUINFO_LOG_NEWLINE_LENGTH 2
+
+#define CPUINFO_LOG_STDERR STD_ERROR_HANDLE
+#define CPUINFO_LOG_STDOUT STD_OUTPUT_HANDLE
+#elif defined(__hexagon__)
+#define CPUINFO_LOG_NEWLINE_LENGTH 1
+
+#define CPUINFO_LOG_STDERR 0
+#define CPUINFO_LOG_STDOUT 0
+#else
+#define CPUINFO_LOG_NEWLINE_LENGTH 1
+
+#define CPUINFO_LOG_STDERR STDERR_FILENO
+#define CPUINFO_LOG_STDOUT STDOUT_FILENO
+#endif
+
+#if CPUINFO_LOG_TO_STDIO
+/*
+ * Formats one log message as prefix + formatted text + newline and writes it
+ * to the given output.
+ *
+ * Messages that fit into CPUINFO_LOG_STACK_BUFFER_SIZE are built on the stack;
+ * longer ones are re-formatted into a heap buffer. A message that cannot be
+ * formatted or allocated is dropped silently.
+ *
+ * @param output_handle - platform output identifier (CPUINFO_LOG_STDOUT or
+ *                        CPUINFO_LOG_STDERR).
+ * @param prefix - prefix text; it does not need to be NUL-terminated.
+ * @param prefix_length - number of characters in prefix.
+ * @param format - printf-style format string.
+ * @param args - arguments for format.
+ */
+static void cpuinfo_vlog(
+	int output_handle,
+	const char* prefix,
+	size_t prefix_length,
+	const char* format,
+	va_list args) {
+	char stack_buffer[CPUINFO_LOG_STACK_BUFFER_SIZE];
+	char* heap_buffer = NULL;
+	char* out_buffer = &stack_buffer[0];
+
+	/* The first call to vsnprintf will clobber args, thus need a copy in
+	 * case a second vsnprintf call is needed */
+	va_list args_copy;
+	va_copy(args_copy, args);
+
+	/* Check that prefix and newline fit BEFORE copying the prefix into
+	 * the stack buffer, so the check guards the copy */
+	assert((prefix_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char) <= CPUINFO_LOG_STACK_BUFFER_SIZE);
+	memcpy(stack_buffer, prefix, prefix_length * sizeof(char));
+
+	const int format_chars = vsnprintf(
+		&stack_buffer[prefix_length],
+		CPUINFO_LOG_STACK_BUFFER_SIZE - (prefix_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char),
+		format,
+		args);
+	if (format_chars < 0) {
+		/* Format error in the message: silently ignore this particular
+		 * message. */
+		goto cleanup;
+	}
+	const size_t format_length = (size_t)format_chars;
+	if ((prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char) >
+	    CPUINFO_LOG_STACK_BUFFER_SIZE) {
+		/* Allocate a buffer on heap, and vsnprintf to this buffer */
+		const size_t heap_buffer_size =
+			(prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char);
+#if _WIN32
+		heap_buffer = HeapAlloc(GetProcessHeap(), 0, heap_buffer_size);
+#else
+		heap_buffer = malloc(heap_buffer_size);
+#endif
+		if (heap_buffer == NULL) {
+			goto cleanup;
+		}
+
+		/* Copy pre-formatted prefix into the on-heap buffer */
+		memcpy(heap_buffer, prefix, prefix_length * sizeof(char));
+		vsnprintf(
+			&heap_buffer[prefix_length],
+			(format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char),
+			format,
+			args_copy);
+		out_buffer = heap_buffer;
+	}
+#ifdef _WIN32
+	/* Overwrite the terminating NUL (and the byte after it) with CR LF */
+	out_buffer[prefix_length + format_length] = '\r';
+	out_buffer[prefix_length + format_length + 1] = '\n';
+
+	DWORD bytes_written;
+	WriteFile(
+		GetStdHandle((DWORD)output_handle),
+		out_buffer,
+		(prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char),
+		&bytes_written,
+		NULL);
+#elif defined(__hexagon__)
+	qurt_printf("%s", out_buffer);
+#else
+	/* Overwrite the terminating NUL with the newline */
+	out_buffer[prefix_length + format_length] = '\n';
+
+	ssize_t bytes_written = write(
+		output_handle, out_buffer, (prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char));
+	(void)bytes_written;
+#endif
+
+cleanup:
+	/* heap_buffer is NULL unless the heap path was taken */
+#ifdef _WIN32
+	HeapFree(GetProcessHeap(), 0, heap_buffer);
+#else
+	free(heap_buffer);
+#endif
+	va_end(args_copy);
+}
+#elif defined(__ANDROID__) && CPUINFO_LOG_LEVEL > CPUINFO_LOG_NONE
+/*
+ * Tag passed to __android_log_vprint by the Android logging paths below.
+ * NOTE(review): the tag reads "XNNPACK" rather than "cpuinfo" - confirm that
+ * this is intentional.
+ */
+static const char cpuinfo_module[] = "XNNPACK";
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG
+/* Emits a debug-level message: standard output, or the Android log. */
+void cpuinfo_vlog_debug(const char* format, va_list args) {
+#if CPUINFO_LOG_TO_STDIO
+	/* "Debug (cpuinfo): ", stored without a terminating NUL */
+	static const char prefix[] = {
+		'D', 'e', 'b', 'u', 'g', ' ', '(', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ')', ':', ' '};
+	cpuinfo_vlog(CPUINFO_LOG_STDOUT, prefix, sizeof(prefix), format, args);
+#elif defined(__ANDROID__)
+	__android_log_vprint(ANDROID_LOG_DEBUG, cpuinfo_module, format, args);
+#else
+#error "Platform-specific implementation required"
+#endif
+}
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO
+/* Emits an info-level message: standard output, or the Android log. */
+void cpuinfo_vlog_info(const char* format, va_list args) {
+#if CPUINFO_LOG_TO_STDIO
+	/* "Note (cpuinfo): ", stored without a terminating NUL */
+	static const char prefix[] = {
+		'N', 'o', 't', 'e', ' ', '(', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ')', ':', ' '};
+	cpuinfo_vlog(CPUINFO_LOG_STDOUT, prefix, sizeof(prefix), format, args);
+#elif defined(__ANDROID__)
+	__android_log_vprint(ANDROID_LOG_INFO, cpuinfo_module, format, args);
+#else
+#error "Platform-specific implementation required"
+#endif
+}
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING
+/* Emits a warning-level message: standard error, or the Android log. */
+void cpuinfo_vlog_warning(const char* format, va_list args) {
+#if CPUINFO_LOG_TO_STDIO
+	/* "Warning in cpuinfo: ", stored without a terminating NUL */
+	static const char prefix[] = {'W', 'a', 'r', 'n', 'i', 'n', 'g', ' ', 'i', 'n',
+				      ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' '};
+	cpuinfo_vlog(CPUINFO_LOG_STDERR, prefix, sizeof(prefix), format, args);
+#elif defined(__ANDROID__)
+	__android_log_vprint(ANDROID_LOG_WARN, cpuinfo_module, format, args);
+#else
+#error "Platform-specific implementation required"
+#endif
+}
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR
+/* Emits an error-level message: standard error, or the Android log. */
+void cpuinfo_vlog_error(const char* format, va_list args) {
+#if CPUINFO_LOG_TO_STDIO
+	/* "Error in cpuinfo: ", stored without a terminating NUL */
+	static const char prefix[] = {
+		'E', 'r', 'r', 'o', 'r', ' ', 'i', 'n', ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' '};
+	cpuinfo_vlog(CPUINFO_LOG_STDERR, prefix, sizeof(prefix), format, args);
+#elif defined(__ANDROID__)
+	__android_log_vprint(ANDROID_LOG_ERROR, cpuinfo_module, format, args);
+#else
+#error "Platform-specific implementation required"
+#endif
+}
+#endif
+
+#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL
+/* Emits a fatal-level message: standard error, or the Android log. */
+void cpuinfo_vlog_fatal(const char* format, va_list args) {
+#if CPUINFO_LOG_TO_STDIO
+	/* "Fatal error in cpuinfo: ", stored without a terminating NUL */
+	static const char prefix[] = {'F', 'a', 't', 'a', 'l', ' ', 'e', 'r', 'r', 'o', 'r', ' ',
+				      'i', 'n', ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' '};
+	cpuinfo_vlog(CPUINFO_LOG_STDERR, prefix, sizeof(prefix), format, args);
+#elif defined(__ANDROID__)
+	__android_log_vprint(ANDROID_LOG_FATAL, cpuinfo_module, format, args);
+#else
+#error "Platform-specific implementation required"
+#endif
+}
+#endif
--- a/3rdparty/cpuinfo/src/mach/api.h
+++ b/3rdparty/cpuinfo/src/mach/api.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <stdint.h>
+
+#define CPUINFO_MACH_MAX_CACHE_LEVELS 8
+
+/* Topology counts returned by cpuinfo_mach_detect_topology(). */
+struct cpuinfo_mach_topology {
+	/* Value of the hw.packages sysctl (1 when it is not queried) */
+	uint32_t packages;
+	/* Value of the hw.physicalcpu_max sysctl */
+	uint32_t cores;
+	/* Value of the hw.logicalcpu_max sysctl */
+	uint32_t threads;
+	/* Entries copied from the hw.cacheconfig sysctl, at most
+	 * CPUINFO_MACH_MAX_CACHE_LEVELS of them; the rest stay zero */
+	uint32_t threads_per_cache[CPUINFO_MACH_MAX_CACHE_LEVELS];
+};
+
+struct cpuinfo_mach_topology cpuinfo_mach_detect_topology(void);
--- a/3rdparty/cpuinfo/src/mach/topology.c
+++ b/3rdparty/cpuinfo/src/mach/topology.c
@@ -0,0 +1,69 @@
+#include <alloca.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <cpuinfo/log.h>
+#include <mach/api.h>
+
+#include <TargetConditionals.h>
+
+/*
+ * Queries sysctl for the number of packages, physical cores and logical
+ * threads and, when not building for iPhone targets, the hw.cacheconfig
+ * array.
+ *
+ * Every query that fails or returns a non-positive value is logged and
+ * replaced by a fallback: 1 for cores and packages, the core count for
+ * threads.
+ *
+ * @returns the detected topology; threads_per_cache entries that were not
+ *          reported stay zero.
+ */
+struct cpuinfo_mach_topology cpuinfo_mach_detect_topology(void) {
+	int cores = 1;
+	size_t sizeof_cores = sizeof(cores);
+	if (sysctlbyname("hw.physicalcpu_max", &cores, &sizeof_cores, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.physicalcpu_max\") failed: %s", strerror(errno));
+	} else if (cores <= 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.physicalcpu_max\") returned invalid value %d", cores);
+		cores = 1;
+	}
+
+	int threads = 1;
+	size_t sizeof_threads = sizeof(threads);
+	if (sysctlbyname("hw.logicalcpu_max", &threads, &sizeof_threads, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.logicalcpu_max\") failed: %s", strerror(errno));
+		/* Same fallback as for an invalid value: one thread per core,
+		 * so that threads is never smaller than cores */
+		threads = cores;
+	} else if (threads <= 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.logicalcpu_max\") returned invalid value %d", threads);
+		threads = cores;
+	}
+
+	int packages = 1;
+#if !TARGET_OS_IPHONE
+	size_t sizeof_packages = sizeof(packages);
+	if (sysctlbyname("hw.packages", &packages, &sizeof_packages, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.packages\") failed: %s", strerror(errno));
+	} else if (packages <= 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.packages\") returned invalid value %d", packages);
+		packages = 1;
+	}
+#endif
+
+	cpuinfo_log_debug("mach topology: packages = %d, cores = %d, threads = %d", packages, (int)cores, (int)threads);
+	/* Members not named here (threads_per_cache) are zero-initialized */
+	struct cpuinfo_mach_topology topology = {
+		.packages = (uint32_t)packages, .cores = (uint32_t)cores, .threads = (uint32_t)threads};
+
+#if !TARGET_OS_IPHONE
+	/* First call asks only for the size of the hw.cacheconfig array */
+	size_t cacheconfig_size = 0;
+	if (sysctlbyname("hw.cacheconfig", NULL, &cacheconfig_size, NULL, 0) != 0) {
+		cpuinfo_log_error("sysctlbyname(\"hw.cacheconfig\") failed: %s", strerror(errno));
+	} else {
+		uint64_t* cacheconfig = alloca(cacheconfig_size);
+		if (sysctlbyname("hw.cacheconfig", cacheconfig, &cacheconfig_size, NULL, 0) != 0) {
+			cpuinfo_log_error("sysctlbyname(\"hw.cacheconfig\") failed: %s", strerror(errno));
+		} else {
+			size_t cache_configs = cacheconfig_size / sizeof(uint64_t);
+			cpuinfo_log_debug("mach hw.cacheconfig count: %zu", cache_configs);
+			/* Entries beyond the fixed-size array are dropped */
+			if (cache_configs > CPUINFO_MACH_MAX_CACHE_LEVELS) {
+				cache_configs = CPUINFO_MACH_MAX_CACHE_LEVELS;
+			}
+			for (size_t i = 0; i < cache_configs; i++) {
+				cpuinfo_log_debug("mach hw.cacheconfig[%zu]: %" PRIu64, i, cacheconfig[i]);
+				/* Stored as 32-bit; make the narrowing explicit */
+				topology.threads_per_cache[i] = (uint32_t)cacheconfig[i];
+			}
+		}
+	}
+#endif
+	return topology;
+}
--- a/3rdparty/cpuinfo/src/riscv/api.h
+++ b/3rdparty/cpuinfo/src/riscv/api.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/*
+ * RISC-V Vendor IDs.
+ *
+ * These values are matched against the raw vendor ID handed to
+ * cpuinfo_riscv_decode_vendor_uarch(). The '_max' enumerator carries no
+ * explicit value, so it is simply one greater than the last listed vendor.
+ */
+enum cpuinfo_riscv_chipset_vendor {
+	cpuinfo_riscv_chipset_vendor_unknown = 0,
+	cpuinfo_riscv_chipset_vendor_sifive = 0x489,
+	cpuinfo_riscv_chipset_vendor_max,
+};
+
+/*
+ * RISC-V Architecture IDs.
+ *
+ * No specific architecture is listed yet; only the 'unknown' value and the
+ * trailing '_max' enumerator exist.
+ */
+enum cpuinfo_riscv_chipset_arch {
+	cpuinfo_riscv_chipset_arch_unknown = 0,
+	cpuinfo_riscv_chipset_arch_max,
+};
+
+/*
+ * RISC-V Implementation IDs.
+ *
+ * No specific implementation is listed yet; only the 'unknown' value and the
+ * trailing '_max' enumerator exist.
+ */
+enum cpuinfo_riscv_chipset_impl {
+	cpuinfo_riscv_chipset_impl_unknown = 0,
+	cpuinfo_riscv_chipset_impl_max,
+};
+
+/**
+ * Decodes the vendor and micro-architecture based on the provided input
+ * parameters, regardless of underlying operating system.
+ *
+ * Both output references are always written by this call.
+ *
+ * @param[vendor_id]: The 'mvendorid' as described by the RISC-V Manual.
+ * @param[arch_id]: The 'marchid' as described by the RISC-V Manual.
+ * @param[imp_id]: The 'mimpid' as described by the RISC-V Manual.
+ * @param[vendor] - Reference to the cpuinfo_vendor to populate.
+ * @param[uarch] - Reference to the cpuinfo_uarch to populate.
+ */
+CPUINFO_INTERNAL void cpuinfo_riscv_decode_vendor_uarch(
+	uint32_t vendor_id,
+	uint32_t arch_id,
+	uint32_t imp_id,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]);
--- a/3rdparty/cpuinfo/src/riscv/linux/api.h
+++ b/3rdparty/cpuinfo/src/riscv/linux/api.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/**
+ * Definition of a RISC-V Linux processor. It is composed of the base processor
+ * definition in "include/cpuinfo.h" and flags specific to RISC-V Linux
+ * implementations.
+ *
+ * During initialization one entry is kept per Linux processor ID; the
+ * embedded public structures are later copied into the final public arrays.
+ */
+struct cpuinfo_riscv_linux_processor {
+	/* Public ABI cpuinfo structures. */
+	struct cpuinfo_processor processor;
+	struct cpuinfo_core core;
+	struct cpuinfo_cluster cluster;
+	struct cpuinfo_package package;
+
+	/**
+	 * Linux-specific flags for the logical processor:
+	 * - Bit field that can be masked with CPUINFO_LINUX_FLAG_*.
+	 */
+	uint32_t flags;
+
+	/**
+	 * Minimum processor ID on the cluster which includes this logical
+	 * processor. This value can serve as an ID for the cluster of logical
+	 * processors: it is the same for all logical processors on the same
+	 * cluster.
+	 */
+	uint32_t cluster_leader_id;
+
+	/**
+	 * Minimum processor ID on the core which includes this logical
+	 * processor. This value can serve as an ID for the core of logical
+	 * processors: it is the same for all logical processors on the same
+	 * core.
+	 */
+	uint32_t core_leader_id;
+
+	/**
+	 * Minimum processor ID on the package which includes this logical
+	 * processor. This value can serve as an ID for the package of logical
+	 * processors: it is the same for all logical processors on the same
+	 * package.
+	 */
+	uint32_t package_leader_id;
+};
+
+/**
+ * Reads AT_HWCAP from `getauxval` and populates the cpuinfo_riscv_isa
+ * structure.
+ *
+ * Only the fields of extensions reported in AT_HWCAP are written (set to
+ * true); every other field is left exactly as the caller provided it.
+ *
+ * @param[isa] - Reference to cpuinfo_riscv_isa structure to populate.
+ */
+CPUINFO_INTERNAL void cpuinfo_riscv_linux_decode_isa_from_hwcap(struct cpuinfo_riscv_isa isa[restrict static 1]);
+
+/**
+ * Reads `sys_riscv_hwprobe` and determines the processor vendor and
+ * micro-architecture.
+ *
+ * Both outputs are always written: they are set to their 'unknown' values
+ * first and only refined when the hwprobe query succeeds.
+ *
+ * @param[processor] - The Linux ID of the target processor.
+ * @param[vendor] - Reference to the cpuinfo_vendor to populate.
+ * @param[uarch] - Reference to the cpuinfo_uarch to populate.
+ */
+CPUINFO_INTERNAL void cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe(
+	uint32_t processor,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]);
+
+/* Used to determine which uarch is associated with the current thread. */
+extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
--- a/3rdparty/cpuinfo/src/riscv/linux/init.c
+++ b/3rdparty/cpuinfo/src/riscv/linux/init.c
@@ -0,0 +1,619 @@
+#include <string.h>
+
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+#include <riscv/linux/api.h>
+
+/* ISA structure to hold supported extensions. */
+struct cpuinfo_riscv_isa cpuinfo_isa;
+
+/* Returns true only when every bit set in 'mask' is also set in 'flags'. */
+static inline bool bitmask_all(uint32_t flags, uint32_t mask) {
+	/* Bits requested by the mask that the flags do not provide. */
+	const uint32_t missing = mask & ~flags;
+	return missing == 0;
+}
+
+/**
+ * qsort() comparator that orders processor entries by micro-architecture,
+ * breaking ties by ascending Linux processor ID.
+ *
+ * qsort() does not guarantee a stable sort. The tie-break keeps entries that
+ * share a micro-architecture in ascending Linux ID order regardless of the C
+ * library's sorting algorithm, so a leader (the smallest ID of its group) is
+ * still visited before the other members when the sorted list is scanned.
+ *
+ * Explicit comparisons are used rather than subtraction so the sign of the
+ * result is correct for any pair of values.
+ *
+ * @param[a] - Pointer to the first cpuinfo_riscv_linux_processor.
+ * @param[b] - Pointer to the second cpuinfo_riscv_linux_processor.
+ * @return Negative, zero or positive as 'a' sorts before, with or after 'b'.
+ */
+static int compare_riscv_linux_processors(const void* a, const void* b) {
+	const struct cpuinfo_riscv_linux_processor* lhs = a;
+	const struct cpuinfo_riscv_linux_processor* rhs = b;
+
+	if (lhs->core.uarch != rhs->core.uarch) {
+		return lhs->core.uarch < rhs->core.uarch ? -1 : 1;
+	}
+	if (lhs->processor.linux_id != rhs->processor.linux_id) {
+		return lhs->processor.linux_id < rhs->processor.linux_id ? -1 : 1;
+	}
+	return 0;
+}
+
+/**
+ * Sibling callback for the core CPU list of 'processor'. It receives one
+ * range [core_cpus_start, core_cpus_end) of processor IDs sharing the core and
+ * may be invoked more than once for the same processor when its list is made
+ * of several ranges (e.g. core_cpu_list=1,10-12).
+ *
+ * Every valid processor in the range gets its 'core_leader_id' set to the
+ * smallest ID seen for this core. The 'processor_start' and 'processor_count'
+ * of the processor's 'core' attribute are updated accordingly.
+ *
+ * Precondition: entries of 'processors' are indexed by Linux processor ID.
+ *
+ * Returns false when the range contained no valid processor.
+ */
+static bool core_cpus_parser(
+	uint32_t processor,
+	uint32_t core_cpus_start,
+	uint32_t core_cpus_end,
+	struct cpuinfo_riscv_linux_processor* processors) {
+	struct cpuinfo_riscv_linux_processor* const owner = &processors[processor];
+	const bool seen_before = bitmask_all(owner->flags, CPUINFO_LINUX_FLAG_CORE_CLUSTER);
+
+	/* Carry over the leader picked by an earlier invocation, if any. */
+	uint32_t leader = seen_before ? owner->core_leader_id : UINT32_MAX;
+	uint32_t found = 0;
+
+	for (size_t cpu = core_cpus_start; cpu < core_cpus_end; cpu++) {
+		struct cpuinfo_riscv_linux_processor* const member = &processors[cpu];
+		if (bitmask_all(member->flags, CPUINFO_LINUX_FLAG_VALID)) {
+			/* Without a known leader, the first valid ID becomes it. */
+			if (leader == UINT32_MAX) {
+				leader = cpu;
+			}
+			member->core_leader_id = leader;
+			found++;
+		}
+	}
+
+	/**
+	 * On the first invocation always record the start. On later ones,
+	 * only lower the recorded start, never raise it.
+	 */
+	if (!seen_before || leader < owner->core.processor_start) {
+		owner->core.processor_start = leader;
+		owner->core_leader_id = leader;
+	}
+	owner->core.processor_count += found;
+	owner->flags |= CPUINFO_LINUX_FLAG_CORE_CLUSTER;
+
+	/* The parser has failed only if no processors were found. */
+	return found != 0;
+}
+
+/**
+ * Sibling callback for the cluster CPU list of 'processor'. It receives one
+ * range [cluster_cpus_start, cluster_cpus_end) of processor IDs sharing the
+ * cluster and may be invoked more than once for the same processor when its
+ * list is made of several ranges (e.g. cluster_cpus_list=1,10-12).
+ *
+ * Every valid processor in the range gets its 'cluster_leader_id' set to the
+ * smallest ID seen for this cluster. The start, ID and counts of the
+ * processor's 'cluster' attribute are updated accordingly; a core is counted
+ * once, through the processor that is its core leader.
+ *
+ * Precondition: entries of 'processors' are indexed by Linux processor ID and
+ * their 'core_leader_id' has already been resolved.
+ *
+ * Always returns true.
+ */
+static bool cluster_cpus_parser(
+	uint32_t processor,
+	uint32_t cluster_cpus_start,
+	uint32_t cluster_cpus_end,
+	struct cpuinfo_riscv_linux_processor* processors) {
+	struct cpuinfo_riscv_linux_processor* const owner = &processors[processor];
+	const bool seen_before = bitmask_all(owner->flags, CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER);
+
+	/* Carry over the leader picked by an earlier invocation, if any. */
+	uint32_t leader = seen_before ? owner->cluster_leader_id : UINT32_MAX;
+	uint32_t found_processors = 0;
+	uint32_t found_cores = 0;
+
+	for (size_t cpu = cluster_cpus_start; cpu < cluster_cpus_end; cpu++) {
+		struct cpuinfo_riscv_linux_processor* const member = &processors[cpu];
+		if (bitmask_all(member->flags, CPUINFO_LINUX_FLAG_VALID)) {
+			/* Without a known leader, the first valid ID becomes it. */
+			if (leader == UINT32_MAX) {
+				leader = cpu;
+			}
+			member->cluster_leader_id = leader;
+			found_processors++;
+			/* Only a core's own leader stands in for that core. */
+			if (member->core_leader_id == cpu) {
+				found_cores++;
+			}
+		}
+	}
+
+	/**
+	 * On the first invocation always record the start. On later ones,
+	 * only lower the recorded start, never raise it.
+	 */
+	if (!seen_before || leader < owner->cluster.processor_start) {
+		owner->cluster.processor_start = leader;
+		owner->cluster.core_start = leader;
+		owner->cluster.cluster_id = leader;
+		owner->cluster_leader_id = leader;
+	}
+	owner->cluster.processor_count += found_processors;
+	owner->cluster.core_count += found_cores;
+	owner->flags |= CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER;
+	return true;
+}
+
+/**
+ * Sibling callback for the package CPU list of 'processor'. It receives one
+ * range [package_cpus_start, package_cpus_end) of processor IDs sharing the
+ * package and may be invoked more than once for the same processor when its
+ * list is made of several ranges (e.g. package_cpus_list=1,10-12).
+ *
+ * Every valid processor in the range gets its 'package_leader_id' set to the
+ * smallest ID seen for this package. The starts and counts of the processor's
+ * 'package' attribute are updated accordingly; a core or cluster is counted
+ * once, through the processor that is its leader.
+ *
+ * Precondition: entries of 'processors' are indexed by Linux processor ID and
+ * their 'core_leader_id' and 'cluster_leader_id' have already been resolved.
+ *
+ * Always returns true.
+ */
+static bool package_cpus_parser(
+	uint32_t processor,
+	uint32_t package_cpus_start,
+	uint32_t package_cpus_end,
+	struct cpuinfo_riscv_linux_processor* processors) {
+	struct cpuinfo_riscv_linux_processor* const owner = &processors[processor];
+	const bool seen_before = bitmask_all(owner->flags, CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER);
+
+	/* Carry over the leader picked by an earlier invocation, if any. */
+	uint32_t leader = seen_before ? owner->package_leader_id : UINT32_MAX;
+	uint32_t found_processors = 0;
+	uint32_t found_clusters = 0;
+	uint32_t found_cores = 0;
+
+	for (size_t cpu = package_cpus_start; cpu < package_cpus_end; cpu++) {
+		struct cpuinfo_riscv_linux_processor* const member = &processors[cpu];
+		if (bitmask_all(member->flags, CPUINFO_LINUX_FLAG_VALID)) {
+			/* Without a known leader, the first valid ID becomes it. */
+			if (leader == UINT32_MAX) {
+				leader = cpu;
+			}
+			member->package_leader_id = leader;
+			found_processors++;
+			/* Only a group's own leader stands in for that group. */
+			if (member->cluster_leader_id == cpu) {
+				found_clusters++;
+			}
+			if (member->core_leader_id == cpu) {
+				found_cores++;
+			}
+		}
+	}
+
+	/**
+	 * On the first invocation always record the start. On later ones,
+	 * only lower the recorded start, never raise it.
+	 */
+	if (!seen_before || leader < owner->package.processor_start) {
+		owner->package.processor_start = leader;
+		owner->package.cluster_start = leader;
+		owner->package.core_start = leader;
+		owner->package_leader_id = leader;
+	}
+	owner->package.processor_count += found_processors;
+	owner->package.cluster_count += found_clusters;
+	owner->package.core_count += found_cores;
+	owner->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+	return true;
+}
+
+/**
+ * Initialization for the RISC-V Linux system.
+ *
+ * Discovers the present processors, resolves their core, cluster and package
+ * topology, decodes vendor/uarch and ISA information, and then publishes the
+ * resulting arrays through the global cpuinfo_* variables.
+ *
+ * On any failure an error or warning is logged, every allocation made so far
+ * is released and the function returns without setting
+ * 'cpuinfo_is_initialized'.
+ */
+void cpuinfo_riscv_linux_init(void) {
+	struct cpuinfo_riscv_linux_processor* riscv_linux_processors = NULL;
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+	const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+	const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+	uint32_t* linux_cpu_to_uarch_index_map = NULL;
+
+	/**
+	 * The interesting set of processors are the number of 'present'
+	 * processors on the system. There may be more 'possible' processors,
+	 * but processor information cannot be gathered on non-present
+	 * processors.
+	 *
+	 * Note: For SoCs, it is largely the case that all processors are known
+	 * at boot and no processors are hotplugged at runtime, so the
+	 * 'present' and 'possible' list is often the same.
+	 *
+	 * Note: This computes the maximum processor ID of the 'present'
+	 * processors. It is not a count of the number of processors on the
+	 * system.
+	 */
+	const uint32_t max_processor_id =
+		1 + cpuinfo_linux_get_max_present_processor(cpuinfo_linux_get_max_processors_count());
+	if (max_processor_id == 0) {
+		cpuinfo_log_error("failed to discover any processors");
+		return;
+	}
+
+	/**
+	 * Allocate space to store all processor information. This array is
+	 * sized to the max processor ID as opposed to the number of 'present'
+	 * processors, to leverage pointer math in the common utility functions.
+	 */
+	riscv_linux_processors = calloc(max_processor_id, sizeof(struct cpuinfo_riscv_linux_processor));
+	if (riscv_linux_processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " processors.",
+			max_processor_id * sizeof(struct cpuinfo_riscv_linux_processor),
+			max_processor_id);
+		goto cleanup;
+	}
+
+	/**
+	 * Attempt to detect all processors and apply the corresponding flag to
+	 * each processor struct that we find.
+	 */
+	if (!cpuinfo_linux_detect_present_processors(
+		    max_processor_id,
+		    &riscv_linux_processors->flags,
+		    sizeof(struct cpuinfo_riscv_linux_processor),
+		    CPUINFO_LINUX_FLAG_PRESENT | CPUINFO_LINUX_FLAG_VALID)) {
+		cpuinfo_log_error("failed to detect present processors");
+		goto cleanup;
+	}
+
+	/* Populate processor information. */
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		/* TODO: Determine if an 'smt_id' is available. */
+		riscv_linux_processors[processor].processor.linux_id = processor;
+	}
+
+	/* Populate core information. */
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+
+		/* Populate processor start and count information. */
+		if (!cpuinfo_linux_detect_core_cpus(
+			    max_processor_id,
+			    processor,
+			    (cpuinfo_siblings_callback)core_cpus_parser,
+			    riscv_linux_processors)) {
+			cpuinfo_log_error("failed to detect core cpus for processor %zu.", processor);
+			goto cleanup;
+		}
+
+		/* Populate core ID information. */
+		if (cpuinfo_linux_get_processor_core_id(processor, &riscv_linux_processors[processor].core.core_id)) {
+			riscv_linux_processors[processor].flags |= CPUINFO_LINUX_FLAG_CORE_ID;
+		}
+
+		/**
+		 * Populate the vendor and uarch of this core from this
+		 * processor. When the final 'cores' list is constructed, only
+		 * the values from the core leader will be honored.
+		 */
+		cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe(
+			processor,
+			&riscv_linux_processors[processor].core.vendor,
+			&riscv_linux_processors[processor].core.uarch);
+
+		/* Populate frequency information of this core. */
+		uint32_t frequency = cpuinfo_linux_get_processor_cur_frequency(processor);
+		if (frequency != 0) {
+			riscv_linux_processors[processor].core.frequency = frequency;
+			riscv_linux_processors[processor].flags |= CPUINFO_LINUX_FLAG_CUR_FREQUENCY;
+		}
+	}
+
+	/* Populate cluster information. */
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		if (!cpuinfo_linux_detect_cluster_cpus(
+			    max_processor_id,
+			    processor,
+			    (cpuinfo_siblings_callback)cluster_cpus_parser,
+			    riscv_linux_processors)) {
+			cpuinfo_log_warning("failed to detect cluster cpus for processor %zu.", processor);
+			goto cleanup;
+		}
+
+		/**
+		 * Populate the vendor, uarch and frequency of this cluster from
+		 * this logical processor. When the 'clusters' list is
+		 * constructed, only the values from the cluster leader will be
+		 * honored.
+		 */
+		riscv_linux_processors[processor].cluster.vendor = riscv_linux_processors[processor].core.vendor;
+		riscv_linux_processors[processor].cluster.uarch = riscv_linux_processors[processor].core.uarch;
+		riscv_linux_processors[processor].cluster.frequency = riscv_linux_processors[processor].core.frequency;
+	}
+
+	/* Populate package information. */
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		if (!cpuinfo_linux_detect_package_cpus(
+			    max_processor_id,
+			    processor,
+			    (cpuinfo_siblings_callback)package_cpus_parser,
+			    riscv_linux_processors)) {
+			cpuinfo_log_warning("failed to detect package cpus for processor %zu.", processor);
+			goto cleanup;
+		}
+	}
+
+	/* Populate ISA structure with hwcap information. */
+	cpuinfo_riscv_linux_decode_isa_from_hwcap(&cpuinfo_isa);
+
+	/**
+	 * To efficiently compute the number of unique micro-architectures
+	 * present on the system, sort the processor list by micro-architecture
+	 * and then scan through the list to count the differences.
+	 *
+	 * Ensure this is done at the end of composing the processor list - the
+	 * parsing functions assume that the position of the processor in the
+	 * list matches its Linux ID, which this sorting operation breaks.
+	 */
+	qsort(riscv_linux_processors,
+	      max_processor_id,
+	      sizeof(struct cpuinfo_riscv_linux_processor),
+	      compare_riscv_linux_processors);
+
+	/**
+	 * Determine the number of *valid* detected processors, cores,
+	 * clusters, packages and uarchs in the list.
+	 */
+	size_t valid_processors_count = 0;
+	size_t valid_cores_count = 0;
+	size_t valid_clusters_count = 0;
+	size_t valid_packages_count = 0;
+	size_t valid_uarchs_count = 0;
+	enum cpuinfo_uarch last_uarch = cpuinfo_uarch_unknown;
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+
+		/**
+		 * All comparisons to the leader id values MUST be done against
+		 * the 'linux_id' as opposed to 'processor'. The sort function
+		 * above no longer allows us to make the assumption that these
+		 * two values are the same.
+		 */
+		uint32_t linux_id = riscv_linux_processors[processor].processor.linux_id;
+
+		valid_processors_count++;
+		if (riscv_linux_processors[processor].core_leader_id == linux_id) {
+			valid_cores_count++;
+		}
+		if (riscv_linux_processors[processor].cluster_leader_id == linux_id) {
+			valid_clusters_count++;
+		}
+		if (riscv_linux_processors[processor].package_leader_id == linux_id) {
+			valid_packages_count++;
+		}
+		/**
+		 * As we've sorted by micro-architecture, when the uarch differs
+		 * between two entries, a unique uarch has been observed.
+		 */
+		if (last_uarch != riscv_linux_processors[processor].core.uarch || valid_uarchs_count == 0) {
+			valid_uarchs_count++;
+			last_uarch = riscv_linux_processors[processor].core.uarch;
+		}
+	}
+
+	/* Allocate and populate final public ABI structures. */
+	processors = calloc(valid_processors_count, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %zu processors.",
+			valid_processors_count * sizeof(struct cpuinfo_processor),
+			valid_processors_count);
+		goto cleanup;
+	}
+
+	cores = calloc(valid_cores_count, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %zu cores.",
+			valid_cores_count * sizeof(struct cpuinfo_core),
+			valid_cores_count);
+		goto cleanup;
+	}
+
+	clusters = calloc(valid_clusters_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %zu clusters.",
+			valid_clusters_count * sizeof(struct cpuinfo_cluster),
+			valid_clusters_count);
+		goto cleanup;
+	}
+
+	packages = calloc(valid_packages_count, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %zu packages.",
+			valid_packages_count * sizeof(struct cpuinfo_package),
+			valid_packages_count);
+		goto cleanup;
+	}
+
+	uarchs = calloc(valid_uarchs_count, sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %zu uarchs.",
+			valid_uarchs_count * sizeof(struct cpuinfo_uarch_info),
+			valid_uarchs_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_processor_map = calloc(max_processor_id, sizeof(struct cpuinfo_processor*));
+	if (linux_cpu_to_processor_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " processor map.",
+			max_processor_id * sizeof(struct cpuinfo_processor*),
+			max_processor_id);
+		goto cleanup;
+	}
+
+	linux_cpu_to_core_map = calloc(max_processor_id, sizeof(struct cpuinfo_core*));
+	if (linux_cpu_to_core_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " core map.",
+			max_processor_id * sizeof(struct cpuinfo_core*),
+			max_processor_id);
+		goto cleanup;
+	}
+
+	/* This map stores uarch indices (uint32_t), not pointers. */
+	linux_cpu_to_uarch_index_map = calloc(max_processor_id, sizeof(uint32_t));
+	if (linux_cpu_to_uarch_index_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for %" PRIu32 " uarch map.",
+			max_processor_id * sizeof(uint32_t),
+			max_processor_id);
+		goto cleanup;
+	}
+
+	/* Transfer contents of processor list to ABI structures. */
+	size_t valid_processors_index = 0;
+	size_t valid_cores_index = 0;
+	size_t valid_clusters_index = 0;
+	size_t valid_packages_index = 0;
+	size_t valid_uarchs_index = 0;
+	last_uarch = cpuinfo_uarch_unknown;
+	for (size_t processor = 0; processor < max_processor_id; processor++) {
+		if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+
+		/**
+		 * All comparisons to the leader id values MUST be done against
+		 * the 'linux_id' as opposed to 'processor'. The sort function
+		 * above no longer allows us to make the assumption that these
+		 * two values are the same.
+		 */
+		uint32_t linux_id = riscv_linux_processors[processor].processor.linux_id;
+
+		/* Create uarch entry if this uarch has not been seen before. */
+		if (last_uarch != riscv_linux_processors[processor].core.uarch || valid_uarchs_index == 0) {
+			uarchs[valid_uarchs_index++].uarch = riscv_linux_processors[processor].core.uarch;
+			last_uarch = riscv_linux_processors[processor].core.uarch;
+		}
+
+		/* Copy cpuinfo_processor information. */
+		memcpy(&processors[valid_processors_index++],
+		       &riscv_linux_processors[processor].processor,
+		       sizeof(struct cpuinfo_processor));
+
+		/* Update uarch processor count. */
+		uarchs[valid_uarchs_index - 1].processor_count++;
+
+		/* Copy cpuinfo_core information, if this is the leader. */
+		if (riscv_linux_processors[processor].core_leader_id == linux_id) {
+			memcpy(&cores[valid_cores_index++],
+			       &riscv_linux_processors[processor].core,
+			       sizeof(struct cpuinfo_core));
+			/* Update uarch core count. */
+			uarchs[valid_uarchs_index - 1].core_count++;
+		}
+
+		/* Copy cpuinfo_cluster information, if this is the leader. */
+		if (riscv_linux_processors[processor].cluster_leader_id == linux_id) {
+			memcpy(&clusters[valid_clusters_index++],
+			       &riscv_linux_processors[processor].cluster,
+			       sizeof(struct cpuinfo_cluster));
+		}
+
+		/* Copy cpuinfo_package information, if this is the leader. */
+		if (riscv_linux_processors[processor].package_leader_id == linux_id) {
+			memcpy(&packages[valid_packages_index++],
+			       &riscv_linux_processors[processor].package,
+			       sizeof(struct cpuinfo_package));
+		}
+
+		/**
+		 * Commit pointers on the final structures. Each 'index - 1'
+		 * refers to the most recently copied core, cluster or package.
+		 */
+		processors[valid_processors_index - 1].core = &cores[valid_cores_index - 1];
+		processors[valid_processors_index - 1].cluster = &clusters[valid_clusters_index - 1];
+		processors[valid_processors_index - 1].package = &packages[valid_packages_index - 1];
+
+		cores[valid_cores_index - 1].cluster = &clusters[valid_clusters_index - 1];
+		cores[valid_cores_index - 1].package = &packages[valid_packages_index - 1];
+
+		clusters[valid_clusters_index - 1].package = &packages[valid_packages_index - 1];
+
+		linux_cpu_to_processor_map[linux_id] = &processors[valid_processors_index - 1];
+		linux_cpu_to_core_map[linux_id] = &cores[valid_cores_index - 1];
+		linux_cpu_to_uarch_index_map[linux_id] = valid_uarchs_index - 1;
+	}
+
+	/* Commit */
+	cpuinfo_processors = processors;
+	cpuinfo_processors_count = valid_processors_count;
+	cpuinfo_cores = cores;
+	cpuinfo_cores_count = valid_cores_count;
+	cpuinfo_clusters = clusters;
+	cpuinfo_clusters_count = valid_clusters_count;
+	cpuinfo_packages = packages;
+	cpuinfo_packages_count = valid_packages_count;
+	cpuinfo_uarchs = uarchs;
+	cpuinfo_uarchs_count = valid_uarchs_count;
+
+	cpuinfo_linux_cpu_max = max_processor_id;
+	cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+	cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+	cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;
+
+	/* Full barrier: the stores above are issued before the flag is set. */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/* Mark all public structures NULL to prevent cleanup from erasing them.
+	 */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	uarchs = NULL;
+	linux_cpu_to_processor_map = NULL;
+	linux_cpu_to_core_map = NULL;
+	linux_cpu_to_uarch_index_map = NULL;
+cleanup:
+	/* free(NULL) is a no-op, so this is safe on both paths. */
+	free(riscv_linux_processors);
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(uarchs);
+	free(linux_cpu_to_processor_map);
+	free(linux_cpu_to_core_map);
+	free(linux_cpu_to_uarch_index_map);
+}
--- a/3rdparty/cpuinfo/src/riscv/linux/riscv-hw.c
+++ b/3rdparty/cpuinfo/src/riscv/linux/riscv-hw.c
@@ -0,0 +1,151 @@
+/*
+ * Only enable the C standard library hwprobe interface on Android for now.
+ * Patches to add a compatible hwprobe API to glibc are available but not
+ * merged at the time of writing and so cannot easily be tested.  The
+ * #ifdef __ANDROID__ check will be removed in the future.
+ */
+#ifdef __ANDROID__
+#ifdef __has_include
+#if __has_include(<sys/hwprobe.h>)
+#define CPUINFO_RISCV_LINUX_HAVE_C_HWPROBE
+#include <sys/hwprobe.h>
+#endif
+#endif
+#endif
+
+#include <sched.h>
+
+#include <cpuinfo/log.h>
+#include <riscv/api.h>
+#include <riscv/linux/api.h>
+
+#ifndef CPUINFO_RISCV_LINUX_HAVE_C_HWPROBE
+
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+struct riscv_hwprobe {
+	int64_t key;
+	uint64_t value;
+};
+
+/*
+ * The standard C library our binary was compiled with does not support
+ * hwprobe but the kernel on which we are running might do.  The
+ * constants below are copied from
+ * /usr/include/riscv64-linux-gnu/asm/hwprobe.h.  They allow us to
+ * invoke the hwprobe syscall directly.  We duplicate the constants
+ * rather than including the kernel hwprobe.h header, as this header
+ * will only be present if we're building against Linux 6.4 headers or greater.
+ */
+
+#define RISCV_HWPROBE_KEY_MVENDORID 0
+#define RISCV_HWPROBE_KEY_MARCHID 1
+#define RISCV_HWPROBE_KEY_MIMPID 2
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
+#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1 << 0)
+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
+#define RISCV_HWPROBE_IMA_FD (1 << 0)
+#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_IMA_V (1 << 2)
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
+#define RISCV_HWPROBE_EXT_ZBS (1 << 5)
+#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6)
+#define RISCV_HWPROBE_KEY_CPUPERF_0 5
+#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
+#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
+#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
+#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
+#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
+#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
+
+#ifndef NR_riscv_hwprobe
+#ifndef NR_arch_specific_syscall
+#define NR_arch_specific_syscall 244
+#endif
+#define NR_riscv_hwprobe (NR_arch_specific_syscall + 14)
+#endif
+#endif
+
+/**
+ * Queries the vendor, architecture and implementation IDs of one processor
+ * through hwprobe and decodes them into a vendor and micro-architecture.
+ *
+ * Both outputs are set to their 'unknown' values up front, so they are valid
+ * even when the CPU set cannot be allocated or the hwprobe query fails.
+ *
+ * @param[processor] - The Linux ID of the target processor.
+ * @param[vendor] - Reference to the cpuinfo_vendor to populate.
+ * @param[uarch] - Reference to the cpuinfo_uarch to populate.
+ */
+void cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe(
+	uint32_t processor,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]) {
+	struct riscv_hwprobe pairs[] = {
+		{
+			.key = RISCV_HWPROBE_KEY_MVENDORID,
+		},
+		{
+			.key = RISCV_HWPROBE_KEY_MARCHID,
+		},
+		{
+			.key = RISCV_HWPROBE_KEY_MIMPID,
+		},
+	};
+	const size_t pairs_count = sizeof(pairs) / sizeof(struct riscv_hwprobe);
+
+	/* In case of failure, report unknown. */
+	*vendor = cpuinfo_vendor_unknown;
+	*uarch = cpuinfo_uarch_unknown;
+
+	/* Create a CPU set with this processor flagged. */
+	const size_t cpu_count = processor + 1;
+	cpu_set_t* cpu_set = CPU_ALLOC(cpu_count);
+	if (cpu_set == NULL) {
+		cpuinfo_log_warning("failed to allocate space for cpu_set");
+		return;
+	}
+
+	const size_t cpu_set_size = CPU_ALLOC_SIZE(cpu_count);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+	CPU_SET_S(processor, cpu_set_size, cpu_set);
+
+	/* Request all available information from hwprobe. */
+#ifndef CPUINFO_RISCV_LINUX_HAVE_C_HWPROBE
+	/*
+	 * No standard library support for hwprobe.  We'll need to invoke the
+	 * syscall directly.  See
+	 *
+	 * https://docs.kernel.org/arch/riscv/hwprobe.html
+	 *
+	 * for more details.
+	 */
+	int ret = syscall(NR_riscv_hwprobe, pairs, pairs_count, cpu_set_size, (unsigned long*)cpu_set, 0 /* flags */);
+#else
+	int ret = __riscv_hwprobe(pairs, pairs_count, cpu_set_size, (unsigned long*)cpu_set, 0 /* flags */);
+#endif
+	/**
+	 * Success is 0 on both paths. The raw syscall reports failure as a
+	 * negative value, whereas the C library wrapper may report it as a
+	 * positive error number, so any non-zero result is a failure.
+	 */
+	if (ret != 0) {
+		cpuinfo_log_warning("failed to get hwprobe information, err: %d", ret);
+		goto cleanup;
+	}
+
+	/**
+	 * The syscall may not have populated all requested keys, loop through
+	 * the list and store the values that were discovered.
+	 *
+	 * Note: the 64-bit hwprobe values are narrowed to 32 bits here.
+	 */
+	uint32_t vendor_id = 0;
+	uint32_t arch_id = 0;
+	uint32_t imp_id = 0;
+	for (size_t pair = 0; pair < pairs_count; pair++) {
+		switch (pairs[pair].key) {
+			case RISCV_HWPROBE_KEY_MVENDORID:
+				vendor_id = pairs[pair].value;
+				break;
+			case RISCV_HWPROBE_KEY_MARCHID:
+				arch_id = pairs[pair].value;
+				break;
+			case RISCV_HWPROBE_KEY_MIMPID:
+				imp_id = pairs[pair].value;
+				break;
+			default:
+				/* The key value may be -1 if unsupported. */
+				break;
+		}
+	}
+	cpuinfo_riscv_decode_vendor_uarch(vendor_id, arch_id, imp_id, vendor, uarch);
+
+cleanup:
+	CPU_FREE(cpu_set);
+}
--- a/3rdparty/cpuinfo/src/riscv/linux/riscv-isa.c
+++ b/3rdparty/cpuinfo/src/riscv/linux/riscv-isa.c
@@ -0,0 +1,43 @@
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <riscv/linux/api.h>
+
+/**
+ * Single-letter RISC-V extension bits that are tested against the AT_HWCAP
+ * auxiliary vector value. Each extension letter maps to the bit at its
+ * alphabetical index ('A' -> bit 0, 'C' -> bit 2, ..., 'V' -> bit 21).
+ *
+ * Mirrors arch/riscv/include/uapi/asm/hwcap.h
+ *
+ * This must be kept in sync with the upstream kernel header.
+ */
+#define COMPAT_HWCAP_ISA_I (1 << ('I' - 'A'))
+#define COMPAT_HWCAP_ISA_M (1 << ('M' - 'A'))
+#define COMPAT_HWCAP_ISA_A (1 << ('A' - 'A'))
+#define COMPAT_HWCAP_ISA_F (1 << ('F' - 'A'))
+#define COMPAT_HWCAP_ISA_D (1 << ('D' - 'A'))
+#define COMPAT_HWCAP_ISA_C (1 << ('C' - 'A'))
+#define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
+
+/**
+ * Raise the ISA flags in `isa` that correspond to the extension bits set in
+ * the AT_HWCAP auxiliary vector entry.
+ *
+ * Flags are only ever switched on: a flag whose hwcap bit is clear keeps
+ * whatever value the caller stored in it.
+ *
+ * @param[in,out] isa - ISA description to update.
+ */
+void cpuinfo_riscv_linux_decode_isa_from_hwcap(struct cpuinfo_riscv_isa isa[restrict static 1]) {
+	const unsigned long capabilities = getauxval(AT_HWCAP);
+
+/* Set isa-><flag> when the given hwcap mask is present; otherwise do nothing. */
+#define CPUINFO_RISCV_RAISE_IF_PRESENT(mask, flag) \
+	do {                                       \
+		if ((capabilities & (mask)) != 0) { \
+			isa->flag = true;           \
+		}                                   \
+	} while (0)
+
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_I, i);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_M, m);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_A, a);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_F, f);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_D, d);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_C, c);
+	CPUINFO_RISCV_RAISE_IF_PRESENT(COMPAT_HWCAP_ISA_V, v);
+
+#undef CPUINFO_RISCV_RAISE_IF_PRESENT
+}
--- a/3rdparty/cpuinfo/src/riscv/uarch.c
+++ b/3rdparty/cpuinfo/src/riscv/uarch.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+
+#include <cpuinfo/log.h>
+#include <riscv/api.h>
+
+/**
+ * Translate raw RISC-V machine identification values into cpuinfo
+ * enumerations.
+ *
+ * @param vendor_id - vendor ID value; the only input currently consulted.
+ * @param arch_id - architecture ID value (not used yet).
+ * @param imp_id - implementation ID value (not used yet).
+ * @param[out] vendor - receives the decoded vendor, or cpuinfo_vendor_unknown
+ *                      (with a warning logged) for an unrecognized ID.
+ * @param[out] uarch - always receives cpuinfo_uarch_unknown for now.
+ */
+void cpuinfo_riscv_decode_vendor_uarch(
+	uint32_t vendor_id,
+	uint32_t arch_id,
+	uint32_t imp_id,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]) {
+	/* Knowing the vendor ID is enough to pick the cpuinfo_vendor. */
+	if (vendor_id == cpuinfo_riscv_chipset_vendor_sifive) {
+		*vendor = cpuinfo_vendor_sifive;
+	} else {
+		*vendor = cpuinfo_vendor_unknown;
+		cpuinfo_log_warning("unknown vendor ID: %" PRIu32, vendor_id);
+	}
+
+	/**
+	 * TODO: Decode the architecture and implementation IDs here once a
+	 * chipset of interest comes along.
+	 */
+	*uarch = cpuinfo_uarch_unknown;
+}
--- a/3rdparty/cpuinfo/src/x86/api.h
+++ b/3rdparty/cpuinfo/src/x86/api.h
@@ -0,0 +1,159 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/* Register values produced by a single CPUID query. */
+struct cpuid_regs {
+	uint32_t eax;
+	uint32_t ebx;
+	uint32_t ecx;
+	uint32_t edx;
+};
+
+/* Parameters of one cache as decoded from CPUID. */
+struct cpuinfo_x86_cache {
+	/* The CPUID cache-parameter decoders compute this as associativity * partitions * line_size * sets */
+	uint32_t size;
+	uint32_t associativity;
+	uint32_t sets;
+	uint32_t partitions;
+	uint32_t line_size;
+	/* Bitwise OR of CPUINFO_CACHE_* flags */
+	uint32_t flags;
+	/* bit_length() of the sharing count that CPUID reports for this cache */
+	uint32_t apic_bits;
+};
+
+/* Per-level cache descriptions collected for one processor. */
+struct cpuinfo_x86_caches {
+	struct cpuinfo_trace_cache trace;
+	struct cpuinfo_x86_cache l1i;
+	struct cpuinfo_x86_cache l1d;
+	struct cpuinfo_x86_cache l2;
+	struct cpuinfo_x86_cache l3;
+	struct cpuinfo_x86_cache l4;
+	/* Handed to cpuinfo_x86_decode_cache_descriptor() as its prefetch_size output */
+	uint32_t prefetch_size;
+};
+
+/* Fields of the processor signature, as split by cpuinfo_x86_decode_model_info(). */
+struct cpuinfo_x86_model_info {
+	/* base_model + (extended_model << 4) */
+	uint32_t model;
+	/* base_family + extended_family */
+	uint32_t family;
+
+	/* Signature bits 7:4 */
+	uint32_t base_model;
+	/* Signature bits 11:8 */
+	uint32_t base_family;
+	/* Signature bits 3:0 */
+	uint32_t stepping;
+	/* Signature bits 19:16 */
+	uint32_t extended_model;
+	/* Signature bits 27:20 */
+	uint32_t extended_family;
+	/* Signature bits 13:12 */
+	uint32_t processor_type;
+};
+
+/* Layout of the thread and core ID fields inside an APIC ID. */
+struct cpuinfo_x86_topology {
+	uint32_t apic_id;
+	/* Position and width of the SMT (thread) ID field within the APIC ID */
+	uint32_t thread_bits_offset;
+	uint32_t thread_bits_length;
+	/* Position and width of the core ID field within the APIC ID */
+	uint32_t core_bits_offset;
+	uint32_t core_bits_length;
+};
+
+/* Description of a processor as gathered by cpuinfo_x86_init_processor(). */
+struct cpuinfo_x86_processor {
+	/* EAX value returned by CPUID leaf 1 */
+	uint32_t cpuid;
+	enum cpuinfo_vendor vendor;
+	enum cpuinfo_uarch uarch;
+#ifdef __linux__
+	/* Only present in Linux builds */
+	int linux_id;
+#endif
+	struct cpuinfo_x86_caches cache;
+	/* One entry per TLB kind and page size; filled through cpuinfo_x86_detect_cache() */
+	struct {
+		struct cpuinfo_tlb itlb_4KB;
+		struct cpuinfo_tlb itlb_2MB;
+		struct cpuinfo_tlb itlb_4MB;
+		struct cpuinfo_tlb dtlb0_4KB;
+		struct cpuinfo_tlb dtlb0_2MB;
+		struct cpuinfo_tlb dtlb0_4MB;
+		struct cpuinfo_tlb dtlb_4KB;
+		struct cpuinfo_tlb dtlb_2MB;
+		struct cpuinfo_tlb dtlb_4MB;
+		struct cpuinfo_tlb dtlb_1GB;
+		struct cpuinfo_tlb stlb2_4KB;
+		struct cpuinfo_tlb stlb2_2MB;
+		struct cpuinfo_tlb stlb2_1GB;
+	} tlb;
+	struct cpuinfo_x86_topology topology;
+	/* Raw brand string; callers pass it to cpuinfo_x86_normalize_brand_string() */
+	char brand_string[CPUINFO_PACKAGE_NAME_MAX];
+};
+
+/* Query CPUID on the calling processor and fill in `processor`. */
+CPUINFO_INTERNAL void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor);
+
+/* Map the EBX/ECX/EDX values of CPUID leaf 0 to a vendor. */
+CPUINFO_INTERNAL enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx);
+/* Split the EAX value of CPUID leaf 1 into stepping, model, family and processor type. */
+CPUINFO_INTERNAL struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax);
+/* Determine the microarchitecture from the vendor and the decoded model information. */
+CPUINFO_INTERNAL enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+	enum cpuinfo_vendor vendor,
+	const struct cpuinfo_x86_model_info* model_info);
+
+CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+	const struct cpuid_regs basic_info,
+	const struct cpuid_regs extended_info,
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	enum cpuinfo_vendor vendor,
+	enum cpuinfo_uarch uarch);
+
+CPUINFO_INTERNAL void cpuinfo_x86_detect_topology(
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	struct cpuid_regs leaf1,
+	struct cpuinfo_x86_topology* topology);
+
+/*
+ * Detect caches and TLBs through CPUID leaf 2 descriptors, leaf 4 (skipped for AMD and Hygon) and, when
+ * amd_topology_extensions is set, leaf 0x8000001D. Results are written through the output pointers.
+ */
+CPUINFO_INTERNAL void cpuinfo_x86_detect_cache(
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	bool amd_topology_extensions,
+	enum cpuinfo_vendor vendor,
+	const struct cpuinfo_x86_model_info* model_info,
+	struct cpuinfo_x86_caches* cache,
+	struct cpuinfo_tlb* itlb_4KB,
+	struct cpuinfo_tlb* itlb_2MB,
+	struct cpuinfo_tlb* itlb_4MB,
+	struct cpuinfo_tlb* dtlb0_4KB,
+	struct cpuinfo_tlb* dtlb0_2MB,
+	struct cpuinfo_tlb* dtlb0_4MB,
+	struct cpuinfo_tlb* dtlb_4KB,
+	struct cpuinfo_tlb* dtlb_2MB,
+	struct cpuinfo_tlb* dtlb_4MB,
+	struct cpuinfo_tlb* dtlb_1GB,
+	struct cpuinfo_tlb* stlb2_4KB,
+	struct cpuinfo_tlb* stlb2_2MB,
+	struct cpuinfo_tlb* stlb2_1GB,
+	uint32_t* log2_package_cores_max);
+
+/* Decode a single non-zero descriptor byte taken from CPUID leaf 2. */
+CPUINFO_INTERNAL void cpuinfo_x86_decode_cache_descriptor(
+	uint8_t descriptor,
+	enum cpuinfo_vendor vendor,
+	const struct cpuinfo_x86_model_info* model_info,
+	struct cpuinfo_x86_caches* cache,
+	struct cpuinfo_tlb* itlb_4KB,
+	struct cpuinfo_tlb* itlb_2MB,
+	struct cpuinfo_tlb* itlb_4MB,
+	struct cpuinfo_tlb* dtlb0_4KB,
+	struct cpuinfo_tlb* dtlb0_2MB,
+	struct cpuinfo_tlb* dtlb0_4MB,
+	struct cpuinfo_tlb* dtlb_4KB,
+	struct cpuinfo_tlb* dtlb_2MB,
+	struct cpuinfo_tlb* dtlb_4MB,
+	struct cpuinfo_tlb* dtlb_1GB,
+	struct cpuinfo_tlb* stlb2_4KB,
+	struct cpuinfo_tlb* stlb2_2MB,
+	struct cpuinfo_tlb* stlb2_1GB,
+	uint32_t* prefetch_size);
+
+/*
+ * Decode one sub-leaf of CPUID leaf 4. Returns false when the sub-leaf reports no cache, which ends the
+ * enumeration; otherwise stores the cache and sets *package_cores_max.
+ */
+CPUINFO_INTERNAL bool cpuinfo_x86_decode_deterministic_cache_parameters(
+	struct cpuid_regs regs,
+	struct cpuinfo_x86_caches* cache,
+	uint32_t* package_cores_max);
+
+/* Decode one sub-leaf of CPUID leaf 0x8000001D. Returns false when the sub-leaf reports no cache. */
+CPUINFO_INTERNAL bool cpuinfo_x86_decode_cache_properties(struct cpuid_regs regs, struct cpuinfo_x86_caches* cache);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_x86_normalize_brand_string(const char raw_name[48], char normalized_name[48]);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_x86_format_package_name(
+	enum cpuinfo_vendor vendor,
+	const char normalized_brand_string[48],
+	char package_name[CPUINFO_PACKAGE_NAME_MAX]);
--- a/3rdparty/cpuinfo/src/x86/cache/descriptor.c
+++ b/3rdparty/cpuinfo/src/x86/cache/descriptor.c
--- a/3rdparty/cpuinfo/src/x86/cache/deterministic.c
+++ b/3rdparty/cpuinfo/src/x86/cache/deterministic.c
@@ -0,0 +1,247 @@
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/log.h>
+#include <cpuinfo/utils.h>
+#include <x86/cpuid.h>
+
+/* Cache type stored in EAX bits 4:0 of a cache-parameters CPUID sub-leaf. */
+enum cache_type {
+	/* No cache described: the decoders return false for this value */
+	cache_type_none = 0,
+	cache_type_data = 1,
+	cache_type_instruction = 2,
+	cache_type_unified = 3,
+};
+
+/**
+ * Decode one sub-leaf of CPUID leaf 0x00000004 and record the cache it describes.
+ *
+ * @param regs - register values returned for the sub-leaf.
+ * @param[out] cache - the entry matching the reported level and type is overwritten; a unified L1 is stored as
+ *                     both l1i and l1d. Instruction caches above L1 and levels above 4 are ignored with a warning.
+ * @param[out] package_cores_max - set to 1 + EAX[31:26] whenever the sub-leaf reports a cache.
+ *
+ * @returns false (writing nothing) when the sub-leaf reports cache type "none", true otherwise.
+ */
+bool cpuinfo_x86_decode_deterministic_cache_parameters(
+	struct cpuid_regs regs,
+	struct cpuinfo_x86_caches* cache,
+	uint32_t* package_cores_max) {
+	const uint32_t kind = regs.eax & UINT32_C(0x1F);
+	if (kind == cache_type_none) {
+		return false;
+	}
+
+	/* Cache levels are numbered from 1 */
+	const uint32_t depth = (regs.eax >> 5) & UINT32_C(0x7);
+
+	/* Each geometry field is reported as (value - 1) */
+	const uint32_t ways = 1 + (regs.ebx >> 22);
+	const uint32_t parts = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF));
+	const uint32_t line_bytes = 1 + (regs.ebx & UINT32_C(0x00000FFF));
+	const uint32_t set_count = 1 + regs.ecx;
+
+	*package_cores_max = 1 + (regs.eax >> 26);
+	const uint32_t sharers = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF));
+
+	uint32_t attributes = 0;
+	if ((regs.edx & UINT32_C(0x00000002)) != 0) {
+		attributes |= CPUINFO_CACHE_INCLUSIVE;
+	}
+	if ((regs.edx & UINT32_C(0x00000004)) != 0) {
+		attributes |= CPUINFO_CACHE_COMPLEX_INDEXING;
+	}
+	if (kind == cache_type_unified) {
+		attributes |= CPUINFO_CACHE_UNIFIED;
+	}
+
+	/* The same description is stored whichever slot ends up receiving it */
+	const struct cpuinfo_x86_cache decoded = {
+		.size = ways * parts * line_bytes * set_count,
+		.associativity = ways,
+		.sets = set_count,
+		.partitions = parts,
+		.line_size = line_bytes,
+		.flags = attributes,
+		.apic_bits = bit_length(sharers),
+	};
+
+	const bool is_unified = kind == cache_type_unified;
+	const bool is_instruction = kind == cache_type_instruction;
+	const bool holds_data = is_unified || kind == cache_type_data;
+
+	switch (depth) {
+		case 1:
+			if (is_instruction || is_unified) {
+				cache->l1i = decoded;
+			}
+			if (holds_data) {
+				cache->l1d = decoded;
+			}
+			break;
+		case 2:
+			if (is_instruction) {
+				cpuinfo_log_warning(
+					"unexpected L2 instruction cache reported in leaf 0x00000004 is ignored");
+			} else if (holds_data) {
+				cache->l2 = decoded;
+			}
+			break;
+		case 3:
+			if (is_instruction) {
+				cpuinfo_log_warning(
+					"unexpected L3 instruction cache reported in leaf 0x00000004 is ignored");
+			} else if (holds_data) {
+				cache->l3 = decoded;
+			}
+			break;
+		case 4:
+			if (is_instruction) {
+				cpuinfo_log_warning(
+					"unexpected L4 instruction cache reported in leaf 0x00000004 is ignored");
+			} else if (holds_data) {
+				cache->l4 = decoded;
+			}
+			break;
+		default:
+			cpuinfo_log_warning(
+				"unexpected L%" PRIu32 " cache reported in leaf 0x00000004 is ignored", depth);
+			break;
+	}
+	return true;
+}
+
+/**
+ * Decode one sub-leaf of CPUID leaf 0x8000001D and record the cache it describes.
+ *
+ * @param regs - register values returned for the sub-leaf.
+ * @param[out] cache - the entry matching the reported level and type is overwritten; a unified L1 is stored as
+ *                     both l1i and l1d. Instruction caches above L1 and levels above 3 are ignored with a warning.
+ *
+ * @returns false (writing nothing) when the sub-leaf reports cache type "none", true otherwise.
+ */
+bool cpuinfo_x86_decode_cache_properties(struct cpuid_regs regs, struct cpuinfo_x86_caches* cache) {
+	const uint32_t kind = regs.eax & UINT32_C(0x1F);
+	if (kind == cache_type_none) {
+		return false;
+	}
+
+	const uint32_t depth = (regs.eax >> 5) & UINT32_C(0x7);
+	const uint32_t sharers = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF));
+
+	/* Each geometry field is reported as (value - 1) */
+	const uint32_t ways = 1 + (regs.ebx >> 22);
+	const uint32_t parts = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF));
+	const uint32_t line_bytes = 1 + (regs.ebx & UINT32_C(0x00000FFF));
+	const uint32_t set_count = 1 + regs.ecx;
+
+	uint32_t attributes = 0;
+	if ((regs.edx & UINT32_C(0x00000002)) != 0) {
+		attributes |= CPUINFO_CACHE_INCLUSIVE;
+	}
+	if (kind == cache_type_unified) {
+		attributes |= CPUINFO_CACHE_UNIFIED;
+	}
+
+	/* The same description is stored whichever slot ends up receiving it */
+	const struct cpuinfo_x86_cache decoded = {
+		.size = ways * parts * line_bytes * set_count,
+		.associativity = ways,
+		.sets = set_count,
+		.partitions = parts,
+		.line_size = line_bytes,
+		.flags = attributes,
+		.apic_bits = bit_length(sharers),
+	};
+
+	const bool is_unified = kind == cache_type_unified;
+	const bool is_instruction = kind == cache_type_instruction;
+	const bool holds_data = is_unified || kind == cache_type_data;
+
+	switch (depth) {
+		case 1:
+			if (is_instruction || is_unified) {
+				cache->l1i = decoded;
+			}
+			if (holds_data) {
+				cache->l1d = decoded;
+			}
+			break;
+		case 2:
+			if (is_instruction) {
+				cpuinfo_log_warning(
+					"unexpected L2 instruction cache reported in leaf 0x8000001D is ignored");
+			} else if (holds_data) {
+				cache->l2 = decoded;
+			}
+			break;
+		case 3:
+			if (is_instruction) {
+				cpuinfo_log_warning(
+					"unexpected L3 instruction cache reported in leaf 0x8000001D is ignored");
+			} else if (holds_data) {
+				cache->l3 = decoded;
+			}
+			break;
+		default:
+			cpuinfo_log_warning(
+				"unexpected L%" PRIu32 " cache reported in leaf 0x8000001D is ignored", depth);
+			break;
+	}
+	return true;
+}
--- a/3rdparty/cpuinfo/src/x86/cache/init.c
+++ b/3rdparty/cpuinfo/src/x86/cache/init.c
@@ -0,0 +1,97 @@
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/log.h>
+#include <cpuinfo/utils.h>
+#include <x86/api.h>
+#include <x86/cpuid.h>
+
+/*
+ * Overlay that lets the four registers returned by CPUID leaf 2 be walked as 16 descriptor bytes.
+ * cpuinfo_x86_detect_cache() reads as_bytes[0] as the iteration count and as_bytes[1..15] as descriptors.
+ */
+union cpuinfo_x86_cache_descriptors {
+	struct cpuid_regs regs;
+	uint8_t as_bytes[16];
+};
+
+/* Cache type values; not referenced by the code in this file. */
+enum cache_type {
+	cache_type_none = 0,
+	cache_type_data = 1,
+	cache_type_instruction = 2,
+	cache_type_unified = 3,
+};
+
+/**
+ * Detect cache and TLB parameters of the calling processor through CPUID.
+ *
+ * Three sources are consulted, later ones overwriting earlier results:
+ *  1. Leaf 2 descriptor bytes (when max_base_index >= 2), each decoded by cpuinfo_x86_decode_cache_descriptor().
+ *  2. Leaf 4 sub-leaves (when max_base_index >= 4 and the vendor is neither AMD nor Hygon).
+ *  3. Leaf 0x8000001D sub-leaves (when amd_topology_extensions is set and the leaf is available).
+ *
+ * @param max_base_index - highest supported basic CPUID leaf.
+ * @param max_extended_index - highest supported extended CPUID leaf.
+ * @param amd_topology_extensions - whether leaf 0x8000001D may be used.
+ * @param vendor, model_info - forwarded to the descriptor decoder.
+ * @param[out] cache - receives the detected caches and the prefetch size.
+ * @param[out] itlb_4KB ... stlb2_1GB - TLB descriptions, forwarded to the descriptor decoder.
+ * @param[out] log2_package_cores_max - written only if leaf 4 reported a non-zero core count.
+ */
+void cpuinfo_x86_detect_cache(
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	bool amd_topology_extensions,
+	enum cpuinfo_vendor vendor,
+	const struct cpuinfo_x86_model_info* model_info,
+	struct cpuinfo_x86_caches* cache,
+	struct cpuinfo_tlb* itlb_4KB,
+	struct cpuinfo_tlb* itlb_2MB,
+	struct cpuinfo_tlb* itlb_4MB,
+	struct cpuinfo_tlb* dtlb0_4KB,
+	struct cpuinfo_tlb* dtlb0_2MB,
+	struct cpuinfo_tlb* dtlb0_4MB,
+	struct cpuinfo_tlb* dtlb_4KB,
+	struct cpuinfo_tlb* dtlb_2MB,
+	struct cpuinfo_tlb* dtlb_4MB,
+	struct cpuinfo_tlb* dtlb_1GB,
+	struct cpuinfo_tlb* stlb2_4KB,
+	struct cpuinfo_tlb* stlb2_2MB,
+	struct cpuinfo_tlb* stlb2_1GB,
+	uint32_t* log2_package_cores_max) {
+	if (max_base_index >= 2) {
+		union cpuinfo_x86_cache_descriptors descriptors;
+		descriptors.regs = cpuid(2);
+		/* The first byte is the number of leaf 2 queries needed to read every descriptor */
+		uint32_t iterations = (uint8_t)descriptors.as_bytes[0];
+		while (iterations != 0) {
+			/* Indexed by (byte index / 4), matching the register order of struct cpuid_regs */
+			const uint32_t register_values[4] = {
+				descriptors.regs.eax,
+				descriptors.regs.ebx,
+				descriptors.regs.ecx,
+				descriptors.regs.edx,
+			};
+			for (uint32_t i = 1 /* note: not 0 */; i < 16; i++) {
+				/*
+				 * Bit 31 of a register set means the register is reserved and
+				 * carries no descriptors: skip all of its bytes.
+				 */
+				if ((register_values[i / 4] & UINT32_C(0x80000000)) != 0) {
+					continue;
+				}
+				const uint8_t descriptor = descriptors.as_bytes[i];
+				if (descriptor != 0) {
+					cpuinfo_x86_decode_cache_descriptor(
+						descriptor,
+						vendor,
+						model_info,
+						cache,
+						itlb_4KB,
+						itlb_2MB,
+						itlb_4MB,
+						dtlb0_4KB,
+						dtlb0_2MB,
+						dtlb0_4MB,
+						dtlb_4KB,
+						dtlb_2MB,
+						dtlb_4MB,
+						dtlb_1GB,
+						stlb2_4KB,
+						stlb2_2MB,
+						stlb2_1GB,
+						&cache->prefetch_size);
+				}
+			}
+			/* Query again only if more descriptor sets remain */
+			if (--iterations != 0) {
+				descriptors.regs = cpuid(2);
+			}
+		}
+
+		if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) {
+			struct cpuid_regs leaf4;
+			uint32_t input_ecx = 0;
+			uint32_t package_cores_max = 0;
+			/* Walk the sub-leaves until one reports "no more caches" */
+			do {
+				leaf4 = cpuidex(4, input_ecx++);
+			} while (cpuinfo_x86_decode_deterministic_cache_parameters(leaf4, cache, &package_cores_max));
+			if (package_cores_max != 0) {
+				*log2_package_cores_max = bit_length(package_cores_max);
+			}
+		}
+	}
+	if (amd_topology_extensions && max_extended_index >= UINT32_C(0x8000001D)) {
+		struct cpuid_regs leaf0x8000001D;
+		uint32_t input_ecx = 0;
+		/* Walk the sub-leaves until one reports "no more caches" */
+		do {
+			leaf0x8000001D = cpuidex(UINT32_C(0x8000001D), input_ecx++);
+		} while (cpuinfo_x86_decode_cache_properties(leaf0x8000001D, cache));
+	}
+}
--- a/3rdparty/cpuinfo/src/x86/cpuid.h
+++ b/3rdparty/cpuinfo/src/x86/cpuid.h
@@ -0,0 +1,77 @@
+#pragma once
+#include <stdint.h>
+
+#if defined(__GNUC__)
+#include <cpuid.h>
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+#if CPUINFO_MOCK
+#include <cpuinfo-mock.h>
+#endif
+#include <x86/api.h>
+
+#if defined(__GNUC__) || defined(_MSC_VER)
+/*
+ * Execute CPUID for leaf `eax` and return the four result registers.
+ * With CPUINFO_MOCK enabled, the values come from cpuinfo_mock_get_cpuid() instead of the hardware.
+ */
+static inline struct cpuid_regs cpuid(uint32_t eax) {
+#if CPUINFO_MOCK
+	uint32_t regs_array[4];
+	cpuinfo_mock_get_cpuid(eax, regs_array);
+	return (struct cpuid_regs){
+		.eax = regs_array[0],
+		.ebx = regs_array[1],
+		.ecx = regs_array[2],
+		.edx = regs_array[3],
+	};
+#else
+	struct cpuid_regs regs;
+#if defined(__GNUC__)
+	/* <cpuid.h> form: the outputs are written directly into the register fields */
+	__cpuid(eax, regs.eax, regs.ebx, regs.ecx, regs.edx);
+#else
+	/* <intrin.h> form: the outputs are returned through an int[4] array */
+	int regs_array[4];
+	__cpuid(regs_array, (int)eax);
+	regs.eax = regs_array[0];
+	regs.ebx = regs_array[1];
+	regs.ecx = regs_array[2];
+	regs.edx = regs_array[3];
+#endif
+	return regs;
+#endif
+}
+
+/*
+ * Execute CPUID for leaf `eax`, sub-leaf `ecx`, and return the four result registers.
+ * With CPUINFO_MOCK enabled, the values come from cpuinfo_mock_get_cpuidex() instead of the hardware.
+ */
+static inline struct cpuid_regs cpuidex(uint32_t eax, uint32_t ecx) {
+#if CPUINFO_MOCK
+	uint32_t regs_array[4];
+	cpuinfo_mock_get_cpuidex(eax, ecx, regs_array);
+	return (struct cpuid_regs){
+		.eax = regs_array[0],
+		.ebx = regs_array[1],
+		.ecx = regs_array[2],
+		.edx = regs_array[3],
+	};
+#else
+	struct cpuid_regs regs;
+#if defined(__GNUC__)
+	/* <cpuid.h> form: the outputs are written directly into the register fields */
+	__cpuid_count(eax, ecx, regs.eax, regs.ebx, regs.ecx, regs.edx);
+#else
+	/* <intrin.h> form: the outputs are returned through an int[4] array */
+	int regs_array[4];
+	__cpuidex(regs_array, (int)eax, (int)ecx);
+	regs.eax = regs_array[0];
+	regs.ebx = regs_array[1];
+	regs.ecx = regs_array[2];
+	regs.edx = regs_array[3];
+#endif
+	return regs;
+#endif
+}
+#endif
+
+/*
+ * Read the extended control register selected by `ext_ctrl_reg` and return its 64-bit value.
+ * Unlike cpuid()/cpuidex() above, this helper has no CPUINFO_MOCK path.
+ */
+static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) {
+#ifdef _MSC_VER
+	return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg);
+#else
+	uint32_t lo, hi;
+	/*
+	 * The instruction is emitted as raw opcode bytes (presumably so that assemblers
+	 * without the XGETBV mnemonic still build this - TODO confirm).
+	 * Register selector goes in ECX; the result comes back in EDX:EAX.
+	 */
+	__asm__(".byte 0x0F, 0x01, 0xD0" : "=a"(lo), "=d"(hi) : "c"(ext_ctrl_reg));
+	return ((uint64_t)hi << 32) | (uint64_t)lo;
+#endif
+}
--- a/3rdparty/cpuinfo/src/x86/freebsd/init.c
+++ b/3rdparty/cpuinfo/src/x86/freebsd/init.c
@@ -0,0 +1,398 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <freebsd/api.h>
+#include <x86/api.h>
+
+/* Return the larger of two unsigned 32-bit values. */
+static inline uint32_t max(uint32_t a, uint32_t b) {
+	if (a > b) {
+		return a;
+	}
+	return b;
+}
+
+/*
+ * Build a mask with the `bits` least significant bits set.
+ *
+ * Widths of 32 or more yield an all-ones mask: shifting a 32-bit value by its
+ * full width (or more) is undefined behavior in C, so that case is handled
+ * explicitly instead of being passed to the shift.
+ */
+static inline uint32_t bit_mask(uint32_t bits) {
+	if (bits >= 32) {
+		return UINT32_MAX;
+	}
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+/**
+ * Initialize the global cpuinfo tables on x86 FreeBSD.
+ *
+ * Combines the thread/core/package counts from cpuinfo_freebsd_detect_topology() with the CPUID-derived
+ * description of the calling processor. The layout is synthesized under the assumption that threads and cores
+ * are spread evenly over packages and that every processor matches the one that was probed.
+ *
+ * On any failure an error is logged, everything allocated here is released, and the global state
+ * (including cpuinfo_is_initialized) is left untouched.
+ */
+void cpuinfo_x86_freebsd_init(void) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	struct cpuinfo_cache* l4 = NULL;
+
+	struct cpuinfo_freebsd_topology freebsd_topology = cpuinfo_freebsd_detect_topology();
+	if (freebsd_topology.packages == 0) {
+		cpuinfo_log_error("failed to detect topology");
+		goto cleanup;
+	}
+	processors = calloc(freebsd_topology.threads, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			freebsd_topology.threads * sizeof(struct cpuinfo_processor),
+			freebsd_topology.threads);
+		goto cleanup;
+	}
+	cores = calloc(freebsd_topology.cores, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			freebsd_topology.cores * sizeof(struct cpuinfo_core),
+			freebsd_topology.cores);
+		goto cleanup;
+	}
+	/* On x86 a cluster of cores is the biggest group of cores that shares a
+	 * cache. */
+	clusters = calloc(freebsd_topology.packages, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			freebsd_topology.packages * sizeof(struct cpuinfo_cluster),
+			freebsd_topology.packages);
+		goto cleanup;
+	}
+	packages = calloc(freebsd_topology.packages, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+			freebsd_topology.packages * sizeof(struct cpuinfo_package),
+			freebsd_topology.packages);
+		goto cleanup;
+	}
+
+	struct cpuinfo_x86_processor x86_processor;
+	memset(&x86_processor, 0, sizeof(x86_processor));
+	cpuinfo_x86_init_processor(&x86_processor);
+	char brand_string[48];
+	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
+
+	const uint32_t threads_per_core = freebsd_topology.threads_per_core;
+	const uint32_t threads_per_package = freebsd_topology.threads / freebsd_topology.packages;
+	const uint32_t cores_per_package = freebsd_topology.cores / freebsd_topology.packages;
+	/* All three values are used as divisors below; reject a topology that would divide by zero. */
+	if (threads_per_core == 0 || threads_per_package == 0 || cores_per_package == 0) {
+		cpuinfo_log_error(
+			"unsupported topology: %" PRIu32 " threads per core, %" PRIu32
+			" threads per package, %" PRIu32 " cores per package",
+			threads_per_core,
+			threads_per_package,
+			cores_per_package);
+		goto cleanup;
+	}
+	/* One cluster per package: both cover the same range of processors and cores */
+	for (uint32_t i = 0; i < freebsd_topology.packages; i++) {
+		clusters[i] = (struct cpuinfo_cluster){
+			.processor_start = i * threads_per_package,
+			.processor_count = threads_per_package,
+			.core_start = i * cores_per_package,
+			.core_count = cores_per_package,
+			.cluster_id = 0,
+			.package = packages + i,
+			.vendor = x86_processor.vendor,
+			.uarch = x86_processor.uarch,
+			.cpuid = x86_processor.cpuid,
+		};
+		packages[i].processor_start = i * threads_per_package;
+		packages[i].processor_count = threads_per_package;
+		packages[i].core_start = i * cores_per_package;
+		packages[i].core_count = cores_per_package;
+		packages[i].cluster_start = i;
+		packages[i].cluster_count = 1;
+		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
+	}
+	for (uint32_t i = 0; i < freebsd_topology.cores; i++) {
+		cores[i] = (struct cpuinfo_core){
+			.processor_start = i * threads_per_core,
+			.processor_count = threads_per_core,
+			.core_id = i % cores_per_package,
+			.cluster = clusters + i / cores_per_package,
+			.package = packages + i / cores_per_package,
+			.vendor = x86_processor.vendor,
+			.uarch = x86_processor.uarch,
+			.cpuid = x86_processor.cpuid,
+		};
+	}
+	for (uint32_t i = 0; i < freebsd_topology.threads; i++) {
+		const uint32_t smt_id = i % threads_per_core;
+		const uint32_t core_id = i / threads_per_core;
+		const uint32_t package_id = i / threads_per_package;
+
+		/* Reconstruct APIC IDs from topology components */
+		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
+		const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
+		const uint32_t package_bits_offset =
+			max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
+			    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
+		const uint32_t apic_id = ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
+			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
+			(package_id << package_bits_offset);
+		cpuinfo_log_debug("reconstructed APIC ID 0x%08" PRIx32 " for thread %" PRIu32, apic_id, i);
+
+		processors[i].smt_id = smt_id;
+		processors[i].core = cores + i / threads_per_core;
+		processors[i].cluster = clusters + i / threads_per_package;
+		processors[i].package = packages + i / threads_per_package;
+		processors[i].apic_id = apic_id;
+	}
+
+	uint32_t threads_per_l1 = 0, l1_count = 0;
+	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
+		/* Assume that threads on the same core share L1 */
+		threads_per_l1 = freebsd_topology.threads / freebsd_topology.cores;
+		if (threads_per_l1 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l1");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+			"freebsd kernel did not report number of "
+			"threads sharing L1 cache; assume %" PRIu32,
+			threads_per_l1);
+		l1_count = freebsd_topology.threads / threads_per_l1;
+		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
+	}
+
+	uint32_t threads_per_l2 = 0, l2_count = 0;
+	if (x86_processor.cache.l2.size != 0) {
+		if (x86_processor.cache.l3.size != 0) {
+			/* This is not a last-level cache; assume that threads
+			 * on the same core share L2 */
+			threads_per_l2 = freebsd_topology.threads / freebsd_topology.cores;
+		} else {
+			/* This is a last-level cache; assume that threads on
+			 * the same package share L2 */
+			threads_per_l2 = freebsd_topology.threads / freebsd_topology.packages;
+		}
+		if (threads_per_l2 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l2");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+			"freebsd kernel did not report number of "
+			"threads sharing L2 cache; assume %" PRIu32,
+			threads_per_l2);
+		l2_count = freebsd_topology.threads / threads_per_l2;
+		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
+	}
+
+	uint32_t threads_per_l3 = 0, l3_count = 0;
+	if (x86_processor.cache.l3.size != 0) {
+		/*
+		 * Assume that threads on the same package share L3.
+		 * However, it is not necessarily the last-level cache (there
+		 * may be L4 cache as well)
+		 */
+		threads_per_l3 = freebsd_topology.threads / freebsd_topology.packages;
+		if (threads_per_l3 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l3");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+			"freebsd kernel did not report number of "
+			"threads sharing L3 cache; assume %" PRIu32,
+			threads_per_l3);
+		l3_count = freebsd_topology.threads / threads_per_l3;
+		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
+	}
+
+	uint32_t threads_per_l4 = 0, l4_count = 0;
+	if (x86_processor.cache.l4.size != 0) {
+		/*
+		 * Assume that all threads share this L4.
+		 * As of now, L4 cache exists only on notebook x86 CPUs, which
+		 * are single-package, but multi-socket systems could have
+		 * shared L4 (like on IBM POWER8).
+		 */
+		threads_per_l4 = freebsd_topology.threads;
+		if (threads_per_l4 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l4");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+			"freebsd kernel did not report number of "
+			"threads sharing L4 cache; assume %" PRIu32,
+			threads_per_l4);
+		l4_count = freebsd_topology.threads / threads_per_l4;
+		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
+	}
+
+	/* Replicate each detected cache once per sharing group and link every thread to its instance */
+	if (x86_processor.cache.l1i.size != 0) {
+		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of "
+				"%" PRIu32 " L1I caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1i[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l1i.size,
+				.associativity = x86_processor.cache.l1i.associativity,
+				.sets = x86_processor.cache.l1i.sets,
+				.partitions = x86_processor.cache.l1i.partitions,
+				.line_size = x86_processor.cache.l1i.line_size,
+				.flags = x86_processor.cache.l1i.flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
+			processors[t].cache.l1i = &l1i[t / threads_per_l1];
+		}
+	}
+
+	if (x86_processor.cache.l1d.size != 0) {
+		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of "
+				"%" PRIu32 " L1D caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1d[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l1d.size,
+				.associativity = x86_processor.cache.l1d.associativity,
+				.sets = x86_processor.cache.l1d.sets,
+				.partitions = x86_processor.cache.l1d.partitions,
+				.line_size = x86_processor.cache.l1d.line_size,
+				.flags = x86_processor.cache.l1d.flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
+			processors[t].cache.l1d = &l1d[t / threads_per_l1];
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of "
+				"%" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l2_count; c++) {
+			l2[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l2.size,
+				.associativity = x86_processor.cache.l2.associativity,
+				.sets = x86_processor.cache.l2.sets,
+				.partitions = x86_processor.cache.l2.partitions,
+				.line_size = x86_processor.cache.l2.line_size,
+				.flags = x86_processor.cache.l2.flags,
+				.processor_start = c * threads_per_l2,
+				.processor_count = threads_per_l2,
+			};
+		}
+		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
+			processors[t].cache.l2 = &l2[t / threads_per_l2];
+		}
+	}
+
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of "
+				"%" PRIu32 " L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache),
+				l3_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l3_count; c++) {
+			l3[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l3.size,
+				.associativity = x86_processor.cache.l3.associativity,
+				.sets = x86_processor.cache.l3.sets,
+				.partitions = x86_processor.cache.l3.partitions,
+				.line_size = x86_processor.cache.l3.line_size,
+				.flags = x86_processor.cache.l3.flags,
+				.processor_start = c * threads_per_l3,
+				.processor_count = threads_per_l3,
+			};
+		}
+		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
+			processors[t].cache.l3 = &l3[t / threads_per_l3];
+		}
+	}
+
+	if (l4_count != 0) {
+		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
+		if (l4 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of "
+				"%" PRIu32 " L4 caches",
+				l4_count * sizeof(struct cpuinfo_cache),
+				l4_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l4_count; c++) {
+			l4[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l4.size,
+				.associativity = x86_processor.cache.l4.associativity,
+				.sets = x86_processor.cache.l4.sets,
+				.partitions = x86_processor.cache.l4.partitions,
+				.line_size = x86_processor.cache.l4.line_size,
+				.flags = x86_processor.cache.l4.flags,
+				.processor_start = c * threads_per_l4,
+				.processor_count = threads_per_l4,
+			};
+		}
+		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
+			processors[t].cache.l4 = &l4[t / threads_per_l4];
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_processors_count = freebsd_topology.threads;
+	cpuinfo_cores_count = freebsd_topology.cores;
+	cpuinfo_clusters_count = freebsd_topology.packages;
+	cpuinfo_packages_count = freebsd_topology.packages;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+		.uarch = x86_processor.uarch,
+		.cpuid = x86_processor.cpuid,
+		.processor_count = freebsd_topology.threads,
+		.core_count = freebsd_topology.cores,
+	};
+
+	/* Memory barrier between committing the tables and raising the initialized flag */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership moved to the globals: clear the locals so the cleanup below frees nothing */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(l4);
+}
--- a/3rdparty/cpuinfo/src/x86/info.c
+++ b/3rdparty/cpuinfo/src/x86/info.c
@@ -0,0 +1,18 @@
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+
+/*
+ * Decode the processor signature (the EAX value that init code obtains from
+ * CPUID leaf 1) into its bit fields and the derived family/model numbers.
+ *
+ * Bit layout consumed here:
+ *   [3:0] stepping, [7:4] base model, [11:8] base family,
+ *   [13:12] processor type, [19:16] extended model, [27:20] extended family.
+ */
+struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax) {
+	const uint32_t nibble_mask = 0xF;
+	struct cpuinfo_x86_model_info info;
+
+	/* Raw fields, extracted from the lowest bits upwards. */
+	info.stepping = eax & nibble_mask;
+	info.base_model = (eax >> 4) & nibble_mask;
+	info.base_family = (eax >> 8) & nibble_mask;
+	info.processor_type = (eax >> 12) & 0x3;
+	info.extended_model = (eax >> 16) & nibble_mask;
+	info.extended_family = (eax >> 20) & 0xFF;
+
+	/* Derived values: extended model supplies the high nibble of the model,
+	 * extended family is added on top of the base family. */
+	info.model = info.base_model + (info.extended_model << 4);
+	info.family = info.base_family + info.extended_family;
+
+	return info;
+}
--- a/3rdparty/cpuinfo/src/x86/init.c
+++ b/3rdparty/cpuinfo/src/x86/init.c
@@ -0,0 +1,78 @@
+#include <stdint.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <cpuinfo/log.h>
+#include <cpuinfo/utils.h>
+#include <x86/api.h>
+#include <x86/cpuid.h>
+
+/* Detected ISA feature flags; assigned by cpuinfo_x86_init_processor() from
+ * the result of cpuinfo_x86_detect_isa(). Stays zeroed if CPUID leaf 1 is
+ * unavailable. */
+struct cpuinfo_x86_isa cpuinfo_isa = {0};
+/* Value of CPUID leaf 1 EBX[bits 8-15] multiplied by 8, as computed by
+ * cpuinfo_x86_init_processor(). Stays 0 if CPUID leaf 1 is unavailable. */
+CPUINFO_INTERNAL uint32_t cpuinfo_x86_clflush_size = 0;
+
+/*
+ * Populate `processor` with information queried through CPUID:
+ * vendor, CPUID signature, microarchitecture, cache/TLB descriptors,
+ * topology and the raw brand string. As side effects it also updates the
+ * globals cpuinfo_x86_clflush_size and cpuinfo_isa.
+ *
+ * Fields that depend on CPUID leaf 1 are only written when the CPU reports
+ * that leaf; the brand string is only written when extended leaves up to
+ * 0x80000004 are reported.
+ */
+void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) {
+	/* Leaf 0: highest supported basic leaf + vendor identification. */
+	const struct cpuid_regs leaf0 = cpuid(0);
+	const uint32_t max_base_index = leaf0.eax;
+	const enum cpuinfo_vendor vendor = processor->vendor =
+		cpuinfo_x86_decode_vendor(leaf0.ebx, leaf0.ecx, leaf0.edx);
+
+	/*
+	 * Leaf 0x80000000: highest supported extended leaf. A value below
+	 * 0x80000000 is treated as "no extended leaves".
+	 */
+	const struct cpuid_regs leaf0x80000000 = cpuid(UINT32_C(0x80000000));
+	const uint32_t max_extended_index = leaf0x80000000.eax >= UINT32_C(0x80000000) ? leaf0x80000000.eax : 0;
+
+	/* Extended feature flags, or all zeroes if the leaf is not available. */
+	const struct cpuid_regs leaf0x80000001 = max_extended_index >= UINT32_C(0x80000001)
+		? cpuid(UINT32_C(0x80000001))
+		: (struct cpuid_regs){0, 0, 0, 0};
+
+	if (max_base_index >= 1) {
+		const struct cpuid_regs leaf1 = cpuid(1);
+		processor->cpuid = leaf1.eax;
+
+		const struct cpuinfo_x86_model_info model_info = cpuinfo_x86_decode_model_info(leaf1.eax);
+		const enum cpuinfo_uarch uarch = processor->uarch = cpuinfo_x86_decode_uarch(vendor, &model_info);
+
+		/* EBX[bits 8-15] is scaled by 8 to obtain the stored size. */
+		cpuinfo_x86_clflush_size = ((leaf1.ebx >> 8) & UINT32_C(0x000000FF)) * 8;
+
+		/*
+		 * Topology extensions support:
+		 * - AMD: ecx[bit 22] in extended info (reserved bit on Intel
+		 * CPUs).
+		 */
+		const bool amd_topology_extensions = !!(leaf0x80000001.ecx & UINT32_C(0x00400000));
+
+		cpuinfo_x86_detect_cache(
+			max_base_index,
+			max_extended_index,
+			amd_topology_extensions,
+			vendor,
+			&model_info,
+			&processor->cache,
+			&processor->tlb.itlb_4KB,
+			&processor->tlb.itlb_2MB,
+			&processor->tlb.itlb_4MB,
+			&processor->tlb.dtlb0_4KB,
+			&processor->tlb.dtlb0_2MB,
+			&processor->tlb.dtlb0_4MB,
+			&processor->tlb.dtlb_4KB,
+			&processor->tlb.dtlb_2MB,
+			&processor->tlb.dtlb_4MB,
+			&processor->tlb.dtlb_1GB,
+			&processor->tlb.stlb2_4KB,
+			&processor->tlb.stlb2_2MB,
+			&processor->tlb.stlb2_1GB,
+			&processor->topology.core_bits_length);
+
+		cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology);
+
+		cpuinfo_isa = cpuinfo_x86_detect_isa(
+			leaf1, leaf0x80000001, max_base_index, max_extended_index, vendor, uarch);
+	}
+	if (max_extended_index >= UINT32_C(0x80000004)) {
+		/* Leaves 0x80000002..0x80000004 each return 16 bytes of the brand string. */
+		struct cpuid_regs brand_string[3];
+		for (uint32_t i = 0; i < 3; i++) {
+			brand_string[i] = cpuid(UINT32_C(0x80000002) + i);
+		}
+		memcpy(processor->brand_string, brand_string, sizeof(processor->brand_string));
+		/*
+		 * The raw brand string is copied verbatim from CPUID registers and
+		 * is not guaranteed to contain a NUL terminator, so bound the read
+		 * with a precision ("%.48s") rather than a minimum field width
+		 * ("%48s"), which would keep reading past the buffer.
+		 */
+		cpuinfo_log_debug("raw CPUID brand string: \"%.48s\"", processor->brand_string);
+	}
+}
--- a/3rdparty/cpuinfo/src/x86/isa.c
+++ b/3rdparty/cpuinfo/src/x86/isa.c
@@ -0,0 +1,832 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <x86/cpuid.h>
+
+#if CPUINFO_ARCH_X86
+/*
+ * Memory image written by the FXSAVE instruction (32-bit x86 builds only).
+ * cpuinfo_x86_detect_isa() stores into an instance of this structure and then
+ * reads mxcsr_mask to detect denormals-as-zero support.
+ *
+ * The declared fields add up to 512 bytes (8 + 24 + 128 + 128 + 224); packing
+ * is forced (pragma pack on MSVC, __packed__ elsewhere) so that no padding is
+ * inserted, and non-MSVC compilers additionally align the structure to 16
+ * bytes.
+ */
+#ifdef _MSC_VER
+#pragma pack(push, 2)
+#endif
+struct fxsave_region {
+	uint16_t fpu_control_word;
+	uint16_t fpu_status_word;
+	uint16_t fpu_tag_word;
+	uint16_t fpu_opcode;
+	uint32_t fpu_instruction_pointer_offset;
+	uint32_t fpu_instruction_pointer_selector;
+	uint32_t fpu_operand_pointer_offset;
+	uint32_t fpu_operand_pointer_selector;
+	uint32_t mxcsr_state;
+	/* Bit 6 of this mask is tested to detect DAZ support. */
+	uint32_t mxcsr_mask;
+	uint64_t fpu_registers[8 * 2];
+	uint64_t xmm_registers[8 * 2];
+	uint64_t padding[28];
+}
+#ifndef _MSC_VER
+__attribute__((__aligned__(16), __packed__))
+#endif
+; /* end of fxsave_region structure */
+#ifdef _MSC_VER
+#pragma pack(pop, 2)
+#endif
+#endif
+
+/*
+ * Build the set of supported instruction-set extensions from CPUID data.
+ *
+ * Parameters:
+ * - basic_info: registers returned by CPUID leaf 1.
+ * - extended_info: registers returned by CPUID leaf 0x80000001 (all zeroes if
+ *   the leaf is unavailable).
+ * - max_base_index: highest supported basic CPUID leaf.
+ * - max_extended_index: highest supported extended CPUID leaf (0 if none).
+ * - vendor: decoded CPU vendor; selects vendor-specific interpretation of
+ *   some bits and enables the VIA PadLock query.
+ * - uarch: decoded microarchitecture; only used for Geode detection on 32-bit
+ *   x86 builds.
+ *
+ * Returns a cpuinfo_x86_isa structure with one flag per detected feature.
+ * Features that need extended register state (AVX, AVX-512, MPX) are reported
+ * only if both the CPUID feature bit is set and XCR0 shows that the operating
+ * system has enabled the corresponding state.
+ */
+struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+	const struct cpuid_regs basic_info,
+	const struct cpuid_regs extended_info,
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	enum cpuinfo_vendor vendor,
+	enum cpuinfo_uarch uarch) {
+	struct cpuinfo_x86_isa isa = {0};
+
+	const struct cpuid_regs structured_feature_info0 =
+		(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs){0, 0, 0, 0};
+	const struct cpuid_regs structured_feature_info1 =
+		(max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs){0, 0, 0, 0};
+	/*
+	 * Leaf 0x24 (AVX10 enumeration) must only be queried when the CPU
+	 * reports it as a supported basic leaf; checking against leaf 7 here
+	 * would issue an out-of-range CPUID query on CPUs without AVX10.
+	 */
+	const struct cpuid_regs structured_feature_info2 =
+		(max_base_index >= 0x24) ? cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0};
+
+	const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
+	const struct cpuid_regs processor_capacity_info = (max_extended_index >= processor_capacity_info_index)
+		? cpuid(processor_capacity_info_index)
+		: (struct cpuid_regs){0, 0, 0, 0};
+
+	bool avx_regs = false, avx512_regs = false, mpx_regs = false;
+	/*
+	 * OSXSAVE: Operating system enabled XSAVE instructions for application
+	 * use:
+	 * - Intel, AMD: ecx[bit 26] in basic info = XSAVE/XRSTOR instructions
+	 * supported by a chip.
+	 * - Intel, AMD: ecx[bit 27] in basic info = XSAVE/XRSTOR instructions
+	 * enabled by OS.
+	 */
+	const uint32_t osxsave_mask = UINT32_C(0x0C000000);
+	if ((basic_info.ecx & osxsave_mask) == osxsave_mask) {
+		uint64_t xcr0_valid_bits = 0;
+		if (max_base_index >= 0xD) {
+			const struct cpuid_regs regs = cpuidex(0xD, 0);
+			xcr0_valid_bits = ((uint64_t)regs.edx << 32) | regs.eax;
+		}
+
+		const uint64_t xfeature_enabled_mask = xgetbv(0);
+
+		/*
+		 * AVX registers:
+		 * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits
+		 * of ymm registers
+		 * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for high 128 bits
+		 * of ymm registers
+		 */
+		const uint64_t avx_regs_mask = UINT64_C(0x0000000000000006);
+		if ((xcr0_valid_bits & avx_regs_mask) == avx_regs_mask) {
+			avx_regs = (xfeature_enabled_mask & avx_regs_mask) == avx_regs_mask;
+		}
+
+		/*
+		 * AVX512 registers:
+		 * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits
+		 * of zmm registers
+		 * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for bits 128-255
+		 * of zmm registers
+		 * - Intel: XFEATURE_ENABLED_MASK[bit 5] for 8 64-bit OpMask
+		 * registers (k0-k7)
+		 * - Intel: XFEATURE_ENABLED_MASK[bit 6] for the high 256 bits
+		 * of the zmm registers zmm0-zmm15
+		 * - Intel: XFEATURE_ENABLED_MASK[bit 7] for the 512-bit zmm
+		 * registers zmm16-zmm31
+		 */
+		const uint64_t avx512_regs_mask = UINT64_C(0x00000000000000E6);
+		if ((xcr0_valid_bits & avx512_regs_mask) == avx512_regs_mask) {
+			avx512_regs = (xfeature_enabled_mask & avx512_regs_mask) == avx512_regs_mask;
+		}
+
+		/*
+		 * MPX registers:
+		 * - Intel: XFEATURE_ENABLED_MASK[bit 3] for BNDREGS
+		 * - Intel: XFEATURE_ENABLED_MASK[bit 4] for BNDCSR
+		 */
+		const uint64_t mpx_regs_mask = UINT64_C(0x0000000000000018);
+		if ((xcr0_valid_bits & mpx_regs_mask) == mpx_regs_mask) {
+			mpx_regs = (xfeature_enabled_mask & mpx_regs_mask) == mpx_regs_mask;
+		}
+	}
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * RDTSC instruction:
+	 * - Intel, AMD: edx[bit 4] in basic info.
+	 * - AMD: edx[bit 4] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.rdtsc = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000010));
+#endif
+
+	/*
+	 * SYSENTER/SYSEXIT instructions:
+	 * - Intel, AMD: edx[bit 11] in basic info.
+	 */
+	isa.sysenter = !!(basic_info.edx & UINT32_C(0x00000800));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * SYSCALL/SYSRET instructions:
+	 * - Intel, AMD: edx[bit 11] in extended info.
+	 */
+	isa.syscall = !!(extended_info.edx & UINT32_C(0x00000800));
+#endif
+
+	/*
+	 * RDMSR/WRMSR instructions:
+	 * - Intel, AMD: edx[bit 5] in basic info.
+	 * - AMD: edx[bit 5] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.msr = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000020));
+
+	/*
+	 * CLZERO instruction:
+	 * - AMD: ebx[bit 0] in processor capacity info (reserved bit on Intel
+	 * CPUs).
+	 */
+	isa.clzero = !!(processor_capacity_info.ebx & UINT32_C(0x00000001));
+
+	/*
+	 * CLFLUSH instruction:
+	 * - Intel, AMD: edx[bit 19] in basic info.
+	 */
+	isa.clflush = !!(basic_info.edx & UINT32_C(0x00080000));
+
+	/*
+	 * CLFLUSHOPT instruction:
+	 * - Intel: ebx[bit 23] in structured feature info (ecx = 0).
+	 */
+	isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
+
+	/*
+	 * MWAIT/MONITOR instructions:
+	 * - Intel, AMD: ecx[bit 3] in basic info.
+	 */
+	isa.mwait = !!(basic_info.ecx & UINT32_C(0x00000008));
+
+	/*
+	 * MWAITX/MONITORX instructions:
+	 * - AMD: ecx[bit 29] in extended info.
+	 */
+	isa.mwaitx = !!(extended_info.ecx & UINT32_C(0x20000000));
+
+	/*
+	 * FXSAVE/FXRSTOR instructions:
+	 * - Intel, AMD: edx[bit 24] in basic info.
+	 * - AMD: edx[bit 24] in extended info (zero bit on Intel CPUs, EMMX bit
+	 * on Cyrix CPUs).
+	 */
+	switch (vendor) {
+#if CPUINFO_ARCH_X86
+		case cpuinfo_vendor_cyrix:
+		case cpuinfo_vendor_nsc:
+			isa.emmx = !!(extended_info.edx & UINT32_C(0x01000000));
+			break;
+#endif
+		default:
+			isa.fxsave = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x01000000));
+			break;
+	}
+
+	/*
+	 * XSAVE/XRSTOR instructions:
+	 * - Intel, AMD: ecx[bit 26] in basic info.
+	 */
+	isa.xsave = !!(basic_info.ecx & UINT32_C(0x04000000));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * x87 FPU instructions:
+	 * - Intel, AMD: edx[bit 0] in basic info.
+	 * - AMD: edx[bit 0] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.fpu = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000001));
+
+	/*
+	 * MMX instructions:
+	 * - Intel, AMD: edx[bit 23] in basic info.
+	 * - AMD: edx[bit 23] in extended info (zero bit on Intel CPUs).
+	 */
+	isa.mmx = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00800000));
+
+	/*
+	 * MMX+/Integer SSE instructions:
+	 * - Intel, AMD: edx[bit 25] in basic info (SSE feature flag).
+	 * - Pre-SSE AMD: edx[bit 22] in extended info (zero bit on Intel CPUs).
+	 */
+	isa.mmx_plus = !!((basic_info.edx & UINT32_C(0x02000000)) | (extended_info.edx & UINT32_C(0x00400000)));
+#endif
+
+	/*
+	 * 3dnow! instructions:
+	 * - AMD: edx[bit 31] of extended info (zero bit on Intel CPUs).
+	 */
+	isa.three_d_now = !!(extended_info.edx & UINT32_C(0x80000000));
+
+	/*
+	 * 3dnow!+ instructions:
+	 * - AMD: edx[bit 30] of extended info (zero bit on Intel CPUs).
+	 */
+	isa.three_d_now_plus = !!(extended_info.edx & UINT32_C(0x40000000));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * 3dnow! Geode instructions:
+	 * - No CPUID bit, detect as Geode microarchitecture + 3dnow!+ support
+	 */
+	isa.three_d_now_geode = isa.three_d_now_plus && (uarch == cpuinfo_uarch_geode);
+#endif
+
+	/*
+	 * PREFETCH instruction:
+	 * - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch
+	 * instructions). On Intel this bit indicates PREFETCHW, but not
+	 * PREFETCH support.
+	 * - AMD: edx[bit 31] of extended info (implied by 3dnow! support).
+	 * Reserved bit on Intel CPUs.
+	 * - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support).
+	 * Reserved bit on Intel CPUs.
+	 * - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply
+	 * PREFETCH support on non-AMD CPUs!!!
+	 */
+	switch (vendor) {
+		case cpuinfo_vendor_intel:
+			/*
+			 * Instruction is not documented in the manual,
+			 * and the 3dnow! prefetch CPUID bit indicates PREFETCHW
+			 * instruction.
+			 */
+			break;
+		case cpuinfo_vendor_amd:
+		case cpuinfo_vendor_hygon:
+			isa.prefetch =
+				!!((extended_info.ecx & UINT32_C(0x00000100)) |
+				   (extended_info.edx & UINT32_C(0xE0000000)));
+			break;
+		default:
+			/*
+			 * Conservatively assume, that 3dnow!/3dnow!+ support
+			 * implies PREFETCH support, but 3dnow! prefetch CPUID
+			 * bit follows Intel spec (PREFETCHW, but not PREFETCH).
+			 */
+			isa.prefetch = !!(extended_info.edx & UINT32_C(0xC0000000));
+			break;
+	}
+
+	/*
+	 * PREFETCHW instruction:
+	 * - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch
+	 * instructions).
+	 * - Intel: ecx[bit 8] of extended info (PREFETCHW instruction only).
+	 * - AMD: edx[bit 31] of extended info (implied by 3dnow! support).
+	 * Reserved bit on Intel CPUs.
+	 * - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support).
+	 * Reserved bit on Intel CPUs.
+	 * - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply
+	 * PREFETCHW support on non-AMD CPUs!!!
+	 */
+	switch (vendor) {
+		case cpuinfo_vendor_amd:
+		case cpuinfo_vendor_hygon:
+			isa.prefetchw =
+				!!((extended_info.ecx & UINT32_C(0x00000100)) |
+				   (extended_info.edx & UINT32_C(0xE0000000)));
+			break;
+		default:
+			/* Assume, that 3dnow!/3dnow!+ support implies PREFETCHW
+			 * support, not implications from x86-64 support */
+			isa.prefetchw =
+				!!((extended_info.ecx & UINT32_C(0x00000100)) |
+				   (extended_info.edx & UINT32_C(0xC0000000)));
+			break;
+	}
+
+	/*
+	 * PREFETCHWT1 instruction:
+	 * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved
+	 * bit on AMD.
+	 */
+	isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * SSE instructions:
+	 * - Intel, AMD: edx[bit 25] in basic info.
+	 */
+	isa.sse = !!(basic_info.edx & UINT32_C(0x02000000));
+
+	/*
+	 * SSE2 instructions:
+	 * - Intel, AMD: edx[bit 26] in basic info.
+	 */
+	isa.sse2 = !!(basic_info.edx & UINT32_C(0x04000000));
+#endif
+
+	/*
+	 * SSE3 instructions:
+	 * - Intel, AMD: ecx[bit 0] in basic info.
+	 */
+	isa.sse3 = !!(basic_info.ecx & UINT32_C(0x00000001));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * CPUs with x86-64 or SSE3 always support DAZ (denormals-as-zero) mode.
+	 * Only early Pentium 4 models may not support it.
+	 */
+	if (isa.sse3) {
+		isa.daz = true;
+	} else {
+		/* Detect DAZ support from masked MXCSR bits */
+		if (isa.sse && isa.fxsave) {
+			struct fxsave_region region = {0};
+#ifdef _MSC_VER
+			_fxsave(&region);
+#else
+			__asm__ __volatile__("fxsave %[region];" : [region] "+m"(region));
+#endif
+
+			/*
+			 * Denormals-as-zero (DAZ) flag:
+			 * - Intel, AMD: MXCSR[bit 6]
+			 */
+			isa.daz = !!(region.mxcsr_mask & UINT32_C(0x00000040));
+		}
+	}
+#endif
+
+	/*
+	 * SSSE3 instructions:
+	 * - Intel, AMD: ecx[bit 9] in basic info.
+	 */
+	isa.ssse3 = !!(basic_info.ecx & UINT32_C(0x00000200));
+
+	/*
+	 * SSE4.1 instructions:
+	 * - Intel, AMD: ecx[bit 19] in basic info.
+	 */
+	isa.sse4_1 = !!(basic_info.ecx & UINT32_C(0x00080000));
+
+	/*
+	 * SSE4.2 instructions:
+	 * - Intel: ecx[bit 20] in basic info (reserved bit on AMD CPUs).
+	 */
+	isa.sse4_2 = !!(basic_info.ecx & UINT32_C(0x00100000));
+
+	/*
+	 * SSE4A instructions:
+	 * - AMD: ecx[bit 6] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.sse4a = !!(extended_info.ecx & UINT32_C(0x00000040));
+
+	/*
+	 * Misaligned memory operands in SSE instructions:
+	 * - AMD: ecx[bit 7] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.misaligned_sse = !!(extended_info.ecx & UINT32_C(0x00000080));
+
+	/*
+	 * AVX instructions:
+	 * - Intel, AMD: ecx[bit 28] in basic info.
+	 */
+	isa.avx = avx_regs && !!(basic_info.ecx & UINT32_C(0x10000000));
+
+	/*
+	 * FMA3 instructions:
+	 * - Intel: ecx[bit 12] in basic info (reserved bit on AMD CPUs).
+	 */
+	isa.fma3 = avx_regs && !!(basic_info.ecx & UINT32_C(0x00001000));
+
+	/*
+	 * FMA4 instructions:
+	 * - AMD: ecx[bit 16] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.fma4 = avx_regs && !!(extended_info.ecx & UINT32_C(0x00010000));
+
+	/*
+	 * XOP instructions:
+	 * - AMD: ecx[bit 11] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.xop = avx_regs && !!(extended_info.ecx & UINT32_C(0x00000800));
+
+	/*
+	 * F16C instructions:
+	 * - Intel, AMD: ecx[bit 29] in basic info.
+	 */
+	isa.f16c = avx_regs && !!(basic_info.ecx & UINT32_C(0x20000000));
+
+	/*
+	 * AVX2 instructions:
+	 * - Intel: ebx[bit 5] in structured feature info (ecx = 0).
+	 */
+	isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
+
+	/*
+	 * AVX512F instructions:
+	 * - Intel: ebx[bit 16] in structured feature info (ecx = 0).
+	 */
+	isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
+
+	/*
+	 * AVX 10.1 instructions: avx 10 isa supported.
+	 * - Intel: edx[bit 19] in structured feature info (ecx = 1).
+	 */
+	isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000));
+
+	/*
+	 * AVX 10.2 instructions: avx 10 version information.
+	 * - Intel: ebx[bits 0-7] in AVX10 enumeration info (eax = 0x24, ecx = 0).
+	 */
+	isa.avx10_2 = ((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1;
+
+	/*
+	 * AVX512PF instructions:
+	 * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
+	 */
+	isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
+
+	/*
+	 * AVX512ER instructions:
+	 * - Intel: ebx[bit 27] in structured feature info (ecx = 0).
+	 */
+	isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
+
+	/*
+	 * AVX512CD instructions:
+	 * - Intel: ebx[bit 28] in structured feature info (ecx = 0).
+	 */
+	isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
+
+	/*
+	 * AVX512DQ instructions:
+	 * - Intel: ebx[bit 17] in structured feature info (ecx = 0).
+	 */
+	isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
+
+	/*
+	 * AVX512BW instructions:
+	 * - Intel: ebx[bit 30] in structured feature info (ecx = 0).
+	 */
+	isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
+
+	/*
+	 * AVX512VL instructions:
+	 * - Intel: ebx[bit 31] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
+
+	/*
+	 * AVX512IFMA instructions:
+	 * - Intel: ebx[bit 21] in structured feature info (ecx = 0).
+	 */
+	isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
+
+	/*
+	 * AVX512VBMI instructions:
+	 * - Intel: ecx[bit 1] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
+
+	/*
+	 * AVX512VBMI2 instructions:
+	 * - Intel: ecx[bit 6] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
+
+	/*
+	 * AVX512BITALG instructions:
+	 * - Intel: ecx[bit 12] in structured feature info (ecx = 0).
+	 */
+	isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
+
+	/*
+	 * AVX512VPOPCNTDQ instructions:
+	 * - Intel: ecx[bit 14] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
+
+	/*
+	 * AVX512VNNI instructions:
+	 * - Intel: ecx[bit 11] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
+
+	/*
+	 * AVX512_4VNNIW instructions:
+	 * - Intel: edx[bit 2] in structured feature info (ecx = 0).
+	 */
+	isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
+
+	/*
+	 * AVX512_4FMAPS instructions:
+	 * - Intel: edx[bit 3] in structured feature info (ecx = 0).
+	 */
+	isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
+
+	/*
+	 * AVX512_VP2INTERSECT instructions:
+	 * - Intel: edx[bit 8] in structured feature info (ecx = 0).
+	 */
+	isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
+
+	/*
+	 * AVX512_FP16 instructions:
+	 * - Intel: edx[bit 23] in structured feature info (ecx = 0).
+	 */
+	isa.avx512fp16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00800000));
+
+	/*
+	 * AVX_VNNI instructions:
+	 * - Intel: eax[bit 4] in structured feature info (ecx = 1).
+	 */
+	isa.avxvnni = avx_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000010));
+
+	/*
+	 * AVX512_BF16 instructions:
+	 * - Intel: eax[bit 5] in structured feature info (ecx = 1).
+	 */
+	isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
+
+	/*
+	 * NOTE(review): the AMX flags below are gated on the AVX-512 register
+	 * state only; presumably AMX tile state has its own XCR0 enable bits
+	 * that are not checked here -- confirm against the Intel SDM before
+	 * relying on these flags for dispatch.
+	 */
+
+	/*
+	 * AMX_BF16 instructions:
+	 * - Intel: edx[bit 22] in structured feature info (ecx = 0).
+	 */
+	isa.amx_bf16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00400000));
+
+	/*
+	 * AMX_TILE instructions:
+	 * - Intel: edx[bit 24] in structured feature info (ecx = 0).
+	 */
+	isa.amx_tile = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x01000000));
+
+	/*
+	 * AMX_INT8 instructions:
+	 * - Intel: edx[bit 25] in structured feature info (ecx = 0).
+	 */
+	isa.amx_int8 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x02000000));
+
+	/*
+	 * AMX_FP16 instructions:
+	 * - Intel: eax[bit 21] in structured feature info (ecx = 1).
+	 */
+	isa.amx_fp16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00200000));
+
+	/*
+	 * AVX_VNNI_INT8 instructions:
+	 * - Intel: edx[bit 4] in structured feature info (ecx = 1).
+	 */
+	isa.avx_vnni_int8 = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000010));
+
+	/*
+	 * AVX_VNNI_INT16 instructions:
+	 * - Intel: edx[bit 10] in structured feature info (ecx = 1).
+	 */
+	isa.avx_vnni_int16 = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000400));
+
+	/*
+	 * AVX_NE_CONVERT instructions:
+	 * - Intel: edx[bit 5] in structured feature info (ecx = 1).
+	 */
+	isa.avx_ne_convert = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000020));
+
+	/*
+	 * HLE instructions:
+	 * - Intel: ebx[bit 4] in structured feature info (ecx = 0).
+	 */
+	isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
+
+	/*
+	 * RTM instructions:
+	 * - Intel: ebx[bit 11] in structured feature info (ecx = 0).
+	 */
+	isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
+
+	/*
+	 * XTEST instruction:
+	 * - Intel: either HLE or RTM is supported
+	 */
+	isa.xtest = isa.hle || isa.rtm;
+
+	/*
+	 * MPX registers and instructions:
+	 * - Intel: ebx[bit 14] in structured feature info (ecx = 0).
+	 */
+	isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
+
+#if CPUINFO_ARCH_X86
+	/*
+	 * CMOV instructions:
+	 * - Intel, AMD: edx[bit 15] in basic info.
+	 * - AMD: edx[bit 15] in extended info (zero bit on Intel CPUs).
+	 */
+	isa.cmov = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00008000));
+
+	/*
+	 * CMPXCHG8B instruction:
+	 * - Intel, AMD: edx[bit 8] in basic info.
+	 * - AMD: edx[bit 8] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.cmpxchg8b = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000100));
+#endif
+
+	/*
+	 * CMPXCHG16B instruction:
+	 * - Intel, AMD: ecx[bit 13] in basic info.
+	 */
+	isa.cmpxchg16b = !!(basic_info.ecx & UINT32_C(0x00002000));
+
+	/*
+	 * CLWB instruction:
+	 * - Intel: ebx[bit 24] in structured feature info (ecx = 0).
+	 */
+	isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
+
+	/*
+	 * MOVBE instruction:
+	 * - Intel: ecx[bit 22] in basic info.
+	 */
+	isa.movbe = !!(basic_info.ecx & UINT32_C(0x00400000));
+
+#if CPUINFO_ARCH_X86_64
+	/*
+	 * Some early x86-64 CPUs lack LAHF & SAHF instructions.
+	 * A special CPU feature bit must be checked to ensure their
+	 * availability:
+	 * - Intel, AMD: ecx[bit 0] in extended info.
+	 */
+	isa.lahf_sahf = !!(extended_info.ecx & UINT32_C(0x00000001));
+#endif
+
+	/*
+	 * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
+	 * - Intel: ebx[bit 0] in structured feature info (ecx = 0).
+	 */
+	isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
+
+	/*
+	 * LZCNT instruction:
+	 * - Intel, AMD: ecx[bit 5] in extended info.
+	 */
+	isa.lzcnt = !!(extended_info.ecx & UINT32_C(0x00000020));
+
+	/*
+	 * POPCNT instruction:
+	 * - Intel, AMD: ecx[bit 23] in basic info.
+	 */
+	isa.popcnt = !!(basic_info.ecx & UINT32_C(0x00800000));
+
+	/*
+	 * TBM instructions:
+	 * - AMD: ecx[bit 21] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.tbm = !!(extended_info.ecx & UINT32_C(0x00200000));
+
+	/*
+	 * BMI instructions:
+	 * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
+	 */
+	isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
+
+	/*
+	 * BMI2 instructions:
+	 * - Intel: ebx[bit 8] in structured feature info (ecx = 0).
+	 */
+	isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
+
+	/*
+	 * ADCX/ADOX instructions:
+	 * - Intel: ebx[bit 19] in structured feature info (ecx = 0).
+	 */
+	isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
+
+	/*
+	 * AES instructions:
+	 * - Intel: ecx[bit 25] in basic info (reserved bit on AMD CPUs).
+	 */
+	isa.aes = !!(basic_info.ecx & UINT32_C(0x02000000));
+
+	/*
+	 * VAES instructions:
+	 * - Intel: ecx[bit 9] in structured feature info (ecx = 0).
+	 */
+	isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
+
+	/*
+	 * PCLMULQDQ instruction:
+	 * - Intel: ecx[bit 1] in basic info (reserved bit on AMD CPUs).
+	 */
+	isa.pclmulqdq = !!(basic_info.ecx & UINT32_C(0x00000002));
+
+	/*
+	 * VPCLMULQDQ instruction:
+	 * - Intel: ecx[bit 10] in structured feature info (ecx = 0).
+	 */
+	isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
+
+	/*
+	 * GFNI instructions:
+	 * - Intel: ecx[bit 8] in structured feature info (ecx = 0).
+	 */
+	isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
+
+	/*
+	 * RDRAND instruction:
+	 * - Intel: ecx[bit 30] in basic info (reserved bit on AMD CPUs).
+	 */
+	isa.rdrand = !!(basic_info.ecx & UINT32_C(0x40000000));
+
+	/*
+	 * RDSEED instruction:
+	 * - Intel: ebx[bit 18] in structured feature info (ecx = 0).
+	 */
+	isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
+
+	/*
+	 * SHA instructions:
+	 * - Intel: ebx[bit 29] in structured feature info (ecx = 0).
+	 */
+	isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
+
+	if (vendor == cpuinfo_vendor_via) {
+		const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
+		const uint32_t max_padlock_index = padlock_meta_info.eax;
+		const uint32_t padlock_info_index = UINT32_C(0xC0000001);
+		if (max_padlock_index >= padlock_info_index) {
+			const struct cpuid_regs padlock_info = cpuid(padlock_info_index);
+
+			/*
+			 * Padlock RNG extension:
+			 * - VIA: edx[bit 2] in padlock info = RNG exists on
+			 * chip flag.
+			 * - VIA: edx[bit 3] in padlock info = RNG enabled by
+			 * OS.
+			 */
+			const uint32_t padlock_rng_mask = UINT32_C(0x0000000C);
+			isa.rng = (padlock_info.edx & padlock_rng_mask) == padlock_rng_mask;
+
+			/*
+			 * Padlock ACE extension:
+			 * - VIA: edx[bit 6] in padlock info = ACE exists on
+			 * chip flag.
+			 * - VIA: edx[bit 7] in padlock info = ACE enabled by
+			 * OS.
+			 */
+			const uint32_t padlock_ace_mask = UINT32_C(0x000000C0);
+			isa.ace = (padlock_info.edx & padlock_ace_mask) == padlock_ace_mask;
+
+			/*
+			 * Padlock ACE 2 extension:
+			 * - VIA: edx[bit 8] in padlock info = ACE2 exists on
+			 * chip flag.
+			 * - VIA: edx[bit 9] in padlock info = ACE 2 enabled by
+			 * OS.
+			 */
+			const uint32_t padlock_ace2_mask = UINT32_C(0x00000300);
+			isa.ace2 = (padlock_info.edx & padlock_ace2_mask) == padlock_ace2_mask;
+
+			/*
+			 * Padlock PHE extension:
+			 * - VIA: edx[bit 10] in padlock info = PHE exists on
+			 * chip flag.
+			 * - VIA: edx[bit 11] in padlock info = PHE enabled by
+			 * OS.
+			 */
+			const uint32_t padlock_phe_mask = UINT32_C(0x00000C00);
+			isa.phe = (padlock_info.edx & padlock_phe_mask) == padlock_phe_mask;
+
+			/*
+			 * Padlock PMM extension:
+			 * - VIA: edx[bit 12] in padlock info = PMM exists on
+			 * chip flag.
+			 * - VIA: edx[bit 13] in padlock info = PMM enabled by
+			 * OS.
+			 */
+			const uint32_t padlock_pmm_mask = UINT32_C(0x00003000);
+			isa.pmm = (padlock_info.edx & padlock_pmm_mask) == padlock_pmm_mask;
+		}
+	}
+
+	/*
+	 * LWP instructions:
+	 * - AMD: ecx[bit 15] in extended info (reserved bit on Intel CPUs).
+	 */
+	isa.lwp = !!(extended_info.ecx & UINT32_C(0x00008000));
+
+	/*
+	 * RDTSCP instruction:
+	 * - Intel, AMD: edx[bit 27] in extended info.
+	 */
+	isa.rdtscp = !!(extended_info.edx & UINT32_C(0x08000000));
+
+	/*
+	 * RDPID instruction:
+	 * - Intel: ecx[bit 22] in structured feature info (ecx = 0).
+	 */
+	isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
+
+	return isa;
+}
--- a/3rdparty/cpuinfo/src/x86/linux/api.h
+++ b/3rdparty/cpuinfo/src/x86/linux/api.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <linux/api.h>
+#include <x86/api.h>
+
+/* Per-processor record filled in while parsing /proc/cpuinfo on x86 Linux. */
+struct cpuinfo_x86_linux_processor {
+	/* APIC ID parsed from the "apicid" line of /proc/cpuinfo. */
+	uint32_t apic_id;
+	/* NOTE(review): presumably the Linux logical processor number --
+	 * confirm against the code that assigns it. */
+	uint32_t linux_id;
+	/* Bitmask of CPUINFO_LINUX_FLAG_* values; CPUINFO_LINUX_FLAG_APIC_ID is
+	 * set once apic_id has been parsed successfully. */
+	uint32_t flags;
+};
+
+/*
+ * Parse /proc/cpuinfo into the caller-provided `processors` array, which must
+ * hold at least `max_processors_count` elements.
+ * NOTE(review): the implementation is not visible here; presumably the return
+ * value reports whether the file was read and parsed successfully -- confirm.
+ */
+CPUINFO_INTERNAL bool cpuinfo_x86_linux_parse_proc_cpuinfo(
+	uint32_t max_processors_count,
+	struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count]);
--- a/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c
+++ b/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c
@@ -0,0 +1,220 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+#include <x86/linux/api.h>
+
+/*
+ * Size, in chars, of the on-stack buffer used for parsing lines of
+ * /proc/cpuinfo. This is also the limit on the length of a single line.
+ */
+#define BUFFER_SIZE 2048
+
+/*
+ * Decode the decimal logical processor number from the value part of a
+ * "processor" line in /proc/cpuinfo.
+ *
+ * @param processor_start - pointer to the first character of the value.
+ * @param processor_end - pointer one past the last character of the value.
+ *
+ * @returns the decoded number. An empty value yields 0 (with a warning). If a
+ *          non-digit character is encountered, a warning is logged and the
+ *          number decoded from the digits before it is returned.
+ */
+static uint32_t parse_processor_number(const char* processor_start, const char* processor_end) {
+	const size_t processor_length = (size_t)(processor_end - processor_start);
+
+	if (processor_length == 0) {
+		cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty");
+		return 0;
+	}
+
+	uint32_t processor_number = 0;
+	for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
+		/*
+		 * Characters below '0' wrap around to large unsigned values, so
+		 * a single comparison rejects everything outside '0'..'9'.
+		 * The bound must be inclusive of 10: ':' is '0' + 10 and is
+		 * not a digit.
+		 */
+		const uint32_t digit = (uint32_t)(*digit_ptr - '0');
+		if (digit >= 10) {
+			cpuinfo_log_warning(
+				"non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored",
+				(int)(processor_end - digit_ptr),
+				digit_ptr);
+			break;
+		}
+
+		processor_number = processor_number * 10 + digit;
+	}
+
+	return processor_number;
+}
+
+/*
+ * Parse the decimal APIC ID that the Linux kernel reports for an x86/x86-64
+ * logical processor. Example of such a line in /proc/cpuinfo:
+ *
+ *		apicid		: 2
+ *
+ * On success the value is stored in processor->apic_id and
+ * CPUINFO_LINUX_FLAG_APIC_ID is set in processor->flags. If any character of
+ * the value is not a decimal digit, a warning is logged and the processor
+ * record is left untouched.
+ */
+static void parse_apic_id(
+	const char* apic_start,
+	const char* apic_end,
+	struct cpuinfo_x86_linux_processor processor[restrict static 1]) {
+	uint32_t value = 0;
+	const char* cursor = apic_start;
+	while (cursor != apic_end) {
+		/* Unsigned wrap-around maps every non-digit to a value above 9 */
+		const uint32_t decimal = *cursor - '0';
+		if (decimal > 9) {
+			cpuinfo_log_warning(
+				"APIC ID %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
+				(int)(apic_end - apic_start),
+				apic_start,
+				*cursor,
+				(size_t)(cursor - apic_start));
+			return;
+		}
+
+		value = 10 * value + decimal;
+		cursor++;
+	}
+
+	processor->flags |= CPUINFO_LINUX_FLAG_APIC_ID;
+	processor->apic_id = value;
+}
+
+/*
+ * State carried between parse_line invocations while /proc/cpuinfo is being
+ * parsed.
+ */
+struct proc_cpuinfo_parser_state {
+	/* Number from the most recent "processor" line (0 before the first one). */
+	uint32_t processor_index;
+	/* Number of entries in the processors array. */
+	uint32_t max_processors_count;
+	/* Caller-provided array of per-processor records to fill in. */
+	struct cpuinfo_x86_linux_processor* processors;
+	/*
+	 * Scratch record that receives parsed values when processor_index is
+	 * outside of the processors array, so that out-of-range processors
+	 * never cause out-of-bounds writes.
+	 */
+	struct cpuinfo_x86_linux_processor dummy_processor;
+};
+
+/*
+ * Decode a single line of /proc/cpuinfo information.
+ * Lines have format <words-with-spaces>[ ]*:[ ]<space-separated words>
+ *
+ * Only two keys are interpreted:
+ * - "processor" selects the logical processor that subsequent lines describe
+ *   and marks it with CPUINFO_LINUX_FLAG_PROC_CPUINFO.
+ * - "apicid" records the APIC ID of the currently selected processor.
+ * All other keys are logged at debug level and ignored.
+ *
+ * @param line_start - pointer to the first character of the line.
+ * @param line_end - pointer one past the last character of the line.
+ * @param context - pointer to the struct proc_cpuinfo_parser_state.
+ * @param line_number - not used by this callback.
+ *
+ * @returns true on every path: malformed lines are skipped, never treated as
+ *          a fatal error.
+ */
+static bool parse_line(
+	const char* line_start,
+	const char* line_end,
+	void* context,
+	uint64_t line_number) {
+	struct proc_cpuinfo_parser_state* restrict state = context;
+	/* Empty line. Skip. */
+	if (line_start == line_end) {
+		return true;
+	}
+
+	/* Search for ':' on the line. */
+	const char* separator = line_start;
+	for (; separator != line_end; separator++) {
+		if (*separator == ':') {
+			break;
+		}
+	}
+	/* Skip line if no ':' separator was found. */
+	if (separator == line_end) {
+		cpuinfo_log_debug(
+			"Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found",
+			(int)(line_end - line_start),
+			line_start);
+		return true;
+	}
+
+	/* Skip trailing spaces and tabs in key part. */
+	const char* key_end = separator;
+	for (; key_end != line_start; key_end--) {
+		if (key_end[-1] != ' ' && key_end[-1] != '\t') {
+			break;
+		}
+	}
+	/* Skip line if key contains nothing but spaces. */
+	if (key_end == line_start) {
+		cpuinfo_log_debug(
+			"Line %.*s in /proc/cpuinfo is ignored: key contains only spaces",
+			(int)(line_end - line_start),
+			line_start);
+		return true;
+	}
+
+	/* Skip leading spaces in value part (tabs are not skipped here). */
+	const char* value_start = separator + 1;
+	for (; value_start != line_end; value_start++) {
+		if (*value_start != ' ') {
+			break;
+		}
+	}
+	/* Value part contains nothing but spaces. Skip line. */
+	if (value_start == line_end) {
+		cpuinfo_log_debug(
+			"Line %.*s in /proc/cpuinfo is ignored: value contains only spaces",
+			(int)(line_end - line_start),
+			line_start);
+		return true;
+	}
+
+	/* Skip trailing spaces in value part (if any) */
+	const char* value_end = line_end;
+	for (; value_end != value_start; value_end--) {
+		if (value_end[-1] != ' ') {
+			break;
+		}
+	}
+
+	/*
+	 * Select the record that receives parsed values: the entry for the
+	 * current processor, or the scratch record if the current processor
+	 * number does not fit into the processors array.
+	 */
+	const uint32_t processor_index = state->processor_index;
+	const uint32_t max_processors_count = state->max_processors_count;
+	struct cpuinfo_x86_linux_processor* processors = state->processors;
+	struct cpuinfo_x86_linux_processor* processor = &state->dummy_processor;
+	if (processor_index < max_processors_count) {
+		processor = &processors[processor_index];
+	}
+
+	/* Dispatch on key length first so that at most one memcmp is needed. */
+	const size_t key_length = key_end - line_start;
+	switch (key_length) {
+		case 6:
+			if (memcmp(line_start, "apicid", key_length) == 0) {
+				parse_apic_id(value_start, value_end, processor);
+			} else {
+				goto unknown;
+			}
+			break;
+		case 9:
+			if (memcmp(line_start, "processor", key_length) == 0) {
+				const uint32_t new_processor_index = parse_processor_number(value_start, value_end);
+				if (new_processor_index < processor_index) {
+					/* Strange: decreasing processor number
+					 */
+					cpuinfo_log_warning(
+						"unexpectedly low processor number %" PRIu32
+						" following processor %" PRIu32 " in /proc/cpuinfo",
+						new_processor_index,
+						processor_index);
+				} else if (new_processor_index > processor_index + 1) {
+					/* Strange, but common: skipped
+					 * processor $(processor_index + 1) */
+					cpuinfo_log_warning(
+						"unexpectedly high processor number %" PRIu32
+						" following processor %" PRIu32 " in /proc/cpuinfo",
+						new_processor_index,
+						processor_index);
+				}
+				if (new_processor_index >= max_processors_count) {
+					/* Log and ignore processor */
+					cpuinfo_log_warning(
+						"processor %" PRIu32
+						" in /proc/cpuinfo is ignored: index exceeds system limit %" PRIu32,
+						new_processor_index,
+						max_processors_count - 1);
+				} else {
+					processors[new_processor_index].flags |= CPUINFO_LINUX_FLAG_PROC_CPUINFO;
+				}
+				/*
+				 * Remember the new number even when it is out
+				 * of range: following lines then go to the
+				 * scratch record instead of a real entry.
+				 */
+				state->processor_index = new_processor_index;
+				return true;
+			} else {
+				goto unknown;
+			}
+			break;
+		default:
+		unknown:
+			/* Also reached via goto for 6- and 9-character keys that did not match. */
+			cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int)key_length, line_start);
+	}
+	return true;
+}
+
+/*
+ * Parse /proc/cpuinfo line by line with parse_line, storing the results in the
+ * caller-provided `processors` array of `max_processors_count` entries.
+ *
+ * Returns whatever cpuinfo_linux_parse_multiline_file reports.
+ */
+bool cpuinfo_x86_linux_parse_proc_cpuinfo(
+	uint32_t max_processors_count,
+	struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count]) {
+	/* Start from an all-zero state: processor 0 selected, scratch record cleared */
+	struct proc_cpuinfo_parser_state parser_state;
+	memset(&parser_state, 0, sizeof(parser_state));
+	parser_state.processors = processors;
+	parser_state.max_processors_count = max_processors_count;
+
+	const bool parsed =
+		cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, parse_line, &parser_state);
+	return parsed;
+}
--- a/3rdparty/cpuinfo/src/x86/linux/init.c
+++ b/3rdparty/cpuinfo/src/x86/linux/init.c
@@ -0,0 +1,678 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <linux/api.h>
+#include <x86/api.h>
+#include <x86/linux/api.h>
+
+/*
+ * Build a mask with the `bits` least significant bits set.
+ *
+ * Widths of 32 or more produce an all-ones mask. They are handled explicitly
+ * because shifting a 32-bit value by 32 or more positions is undefined
+ * behavior in C.
+ */
+static inline uint32_t bit_mask(uint32_t bits) {
+	if (bits >= 32) {
+		return UINT32_MAX;
+	}
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+/* Check that every bit set in `mask` is also set in `bitfield`. */
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	/* Bits required by the mask but absent from the bitfield */
+	const uint32_t missing_bits = mask & ~bitfield;
+	return missing_bits == 0;
+}
+
+/* Smaller of two unsigned 32-bit values. */
+static inline uint32_t min(uint32_t a, uint32_t b) {
+	if (a < b) {
+		return a;
+	}
+	return b;
+}
+
+/* Three-way comparison: negative if a < b, positive if a > b, zero if equal. */
+static inline int cmp(uint32_t a, uint32_t b) {
+	if (a < b) {
+		return -1;
+	}
+	if (a > b) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * qsort comparator for struct cpuinfo_x86_linux_processor.
+ *
+ * Entries with all CPUINFO_LINUX_FLAG_VALID bits set sort before the others;
+ * within each group entries are ordered by ascending APIC ID.
+ */
+static int cmp_x86_linux_processor(const void* ptr_a, const void* ptr_b) {
+	const struct cpuinfo_x86_linux_processor* lhs = ptr_a;
+	const struct cpuinfo_x86_linux_processor* rhs = ptr_b;
+
+	/* Usable processors go towards the start of the array */
+	const bool lhs_usable = bitmask_all(lhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	const bool rhs_usable = bitmask_all(rhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	if (lhs_usable && !rhs_usable) {
+		return -1;
+	}
+	if (rhs_usable && !lhs_usable) {
+		return 1;
+	}
+
+	/* Same usability: order by APIC ID (i.e. processor 0 < processor 1) */
+	return cmp(lhs->apic_id, rhs->apic_id);
+}
+
+/*
+ * Count cores, clusters, packages and caches of each level among the logical
+ * processors whose flags contain all bits of `valid_processor_mask`.
+ *
+ * Each object kind is identified by the APIC ID with the bits below that
+ * object's level masked off; a new object is counted whenever this masked ID
+ * differs from the one seen for the previous matching processor. Entries that
+ * belong to the same object therefore must be adjacent in `linux_processors`
+ * (the caller sorts the array by APIC ID before calling).
+ *
+ * @param linux_processors_count - number of entries in `linux_processors`.
+ * @param linux_processors - per-processor records (APIC ID, Linux ID, flags).
+ * @param processor - x86 processor description providing topology bit fields
+ *                    and cache parameters.
+ * @param valid_processor_mask - flag bits an entry must have to be counted.
+ * @param llc_apic_bits - number of low APIC ID bits shared by processors
+ *                        under the same last-level cache.
+ * @param cores_count_ptr, clusters_count_ptr, packages_count_ptr,
+ *        l1i_count_ptr, l1d_count_ptr, l2_count_ptr, l3_count_ptr,
+ *        l4_count_ptr - outputs; each receives the respective count. A cache
+ *        level with zero size in `processor` gets a count of zero.
+ */
+static void cpuinfo_x86_count_objects(
+	uint32_t linux_processors_count,
+	const struct cpuinfo_x86_linux_processor linux_processors[restrict static linux_processors_count],
+	const struct cpuinfo_x86_processor processor[restrict static 1],
+	uint32_t valid_processor_mask,
+	uint32_t llc_apic_bits,
+	uint32_t cores_count_ptr[restrict static 1],
+	uint32_t clusters_count_ptr[restrict static 1],
+	uint32_t packages_count_ptr[restrict static 1],
+	uint32_t l1i_count_ptr[restrict static 1],
+	uint32_t l1d_count_ptr[restrict static 1],
+	uint32_t l2_count_ptr[restrict static 1],
+	uint32_t l3_count_ptr[restrict static 1],
+	uint32_t l4_count_ptr[restrict static 1]) {
+	const uint32_t core_apic_mask =
+		~(bit_mask(processor->topology.thread_bits_length) << processor->topology.thread_bits_offset);
+	const uint32_t package_apic_mask = core_apic_mask &
+		~(bit_mask(processor->topology.core_bits_length) << processor->topology.core_bits_offset);
+	const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
+	const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;
+
+	uint32_t cores_count = 0, clusters_count = 0, packages_count = 0;
+	uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
+	/* UINT32_MAX serves as the "nothing seen yet" marker for every masked ID */
+	uint32_t last_core_id = UINT32_MAX, last_cluster_id = UINT32_MAX, last_package_id = UINT32_MAX;
+	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
+	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
+	for (uint32_t i = 0; i < linux_processors_count; i++) {
+		if (bitmask_all(linux_processors[i].flags, valid_processor_mask)) {
+			const uint32_t apic_id = linux_processors[i].apic_id;
+			cpuinfo_log_debug(
+				"APIC ID %" PRIu32 ": system processor %" PRIu32,
+				apic_id,
+				linux_processors[i].linux_id);
+
+			/* All bits of APIC ID except thread ID mask */
+			const uint32_t core_id = apic_id & core_apic_mask;
+			if (core_id != last_core_id) {
+				last_core_id = core_id;
+				cores_count++;
+			}
+			/* All bits of APIC ID except thread ID and core ID
+			 * masks */
+			const uint32_t package_id = apic_id & package_apic_mask;
+			if (package_id != last_package_id) {
+				last_package_id = package_id;
+				packages_count++;
+			}
+			/* Bits of APIC ID which are part of either LLC or
+			 * package ID mask */
+			const uint32_t cluster_id = apic_id & cluster_apic_mask;
+			if (cluster_id != last_cluster_id) {
+				last_cluster_id = cluster_id;
+				clusters_count++;
+			}
+			/* Caches: only levels reported with non-zero size are counted */
+			if (processor->cache.l1i.size != 0) {
+				const uint32_t l1i_id = apic_id & ~bit_mask(processor->cache.l1i.apic_bits);
+				if (l1i_id != last_l1i_id) {
+					last_l1i_id = l1i_id;
+					l1i_count++;
+				}
+			}
+			if (processor->cache.l1d.size != 0) {
+				const uint32_t l1d_id = apic_id & ~bit_mask(processor->cache.l1d.apic_bits);
+				if (l1d_id != last_l1d_id) {
+					last_l1d_id = l1d_id;
+					l1d_count++;
+				}
+			}
+			if (processor->cache.l2.size != 0) {
+				const uint32_t l2_id = apic_id & ~bit_mask(processor->cache.l2.apic_bits);
+				if (l2_id != last_l2_id) {
+					last_l2_id = l2_id;
+					l2_count++;
+				}
+			}
+			if (processor->cache.l3.size != 0) {
+				const uint32_t l3_id = apic_id & ~bit_mask(processor->cache.l3.apic_bits);
+				if (l3_id != last_l3_id) {
+					last_l3_id = l3_id;
+					l3_count++;
+				}
+			}
+			if (processor->cache.l4.size != 0) {
+				const uint32_t l4_id = apic_id & ~bit_mask(processor->cache.l4.apic_bits);
+				if (l4_id != last_l4_id) {
+					last_l4_id = l4_id;
+					l4_count++;
+				}
+			}
+		}
+	}
+	*cores_count_ptr = cores_count;
+	*clusters_count_ptr = clusters_count;
+	*packages_count_ptr = packages_count;
+	*l1i_count_ptr = l1i_count;
+	*l1d_count_ptr = l1d_count;
+	*l2_count_ptr = l2_count;
+	*l3_count_ptr = l3_count;
+	*l4_count_ptr = l4_count;
+}
+
+/*
+ * Initialize cpuinfo global state for x86/x86-64 Linux.
+ *
+ * Steps:
+ * 1. Determine how many logical processors may exist and which flags a
+ *    processor must have to be considered valid.
+ * 2. Collect per-processor flags and APIC IDs (possible/present lists and
+ *    /proc/cpuinfo).
+ * 3. Sort processors so that valid ones come first, ordered by APIC ID.
+ * 4. Count and allocate processors, cores, clusters, packages and caches,
+ *    then populate them by walking the sorted array.
+ * 5. Publish the tables through the cpuinfo_* globals and set
+ *    cpuinfo_is_initialized.
+ *
+ * On any failure an error is logged, all temporary allocations are released
+ * and the globals are left untouched (cpuinfo_is_initialized is not set).
+ */
+void cpuinfo_x86_linux_init(void) {
+	struct cpuinfo_x86_linux_processor* x86_linux_processors = NULL;
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+	const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	struct cpuinfo_cache* l4 = NULL;
+
+	const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
+	cpuinfo_log_debug("system maximum processors count: %" PRIu32, max_processors_count);
+
+	/*
+	 * The "max processor" helpers return an index, hence the +1 to get a
+	 * count. The != 0 checks below treat a count of 0 (index wrapped from
+	 * UINT32_MAX) as "list not available".
+	 */
+	const uint32_t max_possible_processors_count =
+		1 + cpuinfo_linux_get_max_possible_processor(max_processors_count);
+	cpuinfo_log_debug("maximum possible processors count: %" PRIu32, max_possible_processors_count);
+	const uint32_t max_present_processors_count = 1 + cpuinfo_linux_get_max_present_processor(max_processors_count);
+	cpuinfo_log_debug("maximum present processors count: %" PRIu32, max_present_processors_count);
+
+	uint32_t valid_processor_mask = 0;
+	uint32_t x86_linux_processors_count = max_processors_count;
+	if (max_present_processors_count != 0) {
+		x86_linux_processors_count = min(x86_linux_processors_count, max_present_processors_count);
+		valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
+	} else {
+		/* No present list: fall back to processors listed in /proc/cpuinfo */
+		valid_processor_mask = CPUINFO_LINUX_FLAG_PROC_CPUINFO;
+	}
+	if (max_possible_processors_count != 0) {
+		x86_linux_processors_count = min(x86_linux_processors_count, max_possible_processors_count);
+		valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
+	}
+
+	x86_linux_processors = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor));
+	if (x86_linux_processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " x86 logical processors",
+			x86_linux_processors_count * sizeof(struct cpuinfo_x86_linux_processor),
+			x86_linux_processors_count);
+		return;
+	}
+
+	if (max_possible_processors_count != 0) {
+		cpuinfo_linux_detect_possible_processors(
+			x86_linux_processors_count,
+			&x86_linux_processors->flags,
+			sizeof(struct cpuinfo_x86_linux_processor),
+			CPUINFO_LINUX_FLAG_POSSIBLE);
+	}
+
+	if (max_present_processors_count != 0) {
+		cpuinfo_linux_detect_present_processors(
+			x86_linux_processors_count,
+			&x86_linux_processors->flags,
+			sizeof(struct cpuinfo_x86_linux_processor),
+			CPUINFO_LINUX_FLAG_PRESENT);
+	}
+
+	if (!cpuinfo_x86_linux_parse_proc_cpuinfo(x86_linux_processors_count, x86_linux_processors)) {
+		cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
+		/* Go through cleanup so that x86_linux_processors is released */
+		goto cleanup;
+	}
+
+	for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
+		if (bitmask_all(x86_linux_processors[i].flags, valid_processor_mask)) {
+			x86_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
+		}
+	}
+
+	struct cpuinfo_x86_processor x86_processor;
+	memset(&x86_processor, 0, sizeof(x86_processor));
+	cpuinfo_x86_init_processor(&x86_processor);
+	char brand_string[48];
+	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
+
+	/* Record Linux IDs before sorting: afterwards the array index no longer matches */
+	uint32_t processors_count = 0;
+	for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
+		if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			x86_linux_processors[i].linux_id = i;
+			processors_count++;
+		}
+	}
+
+	if (processors_count == 0) {
+		/*
+		 * Without this check initialization would "succeed" with empty
+		 * tables and &processors[0] would be read out of bounds below.
+		 */
+		cpuinfo_log_error("failed to detect any valid logical processors");
+		goto cleanup;
+	}
+
+	/* Valid processors first, in order of increasing APIC ID */
+	qsort(x86_linux_processors,
+	      x86_linux_processors_count,
+	      sizeof(struct cpuinfo_x86_linux_processor),
+	      cmp_x86_linux_processor);
+
+	processors = calloc(processors_count, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			processors_count * sizeof(struct cpuinfo_processor),
+			processors_count);
+		goto cleanup;
+	}
+
+	/* Last-level cache is the highest cache level with non-zero size */
+	uint32_t llc_apic_bits = 0;
+	if (x86_processor.cache.l4.size != 0) {
+		llc_apic_bits = x86_processor.cache.l4.apic_bits;
+	} else if (x86_processor.cache.l3.size != 0) {
+		llc_apic_bits = x86_processor.cache.l3.apic_bits;
+	} else if (x86_processor.cache.l2.size != 0) {
+		llc_apic_bits = x86_processor.cache.l2.apic_bits;
+	} else if (x86_processor.cache.l1d.size != 0) {
+		llc_apic_bits = x86_processor.cache.l1d.apic_bits;
+	}
+	uint32_t packages_count = 0, clusters_count = 0, cores_count = 0;
+	uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
+	cpuinfo_x86_count_objects(
+		x86_linux_processors_count,
+		x86_linux_processors,
+		&x86_processor,
+		valid_processor_mask,
+		llc_apic_bits,
+		&cores_count,
+		&clusters_count,
+		&packages_count,
+		&l1i_count,
+		&l1d_count,
+		&l2_count,
+		&l3_count,
+		&l4_count);
+
+	cpuinfo_log_debug("detected %" PRIu32 " cores", cores_count);
+	cpuinfo_log_debug("detected %" PRIu32 " clusters", clusters_count);
+	cpuinfo_log_debug("detected %" PRIu32 " packages", packages_count);
+	cpuinfo_log_debug("detected %" PRIu32 " L1I caches", l1i_count);
+	cpuinfo_log_debug("detected %" PRIu32 " L1D caches", l1d_count);
+	cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
+	cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
+	cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
+
+	/* The maps are indexed by Linux processor ID, so they cover all entries */
+	linux_cpu_to_processor_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_processor*));
+	if (linux_cpu_to_processor_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for mapping entries of %" PRIu32 " logical processors",
+			x86_linux_processors_count * sizeof(struct cpuinfo_processor*),
+			x86_linux_processors_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_core_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_core*));
+	if (linux_cpu_to_core_map == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for mapping entries of %" PRIu32 " cores",
+			x86_linux_processors_count * sizeof(struct cpuinfo_core*),
+			x86_linux_processors_count);
+		goto cleanup;
+	}
+
+	cores = calloc(cores_count, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			cores_count * sizeof(struct cpuinfo_core),
+			cores_count);
+		goto cleanup;
+	}
+
+	clusters = calloc(clusters_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			clusters_count * sizeof(struct cpuinfo_cluster),
+			clusters_count);
+		goto cleanup;
+	}
+
+	packages = calloc(packages_count, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+			packages_count * sizeof(struct cpuinfo_package),
+			packages_count);
+		goto cleanup;
+	}
+
+	/* Cache tables are optional: a level that is absent keeps a NULL table */
+	if (l1i_count != 0) {
+		l1i = calloc(l1i_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+				l1i_count * sizeof(struct cpuinfo_cache),
+				l1i_count);
+			goto cleanup;
+		}
+	}
+	if (l1d_count != 0) {
+		l1d = calloc(l1d_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+				l1d_count * sizeof(struct cpuinfo_cache),
+				l1d_count);
+			goto cleanup;
+		}
+	}
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+	}
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache),
+				l3_count);
+			goto cleanup;
+		}
+	}
+	if (l4_count != 0) {
+		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
+		if (l4 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L4 caches",
+				l4_count * sizeof(struct cpuinfo_cache),
+				l4_count);
+			goto cleanup;
+		}
+	}
+
+	const uint32_t core_apic_mask =
+		~(bit_mask(x86_processor.topology.thread_bits_length) << x86_processor.topology.thread_bits_offset);
+	const uint32_t package_apic_mask = core_apic_mask &
+		~(bit_mask(x86_processor.topology.core_bits_length) << x86_processor.topology.core_bits_offset);
+	const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
+	const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;
+
+	/*
+	 * All indices start at UINT32_MAX so that the first increment wraps
+	 * them around to 0.
+	 */
+	uint32_t processor_index = UINT32_MAX, core_index = UINT32_MAX, cluster_index = UINT32_MAX,
+		 package_index = UINT32_MAX;
+	uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX,
+		 l4_index = UINT32_MAX;
+	uint32_t cluster_id = 0, core_id = 0, smt_id = 0;
+	uint32_t last_apic_core_id = UINT32_MAX, last_apic_cluster_id = UINT32_MAX, last_apic_package_id = UINT32_MAX;
+	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
+	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
+	for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
+		if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			const uint32_t apic_id = x86_linux_processors[i].apic_id;
+			processor_index++;
+			smt_id++;
+
+			/* All bits of APIC ID except thread ID mask */
+			const uint32_t apid_core_id = apic_id & core_apic_mask;
+			if (apid_core_id != last_apic_core_id) {
+				core_index++;
+				core_id++;
+				smt_id = 0;
+			}
+			/* Bits of APIC ID which are part of either LLC or
+			 * package ID mask */
+			const uint32_t apic_cluster_id = apic_id & cluster_apic_mask;
+			if (apic_cluster_id != last_apic_cluster_id) {
+				cluster_index++;
+				cluster_id++;
+			}
+			/* All bits of APIC ID except thread ID and core ID
+			 * masks */
+			const uint32_t apic_package_id = apic_id & package_apic_mask;
+			if (apic_package_id != last_apic_package_id) {
+				package_index++;
+				core_id = 0;
+				cluster_id = 0;
+			}
+
+			/* Initialize logical processor object */
+			processors[processor_index].smt_id = smt_id;
+			processors[processor_index].core = cores + core_index;
+			processors[processor_index].cluster = clusters + cluster_index;
+			processors[processor_index].package = packages + package_index;
+			processors[processor_index].linux_id = x86_linux_processors[i].linux_id;
+			processors[processor_index].apic_id = x86_linux_processors[i].apic_id;
+
+			if (apid_core_id != last_apic_core_id) {
+				/* new core */
+				cores[core_index] = (struct cpuinfo_core){
+					.processor_start = processor_index,
+					.processor_count = 1,
+					.core_id = core_id,
+					.cluster = clusters + cluster_index,
+					.package = packages + package_index,
+					.vendor = x86_processor.vendor,
+					.uarch = x86_processor.uarch,
+					.cpuid = x86_processor.cpuid,
+				};
+				clusters[cluster_index].core_count += 1;
+				packages[package_index].core_count += 1;
+				last_apic_core_id = apid_core_id;
+			} else {
+				/* another logical processor on the same core */
+				cores[core_index].processor_count++;
+			}
+
+			if (apic_cluster_id != last_apic_cluster_id) {
+				/* new cluster */
+				clusters[cluster_index].processor_start = processor_index;
+				clusters[cluster_index].processor_count = 1;
+				clusters[cluster_index].core_start = core_index;
+				clusters[cluster_index].cluster_id = cluster_id;
+				clusters[cluster_index].package = packages + package_index;
+				clusters[cluster_index].vendor = x86_processor.vendor;
+				clusters[cluster_index].uarch = x86_processor.uarch;
+				clusters[cluster_index].cpuid = x86_processor.cpuid;
+				packages[package_index].cluster_count += 1;
+				last_apic_cluster_id = apic_cluster_id;
+			} else {
+				/* another logical processor on the same cluster
+				 */
+				clusters[cluster_index].processor_count++;
+			}
+
+			if (apic_package_id != last_apic_package_id) {
+				/* new package */
+				packages[package_index].processor_start = processor_index;
+				packages[package_index].processor_count = 1;
+				packages[package_index].core_start = core_index;
+				packages[package_index].cluster_start = cluster_index;
+				cpuinfo_x86_format_package_name(
+					x86_processor.vendor, brand_string, packages[package_index].name);
+				last_apic_package_id = apic_package_id;
+			} else {
+				/* another logical processor on the same package
+				 */
+				packages[package_index].processor_count++;
+			}
+
+			linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
+			linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
+
+			/*
+			 * Caches. The processor entry is addressed by
+			 * processor_index (its slot in `processors`), the same
+			 * index used for every other field above, and the cache
+			 * pointer is taken only after the cache index is final.
+			 */
+			if (x86_processor.cache.l1i.size != 0) {
+				const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
+				if (l1i_id != last_l1i_id) {
+					/* new cache */
+					last_l1i_id = l1i_id;
+					l1i[++l1i_index] = (struct cpuinfo_cache){
+						.size = x86_processor.cache.l1i.size,
+						.associativity = x86_processor.cache.l1i.associativity,
+						.sets = x86_processor.cache.l1i.sets,
+						.partitions = x86_processor.cache.l1i.partitions,
+						.line_size = x86_processor.cache.l1i.line_size,
+						.flags = x86_processor.cache.l1i.flags,
+						.processor_start = processor_index,
+						.processor_count = 1,
+					};
+				} else {
+					/* another processor sharing the same
+					 * cache */
+					l1i[l1i_index].processor_count += 1;
+				}
+				processors[processor_index].cache.l1i = &l1i[l1i_index];
+			} else {
+				/* reset cache id */
+				last_l1i_id = UINT32_MAX;
+			}
+			if (x86_processor.cache.l1d.size != 0) {
+				const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
+				if (l1d_id != last_l1d_id) {
+					/* new cache */
+					last_l1d_id = l1d_id;
+					l1d[++l1d_index] = (struct cpuinfo_cache){
+						.size = x86_processor.cache.l1d.size,
+						.associativity = x86_processor.cache.l1d.associativity,
+						.sets = x86_processor.cache.l1d.sets,
+						.partitions = x86_processor.cache.l1d.partitions,
+						.line_size = x86_processor.cache.l1d.line_size,
+						.flags = x86_processor.cache.l1d.flags,
+						.processor_start = processor_index,
+						.processor_count = 1,
+					};
+				} else {
+					/* another processor sharing the same
+					 * cache */
+					l1d[l1d_index].processor_count += 1;
+				}
+				processors[processor_index].cache.l1d = &l1d[l1d_index];
+			} else {
+				/* reset cache id */
+				last_l1d_id = UINT32_MAX;
+			}
+			if (x86_processor.cache.l2.size != 0) {
+				const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
+				if (l2_id != last_l2_id) {
+					/* new cache */
+					last_l2_id = l2_id;
+					l2[++l2_index] = (struct cpuinfo_cache){
+						.size = x86_processor.cache.l2.size,
+						.associativity = x86_processor.cache.l2.associativity,
+						.sets = x86_processor.cache.l2.sets,
+						.partitions = x86_processor.cache.l2.partitions,
+						.line_size = x86_processor.cache.l2.line_size,
+						.flags = x86_processor.cache.l2.flags,
+						.processor_start = processor_index,
+						.processor_count = 1,
+					};
+				} else {
+					/* another processor sharing the same
+					 * cache */
+					l2[l2_index].processor_count += 1;
+				}
+				processors[processor_index].cache.l2 = &l2[l2_index];
+			} else {
+				/* reset cache id */
+				last_l2_id = UINT32_MAX;
+			}
+			if (x86_processor.cache.l3.size != 0) {
+				const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
+				if (l3_id != last_l3_id) {
+					/* new cache */
+					last_l3_id = l3_id;
+					l3[++l3_index] = (struct cpuinfo_cache){
+						.size = x86_processor.cache.l3.size,
+						.associativity = x86_processor.cache.l3.associativity,
+						.sets = x86_processor.cache.l3.sets,
+						.partitions = x86_processor.cache.l3.partitions,
+						.line_size = x86_processor.cache.l3.line_size,
+						.flags = x86_processor.cache.l3.flags,
+						.processor_start = processor_index,
+						.processor_count = 1,
+					};
+				} else {
+					/* another processor sharing the same
+					 * cache */
+					l3[l3_index].processor_count += 1;
+				}
+				processors[processor_index].cache.l3 = &l3[l3_index];
+			} else {
+				/* reset cache id */
+				last_l3_id = UINT32_MAX;
+			}
+			if (x86_processor.cache.l4.size != 0) {
+				const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
+				if (l4_id != last_l4_id) {
+					/* new cache */
+					last_l4_id = l4_id;
+					l4[++l4_index] = (struct cpuinfo_cache){
+						.size = x86_processor.cache.l4.size,
+						.associativity = x86_processor.cache.l4.associativity,
+						.sets = x86_processor.cache.l4.sets,
+						.partitions = x86_processor.cache.l4.partitions,
+						.line_size = x86_processor.cache.l4.line_size,
+						.flags = x86_processor.cache.l4.flags,
+						.processor_start = processor_index,
+						.processor_count = 1,
+					};
+				} else {
+					/* another processor sharing the same
+					 * cache */
+					l4[l4_index].processor_count += 1;
+				}
+				processors[processor_index].cache.l4 = &l4[l4_index];
+			} else {
+				/* reset cache id */
+				last_l4_id = UINT32_MAX;
+			}
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_processors_count = processors_count;
+	cpuinfo_cores_count = cores_count;
+	cpuinfo_clusters_count = clusters_count;
+	cpuinfo_packages_count = packages_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+		.uarch = x86_processor.uarch,
+		.cpuid = x86_processor.cpuid,
+		.processor_count = processors_count,
+		.core_count = cores_count,
+	};
+
+	cpuinfo_linux_cpu_max = x86_linux_processors_count;
+	cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+	cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+
+	/* Make all table writes visible before the initialized flag is set */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/*
+	 * Ownership of the tables has moved to the globals: clear the locals
+	 * so that the shared cleanup code below does not free them.
+	 */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+	linux_cpu_to_processor_map = NULL;
+	linux_cpu_to_core_map = NULL;
+
+cleanup:
+	/* free(NULL) is a no-op, so this is safe on both success and failure paths */
+	free(x86_linux_processors);
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(l4);
+	free(linux_cpu_to_processor_map);
+	free(linux_cpu_to_core_map);
+}
--- a/3rdparty/cpuinfo/src/x86/mach/init.c
+++ b/3rdparty/cpuinfo/src/x86/mach/init.c
@@ -0,0 +1,380 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <mach/api.h>
+#include <x86/api.h>
+
+/* Returns the larger of two unsigned 32-bit values (b when they are equal). */
+static inline uint32_t max(uint32_t a, uint32_t b) {
+	if (a > b) {
+		return a;
+	}
+	return b;
+}
+
+/*
+ * Builds a mask with the low `bits` bits set (bit_mask(3) == 0x7).
+ *
+ * Shifting a 32-bit value by 32 or more positions is undefined behaviour in C,
+ * so widths of 32 and above saturate to an all-ones mask instead of being
+ * passed to the shift operator.
+ */
+static inline uint32_t bit_mask(uint32_t bits) {
+	if (bits >= 32) {
+		return UINT32_MAX;
+	}
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+/*
+ * Builds the processor, core, cluster, package and cache tables for an x86
+ * system from the topology reported by cpuinfo_mach_detect_topology() and the
+ * data filled in by cpuinfo_x86_init_processor(), then publishes them through
+ * the cpuinfo_* globals and sets cpuinfo_is_initialized.
+ *
+ * On any allocation failure the error is logged, every table allocated so far
+ * is released through the common cleanup path, and the globals are left
+ * untouched.
+ */
+void cpuinfo_x86_mach_init(void) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	struct cpuinfo_cache* l4 = NULL;
+
+	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
+	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			mach_topology.threads * sizeof(struct cpuinfo_processor),
+			mach_topology.threads);
+		goto cleanup;
+	}
+	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			mach_topology.cores * sizeof(struct cpuinfo_core),
+			mach_topology.cores);
+		goto cleanup;
+	}
+	/* On x86 cluster of cores is a physical package */
+	clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			mach_topology.packages * sizeof(struct cpuinfo_cluster),
+			mach_topology.packages);
+		goto cleanup;
+	}
+	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+			mach_topology.packages * sizeof(struct cpuinfo_package),
+			mach_topology.packages);
+		goto cleanup;
+	}
+
+	struct cpuinfo_x86_processor x86_processor;
+	memset(&x86_processor, 0, sizeof(x86_processor));
+	cpuinfo_x86_init_processor(&x86_processor);
+	char brand_string[48];
+	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
+
+	/* Threads and cores are distributed evenly: every package gets the
+	 * same number of cores, every core the same number of threads */
+	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
+	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
+	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		clusters[i] = (struct cpuinfo_cluster){
+			.processor_start = i * threads_per_package,
+			.processor_count = threads_per_package,
+			.core_start = i * cores_per_package,
+			.core_count = cores_per_package,
+			.cluster_id = 0,
+			.package = packages + i,
+			.vendor = x86_processor.vendor,
+			.uarch = x86_processor.uarch,
+			.cpuid = x86_processor.cpuid,
+		};
+		packages[i].processor_start = i * threads_per_package;
+		packages[i].processor_count = threads_per_package;
+		packages[i].core_start = i * cores_per_package;
+		packages[i].core_count = cores_per_package;
+		packages[i].cluster_start = i;
+		packages[i].cluster_count = 1;
+		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
+	}
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		cores[i] = (struct cpuinfo_core){
+			.processor_start = i * threads_per_core,
+			.processor_count = threads_per_core,
+			.core_id = i % cores_per_package,
+			.cluster = clusters + i / cores_per_package,
+			.package = packages + i / cores_per_package,
+			.vendor = x86_processor.vendor,
+			.uarch = x86_processor.uarch,
+			.cpuid = x86_processor.cpuid,
+		};
+	}
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t smt_id = i % threads_per_core;
+		const uint32_t core_id = i / threads_per_core;
+		const uint32_t package_id = i / threads_per_package;
+
+		/* Reconstruct APIC IDs from topology components */
+		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
+		const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
+		const uint32_t package_bits_offset =
+			max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
+			    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
+		const uint32_t apic_id = ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
+			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
+			(package_id << package_bits_offset);
+		cpuinfo_log_debug("reconstructed APIC ID 0x%08" PRIx32 " for thread %" PRIu32, apic_id, i);
+
+		processors[i].smt_id = smt_id;
+		processors[i].core = cores + i / threads_per_core;
+		processors[i].cluster = clusters + i / threads_per_package;
+		processors[i].package = packages + i / threads_per_package;
+		processors[i].apic_id = apic_id;
+	}
+
+	uint32_t threads_per_l1 = 0, l1_count = 0;
+	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
+		threads_per_l1 = mach_topology.threads_per_cache[1];
+		if (threads_per_l1 == 0) {
+			/* Assume that threads on the same core share L1 */
+			threads_per_l1 = mach_topology.threads / mach_topology.cores;
+			cpuinfo_log_warning(
+				"Mach kernel did not report number of threads sharing L1 cache; assume %" PRIu32,
+				threads_per_l1);
+		}
+		l1_count = mach_topology.threads / threads_per_l1;
+		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
+	}
+
+	uint32_t threads_per_l2 = 0, l2_count = 0;
+	if (x86_processor.cache.l2.size != 0) {
+		threads_per_l2 = mach_topology.threads_per_cache[2];
+		if (threads_per_l2 == 0) {
+			if (x86_processor.cache.l3.size != 0) {
+				/* This is not a last-level cache; assume that
+				 * threads on the same core share L2 */
+				threads_per_l2 = mach_topology.threads / mach_topology.cores;
+			} else {
+				/* This is a last-level cache; assume that
+				 * threads on the same package share L2 */
+				threads_per_l2 = mach_topology.threads / mach_topology.packages;
+			}
+			cpuinfo_log_warning(
+				"Mach kernel did not report number of threads sharing L2 cache; assume %" PRIu32,
+				threads_per_l2);
+		}
+		l2_count = mach_topology.threads / threads_per_l2;
+		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
+	}
+
+	uint32_t threads_per_l3 = 0, l3_count = 0;
+	if (x86_processor.cache.l3.size != 0) {
+		threads_per_l3 = mach_topology.threads_per_cache[3];
+		if (threads_per_l3 == 0) {
+			/*
+			 * Assume that threads on the same package share L3.
+			 * However, it is not necessarily the last-level cache
+			 * (there may be L4 cache as well)
+			 */
+			threads_per_l3 = mach_topology.threads / mach_topology.packages;
+			cpuinfo_log_warning(
+				"Mach kernel did not report number of threads sharing L3 cache; assume %" PRIu32,
+				threads_per_l3);
+		}
+		l3_count = mach_topology.threads / threads_per_l3;
+		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
+	}
+
+	uint32_t threads_per_l4 = 0, l4_count = 0;
+	if (x86_processor.cache.l4.size != 0) {
+		threads_per_l4 = mach_topology.threads_per_cache[4];
+		if (threads_per_l4 == 0) {
+			/*
+			 * Assume that all threads share this L4.
+			 * As of now, L4 cache exists only on notebook x86 CPUs,
+			 * which are single-package, but multi-socket systems
+			 * could have shared L4 (like on IBM POWER8).
+			 */
+			threads_per_l4 = mach_topology.threads;
+			cpuinfo_log_warning(
+				"Mach kernel did not report number of threads sharing L4 cache; assume %" PRIu32,
+				threads_per_l4);
+		}
+		l4_count = mach_topology.threads / threads_per_l4;
+		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
+	}
+
+	/*
+	 * Allocation failures below must go through the cleanup label (not a
+	 * bare return) so that the tables allocated above are released.
+	 */
+	if (x86_processor.cache.l1i.size != 0) {
+		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1i[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l1i.size,
+				.associativity = x86_processor.cache.l1i.associativity,
+				.sets = x86_processor.cache.l1i.sets,
+				.partitions = x86_processor.cache.l1i.partitions,
+				.line_size = x86_processor.cache.l1i.line_size,
+				.flags = x86_processor.cache.l1i.flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1i = &l1i[t / threads_per_l1];
+		}
+	}
+
+	if (x86_processor.cache.l1d.size != 0) {
+		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+				l1_count * sizeof(struct cpuinfo_cache),
+				l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1d[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l1d.size,
+				.associativity = x86_processor.cache.l1d.associativity,
+				.sets = x86_processor.cache.l1d.sets,
+				.partitions = x86_processor.cache.l1d.partitions,
+				.line_size = x86_processor.cache.l1d.line_size,
+				.flags = x86_processor.cache.l1d.flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1d = &l1d[t / threads_per_l1];
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l2_count; c++) {
+			l2[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l2.size,
+				.associativity = x86_processor.cache.l2.associativity,
+				.sets = x86_processor.cache.l2.sets,
+				.partitions = x86_processor.cache.l2.partitions,
+				.line_size = x86_processor.cache.l2.line_size,
+				.flags = x86_processor.cache.l2.flags,
+				.processor_start = c * threads_per_l2,
+				.processor_count = threads_per_l2,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l2 = &l2[t / threads_per_l2];
+		}
+	}
+
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache),
+				l3_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l3_count; c++) {
+			l3[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l3.size,
+				.associativity = x86_processor.cache.l3.associativity,
+				.sets = x86_processor.cache.l3.sets,
+				.partitions = x86_processor.cache.l3.partitions,
+				.line_size = x86_processor.cache.l3.line_size,
+				.flags = x86_processor.cache.l3.flags,
+				.processor_start = c * threads_per_l3,
+				.processor_count = threads_per_l3,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l3 = &l3[t / threads_per_l3];
+		}
+	}
+
+	if (l4_count != 0) {
+		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
+		if (l4 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L4 caches",
+				l4_count * sizeof(struct cpuinfo_cache),
+				l4_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l4_count; c++) {
+			l4[c] = (struct cpuinfo_cache){
+				.size = x86_processor.cache.l4.size,
+				.associativity = x86_processor.cache.l4.associativity,
+				.sets = x86_processor.cache.l4.sets,
+				.partitions = x86_processor.cache.l4.partitions,
+				.line_size = x86_processor.cache.l4.line_size,
+				.flags = x86_processor.cache.l4.flags,
+				.processor_start = c * threads_per_l4,
+				.processor_count = threads_per_l4,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l4 = &l4[t / threads_per_l4];
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_processors_count = mach_topology.threads;
+	cpuinfo_cores_count = mach_topology.cores;
+	cpuinfo_clusters_count = mach_topology.packages;
+	cpuinfo_packages_count = mach_topology.packages;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+		.uarch = x86_processor.uarch,
+		.cpuid = x86_processor.cpuid,
+		.processor_count = mach_topology.threads,
+		.core_count = mach_topology.cores,
+	};
+
+	/* Full memory barrier between filling the tables and raising the flag */
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership has moved to the globals: clear the locals so that the
+	 * cleanup code below frees nothing on the success path */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(l4);
+}
--- a/3rdparty/cpuinfo/src/x86/mockcpuid.c
+++ b/3rdparty/cpuinfo/src/x86/mockcpuid.c
@@ -0,0 +1,68 @@
+#include <stddef.h>
+#include <stdint.h>
+
+#if !CPUINFO_MOCK
+#error This file should be built only in mock mode
+#endif
+
+#include <cpuinfo-mock.h>
+
+/* Table of mocked CPUID results; installed by cpuinfo_mock_set_cpuid(), NULL until then. */
+static struct cpuinfo_mock_cpuid* cpuinfo_mock_cpuid_data = NULL;
+/* Number of entries in the cpuinfo_mock_cpuid_data table. */
+static uint32_t cpuinfo_mock_cpuid_entries = 0;
+/*
+ * Number of leaf-4 queries answered in a row by cpuinfo_mock_get_cpuid().
+ * Reset to 0 by a query for any other leaf and by cpuinfo_mock_get_cpuidex().
+ */
+static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0;
+
+/*
+ * Installs the table of mocked CPUID results used by cpuinfo_mock_get_cpuid()
+ * and cpuinfo_mock_get_cpuidex().
+ *
+ * @param dump     Array of mocked CPUID entries. Only the pointer is stored
+ *                 (the array is not copied), so it must stay valid for as long
+ *                 as the mock functions may be called.
+ * @param entries  Number of elements in @p dump.
+ */
+void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) {
+	cpuinfo_mock_cpuid_data = dump;
+	/* The entry counter is 32-bit; make the narrowing from size_t explicit */
+	cpuinfo_mock_cpuid_entries = (uint32_t)entries;
+}
+
+/*
+ * Mock of a CPUID query that takes only the leaf number.
+ *
+ * For every leaf except 4 the first table entry with a matching input_eax is
+ * returned. Leaf 4 is answered iteratively: each consecutive leaf-4 call
+ * returns the next matching entry, and the iteration restarts as soon as any
+ * other leaf is queried. When no entry is found all four registers are zeroed.
+ *
+ * @param eax   Requested CPUID leaf.
+ * @param regs  Output registers, in the order EAX, EBX, ECX, EDX.
+ */
+void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) {
+	if (eax != 4) {
+		cpuinfo_mock_cpuid_leaf4_iteration = 0;
+	}
+
+	const struct cpuinfo_mock_cpuid* found = NULL;
+	if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) {
+		/* Matching entries to pass over before the one to report */
+		uint32_t to_skip = (eax == 4) ? cpuinfo_mock_cpuid_leaf4_iteration : 0;
+		for (uint32_t idx = 0; idx < cpuinfo_mock_cpuid_entries; idx++) {
+			const struct cpuinfo_mock_cpuid* entry = &cpuinfo_mock_cpuid_data[idx];
+			if (entry->input_eax != eax) {
+				continue;
+			}
+			if (to_skip == 0) {
+				found = entry;
+				break;
+			}
+			to_skip -= 1;
+		}
+	}
+
+	if (found == NULL) {
+		regs[0] = regs[1] = regs[2] = regs[3] = 0;
+		return;
+	}
+
+	regs[0] = found->eax;
+	regs[1] = found->ebx;
+	regs[2] = found->ecx;
+	regs[3] = found->edx;
+	if (eax == 4) {
+		cpuinfo_mock_cpuid_leaf4_iteration++;
+	}
+}
+
+/*
+ * Mock of a CPUID query that takes both a leaf and a sub-leaf.
+ *
+ * Returns the first table entry whose input_eax and input_ecx both match, or
+ * zeroes all four registers when there is none. Always restarts the leaf-4
+ * iteration kept for cpuinfo_mock_get_cpuid().
+ *
+ * @param eax   Requested CPUID leaf.
+ * @param ecx   Requested CPUID sub-leaf.
+ * @param regs  Output registers, in the order EAX, EBX, ECX, EDX.
+ */
+void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]) {
+	cpuinfo_mock_cpuid_leaf4_iteration = 0;
+
+	const struct cpuinfo_mock_cpuid* found = NULL;
+	if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) {
+		for (uint32_t idx = 0; idx < cpuinfo_mock_cpuid_entries; idx++) {
+			const struct cpuinfo_mock_cpuid* entry = &cpuinfo_mock_cpuid_data[idx];
+			if (entry->input_eax == eax && entry->input_ecx == ecx) {
+				found = entry;
+				break;
+			}
+		}
+	}
+
+	if (found == NULL) {
+		regs[0] = regs[1] = regs[2] = regs[3] = 0;
+		return;
+	}
+
+	regs[0] = found->eax;
+	regs[1] = found->ebx;
+	regs[2] = found->ecx;
+	regs[3] = found->edx;
+}
--- a/3rdparty/cpuinfo/src/x86/name.c
+++ b/3rdparty/cpuinfo/src/x86/name.c
@@ -0,0 +1,754 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <x86/api.h>
+
+/*
+ * Parser state carried from one brand-string token to the next.
+ * The context_* pointers describe the previously processed token; the boolean
+ * flags accumulate facts about the brand string as a whole.
+ */
+struct parser_state {
+	/*
+	 * Start of the previous token when that token is "model"; used to
+	 * erase "model unknown" once the following "unknown" token is seen.
+	 */
+	char* context_model;
+	/*
+	 * Start of the previous token when that token is a single uppercase
+	 * letter; used to merge the letter with a number that follows it.
+	 */
+	char* context_upper_letter;
+	/*
+	 * Start of the previous token when that token is "Dual"; used to
+	 * erase "Dual Core" once the following "Core" token is seen.
+	 */
+	char* context_dual;
+	/*
+	 * Set when the previous token is a core-count token such as "Core"
+	 * (after "Dual"), "Dual-Core" or "QuadCore"; used to erase a "Mobile"
+	 * token that directly follows.
+	 */
+	char* context_core;
+	/*
+	 * Start of an "Eng" or "Engineering" token; used to recognize a
+	 * following "Sample" token as an engineering-sample marker.
+	 */
+	char* context_engineering;
+	/*
+	 * Pointer to the '@' symbol in the brand string (separates frequency
+	 * specification). NULL if there is no '@' symbol.
+	 */
+	char* frequency_separator;
+	/* Indicates whether the brand string (after transformations) contains
+	 * frequency. */
+	bool frequency_token;
+	/* Set once a "Xeon" token has been seen in the brand string. */
+	bool xeon;
+	/* Indicates whether the processor model number was already parsed. */
+	bool parsed_model_number;
+	/* Set once an "Engineering Sample" / "Eng Sample" sequence has been
+	 * recognized in the brand string. */
+	bool engineering_sample;
+};
+
+/**
+ * @brief	Resets information about the previous token. Keeps all other
+ * state information (frequency separator and the boolean flags).
+ *
+ * Every context_* pointer is cleared, including context_engineering: it is
+ * previous-token context like the others, and leaving it set would let a
+ * "Sample" token be treated as an engineering-sample marker even when "Eng" or
+ * "Engineering" appeared several tokens earlier.
+ *
+ * @param	state	The parser state to update in place.
+ */
+static void reset_context(struct parser_state* state) {
+	state->context_model = NULL;
+	state->context_upper_letter = NULL;
+	state->context_dual = NULL;
+	state->context_core = NULL;
+	state->context_engineering = NULL;
+}
+
+/**
+ * @brief	Blanks out a string (fills it with spaces) when its first
+ * @p length bytes are identical to those of a reference string.
+ * @param	string	The buffer to test and, on a match, overwrite.
+ * @param	length	The number of bytes to compare and to overwrite.
+ * @param	target	The reference bytes to compare against.
+ * @retval	true	If the bytes matched and @p string was overwritten with
+ * space characters.
+ * @retval	false	If the bytes differ; @p string is left unchanged.
+ */
+static inline bool erase_matching(char* string, size_t length, const char* target) {
+	if (memcmp(string, target, length) != 0) {
+		return false;
+	}
+	memset(string, ' ', length);
+	return true;
+}
+
+/**
+ * @brief	Tells whether an ASCII character is an uppercase latin letter.
+ * @param	character	The character to classify.
+ * @retval	true	If the character lies in the range 'A' to 'Z'.
+ * @retval	false	For every other character.
+ */
+static inline bool is_upper_letter(char character) {
+	return character >= 'A' && character <= 'Z';
+}
+
+/**
+ * @brief	Tells whether an ASCII character is a decimal digit.
+ * @param	character	The character to classify.
+ * @retval	true	If the character lies in the range '0' to '9'.
+ * @retval	false	For every other character.
+ */
+static inline bool is_digit(char character) {
+	return character >= '0' && character <= '9';
+}
+
+/* Returns true when every character in [token_start, token_end) is '0'
+ * (an empty token also yields true). */
+static inline bool is_zero_number(const char* token_start, const char* token_end) {
+	const char* cursor = token_start;
+	while (cursor != token_end && *cursor == '0') {
+		cursor++;
+	}
+	return cursor == token_end;
+}
+
+/* Returns true when every character in [token_start, token_end) is a space
+ * (an empty token also yields true). */
+static inline bool is_space(const char* token_start, const char* token_end) {
+	const char* cursor = token_start;
+	while (cursor != token_end && *cursor == ' ') {
+		cursor++;
+	}
+	return cursor == token_end;
+}
+
+/* Returns true when every character in [token_start, token_end) is a decimal
+ * digit (an empty token also yields true). */
+static inline bool is_number(const char* token_start, const char* token_end) {
+	const char* cursor = token_start;
+	while (cursor != token_end && is_digit(*cursor)) {
+		cursor++;
+	}
+	return cursor == token_end;
+}
+
+/* Returns true when the token [token_start, token_end) contains at least two
+ * adjacent decimal digits. */
+static inline bool is_model_number(const char* token_start, const char* token_end) {
+	const ptrdiff_t length = token_end - token_start;
+	for (ptrdiff_t pos = 1; pos < length; pos++) {
+		const bool digit_pair = is_digit(token_start[pos - 1]) && is_digit(token_start[pos]);
+		if (digit_pair) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Returns true when the token [token_start, token_end) is longer than three
+ * characters and ends in "KHz", "MHz" or "GHz". */
+static inline bool is_frequency(const char* token_start, const char* token_end) {
+	if ((size_t)(token_end - token_start) <= 3) {
+		return false;
+	}
+	if (token_end[-2] != 'H' || token_end[-1] != 'z') {
+		return false;
+	}
+	const char unit_prefix = token_end[-3];
+	return unit_prefix == 'K' || unit_prefix == 'M' || unit_prefix == 'G';
+}
+
+/**
+ * @brief	Copies the token [token_start, token_end) to @p output_ptr and
+ * returns the position just past the copied bytes.
+ * @warning	Input and output tokens can overlap, hence memmove rather than
+ * memcpy.
+ */
+static inline char* move_token(const char* token_start, const char* token_end, char* output_ptr) {
+	const size_t byte_count = (size_t)(token_end - token_start);
+	char* const destination = memmove(output_ptr, token_start, byte_count);
+	return destination + byte_count;
+}
+
+static bool transform_token(char* token_start, char* token_end, struct parser_state* state) {
+	const struct parser_state previousState = *state;
+	reset_context(state);
+
+	size_t token_length = (size_t)(token_end - token_start);
+
+	if (state->frequency_separator != NULL) {
+		if (token_start > state->frequency_separator) {
+			if (state->parsed_model_number) {
+				memset(token_start, ' ', token_length);
+			}
+		}
+	}
+
+	/* Early AMD and Cyrix processors have "tm" suffix for trademark, e.g.
+	 *   "AMD-K6tm w/ multimedia extensions"
+	 *   "Cyrix MediaGXtm MMXtm Enhanced"
+	 */
+	if (token_length > 2) {
+		const char context_char = token_end[-3];
+		if (is_digit(context_char) || is_upper_letter(context_char)) {
+			if (erase_matching(token_end - 2, 2, "tm")) {
+				token_end -= 2;
+				token_length -= 2;
+			}
+		}
+	}
+	if (token_length > 4) {
+		/* Some early AMD CPUs have "AMD-" at the beginning, e.g.
+		 *   "AMD-K5(tm) Processor"
+		 *   "AMD-K6tm w/ multimedia extensions"
+		 *   "AMD-K6(tm) 3D+ Processor"
+		 *   "AMD-K6(tm)-III Processor"
+		 */
+		if (erase_matching(token_start, 4, "AMD-")) {
+			token_start += 4;
+			token_length -= 4;
+		}
+	}
+	switch (token_length) {
+		case 1:
+			/*
+			 * On some Intel processors there is a space between the
+			 * first letter of the name and the number after it,
+			 * e.g. "Intel(R) Core(TM) i7 CPU X 990  @ 3.47GHz"
+			 *   "Intel(R) Core(TM) CPU Q 820  @ 1.73GHz"
+			 * We want to merge these parts together, in reverse
+			 * order, i.e. "X 990"
+			 * -> "990X", "820" -> "820Q"
+			 */
+			if (is_upper_letter(token_start[0])) {
+				state->context_upper_letter = token_start;
+				return true;
+			}
+			break;
+		case 2:
+			/* Erase everything after "w/" in "AMD-K6tm w/
+			 * multimedia extensions" */
+			if (erase_matching(token_start, token_length, "w/")) {
+				return false;
+			}
+			/*
+			 * Intel Xeon processors since Ivy Bridge use versions,
+			 * e.g. "Intel Xeon E3-1230 v2". Some processor brand
+			 * strings report them as "V<N>", others report as
+			 * "v<N>". Normalize the former (upper-case) to the
+			 * latter (lower-case) version
+			 */
+			if (token_start[0] == 'V' && is_digit(token_start[1])) {
+				token_start[0] = 'v';
+				return true;
+			}
+			break;
+		case 3:
+			/*
+			 * Erase "CPU" in brand string on Intel processors, e.g.
+			 *  "Intel(R) Core(TM) i5 CPU         650  @ 3.20GHz"
+			 *  "Intel(R) Xeon(R) CPU           X3210  @ 2.13GHz"
+			 *  "Intel(R) Atom(TM) CPU Z2760  @ 1.80GHz"
+			 */
+			if (erase_matching(token_start, token_length, "CPU")) {
+				return true;
+			}
+			/*
+			 * Erase everything after "SOC" on AMD System-on-Chips,
+			 * e.g. "AMD GX-212JC SOC with Radeon(TM) R2E Graphics
+			 * \0"
+			 */
+			if (erase_matching(token_start, token_length, "SOC")) {
+				return false;
+			}
+			/*
+			 * Erase "AMD" in brand string on AMD processors, e.g.
+			 *  "AMD Athlon(tm) Processor"
+			 *  "AMD Engineering Sample"
+			 *  "Quad-Core AMD Opteron(tm) Processor 2344 HE"
+			 */
+			if (erase_matching(token_start, token_length, "AMD")) {
+				return true;
+			}
+			/*
+			 * Erase "VIA" in brand string on VIA processors, e.g.
+			 *   "VIA C3 Ezra"
+			 *   "VIA C7-M Processor 1200MHz"
+			 *   "VIA Nano L3050@1800MHz"
+			 */
+			if (erase_matching(token_start, token_length, "VIA")) {
+				return true;
+			}
+			/* Erase "IDT" in brand string on early Centaur
+			 * processors, e.g. "IDT WinChip 2-3D" */
+			if (erase_matching(token_start, token_length, "IDT")) {
+				return true;
+			}
+			/*
+			 * Erase everything starting with "MMX" in
+			 * "Cyrix MediaGXtm MMXtm Enhanced" ("tm" suffix is
+			 * removed by this point)
+			 */
+			if (erase_matching(token_start, token_length, "MMX")) {
+				return false;
+			}
+			/*
+			 * Erase everything starting with "APU" on AMD
+			 * processors, e.g. "AMD A10-4600M APU with Radeon(tm)
+			 * HD Graphics" "AMD A10-7850K APU with Radeon(TM) R7
+			 * Graphics" "AMD A6-6310 APU with AMD Radeon R4
+			 * Graphics"
+			 */
+			if (erase_matching(token_start, token_length, "APU")) {
+				return false;
+			}
+			/*
+			 * Remember to discard string if it contains "Eng
+			 * Sample", e.g. "Eng Sample,
+			 * ZD302046W4K43_36/30/20_2/8_A"
+			 */
+			if (memcmp(token_start, "Eng", token_length) == 0) {
+				state->context_engineering = token_start;
+			}
+			break;
+		case 4:
+			/* Remember to erase "Dual Core" in "AMD Athlon(tm) 64
+			 * X2 Dual Core Processor 3800+" */
+			if (memcmp(token_start, "Dual", token_length) == 0) {
+				state->context_dual = token_start;
+			}
+			/* Remember if the processor is on Xeon family */
+			if (memcmp(token_start, "Xeon", token_length) == 0) {
+				state->xeon = true;
+			}
+			/* Erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core
+			 * Processor 3800+"
+			 */
+			if (previousState.context_dual != NULL) {
+				if (memcmp(token_start, "Core", token_length) == 0) {
+					memset(previousState.context_dual,
+					       ' ',
+					       (size_t)(token_end - previousState.context_dual));
+					state->context_core = token_end;
+					return true;
+				}
+			}
+			break;
+		case 5:
+			/*
+			 * Erase "Intel" in brand string on Intel processors,
+			 * e.g. "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz" "Intel(R)
+			 * Atom(TM) CPU D2700 @ 2.13GHz" "Genuine Intel(R)
+			 * processor 800MHz"
+			 */
+			if (erase_matching(token_start, token_length, "Intel")) {
+				return true;
+			}
+			/*
+			 * Erase "Cyrix" in brand string on Cyrix processors,
+			 * e.g. "Cyrix MediaGXtm MMXtm Enhanced"
+			 */
+			if (erase_matching(token_start, token_length, "Cyrix")) {
+				return true;
+			}
+			/*
+			 * Erase everything following "Geode" (but not "Geode"
+			 * token itself) on Geode processors, e.g. "Geode(TM)
+			 * Integrated Processor by AMD PCS" "Geode(TM)
+			 * Integrated Processor by National Semi"
+			 */
+			if (memcmp(token_start, "Geode", token_length) == 0) {
+				return false;
+			}
+			/* Remember to erase "model unknown" in "AMD Processor
+			 * model unknown" */
+			if (memcmp(token_start, "model", token_length) == 0) {
+				state->context_model = token_start;
+				return true;
+			}
+			break;
+		case 6:
+			/*
+			 * Erase everything starting with "Radeon" or "RADEON"
+			 * on AMD APUs, e.g. "A8-7670K Radeon R7, 10 Compute
+			 * Cores 4C+6G" "FX-8800P Radeon R7, 12 Compute Cores
+			 * 4C+8G" "A12-9800 RADEON R7, 12 COMPUTE CORES 4C+8G"
+			 *   "A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G"
+			 */
+			if (erase_matching(token_start, token_length, "Radeon") ||
+			    erase_matching(token_start, token_length, "RADEON")) {
+				return false;
+			}
+			/*
+			 * Erase "Mobile" when it is not part of the processor
+			 * name, e.g. in "AMD Turion(tm) X2 Ultra Dual-Core
+			 * Mobile ZM-82"
+			 */
+			if (previousState.context_core != NULL) {
+				if (erase_matching(token_start, token_length, "Mobile")) {
+					return true;
+				}
+			}
+			/* Erase "family" in "Intel(R) Pentium(R) III CPU family
+			 * 1266MHz" */
+			if (erase_matching(token_start, token_length, "family")) {
+				return true;
+			}
+			/* Discard the string if it contains "Engineering
+			 * Sample" */
+			if (previousState.context_engineering != NULL) {
+				if (memcmp(token_start, "Sample", token_length) == 0) {
+					state->engineering_sample = true;
+					return false;
+				}
+			}
+			break;
+		case 7:
+			/*
+			 * Erase "Genuine" in brand string on Intel engineering
+			 * samples, e.g. "Genuine Intel(R) processor 800MHz"
+			 *   "Genuine Intel(R) CPU @ 2.13GHz"
+			 *   "Genuine Intel(R) CPU 0000 @ 1.73GHz"
+			 */
+			if (erase_matching(token_start, token_length, "Genuine")) {
+				return true;
+			}
+			/*
+			 * Erase "12-core" in brand string on AMD Threadripper,
+			 * e.g. "AMD Ryzen Threadripper 1920X 12-Core Processor"
+			 */
+			if (erase_matching(token_start, token_length, "12-Core")) {
+				return true;
+			}
+			/*
+			 * Erase "16-core" in brand string on AMD Threadripper,
+			 * e.g. "AMD Ryzen Threadripper 1950X 16-Core Processor"
+			 */
+			if (erase_matching(token_start, token_length, "16-Core")) {
+				return true;
+			}
+			/* Erase "model unknown" in "AMD Processor model
+			 * unknown" */
+			if (previousState.context_model != NULL) {
+				if (memcmp(token_start, "unknown", token_length) == 0) {
+					memset(previousState.context_model,
+					       ' ',
+					       token_end - previousState.context_model);
+					return true;
+				}
+			}
+			/*
+			 * Discard the string if it contains "Eng Sample:" or
+			 * "Eng Sample," e.g. "AMD Eng Sample,
+			 * ZD302046W4K43_36/30/20_2/8_A" "AMD Eng Sample:
+			 * 2D3151A2M88E4_35/31_N"
+			 */
+			if (previousState.context_engineering != NULL) {
+				if (memcmp(token_start, "Sample,", token_length) == 0 ||
+				    memcmp(token_start, "Sample:", token_length) == 0) {
+					state->engineering_sample = true;
+					return false;
+				}
+			}
+			break;
+		case 8:
+			/* Erase "QuadCore" in "VIA QuadCore L4700 @ 1.2+ GHz"
+			 */
+			if (erase_matching(token_start, token_length, "QuadCore")) {
+				state->context_core = token_end;
+				return true;
+			}
+			/* Erase "Six-Core" in "AMD FX(tm)-6100 Six-Core
+			 * Processor" */
+			if (erase_matching(token_start, token_length, "Six-Core")) {
+				state->context_core = token_end;
+				return true;
+			}
+			break;
+		case 9:
+			if (erase_matching(token_start, token_length, "Processor")) {
+				return true;
+			}
+			if (erase_matching(token_start, token_length, "processor")) {
+				return true;
+			}
+			/* Erase "Dual-Core" in "Pentium(R) Dual-Core CPU T4200
+			 * @ 2.00GHz" */
+			if (erase_matching(token_start, token_length, "Dual-Core")) {
+				state->context_core = token_end;
+				return true;
+			}
+			/* Erase "Quad-Core" in AMD processors, e.g.
+			 *   "Quad-Core AMD Opteron(tm) Processor 2347 HE"
+			 *   "AMD FX(tm)-4170 Quad-Core Processor"
+			 */
+			if (erase_matching(token_start, token_length, "Quad-Core")) {
+				state->context_core = token_end;
+				return true;
+			}
+			/* Erase "Transmeta" in brand string on Transmeta
+			 * processors, e.g. "Transmeta(tm) Crusoe(tm) Processor
+			 * TM5800" "Transmeta Efficeon(tm) Processor TM8000"
+			 */
+			if (erase_matching(token_start, token_length, "Transmeta")) {
+				return true;
+			}
+			break;
+		case 10:
+			/*
+			 * Erase "Eight-Core" in AMD processors, e.g.
+			 *   "AMD FX(tm)-8150 Eight-Core Processor"
+			 */
+			if (erase_matching(token_start, token_length, "Eight-Core")) {
+				state->context_core = token_end;
+				return true;
+			}
+			break;
+		case 11:
+			/*
+			 * Erase "Triple-Core" in AMD processors, e.g.
+			 *   "AMD Phenom(tm) II N830 Triple-Core Processor"
+			 *   "AMD Phenom(tm) 8650 Triple-Core Processor"
+			 */
+			if (erase_matching(token_start, token_length, "Triple-Core")) {
+				state->context_core = token_end;
+				return true;
+			}
+			/*
+			 * Remember to discard string if it contains
+			 * "Engineering Sample", e.g. "AMD Engineering Sample"
+			 */
+			if (memcmp(token_start, "Engineering", token_length) == 0) {
+				state->context_engineering = token_start;
+				return true;
+			}
+			break;
+	}
+	if (is_zero_number(token_start, token_end)) {
+		memset(token_start, ' ', token_length);
+		return true;
+	}
+	/* On some Intel processors the last letter of the name is put before
+	 * the number, and an additional space it added, e.g. "Intel(R) Core(TM)
+	 * i7 CPU X 990  @ 3.47GHz" "Intel(R) Core(TM) CPU Q 820  @ 1.73GHz"
+	 * "Intel(R) Core(TM) i5 CPU M 480  @ 2.67GHz" We fix this issue, i.e.
+	 * "X 990" -> "990X", "Q 820"
+	 * -> "820Q"
+	 */
+	if (previousState.context_upper_letter != 0) {
+		/* A single letter token followed by 2-to-5 digit letter is
+		 * merged together
+		 */
+		switch (token_length) {
+			case 2:
+			case 3:
+			case 4:
+			case 5:
+				if (is_number(token_start, token_end)) {
+					/* Load the previous single-letter token
+					 */
+					const char letter = *previousState.context_upper_letter;
+					/* Erase the previous single-letter
+					 * token */
+					*previousState.context_upper_letter = ' ';
+					/* Move the current token one position
+					 * to the left */
+					move_token(token_start, token_end, token_start - 1);
+					token_start -= 1;
+					/*
+					 * Add the letter on the end
+					 * Note: accessing token_start[-1] is
+					 * safe because this is not the first
+					 * token
+					 */
+					token_end[-1] = letter;
+				}
+		}
+	}
+	if (state->frequency_separator != NULL) {
+		if (is_model_number(token_start, token_end)) {
+			state->parsed_model_number = true;
+		}
+	}
+	if (is_frequency(token_start, token_end)) {
+		state->frequency_token = true;
+	}
+	return true;
+}
+
+/*
+ * Normalizes a raw 48-byte x86 brand string into a compact model name.
+ *
+ * The function works on a private copy of raw_name:
+ *  1. Trailing zero bytes are trimmed; an all-zero input yields an empty
+ *     result.
+ *  2. Parentheses and everything between them, '@', embedded zero bytes and
+ *     tabs are replaced with spaces. The position of the last '@' is
+ *     remembered as the frequency separator.
+ *  3. Every space-delimited token is passed to transform_token(). If it
+ *     returns false, the remainder of the string after that token is ignored.
+ *  4. The surviving tokens are copied to normalized_name separated by single
+ *     spaces; no space is inserted after a token that ends with '-' or before
+ *     a token that starts with '-'.
+ *
+ * normalized_name is always zero-terminated. The return value is the number
+ * of characters produced by step 4, or 0 (with an empty normalized_name) when
+ * the input is all zeros, the parser flagged an engineering sample, only
+ * spaces precede the frequency separator, or the only remaining token is a
+ * frequency. If the output fills all 48 bytes, the last byte is overwritten
+ * with the terminator while the returned length still counts it.
+ */
+uint32_t cpuinfo_x86_normalize_brand_string(const char raw_name[48], char normalized_name[48]) {
+	/* Start from an empty result so that every early "return 0" below
+	 * leaves a valid empty string in the output buffer */
+	normalized_name[0] = '\0';
+	char name[48];
+	memcpy(name, raw_name, sizeof(name));
+
+	/*
+	 * First find the end of the string
+	 * Start search from the end because some brand strings contain zeroes
+	 * in the middle
+	 */
+	char* name_end = &name[48];
+	while (name_end[-1] == '\0') {
+		/*
+		 * Adjust name_end by 1 position and check that we didn't reach
+		 * the start of the brand string. This is possible if all
+		 * characters are zero.
+		 */
+		if (--name_end == name) {
+			/* All characters are zeros */
+			return 0;
+		}
+	}
+
+	struct parser_state parser_state = {0};
+
+	/* Now unify all whitespace characters: replace tabs and '\0' with
+	 * spaces. Parentheses, any text enclosed in them, and '@' are blanked
+	 * out in the same pass. */
+	{
+		bool inside_parentheses = false;
+		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
+			switch (*char_ptr) {
+				case '(':
+					inside_parentheses = true;
+					*char_ptr = ' ';
+					break;
+				case ')':
+					inside_parentheses = false;
+					*char_ptr = ' ';
+					break;
+				case '@':
+					/* Remember where the frequency part
+					 * starts; a later '@' overrides an
+					 * earlier one */
+					parser_state.frequency_separator = char_ptr;
+					/* intentional fall-through: '@' is
+					 * replaced with a space as well */
+				case '\0':
+				case '\t':
+					*char_ptr = ' ';
+					break;
+				default:
+					if (inside_parentheses) {
+						*char_ptr = ' ';
+					}
+			}
+		}
+	}
+
+	/* Iterate through all tokens and erase redundant parts */
+	{
+		bool is_token = false;
+		char* token_start = NULL;
+		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
+			if (*char_ptr == ' ') {
+				if (is_token) {
+					is_token = false;
+					if (!transform_token(token_start, char_ptr, &parser_state)) {
+						/* The parser asked to stop:
+						 * everything after this token
+						 * is dropped */
+						name_end = char_ptr;
+						break;
+					}
+				}
+			} else {
+				if (!is_token) {
+					is_token = true;
+					token_start = char_ptr;
+				}
+			}
+		}
+		/* The last token has no trailing space; process it here. Its
+		 * return value is not needed because nothing follows it. */
+		if (is_token) {
+			transform_token(token_start, name_end, &parser_state);
+		}
+	}
+
+	/* If this is an engineering sample, return empty string */
+	if (parser_state.engineering_sample) {
+		return 0;
+	}
+
+	/* Check if there is some string before the frequency separator. */
+	if (parser_state.frequency_separator != NULL) {
+		if (is_space(name, parser_state.frequency_separator)) {
+			/* If only frequency is available, return empty string
+			 */
+			return 0;
+		}
+	}
+
+	/* Compact tokens: collapse multiple spacing into one */
+	{
+		char* output_ptr = normalized_name;
+		char* token_start = NULL;
+		bool is_token = false;
+		/* Starts as true so that no separator is emitted before the
+		 * first token */
+		bool previous_token_ends_with_dash = true;
+		bool current_token_starts_with_dash = false;
+		/* Starts at 1 and is incremented once per emitted separator */
+		uint32_t token_count = 1;
+		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
+			const char character = *char_ptr;
+			if (character == ' ') {
+				if (is_token) {
+					is_token = false;
+					if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
+						token_count += 1;
+						*output_ptr++ = ' ';
+					}
+					output_ptr = move_token(token_start, char_ptr, output_ptr);
+					/* Note: char_ptr[-1] exists because
+					 * there is a token before this space */
+					previous_token_ends_with_dash = (char_ptr[-1] == '-');
+				}
+			} else {
+				if (!is_token) {
+					is_token = true;
+					token_start = char_ptr;
+					current_token_starts_with_dash = (character == '-');
+				}
+			}
+		}
+		/* Flush the final token, which is not followed by a space */
+		if (is_token) {
+			if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
+				token_count += 1;
+				*output_ptr++ = ' ';
+			}
+			output_ptr = move_token(token_start, name_end, output_ptr);
+		}
+		if (parser_state.frequency_token && token_count <= 1) {
+			/* The only remaining part is frequency */
+			normalized_name[0] = '\0';
+			return 0;
+		}
+		/* Terminate the output; when all 48 bytes were used the last
+		 * character is sacrificed for the terminator */
+		if (output_ptr < &normalized_name[48]) {
+			*output_ptr = '\0';
+		} else {
+			normalized_name[47] = '\0';
+		}
+		return (uint32_t)(output_ptr - normalized_name);
+	}
+}
+
+/*
+ * Human-readable vendor prefixes, indexed by enum cpuinfo_vendor.
+ * Vendors without an explicit entry are left as NULL by the designated
+ * initializers; cpuinfo_x86_format_package_name() treats a NULL entry as
+ * "no prefix".
+ */
+static const char* vendor_string_map[] = {
+	[cpuinfo_vendor_intel] = "Intel",
+	[cpuinfo_vendor_amd] = "AMD",
+	[cpuinfo_vendor_via] = "VIA",
+	[cpuinfo_vendor_hygon] = "Hygon",
+	[cpuinfo_vendor_rdc] = "RDC",
+	[cpuinfo_vendor_dmp] = "DM&P",
+	[cpuinfo_vendor_transmeta] = "Transmeta",
+	[cpuinfo_vendor_cyrix] = "Cyrix",
+	[cpuinfo_vendor_rise] = "Rise",
+	[cpuinfo_vendor_nsc] = "NSC",
+	[cpuinfo_vendor_sis] = "SiS",
+	[cpuinfo_vendor_nexgen] = "NexGen",
+	[cpuinfo_vendor_umc] = "UMC",
+};
+
+/*
+ * Builds the package name "<vendor> <brand>" from a normalized brand string.
+ *
+ * - An empty brand string produces an empty package name and returns 0.
+ * - If the vendor has no entry in vendor_string_map, the brand string is
+ *   copied as-is (truncated to CPUINFO_PACKAGE_NAME_MAX - 1 characters) and 0
+ *   is returned.
+ * - Otherwise the vendor prefix and a space are prepended, and the return
+ *   value is the length of that prefix including the space, i.e. the offset
+ *   at which the brand string starts inside package_name.
+ */
+uint32_t cpuinfo_x86_format_package_name(
+	enum cpuinfo_vendor vendor,
+	const char normalized_brand_string[48],
+	char package_name[CPUINFO_PACKAGE_NAME_MAX]) {
+	/* Nothing to format without a brand string */
+	if (normalized_brand_string[0] == '\0') {
+		package_name[0] = '\0';
+		return 0;
+	}
+
+	/* Look up the vendor prefix; out-of-range vendors have none */
+	const uint32_t vendor_index = (uint32_t)vendor;
+	const char* vendor_prefix = (vendor_index < (uint32_t)CPUINFO_COUNT_OF(vendor_string_map))
+		? vendor_string_map[vendor_index]
+		: NULL;
+
+	if (vendor_prefix != NULL) {
+		snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "%s %s", vendor_prefix, normalized_brand_string);
+		return (uint32_t)strlen(vendor_prefix) + 1;
+	}
+
+	/* No known prefix: the package name is just the brand string.
+	 * strncpy does not terminate on truncation, so do it explicitly. */
+	strncpy(package_name, normalized_brand_string, CPUINFO_PACKAGE_NAME_MAX);
+	package_name[CPUINFO_PACKAGE_NAME_MAX - 1] = '\0';
+	return 0;
+}
--- a/3rdparty/cpuinfo/src/x86/topology.c
+++ b/3rdparty/cpuinfo/src/x86/topology.c
@@ -0,0 +1,163 @@
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/log.h>
+#include <cpuinfo/utils.h>
+#include <x86/api.h>
+#include <x86/cpuid.h>
+
+/*
+ * Level types compared against ecx[bits 8-15] of CPUID leaf 0xB in
+ * cpuinfo_x86_detect_topology(). A value of 0 (invalid) ends the level
+ * enumeration there.
+ */
+enum topology_type {
+	topology_type_invalid = 0,
+	topology_type_smt = 1,
+	topology_type_core = 2,
+};
+
+/*
+ * Fills in the APIC ID and the SMT/core bit-field layout of `topology`.
+ *
+ * Two sources are consulted, in this order:
+ *  - the legacy HTT information in CPUID leaf 1 (plus the extended leaves
+ *    0x80000001 / 0x80000008 when CmpLegacy is reported);
+ *  - CPUID leaf 0xB when x2APIC is reported and the leaf is available, in
+ *    which case the SMT/core fields are reset and rebuilt from that leaf and
+ *    the APIC ID is replaced by the x2APIC ID.
+ *
+ * Fields of `topology` that neither path writes keep the values they had on
+ * entry; the legacy path also reads topology->core_bits_length.
+ */
+void cpuinfo_x86_detect_topology(
+	uint32_t max_base_index,
+	uint32_t max_extended_index,
+	struct cpuid_regs leaf1,
+	struct cpuinfo_x86_topology* topology) {
+	uint32_t apic_id = 0;
+
+	/*
+	 * HTT: indicates multi-core/hyper-threading support on this core.
+	 * - Intel, AMD: edx[bit 28] in basic info.
+	 */
+	if ((leaf1.edx & UINT32_C(0x10000000)) != 0) {
+		apic_id = leaf1.ebx >> 24;
+
+		/*
+		 * CmpLegacy: core multi-processing legacy mode.
+		 * - AMD: ecx[bit 1] in extended info (reserved bit on Intel
+		 * CPUs).
+		 */
+		bool cmp_legacy = false;
+		if (max_extended_index >= UINT32_C(0x80000001)) {
+			const struct cpuid_regs ext_features = cpuid(UINT32_C(0x80000001));
+			cmp_legacy = (ext_features.ecx & UINT32_C(0x00000002)) != 0;
+		}
+
+		if (!cmp_legacy) {
+			/*
+			 * Maximum number of addressable IDs for logical
+			 * processors in this physical package.
+			 * - Intel: ebx[bits 16-23] in basic info (different
+			 * interpretation on AMD CPUs).
+			 */
+			const uint32_t addressable_ids = (leaf1.ebx >> 16) & UINT32_C(0x000000FF);
+			if (addressable_ids != 0) {
+				const uint32_t smt_bits = bit_length(addressable_ids) - topology->core_bits_length;
+				topology->core_bits_offset = smt_bits;
+				topology->thread_bits_length = smt_bits;
+			}
+			cpuinfo_log_debug(
+				"HTT: APIC ID = %08" PRIx32 ", logical processors = %" PRIu32,
+				apic_id,
+				addressable_ids);
+		} else {
+			uint32_t core_count;
+			if (max_extended_index >= UINT32_C(0x80000008)) {
+				/*
+				 * NC: number of physical cores - 1. The number
+				 * of cores in the processor is NC+1.
+				 * - AMD: ecx[bits 0-7] in leaf 0x80000008
+				 * (reserved zero bits on Intel CPUs).
+				 */
+				const struct cpuid_regs ext_sizes = cpuid(UINT32_C(0x80000008));
+				core_count = 1 + (ext_sizes.ecx & UINT32_C(0x000000FF));
+			} else {
+				/*
+				 * LogicalProcessorCount: the number of cores
+				 * per processor.
+				 * - AMD: ebx[bits 16-23] in basic info
+				 * (different interpretation on Intel CPUs).
+				 */
+				core_count = (leaf1.ebx >> 16) & UINT32_C(0x000000FF);
+			}
+			/* NC+1 is never zero, so this check only filters the
+			 * LogicalProcessorCount fallback */
+			if (core_count != 0) {
+				topology->core_bits_length = bit_length(core_count);
+			}
+			cpuinfo_log_debug(
+				"HTT: APIC ID = %08" PRIx32 ", cores per processor = %" PRIu32, apic_id, core_count);
+		}
+	}
+
+	/*
+	 * x2APIC: indicated support for x2APIC feature.
+	 * - Intel: ecx[bit 21] in basic info (reserved bit on AMD CPUs).
+	 */
+	const bool has_x2apic = (leaf1.ecx & UINT32_C(0x00200000)) != 0;
+	if (has_x2apic && max_base_index >= UINT32_C(0xB)) {
+		/* Leaf 0xB supersedes whatever the legacy path derived */
+		topology->thread_bits_length = 0;
+		topology->thread_bits_offset = 0;
+		topology->core_bits_length = 0;
+		topology->core_bits_offset = 0;
+
+		uint32_t shift_so_far = 0;
+		for (uint32_t level = 0;; level++) {
+			const struct cpuid_regs level_regs = cpuidex(UINT32_C(0xB), level);
+			const uint32_t level_type = (level_regs.ecx >> 8) & UINT32_C(0x000000FF);
+			const uint32_t level_shift = level_regs.eax & UINT32_C(0x0000001F);
+			apic_id = level_regs.edx;
+
+			if (level_type == topology_type_smt) {
+				cpuinfo_log_debug(
+					"x2 level %" PRIu32 ": APIC ID = %08" PRIx32 ", type SMT, shift %" PRIu32
+					", total shift %" PRIu32,
+					level,
+					apic_id,
+					level_shift,
+					shift_so_far);
+				topology->thread_bits_offset = shift_so_far;
+				topology->thread_bits_length = level_shift;
+			} else if (level_type == topology_type_core) {
+				cpuinfo_log_debug(
+					"x2 level %" PRIu32 ": APIC ID = %08" PRIx32 ", type core, shift %" PRIu32
+					", total shift %" PRIu32,
+					level,
+					apic_id,
+					level_shift,
+					shift_so_far);
+				topology->core_bits_offset = shift_so_far;
+				topology->core_bits_length = level_shift;
+			} else if (level_type != topology_type_invalid) {
+				cpuinfo_log_warning(
+					"unexpected topology type %" PRIu32 " (offset %" PRIu32 ", length %" PRIu32
+					") reported in leaf 0x0000000B is ignored",
+					level_type,
+					shift_so_far,
+					level_shift);
+			}
+
+			shift_so_far += level_shift;
+			/* An invalid (zero) level type terminates the
+			 * enumeration */
+			if (level_type == 0) {
+				break;
+			}
+		}
+		cpuinfo_log_debug(
+			"x2APIC ID 0x%08" PRIx32 ", SMT offset %" PRIu32 " length %" PRIu32 ", core offset %" PRIu32
+			" length %" PRIu32,
+			apic_id,
+			topology->thread_bits_offset,
+			topology->thread_bits_length,
+			topology->core_bits_offset,
+			topology->core_bits_length);
+	}
+
+	topology->apic_id = apic_id;
+}
--- a/3rdparty/cpuinfo/src/x86/uarch.c
+++ b/3rdparty/cpuinfo/src/x86/uarch.c
@@ -0,0 +1,404 @@
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+
+/*
+ * Maps a CPU vendor and decoded CPUID model information to a
+ * microarchitecture.
+ *
+ * The lookup is keyed on model_info->family, then on model_info->model or
+ * model_info->extended_model depending on the vendor/family. Entries for
+ * 32-bit-only processors are compiled in only when CPUINFO_ARCH_X86 is set.
+ * Any combination that is not listed yields cpuinfo_uarch_unknown.
+ */
+enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+	enum cpuinfo_vendor vendor,
+	const struct cpuinfo_x86_model_info* model_info) {
+	switch (vendor) {
+		case cpuinfo_vendor_intel:
+			switch (model_info->family) {
+#if CPUINFO_ARCH_X86
+				case 0x05:
+					switch (model_info->model) {
+						case 0x01: // Pentium (60, 66)
+						case 0x02: // Pentium (75, 90, 100, 120, 133, 150, 166, 200)
+						case 0x03: // Pentium OverDrive for Intel486-based systems
+						case 0x04: // Pentium MMX
+							return cpuinfo_uarch_p5;
+						case 0x09:
+							return cpuinfo_uarch_quark;
+					}
+					break;
+#endif /* CPUINFO_ARCH_X86 */
+				case 0x06:
+					switch (model_info->model) {
+						/* Mainstream cores */
+#if CPUINFO_ARCH_X86
+						case 0x01: // Pentium Pro
+						case 0x03: // Pentium II (Klamath) and Pentium II Overdrive
+						case 0x05: // Pentium II (Deschutes, Tonga), Pentium II Celeron (Covington), Pentium II Xeon (Drake)
+						case 0x06: // Pentium II (Dixon), Pentium II Celeron (Mendocino)
+						case 0x07: // Pentium III (Katmai), Pentium III Xeon (Tanner)
+						case 0x08: // Pentium III (Coppermine), Pentium II Celeron (Coppermine-128), Pentium III Xeon (Cascades)
+						case 0x0A: // Pentium III Xeon (Cascades-2MB)
+						case 0x0B: // Pentium III (Tualatin), Pentium III Celeron (Tualatin-256)
+							return cpuinfo_uarch_p6;
+						case 0x09: // Pentium M (Banias), Pentium M Celeron (Banias-0, Banias-512)
+						case 0x0D: // Pentium M (Dothan), Pentium M Celeron (Dothan-512, Dothan-1024)
+						case 0x15: // Intel 80579 (Tolapai)
+							return cpuinfo_uarch_dothan;
+						case 0x0E: // Core Solo/Duo (Yonah), Pentium Dual-Core T2xxx (Yonah), Celeron M (Yonah-512, Yonah-1024), Dual-Core Xeon (Sossaman)
+							return cpuinfo_uarch_yonah;
+#endif /* CPUINFO_ARCH_X86 */
+						case 0x0F: // Core 2 Duo (Conroe, Conroe-2M, Merom), Core 2 Quad (Tigerton), Xeon (Woodcrest, Clovertown, Kentsfield)
+						case 0x16: // Celeron (Conroe-L, Merom-L), Core 2 Duo (Merom)
+							return cpuinfo_uarch_conroe;
+						case 0x17: // Core 2 Duo (Penryn-3M), Core 2 Quad (Yorkfield), Core 2 Extreme (Yorkfield), Xeon (Harpertown), Pentium Dual-Core (Penryn)
+						case 0x1D: // Xeon (Dunnington)
+							return cpuinfo_uarch_penryn;
+						case 0x1A: // Core iX (Bloomfield), Xeon (Gainestown)
+						case 0x1E: // Core iX (Lynnfield, Clarksfield)
+						case 0x1F: // Core iX (Havendale)
+						case 0x2E: // Xeon (Beckton)
+						case 0x25: // Core iX (Clarkdale)
+						case 0x2C: // Core iX (Gulftown), Xeon (Gulftown)
+						case 0x2F: // Xeon (Eagleton)
+							return cpuinfo_uarch_nehalem;
+						case 0x2A: // Core iX (Sandy Bridge)
+						case 0x2D: // Core iX (Sandy Bridge-E), Xeon (Sandy Bridge EP/EX)
+							return cpuinfo_uarch_sandy_bridge;
+						case 0x3A: // Core iX (Ivy Bridge)
+						case 0x3E: // Ivy Bridge-E
+							return cpuinfo_uarch_ivy_bridge;
+						case 0x3C:
+						case 0x3F: // Haswell-E
+						case 0x45: // Haswell ULT
+						case 0x46: // Haswell with eDRAM
+							return cpuinfo_uarch_haswell;
+						case 0x3D: // Broadwell-U
+						case 0x47: // Broadwell-H
+						case 0x4F: // Broadwell-E
+						case 0x56: // Broadwell-DE
+							return cpuinfo_uarch_broadwell;
+						case 0x4E: // Sky Lake Client Y/U
+						case 0x55: // Sky/Cascade/Cooper Lake Server
+						case 0x5E: // Sky Lake Client DT/H/S
+						case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
+						case 0x9E: // Kaby/Coffee Lake DT/H/S
+						case 0xA5: // Comet Lake H/S
+						case 0xA6: // Comet Lake U/Y
+							return cpuinfo_uarch_sky_lake;
+						case 0x66: // Cannon Lake (Core i3-8121U)
+							return cpuinfo_uarch_palm_cove;
+						case 0x6A: // Ice Lake-DE
+						case 0x6C: // Ice Lake-SP
+						case 0x7D: // Ice Lake-Y
+						case 0x7E: // Ice Lake-U
+							return cpuinfo_uarch_sunny_cove;
+
+						/* Low-power cores */
+						case 0x1C: // Diamondville, Silverthorne, Pineview
+						case 0x26: // Tunnel Creek
+							return cpuinfo_uarch_bonnell;
+						case 0x27: // Medfield
+						case 0x35: // Cloverview
+						case 0x36: // Cedarview, Centerton
+							return cpuinfo_uarch_saltwell;
+						case 0x37: // Bay Trail
+						case 0x4A: // Merrifield
+						case 0x4D: // Avoton, Rangeley
+						case 0x5A: // Moorefield
+						case 0x5D: // SoFIA
+							return cpuinfo_uarch_silvermont;
+						case 0x4C: // Braswell, Cherry Trail
+						case 0x75: // Spreadtrum SC9853I-IA
+							return cpuinfo_uarch_airmont;
+						case 0x5C: // Apollo Lake
+						case 0x5F: // Denverton
+							return cpuinfo_uarch_goldmont;
+						case 0x7A: // Gemini Lake
+							return cpuinfo_uarch_goldmont_plus;
+
+						/* Knights-series cores */
+						case 0x57:
+							return cpuinfo_uarch_knights_landing;
+						case 0x85:
+							return cpuinfo_uarch_knights_mill;
+					}
+					break;
+				case 0x0F:
+					switch (model_info->model) {
+						case 0x00: // Pentium 4 Xeon (Foster)
+						case 0x01: // Pentium 4 Celeron (Willamette-128), Pentium 4 Xeon (Foster, Foster MP)
+						case 0x02: // Pentium 4 (Northwood), Pentium 4 EE (Gallatin), Pentium 4 Celeron (Northwood-128, Northwood-256), Pentium 4 Xeon (Gallatin DP, Prestonia)
+							return cpuinfo_uarch_willamette;
+						case 0x03: // Pentium 4 (Prescott), Pentium 4 Xeon (Nocona)
+						case 0x04: // Pentium 4 (Prescott-2M), Pentium 4 EE (Prescott-2M), Pentium D (Smithfield), Celeron D (Prescott-256), Pentium 4 Xeon (Cranford, Irwindale, Paxville)
+						case 0x06: // Pentium 4 (Cedar Mill), Pentium D EE (Presler), Celeron D (Cedar Mill), Pentium 4 Xeon (Dempsey, Tulsa)
+							return cpuinfo_uarch_prescott;
+					}
+					break;
+			}
+			break;
+		case cpuinfo_vendor_amd:
+			switch (model_info->family) {
+#if CPUINFO_ARCH_X86
+				case 0x5:
+					switch (model_info->model) {
+						case 0x00:
+						case 0x01:
+						case 0x02:
+							return cpuinfo_uarch_k5;
+						case 0x06:
+						case 0x07:
+						case 0x08:
+						case 0x0D:
+							return cpuinfo_uarch_k6;
+						case 0x0A:
+							return cpuinfo_uarch_geode;
+					}
+					break;
+				case 0x6:
+					return cpuinfo_uarch_k7;
+#endif /* CPUINFO_ARCH_X86 */
+				case 0xF: // Opteron, Athlon 64, Sempron
+				case 0x11: // Turion
+					return cpuinfo_uarch_k8;
+				case 0x10: // Opteron, Phenom, Athlon, Sempron
+				case 0x12: // Llano APU
+					return cpuinfo_uarch_k10;
+				case 0x14:
+					return cpuinfo_uarch_bobcat;
+				case 0x15:
+					switch (model_info->model) {
+						case 0x00: // Engineering samples
+						case 0x01: // Zambezi, Interlagos
+							return cpuinfo_uarch_bulldozer;
+						case 0x02: // Vishera
+						case 0x10: // Trinity
+						case 0x13: // Richland
+							return cpuinfo_uarch_piledriver;
+						case 0x38: // Godavari
+						case 0x30: // Kaveri
+							return cpuinfo_uarch_steamroller;
+						case 0x60: // Carrizo
+						case 0x65: // Bristol Ridge
+						case 0x70: // Stoney Ridge
+							return cpuinfo_uarch_excavator;
+						default:
+							/* Unlisted model: fall back
+							 * to the extended model */
+							switch (model_info->extended_model) {
+								case 0x0:
+									return cpuinfo_uarch_bulldozer;
+								case 0x1: // No L3 cache
+								case 0x2: // With L3 cache
+									return cpuinfo_uarch_piledriver;
+								case 0x3: // With L3 cache
+								case 0x4: // No L3 cache
+									return cpuinfo_uarch_steamroller;
+							}
+							break;
+					}
+					break;
+				case 0x16:
+					if (model_info->extended_model >= 0x03) {
+						return cpuinfo_uarch_puma;
+					} else {
+						return cpuinfo_uarch_jaguar;
+					}
+				case 0x17:
+					switch (model_info->extended_model) {
+						case 0x0: // model 01h -> 14 nm Naples/Whitehaven/Summit Ridge/Snowy Owl, model 08h -> 12 nm Colfax/Pinnacle Ridge
+						case 0x1: // model 11h -> 14 nm Raven Ridge/Great Horned Owl, model 18h -> 14 nm Banded Kestrel / 12 nm Picasso
+							return cpuinfo_uarch_zen;
+						case 0x3: // model 31h -> Rome/Castle Peak
+						case 0x4: // model 47h -> Xbox Series X
+						case 0x6: // model 60h -> Renoir/Grey Hawk, model 68h -> Lucienne
+						case 0x7: // model 71h -> Matisse
+						case 0x9: // model 90h -> Van Gogh, model 98h -> Mero
+							return cpuinfo_uarch_zen2;
+					}
+					break;
+				case 0x19:
+					switch (model_info->extended_model) {
+						case 0x0: // model 00h -> Genesis, model 01h -> Milan, model 08h -> Chagall
+						case 0x2: // model 21h -> Vermeer
+						case 0x3: // model 30h -> Badami, Trento
+						case 0x4: // model 40h -> Rembrandt
+						case 0x5: // model 50h -> Cezanne
+							return cpuinfo_uarch_zen3;
+						case 0x1: // model 10h..1Fh -> Stones
+						case 0x6: // model 60h..6Fh -> Raphael
+						case 0x7: // model 70h..77h -> Phoenix/Hawkpoint1, model 78h..7Fh -> Phoenix 2/Hawkpoint2
+						case 0xA: // model A0h..AFh -> Stones-Dense
+							return cpuinfo_uarch_zen4;
+					}
+					break;
+				case 0x1a:
+					return cpuinfo_uarch_zen5;
+			}
+			break;
+		case cpuinfo_vendor_hygon:
+			switch (model_info->family) {
+				/*
+				 * Hygon Dhyana processors report family 18h.
+				 * The AMD table above already keys on family
+				 * values above 0x0F, so `family` is evidently
+				 * the combined family. 0x00 is kept from the
+				 * previous table so existing behavior is not
+				 * removed.
+				 */
+				case 0x00:
+				case 0x18: // Dhyana
+					return cpuinfo_uarch_dhyana;
+			}
+			break;
+		default:
+			break;
+	}
+	return cpuinfo_uarch_unknown;
+}
--- a/3rdparty/cpuinfo/src/x86/vendor.c
+++ b/3rdparty/cpuinfo/src/x86/vendor.c
@@ -0,0 +1,187 @@
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+
+/*
+ * Vendor signature fragments.
+ *
+ * Each macro below is named after four consecutive characters of a 12-byte
+ * CPUID vendor string and expands to those characters packed into a 32-bit
+ * value with the first character in the lowest byte. A trailing or leading
+ * space in the fragment is part of the value but cannot appear in the macro
+ * name (e.g. `AMD` is "AMD ", `NSC` is " NSC", `e_by` is "e by").
+ * cpuinfo_x86_decode_vendor() compares ebx, edx and ecx (in that order of the
+ * string) against these values.
+ */
+
+/* Intel vendor string: "GenuineIntel" */
+#define Genu UINT32_C(0x756E6547)
+#define ineI UINT32_C(0x49656E69)
+#define ntel UINT32_C(0x6C65746E)
+
+/* AMD vendor strings: "AuthenticAMD", "AMDisbetter!", "AMD ISBETTER" */
+#define Auth UINT32_C(0x68747541)
+#define enti UINT32_C(0x69746E65)
+#define cAMD UINT32_C(0x444D4163)
+#define AMDi UINT32_C(0x69444D41)
+#define sbet UINT32_C(0x74656273)
+#define ter UINT32_C(0x21726574)
+#define AMD UINT32_C(0x20444D41)
+#define ISBE UINT32_C(0x45425349)
+#define TTER UINT32_C(0x52455454)
+
+/* VIA (Centaur) vendor strings: "CentaurHauls", "VIA VIA VIA " */
+#define Cent UINT32_C(0x746E6543)
+#define aurH UINT32_C(0x48727561)
+#define auls UINT32_C(0x736C7561)
+#define VIA UINT32_C(0x20414956)
+
+/* Hygon vendor string: "HygonGenuine" */
+#define Hygo UINT32_C(0x6F677948)
+#define nGen UINT32_C(0x6E65476E)
+#define uine UINT32_C(0x656E6975)
+
+/* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */
+#define ineT UINT32_C(0x54656E69)
+#define Mx86 UINT32_C(0x3638784D)
+#define Tran UINT32_C(0x6E617254)
+#define smet UINT32_C(0x74656D73)
+#define aCPU UINT32_C(0x55504361)
+
+/* Cyrix vendor string: "CyrixInstead" */
+#define Cyri UINT32_C(0x69727943)
+#define xIns UINT32_C(0x736E4978)
+#define tead UINT32_C(0x64616574)
+
+/* Rise vendor string: "RiseRiseRise" */
+#define Rise UINT32_C(0x65736952)
+
+/* NSC vendor string: "Geode by NSC" */
+#define Geod UINT32_C(0x646F6547)
+#define e_by UINT32_C(0x79622065)
+#define NSC UINT32_C(0x43534E20)
+
+/* SiS vendor string: "SiS SiS SiS " */
+#define SiS UINT32_C(0x20536953)
+
+/* NexGen vendor string: "NexGenDriven" */
+#define NexG UINT32_C(0x4778654E)
+#define enDr UINT32_C(0x72446E65)
+#define iven UINT32_C(0x6E657669)
+
+/* UMC vendor string: "UMC UMC UMC " */
+#define UMC UINT32_C(0x20434D55)
+
+/* RDC vendor string: "Genuine  RDC" */
+#define ine UINT32_C(0x20656E69)
+#define RDC UINT32_C(0x43445220)
+
+/* DM&P vendor string: "Vortex86 SoC" */
+#define Vort UINT32_C(0x74726F56)
+#define ex86 UINT32_C(0x36387865)
+#define SoC UINT32_C(0x436F5320)
+
+/*
+ * Identifies the CPU vendor from the three registers that hold the 12-byte
+ * vendor string. The string is laid out as ebx, edx, ecx (note that this
+ * differs from the parameter order). Signatures of vendors that only made
+ * 32-bit processors are recognized only when CPUINFO_ARCH_X86 is set.
+ * Returns cpuinfo_vendor_unknown when no signature matches.
+ */
+enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx) {
+	/* One row per known vendor string, columns in string order */
+	static const struct {
+		uint32_t ebx;
+		uint32_t edx;
+		uint32_t ecx;
+		enum cpuinfo_vendor vendor;
+	} signatures[] = {
+		{Genu, ineI, ntel, cpuinfo_vendor_intel}, /* "GenuineIntel" */
+		{Auth, enti, cAMD, cpuinfo_vendor_amd}, /* "AuthenticAMD" */
+		{Cent, aurH, auls, cpuinfo_vendor_via}, /* "CentaurHauls" */
+		{Hygo, nGen, uine, cpuinfo_vendor_hygon}, /* "HygonGenuine" */
+#if CPUINFO_ARCH_X86
+		{Genu, ineT, Mx86, cpuinfo_vendor_transmeta}, /* "GenuineTMx86" */
+		{Genu, ine, RDC, cpuinfo_vendor_rdc}, /* "Genuine  RDC" */
+		{AMDi, sbet, ter, cpuinfo_vendor_amd}, /* "AMDisbetter!" */
+		{AMD, ISBE, TTER, cpuinfo_vendor_amd}, /* "AMD ISBETTER" */
+		{VIA, VIA, VIA, cpuinfo_vendor_via}, /* "VIA VIA VIA " */
+		{Tran, smet, aCPU, cpuinfo_vendor_transmeta}, /* "TransmetaCPU" */
+		{Cyri, xIns, tead, cpuinfo_vendor_cyrix}, /* "CyrixInstead" */
+		{Rise, Rise, Rise, cpuinfo_vendor_rise}, /* "RiseRiseRise" */
+		{Geod, e_by, NSC, cpuinfo_vendor_nsc}, /* "Geode by NSC" */
+		{SiS, SiS, SiS, cpuinfo_vendor_sis}, /* "SiS SiS SiS " */
+		{NexG, enDr, iven, cpuinfo_vendor_nexgen}, /* "NexGenDriven" */
+		{UMC, UMC, UMC, cpuinfo_vendor_umc}, /* "UMC UMC UMC " */
+		{Vort, ex86, SoC, cpuinfo_vendor_dmp}, /* "Vortex86 SoC" */
+#endif
+	};
+	const uint32_t signature_count = (uint32_t)(sizeof(signatures) / sizeof(signatures[0]));
+
+	/* All rows are distinct, so at most one of them can match */
+	for (uint32_t i = 0; i < signature_count; i++) {
+		if (signatures[i].ebx == ebx && signatures[i].edx == edx && signatures[i].ecx == ecx) {
+			return signatures[i].vendor;
+		}
+	}
+	return cpuinfo_vendor_unknown;
+}
--- a/3rdparty/cpuinfo/src/x86/windows/api.h
+++ b/3rdparty/cpuinfo/src/x86/windows/api.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <windows.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+
+/*
+ * Per-logical-processor bookkeeping record.
+ *
+ * NOTE(review): this header is x86/windows/api.h, yet the struct is named
+ * cpuinfo_arm_linux_processor and its field comments mention Linux sysfs
+ * paths. Presumably it was carried over from another port; confirm whether
+ * it is used here and whether the descriptions below apply on Windows.
+ */
+struct cpuinfo_arm_linux_processor {
+	/**
+	 * Minimum processor ID on the package which includes this logical
+	 * processor. This value can serve as an ID for the cluster of logical
+	 * processors: it is the same for all logical processors on the same
+	 * package.
+	 */
+	uint32_t package_leader_id;
+	/*
+	 * NOTE(review): a comment describing the "minimum processor ID on the
+	 * core which includes this logical processor" stood here without any
+	 * member following it. Presumably a per-core leader ID field was
+	 * dropped; confirm.
+	 */
+	/**
+	 * Number of logical processors in the package.
+	 */
+	uint32_t package_processor_count;
+	/**
+	 * Maximum frequency, in kHz.
+	 * The value is parsed from
+	 * /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq. If failed to
+	 * read or parse the file, the value is 0.
+	 */
+	uint32_t max_frequency;
+	/**
+	 * Minimum frequency, in kHz.
+	 * The value is parsed from
+	 * /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq. If failed to
+	 * read or parse the file, the value is 0.
+	 */
+	uint32_t min_frequency;
+	/** Linux processor ID */
+	uint32_t system_processor_id;
+	/* NOTE(review): the meaning of the individual flag bits is not
+	 * defined in this header */
+	uint32_t flags;
+};
--- a/3rdparty/cpuinfo/src/x86/windows/init.c
+++ b/3rdparty/cpuinfo/src/x86/windows/init.c
@@ -0,0 +1,671 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <x86/api.h>
+
+#include <windows.h>
+
+#ifdef __GNUC__
+#define CPUINFO_ALLOCA __builtin_alloca
+#else
+#define CPUINFO_ALLOCA _alloca
+#endif
+
+/*
+ * Returns a mask with the lowest `bits` bits set (0 -> 0, 3 -> 0x7).
+ *
+ * Widths of 32 or more yield an all-ones mask: shifting a 32-bit value by its
+ * full width or more is undefined behavior in C, so that case is handled
+ * explicitly instead of being left to the shift.
+ */
+static inline uint32_t bit_mask(uint32_t bits) {
+	if (bits >= 32) {
+		return UINT32_MAX;
+	}
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+/*
+ * Returns the index of the lowest set bit in an affinity mask, using the
+ * bit-scan intrinsic that matches the target architecture.
+ *
+ * The intrinsic's return value is not checked, so the result is only
+ * meaningful for a non-zero mask; the callers in this file only pass
+ * non-zero masks.
+ */
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
+	unsigned long lowest_set_bit;
+#if defined(_M_X64) || defined(_M_AMD64)
+	_BitScanForward64(&lowest_set_bit, (unsigned __int64)kaffinity);
+#elif defined(_M_IX86)
+	_BitScanForward(&lowest_set_bit, (unsigned long)kaffinity);
+#else
+#error Platform-specific implementation required
+#endif
+	return (uint32_t)lowest_set_bit;
+}
+
+/*
+ * Counts the distinct caches at each level (L1I, L1D, L2, L3, L4) across all
+ * logical processors.
+ *
+ * For every level with a non-zero size, the cache ID of a processor is its
+ * APIC ID with the low `apic_bits` bits cleared. A new cache is counted each
+ * time that ID differs from the ID of the previous processor, so processors
+ * sharing a cache must be adjacent in `processors` for the count to be exact.
+ *
+ * @param processors_count  number of entries in `processors`.
+ * @param processors        logical processors with `apic_id` already set.
+ * @param x86_processor     decoded cache parameters shared by all processors.
+ * @param l1i_count_ptr..l4_count_ptr  receive the per-level cache counts
+ *                          (0 for levels whose size is 0).
+ */
+static void cpuinfo_x86_count_caches(
+	uint32_t processors_count,
+	const struct cpuinfo_processor* processors,
+	const struct cpuinfo_x86_processor* x86_processor,
+	uint32_t* l1i_count_ptr,
+	uint32_t* l1d_count_ptr,
+	uint32_t* l2_count_ptr,
+	uint32_t* l3_count_ptr,
+	uint32_t* l4_count_ptr) {
+	uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
+	/* UINT32_MAX serves as the "no cache seen yet" sentinel for each level */
+	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
+	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
+	for (uint32_t i = 0; i < processors_count; i++) {
+		const uint32_t apic_id = processors[i].apic_id;
+		cpuinfo_log_debug("APIC ID %" PRIu32 ": logical processor %" PRIu32, apic_id, i);
+
+		if (x86_processor->cache.l1i.size != 0) {
+			const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor->cache.l1i.apic_bits);
+			if (l1i_id != last_l1i_id) {
+				last_l1i_id = l1i_id;
+				l1i_count++;
+			}
+		}
+		if (x86_processor->cache.l1d.size != 0) {
+			const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor->cache.l1d.apic_bits);
+			if (l1d_id != last_l1d_id) {
+				last_l1d_id = l1d_id;
+				l1d_count++;
+			}
+		}
+		if (x86_processor->cache.l2.size != 0) {
+			const uint32_t l2_id = apic_id & ~bit_mask(x86_processor->cache.l2.apic_bits);
+			if (l2_id != last_l2_id) {
+				last_l2_id = l2_id;
+				l2_count++;
+			}
+		}
+		if (x86_processor->cache.l3.size != 0) {
+			const uint32_t l3_id = apic_id & ~bit_mask(x86_processor->cache.l3.apic_bits);
+			if (l3_id != last_l3_id) {
+				last_l3_id = l3_id;
+				l3_count++;
+			}
+		}
+		if (x86_processor->cache.l4.size != 0) {
+			const uint32_t l4_id = apic_id & ~bit_mask(x86_processor->cache.l4.apic_bits);
+			if (l4_id != last_l4_id) {
+				last_l4_id = l4_id;
+				l4_count++;
+			}
+		}
+	}
+	*l1i_count_ptr = l1i_count;
+	*l1d_count_ptr = l1d_count;
+	*l2_count_ptr = l2_count;
+	*l3_count_ptr = l3_count;
+	*l4_count_ptr = l4_count;
+}
+
+/*
+ * Reports whether the process appears to be running under Wine, detected by
+ * ntdll.dll exporting the `wine_get_version` symbol. Returns false when
+ * ntdll.dll is not loaded or the export is absent.
+ */
+static bool cpuinfo_x86_windows_is_wine(void) {
+	const HMODULE ntdll_module = GetModuleHandleW(L"ntdll.dll");
+	/* Short-circuit keeps GetProcAddress from being called on a NULL module */
+	return ntdll_module != NULL && GetProcAddress(ntdll_module, "wine_get_version") != NULL;
+}
+
+/*
+ * One-time initialization of the global cpuinfo tables on x86 Windows.
+ *
+ * The function has the INIT_ONCE callback signature (presumably it is driven
+ * through InitOnceExecuteOnce - confirm at the call site); none of its three
+ * parameters is read.
+ *
+ * Steps:
+ *   1. Decode CPUID data into a cpuinfo_x86_processor.
+ *   2. Enumerate processor groups and size the logical-processor table.
+ *   3. Walk RelationProcessorPackage and RelationProcessorCore records to
+ *      reconstruct an APIC ID for every logical processor and to record the
+ *      index of its package, cluster and core.
+ *   4. Allocate the core/cluster/package tables and convert the recorded
+ *      indices into real pointers.
+ *   5. Count, allocate and populate the per-level cache tables.
+ *   6. Publish everything into the cpuinfo_* globals and set
+ *      cpuinfo_is_initialized.
+ *
+ * Always returns TRUE. Failure is reported only by logging an error and
+ * leaving cpuinfo_is_initialized unset; every table allocated up to that
+ * point is released.
+ */
+BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	struct cpuinfo_cache* l4 = NULL;
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX processor_infos = NULL;
+
+	HANDLE heap = GetProcessHeap();
+	const bool is_wine = cpuinfo_x86_windows_is_wine();
+
+	struct cpuinfo_x86_processor x86_processor;
+	ZeroMemory(&x86_processor, sizeof(x86_processor));
+	cpuinfo_x86_init_processor(&x86_processor);
+	char brand_string[48];
+	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
+
+	const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
+	const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
+	/* The package part of an APIC ID starts above both the SMT and core bit fields */
+	const uint32_t package_bits_offset =
+		max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
+		    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
+
+	/* WINE doesn't implement GetMaximumProcessorGroupCount and aborts when
+	 * calling it */
+	const uint32_t max_group_count = is_wine ? 1 : (uint32_t)GetMaximumProcessorGroupCount();
+	cpuinfo_log_debug("detected %" PRIu32 " processor groups", max_group_count);
+
+	uint32_t processors_count = 0;
+	uint32_t* processors_per_group = (uint32_t*)CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
+	for (uint32_t i = 0; i < max_group_count; i++) {
+		processors_per_group[i] = GetMaximumProcessorCount((WORD)i);
+		cpuinfo_log_debug("detected %" PRIu32 " processors in group %" PRIu32, processors_per_group[i], i);
+		processors_count += processors_per_group[i];
+	}
+
+	/* Prefix sums: global index of the first logical processor in each group */
+	uint32_t* processors_before_group = (uint32_t*)CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
+	for (uint32_t i = 0, count = 0; i < max_group_count; i++) {
+		processors_before_group[i] = count;
+		cpuinfo_log_debug(
+			"detected %" PRIu32 " processors before group %" PRIu32, processors_before_group[i], i);
+		count += processors_per_group[i];
+	}
+
+	processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, processors_count * sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+			processors_count * sizeof(struct cpuinfo_processor),
+			processors_count);
+		goto cleanup;
+	}
+
+	/* Size queries: a FALSE return with ERROR_INSUFFICIENT_BUFFER is the
+	 * expected outcome and leaves the required size in the output argument */
+	DWORD cores_info_size = 0;
+	if (GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &cores_info_size) == FALSE) {
+		const DWORD last_error = GetLastError();
+		if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+			cpuinfo_log_error(
+				"failed to query size of processor cores information: error %" PRIu32,
+				(uint32_t)last_error);
+			goto cleanup;
+		}
+	}
+
+	DWORD packages_info_size = 0;
+	if (GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &packages_info_size) == FALSE) {
+		const DWORD last_error = GetLastError();
+		if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+			cpuinfo_log_error(
+				"failed to query size of processor packages information: error %" PRIu32,
+				(uint32_t)last_error);
+			goto cleanup;
+		}
+	}
+
+	/* One scratch buffer, large enough for either query, is reused for both */
+	DWORD max_info_size = max(cores_info_size, packages_info_size);
+
+	processor_infos = HeapAlloc(heap, 0, max_info_size);
+	if (processor_infos == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %" PRIu32 " bytes for logical processor information",
+			(uint32_t)max_info_size);
+		goto cleanup;
+	}
+
+	if (GetLogicalProcessorInformationEx(RelationProcessorPackage, processor_infos, &max_info_size) == FALSE) {
+		cpuinfo_log_error(
+			"failed to query processor packages information: error %" PRIu32, (uint32_t)GetLastError());
+		goto cleanup;
+	}
+
+	uint32_t packages_count = 0;
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX packages_info_end =
+		(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)processor_infos + packages_info_size);
+	for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX package_info = processor_infos; package_info < packages_info_end;
+	     package_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)package_info + package_info->Size)) {
+		if (package_info->Relationship != RelationProcessorPackage) {
+			cpuinfo_log_warning(
+				"unexpected processor info type (%" PRIu32 ") for processor package information",
+				(uint32_t)package_info->Relationship);
+			continue;
+		}
+
+		/* We assume that packages are reported in APIC order */
+		const uint32_t package_id = packages_count++;
+		/* Reconstruct package part of APIC ID */
+		const uint32_t package_apic_id = package_id << package_bits_offset;
+		/* Iterate processor groups and set the package part of APIC ID
+		 */
+		for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) {
+			const uint32_t group_id = package_info->Processor.GroupMask[i].Group;
+			/* Global index of the first logical processor belonging
+			 * to this group */
+			const uint32_t group_processors_start = processors_before_group[group_id];
+			/* Bitmask representing processors in this group
+			 * belonging to this package
+			 */
+			KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask;
+			while (group_processors_mask != 0) {
+				const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
+				const uint32_t processor_id = group_processors_start + group_processor_id;
+				/* The package/cluster tables are not allocated yet, so the
+				 * element index is stored as an offset from NULL and turned
+				 * into a real pointer once the tables exist */
+				processors[processor_id].package = (const struct cpuinfo_package*)NULL + package_id;
+				/* Exactly one cluster is created per package, so the cluster
+				 * index equals the package index. Without this assignment
+				 * every processor would resolve to clusters[0]. */
+				processors[processor_id].cluster = (const struct cpuinfo_cluster*)NULL + package_id;
+				processors[processor_id].windows_group_id = (uint16_t)group_id;
+				processors[processor_id].windows_processor_id = (uint16_t)group_processor_id;
+				processors[processor_id].apic_id = package_apic_id;
+
+				/* Reset the lowest bit in affinity mask */
+				group_processors_mask &= (group_processors_mask - 1);
+			}
+		}
+	}
+
+	/* The previous call overwrote max_info_size with the returned length;
+	 * restore the real buffer capacity before reusing the buffer */
+	max_info_size = max(cores_info_size, packages_info_size);
+	if (GetLogicalProcessorInformationEx(RelationProcessorCore, processor_infos, &max_info_size) == FALSE) {
+		cpuinfo_log_error(
+			"failed to query processor cores information: error %" PRIu32, (uint32_t)GetLastError());
+		goto cleanup;
+	}
+
+	uint32_t cores_count = 0;
+	/* Index (among all cores) of the first core on the current package
+	 */
+	uint32_t package_core_start = 0;
+	uint32_t current_package_apic_id = 0;
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX cores_info_end =
+		(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)processor_infos + cores_info_size);
+	for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info = processor_infos; core_info < cores_info_end;
+	     core_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)core_info + core_info->Size)) {
+		if (core_info->Relationship != RelationProcessorCore) {
+			cpuinfo_log_warning(
+				"unexpected processor info type (%" PRIu32 ") for processor core information",
+				(uint32_t)core_info->Relationship);
+			continue;
+		}
+
+		/* We assume that cores and logical processors are reported in
+		 * APIC order */
+		const uint32_t core_id = cores_count++;
+		uint32_t smt_id = 0;
+		/* Iterate processor groups and set the core & SMT parts of APIC
+		 * ID */
+		for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) {
+			const uint32_t group_id = core_info->Processor.GroupMask[i].Group;
+			/* Global index of the first logical processor belonging
+			 * to this group */
+			const uint32_t group_processors_start = processors_before_group[group_id];
+			/* Bitmask representing processors in this group
+			 * belonging to this core
+			 */
+			KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask;
+			while (group_processors_mask != 0) {
+				const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
+				const uint32_t processor_id = group_processors_start + group_processor_id;
+
+				/* Check if this is the first core on a new
+				 * package. At this point apic_id still holds
+				 * only the package part for this processor. */
+				if (processors[processor_id].apic_id != current_package_apic_id) {
+					package_core_start = core_id;
+					current_package_apic_id = processors[processor_id].apic_id;
+				}
+				/* Core ID w.r.t package */
+				const uint32_t package_core_id = core_id - package_core_start;
+
+				/* Update APIC ID with core and SMT parts */
+				processors[processor_id].apic_id |=
+					((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
+					((package_core_id & core_bits_mask) << x86_processor.topology.core_bits_offset);
+				cpuinfo_log_debug(
+					"reconstructed APIC ID 0x%08" PRIx32 " for processor %" PRIu32
+					" in group %" PRIu32,
+					processors[processor_id].apic_id,
+					group_processor_id,
+					group_id);
+
+				/* Set SMT ID (assume logical processors within
+				 * the core are reported in APIC order) */
+				processors[processor_id].smt_id = smt_id++;
+				/* Core index stored as an offset from NULL until the
+				 * cores table is allocated */
+				processors[processor_id].core = (const struct cpuinfo_core*)NULL + core_id;
+
+				/* Reset the lowest bit in affinity mask */
+				group_processors_mask &= (group_processors_mask - 1);
+			}
+		}
+	}
+
+	cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, cores_count * sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+			cores_count * sizeof(struct cpuinfo_core),
+			cores_count);
+		goto cleanup;
+	}
+
+	clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+			packages_count * sizeof(struct cpuinfo_cluster),
+			packages_count);
+		goto cleanup;
+	}
+
+	packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+			packages_count * sizeof(struct cpuinfo_package),
+			packages_count);
+		goto cleanup;
+	}
+
+	/* Walk processors from last to first so that the *_start fields end up
+	 * holding the lowest processor index of each core/cluster/package */
+	for (uint32_t i = processors_count; i != 0; i--) {
+		const uint32_t processor_id = i - 1;
+		struct cpuinfo_processor* processor = processors + processor_id;
+
+		/* Adjust core, cluster and package pointers for all logical
+		 * processors: the stored value is a byte offset (index scaled
+		 * by element size), so adding the table base yields the element
+		 */
+		struct cpuinfo_core* core = (struct cpuinfo_core*)((uintptr_t)cores + (uintptr_t)processor->core);
+		processor->core = core;
+		struct cpuinfo_cluster* cluster =
+			(struct cpuinfo_cluster*)((uintptr_t)clusters + (uintptr_t)processor->cluster);
+		processor->cluster = cluster;
+		struct cpuinfo_package* package =
+			(struct cpuinfo_package*)((uintptr_t)packages + (uintptr_t)processor->package);
+		processor->package = package;
+
+		/* This can be overwritten by lower-index processors on the same
+		 * package */
+		package->processor_start = processor_id;
+		package->processor_count += 1;
+
+		/* This can be overwritten by lower-index processors on the same
+		 * cluster */
+		cluster->processor_start = processor_id;
+		cluster->processor_count += 1;
+
+		/* This can be overwritten by lower-index processors on the same
+		 * core*/
+		core->processor_start = processor_id;
+		core->processor_count += 1;
+	}
+
+	/* Set vendor/uarch/CPUID information for cores */
+	for (uint32_t i = cores_count; i != 0; i--) {
+		const uint32_t global_core_id = i - 1;
+		struct cpuinfo_core* core = cores + global_core_id;
+		const struct cpuinfo_processor* processor = processors + core->processor_start;
+		struct cpuinfo_package* package = (struct cpuinfo_package*)processor->package;
+		struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*)processor->cluster;
+
+		core->cluster = cluster;
+		core->package = package;
+		core->core_id = core_bits_mask & (processor->apic_id >> x86_processor.topology.core_bits_offset);
+		core->vendor = x86_processor.vendor;
+		core->uarch = x86_processor.uarch;
+		core->cpuid = x86_processor.cpuid;
+
+		/* This can be overwritten by lower-index cores on the same
+		 * cluster/package
+		 */
+		cluster->core_start = global_core_id;
+		cluster->core_count += 1;
+		package->core_start = global_core_id;
+		package->core_count += 1;
+	}
+
+	/* One cluster per package: cluster i belongs to package i */
+	for (uint32_t i = 0; i < packages_count; i++) {
+		struct cpuinfo_package* package = packages + i;
+		struct cpuinfo_cluster* cluster = clusters + i;
+
+		cluster->package = package;
+		cluster->vendor = cores[cluster->core_start].vendor;
+		cluster->uarch = cores[cluster->core_start].uarch;
+		cluster->cpuid = cores[cluster->core_start].cpuid;
+		package->cluster_start = i;
+		package->cluster_count = 1;
+		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, package->name);
+	}
+
+	/* Count caches */
+	uint32_t l1i_count, l1d_count, l2_count, l3_count, l4_count;
+	cpuinfo_x86_count_caches(
+		processors_count, processors, &x86_processor, &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count);
+
+	/* Allocate cache descriptions */
+	if (l1i_count != 0) {
+		l1i = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1i_count * sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
+				l1i_count * sizeof(struct cpuinfo_cache),
+				l1i_count);
+			goto cleanup;
+		}
+	}
+	if (l1d_count != 0) {
+		l1d = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1d_count * sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
+				l1d_count * sizeof(struct cpuinfo_cache),
+				l1d_count);
+			goto cleanup;
+		}
+	}
+	if (l2_count != 0) {
+		l2 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l2_count * sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache),
+				l2_count);
+			goto cleanup;
+		}
+	}
+	if (l3_count != 0) {
+		l3 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l3_count * sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache),
+				l3_count);
+			goto cleanup;
+		}
+	}
+	if (l4_count != 0) {
+		l4 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l4_count * sizeof(struct cpuinfo_cache));
+		if (l4 == NULL) {
+			cpuinfo_log_error(
+				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L4 caches",
+				l4_count * sizeof(struct cpuinfo_cache),
+				l4_count);
+			goto cleanup;
+		}
+	}
+
+	/* Set cache information.
+	 * Each index starts at UINT32_MAX so that the pre-increment performed
+	 * for the first cache of a level wraps around to 0. The processor's
+	 * cache pointer is therefore assigned only after the index is updated.
+	 */
+	uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX,
+		 l4_index = UINT32_MAX;
+	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
+	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
+	for (uint32_t i = 0; i < processors_count; i++) {
+		const uint32_t apic_id = processors[i].apic_id;
+
+		if (x86_processor.cache.l1i.size != 0) {
+			const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
+			if (l1i_id != last_l1i_id) {
+				/* new cache */
+				last_l1i_id = l1i_id;
+				l1i[++l1i_index] = (struct cpuinfo_cache){
+					.size = x86_processor.cache.l1i.size,
+					.associativity = x86_processor.cache.l1i.associativity,
+					.sets = x86_processor.cache.l1i.sets,
+					.partitions = x86_processor.cache.l1i.partitions,
+					.line_size = x86_processor.cache.l1i.line_size,
+					.flags = x86_processor.cache.l1i.flags,
+					.processor_start = i,
+					.processor_count = 1,
+				};
+			} else {
+				/* another processor sharing the same cache */
+				l1i[l1i_index].processor_count += 1;
+			}
+			processors[i].cache.l1i = &l1i[l1i_index];
+		} else {
+			/* reset cache id */
+			last_l1i_id = UINT32_MAX;
+		}
+		if (x86_processor.cache.l1d.size != 0) {
+			const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
+			if (l1d_id != last_l1d_id) {
+				/* new cache */
+				last_l1d_id = l1d_id;
+				l1d[++l1d_index] = (struct cpuinfo_cache){
+					.size = x86_processor.cache.l1d.size,
+					.associativity = x86_processor.cache.l1d.associativity,
+					.sets = x86_processor.cache.l1d.sets,
+					.partitions = x86_processor.cache.l1d.partitions,
+					.line_size = x86_processor.cache.l1d.line_size,
+					.flags = x86_processor.cache.l1d.flags,
+					.processor_start = i,
+					.processor_count = 1,
+				};
+			} else {
+				/* another processor sharing the same cache */
+				l1d[l1d_index].processor_count += 1;
+			}
+			processors[i].cache.l1d = &l1d[l1d_index];
+		} else {
+			/* reset cache id */
+			last_l1d_id = UINT32_MAX;
+		}
+		if (x86_processor.cache.l2.size != 0) {
+			const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
+			if (l2_id != last_l2_id) {
+				/* new cache */
+				last_l2_id = l2_id;
+				l2[++l2_index] = (struct cpuinfo_cache){
+					.size = x86_processor.cache.l2.size,
+					.associativity = x86_processor.cache.l2.associativity,
+					.sets = x86_processor.cache.l2.sets,
+					.partitions = x86_processor.cache.l2.partitions,
+					.line_size = x86_processor.cache.l2.line_size,
+					.flags = x86_processor.cache.l2.flags,
+					.processor_start = i,
+					.processor_count = 1,
+				};
+			} else {
+				/* another processor sharing the same cache */
+				l2[l2_index].processor_count += 1;
+			}
+			processors[i].cache.l2 = &l2[l2_index];
+		} else {
+			/* reset cache id */
+			last_l2_id = UINT32_MAX;
+		}
+		if (x86_processor.cache.l3.size != 0) {
+			const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
+			if (l3_id != last_l3_id) {
+				/* new cache */
+				last_l3_id = l3_id;
+				l3[++l3_index] = (struct cpuinfo_cache){
+					.size = x86_processor.cache.l3.size,
+					.associativity = x86_processor.cache.l3.associativity,
+					.sets = x86_processor.cache.l3.sets,
+					.partitions = x86_processor.cache.l3.partitions,
+					.line_size = x86_processor.cache.l3.line_size,
+					.flags = x86_processor.cache.l3.flags,
+					.processor_start = i,
+					.processor_count = 1,
+				};
+			} else {
+				/* another processor sharing the same cache */
+				l3[l3_index].processor_count += 1;
+			}
+			processors[i].cache.l3 = &l3[l3_index];
+		} else {
+			/* reset cache id */
+			last_l3_id = UINT32_MAX;
+		}
+		if (x86_processor.cache.l4.size != 0) {
+			const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
+			if (l4_id != last_l4_id) {
+				/* new cache */
+				last_l4_id = l4_id;
+				l4[++l4_index] = (struct cpuinfo_cache){
+					.size = x86_processor.cache.l4.size,
+					.associativity = x86_processor.cache.l4.associativity,
+					.sets = x86_processor.cache.l4.sets,
+					.partitions = x86_processor.cache.l4.partitions,
+					.line_size = x86_processor.cache.l4.line_size,
+					.flags = x86_processor.cache.l4.flags,
+					.processor_start = i,
+					.processor_count = 1,
+				};
+			} else {
+				/* another processor sharing the same cache */
+				l4[l4_index].processor_count += 1;
+			}
+			processors[i].cache.l4 = &l4[l4_index];
+		} else {
+			/* reset cache id */
+			last_l4_id = UINT32_MAX;
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_processors_count = processors_count;
+	cpuinfo_cores_count = cores_count;
+	cpuinfo_clusters_count = packages_count;
+	cpuinfo_packages_count = packages_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+		.uarch = x86_processor.uarch,
+		.cpuid = x86_processor.cpuid,
+		.processor_count = processors_count,
+		.core_count = cores_count,
+	};
+
+	/* Make the table writes visible before the initialized flag is set */
+	MemoryBarrier();
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership of the tables has moved to the globals; clear the locals so
+	 * the cleanup code below does not free them */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+
+cleanup:
+	/* The topology scratch buffer is never published, so it is released on
+	 * both the success and the failure path */
+	if (processor_infos != NULL) {
+		HeapFree(heap, 0, processor_infos);
+	}
+	if (processors != NULL) {
+		HeapFree(heap, 0, processors);
+	}
+	if (cores != NULL) {
+		HeapFree(heap, 0, cores);
+	}
+	if (clusters != NULL) {
+		HeapFree(heap, 0, clusters);
+	}
+	if (packages != NULL) {
+		HeapFree(heap, 0, packages);
+	}
+	if (l1i != NULL) {
+		HeapFree(heap, 0, l1i);
+	}
+	if (l1d != NULL) {
+		HeapFree(heap, 0, l1d);
+	}
+	if (l2 != NULL) {
+		HeapFree(heap, 0, l2);
+	}
+	if (l3 != NULL) {
+		HeapFree(heap, 0, l3);
+	}
+	if (l4 != NULL) {
+		HeapFree(heap, 0, l4);
+	}
+	return TRUE;
+}