git: dbe851cc291b - main - misc/pytorch: Update to 2.1.0

From: Po-Chuan Hsieh <sunpoet@FreeBSD.org>
Date: Mon, 27 Nov 2023 10:45:03 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=dbe851cc291bde43def88e727615619c64c2117c

commit dbe851cc291bde43def88e727615619c64c2117c
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2023-11-27 10:09:26 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2023-11-27 10:30:09 +0000

    misc/pytorch: Update to 2.1.0
    
    - Add LICENSE_FILE
    - Remove PYTHON_EXECUTABLE which is set by USES=python now
    - Comment BROKEN_* temporarily to see if the new version builds (the
      idioms used here are sketched below)
    
    Changes:        https://github.com/pytorch/pytorch/releases
    PR:             274859
    Approved by:    maintainer (timeout, 25 days)
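    
    For reference, a minimal sketch of the ports(7) idioms the items above
    rely on (hypothetical port; values illustrative only):
    
        LICENSE=        BSD3CLAUSE
        LICENSE_FILE=   ${WRKSRC}/LICENSE  # license text registered by the framework
        USES=           python             # defines PYTHON_CMD/PYTHON_VER and, per
                                           # the note above, now hands the interpreter
                                           # to the build itself, making a manual
                                           # -DPYTHON_EXECUTABLE redundant
        #BROKEN_aarch64= configure fails   # commenting out a BROKEN_* marker lets
                                           # builders attempt that architecture again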
---
 misc/pytorch/Makefile                              |  19 +-
 misc/pytorch/distinfo                              |   6 +-
 .../files/patch-aten_src_ATen_native_SoftMax.cpp   |  11 -
 ...native_sparse_ValidateCompressedIndicesCommon.h |  82 ++-
 ...aten_src_ATen_native_transformers_attention.cpp |  11 -
 misc/pytorch/files/patch-cmake_Dependencies.cmake  |  17 +-
 .../patch-third__party_cpuinfo_CMakeLists.txt      |  53 +-
 ...hird__party_kineto_libkineto_src_ThreadUtil.cpp |   4 +-
 misc/pytorch/pkg-plist                             | 784 ++++++++++++++-------
 9 files changed, 637 insertions(+), 350 deletions(-)

diff --git a/misc/pytorch/Makefile b/misc/pytorch/Makefile
index fef8cca67935..841af8cd1dbd 100644
--- a/misc/pytorch/Makefile
+++ b/misc/pytorch/Makefile
@@ -1,9 +1,8 @@
 PORTNAME=	pytorch
 DISTVERSIONPREFIX=	v
-DISTVERSION=	1.13.1
-PORTREVISION=	1
+DISTVERSION=	2.1.0
 CATEGORIES=	misc # machine-learning
-MASTER_SITES=	https://github.com/pytorch/pytorch/releases/download/v1.13.1/
+MASTER_SITES=	https://github.com/pytorch/pytorch/releases/download/v${DISTVERSION}/
 DIST_SUBDIR=	${PORTNAME}
 
 MAINTAINER=	yuri@FreeBSD.org
@@ -11,21 +10,22 @@ COMMENT=	Tensors and dynamic neural networks in Python (C++ library)
 WWW=		https://pytorch.org/
 
 LICENSE=	BSD3CLAUSE
+LICENSE_FILE=	${WRKSRC}/LICENSE
 
-BROKEN_aarch64=	configure fails: CMake Error at third_party/XNNPACK/CMakeLists.txt:94 (MESSAGE): Unrecognized CMAKE_SYSTEM_NAME = FreeBSD
-BROKEN_i386=	compilation fails: error: use of undeclared identifier 'AVX2'
-BROKEN_FreeBSD_14=	fails to compile calling a private constructor of class
+#BROKEN_aarch64=	configure fails: CMake Error at third_party/XNNPACK/CMakeLists.txt:94 (MESSAGE): Unrecognized CMAKE_SYSTEM_NAME = FreeBSD
+#BROKEN_i386=	compilation fails: error: use of undeclared identifier 'AVX2'
+#BROKEN_FreeBSD_14=	fails to compile calling a private constructor of class
 
 BUILD_DEPENDS=	gmake:devel/gmake \
 		${LOCALBASE}/include/fxdiv.h:devel/fxdiv
 LIB_DEPENDS=	libopenblas.so:math/openblas \
 		libmpi.so:net/openmpi \
-		libpthreadpool.so:devel/pthreadpool \
 		libonnx.so:misc/onnx \
+		libpthreadpool.so:devel/pthreadpool \
 		libprotobuf.so:devel/protobuf \
 		libsleef.so:math/sleef
 
-USES=		compiler:c++14-lang cmake localbase:ldflags python # requires python even with PYTHON=off
+USES=		compiler:c++17-lang cmake localbase:ldflags python # requires python even with PYTHON=off
 
 CMAKE_OFF=	BUILD_CUSTOM_PROTOBUF USE_CUDA USE_ROCM  USE_NNPACK USE_QNNPACK USE_PYTORCH_QNNPACK \
 		USE_FBGEMM # workaround recommended by the upstream here: https://github.com/pytorch/pytorch/issues/28337
@@ -33,8 +33,7 @@ CMAKE_ON=	USE_SYSTEM_PYBIND11 \
 		USE_SYSTEM_SLEEF \
 		USE_SYSTEM_ONNX # see other USE_SYSTEM_xx in CMakeLists.txt
 CMAKE_ARGS=	-DPSIMD_SOURCE_DIR=${WRKSRC}/third_party/psimd \
-		-DFREEBSD_PYTHON_VER=${PYTHON_VER} \
-		-DPYTHON_EXECUTABLE:STRING=${PYTHON_CMD}
+		-DFREEBSD_PYTHON_VER=${PYTHON_VER}
 
 MAKE_ENV=	USE_NINJA=no # ninja breaks for some reason
 LDFLAGS+=	-lexecinfo
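
The cmake USES framework expands the CMAKE_ON/CMAKE_OFF lists above into
boolean cache entries, so the configure step effectively receives (abridged
sketch, not the literal command line):

    cmake ${WRKSRC} \
        -DBUILD_CUSTOM_PROTOBUF:BOOL=OFF -DUSE_CUDA:BOOL=OFF ... \
        -DUSE_SYSTEM_PYBIND11:BOOL=ON ... \
        -DPSIMD_SOURCE_DIR=${WRKSRC}/third_party/psimd \
        -DFREEBSD_PYTHON_VER=${PYTHON_VER}
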
diff --git a/misc/pytorch/distinfo b/misc/pytorch/distinfo
index 2b0b01afc7df..654de6ea7ac3 100644
--- a/misc/pytorch/distinfo
+++ b/misc/pytorch/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1672353185
-SHA256 (pytorch/pytorch-v1.13.1.tar.gz) = dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4
-SIZE (pytorch/pytorch-v1.13.1.tar.gz) = 234057741
+TIMESTAMP = 1697035721
+SHA256 (pytorch/pytorch-v2.1.0.tar.gz) = 631c71f7f7d6174952f35b5ed4a45ec115720a4ef3eb619678de5893af54f403
+SIZE (pytorch/pytorch-v2.1.0.tar.gz) = 283041980
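
The TIMESTAMP/SHA256/SIZE triplet is regenerated rather than edited by hand;
after a DISTVERSION bump the usual step is:

    cd /usr/ports/misc/pytorch && make makesum
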
diff --git a/misc/pytorch/files/patch-aten_src_ATen_native_SoftMax.cpp b/misc/pytorch/files/patch-aten_src_ATen_native_SoftMax.cpp
deleted file mode 100644
index 6f66aece26dc..000000000000
--- a/misc/pytorch/files/patch-aten_src_ATen_native_SoftMax.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
---- aten/src/ATen/native/SoftMax.cpp.orig	2022-12-29 23:05:30 UTC
-+++ aten/src/ATen/native/SoftMax.cpp
-@@ -132,7 +132,7 @@ void host_softmax(
-     const Tensor& input,
-     const int64_t dim,
-     bool* mask = nullptr,
--    const c10::optional<int64_t> mask_type_ = NULL) {
-+    const c10::optional<int64_t> mask_type_ = 0) {
- 
-   if (MaskedSoftMax) {
-     TORCH_CHECK(mask_type_.has_value(), "Mask Type should be defined");
diff --git a/misc/pytorch/files/patch-aten_src_ATen_native_sparse_ValidateCompressedIndicesCommon.h b/misc/pytorch/files/patch-aten_src_ATen_native_sparse_ValidateCompressedIndicesCommon.h
index 3eba700c307c..70dc6fbd61e2 100644
--- a/misc/pytorch/files/patch-aten_src_ATen_native_sparse_ValidateCompressedIndicesCommon.h
+++ b/misc/pytorch/files/patch-aten_src_ATen_native_sparse_ValidateCompressedIndicesCommon.h
@@ -1,80 +1,78 @@
---- aten/src/ATen/native/sparse/ValidateCompressedIndicesCommon.h.orig	2022-12-29 23:09:42 UTC
+--- aten/src/ATen/native/sparse/ValidateCompressedIndicesCommon.h.orig	2023-05-07 08:51:40 UTC
 +++ aten/src/ATen/native/sparse/ValidateCompressedIndicesCommon.h
-@@ -38,7 +38,7 @@ namespace {
- // respectively.
+@@ -39,7 +39,7 @@ namespace {
+ // use `cidx/idx` to refer to `compressed_indices/plain_indices` respectively.
  
  INVARIANT_CHECK_FUNC_API
 -_assert(const bool cond, const char* const message) {
-+__assert__(const bool cond, const char* const message) {
++__assert(const bool cond, const char* const message) {
  #ifdef GPUCC
    CUDA_KERNEL_ASSERT(cond && message);
  #else
-@@ -58,10 +58,10 @@ INVARIANT_CHECK_FUNC_API
- _check_first_cidx_is_zero(const index_t& cidx, const index_t& zero) {
+@@ -57,9 +57,9 @@ INVARIANT_CHECK_FUNC_API _check_first_cidx_is_zero(
+     const index_t& zero) {
    const bool invariant = cidx == zero;
    if (cdim_name == CDimName::CRow) {
 -    _assert(invariant, "`crow_indices[..., 0] == 0` is not satisfied.");
-+    __assert__(invariant, "`crow_indices[..., 0] == 0` is not satisfied.");
-   }
-   else {
++    __assert(invariant, "`crow_indices[..., 0] == 0` is not satisfied.");
+   } else {
 -    _assert(invariant, "`ccol_indices[..., 0] == 0` is not satisfied.");
-+    __assert__(invariant, "`ccol_indices[..., 0] == 0` is not satisfied.");
++    __assert(invariant, "`ccol_indices[..., 0] == 0` is not satisfied.");
    }
  }
  
-@@ -72,10 +72,10 @@ INVARIANT_CHECK_FUNC_API
- _check_last_cidx_is_nnz(const index_t& cidx, const index_t& nnz) {
+@@ -71,9 +71,9 @@ INVARIANT_CHECK_FUNC_API _check_last_cidx_is_nnz(
+     const index_t& nnz) {
    const bool invariant = cidx == nnz;
    if (cdim_name == CDimName::CRow) {
 -    _assert(invariant, "`crow_indices[..., -1] == nnz` is not satisfied.");
-+    __assert__(invariant, "`crow_indices[..., -1] == nnz` is not satisfied.");
-   }
-   else {
++    __assert(invariant, "`crow_indices[..., -1] == nnz` is not satisfied.");
+   } else {
 -    _assert(invariant, "`ccol_indices[..., -1] == nnz` is not satisfied.");
-+    __assert__(invariant, "`ccol_indices[..., -1] == nnz` is not satisfied.");
++    __assert(invariant, "`ccol_indices[..., -1] == nnz` is not satisfied.");
    }
  }
  
-@@ -91,11 +91,11 @@ _check_cidx_nondecreasing_locally_bounded_sequence(
+@@ -88,11 +88,11 @@ INVARIANT_CHECK_FUNC_API _check_cidx_nondecreasing_loc
    const auto s_cidx = cidx_next - cidx;
    const bool invariant = zero <= s_cidx && s_cidx <= dim;
    if (cdim_name == CDimName::CRow) {
--    _assert(invariant,
-+    __assert__(invariant,
+-    _assert(
++    __assert(
+         invariant,
          "`0 <= crow_indices[..., 1:] - crow_indices[..., :-1] <= ncols` is not satisfied.");
-   }
-   else {
--    _assert(invariant,
-+    __assert__(invariant,
+   } else {
+-    _assert(
++    __assert(
+         invariant,
          "`0 <= ccol_indices[..., 1:] - ccol_indices[..., :-1] <= nrows` is not satisfied.");
    }
- }
-@@ -110,10 +110,10 @@ _check_idx_bounds(
+@@ -107,9 +107,9 @@ INVARIANT_CHECK_FUNC_API _check_idx_bounds(
      const index_t& dim) {
    const bool invariant = zero <= idx && idx < dim;
    if (cdim_name == CDimName::CRow) {
 -    _assert(invariant, "`0 <= col_indices < ncols` is not satisfied.");
-+    __assert__(invariant, "`0 <= col_indices < ncols` is not satisfied.");
-   }
-   else {
++    __assert(invariant, "`0 <= col_indices < ncols` is not satisfied.");
+   } else {
 -    _assert(invariant, "`0 <= row_indices < nrows` is not satisfied.");
-+    __assert__(invariant, "`0 <= row_indices < nrows` is not satisfied.");
++    __assert(invariant, "`0 <= row_indices < nrows` is not satisfied.");
    }
  }
  
-@@ -133,13 +133,13 @@ _check_idx_sorted_distinct_vals_slices_with_cidx(
+@@ -128,14 +128,14 @@ INVARIANT_CHECK_FUNC_API _check_idx_sorted_distinct_va
    for (auto* RESTRICT curr = slice_begin + 1; curr < slice_end; ++curr) {
      const auto invariant = *(curr - 1) < *curr;
      if (cdim_name == CDimName::CRow) {
--      _assert(invariant, "`col_indices[..., crow_indices[..., i - 1]:crow_indices[..., i]] "
-+      __assert__(invariant, "`col_indices[..., crow_indices[..., i - 1]:crow_indices[..., i]] "
-                          "for all i = 1, ..., nrows "
-                          "are sorted and distinct along the last dimension values` "
-                          "is not satisfied.");
-     }
-     else {
--      _assert(invariant, "`row_indices[..., ccol_indices[..., i - 1]:ccol_indices[..., i]] "
-+      __assert__(invariant, "`row_indices[..., ccol_indices[..., i - 1]:ccol_indices[..., i]] "
-                          "for all i = 1, ..., ncols "
-                          "are sorted and distinct along the last dimension values` "
-                          "is not satisfied.");
+-      _assert(
++      __assert(
+           invariant,
+           "`col_indices[..., crow_indices[..., i - 1]:crow_indices[..., i]] "
+           "for all i = 1, ..., nrows "
+           "are sorted and distinct along the last dimension values` "
+           "is not satisfied.");
+     } else {
+-      _assert(
++      __assert(
+           invariant,
+           "`row_indices[..., ccol_indices[..., i - 1]:ccol_indices[..., i]] "
+           "for all i = 1, ..., ncols "
diff --git a/misc/pytorch/files/patch-aten_src_ATen_native_transformers_attention.cpp b/misc/pytorch/files/patch-aten_src_ATen_native_transformers_attention.cpp
deleted file mode 100644
index 0f3383d2260a..000000000000
--- a/misc/pytorch/files/patch-aten_src_ATen_native_transformers_attention.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
---- aten/src/ATen/native/transformers/attention.cpp.orig	2022-12-29 23:18:12 UTC
-+++ aten/src/ATen/native/transformers/attention.cpp
-@@ -118,7 +118,7 @@ Tensor masked_softmax(
-     Tensor& attn_scores,
-     c10::optional<Tensor> attn_mask,
-     const Tensor& query,
--    c10::optional<int64_t> mask_type = NULL) {
-+    c10::optional<int64_t> mask_type = 0) {
-   if (query.is_nested() && !attn_mask) {
-     return at::_nested_tensor_softmax_with_shape(attn_scores, query);
-   }
diff --git a/misc/pytorch/files/patch-cmake_Dependencies.cmake b/misc/pytorch/files/patch-cmake_Dependencies.cmake
index 165534d123bd..0b1436e51889 100644
--- a/misc/pytorch/files/patch-cmake_Dependencies.cmake
+++ b/misc/pytorch/files/patch-cmake_Dependencies.cmake
@@ -1,11 +1,8 @@
---- cmake/Dependencies.cmake.orig	2022-12-16 00:23:46 UTC
+--- cmake/Dependencies.cmake.orig	2023-05-08 19:58:16 UTC
 +++ cmake/Dependencies.cmake
-@@ -339,7 +339,7 @@ if(USE_NNPACK OR USE_QNNPACK OR USE_PYTORCH_QNNPACK OR
-       set(DISABLE_NNPACK_AND_FAMILY ON)
-     endif()
-   else()
--    if(NOT IOS AND NOT (CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux|Darwin|Windows)$"))
-+    if(NOT IOS AND NOT (CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux|FreeBSD|Darwin|Windows)$"))
-       message(WARNING
-         "Target platform \"${CMAKE_SYSTEM_NAME}\" is not supported in {Q/X}NNPACK. "
-         "Supported platforms are Android, iOS, Linux, and macOS. "
+@@ -1,3 +1,5 @@
++set(CMAKE_CXX_STANDARD 17)
++
+ # RPATH stuff
+ # see https://cmake.org/Wiki/CMake_RPATH_handling
+ if(APPLE)
diff --git a/misc/pytorch/files/patch-third__party_cpuinfo_CMakeLists.txt b/misc/pytorch/files/patch-third__party_cpuinfo_CMakeLists.txt
index 755ebd06f98a..059eaaedadcf 100644
--- a/misc/pytorch/files/patch-third__party_cpuinfo_CMakeLists.txt
+++ b/misc/pytorch/files/patch-third__party_cpuinfo_CMakeLists.txt
@@ -1,11 +1,56 @@
---- third_party/cpuinfo/CMakeLists.txt.orig	2022-12-16 00:23:47 UTC
+--- third_party/cpuinfo/CMakeLists.txt.orig	2023-10-16 12:32:17 UTC
 +++ third_party/cpuinfo/CMakeLists.txt
-@@ -77,7 +77,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
+@@ -77,7 +77,7 @@ IF(NOT CMAKE_SYSTEM_PROCESSOR)
+       "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+     SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+   ENDIF()
+-ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64|ARM64)$")
++ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|amd64|x86(_64)?|armv[5-8].*|aarch64|arm64|ARM64)$")
+   MESSAGE(WARNING
+     "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. "
+     "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+@@ -89,7 +89,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
        "Target operating system is not specified. "
        "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
--ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
-+ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|FreeBSD|Android)$")
+-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS|Darwin|Linux|Android)$")
++ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS|Darwin|Linux|FreeBSD|Android)$")
    IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
      MESSAGE(WARNING
        "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
+@@ -135,7 +135,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
+   src/cache.c)
+ 
+ IF(CPUINFO_SUPPORTED_PLATFORM)
+-  IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
++  IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|amd64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
+     LIST(APPEND CPUINFO_SRCS
+       src/x86/init.c
+       src/x86/info.c
+@@ -341,7 +341,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_T
+ # ---[ cpuinfo mock library and mock tests
+ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS)
+   SET(CPUINFO_MOCK_SRCS "${CPUINFO_SRCS}")
+-  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
++  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|amd64|x86(_64)?)$")
+     LIST(APPEND CPUINFO_MOCK_SRCS src/x86/mockcpuid.c)
+   ENDIF()
+   IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+@@ -785,7 +785,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_T
+     ADD_TEST(NAME get-current-test COMMAND get-current-test)
+   ENDIF()
+ 
+-  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
++  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|amd64|x86(_64)?)$")
+     ADD_EXECUTABLE(brand-string-test test/name/brand-string.cc)
+     CPUINFO_TARGET_ENABLE_CXX11(brand-string-test)
+     CPUINFO_TARGET_RUNTIME_LIBRARY(brand-string-test)
+@@ -852,7 +852,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS)
+     CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo-dump)
+   ENDIF()
+ 
+-  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
++  IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|amd64|x86(_64)?)$")
+     ADD_EXECUTABLE(cpuid-dump tools/cpuid-dump.c)
+     CPUINFO_TARGET_ENABLE_C99(cpuid-dump)
+     CPUINFO_TARGET_RUNTIME_LIBRARY(cpuid-dump)
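
The recurring AMD64 -> amd64 edits in this patch account for FreeBSD's
processor naming: in a native build CPUINFO_TARGET_PROCESSOR presumably falls
back to CMake's CMAKE_SYSTEM_PROCESSOR, i.e. the uname value, which FreeBSD
reports in lowercase:

    $ uname -m    # FreeBSD on x86_64 hardware
    amd64
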
diff --git a/misc/pytorch/files/patch-third__party_kineto_libkineto_src_ThreadUtil.cpp b/misc/pytorch/files/patch-third__party_kineto_libkineto_src_ThreadUtil.cpp
index e6ec0eca3790..f014a26e4f20 100644
--- a/misc/pytorch/files/patch-third__party_kineto_libkineto_src_ThreadUtil.cpp
+++ b/misc/pytorch/files/patch-third__party_kineto_libkineto_src_ThreadUtil.cpp
@@ -1,6 +1,6 @@
---- third_party/kineto/libkineto/src/ThreadUtil.cpp.orig	2022-12-29 22:41:51 UTC
+--- third_party/kineto/libkineto/src/ThreadUtil.cpp.orig	2023-04-03 19:46:02 UTC
 +++ third_party/kineto/libkineto/src/ThreadUtil.cpp
-@@ -49,7 +49,7 @@ int32_t systemThreadId() {
+@@ -57,7 +57,7 @@ int32_t systemThreadId() {
  #elif defined _MSC_VER
      _sysTid = (int32_t)GetCurrentThreadId();
  #else
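
Unlike the other patches, this one is a pure refresh: only the .orig
timestamp and the hunk offset changed. After an update, patches under files/
are typically regenerated from the modified work tree with:

    cd /usr/ports/misc/pytorch && make makepatch
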
diff --git a/misc/pytorch/pkg-plist b/misc/pytorch/pkg-plist
index 95479410cab7..ea3ba93c4270 100644
--- a/misc/pytorch/pkg-plist
+++ b/misc/pytorch/pkg-plist
@@ -4,8 +4,6 @@ include/ATen/AccumulateType.h
 include/ATen/ArrayRef.h
 include/ATen/Backend.h
 include/ATen/Backtrace.h
-include/ATen/BatchedFallback.h
-include/ATen/BatchedTensorImpl.h
 include/ATen/CPUApplyUtils.h
 include/ATen/CPUFixedAllocator.h
 include/ATen/CPUFunctions.h
@@ -13,6 +11,7 @@ include/ATen/CPUFunctions_inl.h
 include/ATen/CPUGeneratorImpl.h
 include/ATen/CUDAFunctions.h
 include/ATen/CUDAFunctions_inl.h
+include/ATen/CachedTensorUtils.h
 include/ATen/CollapseDims.h
 include/ATen/CompositeExplicitAutogradFunctions.h
 include/ATen/CompositeExplicitAutogradFunctions_inl.h
@@ -43,6 +42,10 @@ include/ATen/Generator.h
 include/ATen/InferSize.h
 include/ATen/InitialTensorOptions.h
 include/ATen/Layout.h
+include/ATen/LegacyBatchedFallback.h
+include/ATen/LegacyBatchedTensorImpl.h
+include/ATen/LegacyVmapMode.h
+include/ATen/LegacyVmapTransforms.h
 include/ATen/LinalgBackend.h
 include/ATen/MapAllocator.h
 include/ATen/MatrixRef.h
@@ -79,8 +82,8 @@ include/ATen/SmallVector.h
 include/ATen/SparseCsrTensorImpl.h
 include/ATen/SparseCsrTensorUtils.h
 include/ATen/SparseTensorImpl.h
-include/ATen/SparseTensorUtils.h
 include/ATen/Storage.h
+include/ATen/StorageUtils.h
 include/ATen/Tensor.h
 include/ATen/TensorAccessor.h
 include/ATen/TensorGeometry.h
@@ -93,14 +96,13 @@ include/ATen/TensorOperators.h
 include/ATen/TensorOptions.h
 include/ATen/TensorSubclassLikeUtils.h
 include/ATen/TensorUtils.h
+include/ATen/ThreadLocalPythonObjects.h
 include/ATen/ThreadLocalState.h
 include/ATen/TracerMode.h
 include/ATen/TypeDefault.h
 include/ATen/Utils.h
 include/ATen/Version.h
 include/ATen/VmapGeneratedPlumbing.h
-include/ATen/VmapMode.h
-include/ATen/VmapTransforms.h
 include/ATen/WrapDimUtils.h
 include/ATen/WrapDimUtilsMulti.h
 include/ATen/autocast_mode.h
@@ -122,6 +124,7 @@ include/ATen/core/Dimname.h
 include/ATen/core/DistributionsHelper.h
 include/ATen/core/Formatting.h
 include/ATen/core/Generator.h
+include/ATen/core/GeneratorForPrivateuseone.h
 include/ATen/core/IListRef.h
 include/ATen/core/IListRef_inl.h
 include/ATen/core/LegacyTypeDispatch.h
@@ -131,6 +134,7 @@ include/ATen/core/MT19937RNGEngine.h
 include/ATen/core/NamedTensor.h
 include/ATen/core/PhiloxRNGEngine.h
 include/ATen/core/PythonFallbackKernel.h
+include/ATen/core/PythonOpRegistrationTrampoline.h
 include/ATen/core/QuantizerBase.h
 include/ATen/core/Range.h
 include/ATen/core/Reduction.h
@@ -199,6 +203,7 @@ include/ATen/core/type_ptr.h
 include/ATen/core/typeid.h
 include/ATen/cpp_custom_type_hack.h
 include/ATen/cpu/FlushDenormal.h
+include/ATen/cpu/Utils.h
 include/ATen/cpu/vec/functional.h
 include/ATen/cpu/vec/functional_base.h
 include/ATen/cpu/vec/functional_bfloat16.h
@@ -215,6 +220,19 @@ include/ATen/cpu/vec/vec256/vec256_float.h
 include/ATen/cpu/vec/vec256/vec256_float_neon.h
 include/ATen/cpu/vec/vec256/vec256_int.h
 include/ATen/cpu/vec/vec256/vec256_qint.h
+include/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_qint32_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_qint8_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h
+include/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
 include/ATen/cpu/vec/vec512/vec512.h
 include/ATen/cpu/vec/vec512/vec512_bfloat16.h
 include/ATen/cpu/vec/vec512/vec512_complex_double.h
@@ -279,7 +297,11 @@ include/ATen/cudnn/cudnn-wrapper.h
 include/ATen/detail/CUDAHooksInterface.h
 include/ATen/detail/FunctionTraits.h
 include/ATen/detail/HIPHooksInterface.h
+include/ATen/detail/MPSHooksInterface.h
+include/ATen/detail/MTIAHooksInterface.h
 include/ATen/detail/ORTHooksInterface.h
+include/ATen/detail/PrivateUse1HooksInterface.h
+include/ATen/detail/XPUHooksInterface.h
 include/ATen/div_rtn.h
 include/ATen/dlpack.h
 include/ATen/functorch/ADInterpreters.h
@@ -308,6 +330,17 @@ include/ATen/miopen/Handle.h
 include/ATen/miopen/Types.h
 include/ATen/miopen/Utils.h
 include/ATen/miopen/miopen-wrapper.h
+include/ATen/mps/EmptyTensor.h
+include/ATen/mps/IndexKernels.h
+include/ATen/mps/MPSAllocator.h
+include/ATen/mps/MPSAllocatorInterface.h
+include/ATen/mps/MPSDevice.h
+include/ATen/mps/MPSEvent.h
+include/ATen/mps/MPSGeneratorImpl.h
+include/ATen/mps/MPSGuardImpl.h
+include/ATen/mps/MPSHooks.h
+include/ATen/mps/MPSProfiler.h
+include/ATen/mps/MPSStream.h
 include/ATen/native/Activation.h
 include/ATen/native/AdaptivePooling.h
 include/ATen/native/BatchLinearAlgebra.h
@@ -331,6 +364,7 @@ include/ATen/native/Distributions.h
 include/ATen/native/EmbeddingBag.h
 include/ATen/native/Fill.h
 include/ATen/native/ForeachUtils.h
+include/ATen/native/FractionalMaxPooling.h
 include/ATen/native/FunctionOfAMatrixUtils.h
 include/ATen/native/GridSampler.h
 include/ATen/native/GridSamplerUtils.h
@@ -348,6 +382,7 @@ include/ATen/native/MaxPooling.h
 include/ATen/native/NonEmptyUtils.h
 include/ATen/native/NonSymbolicBC.h
 include/ATen/native/Normalization.h
+include/ATen/native/Padding.h
 include/ATen/native/PointwiseOps.h
 include/ATen/native/Pool.h
 include/ATen/native/Pow.h
@@ -356,6 +391,7 @@ include/ATen/native/RangeFactories.h
 include/ATen/native/ReduceAllOps.h
 include/ATen/native/ReduceOps.h
 include/ATen/native/ReduceOpsUtils.h
+include/ATen/native/ReductionType.h
 include/ATen/native/Repeat.h
 include/ATen/native/Resize.h
 include/ATen/native/ResizeCommon.h
@@ -365,6 +401,7 @@ include/ATen/native/SharedReduceOps.h
 include/ATen/native/SobolEngineOpsUtils.h
 include/ATen/native/Sorting.h
 include/ATen/native/SortingUtils.h
+include/ATen/native/SparseTensorUtils.h
 include/ATen/native/SpectralOpsUtils.h
 include/ATen/native/StridedRandomAccessor.h
 include/ATen/native/TensorAdvancedIndexing.h
@@ -401,13 +438,18 @@ include/ATen/native/cpu/GridSamplerKernel.h
 include/ATen/native/cpu/IndexKernelUtils.h
 include/ATen/native/cpu/Intrinsics.h
 include/ATen/native/cpu/IsContiguous.h
+include/ATen/native/cpu/LogAddExp.h
 include/ATen/native/cpu/Loops.h
 include/ATen/native/cpu/MaxUnpoolKernel.h
 include/ATen/native/cpu/PixelShuffleKernel.h
 include/ATen/native/cpu/Reduce.h
+include/ATen/native/cpu/ReduceUtils.h
+include/ATen/native/cpu/SampledAddmmKernel.h
 include/ATen/native/cpu/SerialStackImpl.h
 include/ATen/native/cpu/SoftmaxKernel.h
+include/ATen/native/cpu/SpmmReduceKernel.h
 include/ATen/native/cpu/StackKernel.h
+include/ATen/native/cpu/UpSampleKernelAVXAntialias.h
 include/ATen/native/cpu/WeightNormKernel.h
 include/ATen/native/cpu/avx_mathfun.h
 include/ATen/native/cpu/mixed_data_type.h
@@ -427,6 +469,7 @@ include/ATen/native/cuda/DistributionTemplates.h
 include/ATen/native/cuda/Distributions.h
 include/ATen/native/cuda/EmbeddingBackwardKernel.cuh
 include/ATen/native/cuda/ForeachFunctors.cuh
+include/ATen/native/cuda/ForeachMinMaxFunctors.cuh
 include/ATen/native/cuda/GridSampler.cuh
 include/ATen/native/cuda/GridSampler.h
 include/ATen/native/cuda/IndexKernel.h
@@ -463,6 +506,8 @@ include/ATen/native/cuda/block_reduce.cuh
 include/ATen/native/cuda/fused_adam_amsgrad_impl.cuh
 include/ATen/native/cuda/fused_adam_impl.cuh
 include/ATen/native/cuda/fused_adam_utils.cuh
+include/ATen/native/cuda/fused_adamw_amsgrad_impl.cuh
+include/ATen/native/cuda/fused_adamw_impl.cuh
 include/ATen/native/cuda/im2col.cuh
 include/ATen/native/cuda/jit_utils.h
 include/ATen/native/cuda/reduction_template.cuh
@@ -472,8 +517,15 @@ include/ATen/native/group_norm.h
 include/ATen/native/im2col.h
 include/ATen/native/im2col_shape_check.h
 include/ATen/native/layer_norm.h
+include/ATen/native/mps/Copy.h
+include/ATen/native/mps/MPSGraphVenturaOps.h
+include/ATen/native/mps/OperationUtils.h
+include/ATen/native/mps/TensorFactory.h
+include/ATen/native/mps/UnaryConstants.h
+include/ATen/native/mps/operations/Indexing.h
 include/ATen/native/quantized/AffineQuantizer.h
 include/ATen/native/quantized/AffineQuantizerBase.h
+include/ATen/native/quantized/ConvUtils.h
 include/ATen/native/quantized/Copy.h
 include/ATen/native/quantized/FakeQuantAffine.h
 include/ATen/native/quantized/IndexKernel.h
@@ -492,6 +544,9 @@ include/ATen/native/quantized/cpu/init_qnnpack.h
 include/ATen/native/quantized/cpu/qembeddingbag.h
 include/ATen/native/quantized/cpu/qembeddingbag_prepack.h
 include/ATen/native/quantized/cudnn/utils.h
+include/ATen/native/utils/Factory.h
+include/ATen/native/utils/ParamUtils.h
+include/ATen/native/utils/ParamsHash.h
 include/ATen/native/verbose_wrapper.h
 include/ATen/native/vol2col.h
 include/ATen/ops/_adaptive_avg_pool2d.h
@@ -704,6 +759,14 @@ include/ATen/ops/_copy_from_and_resize_ops.h
 include/ATen/ops/_copy_from_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_copy_from_native.h
 include/ATen/ops/_copy_from_ops.h
+include/ATen/ops/_cslt_compress.h
+include/ATen/ops/_cslt_compress_cuda_dispatch.h
+include/ATen/ops/_cslt_compress_native.h
+include/ATen/ops/_cslt_compress_ops.h
+include/ATen/ops/_cslt_sparse_mm.h
+include/ATen/ops/_cslt_sparse_mm_cuda_dispatch.h
+include/ATen/ops/_cslt_sparse_mm_native.h
+include/ATen/ops/_cslt_sparse_mm_ops.h
 include/ATen/ops/_ctc_loss.h
 include/ATen/ops/_ctc_loss_backward.h
 include/ATen/ops/_ctc_loss_backward_compositeexplicitautograd_dispatch.h
@@ -787,10 +850,19 @@ include/ATen/ops/_dirichlet_grad_cpu_dispatch.h
 include/ATen/ops/_dirichlet_grad_cuda_dispatch.h
 include/ATen/ops/_dirichlet_grad_native.h
 include/ATen/ops/_dirichlet_grad_ops.h
+include/ATen/ops/_efficient_attention_backward.h
+include/ATen/ops/_efficient_attention_backward_cuda_dispatch.h
+include/ATen/ops/_efficient_attention_backward_native.h
+include/ATen/ops/_efficient_attention_backward_ops.h
+include/ATen/ops/_efficient_attention_forward.h
+include/ATen/ops/_efficient_attention_forward_cuda_dispatch.h
+include/ATen/ops/_efficient_attention_forward_native.h
+include/ATen/ops/_efficient_attention_forward_ops.h
 include/ATen/ops/_efficientzerotensor.h
 include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_efficientzerotensor_cpu_dispatch.h
 include/ATen/ops/_efficientzerotensor_cuda_dispatch.h
+include/ATen/ops/_efficientzerotensor_meta_dispatch.h
 include/ATen/ops/_efficientzerotensor_native.h
 include/ATen/ops/_efficientzerotensor_ops.h
 include/ATen/ops/_embedding_bag.h
@@ -882,10 +954,19 @@ include/ATen/ops/_fft_r2c_cpu_dispatch.h
 include/ATen/ops/_fft_r2c_cuda_dispatch.h
 include/ATen/ops/_fft_r2c_native.h
 include/ATen/ops/_fft_r2c_ops.h
-include/ATen/ops/_flash_scaled_dot_product_attention.h
-include/ATen/ops/_flash_scaled_dot_product_attention_cuda_dispatch.h
-include/ATen/ops/_flash_scaled_dot_product_attention_native.h
-include/ATen/ops/_flash_scaled_dot_product_attention_ops.h
+include/ATen/ops/_fill_mem_eff_dropout_mask.h
+include/ATen/ops/_fill_mem_eff_dropout_mask_cuda_dispatch.h
+include/ATen/ops/_fill_mem_eff_dropout_mask_meta_dispatch.h
+include/ATen/ops/_fill_mem_eff_dropout_mask_native.h
+include/ATen/ops/_fill_mem_eff_dropout_mask_ops.h
+include/ATen/ops/_flash_attention_backward.h
+include/ATen/ops/_flash_attention_backward_cuda_dispatch.h
+include/ATen/ops/_flash_attention_backward_native.h
+include/ATen/ops/_flash_attention_backward_ops.h
+include/ATen/ops/_flash_attention_forward.h
+include/ATen/ops/_flash_attention_forward_cuda_dispatch.h
+include/ATen/ops/_flash_attention_forward_native.h
+include/ATen/ops/_flash_attention_forward_ops.h
 include/ATen/ops/_foobar.h
 include/ATen/ops/_foobar_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_foobar_cpu_dispatch.h
@@ -939,6 +1020,24 @@ include/ATen/ops/_foreach_ceil_cpu_dispatch.h
 include/ATen/ops/_foreach_ceil_cuda_dispatch.h
 include/ATen/ops/_foreach_ceil_native.h
 include/ATen/ops/_foreach_ceil_ops.h
+include/ATen/ops/_foreach_clamp_max.h
+include/ATen/ops/_foreach_clamp_max_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_clamp_max_cpu_dispatch.h
+include/ATen/ops/_foreach_clamp_max_cuda_dispatch.h
+include/ATen/ops/_foreach_clamp_max_native.h
+include/ATen/ops/_foreach_clamp_max_ops.h
+include/ATen/ops/_foreach_clamp_min.h
+include/ATen/ops/_foreach_clamp_min_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_clamp_min_cpu_dispatch.h
+include/ATen/ops/_foreach_clamp_min_cuda_dispatch.h
+include/ATen/ops/_foreach_clamp_min_native.h
+include/ATen/ops/_foreach_clamp_min_ops.h
+include/ATen/ops/_foreach_copy.h
+include/ATen/ops/_foreach_copy_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_copy_cpu_dispatch.h
+include/ATen/ops/_foreach_copy_cuda_dispatch.h
+include/ATen/ops/_foreach_copy_native.h
+include/ATen/ops/_foreach_copy_ops.h
 include/ATen/ops/_foreach_cos.h
 include/ATen/ops/_foreach_cos_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_foreach_cos_cpu_dispatch.h
@@ -993,6 +1092,12 @@ include/ATen/ops/_foreach_frac_cpu_dispatch.h
 include/ATen/ops/_foreach_frac_cuda_dispatch.h
 include/ATen/ops/_foreach_frac_native.h
 include/ATen/ops/_foreach_frac_ops.h
+include/ATen/ops/_foreach_lerp.h
+include/ATen/ops/_foreach_lerp_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_lerp_cpu_dispatch.h
+include/ATen/ops/_foreach_lerp_cuda_dispatch.h
+include/ATen/ops/_foreach_lerp_native.h
+include/ATen/ops/_foreach_lerp_ops.h
 include/ATen/ops/_foreach_lgamma.h
 include/ATen/ops/_foreach_lgamma_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_foreach_lgamma_cpu_dispatch.h
@@ -1053,6 +1158,12 @@ include/ATen/ops/_foreach_norm_cpu_dispatch.h
 include/ATen/ops/_foreach_norm_cuda_dispatch.h
 include/ATen/ops/_foreach_norm_native.h
 include/ATen/ops/_foreach_norm_ops.h
+include/ATen/ops/_foreach_pow.h
+include/ATen/ops/_foreach_pow_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_pow_cpu_dispatch.h
+include/ATen/ops/_foreach_pow_cuda_dispatch.h
+include/ATen/ops/_foreach_pow_native.h
+include/ATen/ops/_foreach_pow_ops.h
 include/ATen/ops/_foreach_reciprocal.h
 include/ATen/ops/_foreach_reciprocal_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_foreach_reciprocal_cpu_dispatch.h
@@ -1071,6 +1182,12 @@ include/ATen/ops/_foreach_sigmoid_cpu_dispatch.h
 include/ATen/ops/_foreach_sigmoid_cuda_dispatch.h
 include/ATen/ops/_foreach_sigmoid_native.h
 include/ATen/ops/_foreach_sigmoid_ops.h
+include/ATen/ops/_foreach_sign.h
+include/ATen/ops/_foreach_sign_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_sign_cpu_dispatch.h
+include/ATen/ops/_foreach_sign_cuda_dispatch.h
+include/ATen/ops/_foreach_sign_native.h
+include/ATen/ops/_foreach_sign_ops.h
 include/ATen/ops/_foreach_sin.h
 include/ATen/ops/_foreach_sin_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_foreach_sin_cpu_dispatch.h
@@ -1119,11 +1236,28 @@ include/ATen/ops/_foreach_zero_cpu_dispatch.h
 include/ATen/ops/_foreach_zero_cuda_dispatch.h
 include/ATen/ops/_foreach_zero_native.h
 include/ATen/ops/_foreach_zero_ops.h
+include/ATen/ops/_functional_assert_async.h
+include/ATen/ops/_functional_assert_async_cpu_dispatch.h
+include/ATen/ops/_functional_assert_async_native.h
+include/ATen/ops/_functional_assert_async_ops.h
+include/ATen/ops/_functional_sym_constrain_range.h
+include/ATen/ops/_functional_sym_constrain_range_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_functional_sym_constrain_range_for_size.h
+include/ATen/ops/_functional_sym_constrain_range_for_size_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_functional_sym_constrain_range_for_size_native.h
+include/ATen/ops/_functional_sym_constrain_range_for_size_ops.h
+include/ATen/ops/_functional_sym_constrain_range_native.h
+include/ATen/ops/_functional_sym_constrain_range_ops.h
 include/ATen/ops/_fused_adam.h
 include/ATen/ops/_fused_adam_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_fused_adam_cuda_dispatch.h
 include/ATen/ops/_fused_adam_native.h
 include/ATen/ops/_fused_adam_ops.h
+include/ATen/ops/_fused_adamw.h
+include/ATen/ops/_fused_adamw_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_fused_adamw_cuda_dispatch.h
+include/ATen/ops/_fused_adamw_native.h
+include/ATen/ops/_fused_adamw_ops.h
 include/ATen/ops/_fused_dropout.h
 include/ATen/ops/_fused_dropout_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_fused_dropout_cuda_dispatch.h
@@ -1135,6 +1269,12 @@ include/ATen/ops/_fused_moving_avg_obs_fq_helper_cpu_dispatch.h
 include/ATen/ops/_fused_moving_avg_obs_fq_helper_cuda_dispatch.h
 include/ATen/ops/_fused_moving_avg_obs_fq_helper_native.h
 include/ATen/ops/_fused_moving_avg_obs_fq_helper_ops.h
+include/ATen/ops/_fused_sdp_choice.h
+include/ATen/ops/_fused_sdp_choice_cpu_dispatch.h
+include/ATen/ops/_fused_sdp_choice_cuda_dispatch.h
+include/ATen/ops/_fused_sdp_choice_meta_dispatch.h
+include/ATen/ops/_fused_sdp_choice_native.h
+include/ATen/ops/_fused_sdp_choice_ops.h
 include/ATen/ops/_fw_primal.h
 include/ATen/ops/_fw_primal_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_fw_primal_copy.h
@@ -1194,6 +1334,18 @@ include/ATen/ops/_indices_copy_native.h
 include/ATen/ops/_indices_copy_ops.h
 include/ATen/ops/_indices_native.h
 include/ATen/ops/_indices_ops.h
+include/ATen/ops/_int_mm.h
+include/ATen/ops/_int_mm_cuda_dispatch.h
+include/ATen/ops/_int_mm_native.h
+include/ATen/ops/_int_mm_ops.h
+include/ATen/ops/_is_all_true.h
+include/ATen/ops/_is_all_true_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_is_all_true_native.h
+include/ATen/ops/_is_all_true_ops.h
+include/ATen/ops/_is_any_true.h
+include/ATen/ops/_is_any_true_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_is_any_true_native.h
+include/ATen/ops/_is_any_true_ops.h
 include/ATen/ops/_is_zerotensor.h
 include/ATen/ops/_is_zerotensor_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_is_zerotensor_native.h
@@ -1276,6 +1428,10 @@ include/ATen/ops/_lu_with_info.h
 include/ATen/ops/_lu_with_info_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_lu_with_info_native.h
 include/ATen/ops/_lu_with_info_ops.h
+include/ATen/ops/_make_dep_token.h
+include/ATen/ops/_make_dep_token_cpu_dispatch.h
+include/ATen/ops/_make_dep_token_native.h
+include/ATen/ops/_make_dep_token_ops.h
 include/ATen/ops/_make_dual.h
 include/ATen/ops/_make_dual_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_make_dual_copy.h
@@ -1331,16 +1487,16 @@ include/ATen/ops/_mps_convolution_transpose.h
 include/ATen/ops/_mps_convolution_transpose_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_mps_convolution_transpose_native.h
 include/ATen/ops/_mps_convolution_transpose_ops.h
-include/ATen/ops/_mps_max_pool2d.h
-include/ATen/ops/_mps_max_pool2d_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_mps_max_pool2d_native.h
-include/ATen/ops/_mps_max_pool2d_ops.h
-include/ATen/ops/_native_decoder_only_multi_head_attention.h
-include/ATen/ops/_native_decoder_only_multi_head_attention_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_native_decoder_only_multi_head_attention_cpu_dispatch.h
-include/ATen/ops/_native_decoder_only_multi_head_attention_cuda_dispatch.h
-include/ATen/ops/_native_decoder_only_multi_head_attention_native.h
-include/ATen/ops/_native_decoder_only_multi_head_attention_ops.h
+include/ATen/ops/_native_batch_norm_legit.h
+include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_native_batch_norm_legit_cpu_dispatch.h
+include/ATen/ops/_native_batch_norm_legit_cuda_dispatch.h
+include/ATen/ops/_native_batch_norm_legit_native.h
+include/ATen/ops/_native_batch_norm_legit_no_training.h
+include/ATen/ops/_native_batch_norm_legit_no_training_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_native_batch_norm_legit_no_training_native.h
+include/ATen/ops/_native_batch_norm_legit_no_training_ops.h
+include/ATen/ops/_native_batch_norm_legit_ops.h
 include/ATen/ops/_native_multi_head_attention.h
 include/ATen/ops/_native_multi_head_attention_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_native_multi_head_attention_cpu_dispatch.h
@@ -1387,13 +1543,6 @@ include/ATen/ops/_nested_tensor_from_tensor_list.h
 include/ATen/ops/_nested_tensor_from_tensor_list_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_nested_tensor_from_tensor_list_native.h
 include/ATen/ops/_nested_tensor_from_tensor_list_ops.h
-include/ATen/ops/_nested_tensor_layer_norm.h
-include/ATen/ops/_nested_tensor_layer_norm_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_nested_tensor_layer_norm_native.h
-include/ATen/ops/_nested_tensor_layer_norm_ops.h
-include/ATen/ops/_nested_tensor_offsets.h
-include/ATen/ops/_nested_tensor_offsets_native.h
-include/ATen/ops/_nested_tensor_offsets_ops.h
 include/ATen/ops/_nested_tensor_size.h
 include/ATen/ops/_nested_tensor_size_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_nested_tensor_size_native.h
@@ -1401,6 +1550,10 @@ include/ATen/ops/_nested_tensor_size_ops.h
 include/ATen/ops/_nested_tensor_softmax_with_shape.h
 include/ATen/ops/_nested_tensor_softmax_with_shape_native.h
 include/ATen/ops/_nested_tensor_softmax_with_shape_ops.h
+include/ATen/ops/_nested_tensor_storage_offsets.h
+include/ATen/ops/_nested_tensor_storage_offsets_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_nested_tensor_storage_offsets_native.h
+include/ATen/ops/_nested_tensor_storage_offsets_ops.h
 include/ATen/ops/_nested_tensor_strides.h
 include/ATen/ops/_nested_tensor_strides_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_nested_tensor_strides_native.h
@@ -1467,6 +1620,20 @@ include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_pin_memory_cuda_dispatch.h
 include/ATen/ops/_pin_memory_native.h
 include/ATen/ops/_pin_memory_ops.h
+include/ATen/ops/_prelu_kernel.h
+include/ATen/ops/_prelu_kernel_backward.h
+include/ATen/ops/_prelu_kernel_backward_cpu_dispatch.h
+include/ATen/ops/_prelu_kernel_backward_cuda_dispatch.h
+include/ATen/ops/_prelu_kernel_backward_native.h
+include/ATen/ops/_prelu_kernel_backward_ops.h
+include/ATen/ops/_prelu_kernel_cpu_dispatch.h
+include/ATen/ops/_prelu_kernel_cuda_dispatch.h
+include/ATen/ops/_prelu_kernel_native.h
+include/ATen/ops/_prelu_kernel_ops.h
+include/ATen/ops/_propagate_xla_data.h
+include/ATen/ops/_propagate_xla_data_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_propagate_xla_data_native.h
+include/ATen/ops/_propagate_xla_data_ops.h
 include/ATen/ops/_remove_batch_dim.h
 include/ATen/ops/_remove_batch_dim_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_remove_batch_dim_native.h
@@ -1482,6 +1649,10 @@ include/ATen/ops/_reshape_alias_cuda_dispatch.h
 include/ATen/ops/_reshape_alias_meta_dispatch.h
 include/ATen/ops/_reshape_alias_native.h
 include/ATen/ops/_reshape_alias_ops.h
+include/ATen/ops/_reshape_copy.h
+include/ATen/ops/_reshape_copy_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_reshape_copy_native.h
+include/ATen/ops/_reshape_copy_ops.h
 include/ATen/ops/_reshape_from_tensor.h
 include/ATen/ops/_reshape_from_tensor_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_reshape_from_tensor_native.h
@@ -1505,20 +1676,32 @@ include/ATen/ops/_saturate_weight_to_fp16.h
 include/ATen/ops/_saturate_weight_to_fp16_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_saturate_weight_to_fp16_native.h
 include/ATen/ops/_saturate_weight_to_fp16_ops.h
-include/ATen/ops/_scaled_dot_product_attention.h
-include/ATen/ops/_scaled_dot_product_attention_compositeimplicitautograd_dispatch.h
-include/ATen/ops/_scaled_dot_product_attention_forward.h
-include/ATen/ops/_scaled_dot_product_attention_forward_cpu_dispatch.h
-include/ATen/ops/_scaled_dot_product_attention_forward_cuda_dispatch.h
-include/ATen/ops/_scaled_dot_product_attention_forward_meta_dispatch.h
-include/ATen/ops/_scaled_dot_product_attention_forward_native.h
-include/ATen/ops/_scaled_dot_product_attention_forward_ops.h
 include/ATen/ops/_scaled_dot_product_attention_math.h
 include/ATen/ops/_scaled_dot_product_attention_math_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_scaled_dot_product_attention_math_native.h
 include/ATen/ops/_scaled_dot_product_attention_math_ops.h
-include/ATen/ops/_scaled_dot_product_attention_native.h
-include/ATen/ops/_scaled_dot_product_attention_ops.h
+include/ATen/ops/_scaled_dot_product_efficient_attention.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_backward.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_backward_cuda_dispatch.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_backward_native.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_backward_ops.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_cuda_dispatch.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_native.h
+include/ATen/ops/_scaled_dot_product_efficient_attention_ops.h
+include/ATen/ops/_scaled_dot_product_flash_attention.h
+include/ATen/ops/_scaled_dot_product_flash_attention_backward.h
+include/ATen/ops/_scaled_dot_product_flash_attention_backward_cpu_dispatch.h
+include/ATen/ops/_scaled_dot_product_flash_attention_backward_cuda_dispatch.h
+include/ATen/ops/_scaled_dot_product_flash_attention_backward_native.h
+include/ATen/ops/_scaled_dot_product_flash_attention_backward_ops.h
+include/ATen/ops/_scaled_dot_product_flash_attention_cpu_dispatch.h
+include/ATen/ops/_scaled_dot_product_flash_attention_cuda_dispatch.h
+include/ATen/ops/_scaled_dot_product_flash_attention_native.h
+include/ATen/ops/_scaled_dot_product_flash_attention_ops.h
+include/ATen/ops/_scaled_mm.h
+include/ATen/ops/_scaled_mm_cuda_dispatch.h
+include/ATen/ops/_scaled_mm_native.h
+include/ATen/ops/_scaled_mm_ops.h
 include/ATen/ops/_segment_reduce_backward.h
 include/ATen/ops/_segment_reduce_backward_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_segment_reduce_backward_cpu_dispatch.h
@@ -1635,14 +1818,24 @@ include/ATen/ops/_sparse_log_softmax_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_sparse_log_softmax_native.h
 include/ATen/ops/_sparse_log_softmax_ops.h
-include/ATen/ops/_sparse_mask_helper.h
-include/ATen/ops/_sparse_mask_helper_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_sparse_mask_helper_native.h
-include/ATen/ops/_sparse_mask_helper_ops.h
+include/ATen/ops/_sparse_mask_projection.h
+include/ATen/ops/_sparse_mask_projection_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_sparse_mask_projection_native.h
+include/ATen/ops/_sparse_mask_projection_ops.h
 include/ATen/ops/_sparse_mm.h
 include/ATen/ops/_sparse_mm_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_sparse_mm_native.h
 include/ATen/ops/_sparse_mm_ops.h
+include/ATen/ops/_sparse_mm_reduce_impl.h
+include/ATen/ops/_sparse_mm_reduce_impl_backward.h
+include/ATen/ops/_sparse_mm_reduce_impl_backward_native.h
+include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h
+include/ATen/ops/_sparse_mm_reduce_impl_native.h
+include/ATen/ops/_sparse_mm_reduce_impl_ops.h
+include/ATen/ops/_sparse_semi_structured_linear.h
+include/ATen/ops/_sparse_semi_structured_linear_cuda_dispatch.h
+include/ATen/ops/_sparse_semi_structured_linear_native.h
+include/ATen/ops/_sparse_semi_structured_linear_ops.h
 include/ATen/ops/_sparse_softmax.h
 include/ATen/ops/_sparse_softmax_backward_data.h
 include/ATen/ops/_sparse_softmax_backward_data_compositeexplicitautograd_dispatch.h
@@ -1687,12 +1880,6 @@ include/ATen/ops/_standard_gamma_grad_native.h
 include/ATen/ops/_standard_gamma_grad_ops.h
 include/ATen/ops/_standard_gamma_native.h
 include/ATen/ops/_standard_gamma_ops.h
-include/ATen/ops/_symeig_helper.h
-include/ATen/ops/_symeig_helper_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_symeig_helper_cpu_dispatch.h
-include/ATen/ops/_symeig_helper_cuda_dispatch.h
-include/ATen/ops/_symeig_helper_native.h
-include/ATen/ops/_symeig_helper_ops.h
 include/ATen/ops/_test_ambiguous_defaults.h
 include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h
 include/ATen/ops/_test_ambiguous_defaults_native.h
@@ -1711,6 +1898,15 @@ include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_native.h
 include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_ops.h
 include/ATen/ops/_test_autograd_multiple_dispatch_view_native.h
 include/ATen/ops/_test_autograd_multiple_dispatch_view_ops.h
+include/ATen/ops/_test_check_tensor.h
+include/ATen/ops/_test_check_tensor_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_test_check_tensor_native.h
+include/ATen/ops/_test_check_tensor_ops.h
+include/ATen/ops/_test_functorch_fallback.h
+include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_test_functorch_fallback_cpu_dispatch.h
+include/ATen/ops/_test_functorch_fallback_native.h
+include/ATen/ops/_test_functorch_fallback_ops.h
 include/ATen/ops/_test_optional_filled_intlist.h
 include/ATen/ops/_test_optional_filled_intlist_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_test_optional_filled_intlist_cpu_dispatch.h
@@ -1782,23 +1978,46 @@ include/ATen/ops/_to_dense.h
 include/ATen/ops/_to_dense_compositeexplicitautograd_dispatch.h
 include/ATen/ops/_to_dense_native.h
 include/ATen/ops/_to_dense_ops.h
-include/ATen/ops/_torch_cuda_cu_linker_symbol_op.h
-include/ATen/ops/_torch_cuda_cu_linker_symbol_op_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_torch_cuda_cu_linker_symbol_op_cuda_dispatch.h
-include/ATen/ops/_torch_cuda_cu_linker_symbol_op_native.h
-include/ATen/ops/_torch_cuda_cu_linker_symbol_op_ops.h
+include/ATen/ops/_to_sparse.h
+include/ATen/ops/_to_sparse_bsc.h
+include/ATen/ops/_to_sparse_bsc_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_to_sparse_bsc_cpu_dispatch.h
+include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h
+include/ATen/ops/_to_sparse_bsc_native.h
+include/ATen/ops/_to_sparse_bsc_ops.h
+include/ATen/ops/_to_sparse_bsr.h
+include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_to_sparse_bsr_cpu_dispatch.h
+include/ATen/ops/_to_sparse_bsr_cuda_dispatch.h
+include/ATen/ops/_to_sparse_bsr_native.h
+include/ATen/ops/_to_sparse_bsr_ops.h
+include/ATen/ops/_to_sparse_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_to_sparse_cpu_dispatch.h
+include/ATen/ops/_to_sparse_csc.h
*** 1237 LINES SKIPPED ***
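
The remaining pkg-plist churn is mechanical: after a version bump the list of
installed files is usually re-derived and then hand-reviewed, e.g.:

    cd /usr/ports/misc/pytorch && make makeplist > pkg-plist.new

(make makeplist prints a candidate plist that still needs review before it
replaces pkg-plist.)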