summaryrefslogtreecommitdiff
path: root/misc/pytorch/pkg-plist
diff options
context:
space:
mode:
Diffstat (limited to 'misc/pytorch/pkg-plist')
-rw-r--r--misc/pytorch/pkg-plist317
1 files changed, 266 insertions, 51 deletions
diff --git a/misc/pytorch/pkg-plist b/misc/pytorch/pkg-plist
index 3807aa006697..9ecea36ba731 100644
--- a/misc/pytorch/pkg-plist
+++ b/misc/pytorch/pkg-plist
@@ -73,8 +73,10 @@ include/ATen/ParallelFuture.h
include/ATen/ParallelNative.h
include/ATen/ParallelOpenMP.h
include/ATen/PythonTorchFunctionTLS.h
+include/ATen/ROCmFABackend.h
include/ATen/RedispatchFunctions.h
include/ATen/RegistrationDeclarations.h
+include/ATen/SDPBackend.h
include/ATen/SavedTensorHooks.h
include/ATen/Scalar.h
include/ATen/ScalarOps.h
@@ -212,18 +214,31 @@ include/ATen/cpu/vec/functional.h
include/ATen/cpu/vec/functional_base.h
include/ATen/cpu/vec/functional_bfloat16.h
include/ATen/cpu/vec/intrinsics.h
+include/ATen/cpu/vec/sve/sve_helper.h
+include/ATen/cpu/vec/sve/vec_bfloat16.h
+include/ATen/cpu/vec/sve/vec_common_sve.h
+include/ATen/cpu/vec/sve/vec_double.h
+include/ATen/cpu/vec/sve/vec_float.h
+include/ATen/cpu/vec/sve/vec_int.h
+include/ATen/cpu/vec/sve/vec_qint.h
include/ATen/cpu/vec/vec.h
+include/ATen/cpu/vec/vec128/vec128.h
+include/ATen/cpu/vec/vec128/vec128_bfloat16_neon.h
+include/ATen/cpu/vec/vec128/vec128_convert.h
+include/ATen/cpu/vec/vec128/vec128_float_neon.h
+include/ATen/cpu/vec/vec128/vec128_half_neon.h
+include/ATen/cpu/vec/vec128/vec128_reduced_precision_common_neon.h
include/ATen/cpu/vec/vec256/missing_vld1_neon.h
include/ATen/cpu/vec/vec256/missing_vst1_neon.h
include/ATen/cpu/vec/vec256/vec256.h
+include/ATen/cpu/vec/vec256/vec256_16bit_float.h
include/ATen/cpu/vec/vec256/vec256_bfloat16.h
include/ATen/cpu/vec/vec256/vec256_complex_double.h
include/ATen/cpu/vec/vec256/vec256_complex_float.h
include/ATen/cpu/vec/vec256/vec256_convert.h
include/ATen/cpu/vec/vec256/vec256_double.h
include/ATen/cpu/vec/vec256/vec256_float.h
-include/ATen/cpu/vec/vec256/vec256_float_neon.h
-include/ATen/cpu/vec/vec256/vec256_half_neon.h
+include/ATen/cpu/vec/vec256/vec256_half.h
include/ATen/cpu/vec/vec256/vec256_int.h
include/ATen/cpu/vec/vec256/vec256_mask.h
include/ATen/cpu/vec/vec256/vec256_qint.h
@@ -248,6 +263,7 @@ include/ATen/cpu/vec/vec512/vec512_complex_float.h
include/ATen/cpu/vec/vec512/vec512_convert.h
include/ATen/cpu/vec/vec512/vec512_double.h
include/ATen/cpu/vec/vec512/vec512_float.h
+include/ATen/cpu/vec/vec512/vec512_float8.h
include/ATen/cpu/vec/vec512/vec512_int.h
include/ATen/cpu/vec/vec512/vec512_mask.h
include/ATen/cpu/vec/vec512/vec512_qint.h
@@ -288,6 +304,7 @@ include/ATen/cuda/PinnedMemoryAllocator.h
include/ATen/cuda/ScanUtils.cuh
include/ATen/cuda/Sleep.h
include/ATen/cuda/ThrustAllocator.h
+include/ATen/cuda/cub-RadixSortPairs.cuh
include/ATen/cuda/cub.cuh
include/ATen/cuda/cub.h
include/ATen/cuda/cub_definitions.cuh
@@ -312,7 +329,6 @@ include/ATen/cuda/tunable/Tunable.h
include/ATen/cuda/tunable/TunableGemm.h
include/ATen/cuda/tunable/TunableOp.h
include/ATen/cudnn/Descriptors.h
-include/ATen/cudnn/Exceptions.h
include/ATen/cudnn/Handle.h
include/ATen/cudnn/Handles.h
include/ATen/cudnn/Types.h
@@ -322,6 +338,7 @@ include/ATen/detail/AcceleratorHooksInterface.h
include/ATen/detail/CUDAHooksInterface.h
include/ATen/detail/FunctionTraits.h
include/ATen/detail/HIPHooksInterface.h
+include/ATen/detail/HPUHooksInterface.h
include/ATen/detail/IPUHooksInterface.h
include/ATen/detail/MAIAHooksInterface.h
include/ATen/detail/MPSHooksInterface.h
@@ -396,6 +413,7 @@ include/ATen/native/FunctionOfAMatrixUtils.h
include/ATen/native/FusedAdagrad.h
include/ATen/native/FusedAdam.h
include/ATen/native/FusedSGD.h
+include/ATen/native/Gelu.h
include/ATen/native/GridSampler.h
include/ATen/native/GridSamplerUtils.h
include/ATen/native/Histogram.h
@@ -419,6 +437,7 @@ include/ATen/native/Pool.h
include/ATen/native/Pow.h
include/ATen/native/RNN.h
include/ATen/native/RangeFactories.h
+include/ATen/native/RangeUtils.h
include/ATen/native/ReduceAllOps.h
include/ATen/native/ReduceOps.h
include/ATen/native/ReduceOpsUtils.h
@@ -465,16 +484,20 @@ include/ATen/native/cpu/ChannelShuffleKernel.h
include/ATen/native/cpu/CopyKernel.h
include/ATen/native/cpu/DepthwiseConvKernel.h
include/ATen/native/cpu/DistributionTemplates.h
+include/ATen/native/cpu/Elu.h
+include/ATen/native/cpu/Gelu.h
include/ATen/native/cpu/GridSamplerKernel.h
include/ATen/native/cpu/IndexKernelUtils.h
include/ATen/native/cpu/Intrinsics.h
include/ATen/native/cpu/IsContiguous.h
include/ATen/native/cpu/LogAddExp.h
+include/ATen/native/cpu/LogSoftmaxKernelImpl.h
include/ATen/native/cpu/Loops.h
include/ATen/native/cpu/MaxUnpoolKernel.h
include/ATen/native/cpu/PixelShuffleKernel.h
include/ATen/native/cpu/Reduce.h
include/ATen/native/cpu/ReduceUtils.h
+include/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h
include/ATen/native/cpu/SampledAddmmKernel.h
include/ATen/native/cpu/SerialStackImpl.h
include/ATen/native/cpu/SoftmaxKernel.h
@@ -504,7 +527,10 @@ include/ATen/native/cuda/ForeachFunctors.cuh
include/ATen/native/cuda/ForeachMinMaxFunctors.cuh
include/ATen/native/cuda/GridSampler.cuh
include/ATen/native/cuda/GridSampler.h
+include/ATen/native/cuda/GroupMM.h
+include/ATen/native/cuda/GroupMMCommon.cuh
include/ATen/native/cuda/IndexKernel.h
+include/ATen/native/cuda/IndexKernelUtils.h
include/ATen/native/cuda/JitLoops.cuh
include/ATen/native/cuda/KernelUtils.cuh
include/ATen/native/cuda/LaunchUtils.h
@@ -520,6 +546,8 @@ include/ATen/native/cuda/Randperm.cuh
include/ATen/native/cuda/Reduce.cuh
include/ATen/native/cuda/ReduceOps.h
include/ATen/native/cuda/Resize.h
+include/ATen/native/cuda/RowwiseScaledMM.h
+include/ATen/native/cuda/ScaledGroupMM.h
include/ATen/native/cuda/ScanKernels.h
include/ATen/native/cuda/ScanUtils.cuh
include/ATen/native/cuda/Sort.h
@@ -534,6 +562,7 @@ include/ATen/native/cuda/TensorTopK.h
include/ATen/native/cuda/UniqueCub.cuh
include/ATen/native/cuda/UpSample.cuh
include/ATen/native/cuda/block_reduce.cuh
+include/ATen/native/cuda/cutlass_common.cuh
include/ATen/native/cuda/fused_adam_amsgrad_impl.cuh
include/ATen/native/cuda/fused_adam_impl.cuh
include/ATen/native/cuda/fused_adam_utils.cuh
@@ -545,19 +574,43 @@ include/ATen/native/cuda/reduction_template.cuh
include/ATen/native/cuda/thread_constants.h
include/ATen/native/cuda/vol2col.cuh
include/ATen/native/group_norm.h
+include/ATen/native/hip/bgemm_kernels/bgemm_kernel_collection.h
+include/ATen/native/hip/bgemm_kernels/bgemm_kernel_template.h
+include/ATen/native/hip/ck_bgemm.h
+include/ATen/native/hip/ck_gemm.h
+include/ATen/native/hip/ck_gemm_template.h
+include/ATen/native/hip/ck_types.h
include/ATen/native/im2col.h
include/ATen/native/im2col_shape_check.h
+include/ATen/native/kleidiai/kai_kernels.h
+include/ATen/native/kleidiai/kai_pack.h
+include/ATen/native/kleidiai/kai_ukernel_interface.h
include/ATen/native/layer_norm.h
+include/ATen/native/mkldnn/xpu/Conv.h
+include/ATen/native/mkldnn/xpu/FusionUtils.h
+include/ATen/native/mkldnn/xpu/detail/Attr.h
+include/ATen/native/mkldnn/xpu/detail/DnnlExt.h
+include/ATen/native/mkldnn/xpu/detail/LRUCache.h
+include/ATen/native/mkldnn/xpu/detail/Utils.h
+include/ATen/native/mkldnn/xpu/detail/oneDNN.h
+include/ATen/native/mkldnn/xpu/detail/oneDNNContext.h
include/ATen/native/mps/Copy.h
+include/ATen/native/mps/MPSGraphSequoiaOps.h
include/ATen/native/mps/MPSGraphSonomaOps.h
include/ATen/native/mps/MPSGraphVenturaOps.h
+include/ATen/native/mps/MetalShaderLibrary.h
include/ATen/native/mps/OperationUtils.h
include/ATen/native/mps/TensorFactory.h
-include/ATen/native/mps/UnaryConstants.h
+include/ATen/native/mps/kernels/UpSample.h
include/ATen/native/mps/operations/BinaryKernel.h
+include/ATen/native/mps/operations/FusedAdamAmsgradKernelImpl.h
+include/ATen/native/mps/operations/FusedAdamKernelImpl.h
+include/ATen/native/mps/operations/FusedAdamWAmsgradKernelImpl.h
+include/ATen/native/mps/operations/FusedAdamWKernelImpl.h
include/ATen/native/mps/operations/Indexing.h
+include/ATen/native/mps/operations/MultiTensorApply.h
+include/ATen/native/mtia/EmptyTensor.h
include/ATen/native/nested/NestedTensorBinaryOps.h
-include/ATen/native/nested/NestedTensorFactories.h
include/ATen/native/nested/NestedTensorMath.h
include/ATen/native/nested/NestedTensorTransformerFunctions.h
include/ATen/native/nested/NestedTensorTransformerUtils.h
@@ -569,6 +622,7 @@ include/ATen/native/quantized/Copy.h
include/ATen/native/quantized/FakeQuantAffine.h
include/ATen/native/quantized/IndexKernel.h
include/ATen/native/quantized/PackedParams.h
+include/ATen/native/quantized/cpu/ACLUtils.h
include/ATen/native/quantized/cpu/BinaryOps.h
include/ATen/native/quantized/cpu/EmbeddingPackedParams.h
include/ATen/native/quantized/cpu/OnednnUtils.h
@@ -580,26 +634,15 @@ include/ATen/native/quantized/cpu/XnnpackUtils.h
include/ATen/native/quantized/cpu/conv_serialization.h
include/ATen/native/quantized/cpu/fbgemm_utils.h
include/ATen/native/quantized/cpu/init_qnnpack.h
+include/ATen/native/quantized/cpu/qconv.h
include/ATen/native/quantized/cpu/qembeddingbag.h
include/ATen/native/quantized/cpu/qembeddingbag_prepack.h
+include/ATen/native/quantized/cpu/qlinear.h
include/ATen/native/quantized/cudnn/utils.h
+include/ATen/native/quantized/library.h
include/ATen/native/transformers/attention.h
-include/ATen/native/transformers/cuda/flash_attn/alibi.h
-include/ATen/native/transformers/cuda/flash_attn/block_info.h
-include/ATen/native/transformers/cuda/flash_attn/dropout.h
-include/ATen/native/transformers/cuda/flash_attn/flash.h
include/ATen/native/transformers/cuda/flash_attn/flash_api.h
-include/ATen/native/transformers/cuda/flash_attn/flash_bwd_kernel.h
-include/ATen/native/transformers/cuda/flash_attn/flash_bwd_launch_template.h
-include/ATen/native/transformers/cuda/flash_attn/flash_bwd_preprocess_kernel.h
-include/ATen/native/transformers/cuda/flash_attn/flash_fwd_kernel.h
-include/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h
-include/ATen/native/transformers/cuda/flash_attn/kernel_traits.h
-include/ATen/native/transformers/cuda/flash_attn/mask.h
-include/ATen/native/transformers/cuda/flash_attn/rotary.h
-include/ATen/native/transformers/cuda/flash_attn/softmax.h
include/ATen/native/transformers/cuda/flash_attn/static_switch.h
-include/ATen/native/transformers/cuda/flash_attn/utils.h
include/ATen/native/transformers/cuda/mem_eff_attention/debug_utils.h
include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_pipelined.h
include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_rescale_output.h
@@ -627,6 +670,9 @@ include/ATen/native/transformers/cuda/mem_eff_attention/pytorch_utils.h
include/ATen/native/transformers/cuda/mem_eff_attention/transform/tile_smem_loader.h
include/ATen/native/transformers/cuda/sdp_utils.h
include/ATen/native/transformers/hip/aotriton_adapter.h
+include/ATen/native/transformers/hip/flash_attn/ck/me_ck_api.h
+include/ATen/native/transformers/hip/flash_attn/flash_api.h
+include/ATen/native/transformers/sdp_utils.h
include/ATen/native/transformers/sdp_utils_cpp.h
include/ATen/native/utils/Factory.h
include/ATen/native/utils/ParamUtils.h
@@ -704,7 +750,8 @@ include/ATen/ops/_assert_scalar_compositeexplicitautograd_dispatch.h
include/ATen/ops/_assert_scalar_native.h
include/ATen/ops/_assert_scalar_ops.h
include/ATen/ops/_assert_tensor_metadata.h
-include/ATen/ops/_assert_tensor_metadata_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_assert_tensor_metadata_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_assert_tensor_metadata_meta_dispatch.h
include/ATen/ops/_assert_tensor_metadata_native.h
include/ATen/ops/_assert_tensor_metadata_ops.h
include/ATen/ops/_autocast_to_full_precision.h
@@ -844,8 +891,11 @@ include/ATen/ops/_convert_indices_from_csr_to_coo_meta_dispatch.h
include/ATen/ops/_convert_indices_from_csr_to_coo_native.h
include/ATen/ops/_convert_indices_from_csr_to_coo_ops.h
include/ATen/ops/_convert_weight_to_int4pack.h
-include/ATen/ops/_convert_weight_to_int4pack_cpu_dispatch.h
include/ATen/ops/_convert_weight_to_int4pack_cuda_dispatch.h
+include/ATen/ops/_convert_weight_to_int4pack_for_cpu.h
+include/ATen/ops/_convert_weight_to_int4pack_for_cpu_cpu_dispatch.h
+include/ATen/ops/_convert_weight_to_int4pack_for_cpu_native.h
+include/ATen/ops/_convert_weight_to_int4pack_for_cpu_ops.h
include/ATen/ops/_convert_weight_to_int4pack_native.h
include/ATen/ops/_convert_weight_to_int4pack_ops.h
include/ATen/ops/_convolution.h
@@ -894,6 +944,10 @@ include/ATen/ops/_ctc_loss_cuda_dispatch.h
include/ATen/ops/_ctc_loss_meta_dispatch.h
include/ATen/ops/_ctc_loss_native.h
include/ATen/ops/_ctc_loss_ops.h
+include/ATen/ops/_cudnn_attention_forward.h
+include/ATen/ops/_cudnn_attention_forward_cuda_dispatch.h
+include/ATen/ops/_cudnn_attention_forward_native.h
+include/ATen/ops/_cudnn_attention_forward_ops.h
include/ATen/ops/_cudnn_ctc_loss.h
include/ATen/ops/_cudnn_ctc_loss_compositeexplicitautograd_dispatch.h
include/ATen/ops/_cudnn_ctc_loss_cuda_dispatch.h
@@ -965,6 +1019,14 @@ include/ATen/ops/_dirichlet_grad_cpu_dispatch.h
include/ATen/ops/_dirichlet_grad_cuda_dispatch.h
include/ATen/ops/_dirichlet_grad_native.h
include/ATen/ops/_dirichlet_grad_ops.h
+include/ATen/ops/_dyn_quant_matmul_4bit.h
+include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h
+include/ATen/ops/_dyn_quant_matmul_4bit_native.h
+include/ATen/ops/_dyn_quant_matmul_4bit_ops.h
+include/ATen/ops/_dyn_quant_pack_4bit_weight.h
+include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h
+include/ATen/ops/_dyn_quant_pack_4bit_weight_native.h
+include/ATen/ops/_dyn_quant_pack_4bit_weight_ops.h
include/ATen/ops/_efficient_attention_backward.h
include/ATen/ops/_efficient_attention_backward_cuda_dispatch.h
include/ATen/ops/_efficient_attention_backward_native.h
@@ -982,7 +1044,8 @@ include/ATen/ops/_efficientzerotensor_native.h
include/ATen/ops/_efficientzerotensor_ops.h
include/ATen/ops/_embedding_bag.h
include/ATen/ops/_embedding_bag_backward.h
-include/ATen/ops/_embedding_bag_backward_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h
+include/ATen/ops/_embedding_bag_backward_cuda_dispatch.h
include/ATen/ops/_embedding_bag_backward_native.h
include/ATen/ops/_embedding_bag_backward_ops.h
include/ATen/ops/_embedding_bag_compositeexplicitautograd_dispatch.h
@@ -1262,6 +1325,11 @@ include/ATen/ops/_foreach_round_compositeexplicitautograd_dispatch.h
include/ATen/ops/_foreach_round_cuda_dispatch.h
include/ATen/ops/_foreach_round_native.h
include/ATen/ops/_foreach_round_ops.h
+include/ATen/ops/_foreach_rsqrt.h
+include/ATen/ops/_foreach_rsqrt_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_foreach_rsqrt_cuda_dispatch.h
+include/ATen/ops/_foreach_rsqrt_native.h
+include/ATen/ops/_foreach_rsqrt_ops.h
include/ATen/ops/_foreach_sigmoid.h
include/ATen/ops/_foreach_sigmoid_compositeexplicitautograd_dispatch.h
include/ATen/ops/_foreach_sigmoid_cuda_dispatch.h
@@ -1356,6 +1424,9 @@ include/ATen/ops/_fused_moving_avg_obs_fq_helper_cpu_dispatch.h
include/ATen/ops/_fused_moving_avg_obs_fq_helper_cuda_dispatch.h
include/ATen/ops/_fused_moving_avg_obs_fq_helper_native.h
include/ATen/ops/_fused_moving_avg_obs_fq_helper_ops.h
+include/ATen/ops/_fused_rms_norm.h
+include/ATen/ops/_fused_rms_norm_native.h
+include/ATen/ops/_fused_rms_norm_ops.h
include/ATen/ops/_fused_sdp_choice.h
include/ATen/ops/_fused_sdp_choice_cpu_dispatch.h
include/ATen/ops/_fused_sdp_choice_cuda_dispatch.h
@@ -1389,6 +1460,10 @@ include/ATen/ops/_grid_sampler_2d_cpu_fallback_backward_ops.h
include/ATen/ops/_grid_sampler_2d_cpu_fallback_compositeexplicitautograd_dispatch.h
include/ATen/ops/_grid_sampler_2d_cpu_fallback_native.h
include/ATen/ops/_grid_sampler_2d_cpu_fallback_ops.h
+include/ATen/ops/_grouped_mm.h
+include/ATen/ops/_grouped_mm_cuda_dispatch.h
+include/ATen/ops/_grouped_mm_native.h
+include/ATen/ops/_grouped_mm_ops.h
include/ATen/ops/_has_compatible_shallow_copy_type.h
include/ATen/ops/_has_compatible_shallow_copy_type_compositeimplicitautograd_dispatch.h
include/ATen/ops/_has_compatible_shallow_copy_type_native.h
@@ -1445,6 +1520,7 @@ include/ATen/ops/_is_zerotensor_compositeimplicitautograd_dispatch.h
include/ATen/ops/_is_zerotensor_native.h
include/ATen/ops/_is_zerotensor_ops.h
include/ATen/ops/_jagged_to_padded_dense_forward.h
+include/ATen/ops/_jagged_to_padded_dense_forward_cpu_dispatch.h
include/ATen/ops/_jagged_to_padded_dense_forward_cuda_dispatch.h
include/ATen/ops/_jagged_to_padded_dense_forward_native.h
include/ATen/ops/_jagged_to_padded_dense_forward_ops.h
@@ -1638,12 +1714,21 @@ include/ATen/ops/_nested_from_padded_cpu_dispatch.h
include/ATen/ops/_nested_from_padded_cuda_dispatch.h
include/ATen/ops/_nested_from_padded_native.h
include/ATen/ops/_nested_from_padded_ops.h
+include/ATen/ops/_nested_from_padded_tensor.h
+include/ATen/ops/_nested_from_padded_tensor_native.h
+include/ATen/ops/_nested_from_padded_tensor_ops.h
include/ATen/ops/_nested_get_jagged_dummy.h
include/ATen/ops/_nested_get_jagged_dummy_native.h
include/ATen/ops/_nested_get_jagged_dummy_ops.h
include/ATen/ops/_nested_get_lengths.h
include/ATen/ops/_nested_get_lengths_native.h
include/ATen/ops/_nested_get_lengths_ops.h
+include/ATen/ops/_nested_get_max_seqlen.h
+include/ATen/ops/_nested_get_max_seqlen_native.h
+include/ATen/ops/_nested_get_max_seqlen_ops.h
+include/ATen/ops/_nested_get_min_seqlen.h
+include/ATen/ops/_nested_get_min_seqlen_native.h
+include/ATen/ops/_nested_get_min_seqlen_ops.h
include/ATen/ops/_nested_get_offsets.h
include/ATen/ops/_nested_get_offsets_native.h
include/ATen/ops/_nested_get_offsets_ops.h
@@ -1748,6 +1833,7 @@ include/ATen/ops/_pad_packed_sequence_compositeimplicitautograd_dispatch.h
include/ATen/ops/_pad_packed_sequence_native.h
include/ATen/ops/_pad_packed_sequence_ops.h
include/ATen/ops/_padded_dense_to_jagged_forward.h
+include/ATen/ops/_padded_dense_to_jagged_forward_cpu_dispatch.h
include/ATen/ops/_padded_dense_to_jagged_forward_cuda_dispatch.h
include/ATen/ops/_padded_dense_to_jagged_forward_native.h
include/ATen/ops/_padded_dense_to_jagged_forward_ops.h
@@ -1765,7 +1851,6 @@ include/ATen/ops/_pdist_forward_native.h
include/ATen/ops/_pdist_forward_ops.h
include/ATen/ops/_pin_memory.h
include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h
-include/ATen/ops/_pin_memory_cuda_dispatch.h
include/ATen/ops/_pin_memory_native.h
include/ATen/ops/_pin_memory_ops.h
include/ATen/ops/_prelu_kernel.h
@@ -1818,6 +1903,10 @@ include/ATen/ops/_rowwise_prune.h
include/ATen/ops/_rowwise_prune_compositeimplicitautograd_dispatch.h
include/ATen/ops/_rowwise_prune_native.h
include/ATen/ops/_rowwise_prune_ops.h
+include/ATen/ops/_safe_softmax.h
+include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_safe_softmax_native.h
+include/ATen/ops/_safe_softmax_ops.h
include/ATen/ops/_sample_dirichlet.h
include/ATen/ops/_sample_dirichlet_compositeexplicitautograd_dispatch.h
include/ATen/ops/_sample_dirichlet_cpu_dispatch.h
@@ -1830,6 +1919,9 @@ include/ATen/ops/_saturate_weight_to_fp16_native.h
include/ATen/ops/_saturate_weight_to_fp16_ops.h
include/ATen/ops/_scaled_dot_product_attention_math.h
include/ATen/ops/_scaled_dot_product_attention_math_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_scaled_dot_product_attention_math_for_mps.h
+include/ATen/ops/_scaled_dot_product_attention_math_for_mps_native.h
+include/ATen/ops/_scaled_dot_product_attention_math_for_mps_ops.h
include/ATen/ops/_scaled_dot_product_attention_math_native.h
include/ATen/ops/_scaled_dot_product_attention_math_ops.h
include/ATen/ops/_scaled_dot_product_cudnn_attention.h
@@ -1864,7 +1956,20 @@ include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_native.h
include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_ops.h
include/ATen/ops/_scaled_dot_product_flash_attention_native.h
include/ATen/ops/_scaled_dot_product_flash_attention_ops.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_native.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_native.h
+include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_ops.h
+include/ATen/ops/_scaled_grouped_mm.h
+include/ATen/ops/_scaled_grouped_mm_cuda_dispatch.h
+include/ATen/ops/_scaled_grouped_mm_native.h
+include/ATen/ops/_scaled_grouped_mm_ops.h
include/ATen/ops/_scaled_mm.h
+include/ATen/ops/_scaled_mm_cpu_dispatch.h
include/ATen/ops/_scaled_mm_cuda_dispatch.h
include/ATen/ops/_scaled_mm_native.h
include/ATen/ops/_scaled_mm_ops.h
@@ -2053,6 +2158,9 @@ include/ATen/ops/_spdiags_compositeexplicitautograd_dispatch.h
include/ATen/ops/_spdiags_cpu_dispatch.h
include/ATen/ops/_spdiags_native.h
include/ATen/ops/_spdiags_ops.h
+include/ATen/ops/_spsolve.h
+include/ATen/ops/_spsolve_native.h
+include/ATen/ops/_spsolve_ops.h
include/ATen/ops/_stack.h
include/ATen/ops/_stack_compositeexplicitautograd_dispatch.h
include/ATen/ops/_stack_cpu_dispatch.h
@@ -2257,6 +2365,14 @@ include/ATen/ops/_unsafe_index_put.h
include/ATen/ops/_unsafe_index_put_compositeexplicitautograd_dispatch.h
include/ATen/ops/_unsafe_index_put_native.h
include/ATen/ops/_unsafe_index_put_ops.h
+include/ATen/ops/_unsafe_masked_index.h
+include/ATen/ops/_unsafe_masked_index_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_unsafe_masked_index_native.h
+include/ATen/ops/_unsafe_masked_index_ops.h
+include/ATen/ops/_unsafe_masked_index_put_accumulate.h
+include/ATen/ops/_unsafe_masked_index_put_accumulate_compositeexplicitautograd_dispatch.h
+include/ATen/ops/_unsafe_masked_index_put_accumulate_native.h
+include/ATen/ops/_unsafe_masked_index_put_accumulate_ops.h
include/ATen/ops/_unsafe_view.h
include/ATen/ops/_unsafe_view_compositeexplicitautograd_dispatch.h
include/ATen/ops/_unsafe_view_native.h
@@ -2396,10 +2512,16 @@ include/ATen/ops/_version_compositeimplicitautograd_dispatch.h
include/ATen/ops/_version_native.h
include/ATen/ops/_version_ops.h
include/ATen/ops/_weight_int4pack_mm.h
-include/ATen/ops/_weight_int4pack_mm_cpu_dispatch.h
include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h
+include/ATen/ops/_weight_int4pack_mm_for_cpu.h
+include/ATen/ops/_weight_int4pack_mm_for_cpu_cpu_dispatch.h
+include/ATen/ops/_weight_int4pack_mm_for_cpu_native.h
+include/ATen/ops/_weight_int4pack_mm_for_cpu_ops.h
include/ATen/ops/_weight_int4pack_mm_native.h
include/ATen/ops/_weight_int4pack_mm_ops.h
+include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h
+include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_native.h
+include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_ops.h
include/ATen/ops/_weight_int8pack_mm.h
include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h
include/ATen/ops/_weight_int8pack_mm_native.h
@@ -2424,6 +2546,14 @@ include/ATen/ops/_weight_norm_interface_native.h
include/ATen/ops/_weight_norm_interface_ops.h
include/ATen/ops/_weight_norm_native.h
include/ATen/ops/_weight_norm_ops.h
+include/ATen/ops/_wrapped_linear_prepack.h
+include/ATen/ops/_wrapped_linear_prepack_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_wrapped_linear_prepack_native.h
+include/ATen/ops/_wrapped_linear_prepack_ops.h
+include/ATen/ops/_wrapped_quantized_linear_prepacked.h
+include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h
+include/ATen/ops/_wrapped_quantized_linear_prepacked_native.h
+include/ATen/ops/_wrapped_quantized_linear_prepacked_ops.h
include/ATen/ops/abs.h
include/ATen/ops/abs_compositeexplicitautograd_dispatch.h
include/ATen/ops/abs_cpu_dispatch.h
@@ -2451,6 +2581,7 @@ include/ATen/ops/acosh_meta_dispatch.h
include/ATen/ops/acosh_native.h
include/ATen/ops/acosh_ops.h
include/ATen/ops/adaptive_avg_pool1d.h
+include/ATen/ops/adaptive_avg_pool1d_compositeexplicitautograd_dispatch.h
include/ATen/ops/adaptive_avg_pool1d_compositeimplicitautograd_dispatch.h
include/ATen/ops/adaptive_avg_pool1d_native.h
include/ATen/ops/adaptive_avg_pool1d_ops.h
@@ -2706,10 +2837,7 @@ include/ATen/ops/argmin_meta_dispatch.h
include/ATen/ops/argmin_native.h
include/ATen/ops/argmin_ops.h
include/ATen/ops/argsort.h
-include/ATen/ops/argsort_compositeexplicitautograd_dispatch.h
include/ATen/ops/argsort_compositeimplicitautograd_dispatch.h
-include/ATen/ops/argsort_cpu_dispatch.h
-include/ATen/ops/argsort_cuda_dispatch.h
include/ATen/ops/argsort_native.h
include/ATen/ops/argsort_ops.h
include/ATen/ops/argwhere.h
@@ -2786,6 +2914,7 @@ include/ATen/ops/atleast_3d_compositeimplicitautograd_dispatch.h
include/ATen/ops/atleast_3d_native.h
include/ATen/ops/atleast_3d_ops.h
include/ATen/ops/avg_pool1d.h
+include/ATen/ops/avg_pool1d_compositeexplicitautograd_dispatch.h
include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h
include/ATen/ops/avg_pool1d_native.h
include/ATen/ops/avg_pool1d_ops.h
@@ -4392,7 +4521,6 @@ include/ATen/ops/is_nonzero_native.h
include/ATen/ops/is_nonzero_ops.h
include/ATen/ops/is_pinned.h
include/ATen/ops/is_pinned_compositeexplicitautograd_dispatch.h
-include/ATen/ops/is_pinned_cuda_dispatch.h
include/ATen/ops/is_pinned_native.h
include/ATen/ops/is_pinned_ops.h
include/ATen/ops/is_same_size.h
@@ -5555,6 +5683,7 @@ include/ATen/ops/nonzero_numpy_ops.h
include/ATen/ops/nonzero_ops.h
include/ATen/ops/nonzero_static.h
include/ATen/ops/nonzero_static_cpu_dispatch.h
+include/ATen/ops/nonzero_static_cuda_dispatch.h
include/ATen/ops/nonzero_static_native.h
include/ATen/ops/nonzero_static_ops.h
include/ATen/ops/norm.h
@@ -6120,6 +6249,7 @@ include/ATen/ops/rrelu_with_noise_backward.h
include/ATen/ops/rrelu_with_noise_backward_compositeexplicitautograd_dispatch.h
include/ATen/ops/rrelu_with_noise_backward_native.h
include/ATen/ops/rrelu_with_noise_backward_ops.h
+include/ATen/ops/rrelu_with_noise_compositeexplicitautograd_dispatch.h
include/ATen/ops/rrelu_with_noise_cpu_dispatch.h
include/ATen/ops/rrelu_with_noise_cuda_dispatch.h
include/ATen/ops/rrelu_with_noise_meta_dispatch.h
@@ -7315,6 +7445,7 @@ include/ATen/ops/upsample_bilinear2d_backward_meta.h
include/ATen/ops/upsample_bilinear2d_backward_meta_dispatch.h
include/ATen/ops/upsample_bilinear2d_backward_native.h
include/ATen/ops/upsample_bilinear2d_backward_ops.h
+include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h
include/ATen/ops/upsample_bilinear2d_compositeexplicitautogradnonfunctional_dispatch.h
include/ATen/ops/upsample_bilinear2d_compositeimplicitautograd_dispatch.h
include/ATen/ops/upsample_bilinear2d_cpu_dispatch.h
@@ -7366,6 +7497,7 @@ include/ATen/ops/upsample_nearest2d_backward_meta.h
include/ATen/ops/upsample_nearest2d_backward_meta_dispatch.h
include/ATen/ops/upsample_nearest2d_backward_native.h
include/ATen/ops/upsample_nearest2d_backward_ops.h
+include/ATen/ops/upsample_nearest2d_compositeexplicitautograd_dispatch.h
include/ATen/ops/upsample_nearest2d_compositeexplicitautogradnonfunctional_dispatch.h
include/ATen/ops/upsample_nearest2d_compositeimplicitautograd_dispatch.h
include/ATen/ops/upsample_nearest2d_cpu_dispatch.h
@@ -7535,10 +7667,12 @@ include/ATen/xpu/XPUDevice.h
include/ATen/xpu/XPUEvent.h
include/ATen/xpu/XPUGeneratorImpl.h
include/ATen/xpu/detail/XPUHooks.h
+include/advisor-annotate.h
include/c10/core/Allocator.h
include/c10/core/AutogradState.h
include/c10/core/Backend.h
include/c10/core/CPUAllocator.h
+include/c10/core/CachingDeviceAllocator.h
include/c10/core/CompileTimeFunctionPointer.h
include/c10/core/ConstantSymNodeImpl.h
include/c10/core/Contiguity.h
@@ -7619,6 +7753,14 @@ include/c10/cuda/impl/CUDATest.h
include/c10/macros/Export.h
include/c10/macros/Macros.h
include/c10/macros/cmake_macros.h
+include/c10/metal/atomic.h
+include/c10/metal/common.h
+include/c10/metal/expm1f.h
+include/c10/metal/indexing.h
+include/c10/metal/random.h
+include/c10/metal/reduction_utils.h
+include/c10/metal/special_math.h
+include/c10/metal/utils.h
include/c10/mobile/CPUCachingAllocator.h
include/c10/mobile/CPUProfilingAllocator.h
include/c10/test/util/Macros.h
@@ -7640,11 +7782,14 @@ include/c10/util/ConstexprCrc.h
include/c10/util/DeadlockDetection.h
include/c10/util/Deprecated.h
include/c10/util/DimVector.h
+include/c10/util/DynamicCounter.h
+include/c10/util/Enumerate.h
include/c10/util/Exception.h
include/c10/util/ExclusivelyOwned.h
include/c10/util/ExclusivelyOwnedTensorTraits.h
include/c10/util/FbcodeMaps.h
include/c10/util/Flags.h
+include/c10/util/Float4_e2m1fn_x2.h
include/c10/util/Float8_e4m3fn-inl.h
include/c10/util/Float8_e4m3fn.h
include/c10/util/Float8_e4m3fnuz-inl.h
@@ -7653,11 +7798,15 @@ include/c10/util/Float8_e5m2-inl.h
include/c10/util/Float8_e5m2.h
include/c10/util/Float8_e5m2fnuz-inl.h
include/c10/util/Float8_e5m2fnuz.h
+include/c10/util/Float8_e8m0fnu-inl.h
+include/c10/util/Float8_e8m0fnu.h
include/c10/util/Float8_fnuz_cvt.h
include/c10/util/FunctionRef.h
+include/c10/util/Gauge.h
include/c10/util/Half-inl.h
include/c10/util/Half.h
include/c10/util/IdWrapper.h
+include/c10/util/IntrusiveList.h
include/c10/util/Lazy.h
include/c10/util/LeftRight.h
include/c10/util/Load.h
@@ -7665,11 +7814,13 @@ include/c10/util/Logging.h
include/c10/util/MathConstants.h
include/c10/util/MaybeOwned.h
include/c10/util/Metaprogramming.h
+include/c10/util/NetworkFlow.h
include/c10/util/Optional.h
include/c10/util/OptionalArrayRef.h
include/c10/util/ParallelGuard.h
include/c10/util/Registry.h
include/c10/util/ScopeExit.h
+include/c10/util/Semaphore.h
include/c10/util/SmallBuffer.h
include/c10/util/SmallVector.h
include/c10/util/StringUtil.h
@@ -7685,6 +7836,8 @@ include/c10/util/TypeTraits.h
include/c10/util/Unicode.h
include/c10/util/UniqueVoidPtr.h
include/c10/util/Unroll.h
+include/c10/util/WaitCounter.h
+include/c10/util/WaitCounterDynamicBackend.h
include/c10/util/accumulate.h
include/c10/util/bit_cast.h
include/c10/util/bits.h
@@ -7693,6 +7846,7 @@ include/c10/util/complex_math.h
include/c10/util/complex_utils.h
include/c10/util/copysign.h
include/c10/util/env.h
+include/c10/util/error.h
include/c10/util/flat_hash_map.h
include/c10/util/floating_point_utils.h
include/c10/util/generic_math.h
@@ -7705,6 +7859,7 @@ include/c10/util/logging_is_google_glog.h
include/c10/util/logging_is_not_google_glog.h
include/c10/util/numa.h
include/c10/util/order_preserving_flat_hash_map.h
+include/c10/util/overflows.h
include/c10/util/overloaded.h
include/c10/util/python_stub.h
include/c10/util/qint32.h
@@ -7737,21 +7892,9 @@ include/c10/xpu/test/impl/XPUTest.h
include/caffe2/core/common.h
include/caffe2/core/macros.h
include/caffe2/core/timer.h
-include/caffe2/perfkernels/adagrad.h
-include/caffe2/perfkernels/batch_box_cox.h
+include/caffe2/perfkernels/batch_box_cox_vec.h
include/caffe2/perfkernels/common.h
-include/caffe2/perfkernels/cvtsh_ss_bugfix.h
-include/caffe2/perfkernels/embedding_lookup.h
include/caffe2/perfkernels/embedding_lookup_idx.h
-include/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h
-include/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.h
-include/caffe2/perfkernels/fused_nbit_rowwise_conversion.h
-include/caffe2/perfkernels/lstm_unit_cpu-impl.h
-include/caffe2/perfkernels/lstm_unit_cpu.h
-include/caffe2/perfkernels/lstm_unit_cpu_common.h
-include/caffe2/perfkernels/math.h
-include/caffe2/perfkernels/typed_axpy.h
-include/caffe2/perfkernels/vectorizer.h
include/caffe2/serialize/crc_alt.h
include/caffe2/serialize/file_adapter.h
include/caffe2/serialize/in_memory_adapter.h
@@ -7759,7 +7902,6 @@ include/caffe2/serialize/inline_container.h
include/caffe2/serialize/istream_adapter.h
include/caffe2/serialize/read_adapter_interface.h
include/caffe2/serialize/versions.h
-include/caffe2/utils/conversions.h
include/caffe2/utils/fixed_divisor.h
include/caffe2/utils/proto_wrap.h
include/caffe2/utils/string_utils.h
@@ -7791,6 +7933,9 @@ include/fp16/avx2.py
include/fp16/bitcasts.h
include/fp16/fp16.h
include/fp16/psimd.h
+include/ittnotify-zca.h
+include/ittnotify.h
+include/jitprofiling.h
include/kineto/AbstractConfig.h
include/kineto/ActivityProfilerInterface.h
include/kineto/ActivityTraceInterface.h
@@ -7807,6 +7952,8 @@ include/kineto/TraceSpan.h
include/kineto/libkineto.h
include/kineto/output_base.h
include/kineto/time_since_epoch.h
+include/legacy/ittnotify.h
+include/libittnotify.h
include/libshm.h
%%AMD64%%include/oneapi/dnnl/dnnl.h
%%AMD64%%include/oneapi/dnnl/dnnl.hpp
@@ -7817,6 +7964,8 @@ include/libshm.h
%%AMD64%%include/oneapi/dnnl/dnnl_debug.h
%%AMD64%%include/oneapi/dnnl/dnnl_graph.h
%%AMD64%%include/oneapi/dnnl/dnnl_graph.hpp
+%%AMD64%%include/oneapi/dnnl/dnnl_graph_ocl.h
+%%AMD64%%include/oneapi/dnnl/dnnl_graph_ocl.hpp
%%AMD64%%include/oneapi/dnnl/dnnl_graph_sycl.h
%%AMD64%%include/oneapi/dnnl/dnnl_graph_sycl.hpp
%%AMD64%%include/oneapi/dnnl/dnnl_graph_types.h
@@ -7830,11 +7979,16 @@ include/libshm.h
%%AMD64%%include/oneapi/dnnl/dnnl_threadpool.hpp
%%AMD64%%include/oneapi/dnnl/dnnl_threadpool_iface.hpp
%%AMD64%%include/oneapi/dnnl/dnnl_types.h
+%%AMD64%%include/oneapi/dnnl/dnnl_ukernel.h
+%%AMD64%%include/oneapi/dnnl/dnnl_ukernel.hpp
+%%AMD64%%include/oneapi/dnnl/dnnl_ukernel_types.h
%%AMD64%%include/oneapi/dnnl/dnnl_version.h
+%%AMD64%%include/oneapi/dnnl/dnnl_version_hash.h
include/psimd.h
include/torch/csrc/CudaIPCTypes.h
include/torch/csrc/DataLoader.h
include/torch/csrc/Device.h
+include/torch/csrc/DeviceAccelerator.h
include/torch/csrc/Dtype.h
include/torch/csrc/DynamicTypes.h
include/torch/csrc/Event.h
@@ -7900,7 +8054,6 @@ include/torch/csrc/api/include/torch/expanding_array.h
include/torch/csrc/api/include/torch/fft.h
include/torch/csrc/api/include/torch/imethod.h
include/torch/csrc/api/include/torch/jit.h
-include/torch/csrc/api/include/torch/linalg.h
include/torch/csrc/api/include/torch/mps.h
include/torch/csrc/api/include/torch/nested.h
include/torch/csrc/api/include/torch/nn.h
@@ -8082,6 +8235,7 @@ include/torch/csrc/copy_utils.h
include/torch/csrc/cpu/Module.h
include/torch/csrc/cuda/CUDAPluggableAllocator.h
include/torch/csrc/cuda/Event.h
+include/torch/csrc/cuda/GdsFile.h
include/torch/csrc/cuda/Module.h
include/torch/csrc/cuda/Stream.h
include/torch/csrc/cuda/THCP.h
@@ -8091,6 +8245,7 @@ include/torch/csrc/cuda/memory_snapshot.h
include/torch/csrc/cuda/nccl.h
include/torch/csrc/cuda/python_comm.h
include/torch/csrc/cuda/python_nccl.h
+include/torch/csrc/cuda/utils.h
include/torch/csrc/distributed/autograd/autograd.h
include/torch/csrc/distributed/autograd/context/container.h
include/torch/csrc/distributed/autograd/context/context.h
@@ -8110,20 +8265,24 @@ include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h
include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h
include/torch/csrc/distributed/autograd/utils.h
include/torch/csrc/distributed/c10d/Backend.hpp
+include/torch/csrc/distributed/c10d/Backoff.hpp
include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp
include/torch/csrc/distributed/c10d/FileStore.hpp
+include/torch/csrc/distributed/c10d/FlightRecorder.hpp
+include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp
+include/torch/csrc/distributed/c10d/Functional.hpp
include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp
include/torch/csrc/distributed/c10d/GroupRegistry.hpp
include/torch/csrc/distributed/c10d/HashStore.hpp
include/torch/csrc/distributed/c10d/NCCLUtils.hpp
+include/torch/csrc/distributed/c10d/NanCheck.hpp
include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp
include/torch/csrc/distributed/c10d/PrefixStore.hpp
include/torch/csrc/distributed/c10d/ProcessGroup.hpp
-include/torch/csrc/distributed/c10d/ProcessGroupCudaP2P.hpp
include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp
+include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp
include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp
include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp
-include/torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp
include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp
include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp
include/torch/csrc/distributed/c10d/PyProcessGroup.hpp
@@ -8145,11 +8304,11 @@ include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp
include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp
include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp
include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp
+include/torch/csrc/distributed/c10d/cuda/utils.hpp
include/torch/csrc/distributed/c10d/debug.h
include/torch/csrc/distributed/c10d/default_comm_hooks.hpp
include/torch/csrc/distributed/c10d/error.h
include/torch/csrc/distributed/c10d/exception.h
-include/torch/csrc/distributed/c10d/intra_node_comm.hpp
include/torch/csrc/distributed/c10d/logger.hpp
include/torch/csrc/distributed/c10d/logging.h
include/torch/csrc/distributed/c10d/python_comm_hook.h
@@ -8160,6 +8319,14 @@ include/torch/csrc/distributed/c10d/reducer.hpp
include/torch/csrc/distributed/c10d/reducer_timer.hpp
include/torch/csrc/distributed/c10d/sequence_num.hpp
include/torch/csrc/distributed/c10d/socket.h
+include/torch/csrc/distributed/c10d/socket_fmt.h
+include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h
+include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp
+include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp
+include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp
+include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp
+include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp
+include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp
include/torch/csrc/distributed/rpc/agent_utils.h
include/torch/csrc/distributed/rpc/message.h
include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h
@@ -8196,37 +8363,78 @@ include/torch/csrc/dynamo/cache_entry.h
include/torch/csrc/dynamo/compiled_autograd.h
include/torch/csrc/dynamo/cpp_shim.h
include/torch/csrc/dynamo/cpython_defs.h
+include/torch/csrc/dynamo/cpython_includes.h
include/torch/csrc/dynamo/debug_macros.h
include/torch/csrc/dynamo/eval_frame.h
+include/torch/csrc/dynamo/eval_frame_cpp.h
include/torch/csrc/dynamo/extra_state.h
+include/torch/csrc/dynamo/framelocals_mapping.h
include/torch/csrc/dynamo/guards.h
include/torch/csrc/dynamo/init.h
include/torch/csrc/dynamo/python_compiled_autograd.h
include/torch/csrc/dynamo/utils.h
+include/torch/csrc/export/pt2_archive_constants.h
+include/torch/csrc/export/pybind.h
include/torch/csrc/functorch/init.h
+include/torch/csrc/fx/node.h
include/torch/csrc/inductor/aoti_eager/kernel_holder.h
include/torch/csrc/inductor/aoti_eager/kernel_meta_info.h
+include/torch/csrc/inductor/aoti_include/array_ref.h
+include/torch/csrc/inductor/aoti_include/common.h
+include/torch/csrc/inductor/aoti_include/cpu.h
+include/torch/csrc/inductor/aoti_include/cuda.h
+include/torch/csrc/inductor/aoti_include/mps.h
+include/torch/csrc/inductor/aoti_include/xpu.h
+include/torch/csrc/inductor/aoti_package/model_package_loader.h
+include/torch/csrc/inductor/aoti_package/pybind.h
include/torch/csrc/inductor/aoti_runner/model_container_runner.h
include/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h
include/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h
+include/torch/csrc/inductor/aoti_runner/model_container_runner_mps.h
+include/torch/csrc/inductor/aoti_runner/model_container_runner_xpu.h
include/torch/csrc/inductor/aoti_runner/pybind.h
include/torch/csrc/inductor/aoti_runtime/arrayref_tensor.h
+include/torch/csrc/inductor/aoti_runtime/constant_type.h
include/torch/csrc/inductor/aoti_runtime/device_utils.h
include/torch/csrc/inductor/aoti_runtime/interface.h
+include/torch/csrc/inductor/aoti_runtime/mini_array_ref.h
include/torch/csrc/inductor/aoti_runtime/model.h
include/torch/csrc/inductor/aoti_runtime/model_container.h
include/torch/csrc/inductor/aoti_runtime/scalar_to_tensor.h
+include/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h
include/torch/csrc/inductor/aoti_runtime/thread_local.h
include/torch/csrc/inductor/aoti_runtime/utils.h
include/torch/csrc/inductor/aoti_runtime/utils_cuda.h
+include/torch/csrc/inductor/aoti_runtime/utils_xpu.h
include/torch/csrc/inductor/aoti_torch/c/shim.h
+include/torch/csrc/inductor/aoti_torch/c/shim_cpu.h
+include/torch/csrc/inductor/aoti_torch/c/shim_mps.h
+include/torch/csrc/inductor/aoti_torch/c/shim_xpu.h
include/torch/csrc/inductor/aoti_torch/generated/c_shim_cpu.h
include/torch/csrc/inductor/aoti_torch/generated/c_shim_cuda.h
+include/torch/csrc/inductor/aoti_torch/generated/c_shim_mps.h
+include/torch/csrc/inductor/aoti_torch/generated/c_shim_xpu.h
include/torch/csrc/inductor/aoti_torch/mkldnn_tensor.h
+include/torch/csrc/inductor/aoti_torch/oss_proxy_executor.h
include/torch/csrc/inductor/aoti_torch/proxy_executor.h
include/torch/csrc/inductor/aoti_torch/tensor_converter.h
include/torch/csrc/inductor/aoti_torch/utils.h
+include/torch/csrc/inductor/array_ref_impl.h
+include/torch/csrc/inductor/cpp_prefix.h
+include/torch/csrc/inductor/cpp_wrapper/array_ref.h
+include/torch/csrc/inductor/cpp_wrapper/common.h
+include/torch/csrc/inductor/cpp_wrapper/cpu.h
+include/torch/csrc/inductor/cpp_wrapper/cuda.h
+include/torch/csrc/inductor/cpp_wrapper/device_internal/cpu.h
+include/torch/csrc/inductor/cpp_wrapper/device_internal/cuda.h
+include/torch/csrc/inductor/cpp_wrapper/device_internal/mps.h
+include/torch/csrc/inductor/cpp_wrapper/device_internal/xpu.h
+include/torch/csrc/inductor/cpp_wrapper/mps.h
+include/torch/csrc/inductor/cpp_wrapper/xpu.h
include/torch/csrc/inductor/inductor_ops.h
+include/torch/csrc/inductor/static_cuda_launcher.h
+include/torch/csrc/instruction_counter/Module.h
+include/torch/csrc/itt.h
include/torch/csrc/itt_wrapper.h
include/torch/csrc/jit/api/compilation_unit.h
include/torch/csrc/jit/api/function_impl.h
@@ -8686,10 +8894,9 @@ include/torch/csrc/monitor/events.h
include/torch/csrc/monitor/python_init.h
include/torch/csrc/mps/Module.h
include/torch/csrc/mtia/Module.h
+include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h
include/torch/csrc/multiprocessing/init.h
include/torch/csrc/onnx/back_compat.h
-include/torch/csrc/onnx/diagnostics/diagnostics.h
-include/torch/csrc/onnx/diagnostics/generated/rules.h
include/torch/csrc/onnx/init.h
include/torch/csrc/onnx/onnx.h
include/torch/csrc/profiler/api.h
@@ -8698,6 +8905,7 @@ include/torch/csrc/profiler/combined_traceback.h
include/torch/csrc/profiler/containers.h
include/torch/csrc/profiler/data_flow.h
include/torch/csrc/profiler/events.h
+include/torch/csrc/profiler/kineto_client_interface.h
include/torch/csrc/profiler/kineto_shim.h
include/torch/csrc/profiler/orchestration/observer.h
include/torch/csrc/profiler/orchestration/python_tracer.h
@@ -8732,6 +8940,8 @@ include/torch/csrc/profiler/util.h
include/torch/csrc/python_dimname.h
include/torch/csrc/python_headers.h
include/torch/csrc/serialization.h
+include/torch/csrc/stable/library.h
+include/torch/csrc/stable/tensor.h
include/torch/csrc/tensor/python_tensor.h
include/torch/csrc/utils.h
include/torch/csrc/utils/byte_order.h
@@ -8739,6 +8949,7 @@ include/torch/csrc/utils/cpp_stacktraces.h
include/torch/csrc/utils/cuda_enabled.h
include/torch/csrc/utils/device_lazy_init.h
include/torch/csrc/utils/disable_torch_function.h
+include/torch/csrc/utils/generated_serialization_types.h
include/torch/csrc/utils/init.h
include/torch/csrc/utils/invalid_arguments.h
include/torch/csrc/utils/nested.h
@@ -8784,18 +8995,21 @@ include/torch/csrc/xpu/Stream.h
include/torch/custom_class.h
include/torch/custom_class_detail.h
include/torch/extension.h
+include/torch/headeronly/macros/Export.h
include/torch/library.h
include/torch/script.h
%%AMD64%%lib/cmake/dnnl/dnnl-config-version.cmake
%%AMD64%%lib/cmake/dnnl/dnnl-config.cmake
%%AMD64%%lib/cmake/dnnl/dnnl-targets-%%CMAKE_BUILD_TYPE%%.cmake
%%AMD64%%lib/cmake/dnnl/dnnl-targets.cmake
-%%AMD64%%lib/libCaffe2_perfkernels_avx.a
-%%AMD64%%lib/libCaffe2_perfkernels_avx2.a
-%%AMD64%%lib/libCaffe2_perfkernels_avx512.a
+%%AMD64%%lib/cmake/ittapi/ittapi-targets-%%CMAKE_BUILD_TYPE%%.cmake
+%%AMD64%%lib/cmake/ittapi/ittapi-targets.cmake
+%%AMD64%%lib/cmake/ittapi/ittapiConfig.cmake
+%%AMD64%%lib/cmake/ittapi/ittapiConfigVersion.cmake
lib/libc10.so
lib/libcpuinfo.a
%%AMD64%%lib/libdnnl.a
+lib/libittnotify.a
lib/libkineto.a
lib/libshm.so
lib/libtorch.so
@@ -8809,6 +9023,7 @@ share/cmake/Caffe2/Caffe2Config.cmake
share/cmake/Caffe2/Caffe2Targets-%%CMAKE_BUILD_TYPE%%.cmake
share/cmake/Caffe2/Caffe2Targets.cmake
share/cmake/Caffe2/FindCUDAToolkit.cmake
+share/cmake/Caffe2/FindCUDSS.cmake
share/cmake/Caffe2/FindCUSPARSELT.cmake
share/cmake/Caffe2/FindSYCLToolkit.cmake
share/cmake/Caffe2/Modules_CUDA_fix/FindCUDA.cmake