diff options
Diffstat (limited to 'misc/pytorch/pkg-plist')
-rw-r--r-- | misc/pytorch/pkg-plist | 317 |
1 files changed, 266 insertions, 51 deletions
diff --git a/misc/pytorch/pkg-plist b/misc/pytorch/pkg-plist index 3807aa006697..9ecea36ba731 100644 --- a/misc/pytorch/pkg-plist +++ b/misc/pytorch/pkg-plist @@ -73,8 +73,10 @@ include/ATen/ParallelFuture.h include/ATen/ParallelNative.h include/ATen/ParallelOpenMP.h include/ATen/PythonTorchFunctionTLS.h +include/ATen/ROCmFABackend.h include/ATen/RedispatchFunctions.h include/ATen/RegistrationDeclarations.h +include/ATen/SDPBackend.h include/ATen/SavedTensorHooks.h include/ATen/Scalar.h include/ATen/ScalarOps.h @@ -212,18 +214,31 @@ include/ATen/cpu/vec/functional.h include/ATen/cpu/vec/functional_base.h include/ATen/cpu/vec/functional_bfloat16.h include/ATen/cpu/vec/intrinsics.h +include/ATen/cpu/vec/sve/sve_helper.h +include/ATen/cpu/vec/sve/vec_bfloat16.h +include/ATen/cpu/vec/sve/vec_common_sve.h +include/ATen/cpu/vec/sve/vec_double.h +include/ATen/cpu/vec/sve/vec_float.h +include/ATen/cpu/vec/sve/vec_int.h +include/ATen/cpu/vec/sve/vec_qint.h include/ATen/cpu/vec/vec.h +include/ATen/cpu/vec/vec128/vec128.h +include/ATen/cpu/vec/vec128/vec128_bfloat16_neon.h +include/ATen/cpu/vec/vec128/vec128_convert.h +include/ATen/cpu/vec/vec128/vec128_float_neon.h +include/ATen/cpu/vec/vec128/vec128_half_neon.h +include/ATen/cpu/vec/vec128/vec128_reduced_precision_common_neon.h include/ATen/cpu/vec/vec256/missing_vld1_neon.h include/ATen/cpu/vec/vec256/missing_vst1_neon.h include/ATen/cpu/vec/vec256/vec256.h +include/ATen/cpu/vec/vec256/vec256_16bit_float.h include/ATen/cpu/vec/vec256/vec256_bfloat16.h include/ATen/cpu/vec/vec256/vec256_complex_double.h include/ATen/cpu/vec/vec256/vec256_complex_float.h include/ATen/cpu/vec/vec256/vec256_convert.h include/ATen/cpu/vec/vec256/vec256_double.h include/ATen/cpu/vec/vec256/vec256_float.h -include/ATen/cpu/vec/vec256/vec256_float_neon.h -include/ATen/cpu/vec/vec256/vec256_half_neon.h +include/ATen/cpu/vec/vec256/vec256_half.h include/ATen/cpu/vec/vec256/vec256_int.h include/ATen/cpu/vec/vec256/vec256_mask.h include/ATen/cpu/vec/vec256/vec256_qint.h @@ -248,6 +263,7 @@ include/ATen/cpu/vec/vec512/vec512_complex_float.h include/ATen/cpu/vec/vec512/vec512_convert.h include/ATen/cpu/vec/vec512/vec512_double.h include/ATen/cpu/vec/vec512/vec512_float.h +include/ATen/cpu/vec/vec512/vec512_float8.h include/ATen/cpu/vec/vec512/vec512_int.h include/ATen/cpu/vec/vec512/vec512_mask.h include/ATen/cpu/vec/vec512/vec512_qint.h @@ -288,6 +304,7 @@ include/ATen/cuda/PinnedMemoryAllocator.h include/ATen/cuda/ScanUtils.cuh include/ATen/cuda/Sleep.h include/ATen/cuda/ThrustAllocator.h +include/ATen/cuda/cub-RadixSortPairs.cuh include/ATen/cuda/cub.cuh include/ATen/cuda/cub.h include/ATen/cuda/cub_definitions.cuh @@ -312,7 +329,6 @@ include/ATen/cuda/tunable/Tunable.h include/ATen/cuda/tunable/TunableGemm.h include/ATen/cuda/tunable/TunableOp.h include/ATen/cudnn/Descriptors.h -include/ATen/cudnn/Exceptions.h include/ATen/cudnn/Handle.h include/ATen/cudnn/Handles.h include/ATen/cudnn/Types.h @@ -322,6 +338,7 @@ include/ATen/detail/AcceleratorHooksInterface.h include/ATen/detail/CUDAHooksInterface.h include/ATen/detail/FunctionTraits.h include/ATen/detail/HIPHooksInterface.h +include/ATen/detail/HPUHooksInterface.h include/ATen/detail/IPUHooksInterface.h include/ATen/detail/MAIAHooksInterface.h include/ATen/detail/MPSHooksInterface.h @@ -396,6 +413,7 @@ include/ATen/native/FunctionOfAMatrixUtils.h include/ATen/native/FusedAdagrad.h include/ATen/native/FusedAdam.h include/ATen/native/FusedSGD.h +include/ATen/native/Gelu.h include/ATen/native/GridSampler.h include/ATen/native/GridSamplerUtils.h include/ATen/native/Histogram.h @@ -419,6 +437,7 @@ include/ATen/native/Pool.h include/ATen/native/Pow.h include/ATen/native/RNN.h include/ATen/native/RangeFactories.h +include/ATen/native/RangeUtils.h include/ATen/native/ReduceAllOps.h include/ATen/native/ReduceOps.h include/ATen/native/ReduceOpsUtils.h @@ -465,16 +484,20 @@ include/ATen/native/cpu/ChannelShuffleKernel.h include/ATen/native/cpu/CopyKernel.h include/ATen/native/cpu/DepthwiseConvKernel.h include/ATen/native/cpu/DistributionTemplates.h +include/ATen/native/cpu/Elu.h +include/ATen/native/cpu/Gelu.h include/ATen/native/cpu/GridSamplerKernel.h include/ATen/native/cpu/IndexKernelUtils.h include/ATen/native/cpu/Intrinsics.h include/ATen/native/cpu/IsContiguous.h include/ATen/native/cpu/LogAddExp.h +include/ATen/native/cpu/LogSoftmaxKernelImpl.h include/ATen/native/cpu/Loops.h include/ATen/native/cpu/MaxUnpoolKernel.h include/ATen/native/cpu/PixelShuffleKernel.h include/ATen/native/cpu/Reduce.h include/ATen/native/cpu/ReduceUtils.h +include/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h include/ATen/native/cpu/SampledAddmmKernel.h include/ATen/native/cpu/SerialStackImpl.h include/ATen/native/cpu/SoftmaxKernel.h @@ -504,7 +527,10 @@ include/ATen/native/cuda/ForeachFunctors.cuh include/ATen/native/cuda/ForeachMinMaxFunctors.cuh include/ATen/native/cuda/GridSampler.cuh include/ATen/native/cuda/GridSampler.h +include/ATen/native/cuda/GroupMM.h +include/ATen/native/cuda/GroupMMCommon.cuh include/ATen/native/cuda/IndexKernel.h +include/ATen/native/cuda/IndexKernelUtils.h include/ATen/native/cuda/JitLoops.cuh include/ATen/native/cuda/KernelUtils.cuh include/ATen/native/cuda/LaunchUtils.h @@ -520,6 +546,8 @@ include/ATen/native/cuda/Randperm.cuh include/ATen/native/cuda/Reduce.cuh include/ATen/native/cuda/ReduceOps.h include/ATen/native/cuda/Resize.h +include/ATen/native/cuda/RowwiseScaledMM.h +include/ATen/native/cuda/ScaledGroupMM.h include/ATen/native/cuda/ScanKernels.h include/ATen/native/cuda/ScanUtils.cuh include/ATen/native/cuda/Sort.h @@ -534,6 +562,7 @@ include/ATen/native/cuda/TensorTopK.h include/ATen/native/cuda/UniqueCub.cuh include/ATen/native/cuda/UpSample.cuh include/ATen/native/cuda/block_reduce.cuh +include/ATen/native/cuda/cutlass_common.cuh include/ATen/native/cuda/fused_adam_amsgrad_impl.cuh include/ATen/native/cuda/fused_adam_impl.cuh include/ATen/native/cuda/fused_adam_utils.cuh @@ -545,19 +574,43 @@ include/ATen/native/cuda/reduction_template.cuh include/ATen/native/cuda/thread_constants.h include/ATen/native/cuda/vol2col.cuh include/ATen/native/group_norm.h +include/ATen/native/hip/bgemm_kernels/bgemm_kernel_collection.h +include/ATen/native/hip/bgemm_kernels/bgemm_kernel_template.h +include/ATen/native/hip/ck_bgemm.h +include/ATen/native/hip/ck_gemm.h +include/ATen/native/hip/ck_gemm_template.h +include/ATen/native/hip/ck_types.h include/ATen/native/im2col.h include/ATen/native/im2col_shape_check.h +include/ATen/native/kleidiai/kai_kernels.h +include/ATen/native/kleidiai/kai_pack.h +include/ATen/native/kleidiai/kai_ukernel_interface.h include/ATen/native/layer_norm.h +include/ATen/native/mkldnn/xpu/Conv.h +include/ATen/native/mkldnn/xpu/FusionUtils.h +include/ATen/native/mkldnn/xpu/detail/Attr.h +include/ATen/native/mkldnn/xpu/detail/DnnlExt.h +include/ATen/native/mkldnn/xpu/detail/LRUCache.h +include/ATen/native/mkldnn/xpu/detail/Utils.h +include/ATen/native/mkldnn/xpu/detail/oneDNN.h +include/ATen/native/mkldnn/xpu/detail/oneDNNContext.h include/ATen/native/mps/Copy.h +include/ATen/native/mps/MPSGraphSequoiaOps.h include/ATen/native/mps/MPSGraphSonomaOps.h include/ATen/native/mps/MPSGraphVenturaOps.h +include/ATen/native/mps/MetalShaderLibrary.h include/ATen/native/mps/OperationUtils.h include/ATen/native/mps/TensorFactory.h -include/ATen/native/mps/UnaryConstants.h +include/ATen/native/mps/kernels/UpSample.h include/ATen/native/mps/operations/BinaryKernel.h +include/ATen/native/mps/operations/FusedAdamAmsgradKernelImpl.h +include/ATen/native/mps/operations/FusedAdamKernelImpl.h +include/ATen/native/mps/operations/FusedAdamWAmsgradKernelImpl.h +include/ATen/native/mps/operations/FusedAdamWKernelImpl.h include/ATen/native/mps/operations/Indexing.h +include/ATen/native/mps/operations/MultiTensorApply.h +include/ATen/native/mtia/EmptyTensor.h include/ATen/native/nested/NestedTensorBinaryOps.h -include/ATen/native/nested/NestedTensorFactories.h include/ATen/native/nested/NestedTensorMath.h include/ATen/native/nested/NestedTensorTransformerFunctions.h include/ATen/native/nested/NestedTensorTransformerUtils.h @@ -569,6 +622,7 @@ include/ATen/native/quantized/Copy.h include/ATen/native/quantized/FakeQuantAffine.h include/ATen/native/quantized/IndexKernel.h include/ATen/native/quantized/PackedParams.h +include/ATen/native/quantized/cpu/ACLUtils.h include/ATen/native/quantized/cpu/BinaryOps.h include/ATen/native/quantized/cpu/EmbeddingPackedParams.h include/ATen/native/quantized/cpu/OnednnUtils.h @@ -580,26 +634,15 @@ include/ATen/native/quantized/cpu/XnnpackUtils.h include/ATen/native/quantized/cpu/conv_serialization.h include/ATen/native/quantized/cpu/fbgemm_utils.h include/ATen/native/quantized/cpu/init_qnnpack.h +include/ATen/native/quantized/cpu/qconv.h include/ATen/native/quantized/cpu/qembeddingbag.h include/ATen/native/quantized/cpu/qembeddingbag_prepack.h +include/ATen/native/quantized/cpu/qlinear.h include/ATen/native/quantized/cudnn/utils.h +include/ATen/native/quantized/library.h include/ATen/native/transformers/attention.h -include/ATen/native/transformers/cuda/flash_attn/alibi.h -include/ATen/native/transformers/cuda/flash_attn/block_info.h -include/ATen/native/transformers/cuda/flash_attn/dropout.h -include/ATen/native/transformers/cuda/flash_attn/flash.h include/ATen/native/transformers/cuda/flash_attn/flash_api.h -include/ATen/native/transformers/cuda/flash_attn/flash_bwd_kernel.h -include/ATen/native/transformers/cuda/flash_attn/flash_bwd_launch_template.h -include/ATen/native/transformers/cuda/flash_attn/flash_bwd_preprocess_kernel.h -include/ATen/native/transformers/cuda/flash_attn/flash_fwd_kernel.h -include/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h -include/ATen/native/transformers/cuda/flash_attn/kernel_traits.h -include/ATen/native/transformers/cuda/flash_attn/mask.h -include/ATen/native/transformers/cuda/flash_attn/rotary.h -include/ATen/native/transformers/cuda/flash_attn/softmax.h include/ATen/native/transformers/cuda/flash_attn/static_switch.h -include/ATen/native/transformers/cuda/flash_attn/utils.h include/ATen/native/transformers/cuda/mem_eff_attention/debug_utils.h include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_pipelined.h include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_rescale_output.h @@ -627,6 +670,9 @@ include/ATen/native/transformers/cuda/mem_eff_attention/pytorch_utils.h include/ATen/native/transformers/cuda/mem_eff_attention/transform/tile_smem_loader.h include/ATen/native/transformers/cuda/sdp_utils.h include/ATen/native/transformers/hip/aotriton_adapter.h +include/ATen/native/transformers/hip/flash_attn/ck/me_ck_api.h +include/ATen/native/transformers/hip/flash_attn/flash_api.h +include/ATen/native/transformers/sdp_utils.h include/ATen/native/transformers/sdp_utils_cpp.h include/ATen/native/utils/Factory.h include/ATen/native/utils/ParamUtils.h @@ -704,7 +750,8 @@ include/ATen/ops/_assert_scalar_compositeexplicitautograd_dispatch.h include/ATen/ops/_assert_scalar_native.h include/ATen/ops/_assert_scalar_ops.h include/ATen/ops/_assert_tensor_metadata.h -include/ATen/ops/_assert_tensor_metadata_compositeimplicitautograd_dispatch.h +include/ATen/ops/_assert_tensor_metadata_compositeexplicitautograd_dispatch.h +include/ATen/ops/_assert_tensor_metadata_meta_dispatch.h include/ATen/ops/_assert_tensor_metadata_native.h include/ATen/ops/_assert_tensor_metadata_ops.h include/ATen/ops/_autocast_to_full_precision.h @@ -844,8 +891,11 @@ include/ATen/ops/_convert_indices_from_csr_to_coo_meta_dispatch.h include/ATen/ops/_convert_indices_from_csr_to_coo_native.h include/ATen/ops/_convert_indices_from_csr_to_coo_ops.h include/ATen/ops/_convert_weight_to_int4pack.h -include/ATen/ops/_convert_weight_to_int4pack_cpu_dispatch.h include/ATen/ops/_convert_weight_to_int4pack_cuda_dispatch.h +include/ATen/ops/_convert_weight_to_int4pack_for_cpu.h +include/ATen/ops/_convert_weight_to_int4pack_for_cpu_cpu_dispatch.h +include/ATen/ops/_convert_weight_to_int4pack_for_cpu_native.h +include/ATen/ops/_convert_weight_to_int4pack_for_cpu_ops.h include/ATen/ops/_convert_weight_to_int4pack_native.h include/ATen/ops/_convert_weight_to_int4pack_ops.h include/ATen/ops/_convolution.h @@ -894,6 +944,10 @@ include/ATen/ops/_ctc_loss_cuda_dispatch.h include/ATen/ops/_ctc_loss_meta_dispatch.h include/ATen/ops/_ctc_loss_native.h include/ATen/ops/_ctc_loss_ops.h +include/ATen/ops/_cudnn_attention_forward.h +include/ATen/ops/_cudnn_attention_forward_cuda_dispatch.h +include/ATen/ops/_cudnn_attention_forward_native.h +include/ATen/ops/_cudnn_attention_forward_ops.h include/ATen/ops/_cudnn_ctc_loss.h include/ATen/ops/_cudnn_ctc_loss_compositeexplicitautograd_dispatch.h include/ATen/ops/_cudnn_ctc_loss_cuda_dispatch.h @@ -965,6 +1019,14 @@ include/ATen/ops/_dirichlet_grad_cpu_dispatch.h include/ATen/ops/_dirichlet_grad_cuda_dispatch.h include/ATen/ops/_dirichlet_grad_native.h include/ATen/ops/_dirichlet_grad_ops.h +include/ATen/ops/_dyn_quant_matmul_4bit.h +include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h +include/ATen/ops/_dyn_quant_matmul_4bit_native.h +include/ATen/ops/_dyn_quant_matmul_4bit_ops.h +include/ATen/ops/_dyn_quant_pack_4bit_weight.h +include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h +include/ATen/ops/_dyn_quant_pack_4bit_weight_native.h +include/ATen/ops/_dyn_quant_pack_4bit_weight_ops.h include/ATen/ops/_efficient_attention_backward.h include/ATen/ops/_efficient_attention_backward_cuda_dispatch.h include/ATen/ops/_efficient_attention_backward_native.h @@ -982,7 +1044,8 @@ include/ATen/ops/_efficientzerotensor_native.h include/ATen/ops/_efficientzerotensor_ops.h include/ATen/ops/_embedding_bag.h include/ATen/ops/_embedding_bag_backward.h -include/ATen/ops/_embedding_bag_backward_compositeimplicitautograd_dispatch.h +include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h +include/ATen/ops/_embedding_bag_backward_cuda_dispatch.h include/ATen/ops/_embedding_bag_backward_native.h include/ATen/ops/_embedding_bag_backward_ops.h include/ATen/ops/_embedding_bag_compositeexplicitautograd_dispatch.h @@ -1262,6 +1325,11 @@ include/ATen/ops/_foreach_round_compositeexplicitautograd_dispatch.h include/ATen/ops/_foreach_round_cuda_dispatch.h include/ATen/ops/_foreach_round_native.h include/ATen/ops/_foreach_round_ops.h +include/ATen/ops/_foreach_rsqrt.h +include/ATen/ops/_foreach_rsqrt_compositeexplicitautograd_dispatch.h +include/ATen/ops/_foreach_rsqrt_cuda_dispatch.h +include/ATen/ops/_foreach_rsqrt_native.h +include/ATen/ops/_foreach_rsqrt_ops.h include/ATen/ops/_foreach_sigmoid.h include/ATen/ops/_foreach_sigmoid_compositeexplicitautograd_dispatch.h include/ATen/ops/_foreach_sigmoid_cuda_dispatch.h @@ -1356,6 +1424,9 @@ include/ATen/ops/_fused_moving_avg_obs_fq_helper_cpu_dispatch.h include/ATen/ops/_fused_moving_avg_obs_fq_helper_cuda_dispatch.h include/ATen/ops/_fused_moving_avg_obs_fq_helper_native.h include/ATen/ops/_fused_moving_avg_obs_fq_helper_ops.h +include/ATen/ops/_fused_rms_norm.h +include/ATen/ops/_fused_rms_norm_native.h +include/ATen/ops/_fused_rms_norm_ops.h include/ATen/ops/_fused_sdp_choice.h include/ATen/ops/_fused_sdp_choice_cpu_dispatch.h include/ATen/ops/_fused_sdp_choice_cuda_dispatch.h @@ -1389,6 +1460,10 @@ include/ATen/ops/_grid_sampler_2d_cpu_fallback_backward_ops.h include/ATen/ops/_grid_sampler_2d_cpu_fallback_compositeexplicitautograd_dispatch.h include/ATen/ops/_grid_sampler_2d_cpu_fallback_native.h include/ATen/ops/_grid_sampler_2d_cpu_fallback_ops.h +include/ATen/ops/_grouped_mm.h +include/ATen/ops/_grouped_mm_cuda_dispatch.h +include/ATen/ops/_grouped_mm_native.h +include/ATen/ops/_grouped_mm_ops.h include/ATen/ops/_has_compatible_shallow_copy_type.h include/ATen/ops/_has_compatible_shallow_copy_type_compositeimplicitautograd_dispatch.h include/ATen/ops/_has_compatible_shallow_copy_type_native.h @@ -1445,6 +1520,7 @@ include/ATen/ops/_is_zerotensor_compositeimplicitautograd_dispatch.h include/ATen/ops/_is_zerotensor_native.h include/ATen/ops/_is_zerotensor_ops.h include/ATen/ops/_jagged_to_padded_dense_forward.h +include/ATen/ops/_jagged_to_padded_dense_forward_cpu_dispatch.h include/ATen/ops/_jagged_to_padded_dense_forward_cuda_dispatch.h include/ATen/ops/_jagged_to_padded_dense_forward_native.h include/ATen/ops/_jagged_to_padded_dense_forward_ops.h @@ -1638,12 +1714,21 @@ include/ATen/ops/_nested_from_padded_cpu_dispatch.h include/ATen/ops/_nested_from_padded_cuda_dispatch.h include/ATen/ops/_nested_from_padded_native.h include/ATen/ops/_nested_from_padded_ops.h +include/ATen/ops/_nested_from_padded_tensor.h +include/ATen/ops/_nested_from_padded_tensor_native.h +include/ATen/ops/_nested_from_padded_tensor_ops.h include/ATen/ops/_nested_get_jagged_dummy.h include/ATen/ops/_nested_get_jagged_dummy_native.h include/ATen/ops/_nested_get_jagged_dummy_ops.h include/ATen/ops/_nested_get_lengths.h include/ATen/ops/_nested_get_lengths_native.h include/ATen/ops/_nested_get_lengths_ops.h +include/ATen/ops/_nested_get_max_seqlen.h +include/ATen/ops/_nested_get_max_seqlen_native.h +include/ATen/ops/_nested_get_max_seqlen_ops.h +include/ATen/ops/_nested_get_min_seqlen.h +include/ATen/ops/_nested_get_min_seqlen_native.h +include/ATen/ops/_nested_get_min_seqlen_ops.h include/ATen/ops/_nested_get_offsets.h include/ATen/ops/_nested_get_offsets_native.h include/ATen/ops/_nested_get_offsets_ops.h @@ -1748,6 +1833,7 @@ include/ATen/ops/_pad_packed_sequence_compositeimplicitautograd_dispatch.h include/ATen/ops/_pad_packed_sequence_native.h include/ATen/ops/_pad_packed_sequence_ops.h include/ATen/ops/_padded_dense_to_jagged_forward.h +include/ATen/ops/_padded_dense_to_jagged_forward_cpu_dispatch.h include/ATen/ops/_padded_dense_to_jagged_forward_cuda_dispatch.h include/ATen/ops/_padded_dense_to_jagged_forward_native.h include/ATen/ops/_padded_dense_to_jagged_forward_ops.h @@ -1765,7 +1851,6 @@ include/ATen/ops/_pdist_forward_native.h include/ATen/ops/_pdist_forward_ops.h include/ATen/ops/_pin_memory.h include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h -include/ATen/ops/_pin_memory_cuda_dispatch.h include/ATen/ops/_pin_memory_native.h include/ATen/ops/_pin_memory_ops.h include/ATen/ops/_prelu_kernel.h @@ -1818,6 +1903,10 @@ include/ATen/ops/_rowwise_prune.h include/ATen/ops/_rowwise_prune_compositeimplicitautograd_dispatch.h include/ATen/ops/_rowwise_prune_native.h include/ATen/ops/_rowwise_prune_ops.h +include/ATen/ops/_safe_softmax.h +include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h +include/ATen/ops/_safe_softmax_native.h +include/ATen/ops/_safe_softmax_ops.h include/ATen/ops/_sample_dirichlet.h include/ATen/ops/_sample_dirichlet_compositeexplicitautograd_dispatch.h include/ATen/ops/_sample_dirichlet_cpu_dispatch.h @@ -1830,6 +1919,9 @@ include/ATen/ops/_saturate_weight_to_fp16_native.h include/ATen/ops/_saturate_weight_to_fp16_ops.h include/ATen/ops/_scaled_dot_product_attention_math.h include/ATen/ops/_scaled_dot_product_attention_math_compositeimplicitautograd_dispatch.h +include/ATen/ops/_scaled_dot_product_attention_math_for_mps.h +include/ATen/ops/_scaled_dot_product_attention_math_for_mps_native.h +include/ATen/ops/_scaled_dot_product_attention_math_for_mps_ops.h include/ATen/ops/_scaled_dot_product_attention_math_native.h include/ATen/ops/_scaled_dot_product_attention_math_ops.h include/ATen/ops/_scaled_dot_product_cudnn_attention.h @@ -1864,7 +1956,20 @@ include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_native.h include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_ops.h include/ATen/ops/_scaled_dot_product_flash_attention_native.h include/ATen/ops/_scaled_dot_product_flash_attention_ops.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_compositeexplicitautograd_dispatch.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_native.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_compositeexplicitautograd_dispatch.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_native.h +include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_ops.h +include/ATen/ops/_scaled_grouped_mm.h +include/ATen/ops/_scaled_grouped_mm_cuda_dispatch.h +include/ATen/ops/_scaled_grouped_mm_native.h +include/ATen/ops/_scaled_grouped_mm_ops.h include/ATen/ops/_scaled_mm.h +include/ATen/ops/_scaled_mm_cpu_dispatch.h include/ATen/ops/_scaled_mm_cuda_dispatch.h include/ATen/ops/_scaled_mm_native.h include/ATen/ops/_scaled_mm_ops.h @@ -2053,6 +2158,9 @@ include/ATen/ops/_spdiags_compositeexplicitautograd_dispatch.h include/ATen/ops/_spdiags_cpu_dispatch.h include/ATen/ops/_spdiags_native.h include/ATen/ops/_spdiags_ops.h +include/ATen/ops/_spsolve.h +include/ATen/ops/_spsolve_native.h +include/ATen/ops/_spsolve_ops.h include/ATen/ops/_stack.h include/ATen/ops/_stack_compositeexplicitautograd_dispatch.h include/ATen/ops/_stack_cpu_dispatch.h @@ -2257,6 +2365,14 @@ include/ATen/ops/_unsafe_index_put.h include/ATen/ops/_unsafe_index_put_compositeexplicitautograd_dispatch.h include/ATen/ops/_unsafe_index_put_native.h include/ATen/ops/_unsafe_index_put_ops.h +include/ATen/ops/_unsafe_masked_index.h +include/ATen/ops/_unsafe_masked_index_compositeexplicitautograd_dispatch.h +include/ATen/ops/_unsafe_masked_index_native.h +include/ATen/ops/_unsafe_masked_index_ops.h +include/ATen/ops/_unsafe_masked_index_put_accumulate.h +include/ATen/ops/_unsafe_masked_index_put_accumulate_compositeexplicitautograd_dispatch.h +include/ATen/ops/_unsafe_masked_index_put_accumulate_native.h +include/ATen/ops/_unsafe_masked_index_put_accumulate_ops.h include/ATen/ops/_unsafe_view.h include/ATen/ops/_unsafe_view_compositeexplicitautograd_dispatch.h include/ATen/ops/_unsafe_view_native.h @@ -2396,10 +2512,16 @@ include/ATen/ops/_version_compositeimplicitautograd_dispatch.h include/ATen/ops/_version_native.h include/ATen/ops/_version_ops.h include/ATen/ops/_weight_int4pack_mm.h -include/ATen/ops/_weight_int4pack_mm_cpu_dispatch.h include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h +include/ATen/ops/_weight_int4pack_mm_for_cpu.h +include/ATen/ops/_weight_int4pack_mm_for_cpu_cpu_dispatch.h +include/ATen/ops/_weight_int4pack_mm_for_cpu_native.h +include/ATen/ops/_weight_int4pack_mm_for_cpu_ops.h include/ATen/ops/_weight_int4pack_mm_native.h include/ATen/ops/_weight_int4pack_mm_ops.h +include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h +include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_native.h +include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_ops.h include/ATen/ops/_weight_int8pack_mm.h include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h include/ATen/ops/_weight_int8pack_mm_native.h @@ -2424,6 +2546,14 @@ include/ATen/ops/_weight_norm_interface_native.h include/ATen/ops/_weight_norm_interface_ops.h include/ATen/ops/_weight_norm_native.h include/ATen/ops/_weight_norm_ops.h +include/ATen/ops/_wrapped_linear_prepack.h +include/ATen/ops/_wrapped_linear_prepack_compositeimplicitautograd_dispatch.h +include/ATen/ops/_wrapped_linear_prepack_native.h +include/ATen/ops/_wrapped_linear_prepack_ops.h +include/ATen/ops/_wrapped_quantized_linear_prepacked.h +include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h +include/ATen/ops/_wrapped_quantized_linear_prepacked_native.h +include/ATen/ops/_wrapped_quantized_linear_prepacked_ops.h include/ATen/ops/abs.h include/ATen/ops/abs_compositeexplicitautograd_dispatch.h include/ATen/ops/abs_cpu_dispatch.h @@ -2451,6 +2581,7 @@ include/ATen/ops/acosh_meta_dispatch.h include/ATen/ops/acosh_native.h include/ATen/ops/acosh_ops.h include/ATen/ops/adaptive_avg_pool1d.h +include/ATen/ops/adaptive_avg_pool1d_compositeexplicitautograd_dispatch.h include/ATen/ops/adaptive_avg_pool1d_compositeimplicitautograd_dispatch.h include/ATen/ops/adaptive_avg_pool1d_native.h include/ATen/ops/adaptive_avg_pool1d_ops.h @@ -2706,10 +2837,7 @@ include/ATen/ops/argmin_meta_dispatch.h include/ATen/ops/argmin_native.h include/ATen/ops/argmin_ops.h include/ATen/ops/argsort.h -include/ATen/ops/argsort_compositeexplicitautograd_dispatch.h include/ATen/ops/argsort_compositeimplicitautograd_dispatch.h -include/ATen/ops/argsort_cpu_dispatch.h -include/ATen/ops/argsort_cuda_dispatch.h include/ATen/ops/argsort_native.h include/ATen/ops/argsort_ops.h include/ATen/ops/argwhere.h @@ -2786,6 +2914,7 @@ include/ATen/ops/atleast_3d_compositeimplicitautograd_dispatch.h include/ATen/ops/atleast_3d_native.h include/ATen/ops/atleast_3d_ops.h include/ATen/ops/avg_pool1d.h +include/ATen/ops/avg_pool1d_compositeexplicitautograd_dispatch.h include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h include/ATen/ops/avg_pool1d_native.h include/ATen/ops/avg_pool1d_ops.h @@ -4392,7 +4521,6 @@ include/ATen/ops/is_nonzero_native.h include/ATen/ops/is_nonzero_ops.h include/ATen/ops/is_pinned.h include/ATen/ops/is_pinned_compositeexplicitautograd_dispatch.h -include/ATen/ops/is_pinned_cuda_dispatch.h include/ATen/ops/is_pinned_native.h include/ATen/ops/is_pinned_ops.h include/ATen/ops/is_same_size.h @@ -5555,6 +5683,7 @@ include/ATen/ops/nonzero_numpy_ops.h include/ATen/ops/nonzero_ops.h include/ATen/ops/nonzero_static.h include/ATen/ops/nonzero_static_cpu_dispatch.h +include/ATen/ops/nonzero_static_cuda_dispatch.h include/ATen/ops/nonzero_static_native.h include/ATen/ops/nonzero_static_ops.h include/ATen/ops/norm.h @@ -6120,6 +6249,7 @@ include/ATen/ops/rrelu_with_noise_backward.h include/ATen/ops/rrelu_with_noise_backward_compositeexplicitautograd_dispatch.h include/ATen/ops/rrelu_with_noise_backward_native.h include/ATen/ops/rrelu_with_noise_backward_ops.h +include/ATen/ops/rrelu_with_noise_compositeexplicitautograd_dispatch.h include/ATen/ops/rrelu_with_noise_cpu_dispatch.h include/ATen/ops/rrelu_with_noise_cuda_dispatch.h include/ATen/ops/rrelu_with_noise_meta_dispatch.h @@ -7315,6 +7445,7 @@ include/ATen/ops/upsample_bilinear2d_backward_meta.h include/ATen/ops/upsample_bilinear2d_backward_meta_dispatch.h include/ATen/ops/upsample_bilinear2d_backward_native.h include/ATen/ops/upsample_bilinear2d_backward_ops.h +include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h include/ATen/ops/upsample_bilinear2d_compositeexplicitautogradnonfunctional_dispatch.h include/ATen/ops/upsample_bilinear2d_compositeimplicitautograd_dispatch.h include/ATen/ops/upsample_bilinear2d_cpu_dispatch.h @@ -7366,6 +7497,7 @@ include/ATen/ops/upsample_nearest2d_backward_meta.h include/ATen/ops/upsample_nearest2d_backward_meta_dispatch.h include/ATen/ops/upsample_nearest2d_backward_native.h include/ATen/ops/upsample_nearest2d_backward_ops.h +include/ATen/ops/upsample_nearest2d_compositeexplicitautograd_dispatch.h include/ATen/ops/upsample_nearest2d_compositeexplicitautogradnonfunctional_dispatch.h include/ATen/ops/upsample_nearest2d_compositeimplicitautograd_dispatch.h include/ATen/ops/upsample_nearest2d_cpu_dispatch.h @@ -7535,10 +7667,12 @@ include/ATen/xpu/XPUDevice.h include/ATen/xpu/XPUEvent.h include/ATen/xpu/XPUGeneratorImpl.h include/ATen/xpu/detail/XPUHooks.h +include/advisor-annotate.h include/c10/core/Allocator.h include/c10/core/AutogradState.h include/c10/core/Backend.h include/c10/core/CPUAllocator.h +include/c10/core/CachingDeviceAllocator.h include/c10/core/CompileTimeFunctionPointer.h include/c10/core/ConstantSymNodeImpl.h include/c10/core/Contiguity.h @@ -7619,6 +7753,14 @@ include/c10/cuda/impl/CUDATest.h include/c10/macros/Export.h include/c10/macros/Macros.h include/c10/macros/cmake_macros.h +include/c10/metal/atomic.h +include/c10/metal/common.h +include/c10/metal/expm1f.h +include/c10/metal/indexing.h +include/c10/metal/random.h +include/c10/metal/reduction_utils.h +include/c10/metal/special_math.h +include/c10/metal/utils.h include/c10/mobile/CPUCachingAllocator.h include/c10/mobile/CPUProfilingAllocator.h include/c10/test/util/Macros.h @@ -7640,11 +7782,14 @@ include/c10/util/ConstexprCrc.h include/c10/util/DeadlockDetection.h include/c10/util/Deprecated.h include/c10/util/DimVector.h +include/c10/util/DynamicCounter.h +include/c10/util/Enumerate.h include/c10/util/Exception.h include/c10/util/ExclusivelyOwned.h include/c10/util/ExclusivelyOwnedTensorTraits.h include/c10/util/FbcodeMaps.h include/c10/util/Flags.h +include/c10/util/Float4_e2m1fn_x2.h include/c10/util/Float8_e4m3fn-inl.h include/c10/util/Float8_e4m3fn.h include/c10/util/Float8_e4m3fnuz-inl.h @@ -7653,11 +7798,15 @@ include/c10/util/Float8_e5m2-inl.h include/c10/util/Float8_e5m2.h include/c10/util/Float8_e5m2fnuz-inl.h include/c10/util/Float8_e5m2fnuz.h +include/c10/util/Float8_e8m0fnu-inl.h +include/c10/util/Float8_e8m0fnu.h include/c10/util/Float8_fnuz_cvt.h include/c10/util/FunctionRef.h +include/c10/util/Gauge.h include/c10/util/Half-inl.h include/c10/util/Half.h include/c10/util/IdWrapper.h +include/c10/util/IntrusiveList.h include/c10/util/Lazy.h include/c10/util/LeftRight.h include/c10/util/Load.h @@ -7665,11 +7814,13 @@ include/c10/util/Logging.h include/c10/util/MathConstants.h include/c10/util/MaybeOwned.h include/c10/util/Metaprogramming.h +include/c10/util/NetworkFlow.h include/c10/util/Optional.h include/c10/util/OptionalArrayRef.h include/c10/util/ParallelGuard.h include/c10/util/Registry.h include/c10/util/ScopeExit.h +include/c10/util/Semaphore.h include/c10/util/SmallBuffer.h include/c10/util/SmallVector.h include/c10/util/StringUtil.h @@ -7685,6 +7836,8 @@ include/c10/util/TypeTraits.h include/c10/util/Unicode.h include/c10/util/UniqueVoidPtr.h include/c10/util/Unroll.h +include/c10/util/WaitCounter.h +include/c10/util/WaitCounterDynamicBackend.h include/c10/util/accumulate.h include/c10/util/bit_cast.h include/c10/util/bits.h @@ -7693,6 +7846,7 @@ include/c10/util/complex_math.h include/c10/util/complex_utils.h include/c10/util/copysign.h include/c10/util/env.h +include/c10/util/error.h include/c10/util/flat_hash_map.h include/c10/util/floating_point_utils.h include/c10/util/generic_math.h @@ -7705,6 +7859,7 @@ include/c10/util/logging_is_google_glog.h include/c10/util/logging_is_not_google_glog.h include/c10/util/numa.h include/c10/util/order_preserving_flat_hash_map.h +include/c10/util/overflows.h include/c10/util/overloaded.h include/c10/util/python_stub.h include/c10/util/qint32.h @@ -7737,21 +7892,9 @@ include/c10/xpu/test/impl/XPUTest.h include/caffe2/core/common.h include/caffe2/core/macros.h include/caffe2/core/timer.h -include/caffe2/perfkernels/adagrad.h -include/caffe2/perfkernels/batch_box_cox.h +include/caffe2/perfkernels/batch_box_cox_vec.h include/caffe2/perfkernels/common.h -include/caffe2/perfkernels/cvtsh_ss_bugfix.h -include/caffe2/perfkernels/embedding_lookup.h include/caffe2/perfkernels/embedding_lookup_idx.h -include/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h -include/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.h -include/caffe2/perfkernels/fused_nbit_rowwise_conversion.h -include/caffe2/perfkernels/lstm_unit_cpu-impl.h -include/caffe2/perfkernels/lstm_unit_cpu.h -include/caffe2/perfkernels/lstm_unit_cpu_common.h -include/caffe2/perfkernels/math.h -include/caffe2/perfkernels/typed_axpy.h -include/caffe2/perfkernels/vectorizer.h include/caffe2/serialize/crc_alt.h include/caffe2/serialize/file_adapter.h include/caffe2/serialize/in_memory_adapter.h @@ -7759,7 +7902,6 @@ include/caffe2/serialize/inline_container.h include/caffe2/serialize/istream_adapter.h include/caffe2/serialize/read_adapter_interface.h include/caffe2/serialize/versions.h -include/caffe2/utils/conversions.h include/caffe2/utils/fixed_divisor.h include/caffe2/utils/proto_wrap.h include/caffe2/utils/string_utils.h @@ -7791,6 +7933,9 @@ include/fp16/avx2.py include/fp16/bitcasts.h include/fp16/fp16.h include/fp16/psimd.h +include/ittnotify-zca.h +include/ittnotify.h +include/jitprofiling.h include/kineto/AbstractConfig.h include/kineto/ActivityProfilerInterface.h include/kineto/ActivityTraceInterface.h @@ -7807,6 +7952,8 @@ include/kineto/TraceSpan.h include/kineto/libkineto.h include/kineto/output_base.h include/kineto/time_since_epoch.h +include/legacy/ittnotify.h +include/libittnotify.h include/libshm.h %%AMD64%%include/oneapi/dnnl/dnnl.h %%AMD64%%include/oneapi/dnnl/dnnl.hpp @@ -7817,6 +7964,8 @@ include/libshm.h %%AMD64%%include/oneapi/dnnl/dnnl_debug.h %%AMD64%%include/oneapi/dnnl/dnnl_graph.h %%AMD64%%include/oneapi/dnnl/dnnl_graph.hpp +%%AMD64%%include/oneapi/dnnl/dnnl_graph_ocl.h +%%AMD64%%include/oneapi/dnnl/dnnl_graph_ocl.hpp %%AMD64%%include/oneapi/dnnl/dnnl_graph_sycl.h %%AMD64%%include/oneapi/dnnl/dnnl_graph_sycl.hpp %%AMD64%%include/oneapi/dnnl/dnnl_graph_types.h @@ -7830,11 +7979,16 @@ include/libshm.h %%AMD64%%include/oneapi/dnnl/dnnl_threadpool.hpp %%AMD64%%include/oneapi/dnnl/dnnl_threadpool_iface.hpp %%AMD64%%include/oneapi/dnnl/dnnl_types.h +%%AMD64%%include/oneapi/dnnl/dnnl_ukernel.h +%%AMD64%%include/oneapi/dnnl/dnnl_ukernel.hpp +%%AMD64%%include/oneapi/dnnl/dnnl_ukernel_types.h %%AMD64%%include/oneapi/dnnl/dnnl_version.h +%%AMD64%%include/oneapi/dnnl/dnnl_version_hash.h include/psimd.h include/torch/csrc/CudaIPCTypes.h include/torch/csrc/DataLoader.h include/torch/csrc/Device.h +include/torch/csrc/DeviceAccelerator.h include/torch/csrc/Dtype.h include/torch/csrc/DynamicTypes.h include/torch/csrc/Event.h @@ -7900,7 +8054,6 @@ include/torch/csrc/api/include/torch/expanding_array.h include/torch/csrc/api/include/torch/fft.h include/torch/csrc/api/include/torch/imethod.h include/torch/csrc/api/include/torch/jit.h -include/torch/csrc/api/include/torch/linalg.h include/torch/csrc/api/include/torch/mps.h include/torch/csrc/api/include/torch/nested.h include/torch/csrc/api/include/torch/nn.h @@ -8082,6 +8235,7 @@ include/torch/csrc/copy_utils.h include/torch/csrc/cpu/Module.h include/torch/csrc/cuda/CUDAPluggableAllocator.h include/torch/csrc/cuda/Event.h +include/torch/csrc/cuda/GdsFile.h include/torch/csrc/cuda/Module.h include/torch/csrc/cuda/Stream.h include/torch/csrc/cuda/THCP.h @@ -8091,6 +8245,7 @@ include/torch/csrc/cuda/memory_snapshot.h include/torch/csrc/cuda/nccl.h include/torch/csrc/cuda/python_comm.h include/torch/csrc/cuda/python_nccl.h +include/torch/csrc/cuda/utils.h include/torch/csrc/distributed/autograd/autograd.h include/torch/csrc/distributed/autograd/context/container.h include/torch/csrc/distributed/autograd/context/context.h @@ -8110,20 +8265,24 @@ include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h include/torch/csrc/distributed/autograd/utils.h include/torch/csrc/distributed/c10d/Backend.hpp +include/torch/csrc/distributed/c10d/Backoff.hpp include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp include/torch/csrc/distributed/c10d/FileStore.hpp +include/torch/csrc/distributed/c10d/FlightRecorder.hpp +include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp +include/torch/csrc/distributed/c10d/Functional.hpp include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp include/torch/csrc/distributed/c10d/GroupRegistry.hpp include/torch/csrc/distributed/c10d/HashStore.hpp include/torch/csrc/distributed/c10d/NCCLUtils.hpp +include/torch/csrc/distributed/c10d/NanCheck.hpp include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp include/torch/csrc/distributed/c10d/PrefixStore.hpp include/torch/csrc/distributed/c10d/ProcessGroup.hpp -include/torch/csrc/distributed/c10d/ProcessGroupCudaP2P.hpp include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp +include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp -include/torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp include/torch/csrc/distributed/c10d/PyProcessGroup.hpp @@ -8145,11 +8304,11 @@ include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp +include/torch/csrc/distributed/c10d/cuda/utils.hpp include/torch/csrc/distributed/c10d/debug.h include/torch/csrc/distributed/c10d/default_comm_hooks.hpp include/torch/csrc/distributed/c10d/error.h include/torch/csrc/distributed/c10d/exception.h -include/torch/csrc/distributed/c10d/intra_node_comm.hpp include/torch/csrc/distributed/c10d/logger.hpp include/torch/csrc/distributed/c10d/logging.h include/torch/csrc/distributed/c10d/python_comm_hook.h @@ -8160,6 +8319,14 @@ include/torch/csrc/distributed/c10d/reducer.hpp include/torch/csrc/distributed/c10d/reducer_timer.hpp include/torch/csrc/distributed/c10d/sequence_num.hpp include/torch/csrc/distributed/c10d/socket.h +include/torch/csrc/distributed/c10d/socket_fmt.h +include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h +include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp +include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp +include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp +include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp +include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp +include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp include/torch/csrc/distributed/rpc/agent_utils.h include/torch/csrc/distributed/rpc/message.h include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h @@ -8196,37 +8363,78 @@ include/torch/csrc/dynamo/cache_entry.h include/torch/csrc/dynamo/compiled_autograd.h include/torch/csrc/dynamo/cpp_shim.h include/torch/csrc/dynamo/cpython_defs.h +include/torch/csrc/dynamo/cpython_includes.h include/torch/csrc/dynamo/debug_macros.h include/torch/csrc/dynamo/eval_frame.h +include/torch/csrc/dynamo/eval_frame_cpp.h include/torch/csrc/dynamo/extra_state.h +include/torch/csrc/dynamo/framelocals_mapping.h include/torch/csrc/dynamo/guards.h include/torch/csrc/dynamo/init.h include/torch/csrc/dynamo/python_compiled_autograd.h include/torch/csrc/dynamo/utils.h +include/torch/csrc/export/pt2_archive_constants.h +include/torch/csrc/export/pybind.h include/torch/csrc/functorch/init.h +include/torch/csrc/fx/node.h include/torch/csrc/inductor/aoti_eager/kernel_holder.h include/torch/csrc/inductor/aoti_eager/kernel_meta_info.h +include/torch/csrc/inductor/aoti_include/array_ref.h +include/torch/csrc/inductor/aoti_include/common.h +include/torch/csrc/inductor/aoti_include/cpu.h +include/torch/csrc/inductor/aoti_include/cuda.h +include/torch/csrc/inductor/aoti_include/mps.h +include/torch/csrc/inductor/aoti_include/xpu.h +include/torch/csrc/inductor/aoti_package/model_package_loader.h +include/torch/csrc/inductor/aoti_package/pybind.h include/torch/csrc/inductor/aoti_runner/model_container_runner.h include/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h include/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h +include/torch/csrc/inductor/aoti_runner/model_container_runner_mps.h +include/torch/csrc/inductor/aoti_runner/model_container_runner_xpu.h include/torch/csrc/inductor/aoti_runner/pybind.h include/torch/csrc/inductor/aoti_runtime/arrayref_tensor.h +include/torch/csrc/inductor/aoti_runtime/constant_type.h include/torch/csrc/inductor/aoti_runtime/device_utils.h include/torch/csrc/inductor/aoti_runtime/interface.h +include/torch/csrc/inductor/aoti_runtime/mini_array_ref.h include/torch/csrc/inductor/aoti_runtime/model.h include/torch/csrc/inductor/aoti_runtime/model_container.h include/torch/csrc/inductor/aoti_runtime/scalar_to_tensor.h +include/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h include/torch/csrc/inductor/aoti_runtime/thread_local.h include/torch/csrc/inductor/aoti_runtime/utils.h include/torch/csrc/inductor/aoti_runtime/utils_cuda.h +include/torch/csrc/inductor/aoti_runtime/utils_xpu.h include/torch/csrc/inductor/aoti_torch/c/shim.h +include/torch/csrc/inductor/aoti_torch/c/shim_cpu.h +include/torch/csrc/inductor/aoti_torch/c/shim_mps.h +include/torch/csrc/inductor/aoti_torch/c/shim_xpu.h include/torch/csrc/inductor/aoti_torch/generated/c_shim_cpu.h include/torch/csrc/inductor/aoti_torch/generated/c_shim_cuda.h +include/torch/csrc/inductor/aoti_torch/generated/c_shim_mps.h +include/torch/csrc/inductor/aoti_torch/generated/c_shim_xpu.h include/torch/csrc/inductor/aoti_torch/mkldnn_tensor.h +include/torch/csrc/inductor/aoti_torch/oss_proxy_executor.h include/torch/csrc/inductor/aoti_torch/proxy_executor.h include/torch/csrc/inductor/aoti_torch/tensor_converter.h include/torch/csrc/inductor/aoti_torch/utils.h +include/torch/csrc/inductor/array_ref_impl.h +include/torch/csrc/inductor/cpp_prefix.h +include/torch/csrc/inductor/cpp_wrapper/array_ref.h +include/torch/csrc/inductor/cpp_wrapper/common.h +include/torch/csrc/inductor/cpp_wrapper/cpu.h +include/torch/csrc/inductor/cpp_wrapper/cuda.h +include/torch/csrc/inductor/cpp_wrapper/device_internal/cpu.h +include/torch/csrc/inductor/cpp_wrapper/device_internal/cuda.h +include/torch/csrc/inductor/cpp_wrapper/device_internal/mps.h +include/torch/csrc/inductor/cpp_wrapper/device_internal/xpu.h +include/torch/csrc/inductor/cpp_wrapper/mps.h +include/torch/csrc/inductor/cpp_wrapper/xpu.h include/torch/csrc/inductor/inductor_ops.h +include/torch/csrc/inductor/static_cuda_launcher.h +include/torch/csrc/instruction_counter/Module.h +include/torch/csrc/itt.h include/torch/csrc/itt_wrapper.h include/torch/csrc/jit/api/compilation_unit.h include/torch/csrc/jit/api/function_impl.h @@ -8686,10 +8894,9 @@ include/torch/csrc/monitor/events.h include/torch/csrc/monitor/python_init.h include/torch/csrc/mps/Module.h include/torch/csrc/mtia/Module.h +include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h include/torch/csrc/multiprocessing/init.h include/torch/csrc/onnx/back_compat.h -include/torch/csrc/onnx/diagnostics/diagnostics.h -include/torch/csrc/onnx/diagnostics/generated/rules.h include/torch/csrc/onnx/init.h include/torch/csrc/onnx/onnx.h include/torch/csrc/profiler/api.h @@ -8698,6 +8905,7 @@ include/torch/csrc/profiler/combined_traceback.h include/torch/csrc/profiler/containers.h include/torch/csrc/profiler/data_flow.h include/torch/csrc/profiler/events.h +include/torch/csrc/profiler/kineto_client_interface.h include/torch/csrc/profiler/kineto_shim.h include/torch/csrc/profiler/orchestration/observer.h include/torch/csrc/profiler/orchestration/python_tracer.h @@ -8732,6 +8940,8 @@ include/torch/csrc/profiler/util.h include/torch/csrc/python_dimname.h include/torch/csrc/python_headers.h include/torch/csrc/serialization.h +include/torch/csrc/stable/library.h +include/torch/csrc/stable/tensor.h include/torch/csrc/tensor/python_tensor.h include/torch/csrc/utils.h include/torch/csrc/utils/byte_order.h @@ -8739,6 +8949,7 @@ include/torch/csrc/utils/cpp_stacktraces.h include/torch/csrc/utils/cuda_enabled.h include/torch/csrc/utils/device_lazy_init.h include/torch/csrc/utils/disable_torch_function.h +include/torch/csrc/utils/generated_serialization_types.h include/torch/csrc/utils/init.h include/torch/csrc/utils/invalid_arguments.h include/torch/csrc/utils/nested.h @@ -8784,18 +8995,21 @@ include/torch/csrc/xpu/Stream.h include/torch/custom_class.h include/torch/custom_class_detail.h include/torch/extension.h +include/torch/headeronly/macros/Export.h include/torch/library.h include/torch/script.h %%AMD64%%lib/cmake/dnnl/dnnl-config-version.cmake %%AMD64%%lib/cmake/dnnl/dnnl-config.cmake %%AMD64%%lib/cmake/dnnl/dnnl-targets-%%CMAKE_BUILD_TYPE%%.cmake %%AMD64%%lib/cmake/dnnl/dnnl-targets.cmake -%%AMD64%%lib/libCaffe2_perfkernels_avx.a -%%AMD64%%lib/libCaffe2_perfkernels_avx2.a -%%AMD64%%lib/libCaffe2_perfkernels_avx512.a +%%AMD64%%lib/cmake/ittapi/ittapi-targets-%%CMAKE_BUILD_TYPE%%.cmake +%%AMD64%%lib/cmake/ittapi/ittapi-targets.cmake +%%AMD64%%lib/cmake/ittapi/ittapiConfig.cmake +%%AMD64%%lib/cmake/ittapi/ittapiConfigVersion.cmake lib/libc10.so lib/libcpuinfo.a %%AMD64%%lib/libdnnl.a +lib/libittnotify.a lib/libkineto.a lib/libshm.so lib/libtorch.so @@ -8809,6 +9023,7 @@ share/cmake/Caffe2/Caffe2Config.cmake share/cmake/Caffe2/Caffe2Targets-%%CMAKE_BUILD_TYPE%%.cmake share/cmake/Caffe2/Caffe2Targets.cmake share/cmake/Caffe2/FindCUDAToolkit.cmake +share/cmake/Caffe2/FindCUDSS.cmake share/cmake/Caffe2/FindCUSPARSELT.cmake share/cmake/Caffe2/FindSYCLToolkit.cmake share/cmake/Caffe2/Modules_CUDA_fix/FindCUDA.cmake |