$ pkg delete intel-compute-runtime
$ mpivars
PCI: Failed to initialize libpciaccess with pci_system_init(): 6 (Permission denied)
Abort(268484367) on node 0 (rank 0 in comm 0): Fatal error in PMPI_Init_thread: Other MPI error, error stack:
MPIR_Init_thread(153): gpu_init failed
[unset]: write_line error; fd=-1 buf=:cmd=abort exitcode=268484367
:
system msg for write_line failure : Bad file descriptor
Attempting to use an MPI routine before initializing MPICH

--- src/mpi/init/initthread.c.orig 2021-05-25 17:37:05 UTC
+++ src/mpi/init/initthread.c
@@ -150,7 +150,9 @@ int MPIR_Init_thread(int *argc, char ***argv, int user
      * inside MPID_Init */
     if (MPIR_CVAR_ENABLE_GPU) {
         int mpl_errno = MPL_gpu_init();
-        MPIR_ERR_CHKANDJUMP(mpl_errno != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**gpu_init");
+        MPIR_ERR_CHKANDJUMP(
+          mpl_errno != MPL_SUCCESS && mpl_errno != MPL_ERR_GPU_INTERNAL,
+          mpi_errno, MPI_ERR_OTHER, "**gpu_init");
     }
 
     MPL_atomic_store_int(&MPIR_Process.mpich_state, MPICH_MPI_STATE__IN_INIT);
--- src/mpid/ch4/netmod/ofi/ofi_init.c.orig 2021-05-25 17:37:05 UTC
+++ src/mpid/ch4/netmod/ofi/ofi_init.c
@@ -731,7 +731,6 @@ int MPIDI_OFI_mpi_init_hook(int rank, int size, int ap
             MPL_gpu_malloc_host(&(MPIDI_OFI_global.am_bufs[i]), MPIDI_OFI_AM_BUFF_SZ);
             MPIDI_OFI_global.am_reqs[i].event_id = MPIDI_OFI_EVENT_AM_RECV;
             MPIDI_OFI_global.am_reqs[i].index = i;
-            MPIR_Assert(MPIDI_OFI_global.am_bufs[i]);
             MPIDI_OFI_global.am_iov[i].iov_base = MPIDI_OFI_global.am_bufs[i];
             MPIDI_OFI_global.am_iov[i].iov_len = MPIDI_OFI_AM_BUFF_SZ;
             MPIDI_OFI_global.am_msg[i].msg_iov = &MPIDI_OFI_global.am_iov[i];
--- src/mpl/src/gpu/mpl_gpu_ze.c.orig 2021-05-25 17:37:05 UTC
+++ src/mpl/src/gpu/mpl_gpu_ze.c
@@ -33,7 +33,7 @@ int MPL_gpu_get_dev_count(int *dev_cnt, int *dev_id)
 {
     int ret = MPL_SUCCESS;
     if (!gpu_initialized) {
-        ret = MPL_gpu_init();
+        MPL_gpu_init();
     }
 
     *dev_cnt = device_count;