https://github.com/intel/intel-graphics-compiler/issues/155 --- IGC/Compiler/CISACodeGen/ResolveGAS.cpp.orig 2020-12-01 09:02:30 UTC +++ IGC/Compiler/CISACodeGen/ResolveGAS.cpp @@ -35,7 +35,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "llvm/Support/Debug.h" #include "llvmWrapper/IR/Constant.h" #include -#include #include #include #include @@ -80,8 +79,6 @@ namespace { void getAnalysisUsage(AnalysisUsage& AU) const override { AU.setPreservesCFG(); AU.addRequired(); - AU.addRequired(); - AU.addRequired(); } bool isResolvableLoopPHI(PHINode* PN) const { @@ -92,17 +89,11 @@ namespace { bool resolveOnFunction(Function*) const; bool resolveOnBasicBlock(BasicBlock*) const; - bool resolveMemoryFromHost(Function&) const; - void populateResolvableLoopPHIs(); void populateResolvableLoopPHIsForLoop(const Loop*); bool isAddrSpaceResolvable(PHINode* PN, const Loop* L, BasicBlock* BackEdge) const; - - bool checkGenericArguments(Function& F) const; - void convertLoadToGlobal(LoadInst* LI) const; - bool isLoadGlobalCandidate(LoadInst* LI) const; }; class GASPropagator : public InstVisitor { @@ -157,8 +148,6 @@ namespace IGC { IGC_INITIALIZE_PASS_BEGIN(GASResolving, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS) IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) - IGC_INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) - IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper) IGC_INITIALIZE_PASS_END(GASResolving, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS) } @@ -169,8 +158,6 @@ bool GASResolving::runOnFunction(Function& F) { IRB = &TheBuilder; Propagator = &ThePropagator; - resolveMemoryFromHost(F); - populateResolvableLoopPHIs(); bool Changed = false; @@ -716,177 +703,5 @@ bool GASPropagator::visitCallInst(CallInst& I) { return true; } - return false; -} - -bool GASResolving::resolveMemoryFromHost(Function& F) const { - MetaDataUtils* pMdUtils = getAnalysis().getMetaDataUtils(); - - // skip all non-entry functions - if (!isEntryFunc(pMdUtils, &F)) - return false; - - // early check in order not to iterate whole function - if (!checkGenericArguments(F)) - return false; - - SmallVector Stores; - SmallVector Loads; - AliasAnalysis* AA = &getAnalysis().getAAResults(); - - // collect load candidates and in parallel check for unsafe instructions - // visitor may be a more beautiful way to do this - bool HasASCast = false; // if there exists addrspace cast from non global/generic space - bool HasPtoi = false; // if there exists ptrtoint with global/generic space - for (BasicBlock& B : F) { - for (Instruction& I : B) { - if (auto LI = dyn_cast(&I)) { - if (isLoadGlobalCandidate(LI)) { - Loads.push_back(LI); - } - } - else if (auto CI = dyn_cast(&I)) { - if (CI->onlyReadsMemory()) - continue; - - // currently recognize only these ones - // in fact intrinsics should be marked as read-only - if (auto II = dyn_cast(CI)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) - continue; - } - - // if we have an unsafe call in the kernel, abort - // to improve we can collect arguments of writing calls as memlocations for alias analysis - return false; - } - else if (auto PI = dyn_cast(&I)) { - // if we have a ptrtoint we need to check data flow which we don't want to - if (PI->getPointerAddressSpace() != ADDRESS_SPACE_GLOBAL && - PI->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC) - return false; - else { - HasPtoi = true; - } - - return false; - } - else if (auto AI = dyn_cast(&I)) { - if (AI->getSrcAddressSpace() != ADDRESS_SPACE_GLOBAL && - AI->getSrcAddressSpace() != ADDRESS_SPACE_GENERIC) { - HasASCast = true; - } - } - else if (auto SI = dyn_cast(&I)) { - Value* V = SI->getValueOperand(); - if (isa(V->getType())) { - // this store can potentially write non-global pointer to memory - Stores.push_back(SI); - } - } - else if (I.mayWriteToMemory()) { - // unsupported instruction poisoning memory - return false; - } - } - } - if (HasASCast && HasPtoi) - return false; - - if (Loads.empty()) - return false; - - bool Changed = false; - while (!Loads.empty()) - { - LoadInst* LI = Loads.pop_back_val(); - - // check that we don't have aliasing stores for this load - // we expect to have basic and addrspace AA available at the moment - // on optimization phase - bool aliases = false; - for (auto SI : Stores) { - if (AA->alias(MemoryLocation::get(SI), MemoryLocation::get(LI))) { - aliases = true; - break; - } - } - if (aliases) - continue; - - convertLoadToGlobal(LI); - Changed = true; - } - return Changed; -} - -bool GASResolving::isLoadGlobalCandidate(LoadInst* LI) const { - // first check that loaded address has generic address space - // otherwise it is not our candidate - PointerType* PtrTy = dyn_cast(LI->getType()); - if (!PtrTy || PtrTy->getAddressSpace() != ADDRESS_SPACE_GENERIC) - return false; - - // next check that it is a load from function argument + offset - // which is necessary to prove that this address has global addrspace - Value* LoadBase = LI->getPointerOperand()->stripInBoundsOffsets(); - // WA for gep not_inbounds base, 0, 0 that is not handled in stripoffsets - LoadBase = LoadBase->stripPointerCasts(); - if (!isa(LoadBase)) - return false; - - // don't want to process cases when argument is from local address space - auto LoadTy = cast(LoadBase->getType()); - if (LoadTy->getAddressSpace() != ADDRESS_SPACE_GLOBAL) - return false; - - // TODO: skip cases that have been fixed on previous traversals - - return true; -} - -void GASResolving::convertLoadToGlobal(LoadInst* LI) const { - // create two addressspace casts: generic -> global -> generic - // the next scalar phase of this pass will propagate global to all uses of the load - - PointerType* PtrTy = cast(LI->getType()); - IRB->SetInsertPoint(LI->getNextNode()); - PointerType* GlobalPtrTy = PointerType::get(PtrTy->getElementType(), ADDRESS_SPACE_GLOBAL); - Value* GlobalAddr = IRB->CreateAddrSpaceCast(LI, GlobalPtrTy); - Value* GenericCopyAddr = IRB->CreateAddrSpaceCast(GlobalAddr, PtrTy); - - for (auto UI = LI->use_begin(), UE = LI->use_end(); UI != UE; /*EMPTY*/) { - Use& U = *UI++; - if (U.getUser() == GlobalAddr) - continue; - U.set(GenericCopyAddr); - } -} - -bool GASResolving::checkGenericArguments(Function& F) const { - // check that we have a pointer to pointer or pointer to struct that has pointer elements - // and main pointer type is global while underlying pointer type is generic - - auto* FT = F.getFunctionType(); - for (unsigned p = 0; p < FT->getNumParams(); ++p) { - if (auto Ty = dyn_cast(FT->getParamType(p))) { - if (Ty->getAddressSpace() != ADDRESS_SPACE_GLOBAL) - continue; - auto PteeTy = Ty->getElementType(); - if (auto PTy = dyn_cast(PteeTy)) { - if (PTy->getAddressSpace() == ADDRESS_SPACE_GENERIC) - return true; - } - if (auto STy = dyn_cast(PteeTy)) { - for (unsigned e = 0; e < STy->getNumElements(); ++e) { - if (auto ETy = dyn_cast(STy->getElementType(e))) { - if (ETy->getAddressSpace() == ADDRESS_SPACE_GENERIC) - return true; - } - } - } - } - } return false; } \ No newline at end of file --- IGC/VectorCompiler/lib/GenXOpts/CMPacketize/GenXPacketize.cpp.orig 2020-11-13 09:31:31 UTC +++ IGC/VectorCompiler/lib/GenXOpts/CMPacketize/GenXPacketize.cpp @@ -276,7 +276,7 @@ Function *GenXPacketize::vectorizeSIMTFunction(Functio VecFName + Suffix[Width / 8], F->getParent()); ClonedFunc->setCallingConv(F->getCallingConv()); ClonedFunc->setAttributes(F->getAttributes()); - ClonedFunc->setAlignment(IGCLLVM::getAlign(*F)); + ClonedFunc->setAlignment(IGCLLVM::getAlign(F->getAlignment())); // then use CloneFunctionInto ValueToValueMapTy ArgMap; @@ -888,8 +888,10 @@ Value *GenXPacketize::packetizeLLVMInstruction(Instruc ->isPointerTy()); auto Align = LI->getAlignment(); pReplacedInst = B->MASKED_GATHER(pVecSrc, Align); - } else - pReplacedInst = B->ALIGNED_LOAD(pVecSrc, IGCLLVM::getAlign(*LI)); + } else { + auto Align = LI->getAlignment(); + pReplacedInst = B->ALIGNED_LOAD(pVecSrc, Align); + } break; } --- IGC/VectorCompiler/lib/GenXOpts/CMPacketize/gen_builder.hpp.orig 2020-11-13 09:31:31 UTC +++ IGC/VectorCompiler/lib/GenXOpts/CMPacketize/gen_builder.hpp @@ -573,24 +573,24 @@ StoreInst* STORE(Value *Val, Value *Ptr, bool isVolati return IRB()->CreateStore(Val, Ptr, isVolatile); } -inline LoadInst* ALIGNED_LOAD(Value *Ptr, IGCLLVM::Align Align, const char *Name) +LoadInst* ALIGNED_LOAD(Value *Ptr, unsigned Align, const char *Name) { - return IRB()->CreateAlignedLoad(Ptr, Align, Name); + return IRB()->CreateAlignedLoad(Ptr, IGCLLVM::getAlign(Align), Name); } -inline LoadInst* ALIGNED_LOAD(Value *Ptr, IGCLLVM::Align Align, const Twine &Name = "") +LoadInst* ALIGNED_LOAD(Value *Ptr, unsigned Align, const Twine &Name = "") { - return IRB()->CreateAlignedLoad(Ptr, Align, Name); + return IRB()->CreateAlignedLoad(Ptr, IGCLLVM::getAlign(Align), Name); } -inline LoadInst* ALIGNED_LOAD(Value *Ptr, IGCLLVM::Align Align, bool isVolatile, const Twine &Name = "") +LoadInst* ALIGNED_LOAD(Value *Ptr, unsigned Align, bool isVolatile, const Twine &Name = "") { - return IRB()->CreateAlignedLoad(Ptr, Align, isVolatile, Name); + return IRB()->CreateAlignedLoad(Ptr, IGCLLVM::getAlign(Align), isVolatile, Name); } -inline StoreInst* ALIGNED_STORE(Value *Val, Value *Ptr, IGCLLVM::Align Align, bool isVolatile = false) +StoreInst* ALIGNED_STORE(Value *Val, Value *Ptr, unsigned Align, bool isVolatile = false) { - return IRB()->CreateAlignedStore(Val, Ptr, Align, isVolatile); + return IRB()->CreateAlignedStore(Val, Ptr, IGCLLVM::getAlign(Align), isVolatile); } FenceInst* FENCE(AtomicOrdering Ordering, SyncScope::ID SSID = SyncScope::System, const Twine &Name = "") --- IGC/WrapperLLVM/include/llvmWrapper/Support/Alignment.h.orig 2020-12-01 09:02:30 UTC +++ IGC/WrapperLLVM/include/llvmWrapper/Support/Alignment.h @@ -63,16 +63,16 @@ namespace IGCLLVM { inline llvm::Align getAlignmentValueIfNeeded(llvm::Align A) { return A; } #endif - using Align = + template= 11 - llvm::Align; + llvm::Align #endif - - inline Align getCorrectAlign(uint32_t Val) + > + inline T getCorrectAlign(uint32_t Val) { if (Val == 0) { @@ -80,25 +80,37 @@ namespace IGCLLVM { // Instead assume byte-align. Val = 1; } - return Align{ Val }; + return T{ Val }; } // It is meant for copying alignement. // getAlign returns different type for different LLVM versions but // it can be overcome by using auto or direct usage in another LLVM // interface. +#if LLVM_VERSION_MAJOR <= 9 template ::value, int> = 0> - Align getAlign(const TValue &Val) + unsigned getAlign(const TValue &Val) { -#if LLVM_VERSION_MAJOR <= 9 return Val.getAlignment(); + } #elif LLVM_VERSION_MAJOR <= 10 + template ::value, int> = 0> + llvm::MaybeAlign getAlign(const TValue &Val) + { + // LLVM 10 instructions accept MaybeAlign but do not provide + // getMaybeAlignMethod return llvm::MaybeAlign(Val.getAlignment()); + } #else + template ::value, int> = 0> + llvm::Align getAlign(const TValue &Val) + { return Val.getAlign(); -#endif } +#endif } // namespace IGCLLVM