diff options
Diffstat (limited to 'mlir/lib/Target')
| -rw-r--r-- | mlir/lib/Target/Cpp/TranslateToCpp.cpp | 6 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVM/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVM/ROCDL/Target.cpp | 302 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp | 10 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 165 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp | 11 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/ModuleImport.cpp | 34 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 30 |
8 files changed, 363 insertions, 212 deletions
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 202df89025f2..626638282efe 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -1574,6 +1574,12 @@ LogicalResult CppEmitter::emitType(Location loc, Type type) { } if (auto iType = dyn_cast<IndexType>(type)) return (os << "size_t"), success(); + if (auto sType = dyn_cast<emitc::SizeTType>(type)) + return (os << "size_t"), success(); + if (auto sType = dyn_cast<emitc::SignedSizeTType>(type)) + return (os << "ssize_t"), success(); + if (auto pType = dyn_cast<emitc::PtrDiffTType>(type)) + return (os << "ptrdiff_t"), success(); if (auto tType = dyn_cast<TensorType>(type)) { if (!tType.hasRank()) return emitError(loc, "cannot emit unranked tensor type"); diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 5a3fa160850b..93dc5ff9d35b 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -32,6 +32,8 @@ endif() add_mlir_dialect_library(MLIRNVVMTarget NVVM/Target.cpp + OBJECT + ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR @@ -109,6 +111,8 @@ endif() add_mlir_dialect_library(MLIRROCDLTarget ROCDL/Target.cpp + OBJECT + LINK_COMPONENTS MCParser ${AMDGPU_LIBS} @@ -123,17 +127,18 @@ add_mlir_dialect_library(MLIRROCDLTarget ) if(MLIR_ENABLE_ROCM_CONVERSIONS) - if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building mlir with ROCm support requires the AMDGPU backend") - endif() - if (DEFINED ROCM_PATH) set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") elseif(DEFINED ENV{ROCM_PATH}) set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") else() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + if (WIN32) + # Avoid setting an UNIX path for Windows. 
+ # TODO: Eventually migrate to FindHIP once it becomes a part of CMake. + set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") + else() + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + endif() endif() message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index cc13e5b7436e..047d214b751f 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -17,9 +17,6 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" -#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" @@ -111,9 +108,6 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( for (Attribute attr : files.getValue()) if (auto file = dyn_cast<StringAttr>(attr)) fileList.push_back(file.str()); - - // Append standard ROCm device bitcode libraries to the files to be loaded. 
- (void)appendStandardLibs(); } void SerializeGPUModuleBase::init() { @@ -138,28 +132,63 @@ ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const { return fileList; } -LogicalResult SerializeGPUModuleBase::appendStandardLibs() { +LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { + if (libs == AMDGCNLibraries::None) + return success(); StringRef pathRef = getToolkitPath(); - if (!pathRef.empty()) { - SmallVector<char, 256> path; - path.insert(path.begin(), pathRef.begin(), pathRef.end()); - llvm::sys::path::append(path, "amdgcn", "bitcode"); - pathRef = StringRef(path.data(), path.size()); - if (!llvm::sys::fs::is_directory(pathRef)) { - getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef - << " does not exist or is not a directory."; - return failure(); - } - StringRef isaVersion = - llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); - isaVersion.consume_front("gfx"); - return getCommonBitcodeLibs(fileList, path, isaVersion); + + // Get the path for the device libraries + SmallString<256> path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + + // Fail if the path is invalid. + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory"; + return failure(); } + + // Get the ISA version. + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + + // Helper function for adding a library. 
+ auto addLib = [&](const Twine &lib) -> bool { + auto baseSize = path.size(); + llvm::sys::path::append(path, lib); + StringRef pathRef(path.data(), path.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitRemark() << "bitcode library path: " << pathRef + << " does not exist or is not a file"; + return true; + } + fileList.push_back(pathRef.str()); + path.truncate(baseSize); + return false; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found, i.e. + // if any of the `addLib` failed. + if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || + (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || + (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || + (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) || + (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) && + addLib("oclc_isa_version_" + isaVersion + ".bc"))) + return failure(); return success(); } std::optional<SmallVector<std::unique_ptr<llvm::Module>>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { + // Return if there are no libs to load. + if (deviceLibs == AMDGCNLibraries::None && fileList.empty()) + return SmallVector<std::unique_ptr<llvm::Module>>(); + if (failed(appendStandardLibs(deviceLibs))) + return std::nullopt; SmallVector<std::unique_ptr<llvm::Module>> bcFiles; if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, true))) @@ -174,80 +203,77 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); + // Override the libModules datalayout and target triple with the compiler's + // data layout should there be a discrepancy. 
+ setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { - [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine = - getOrCreateTargetMachine(); - assert(targetMachine && "expect a TargetMachine"); - addControlVariables(module, target.hasWave64(), target.hasDaz(), + // If all libraries are not set, traverse the module to determine which + // libraries are required. + if (deviceLibs != AMDGCNLibraries::All) { + for (llvm::Function &f : module.functions()) { + if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { + StringRef funcName = f.getName(); + if ("printf" == funcName) + deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | + AMDGCNLibraries::Ocml; + if (funcName.starts_with("__ockl_")) + deviceLibs |= AMDGCNLibraries::Ockl; + if (funcName.starts_with("__ocml_")) + deviceLibs |= AMDGCNLibraries::Ocml; + if (funcName == "__atomic_work_item_fence") + deviceLibs |= AMDGCNLibraries::Hip; + } + } + } + addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } -// Get the paths of ROCm device libraries. -LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( - llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath, - StringRef isaVersion) { - auto addLib = [&](StringRef path) -> bool { - if (!llvm::sys::fs::is_regular_file(path)) { - getOperation().emitRemark() << "Bitcode library path: " << path - << " does not exist or is not a file.\n"; - return true; - } - libs.push_back(path.str()); - return false; - }; - auto getLibPath = [&libPath](Twine lib) { - auto baseSize = libPath.size(); - llvm::sys::path::append(libPath, lib + ".bc"); - std::string path(StringRef(libPath.data(), libPath.size()).str()); - libPath.truncate(baseSize); - return path; - }; - - // Add ROCm device libraries. 
Fail if any of the libraries is not found. - if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || - addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || - addLib(getLibPath("oclc_isa_version_" + isaVersion))) - return failure(); - return success(); -} - void SerializeGPUModuleBase::addControlVariables( - llvm::Module &module, bool wave64, bool daz, bool finiteOnly, - bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { - llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); - auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { + llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, + StringRef abiVer) { + // Return if no device libraries are required. + if (libs == AMDGCNLibraries::None) + return; + // Helper function for adding control variables. + auto addControlVariable = [&module](StringRef name, uint32_t value, + uint32_t bitwidth) { + if (module.getNamedGlobal(name)) + return; + llvm::IntegerType *type = + llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( - module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i8Ty, enable), name, nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + module, /*isConstant=*/type, true, + llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, + /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + /*addressSpace=*/4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - controlVariable->setAlignment(llvm::MaybeAlign(1)); + controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; - addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); 
- addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); - addControlVariable("__oclc_daz_opt", daz || fastMath); - addControlVariable("__oclc_correctly_rounded_sqrt32", - correctSqrt && !fastMath); - addControlVariable("__oclc_wavefrontsize64", wave64); - - llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); - int abi = 500; - abiVer.getAsInteger(0, abi); - llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( - module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); - abiVersion->setVisibility( - llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - abiVersion->setAlignment(llvm::MaybeAlign(4)); - abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + // Add ocml related control variables. + if (any(libs & AMDGCNLibraries::Ocml)) { + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); + addControlVariable("__oclc_daz_opt", daz || fastMath, 8); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath, 8); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); + } + // Add ocml or ockl related control variables. 
+ if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { + addControlVariable("__oclc_wavefrontsize64", wave64, 8); + int abi = 500; + abiVer.getAsInteger(0, abi); + addControlVariable("__oclc_ABI_version", abi, 32); + } } std::optional<SmallVector<char, 0>> @@ -307,53 +333,21 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { if (!tap) { emitError(loc, "assembler initialization error"); - return {}; + return std::nullopt; } parser->setTargetParser(*tap); parser->Run(false); - - return result; -} - -#if MLIR_ENABLE_ROCM_CONVERSIONS -namespace { -class AMDGPUSerializer : public SerializeGPUModuleBase { -public: - AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions); - - gpu::GPUModuleOp getOperation(); - - // Compile to HSA. - std::optional<SmallVector<char, 0>> - compileToBinary(const std::string &serializedISA); - - std::optional<SmallVector<char, 0>> - moduleToObject(llvm::Module &llvmModule) override; - -private: - // Target options. - gpu::TargetOptions targetOptions; -}; -} // namespace - -AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions) - : SerializeGPUModuleBase(module, target, targetOptions), - targetOptions(targetOptions) {} - -gpu::GPUModuleOp AMDGPUSerializer::getOperation() { - return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation()); + return std::move(result); } std::optional<SmallVector<char, 0>> -AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { +SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. 
std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA); if (!isaBinary) { - getOperation().emitError() << "Failed during ISA assembling."; + getOperation().emitError() << "failed during ISA assembling"; return std::nullopt; } @@ -363,7 +357,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, tempIsaBinaryFilename)) { getOperation().emitError() - << "Failed to create a temporary file for dumping the ISA binary."; + << "failed to create a temporary file for dumping the ISA binary"; return std::nullopt; } llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); @@ -378,7 +372,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) { getOperation().emitError() - << "Failed to create a temporary file for the HSA code object."; + << "failed to create a temporary file for the HSA code object"; return std::nullopt; } llvm::FileRemover cleanupHsaco(tempHsacoFilename); @@ -389,7 +383,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { lldPath, {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); if (lldResult != 0) { - getOperation().emitError() << "lld invocation failed."; + getOperation().emitError() << "lld invocation failed"; return std::nullopt; } @@ -398,7 +392,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); if (!hsacoFile) { getOperation().emitError() - << "Failed to read the HSA code object from the temp file."; + << "failed to read the HSA code object from the temp file"; return std::nullopt; } @@ -407,13 +401,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { return SmallVector<char, 0>(buffer.begin(), buffer.end()); } -std::optional<SmallVector<char, 0>> 
-AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { +std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl( + const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ - llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() - << "\n" + llvm::dbgs() << "LLVM IR for module: " + << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE @@ -423,8 +417,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { std::optional<llvm::TargetMachine *> targetMachine = getOrCreateTargetMachine(); if (!targetMachine) { - getOperation().emitError() << "Target Machine unavailable for triple " - << triple << ", can't compile with LLVM\n"; + getOperation().emitError() << "target Machine unavailable for triple " + << triple << ", can't compile with LLVM"; return std::nullopt; } @@ -432,12 +426,13 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { std::optional<std::string> serializedISA = translateToISA(llvmModule, **targetMachine); if (!serializedISA) { - getOperation().emitError() << "Failed translating the module to ISA."; + getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" + llvm::dbgs() << "ISA for module: " + << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE @@ -445,9 +440,41 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end()); + // Compiling to binary requires a valid ROCm path, fail if it's not found. 
+ if (getToolkitPath().empty()) { + getOperation().emitError() << "invalid ROCm path, please set a valid path"; + return std::nullopt; + } + // Compile to binary. return compileToBinary(*serializedISA); } + +#if MLIR_ENABLE_ROCM_CONVERSIONS +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + std::optional<SmallVector<char, 0>> + moduleToObject(llvm::Module &llvmModule) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +std::optional<SmallVector<char, 0>> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { + return moduleToObjectImpl(targetOptions, llvmModule); +} #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( @@ -457,7 +484,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( if (!module) return std::nullopt; if (!mlir::isa<gpu::GPUModuleOp>(module)) { - module->emitError("Module must be a GPU module."); + module->emitError("module must be a GPU module"); return std::nullopt; } #if MLIR_ENABLE_ROCM_CONVERSIONS @@ -466,8 +493,8 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( serializer.init(); return serializer.run(); #else - module->emitError("The `AMDGPU` target was not built. Please enable it when " - "building LLVM."); + module->emitError("the `AMDGPU` target was not built. 
Please enable it when " + "building LLVM"); return std::nullopt; #endif // MLIR_ENABLE_ROCM_CONVERSIONS } @@ -477,10 +504,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, const SmallVector<char, 0> &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); + // If format is `fatbin` transform it to binary as `fatbin` is not yet + // supported. + if (format > gpu::CompilationTarget::Binary) + format = gpu::CompilationTarget::Binary; + + DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr<gpu::ObjectAttr>( - attribute, - format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary - : format, - builder.getStringAttr(StringRef(object.data(), object.size())), nullptr); + attribute, format, + builder.getStringAttr(StringRef(object.data(), object.size())), + properties); } diff --git a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp index 455889377953..92e786b130ab 100644 --- a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp @@ -16,9 +16,12 @@ #include "mlir/Target/LLVMIR/Dialect/All.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Tools/mlir-translate/Translation.h" +#include "llvm/IR/DebugProgramInstruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +extern llvm::cl::opt<bool> WriteNewDbgInfoFormat; + using namespace mlir; namespace mlir { @@ -31,6 +34,13 @@ void registerToLLVMIRTranslation() { if (!llvmModule) return failure(); + // When printing LLVM IR, we should convert the module to the debug info + // format that LLVM expects us to print. 
+ // See https://llvm.org/docs/RemoveDIsDebugInfo.html + llvm::ScopedDbgInfoFormatSetter formatSetter(*llvmModule, + WriteNewDbgInfoFormat); + if (WriteNewDbgInfoFormat) + llvmModule->removeDebugIntrinsicDeclarations(); llvmModule->print(output, nullptr); return success(); }, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6ec4c120c11e..eabc4b30f57a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -30,6 +30,7 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/ReplaceConstant.h" #include "llvm/Support/FileSystem.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -388,8 +389,19 @@ static LogicalResult inlineConvertOmpRegions( // be processed multiple times. moduleTranslation.forgetMapping(region); - if (potentialTerminator && potentialTerminator->isTerminator()) - potentialTerminator->insertAfter(&builder.GetInsertBlock()->back()); + if (potentialTerminator && potentialTerminator->isTerminator()) { + llvm::BasicBlock *block = builder.GetInsertBlock(); + if (block->empty()) { + // this can happen for really simple reduction init regions e.g. + // %0 = llvm.mlir.constant(0 : i32) : i32 + // omp.yield(%0 : i32) + // because the llvm.mlir.constant (MLIR op) isn't converted into any + // llvm op + potentialTerminator->insertInto(block, block->begin()); + } else { + potentialTerminator->insertAfter(&block->back()); + } + } return success(); } @@ -762,7 +774,7 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, /// Allocate space for privatized reduction variables. 
template <typename T> static void allocByValReductionVars( - T loop, llvm::IRBuilderBase &builder, + T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, @@ -770,17 +782,15 @@ static void allocByValReductionVars( DenseMap<Value, llvm::Value *> &reductionVariableMap, llvm::ArrayRef<bool> isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.restoreIP(allocaIP); - auto args = - loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { if (isByRefs[i]) continue; llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType())); - moduleTranslation.mapValue(args[i], var); - privateReductionVariables.push_back(var); + moduleTranslation.mapValue(reductionArgs[i], var); + privateReductionVariables[i] = var; reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); } } @@ -826,14 +836,17 @@ static void collectReductionInfo( // Collect the reduction information. 
reductionInfos.reserve(numReductions); for (unsigned i = 0; i < numReductions; ++i) { - llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; + llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr; if (owningAtomicReductionGens[i]) atomicGen = owningAtomicReductionGens[i]; llvm::Value *variable = moduleTranslation.lookupValue(loop.getReductionVars()[i]); reductionInfos.push_back( {moduleTranslation.convertType(reductionDecls[i].getType()), variable, - privateReductionVariables[i], owningReductionGens[i], atomicGen}); + privateReductionVariables[i], + /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar, + owningReductionGens[i], + /*ReductionGenClang=*/nullptr, atomicGen}); } } @@ -911,16 +924,20 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - SmallVector<llvm::Value *> privateReductionVariables; + SmallVector<llvm::Value *> privateReductionVariables( + wsloopOp.getNumReductionVars()); DenseMap<Value, llvm::Value *> reductionVariableMap; - allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, - reductionDecls, privateReductionVariables, + + MutableArrayRef<BlockArgument> reductionArgs = + wsloopOp.getRegion().getArguments(); + + allocByValReductionVars(wsloopOp, reductionArgs, builder, moduleTranslation, + allocaIP, reductionDecls, privateReductionVariables, reductionVariableMap, isByRef); // Before the loop, store the initial values of reductions into reduction // variables. Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. 
- ArrayRef<BlockArgument> reductionArgs = wsloopOp.getRegion().getArguments(); for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) { SmallVector<llvm::Value *> phis; @@ -942,7 +959,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // ptr builder.CreateStore(phis[0], var); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = var; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]); } else { @@ -1140,20 +1157,40 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Collect reduction declarations SmallVector<omp::DeclareReductionOp> reductionDecls; collectReductionDecls(opInst, reductionDecls); - SmallVector<llvm::Value *> privateReductionVariables; + SmallVector<llvm::Value *> privateReductionVariables( + opInst.getNumReductionVars()); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars DenseMap<Value, llvm::Value *> reductionVariableMap; - allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP, - reductionDecls, privateReductionVariables, + + MutableArrayRef<BlockArgument> reductionArgs = + opInst.getRegion().getArguments().slice( + opInst.getNumAllocateVars() + opInst.getNumAllocatorsVars(), + opInst.getNumReductionVars()); + + allocByValReductionVars(opInst, reductionArgs, builder, moduleTranslation, + allocaIP, reductionDecls, privateReductionVariables, reductionVariableMap, isByRef); // Initialize reduction vars builder.restoreIP(allocaIP); - MutableArrayRef<BlockArgument> reductionArgs = - opInst.getRegion().getArguments().take_back( - opInst.getNumReductionVars()); + llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init"); + allocaIP = + InsertPointTy(allocaIP.getBlock(), + allocaIP.getBlock()->getTerminator()->getIterator()); + SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars()); + for (unsigned i = 0; i < 
opInst.getNumReductionVars(); ++i) { + if (isByRef[i]) { + // Allocate reduction variable (which is a pointer to the real reduction + // variable allocated in the inlined region) + byRefVars[i] = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + } + } + + builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca()); + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector<llvm::Value *> phis; @@ -1166,18 +1203,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - builder.restoreIP(allocaIP); + + // mapInitializationArg finishes its block with a terminator. We need to + // insert before that terminator. + builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); if (isByRef[i]) { - // Allocate reduction variable (which is a pointer to the real reduciton - // variable allocated in the inlined region) - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); // Store the result of the inlined region to the allocated reduction var // ptr - builder.CreateStore(phis[0], var); + builder.CreateStore(phis[0], byRefVars[i]); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = byRefVars[i]; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); } else { @@ -1275,7 +1311,26 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // region. The privatizer is processed in-place (see below) before it // gets inlined in the parallel region and therefore processing the // original op is dangerous. 
- return {privVar, privatizer.clone()}; + + MLIRContext &context = moduleTranslation.getContext(); + mlir::IRRewriter opCloner(&context); + opCloner.setInsertionPoint(privatizer); + auto clone = llvm::cast<mlir::omp::PrivateClauseOp>( + opCloner.clone(*privatizer)); + + // Unique the clone name to avoid clashes in the symbol table. + unsigned counter = 0; + SmallString<256> cloneName = SymbolTable::generateSymbolName<256>( + privatizer.getSymName(), + [&](llvm::StringRef candidate) { + return SymbolTable::lookupNearestSymbolFrom( + opInst, StringAttr::get(&context, candidate)) != + nullptr; + }, + counter); + + clone.setSymName(cloneName); + return {privVar, clone}; } } @@ -1925,12 +1980,6 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives - // the size in inconsistent byte or bit format. - uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type); - if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) - underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl); - if (auto memberClause = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) { // This calculates the size to transfer based on bounds and the underlying @@ -1956,6 +2005,12 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, } } + // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives + // the size in inconsistent byte or bit format. + uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type); + if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) + underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl); + // The size in bytes x number of elements, the sizeInBytes stored is // the underyling types size, e.g. 
if ptr<i32>, it'll be the i32's // size, so we do some on the fly runtime math to get the size in @@ -1966,7 +2021,7 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, } } - return builder.getInt64(underlyingTypeSzInBits / 8); + return builder.getInt64(dl.getTypeSizeInBits(type) / 8); } void collectMapDataFromMapOperands(MapInfoData &mapData, @@ -2263,7 +2318,7 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( // This creates the initial MEMBER_OF mapping that consists of // the parent/top level container (same as above effectively, except - // with a fixed initial compile time size and seperate maptype which + // with a fixed initial compile time size and separate maptype which // indicates the true mape type (tofrom etc.). This parent mapping is // only relevant if the structure in its totality is being mapped, // otherwise the above suffices. @@ -2388,7 +2443,7 @@ static void processMapWithMembersOf( // If we have a partial map (no parent referenced in the map clauses of the // directive, only members) and only a single member, we do not need to bind - // the map of the member to the parent, we can pass the member seperately. + // the map of the member to the parent, we can pass the member separately. if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) { auto memberClause = llvm::cast<mlir::omp::MapInfoOp>( parentClause.getMembers()[0].getDefiningOp()); @@ -2425,7 +2480,7 @@ createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) { for (size_t i = 0; i < mapData.MapClause.size(); ++i) { - // if it's declare target, skip it, it's handled seperately. + // if it's declare target, skip it, it's handled separately. 
if (!mapData.IsDeclareTarget[i]) { auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]); @@ -2847,7 +2902,7 @@ static bool targetOpSupported(Operation &opInst) { static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, - llvm::IRBuilderBase &builder) { + llvm::IRBuilderBase &builder, llvm::Function *func) { for (size_t i = 0; i < mapData.MapClause.size(); ++i) { // In the case of declare target mapped variables, the basePointer is // the reference pointer generated by the convertDeclareTargetAttr @@ -2862,19 +2917,31 @@ handleDeclareTargetMapVar(MapInfoData &mapData, // reference pointer and the pointer are assigned in the kernel argument // structure for the host. if (mapData.IsDeclareTarget[i]) { + // If the original map value is a constant, then we have to make sure all + // of its uses within the current kernel/function that we are going to + // rewrite are converted to instructions, as we will be altering the old + // use (OriginalValue) from a constant to an instruction, which will be + // illegal and ICE the compiler if the user is a constant expression of + // some kind e.g. a constant GEP. + if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i])) + convertUsersOfConstantsToInstructions(constant, func, false); + // The users iterator will get invalidated if we modify an element, - // so we populate this vector of uses to alter each user on an individual - // basis to emit its own load (rather than one load for all). + // so we populate this vector of uses to alter each user on an + // individual basis to emit its own load (rather than one load for + // all). 
llvm::SmallVector<llvm::User *> userVec; for (llvm::User *user : mapData.OriginalValue[i]->users()) userVec.push_back(user); for (llvm::User *user : userVec) { if (auto *insn = dyn_cast<llvm::Instruction>(user)) { - auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(), - mapData.BasePointers[i]); - load->moveBefore(insn); - user->replaceUsesOfWith(mapData.OriginalValue[i], load); + if (insn->getFunction() == func) { + auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(), + mapData.BasePointers[i]); + load->moveBefore(insn); + user->replaceUsesOfWith(mapData.OriginalValue[i], load); + } } } } @@ -2992,6 +3059,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto &targetRegion = targetOp.getRegion(); DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>()); SmallVector<Value> mapOperands = targetOp.getMapOperands(); + llvm::Function *llvmOutlinedFn = nullptr; LogicalResult bodyGenStatus = success(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; @@ -3001,7 +3069,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, // original function to the new outlined function. 
llvm::Function *llvmParentFn = moduleTranslation.lookupFunction(parentFn.getName()); - llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent(); + llvmOutlinedFn = codeGenIP.getBlock()->getParent(); assert(llvmParentFn && llvmOutlinedFn && "Both parent and outlined functions must exist at this point"); @@ -3096,7 +3164,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, // Remap access operations to declare target reference pointers for the // device, essentially generating extra loadop's as necessary if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) - handleDeclareTargetMapVar(mapData, moduleTranslation, builder); + handleDeclareTargetMapVar(mapData, moduleTranslation, builder, + llvmOutlinedFn); return bodyGenStatus; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp index 94423b35d1ff..2a146f5efed3 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp @@ -17,9 +17,9 @@ #include "mlir/IR/Operation.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsAMDGPU.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/Support/raw_ostream.h" using namespace mlir; @@ -32,12 +32,9 @@ static llvm::Value *createIntrinsicCallWithRange(llvm::IRBuilderBase &builder, auto *inst = llvm::cast<llvm::CallInst>( createIntrinsicCall(builder, intrinsic, {}, {})); if (maybeRange) { - SmallVector<llvm::APInt, 2> apInts; - for (int32_t i : maybeRange.asArrayRef()) - apInts.push_back(llvm::APInt(32, i)); - llvm::MDBuilder mdBuilder(builder.getContext()); - llvm::MDNode *range = mdBuilder.createRange(apInts[0], apInts[1]); - inst->setMetadata(llvm::LLVMContext::MD_range, range); + llvm::ConstantRange Range(APInt(32, maybeRange[0]), + APInt(32, maybeRange[1])); + 
inst->addRangeRetAttr(Range); } return inst; } diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index 191b84acd56f..cfcf33436a89 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1664,23 +1664,26 @@ static void processMemoryEffects(llvm::Function *func, LLVMFuncOp funcOp) { // List of LLVM IR attributes that map to an explicit attribute on the MLIR // LLVMFuncOp. -static constexpr std::array ExplicitAttributes{ - StringLiteral("aarch64_pstate_sm_enabled"), - StringLiteral("aarch64_pstate_sm_body"), - StringLiteral("aarch64_pstate_sm_compatible"), - StringLiteral("aarch64_new_za"), - StringLiteral("aarch64_preserves_za"), +static constexpr std::array kExplicitAttributes{ StringLiteral("aarch64_in_za"), - StringLiteral("aarch64_out_za"), StringLiteral("aarch64_inout_za"), - StringLiteral("vscale_range"), + StringLiteral("aarch64_new_za"), + StringLiteral("aarch64_out_za"), + StringLiteral("aarch64_preserves_za"), + StringLiteral("aarch64_pstate_sm_body"), + StringLiteral("aarch64_pstate_sm_compatible"), + StringLiteral("aarch64_pstate_sm_enabled"), + StringLiteral("alwaysinline"), + StringLiteral("approx-func-fp-math"), StringLiteral("frame-pointer"), - StringLiteral("target-features"), - StringLiteral("unsafe-fp-math"), StringLiteral("no-infs-fp-math"), StringLiteral("no-nans-fp-math"), - StringLiteral("approx-func-fp-math"), StringLiteral("no-signed-zeros-fp-math"), + StringLiteral("noinline"), + StringLiteral("optnone"), + StringLiteral("target-features"), + StringLiteral("unsafe-fp-math"), + StringLiteral("vscale_range"), }; static void processPassthroughAttrs(llvm::Function *func, LLVMFuncOp funcOp) { @@ -1709,7 +1712,7 @@ static void processPassthroughAttrs(llvm::Function *func, LLVMFuncOp funcOp) { auto keyAttr = StringAttr::get(context, attrName); // Skip attributes that map to an explicit attribute on the LLVMFuncOp. 
- if (llvm::is_contained(ExplicitAttributes, attrName)) + if (llvm::is_contained(kExplicitAttributes, attrName)) continue; if (attr.isStringAttribute()) { @@ -1745,6 +1748,13 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func, processMemoryEffects(func, funcOp); processPassthroughAttrs(func, funcOp); + if (func->hasFnAttribute(llvm::Attribute::NoInline)) + funcOp.setNoInline(true); + if (func->hasFnAttribute(llvm::Attribute::AlwaysInline)) + funcOp.setAlwaysInline(true); + if (func->hasFnAttribute(llvm::Attribute::OptimizeNone)) + funcOp.setOptimizeNone(true); + if (func->hasFnAttribute("aarch64_pstate_sm_enabled")) funcOp.setArmStreaming(true); else if (func->hasFnAttribute("aarch64_pstate_sm_body")) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 7b86b250c294..6e8b2dec75b7 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -64,6 +64,8 @@ using namespace mlir; using namespace mlir::LLVM; using namespace mlir::LLVM::detail; +extern llvm::cl::opt<bool> UseNewDbgInfoFormat; + #include "mlir/Dialect/LLVMIR/LLVMConversionEnumsToLLVM.inc" namespace { @@ -1390,10 +1392,10 @@ LogicalResult ModuleTranslation::convertDialectAttributes( return success(); } -/// Converts the function attributes from LLVMFuncOp and attaches them to the -/// llvm::Function. -static void convertFunctionAttributes(LLVMFuncOp func, - llvm::Function *llvmFunc) { +/// Converts memory effect attributes from `func` and attaches them to +/// `llvmFunc`. +static void convertFunctionMemoryAttributes(LLVMFuncOp func, + llvm::Function *llvmFunc) { if (!func.getMemory()) return; @@ -1412,6 +1414,18 @@ static void convertFunctionAttributes(LLVMFuncOp func, llvmFunc->setMemoryEffects(newMemEffects); } +/// Converts function attributes from `func` and attaches them to `llvmFunc`. 
+static void convertFunctionAttributes(LLVMFuncOp func, + llvm::Function *llvmFunc) { + if (func.getNoInlineAttr()) + llvmFunc->addFnAttr(llvm::Attribute::NoInline); + if (func.getAlwaysInlineAttr()) + llvmFunc->addFnAttr(llvm::Attribute::AlwaysInline); + if (func.getOptimizeNoneAttr()) + llvmFunc->addFnAttr(llvm::Attribute::OptimizeNone); + convertFunctionMemoryAttributes(func, llvmFunc); +} + FailureOr<llvm::AttrBuilder> ModuleTranslation::convertParameterAttrs(LLVMFuncOp func, int argIdx, DictionaryAttr paramAttrs) { @@ -1789,6 +1803,9 @@ prepareLLVMModule(Operation *m, llvm::LLVMContext &llvmContext, StringRef name) { m->getContext()->getOrLoadDialect<LLVM::LLVMDialect>(); auto llvmModule = std::make_unique<llvm::Module>(name, llvmContext); + // ModuleTranslation can currently only construct modules in the old debug + // info format, so set the flag accordingly. + llvmModule->setNewDbgInfoFormatFlag(false); if (auto dataLayoutAttr = m->getDiscardableAttr(LLVM::LLVMDialect::getDataLayoutAttrName())) { llvmModule->setDataLayout(cast<StringAttr>(dataLayoutAttr).getValue()); @@ -1867,6 +1884,11 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext, if (failed(translator.convertFunctions())) return nullptr; + // Once we've finished constructing elements in the module, we should convert + // it to use the debug info format desired by LLVM. + // See https://llvm.org/docs/RemoveDIsDebugInfo.html + translator.llvmModule->setIsNewDbgInfoFormat(UseNewDbgInfoFormat); + if (!disableVerification && llvm::verifyModule(*translator.llvmModule, &llvm::errs())) return nullptr; |
