diff options
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp')
| -rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp | 458 |
1 files changed, 0 insertions, 458 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp deleted file mode 100644 index a4f19981eec3..000000000000 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ /dev/null @@ -1,458 +0,0 @@ -//===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that serializes a gpu module into HSAco blob and -// adds that blob as a string attribute of the module. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Config/mlir-config.h" -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/IR/Location.h" -#include "mlir/IR/MLIRContext.h" - -#if MLIR_ENABLE_ROCM_CONVERSIONS -#include "mlir/ExecutionEngine/OptUtils.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Support/FileUtilities.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" - -#include "llvm/IR/Constants.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" - -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/TargetRegistry.h" - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Threading.h" -#include "llvm/Support/WithColor.h" - -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" - -#include "llvm/Transforms/IPO/Internalize.h" - -#include <optional> - -using namespace mlir; - -namespace { -class SerializeToHsacoPass - : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { - static llvm::once_flag initializeBackendOnce; - -public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToHsacoPass) - - SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, - int optLevel); - SerializeToHsacoPass(const SerializeToHsacoPass &other); - StringRef getArgument() const override { return "gpu-to-hsaco"; } - StringRef getDescription() const override { - return "Lower GPU kernel function to HSACO binary annotations"; - } - -protected: - Option<std::string> rocmPath{*this, "rocm-path", - llvm::cl::desc("Path to ROCm install")}; - - // Overload to allow linking in device libs - std::unique_ptr<llvm::Module> - translateToLLVMIR(llvm::LLVMContext &llvmContext) override; - -private: - // Loads LLVM bitcode libraries - std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> - loadLibraries(SmallVectorImpl<char> &path, - SmallVectorImpl<StringRef> &libraries, - llvm::LLVMContext &context); - - // Serializes ROCDL to HSACO. - std::unique_ptr<std::vector<char>> - serializeISA(const std::string &isa) override; - - LogicalResult assembleIsa(const std::string &isa, - SmallVectorImpl<char> &result); - std::unique_ptr<std::vector<char>> createHsaco(ArrayRef<char> isaBinary); - - std::string getRocmPath(); -}; -} // namespace - -SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other) - : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {} - -/// Get a user-specified path to ROCm -// Tries, in order, the --rocm-path option, the ROCM_PATH environment variable -// and a compile-time default -std::string SerializeToHsacoPass::getRocmPath() { - if (rocmPath.getNumOccurrences() > 0) - return rocmPath.getValue(); - - return __DEFAULT_ROCM_PATH__; -} - -// Sets the 'option' to 'value' unless it already has a value. -static void maybeSetOption(Pass::Option<std::string> &option, - function_ref<std::string()> getValue) { - if (!option.hasValue()) - option = getValue(); -} - -llvm::once_flag SerializeToHsacoPass::initializeBackendOnce; - -SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, - StringRef features, int optLevel) { - // No matter how this pass is constructed, ensure that the AMDGPU backend - // is initialized exactly once. - llvm::call_once(initializeBackendOnce, []() { - // Initialize LLVM AMDGPU backend. - LLVMInitializeAMDGPUAsmParser(); - LLVMInitializeAMDGPUAsmPrinter(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTargetMC(); - }); - maybeSetOption(this->triple, [&triple] { return triple.str(); }); - maybeSetOption(this->chip, [&arch] { return arch.str(); }); - maybeSetOption(this->features, [&features] { return features.str(); }); - if (this->optLevel.getNumOccurrences() == 0) - this->optLevel.setValue(optLevel); -} - -std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> -SerializeToHsacoPass::loadLibraries(SmallVectorImpl<char> &path, - SmallVectorImpl<StringRef> &libraries, - llvm::LLVMContext &context) { - SmallVector<std::unique_ptr<llvm::Module>, 3> ret; - size_t dirLength = path.size(); - - if (!llvm::sys::fs::is_directory(path)) { - getOperation().emitRemark() << "Bitcode path: " << path - << " does not exist or is not a directory\n"; - return std::nullopt; - } - - for (const StringRef file : libraries) { - llvm::SMDiagnostic error; - llvm::sys::path::append(path, file); - llvm::StringRef pathRef(path.data(), path.size()); - std::unique_ptr<llvm::Module> library = - llvm::getLazyIRFileModule(pathRef, error, context); - path.truncate(dirLength); - if (!library) { - getOperation().emitError() << "Failed to load library " << file - << " from " << path << error.getMessage(); - return std::nullopt; - } - // Some ROCM builds don't strip this like they should - if (auto *openclVersion = library->getNamedMetadata("opencl.ocl.version")) - library->eraseNamedMetadata(openclVersion); - // Stop spamming us with clang version numbers - if (auto *ident = library->getNamedMetadata("llvm.ident")) - library->eraseNamedMetadata(ident); - ret.push_back(std::move(library)); - } - - return std::move(ret); -} - -std::unique_ptr<llvm::Module> -SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { - // MLIR -> LLVM translation - std::unique_ptr<llvm::Module> ret = - gpu::SerializeToBlobPass::translateToLLVMIR(llvmContext); - - if (!ret) { - getOperation().emitOpError("Module lowering failed"); - return ret; - } - // Walk the LLVM module in order to determine if we need to link in device - // libs - bool needOpenCl = false; - bool needOckl = false; - bool needOcml = false; - for (llvm::Function &f : ret->functions()) { - if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { - StringRef funcName = f.getName(); - if ("printf" == funcName) - needOpenCl = true; - if (funcName.starts_with("__ockl_")) - needOckl = true; - if (funcName.starts_with("__ocml_")) - needOcml = true; - } - } - - if (needOpenCl) - needOcml = needOckl = true; - - // No libraries needed (the typical case) - if (!(needOpenCl || needOcml || needOckl)) - return ret; - - // Define one of the control constants the ROCm device libraries expect to be - // present These constants can either be defined in the module or can be - // imported by linking in bitcode that defines the constant. To simplify our - // logic, we define the constants into the module we are compiling - auto addControlConstant = [&module = *ret](StringRef name, uint32_t value, - uint32_t bitwidth) { - using llvm::GlobalVariable; - if (module.getNamedGlobal(name)) { - return; - } - llvm::IntegerType *type = - llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); - auto *initializer = llvm::ConstantInt::get(type, value, /*isSigned=*/false); - auto *constant = new GlobalVariable( - module, type, - /*isConstant=*/true, GlobalVariable::LinkageTypes::LinkOnceODRLinkage, - initializer, name, - /*before=*/nullptr, - /*threadLocalMode=*/GlobalVariable::ThreadLocalMode::NotThreadLocal, - /*addressSpace=*/4); - constant->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local); - constant->setVisibility( - GlobalVariable::VisibilityTypes::ProtectedVisibility); - constant->setAlignment(llvm::MaybeAlign(bitwidth / 8)); - }; - - // Set up control variables in the module instead of linking in tiny bitcode - if (needOcml) { - // TODO(kdrewnia): Enable math optimizations once we have support for - // `-ffast-math`-like options - addControlConstant("__oclc_finite_only_opt", 0, 8); - addControlConstant("__oclc_daz_opt", 0, 8); - addControlConstant("__oclc_correctly_rounded_sqrt32", 1, 8); - addControlConstant("__oclc_unsafe_math_opt", 0, 8); - } - if (needOcml || needOckl) { - addControlConstant("__oclc_wavefrontsize64", 1, 8); - StringRef chipSet = this->chip.getValue(); - if (chipSet.starts_with("gfx")) - chipSet = chipSet.substr(3); - uint32_t minor = - llvm::APInt(32, chipSet.substr(chipSet.size() - 2), 16).getZExtValue(); - uint32_t major = llvm::APInt(32, chipSet.substr(0, chipSet.size() - 2), 10) - .getZExtValue(); - uint32_t isaNumber = minor + 1000 * major; - addControlConstant("__oclc_ISA_version", isaNumber, 32); - - // This constant must always match the default code object ABI version - // of the AMDGPU backend. - addControlConstant("__oclc_ABI_version", 500, 32); - } - - // Determine libraries we need to link - order matters due to dependencies - llvm::SmallVector<StringRef, 4> libraries; - if (needOpenCl) - libraries.push_back("opencl.bc"); - if (needOcml) - libraries.push_back("ocml.bc"); - if (needOckl) - libraries.push_back("ockl.bc"); - - std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> mbModules; - std::string theRocmPath = getRocmPath(); - llvm::SmallString<32> bitcodePath(theRocmPath); - llvm::sys::path::append(bitcodePath, "amdgcn", "bitcode"); - mbModules = loadLibraries(bitcodePath, libraries, llvmContext); - - if (!mbModules) { - getOperation() - .emitWarning("Could not load required device libraries") - .attachNote() - << "This will probably cause link-time or run-time failures"; - return ret; // We can still abort here - } - - llvm::Linker linker(*ret); - for (std::unique_ptr<llvm::Module> &libModule : *mbModules) { - // This bitcode linking code is substantially similar to what is used in - // hip-clang It imports the library functions into the module, allowing LLVM - // optimization passes (which must run after linking) to optimize across the - // libraries and the module's code. We also only import symbols if they are - // referenced by the module or a previous library since there will be no - // other source of references to those symbols in this compilation and since - // we don't want to bloat the resulting code object. - bool err = linker.linkInModule( - std::move(libModule), llvm::Linker::Flags::LinkOnlyNeeded, - [](llvm::Module &m, const StringSet<> &gvs) { - llvm::internalizeModule(m, [&gvs](const llvm::GlobalValue &gv) { - return !gv.hasName() || (gvs.count(gv.getName()) == 0); - }); - }); - // True is linker failure - if (err) { - getOperation().emitError( - "Unrecoverable failure during device library linking."); - // We have no guaranties about the state of `ret`, so bail - return nullptr; - } - } - - return ret; -} - -LogicalResult SerializeToHsacoPass::assembleIsa(const std::string &isa, - SmallVectorImpl<char> &result) { - auto loc = getOperation().getLoc(); - - llvm::raw_svector_ostream os(result); - - llvm::Triple triple(llvm::Triple::normalize(this->triple)); - std::string error; - const llvm::Target *target = - llvm::TargetRegistry::lookupTarget(triple.normalize(), error); - if (!target) - return emitError(loc, Twine("failed to lookup target: ") + error); - - llvm::SourceMgr srcMgr; - srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); - - const llvm::MCTargetOptions mcOptions; - std::unique_ptr<llvm::MCRegisterInfo> mri( - target->createMCRegInfo(this->triple)); - std::unique_ptr<llvm::MCAsmInfo> mai( - target->createMCAsmInfo(*mri, this->triple, mcOptions)); - std::unique_ptr<llvm::MCSubtargetInfo> sti( - target->createMCSubtargetInfo(this->triple, this->chip, this->features)); - - llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, - &mcOptions); - std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( - ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); - ctx.setObjectFileInfo(mofi.get()); - - SmallString<128> cwd; - if (!llvm::sys::fs::current_path(cwd)) - ctx.setCompilationDir(cwd); - - std::unique_ptr<llvm::MCStreamer> mcStreamer; - std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); - - llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); - llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); - mcStreamer.reset(target->createMCObjectStreamer( - triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), - mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), - *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); - - std::unique_ptr<llvm::MCAsmParser> parser( - createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); - std::unique_ptr<llvm::MCTargetAsmParser> tap( - target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); - - if (!tap) - return emitError(loc, "assembler initialization error"); - - parser->setTargetParser(*tap); - parser->Run(false); - - return success(); -} - -std::unique_ptr<std::vector<char>> -SerializeToHsacoPass::createHsaco(ArrayRef<char> isaBinary) { - auto loc = getOperation().getLoc(); - - // Save the ISA binary to a temp file. - int tempIsaBinaryFd = -1; - SmallString<128> tempIsaBinaryFilename; - if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, - tempIsaBinaryFilename)) { - emitError(loc, "temporary file for ISA binary creation error"); - return {}; - } - llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); - llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); - tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); - tempIsaBinaryOs.close(); - - // Create a temp file for HSA code object. - SmallString<128> tempHsacoFilename; - if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", - tempHsacoFilename)) { - emitError(loc, "temporary file for HSA code object creation error"); - return {}; - } - llvm::FileRemover cleanupHsaco(tempHsacoFilename); - - std::string theRocmPath = getRocmPath(); - llvm::SmallString<32> lldPath(theRocmPath); - llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); - int lldResult = llvm::sys::ExecuteAndWait( - lldPath, - {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); - if (lldResult != 0) { - emitError(loc, "lld invocation error"); - return {}; - } - - // Load the HSA code object. - auto hsacoFile = - llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); - if (!hsacoFile) { - emitError(loc, "read HSA code object from temp file error"); - return {}; - } - - StringRef buffer = (*hsacoFile)->getBuffer(); - return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); -} - -std::unique_ptr<std::vector<char>> -SerializeToHsacoPass::serializeISA(const std::string &isa) { - SmallVector<char, 0> isaBinary; - if (failed(assembleIsa(isa, isaBinary))) - return {}; - return createHsaco(isaBinary); -} - -// Register pass to serialize GPU kernel functions to a HSACO binary annotation. -void mlir::registerGpuSerializeToHsacoPass() { - PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO([] { - return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "", "", - 2); - }); -} - -/// Create an instance of the GPU kernel function to HSAco binary serialization -/// pass. -std::unique_ptr<Pass> mlir::createGpuSerializeToHsacoPass(StringRef triple, - StringRef arch, - StringRef features, - int optLevel) { - return std::make_unique<SerializeToHsacoPass>(triple, arch, features, - optLevel); -} - -#else // MLIR_ENABLE_ROCM_CONVERSIONS -void mlir::registerGpuSerializeToHsacoPass() {} -#endif // MLIR_ENABLE_ROCM_CONVERSIONS |
