diff options
Diffstat (limited to 'offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp')
| -rw-r--r-- | offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp new file mode 100644 index 000000000000..761e04e4c7bb --- /dev/null +++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp @@ -0,0 +1,201 @@ +//===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a command line utility to replay the execution of recorded OpenMP +// offload kernels. +// +//===----------------------------------------------------------------------===// + +#include "omptarget.h" + +#include "Shared/PluginAPI.h" + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstdint> +#include <cstdlib> + +using namespace llvm; + +cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options"); + +// InputFilename - The filename to read the json description of the kernel. +static cl::opt<std::string> InputFilename(cl::Positional, + cl::desc("<input kernel json file>"), + cl::Required); + +static cl::opt<bool> VerifyOpt( + "verify", + cl::desc( + "Verify device memory post execution against the original output."), + cl::init(false), cl::cat(ReplayOptions)); + +static cl::opt<bool> SaveOutputOpt( + "save-output", + cl::desc("Save the device memory output of the replayed kernel execution."), + cl::init(false), cl::cat(ReplayOptions)); + +static cl::opt<unsigned> NumTeamsOpt("num-teams", + cl::desc("Set the number of teams."), + cl::init(0), cl::cat(ReplayOptions)); + +static cl::opt<unsigned> NumThreadsOpt("num-threads", + cl::desc("Set the number of threads."), + cl::init(0), cl::cat(ReplayOptions)); + +static cl::opt<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."), + cl::init(-1), cl::cat(ReplayOptions)); + +int main(int argc, char **argv) { + cl::HideUnrelatedOptions(ReplayOptions); + cl::ParseCommandLineOptions(argc, argv, "llvm-omp-kernel-replay\n"); + + ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB = + MemoryBuffer::getFile(InputFilename, /*isText=*/true, + /*RequiresNullTerminator=*/true); + if (!KernelInfoMB) + report_fatal_error("Error reading the kernel info json file"); + Expected<json::Value> JsonKernelInfo = + json::parse(KernelInfoMB.get()->getBuffer()); + if (auto Err = JsonKernelInfo.takeError()) + report_fatal_error("Cannot parse the kernel info json file"); + + auto NumTeamsJson = + JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause"); + unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value()); + auto NumThreadsJson = + JsonKernelInfo->getAsObject()->getInteger("ThreadLimitClause"); + unsigned NumThreads = + (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value()); + // TODO: Print a warning if number of teams/threads is explicitly set in the + // kernel info but overriden through command line options. + auto LoopTripCount = + JsonKernelInfo->getAsObject()->getInteger("LoopTripCount"); + auto KernelFunc = JsonKernelInfo->getAsObject()->getString("Name"); + + SmallVector<void *> TgtArgs; + SmallVector<ptrdiff_t> TgtArgOffsets; + auto NumArgs = JsonKernelInfo->getAsObject()->getInteger("NumArgs"); + auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray("ArgPtrs"); + for (auto It : *TgtArgsArray) + TgtArgs.push_back(reinterpret_cast<void *>(It.getAsInteger().value())); + auto *TgtArgOffsetsArray = + JsonKernelInfo->getAsObject()->getArray("ArgOffsets"); + for (auto It : *TgtArgOffsetsArray) + TgtArgOffsets.push_back(static_cast<ptrdiff_t>(It.getAsInteger().value())); + + void *BAllocStart = reinterpret_cast<void *>( + JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value()); + + __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0}; + std::string KernelEntryName = KernelFunc.value().str(); + KernelEntry.name = const_cast<char *>(KernelEntryName.c_str()); + // Anything non-zero works to uniquely identify the kernel. + KernelEntry.addr = (void *)0x1; + + ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB = + MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false, + /*RequiresNullTerminator=*/false); + if (!ImageMB) + report_fatal_error("Error reading the kernel image."); + + __tgt_device_image DeviceImage; + DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart()); + DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd()); + DeviceImage.EntriesBegin = &KernelEntry; + DeviceImage.EntriesEnd = &KernelEntry + 1; + + __tgt_bin_desc Desc; + Desc.NumDeviceImages = 1; + Desc.HostEntriesBegin = &KernelEntry; + Desc.HostEntriesEnd = &KernelEntry + 1; + Desc.DeviceImages = &DeviceImage; + + auto DeviceMemorySizeJson = + JsonKernelInfo->getAsObject()->getInteger("DeviceMemorySize"); + // Set device memory size to the ceiling of GB granularity. + uint64_t DeviceMemorySize = std::ceil(DeviceMemorySizeJson.value()); + + auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger("DeviceId"); + // TODO: Print warning if the user overrides the device id in the json file. + int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value()); + + // TODO: do we need requires? + //__tgt_register_requires(/*Flags=*/1); + + __tgt_register_lib(&Desc); + + uint64_t ReqPtrArgOffset = 0; + int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart, + false, VerifyOpt, ReqPtrArgOffset); + + if (Rc != OMP_TGT_SUCCESS) { + report_fatal_error("Cannot activate record replay\n"); + } + + ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB = + MemoryBuffer::getFile(KernelEntryName + ".memory", /*isText=*/false, + /*RequiresNullTerminator=*/false); + + if (!DeviceMemoryMB) + report_fatal_error("Error reading the kernel input device memory."); + + // On AMD for currently unknown reasons we cannot copy memory mapped data to + // device. This is a work-around. + uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()]; + std::memcpy(recored_data, + const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()), + DeviceMemoryMB.get()->getBufferSize()); + + // If necessary, adjust pointer arguments. + if (ReqPtrArgOffset) { + for (auto *&Arg : TgtArgs) { + auto ArgInt = uintptr_t(Arg); + // Try to find pointer arguments. + if (ArgInt < uintptr_t(BAllocStart) || + ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize) + continue; + Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset); + } + } + + __tgt_target_kernel_replay( + /*Loc=*/nullptr, DeviceId, KernelEntry.addr, (char *)recored_data, + DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(), + TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads, + LoopTripCount.value()); + + if (VerifyOpt) { + ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB = + MemoryBuffer::getFile(KernelEntryName + ".original.output", + /*isText=*/false, + /*RequiresNullTerminator=*/false); + if (!OriginalOutputMB) + report_fatal_error("Error reading the kernel original output file, make " + "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording"); + ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB = + MemoryBuffer::getFile(KernelEntryName + ".replay.output", + /*isText=*/false, + /*RequiresNullTerminator=*/false); + if (!ReplayOutputMB) + report_fatal_error("Error reading the kernel replay output file"); + + StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer(); + StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer(); + if (OriginalOutput == ReplayOutput) + outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n"; + else + outs() << "[llvm-omp-kernel-replay] Replay device memory failed to " + "verify!\n"; + } + + delete[] recored_data; + + return 0; +} |
