summaryrefslogtreecommitdiff
path: root/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp')
-rw-r--r--offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp201
1 files changed, 201 insertions, 0 deletions
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
new file mode 100644
index 000000000000..761e04e4c7bb
--- /dev/null
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -0,0 +1,201 @@
+//===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a command line utility to replay the execution of recorded OpenMP
+// offload kernels.
+//
+//===----------------------------------------------------------------------===//
+
+#include "omptarget.h"
+
+#include "Shared/PluginAPI.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstdint>
+#include <cstdlib>
+
+using namespace llvm;
+
+/// Category that groups the options specific to this tool.
+cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options");
+
+/// Mandatory positional argument: the json file describing the recorded
+/// kernel.
+static cl::opt<std::string>
+    InputFilename(cl::Positional, cl::Required,
+                  cl::desc("<input kernel json file>"));
+
+/// --verify: compare the replayed device memory with the originally recorded
+/// output. Off by default.
+static cl::opt<bool> VerifyOpt(
+    "verify", cl::cat(ReplayOptions), cl::init(false),
+    cl::desc(
+        "Verify device memory post execution against the original output."));
+
+/// --save-output: keep the device memory produced by the replay. Off by
+/// default.
+static cl::opt<bool> SaveOutputOpt(
+    "save-output", cl::cat(ReplayOptions), cl::init(false),
+    cl::desc(
+        "Save the device memory output of the replayed kernel execution."));
+
+/// --num-teams: number of teams to launch. The default of 0 means "use the
+/// value stored in the kernel json file".
+static cl::opt<unsigned> NumTeamsOpt("num-teams", cl::cat(ReplayOptions),
+                                     cl::init(0),
+                                     cl::desc("Set the number of teams."));
+
+/// --num-threads: number of threads to launch. The default of 0 means "use
+/// the value stored in the kernel json file".
+static cl::opt<unsigned> NumThreadsOpt("num-threads", cl::cat(ReplayOptions),
+                                       cl::init(0),
+                                       cl::desc("Set the number of threads."));
+
+/// --device-id: device to replay on. The default of -1 means "use the device
+/// id stored in the kernel json file".
+static cl::opt<int32_t> DeviceIdOpt("device-id", cl::cat(ReplayOptions),
+                                    cl::init(-1),
+                                    cl::desc("Set the device id."));
+
+/// Abort with a diagnostic naming the kernel info entry \p Key that is missing
+/// or does not have the expected type.
+[[noreturn]] static void reportInvalidKernelInfo(StringRef Key) {
+  report_fatal_error(Twine("Missing or invalid '") + Key +
+                     "' entry in the kernel info json file");
+}
+
+/// Return the integer stored under \p Key in \p KernelInfo, aborting with a
+/// diagnostic if the entry is absent or not an integer.
+static int64_t getRequiredInteger(const json::Object &KernelInfo,
+                                  StringRef Key) {
+  if (auto Value = KernelInfo.getInteger(Key))
+    return *Value;
+  reportInvalidKernelInfo(Key);
+}
+
+/// Return the integers of the array stored under \p Key in \p KernelInfo,
+/// aborting with a diagnostic if the entry is absent, not an array, or holds a
+/// non-integer element.
+static SmallVector<int64_t>
+getRequiredIntegerArray(const json::Object &KernelInfo, StringRef Key) {
+  const json::Array *Elements = KernelInfo.getArray(Key);
+  if (!Elements)
+    reportInvalidKernelInfo(Key);
+
+  SmallVector<int64_t> Integers;
+  Integers.reserve(Elements->size());
+  // Iterate by reference: json::Value copies are deep copies.
+  for (const json::Value &Element : *Elements) {
+    auto Integer = Element.getAsInteger();
+    if (!Integer)
+      reportInvalidKernelInfo(Key);
+    Integers.push_back(*Integer);
+  }
+  return Integers;
+}
+
+/// Replay a recorded OpenMP offload kernel.
+///
+/// Reads the kernel description from the json file given on the command line,
+/// loads "<kernel name>.image" and "<kernel name>.memory" from the current
+/// directory, registers the image, activates replay mode, and launches the
+/// kernel. With --verify, "<kernel name>.original.output" is compared against
+/// "<kernel name>.replay.output" and the result is printed.
+///
+/// Any unreadable input file or malformed kernel description terminates the
+/// tool through report_fatal_error. Returns 0 otherwise, including when the
+/// verification reports a mismatch.
+int main(int argc, char **argv) {
+  cl::HideUnrelatedOptions(ReplayOptions);
+  cl::ParseCommandLineOptions(argc, argv, "llvm-omp-kernel-replay\n");
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB =
+      MemoryBuffer::getFile(InputFilename, /*isText=*/true,
+                            /*RequiresNullTerminator=*/true);
+  if (!KernelInfoMB)
+    report_fatal_error("Error reading the kernel info json file");
+  Expected<json::Value> JsonKernelInfo =
+      json::parse(KernelInfoMB.get()->getBuffer());
+  if (auto Err = JsonKernelInfo.takeError())
+    report_fatal_error("Cannot parse the kernel info json file");
+
+  // Everything below reads members of the top-level object, so reject any
+  // other kind of json value up front instead of dereferencing a null pointer.
+  const json::Object *KernelInfo = JsonKernelInfo->getAsObject();
+  if (!KernelInfo)
+    report_fatal_error("The kernel info json file must contain a json object");
+
+  // A value given on the command line takes precedence; the json entry is
+  // only required when no override is present.
+  // TODO: Print a warning if number of teams/threads is explicitly set in the
+  // kernel info but overridden through command line options.
+  unsigned NumTeams = NumTeamsOpt;
+  if (NumTeams == 0)
+    NumTeams =
+        static_cast<unsigned>(getRequiredInteger(*KernelInfo, "NumTeamsClause"));
+  unsigned NumThreads = NumThreadsOpt;
+  if (NumThreads == 0)
+    NumThreads = static_cast<unsigned>(
+        getRequiredInteger(*KernelInfo, "ThreadLimitClause"));
+
+  const int64_t LoopTripCount =
+      getRequiredInteger(*KernelInfo, "LoopTripCount");
+  auto KernelFunc = KernelInfo->getString("Name");
+  if (!KernelFunc)
+    reportInvalidKernelInfo("Name");
+
+  // Kernel arguments are recorded as integers; pointer arguments are the
+  // device addresses used during the recording.
+  SmallVector<void *> TgtArgs;
+  for (int64_t ArgPtr : getRequiredIntegerArray(*KernelInfo, "ArgPtrs"))
+    TgtArgs.push_back(reinterpret_cast<void *>(ArgPtr));
+  SmallVector<ptrdiff_t> TgtArgOffsets;
+  for (int64_t ArgOffset : getRequiredIntegerArray(*KernelInfo, "ArgOffsets"))
+    TgtArgOffsets.push_back(static_cast<ptrdiff_t>(ArgOffset));
+
+  // The replay entry point reads NumArgs elements from both arrays, so make
+  // sure they are actually that long.
+  const int64_t NumArgs = getRequiredInteger(*KernelInfo, "NumArgs");
+  if (NumArgs < 0 || static_cast<uint64_t>(NumArgs) > TgtArgs.size() ||
+      static_cast<uint64_t>(NumArgs) > TgtArgOffsets.size())
+    reportInvalidKernelInfo("NumArgs");
+
+  void *BAllocStart = reinterpret_cast<void *>(
+      getRequiredInteger(*KernelInfo, "BumpAllocVAStart"));
+
+  // The recorded size is already an integral byte count; use it unchanged.
+  const int64_t DeviceMemorySizeJson =
+      getRequiredInteger(*KernelInfo, "DeviceMemorySize");
+  if (DeviceMemorySizeJson < 0)
+    reportInvalidKernelInfo("DeviceMemorySize");
+  const uint64_t DeviceMemorySize =
+      static_cast<uint64_t>(DeviceMemorySizeJson);
+
+  // TODO: Print warning if the user overrides the device id in the json file.
+  int32_t DeviceId = DeviceIdOpt;
+  if (DeviceId < 0)
+    DeviceId = static_cast<int32_t>(getRequiredInteger(*KernelInfo, "DeviceId"));
+
+  __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0};
+  std::string KernelEntryName = KernelFunc->str();
+  KernelEntry.name = const_cast<char *>(KernelEntryName.c_str());
+  // Anything non-zero works to uniquely identify the kernel.
+  KernelEntry.addr = reinterpret_cast<void *>(0x1);
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB =
+      MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false,
+                            /*RequiresNullTerminator=*/false);
+  if (!ImageMB)
+    report_fatal_error("Error reading the kernel image.");
+
+  __tgt_device_image DeviceImage;
+  DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart());
+  DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd());
+  DeviceImage.EntriesBegin = &KernelEntry;
+  DeviceImage.EntriesEnd = &KernelEntry + 1;
+
+  __tgt_bin_desc Desc;
+  Desc.NumDeviceImages = 1;
+  Desc.HostEntriesBegin = &KernelEntry;
+  Desc.HostEntriesEnd = &KernelEntry + 1;
+  Desc.DeviceImages = &DeviceImage;
+
+  // TODO: do we need requires?
+  //__tgt_register_requires(/*Flags=*/1);
+
+  __tgt_register_lib(&Desc);
+
+  // Verification reads "<kernel>.replay.output", so it needs the replay output
+  // saved as well; --save-output requests the same without the comparison.
+  // NOTE(review): assumes the fifth argument of __tgt_activate_record_replay
+  // is the save-output flag - confirm against its declaration.
+  const bool SaveOutput = SaveOutputOpt || VerifyOpt;
+  uint64_t ReqPtrArgOffset = 0;
+  int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart,
+                                        false, SaveOutput, ReqPtrArgOffset);
+
+  if (Rc != OMP_TGT_SUCCESS) {
+    report_fatal_error("Cannot activate record replay\n");
+  }
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB =
+      MemoryBuffer::getFile(KernelEntryName + ".memory", /*isText=*/false,
+                            /*RequiresNullTerminator=*/false);
+
+  if (!DeviceMemoryMB)
+    report_fatal_error("Error reading the kernel input device memory.");
+
+  // On AMD for currently unknown reasons we cannot copy memory mapped data to
+  // device. As a work-around, stage the recorded memory in a heap buffer that
+  // is released automatically when main returns.
+  StringRef RecordedMemory = DeviceMemoryMB.get()->getBuffer();
+  SmallVector<char, 0> RecordedData(RecordedMemory.begin(),
+                                    RecordedMemory.end());
+
+  // If necessary, adjust pointer arguments.
+  if (ReqPtrArgOffset) {
+    for (auto *&Arg : TgtArgs) {
+      auto ArgInt = uintptr_t(Arg);
+      // Try to find pointer arguments: only values that fall inside the
+      // recorded allocation range are rebased.
+      if (ArgInt < uintptr_t(BAllocStart) ||
+          ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize)
+        continue;
+      Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset);
+    }
+  }
+
+  __tgt_target_kernel_replay(
+      /*Loc=*/nullptr, DeviceId, KernelEntry.addr, RecordedData.data(),
+      RecordedData.size(), TgtArgs.data(), TgtArgOffsets.data(),
+      static_cast<int32_t>(NumArgs), NumTeams, NumThreads, LoopTripCount);
+
+  if (VerifyOpt) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB =
+        MemoryBuffer::getFile(KernelEntryName + ".original.output",
+                              /*isText=*/false,
+                              /*RequiresNullTerminator=*/false);
+    if (!OriginalOutputMB)
+      report_fatal_error("Error reading the kernel original output file, make "
+                         "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
+    ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB =
+        MemoryBuffer::getFile(KernelEntryName + ".replay.output",
+                              /*isText=*/false,
+                              /*RequiresNullTerminator=*/false);
+    if (!ReplayOutputMB)
+      report_fatal_error("Error reading the kernel replay output file");
+
+    StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer();
+    StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer();
+    if (OriginalOutput == ReplayOutput)
+      outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n";
+    else
+      outs() << "[llvm-omp-kernel-replay] Replay device memory failed to "
+                "verify!\n";
+  }
+
+  return 0;
+}