summaryrefslogtreecommitdiff
path: root/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp
diff options
context:
space:
mode:
authorMichael Kruse <llvm-project@meinersbur.de>2025-01-03 10:22:51 +0100
committerMichael Kruse <llvm-project@meinersbur.de>2025-01-03 10:22:51 +0100
commit38500d63e14ce340236840f60d356cdefb56a52c (patch)
tree17edbec446ce9b50d2f215a483b83afb293a635d /libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp
parent1a3d5daaef7a6a63448a497da3eff7fc9e23df26 (diff)
parent27f30029741ecf023baece7b3dde1ff9011ffefc (diff)
Merge branch 'main' into users/meinersbur/flang_runtime_split-headersusers/meinersbur/flang_runtime_split-headers
Diffstat (limited to 'libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp')
-rw-r--r--libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp280
1 files changed, 0 insertions, 280 deletions
diff --git a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp b/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp
deleted file mode 100644
index f438e2a405bd..000000000000
--- a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-//===-- Generate random but valid function descriptors -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "automemcpy/RandomFunctionGenerator.h"
-
-#include <llvm/ADT/StringRef.h>
-#include <llvm/Support/raw_ostream.h>
-
-#include <optional>
-#include <set>
-
-namespace llvm {
-namespace automemcpy {
-
-// Exploration parameters
-// ----------------------
-// Here we define a set of values that will contraint the exploration and
-// limit combinatorial explosion.
-
-// We limit the number of cases for individual sizes to sizes up to 4.
-// More individual sizes don't bring much over the overlapping strategy.
-static constexpr int kMaxIndividualSize = 4;
-
-// We limit Overlapping Strategy to sizes up to 256.
-// An overlap of 256B means accessing 128B at once which is usually not
-// feasible by current CPUs. We rely on the compiler to generate multiple
-// loads/stores if needed but higher sizes are unlikely to benefit from hardware
-// acceleration.
-static constexpr int kMaxOverlapSize = 256;
-
-// For the loop strategies, we make sure that they iterate at least a certain
-// number of times to amortize the cost of looping.
-static constexpr int kLoopMinIter = 3;
-static constexpr int kAlignedLoopMinIter = 2;
-
-// We restrict the size of the block of data to handle in a loop.
-// Generally speaking block size <= 16 perform poorly.
-static constexpr int kLoopBlockSize[] = {16, 32, 64};
-
-// We restrict alignment to the following values.
-static constexpr int kLoopAlignments[] = {16, 32, 64};
-
-// We make sure that the region bounds are one of the following values.
-static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48,
- 64, 96, 128, 256, 512, 1024, kMaxSize};
-
-// We also allow disabling loops, aligned loops and accelerators.
-static constexpr bool kDisableLoop = false;
-static constexpr bool kDisableAlignedLoop = false;
-static constexpr bool kDisableAccelerator = false;
-
-// For memcpy, we can also explore whether aligning on source or destination has
-// an effect.
-static constexpr bool kExploreAlignmentArg = true;
-
-// The function we generate code for.
-// BCMP is specifically disabled for now.
-static constexpr int kFunctionTypes[] = {
- (int)FunctionType::MEMCPY,
- (int)FunctionType::MEMCMP,
- // (int)FunctionType::BCMP,
- (int)FunctionType::MEMSET,
- (int)FunctionType::BZERO,
-};
-
-// The actual implementation of each function can be handled via primitive types
-// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
-// We want to move toward delegating the code generation entirely to the
-// compiler but for now we have to make use of -per microarchitecture- custom
-// implementations. Scalar being more portable but also less performant, we
-// remove it as well.
-static constexpr int kElementClasses[] = {
- // (int)ElementTypeClass::SCALAR,
- (int)ElementTypeClass::NATIVE,
- // (int)ElementTypeClass::BUILTIN
-};
-
-RandomFunctionGenerator::RandomFunctionGenerator()
- : Solver(Context), Type(Context.int_const("Type")),
- ContiguousBegin(Context.int_const("ContiguousBegin")),
- ContiguousEnd(Context.int_const("ContiguousEnd")),
- OverlapBegin(Context.int_const("OverlapBegin")),
- OverlapEnd(Context.int_const("OverlapEnd")),
- LoopBegin(Context.int_const("LoopBegin")),
- LoopEnd(Context.int_const("LoopEnd")),
- LoopBlockSize(Context.int_const("LoopBlockSize")),
- AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
- AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
- AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
- AlignedAlignment(Context.int_const("AlignedAlignment")),
- AlignedArg(Context.int_const("AlignedArg")),
- AcceleratorBegin(Context.int_const("AcceleratorBegin")),
- AcceleratorEnd(Context.int_const("AcceleratorEnd")),
- ElementClass(Context.int_const("ElementClass")) {
- // All possible functions.
- Solver.add(inSetConstraint(Type, kFunctionTypes));
-
- // Add constraints for region bounds.
- addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
- addBoundsAndAnchors(OverlapBegin, OverlapEnd);
- addBoundsAndAnchors(LoopBegin, LoopEnd);
- addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
- addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
- // We always consider strategies in this order, and we
- // always end with the `Accelerator` strategy, as it's typically more
- // efficient for large sizes.
- // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
- Solver.add(ContiguousEnd == OverlapBegin);
- Solver.add(OverlapEnd == LoopBegin);
- Solver.add(LoopEnd == AlignedLoopBegin);
- Solver.add(AlignedLoopEnd == AcceleratorBegin);
- // Fix endpoints: The minimum size that we want to copy is 0, and we always
- // start with the `Contiguous` strategy. The max size is `kMaxSize`.
- Solver.add(ContiguousBegin == 0);
- Solver.add(AcceleratorEnd == kMaxSize);
- // Contiguous
- Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
- // Overlap
- Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
- // Overlap only ever makes sense when accessing multiple bytes at a time.
- // i.e. Overlap<1> is useless.
- Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
- // Loop
- addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
- // Aligned Loop
- addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
- kAlignedLoopMinIter);
- Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
- Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
- Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
- Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
- z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
- z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
- Solver.add(
- (ExploreAlignment &&
- inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
- (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
- // Accelerator
- Solver.add(IsMemcpy ||
- (AcceleratorBegin ==
- AcceleratorEnd)); // Only Memcpy has accelerator for now.
- // Element classes
- Solver.add(inSetConstraint(ElementClass, kElementClasses));
-
- if (kDisableLoop)
- Solver.add(LoopBegin == LoopEnd);
- if (kDisableAlignedLoop)
- Solver.add(AlignedLoopBegin == AlignedLoopEnd);
- if (kDisableAccelerator)
- Solver.add(AcceleratorBegin == AcceleratorEnd);
-}
-
-// Creates SizeSpan from Begin/End values.
-// Returns std::nullopt if Begin==End.
-static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
- if (Begin == End)
- return std::nullopt;
- SizeSpan SS;
- SS.Begin = Begin;
- SS.End = End;
- return SS;
-}
-
-// Generic method to create a `Region` struct with a Span or std::nullopt if
-// span is empty.
-template <typename Region>
-static std::optional<Region> As(size_t Begin, size_t End) {
- if (auto Span = AsSizeSpan(Begin, End)) {
- Region Output;
- Output.Span = *Span;
- return Output;
- }
- return std::nullopt;
-}
-
-// Returns a Loop struct or std::nullopt if span is empty.
-static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
- if (auto Span = AsSizeSpan(Begin, End)) {
- Loop Output;
- Output.Span = *Span;
- Output.BlockSize = BlockSize;
- return Output;
- }
- return std::nullopt;
-}
-
-// Returns an AlignedLoop struct or std::nullopt if span is empty.
-static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
- size_t BlockSize,
- size_t Alignment,
- AlignArg AlignTo) {
- if (auto Loop = AsLoop(Begin, End, BlockSize)) {
- AlignedLoop Output;
- Output.Loop = *Loop;
- Output.Alignment = Alignment;
- Output.AlignTo = AlignTo;
- return Output;
- }
- return std::nullopt;
-}
-
-std::optional<FunctionDescriptor> RandomFunctionGenerator::next() {
- if (Solver.check() != z3::sat)
- return {};
-
- z3::model m = Solver.get_model();
-
- // Helper method to get the current numerical value of a z3::expr.
- const auto E = [&m](z3::expr &V) -> int {
- return m.eval(V).get_numeral_int();
- };
-
- // Fill is the function descriptor to return.
- FunctionDescriptor R;
- R.Type = FunctionType(E(Type));
- R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
- R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
- R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
- R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
- E(AlignedLoopBlockSize), E(AlignedAlignment),
- AlignArg(E(AlignedArg)));
- R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
- R.ElementClass = ElementTypeClass(E(ElementClass));
-
- // Express current state as a set of constraints.
- z3::expr CurrentLayout =
- (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
- (ContiguousEnd == E(ContiguousEnd)) &&
- (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
- (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
- (LoopBlockSize == E(LoopBlockSize)) &&
- (AlignedLoopBegin == E(AlignedLoopBegin)) &&
- (AlignedLoopEnd == E(AlignedLoopEnd)) &&
- (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
- (AlignedAlignment == E(AlignedAlignment)) &&
- (AlignedArg == E(AlignedArg)) &&
- (AcceleratorBegin == E(AcceleratorBegin)) &&
- (AcceleratorEnd == E(AcceleratorEnd)) &&
- (ElementClass == E(ElementClass));
-
- // Ask solver to never show this configuration ever again.
- Solver.add(!CurrentLayout);
- return R;
-}
-
-// Make sure `Variable` is one of the provided values.
-z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
- ArrayRef<int> Values) const {
- z3::expr_vector Args(Variable.ctx());
- for (int Value : Values)
- Args.push_back(Variable == Value);
- return z3::mk_or(Args);
-}
-
-void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
- z3::expr &End) {
- // Begin and End are picked amongst a set of predefined values.
- Solver.add(inSetConstraint(Begin, kAnchors));
- Solver.add(inSetConstraint(End, kAnchors));
- Solver.add(Begin >= 0);
- Solver.add(Begin <= End);
- Solver.add(End <= kMaxSize);
-}
-
-void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
- const z3::expr &LoopEnd,
- z3::expr &LoopBlockSize,
- int LoopMinIter) {
- Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
- Solver.add(LoopBegin == LoopEnd ||
- (LoopBegin > (LoopMinIter * LoopBlockSize)));
-}
-
-} // namespace automemcpy
-} // namespace llvm