diff options
Diffstat (limited to 'libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp')
| -rw-r--r-- | libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp | 280 |
1 files changed, 0 insertions, 280 deletions
diff --git a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp b/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp deleted file mode 100644 index f438e2a405bd..000000000000 --- a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp +++ /dev/null @@ -1,280 +0,0 @@ -//===-- Generate random but valid function descriptors -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "automemcpy/RandomFunctionGenerator.h" - -#include <llvm/ADT/StringRef.h> -#include <llvm/Support/raw_ostream.h> - -#include <optional> -#include <set> - -namespace llvm { -namespace automemcpy { - -// Exploration parameters -// ---------------------- -// Here we define a set of values that will contraint the exploration and -// limit combinatorial explosion. - -// We limit the number of cases for individual sizes to sizes up to 4. -// More individual sizes don't bring much over the overlapping strategy. -static constexpr int kMaxIndividualSize = 4; - -// We limit Overlapping Strategy to sizes up to 256. -// An overlap of 256B means accessing 128B at once which is usually not -// feasible by current CPUs. We rely on the compiler to generate multiple -// loads/stores if needed but higher sizes are unlikely to benefit from hardware -// acceleration. -static constexpr int kMaxOverlapSize = 256; - -// For the loop strategies, we make sure that they iterate at least a certain -// number of times to amortize the cost of looping. -static constexpr int kLoopMinIter = 3; -static constexpr int kAlignedLoopMinIter = 2; - -// We restrict the size of the block of data to handle in a loop. -// Generally speaking block size <= 16 perform poorly. -static constexpr int kLoopBlockSize[] = {16, 32, 64}; - -// We restrict alignment to the following values. -static constexpr int kLoopAlignments[] = {16, 32, 64}; - -// We make sure that the region bounds are one of the following values. -static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48, - 64, 96, 128, 256, 512, 1024, kMaxSize}; - -// We also allow disabling loops, aligned loops and accelerators. -static constexpr bool kDisableLoop = false; -static constexpr bool kDisableAlignedLoop = false; -static constexpr bool kDisableAccelerator = false; - -// For memcpy, we can also explore whether aligning on source or destination has -// an effect. -static constexpr bool kExploreAlignmentArg = true; - -// The function we generate code for. -// BCMP is specifically disabled for now. -static constexpr int kFunctionTypes[] = { - (int)FunctionType::MEMCPY, - (int)FunctionType::MEMCMP, - // (int)FunctionType::BCMP, - (int)FunctionType::MEMSET, - (int)FunctionType::BZERO, -}; - -// The actual implementation of each function can be handled via primitive types -// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN). -// We want to move toward delegating the code generation entirely to the -// compiler but for now we have to make use of -per microarchitecture- custom -// implementations. Scalar being more portable but also less performant, we -// remove it as well. -static constexpr int kElementClasses[] = { - // (int)ElementTypeClass::SCALAR, - (int)ElementTypeClass::NATIVE, - // (int)ElementTypeClass::BUILTIN -}; - -RandomFunctionGenerator::RandomFunctionGenerator() - : Solver(Context), Type(Context.int_const("Type")), - ContiguousBegin(Context.int_const("ContiguousBegin")), - ContiguousEnd(Context.int_const("ContiguousEnd")), - OverlapBegin(Context.int_const("OverlapBegin")), - OverlapEnd(Context.int_const("OverlapEnd")), - LoopBegin(Context.int_const("LoopBegin")), - LoopEnd(Context.int_const("LoopEnd")), - LoopBlockSize(Context.int_const("LoopBlockSize")), - AlignedLoopBegin(Context.int_const("AlignedLoopBegin")), - AlignedLoopEnd(Context.int_const("AlignedLoopEnd")), - AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")), - AlignedAlignment(Context.int_const("AlignedAlignment")), - AlignedArg(Context.int_const("AlignedArg")), - AcceleratorBegin(Context.int_const("AcceleratorBegin")), - AcceleratorEnd(Context.int_const("AcceleratorEnd")), - ElementClass(Context.int_const("ElementClass")) { - // All possible functions. - Solver.add(inSetConstraint(Type, kFunctionTypes)); - - // Add constraints for region bounds. - addBoundsAndAnchors(ContiguousBegin, ContiguousEnd); - addBoundsAndAnchors(OverlapBegin, OverlapEnd); - addBoundsAndAnchors(LoopBegin, LoopEnd); - addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd); - addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd); - // We always consider strategies in this order, and we - // always end with the `Accelerator` strategy, as it's typically more - // efficient for large sizes. - // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator - Solver.add(ContiguousEnd == OverlapBegin); - Solver.add(OverlapEnd == LoopBegin); - Solver.add(LoopEnd == AlignedLoopBegin); - Solver.add(AlignedLoopEnd == AcceleratorBegin); - // Fix endpoints: The minimum size that we want to copy is 0, and we always - // start with the `Contiguous` strategy. The max size is `kMaxSize`. - Solver.add(ContiguousBegin == 0); - Solver.add(AcceleratorEnd == kMaxSize); - // Contiguous - Solver.add(ContiguousEnd <= kMaxIndividualSize + 1); - // Overlap - Solver.add(OverlapEnd <= kMaxOverlapSize + 1); - // Overlap only ever makes sense when accessing multiple bytes at a time. - // i.e. Overlap<1> is useless. - Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2); - // Loop - addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter); - // Aligned Loop - addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, - kAlignedLoopMinIter); - Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments)); - Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64); - Solver.add(AlignedLoopBlockSize >= AlignedAlignment); - Solver.add(AlignedLoopBlockSize >= LoopBlockSize); - z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY; - z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg; - Solver.add( - (ExploreAlignment && - inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) || - (!ExploreAlignment && AlignedArg == (int)AlignArg::_1)); - // Accelerator - Solver.add(IsMemcpy || - (AcceleratorBegin == - AcceleratorEnd)); // Only Memcpy has accelerator for now. - // Element classes - Solver.add(inSetConstraint(ElementClass, kElementClasses)); - - if (kDisableLoop) - Solver.add(LoopBegin == LoopEnd); - if (kDisableAlignedLoop) - Solver.add(AlignedLoopBegin == AlignedLoopEnd); - if (kDisableAccelerator) - Solver.add(AcceleratorBegin == AcceleratorEnd); -} - -// Creates SizeSpan from Begin/End values. -// Returns std::nullopt if Begin==End. -static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) { - if (Begin == End) - return std::nullopt; - SizeSpan SS; - SS.Begin = Begin; - SS.End = End; - return SS; -} - -// Generic method to create a `Region` struct with a Span or std::nullopt if -// span is empty. -template <typename Region> -static std::optional<Region> As(size_t Begin, size_t End) { - if (auto Span = AsSizeSpan(Begin, End)) { - Region Output; - Output.Span = *Span; - return Output; - } - return std::nullopt; -} - -// Returns a Loop struct or std::nullopt if span is empty. -static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) { - if (auto Span = AsSizeSpan(Begin, End)) { - Loop Output; - Output.Span = *Span; - Output.BlockSize = BlockSize; - return Output; - } - return std::nullopt; -} - -// Returns an AlignedLoop struct or std::nullopt if span is empty. -static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End, - size_t BlockSize, - size_t Alignment, - AlignArg AlignTo) { - if (auto Loop = AsLoop(Begin, End, BlockSize)) { - AlignedLoop Output; - Output.Loop = *Loop; - Output.Alignment = Alignment; - Output.AlignTo = AlignTo; - return Output; - } - return std::nullopt; -} - -std::optional<FunctionDescriptor> RandomFunctionGenerator::next() { - if (Solver.check() != z3::sat) - return {}; - - z3::model m = Solver.get_model(); - - // Helper method to get the current numerical value of a z3::expr. - const auto E = [&m](z3::expr &V) -> int { - return m.eval(V).get_numeral_int(); - }; - - // Fill is the function descriptor to return. - FunctionDescriptor R; - R.Type = FunctionType(E(Type)); - R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd)); - R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd)); - R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize)); - R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd), - E(AlignedLoopBlockSize), E(AlignedAlignment), - AlignArg(E(AlignedArg))); - R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd)); - R.ElementClass = ElementTypeClass(E(ElementClass)); - - // Express current state as a set of constraints. - z3::expr CurrentLayout = - (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) && - (ContiguousEnd == E(ContiguousEnd)) && - (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) && - (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) && - (LoopBlockSize == E(LoopBlockSize)) && - (AlignedLoopBegin == E(AlignedLoopBegin)) && - (AlignedLoopEnd == E(AlignedLoopEnd)) && - (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) && - (AlignedAlignment == E(AlignedAlignment)) && - (AlignedArg == E(AlignedArg)) && - (AcceleratorBegin == E(AcceleratorBegin)) && - (AcceleratorEnd == E(AcceleratorEnd)) && - (ElementClass == E(ElementClass)); - - // Ask solver to never show this configuration ever again. - Solver.add(!CurrentLayout); - return R; -} - -// Make sure `Variable` is one of the provided values. -z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, - ArrayRef<int> Values) const { - z3::expr_vector Args(Variable.ctx()); - for (int Value : Values) - Args.push_back(Variable == Value); - return z3::mk_or(Args); -} - -void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, - z3::expr &End) { - // Begin and End are picked amongst a set of predefined values. - Solver.add(inSetConstraint(Begin, kAnchors)); - Solver.add(inSetConstraint(End, kAnchors)); - Solver.add(Begin >= 0); - Solver.add(Begin <= End); - Solver.add(End <= kMaxSize); -} - -void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, - const z3::expr &LoopEnd, - z3::expr &LoopBlockSize, - int LoopMinIter) { - Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); - Solver.add(LoopBegin == LoopEnd || - (LoopBegin > (LoopMinIter * LoopBlockSize))); -} - -} // namespace automemcpy -} // namespace llvm |
