18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/Sequence.h"
21#include "llvm/ADT/SetOperations.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/ADT/iterator_range.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/Module.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/TypeSize.h"
32#include "llvm/Support/raw_ostream.h"
46#define DEBUG_TYPE "polly-opt-isl"
56 "polly-target-latency-vector-fma",
57 cl::desc(
"The minimal number of cycles between issuing two "
58 "dependent consecutive vector fused multiply-add "
63 "polly-target-throughput-vector-fma",
64 cl::desc(
"A throughput of the processor floating-point arithmetic units "
65 "expressed in the number of vector fused multiply-add "
66 "instructions per clock cycle."),
70 "polly-target-1st-cache-level-size",
71 cl::desc(
"The size of the first cache level specified in bytes."),
75 "polly-target-1st-cache-level-default-size",
76 cl::desc(
"The default size of the first cache level specified in bytes"
77 " (if not enough were provided by the TargetTransformInfo)."),
81 "polly-target-2nd-cache-level-size",
82 cl::desc(
"The size of the second level specified in bytes."), cl::Hidden,
86 "polly-target-2nd-cache-level-default-size",
87 cl::desc(
"The default size of the second cache level specified in bytes"
88 " (if not enough were provided by the TargetTransformInfo)."),
99 "polly-target-1st-cache-level-associativity",
100 cl::desc(
"The associativity of the first cache level."), cl::Hidden,
104 "polly-target-1st-cache-level-default-associativity",
105 cl::desc(
"The default associativity of the first cache level"
106 " (if not enough were provided by the TargetTransformInfo)."),
110 "polly-target-2nd-cache-level-associativity",
111 cl::desc(
"The associativity of the second cache level."), cl::Hidden,
115 "polly-target-2nd-cache-level-default-associativity",
116 cl::desc(
"The default associativity of the second cache level"
117 " (if not enough were provided by the TargetTransformInfo)."),
121 "polly-target-vector-register-bitwidth",
122 cl::desc(
"The size in bits of a vector register (if not set, this "
123 "information is taken from LLVM's target information."),
127 "polly-pattern-matching-nc-quotient",
128 cl::desc(
"Quotient that is obtained by dividing Nc, the parameter of the"
129 "macro-kernel, by Nr, the parameter of the micro-kernel"),
134 cl::desc(
"Perform optimizations of tensor contractions based "
135 "on pattern matching"),
140 cl::desc(
"Perform optimizations of matrix multiplications "
141 "based on pattern matching"),
145 "polly-tc-dependences-computeout",
146 cl::desc(
"Bound the dependence analysis by a maximal amount of "
147 "computational steps (0 means no bound)"),
148 cl::Hidden, cl::init(500000), cl::ZeroOrMore, cl::cat(
PollyCategory));
155struct MicroKernelParamsTy {
164struct MacroKernelParamsTy {
232 SmallDenseSet<int> I;
233 SmallDenseSet<int> J;
238 SmallDenseSet<int> P;
246 SmallVector<int> DimensionSizes;
247 SmallVector<int> ADimensions;
248 SmallVector<int> BDimensions;
249 SmallVector<int> CDimensions;
255 SmallVector<int> OrderedI;
256 SmallVector<int> OrderedJ;
257 SmallVector<int> OrderedP;
270 UnrollIsolatedSetOption =
272 UnrollIsolatedSetOption =
274 return UnrollIsolatedSetOption.
wrap();
288 unsigned DstPos,
unsigned SrcPos) {
291 if (DstPos == SrcPos)
300 auto MaxDim = std::max(DstPos, SrcPos);
301 auto MinDim = std::min(DstPos, SrcPos);
302 Map = Map.
move_dims(FreeDim, 0, DimType, MaxDim, 1);
303 Map = Map.
move_dims(FreeDim, 0, DimType, MinDim, 1);
304 Map = Map.
move_dims(DimType, MinDim, FreeDim, 1, 1);
305 Map = Map.
move_dims(DimType, MaxDim, FreeDim, 0, 1);
340 int FirstDims[] = {0, 0, 1, 1, 2, 2};
341 int SecondDims[] = {1, 2, 2, 0, 0, 1};
342 for (
int i = 0; i < 6; i += 1) {
343 auto PossibleMatMul =
355 if (AccMap.
is_equal(PossibleMatMul)) {
356 if (FirstPos != -1 && FirstPos != FirstDims[i])
358 FirstPos = FirstDims[i];
359 if (SecondPos != -1 && SecondPos != SecondDims[i])
361 SecondPos = SecondDims[i];
380static bool isMatMulNonScalarReadAccess(
MemoryAccess *MemAccess,
386 if (isMatMulOperandAcc(StmtDomain, AccMap, MMI.i, MMI.j) && !MMI.ReadFromC) {
387 MMI.ReadFromC = MemAccess;
390 if (isMatMulOperandAcc(StmtDomain, AccMap, MMI.i, MMI.k) && !MMI.A) {
394 if (isMatMulOperandAcc(StmtDomain, AccMap, MMI.k, MMI.j) && !MMI.B) {
414static bool containsOnlyMatrMultAcc(
isl::map PartialSchedule,
417 auto *Stmt =
static_cast<ScopStmt *
>(InputDimId.get_user());
419 assert(OutDimNum > 2 &&
"In case of the matrix multiplication the loop nest "
420 "and, consequently, the corresponding scheduling "
421 "functions have at least three dimensions.");
423 permuteDimensions(PartialSchedule,
isl::dim::out, MMI.i, OutDimNum - 1);
425 permuteDimensions(PartialSchedule,
isl::dim::out, MMI.j, OutDimNum - 1);
427 permuteDimensions(PartialSchedule,
isl::dim::out, MMI.k, OutDimNum - 1);
430 for (
auto *MemA = Accesses.begin(); MemA != Accesses.end() - 1; MemA++) {
431 auto *MemAccessPtr = *MemA;
432 if (MemAccessPtr->isLatestArrayKind() && MemAccessPtr != MMI.WriteToC &&
433 !isMatMulNonScalarReadAccess(MemAccessPtr, MMI) &&
434 !(MemAccessPtr->isStrideZero(MapI) &&
435 MemAccessPtr->isStrideZero(MapJ) && MemAccessPtr->isStrideZero(MapK)))
460 Dep = Dep.
unite(Red);
465 for (
int i = 0; i < DeltasDimNum; i++) {
467 Pos = Pos < 0 && Val.is_one() ? i : Pos;
468 if (Val.is_nan() || !(Val.is_zero() || (i == Pos && Val.is_one())))
471 if (DeltasDimNum == 0 || Pos < 0)
502 auto *Stmt =
static_cast<ScopStmt *
>(InputDimsId.get_user());
503 if (Stmt->size() <= 1)
507 for (
auto *MemA = Accesses.end() - 1; MemA != Accesses.begin(); MemA--) {
508 auto *MemAccessPtr = *MemA;
509 if (!MemAccessPtr->isLatestArrayKind())
511 if (!MemAccessPtr->isWrite())
513 auto AccMap = MemAccessPtr->getLatestAccessRelation();
514 if (!isMatMulOperandAcc(Stmt->getDomain(), AccMap, MMI.i, MMI.j))
516 MMI.WriteToC = MemAccessPtr;
520 if (!containsOnlyMatMulDep(PartialSchedule, D, MMI.k))
523 if (!MMI.WriteToC || !containsOnlyMatrMultAcc(PartialSchedule, MMI))
526 if (!MMI.A || !MMI.B || !MMI.ReadFromC)
540 unsigned SecondDim) {
543 std::max(FirstDim, SecondDim));
544 auto PartialSchedule =
546 auto PartialScheduleFirstDim = PartialSchedule.at(FirstDim);
547 auto PartialScheduleSecondDim = PartialSchedule.at(SecondDim);
549 PartialSchedule.set_union_pw_aff(SecondDim, PartialScheduleFirstDim);
551 PartialSchedule.set_union_pw_aff(FirstDim, PartialScheduleSecondDim);
558 MicroKernelParamsTy MicroKernelParams) {
562 return permuteBandNodeDimensions(Node, 0, 1).
child(0).
child(0);
577 MacroKernelParamsTy MacroKernelParams) {
579 if (MacroKernelParams.Mc == 1 && MacroKernelParams.Nc == 1 &&
580 MacroKernelParams.Kc == 1)
583 std::vector<int> TileSizes(DimOutNum, 1);
584 TileSizes[DimOutNum - 3] = MacroKernelParams.Mc;
585 TileSizes[DimOutNum - 2] = MacroKernelParams.Nc;
586 TileSizes[DimOutNum - 1] = MacroKernelParams.Kc;
587 Node =
tileNode(Node,
"1st level tiling", TileSizes, 1);
589 Node = permuteBandNodeDimensions(Node, DimOutNum - 2, DimOutNum - 1);
590 Node = permuteBandNodeDimensions(Node, DimOutNum - 3, DimOutNum - 1);
601static uint64_t getMatMulAlignTypeSize(
const MatMulInfoTy &MMI) {
602 auto *
S = MMI.A->getStatement()->getParent();
603 auto &DL =
S->getFunction().getParent()->getDataLayout();
604 auto ElementSizeA = DL.getTypeAllocSize(MMI.A->getElementType());
605 auto ElementSizeB = DL.getTypeAllocSize(MMI.B->getElementType());
606 auto ElementSizeC = DL.getTypeAllocSize(MMI.WriteToC->getElementType());
607 return std::max({ElementSizeA, ElementSizeB, ElementSizeC});
616static uint64_t getMatMulTypeSize(
const MatMulInfoTy &MMI) {
617 auto *
S = MMI.A->getStatement()->getParent();
618 auto &DL =
S->getFunction().getParent()->getDataLayout();
619 auto ElementSizeA = DL.getTypeSizeInBits(MMI.A->getElementType());
620 auto ElementSizeB = DL.getTypeSizeInBits(MMI.B->getElementType());
621 auto ElementSizeC = DL.getTypeSizeInBits(MMI.WriteToC->getElementType());
622 return std::max({ElementSizeA, ElementSizeB, ElementSizeC});
637static MicroKernelParamsTy getMicroKernelParams(
const TargetTransformInfo *TTI,
638 const MatMulInfoTy &MMI) {
639 assert(TTI &&
"The target transform info should be provided.");
645 if (RegisterBitwidth == -1)
647 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
648 auto ElementSize = getMatMulTypeSize(MMI);
649 assert(ElementSize > 0 &&
"The element size of the matrix multiplication "
650 "operands should be greater than zero.");
651 auto Nvec = RegisterBitwidth / ElementSize;
664static void getTargetCacheParameters(
const llvm::TargetTransformInfo *TTI) {
665 auto L1DCache = llvm::TargetTransformInfo::CacheLevel::L1D;
666 auto L2DCache = llvm::TargetTransformInfo::CacheLevel::L2D;
668 if (TTI->getCacheSize(L1DCache))
674 if (TTI->getCacheSize(L2DCache))
680 if (TTI->getCacheAssociativity(L1DCache))
682 TTI->getCacheAssociativity(L1DCache).value();
688 if (TTI->getCacheAssociativity(L2DCache))
690 TTI->getCacheAssociativity(L2DCache).value();
712static MacroKernelParamsTy
713getMacroKernelParams(
const llvm::TargetTransformInfo *TTI,
714 const MicroKernelParamsTy &MicroKernelParams,
715 const MatMulInfoTy &MMI) {
716 getTargetCacheParameters(TTI);
722 if (!(MicroKernelParams.Mr > 0 && MicroKernelParams.Nr > 0 &&
731 (1 +
static_cast<double>(MicroKernelParams.Nr) / MicroKernelParams.Mr));
740 auto ElementSize = getMatMulAlignTypeSize(MMI);
741 assert(ElementSize > 0 &&
"The element size of the matrix multiplication "
742 "operands should be greater than zero.");
751 assert(Mc > 0 && Nc > 0 && Kc > 0 &&
752 "Matrix block sizes should be greater than zero");
783 unsigned SecondDim) {
784 auto AccessRelSpace =
isl::space(MapOldIndVar.
ctx(), 0, 9, 3);
801 MicroKernelParamsTy MicroParams,
802 MacroKernelParamsTy MacroParams,
808 unsigned FirstDimSize = MacroParams.Nc / MicroParams.Nr;
809 unsigned SecondDimSize = MacroParams.Kc;
810 unsigned ThirdDimSize = MicroParams.Nr;
812 S->createScopArrayInfo(MMI.B->getElementType(),
"Packed_B",
813 {FirstDimSize, SecondDimSize, ThirdDimSize});
816 isl::map AccRelB = MMI.B->getLatestAccessRelation();
817 isl::map AccRelPackedB = getMatMulAccRel(MapOldIndVar, 3, 7);
823 MMI.B->setNewAccessRelation(AccRelPackedB);
833 return createExtensionNode(Node, ExtMap);
838 MicroKernelParamsTy MicroParams,
839 MacroKernelParamsTy MacroParams,
847 unsigned FirstDimSize = MacroParams.Mc / MicroParams.Mr;
848 unsigned SecondDimSize = MacroParams.Kc;
849 unsigned ThirdDimSize = MicroParams.Mr;
851 MMI.A->getElementType(),
"Packed_A",
852 {FirstDimSize, SecondDimSize, ThirdDimSize});
855 isl::map AccRelA = MMI.A->getLatestAccessRelation();
856 isl::map AccRelPackedA = getMatMulAccRel(MapOldIndVar, 4, 6);
887 MMI.A->setNewAccessRelation(AccRelPackedA);
893 return createExtensionNode(Node, ExtScatterCopy);
930 MicroKernelParamsTy MicroParams,
931 MacroKernelParamsTy MacroParams,
939 Node = Node.
child(0);
941 optimizePackedB(Node, Stmt, MapOldIndVar, MicroParams, MacroParams, MMI);
943 Node = Node.
child(0);
945 optimizePackedA(Node, Stmt, MapOldIndVar, MicroParams, MacroParams, MMI);
964 MicroKernelParamsTy MicroKernelParams,
965 MacroKernelParamsTy MacroKernelParams) {
966 auto Child = Node.
child(0);
991 MicroKernelParamsTy MicroKernelParams) {
1005 Options = Options.
unite(getUnrollIsolatedSetOptions(
Ctx));
1044 auto PartialSchedulePwAff =
Domain.identity_union_pw_multi_aff();
1045 auto PartialScheduleMultiPwAff =
1047 PartialScheduleMultiPwAff =
1053 const TargetTransformInfo *TTI,
1054 MatMulInfoTy &MMI) {
1055 assert(TTI &&
"The target transform info should be provided.");
1057 assert(DimOutNum > 2 &&
"In case of the matrix multiplication the loop nest "
1058 "and, consequently, the corresponding scheduling "
1059 "functions have at least three dimensions.");
1060 Node = getBandNodeWithOriginDimOrder(Node);
1061 Node = permuteBandNodeDimensions(Node, MMI.i, DimOutNum - 3);
1062 int NewJ = MMI.j == DimOutNum - 3 ? MMI.i : MMI.j;
1063 int NewK = MMI.k == DimOutNum - 3 ? MMI.i : MMI.k;
1064 Node = permuteBandNodeDimensions(Node, NewJ, DimOutNum - 2);
1065 NewK = NewK == DimOutNum - 2 ? NewJ : NewK;
1066 Node = permuteBandNodeDimensions(Node, NewK, DimOutNum - 1);
1067 auto MicroKernelParams = getMicroKernelParams(TTI, MMI);
1068 auto MacroKernelParams = getMacroKernelParams(TTI, MicroKernelParams, MMI);
1069 Node = createMacroKernel(Node, MacroKernelParams);
1070 Node = createMicroKernel(Node, MicroKernelParams);
1071 if (MacroKernelParams.Mc == 1 || MacroKernelParams.Nc == 1 ||
1072 MacroKernelParams.Kc == 1)
1074 auto MapOldIndVar = getInductionVariablesSubstitution(Node, MicroKernelParams,
1078 Node = markLoopVectorizerDisabled(Node.
parent()).
child(0);
1079 Node = isolateAndUnrollMatMulInnerLoops(Node, MicroKernelParams);
1080 return optimizeDataLayoutMatrMulPattern(Node, MapOldIndVar, MicroKernelParams,
1081 MacroKernelParams, MMI);
1105 MatMulInfoTy &MMI) {
1113 if (containsMatrMult(NewPartialSchedule, D, MMI))
1127static int getDimSize(
const ScopArrayInfo *SAI,
unsigned Pos) {
1132 auto *ConstantDimSize = dyn_cast<const SCEVConstant>(SCEVDimSize);
1134 auto *IntDimSize = dyn_cast<ConstantInt>(ConstantDimSize->getValue());
1136 return IntDimSize->getSExtValue();
1150 ArrayRef<int> Dimensions) {
1160 for (
unsigned i = 0; i < Dimensions.size(); i++) {
1161 const int InPos = Dimensions[i];
1162 if ((InPos >=
static_cast<int>(DimInSize)) || (InPos < 0))
1174 return AccMap.
is_equal(PossibleTensor);
1191 SmallDenseSet<int> &IndexSet,
1192 SmallVectorImpl<int> &DimensionSizes,
1193 SmallVectorImpl<int> &Dimensions) {
1196 assert(SAI &&
"AccMap should represent memory access");
1217 for (
unsigned i = 0; i < OutDimNum; i++)
1221 Dimensions.assign(OutDimNum, -1);
1228 if (ValAPInt.isSignedIntN(32))
1229 OutPos = ValAPInt.getSExtValue();
1230 if ((OutPos < 0) || (OutPos >=
static_cast<int>(OutDimNum)) ||
1234 Dimensions[OutPos] = i;
1235 if (DimensionSizes[i] <= 0)
1236 DimensionSizes[i] = getDimSize(SAI, OutPos);
1239 return isCorrectAccessMap(
Domain, AccMap, Dimensions);
1248static SmallDenseSet<int>
intersect(
const SmallDenseSet<int> &
A,
1249 const SmallDenseSet<int> &
B) {
1250 SmallDenseSet<int> Intersection =
A;
1251 set_intersect(Intersection,
B);
1252 return Intersection;
1261static bool isSuperset(
const SmallDenseSet<int> &
A,
1262 const SmallDenseSet<int> &
B) {
1272static SmallDenseSet<int> unite(
const SmallDenseSet<int> &
A,
1273 const SmallDenseSet<int> &
B) {
1274 SmallDenseSet<int> Union =
A;
1275 set_union(Union,
B);
1290 SmallDenseSet<int> &IandJIndexSet) {
1291 TCI.WriteToC =
nullptr;
1295 if (!MemA->isLatestArrayKind())
1298 if (!MemA->isWrite())
1301 isl::map AccMap = MemA->getLatestAccessRelation();
1302 if (!isTCOperandAcc(
Domain, AccMap, IandJIndexSet, TCI.DimensionSizes,
1322 const SmallDenseSet<int> &IndexSet,
1323 const SmallDenseSet<int> &IandJIndexSet,
1324 ArrayRef<int> Dimensions, TCInfoTy &TCI) {
1327 if (!isSuperset(IndexSet, TCI.P))
1331 TCI.I = set_difference(IndexSet, TCI.P);
1332 if (!isSuperset(IandJIndexSet, TCI.I))
1336 TCI.J = set_difference(IandJIndexSet, TCI.I);
1339 TCI.A = MemAccessPtr;
1340 llvm::replace(TCI.ADimensions, TCI.ADimensions.begin(),
1341 TCI.ADimensions.end(), Dimensions.begin(), Dimensions.end());
1347 if (unite(TCI.P, TCI.J) != IndexSet)
1351 TCI.B = MemAccessPtr;
1352 llvm::replace(TCI.BDimensions, TCI.BDimensions.begin(),
1353 TCI.BDimensions.end(), Dimensions.begin(), Dimensions.end());
1371 SmallDenseSet<int> &IandJIndexSet) {
1374 TCI.ReadFromC =
nullptr;
1376 for (
auto *MemA = Accesses.begin(); *MemA != TCI.WriteToC; MemA++) {
1396 if (AccMap.
is_equal(TCI.WriteToC->getLatestAccessRelation())) {
1399 TCI.ReadFromC = MemAccessPtr;
1403 SmallVector<int> Dimensions;
1404 SmallDenseSet<int> IndexSet;
1405 if (!isTCOperandAcc(
Domain, AccMap, IndexSet, TCI.DimensionSizes,
1409 if (!setReadAccess(MemAccessPtr, IndexSet, IandJIndexSet, Dimensions, TCI))
1415 return TCI.ReadFromC && TCI.A && TCI.B;
1441 TCI.DimensionSizes.resize(DimNum);
1442 SmallDenseSet<int> IandJIndexSet;
1444 TCI.WriteToC = getWriteAccess(
Domain, Stmt, TCI, IandJIndexSet);
1448 if (
intersect(IandJIndexSet, TCI.P).size() != 0)
1451 if (!setReadAccesses(
Domain, Stmt, TCI, IandJIndexSet))
1479static bool isReductionCarriedOverDim(
isl::set DepDelta,
unsigned Dim,
1481 const SmallDenseSet<int> &IndexSet) {
1484 for (
unsigned i = 0; i < Dim; i += 1)
1503 if (!IndexSet.count(i)) {
1567 SmallDenseSet<int> &IndexSet) {
1572 for (
const auto It : IndexSet) {
1580 Domain.subtract(DomainRed));
1624 if (!isReductionCarriedOverDim(Intersection, i, BoundDeltas, IndexSet))
1628 DepDeltas = DepDeltas.
subtract(Intersection);
1636 return areDepsOverCompleteDomain(
Domain, DepsForStmt, UpperBound, IndexSet);
1669 if (!containsOnlyTcDeps(PartialSchedule, D, TCI.P,
Domain))
1673 if (TCI.P.size() == 0)
1676 if (!containsOnlyTCAcc(
Domain, PartialSchedule, TCI))
1680 if ((TCI.I.size() == 0) || (TCI.J.size() == 0))
1743 Node = Node.
child(0);
1815 if (containsTCInfoTy(PartialScheduleMap, D, TCI,
isl::set(
Domain)))
1825 const llvm::TargetTransformInfo *TTI,
1829 POLLY_DEBUG(dbgs() <<
"The tensor contraction pattern was detected\n");
1832 POLLY_DEBUG(dbgs() <<
"The matrix multiplication pattern was detected\n");
1833 return optimizeMatMulPattern(Node, TTI, MMI);
static cl::opt< int > OptComputeOut("polly-dependences-computeout", cl::desc("Bound the dependence analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(500000), cl::cat(PollyCategory))
static cl::opt< int > FirstCacheLevelDefaultSize("polly-target-1st-cache-level-default-size", cl::desc("The default size of the first cache level specified in bytes" " (if not enough were provided by the TargetTransformInfo)."), cl::Hidden, cl::init(32768), cl::cat(PollyCategory))
static cl::opt< int > SecondCacheLevelDefaultAssociativity("polly-target-2nd-cache-level-default-associativity", cl::desc("The default associativity of the second cache level" " (if not enough were provided by the TargetTransformInfo)."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< bool > PMBasedMMMOpts("polly-matmul-opt", cl::desc("Perform optimizations of matrix multiplications " "based on pattern matching"), cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory))
static cl::opt< int > FirstCacheLevelAssociativity("polly-target-1st-cache-level-associativity", cl::desc("The associativity of the first cache level."), cl::Hidden, cl::init(-1), cl::cat(PollyCategory))
static cl::opt< int > SecondCacheLevelDefaultSize("polly-target-2nd-cache-level-default-size", cl::desc("The default size of the second cache level specified in bytes" " (if not enough were provided by the TargetTransformInfo)."), cl::Hidden, cl::init(262144), cl::cat(PollyCategory))
static cl::opt< int > PollyPatternMatchingNcQuotient("polly-pattern-matching-nc-quotient", cl::desc("Quotient that is obtained by dividing Nc, the parameter of the" "macro-kernel, by Nr, the parameter of the micro-kernel"), cl::Hidden, cl::init(256), cl::cat(PollyCategory))
static cl::opt< bool > PMBasedTCOpts("polly-tc-opt", cl::desc("Perform optimizations of tensor contractions based " "on pattern matching"), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory))
static cl::opt< int > FirstCacheLevelSize("polly-target-1st-cache-level-size", cl::desc("The size of the first cache level specified in bytes."), cl::Hidden, cl::init(-1), cl::cat(PollyCategory))
static cl::opt< int > ThroughputVectorFma("polly-target-throughput-vector-fma", cl::desc("A throughput of the processor floating-point arithmetic units " "expressed in the number of vector fused multiply-add " "instructions per clock cycle."), cl::Hidden, cl::init(1), cl::cat(PollyCategory))
static cl::opt< int > SecondCacheLevelSize("polly-target-2nd-cache-level-size", cl::desc("The size of the second level specified in bytes."), cl::Hidden, cl::init(-1), cl::cat(PollyCategory))
static cl::opt< int > FirstCacheLevelDefaultAssociativity("polly-target-1st-cache-level-default-associativity", cl::desc("The default associativity of the first cache level" " (if not enough were provided by the TargetTransformInfo)."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< int > SecondCacheLevelAssociativity("polly-target-2nd-cache-level-associativity", cl::desc("The associativity of the second cache level."), cl::Hidden, cl::init(-1), cl::cat(PollyCategory))
static cl::opt< int > VectorRegisterBitwidth("polly-target-vector-register-bitwidth", cl::desc("The size in bits of a vector register (if not set, this " "information is taken from LLVM's target information."), cl::Hidden, cl::init(-1), cl::cat(PollyCategory))
static cl::opt< int > LatencyVectorFma("polly-target-latency-vector-fma", cl::desc("The minimal number of cycles between issuing two " "dependent consecutive vector fused multiply-add " "instructions."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< int > OptComputeOut("polly-tc-dependences-computeout", cl::desc("Bound the dependence analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(500000), cl::ZeroOrMore, cl::cat(PollyCategory))
llvm::cl::OptionCategory PollyCategory
__isl_give isl_set * isl_set_from_pw_multi_aff(__isl_take isl_pw_multi_aff *pma)
__isl_give isl_pw_multi_aff * isl_pw_multi_aff_from_set(__isl_take isl_set *set)
static isl::id alloc(isl::ctx ctx, const std::string &name, void *user)
isl::map equate(isl::dim type1, int pos1, isl::dim type2, int pos2) const
static isl::map universe(isl::space space)
isl::id get_tuple_id(isl::dim type) const
class size range_tuple_dim() const
isl::map set_tuple_id(isl::dim type, isl::id id) const
isl::map fix_si(isl::dim type, unsigned int pos, int value) const
isl::map intersect_range(isl::set set) const
isl::map apply_range(isl::map map2) const
static isl::map from_union_map(isl::union_map umap)
boolean is_equal(const isl::map &map2) const
isl::map apply_domain(isl::map map2) const
isl::map range_product(isl::map map2) const
class size dim(isl::dim type) const
isl::space get_space() const
__isl_keep isl_map * get() const
isl::map move_dims(isl::dim dst_type, unsigned int dst_pos, isl::dim src_type, unsigned int src_pos, unsigned int n) const
isl::map intersect_domain(isl::set set) const
isl::map project_out(isl::dim type, unsigned int first, unsigned int n) const
boolean has_tuple_id(isl::dim type) const
__isl_give isl_map * copy() const &
__isl_give isl_pw_multi_aff * copy() const &
isl::multi_pw_aff add(const isl::multi_pw_aff &multi2) const
__isl_give isl_pw_multi_aff * release()
isl::multi_pw_aff sub(const isl::multi_pw_aff &multi2) const
class size n_member() const
isl::union_set domain() const
isl::union_set get_universe_domain() const
class size get_schedule_depth() const
isl::schedule_node insert_mark(isl::id mark) const
isl::schedule_node child(int pos) const
__isl_give isl_schedule_node * release()
isl::schedule_node insert_partial_schedule(isl::multi_union_pw_aff schedule) const
isl::union_map get_prefix_schedule_relation() const
__isl_give isl_schedule_node * copy() const &
isl::union_map get_prefix_schedule_union_map() const
isl::schedule_node graft_before(isl::schedule_node graft) const
static isl::schedule_node from_extension(isl::union_map extension)
isl::schedule_node parent() const
__isl_keep isl_schedule_node * get() const
isl::set project_out(isl::dim type, unsigned int first, unsigned int n) const
isl::set intersect(isl::set set2) const
isl::set subtract(isl::set set2) const
static isl::set universe(isl::space space)
isl::set fix_si(isl::dim type, unsigned int pos, int value) const
__isl_give isl_set * copy() const &
boolean is_subset(const isl::set &set2) const
class size tuple_dim() const
isl::space get_space() const
class size dim(isl::dim type) const
isl::set add_dims(isl::dim type, unsigned int n) const
isl::val plain_get_val_if_fixed(isl::dim type, unsigned int pos) const
boolean is_equal(const isl::set &set2) const
isl::space map_from_domain_and_range(isl::space range) const
isl::space domain() const
class size dim(isl::dim type) const
isl::map extract_map(isl::space space) const
isl::union_map unite(isl::union_map umap2) const
isl::union_set unite(isl::union_set uset2) const
__isl_give isl_val * release()
The accumulated dependence information for a SCoP.
isl::union_map getDependences(int Kinds) const
Get the dependences of type Kinds.
Scoped limit of ISL operations.
Represent memory accesses in statements.
isl::map getLatestAccessRelation() const
Return the newest access relation of this access.
bool isLatestArrayKind() const
Whether storage memory is either an custom .s2a/.phiops alloca (false) or an existing pointer into an...
bool isWrite() const
Is this a write memory access?
bool isRead() const
Is this a read memory access?
ScopStmt * getStatement() const
Get the statement that contains this memory access.
A class to store information about arrays in the SCoP.
const SCEV * getDimensionSize(unsigned Dim) const
Return the size of dimension dim as SCEV*.
static const ScopArrayInfo * getFromId(isl::id Id)
Access the ScopArrayInfo associated with an isl Id.
isl::id getBasePtrId() const
Return the isl id for the base pointer.
isl::id getDomainId() const
Get the id of the iteration domain space.
bool isRegionStmt() const
Return true if this statement represents a whole region.
isl::set getDomain() const
Get the iteration domain of this ScopStmt.
void addScopStmt(BasicBlock *BB, StringRef Name, Loop *SurroundingLoop, std::vector< Instruction * > Instructions)
Create a new SCoP statement for BB.
ScopArrayInfo * createScopArrayInfo(Type *ElementType, const std::string &BaseName, const std::vector< unsigned > &Sizes)
Create an array and return the corresponding ScopArrayInfo object.
enum isl_schedule_node_type isl_schedule_node_get_type(__isl_keep isl_schedule_node *node)
boolean manage(isl_bool val)
This file contains the declaration of the PolyhedralInfo class, which will provide an interface to ex...
llvm::SmallVector< MemoryAccess *, 32 > getAccessesInOrder(ScopStmt &Stmt)
Return a vector that contains MemoryAccesses in the order in which they are executed.
@ Value
MemoryKind::Value: Models an llvm::Value.
isl::schedule_node applyRegisterTiling(isl::schedule_node Node, llvm::ArrayRef< int > TileSizes, int DefaultTileSize)
Tile a schedule node and unroll point loops.
isl::val getConstant(isl::pw_aff PwAff, bool Max, bool Min)
If PwAff maps to a constant, return said constant.
isl::map makeIdentityMap(const isl::set &Set, bool RestrictDomain)
Construct an identity map for the given domain values.
llvm::iota_range< unsigned > rangeIslSize(unsigned Begin, isl::size End)
Check that End is valid and return an iterator from Begin to End.
isl::schedule_node tryOptimizeMatMulPattern(isl::schedule_node Node, const llvm::TargetTransformInfo *TTI, const Dependences *D)
Apply the BLIS matmul optimization pattern if possible.
isl::union_set getIsolateOptions(isl::set IsolateDomain, unsigned OutDimsNum)
Create an isl::union_set, which describes the isolate option based on IsolateDomain.
isl::schedule_node tileNode(isl::schedule_node Node, const char *Identifier, llvm::ArrayRef< int > TileSizes, int DefaultTileSize)
Tile a schedule node.
isl::union_set getDimOptions(isl::ctx Ctx, const char *Option)
Create an isl::union_set, which describes the specified option for the dimension of the current node.
llvm::APInt APIntFromVal(__isl_take isl_val *Val)
Translate isl_val to llvm::APInt.
isl::set getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth)
Build the desired set of partial tile prefixes.
__isl_export isl_size isl_schedule_node_band_n_member(__isl_keep isl_schedule_node *node)
__isl_export __isl_give isl_multi_union_pw_aff * isl_schedule_node_band_get_partial_schedule(__isl_keep isl_schedule_node *node)
__isl_export __isl_give isl_schedule_node * isl_schedule_node_band_split(__isl_take isl_schedule_node *node, int pos)
__isl_give isl_union_map * isl_schedule_node_band_get_partial_schedule_union_map(__isl_keep isl_schedule_node *node)
__isl_give isl_schedule_node * isl_schedule_node_delete(__isl_take isl_schedule_node *node)
@ isl_schedule_node_filter
@ isl_schedule_node_domain
__isl_give isl_set * isl_set_fix_val(__isl_take isl_set *set, enum isl_dim_type type, unsigned pos, __isl_take isl_val *v)
static TupleKindPtr Domain("Domain")
static std::vector< std::string > intersect(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
isl_size isl_union_map_n_map(__isl_keep isl_union_map *umap)
isl_size isl_union_set_n_set(__isl_keep isl_union_set *uset)