[CodeGen] Update for scalable MemoryType in MMO (#70452)
Remove the getSizeOrUnknown call when a MachineMemOperand is created. For a
scalable TypeSize, the MemoryType created becomes a scalable_vector.

Two MMOs that have scalable memory accesses can then use the updated BasicAA,
which understands scalable LocationSize.

Original patch by Harvin Iriawan
Co-authored-by: David Green <david.green@arm.com>
harviniriawan committed Mar 23, 2024
1 parent f886dfe commit 57146da
Showing 17 changed files with 165 additions and 118 deletions.
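
At the core of the change is the mapping from a LocationSize to the MMO's
memory type. The following standalone sketch (not code from the commit; the
helper name is hypothetical) mirrors the logic this patch adds to
MachineFunction.h and MachineOperand.cpp, assuming LLVM's LLT and LocationSize
APIs at this revision:

// Sketch: how an MMO's MemoryType is derived from a LocationSize after this
// patch. An unknown size stays an invalid LLT; a scalable size now becomes a
// scalable vector type instead of being dropped.
static llvm::LLT memoryTypeForSize(llvm::LocationSize Size) {
  using namespace llvm;
  if (!Size.hasValue())
    return LLT(); // unknown size -> no memory type
  uint64_t MinBits = 8 * Size.getValue().getKnownMinValue();
  if (Size.isScalable())
    return LLT::scalable_vector(1, MinBits); // <vscale x 1 x iN>
  return LLT::scalar(MinBits); // iN
}
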
7 changes: 0 additions & 7 deletions llvm/include/llvm/Analysis/MemoryLocation.h
@@ -297,13 +297,6 @@ class MemoryLocation {
     return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags);
   }
 
-  // Return the exact size if the exact size is known at compiletime,
-  // otherwise return LocationSize::beforeOrAfterPointer().
-  static LocationSize getSizeOrUnknown(const TypeSize &T) {
-    return T.isScalable() ? LocationSize::beforeOrAfterPointer()
-                          : LocationSize::precise(T.getFixedValue());
-  }
-
   MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {}
 
   explicit MemoryLocation(const Value *Ptr, LocationSize Size,
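
With LocationSize able to carry a scalable TypeSize, callers switch from the
removed getSizeOrUnknown to LocationSize::precise, so scalability is preserved
rather than widened to "unknown". An illustrative before/after (VT stands in
for any EVT with a scalable store size):

// Before: a scalable store size collapsed to beforeOrAfterPointer().
LocationSize Old = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
// After: the scalable TypeSize survives inside the LocationSize.
LocationSize New = LocationSize::precise(VT.getStoreSize());
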
5 changes: 3 additions & 2 deletions llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1058,8 +1058,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
                                           int64_t Offset, LocationSize Size) {
     return getMachineMemOperand(
         MMO, Offset,
-        !Size.hasValue() || Size.isScalable()
-            ? LLT()
+        !Size.hasValue() ? LLT()
+        : Size.isScalable()
+            ? LLT::scalable_vector(1, 8 * Size.getValue().getKnownMinValue())
             : LLT::scalar(8 * Size.getValue().getKnownMinValue()));
   }
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
34 changes: 25 additions & 9 deletions llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -128,14 +128,14 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
       // vector objects on the stack.
       // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
       // following situations arise:
-      if (PtrDiff >= 0 && Size1.hasValue()) {
+      if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
         // [----BasePtr0----]
         //    [---BasePtr1--]
         // ========PtrDiff========>
         IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
         return true;
       }
-      if (PtrDiff < 0 && Size2.hasValue()) {
+      if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
         //          [----BasePtr0----]
         // [---BasePtr1--]
         // =====(-PtrDiff)====>
@@ -248,10 +248,20 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
       return false;
   }
 
+  // If NumBytes is scalable and offset is not 0, conservatively return may
+  // alias
+  if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
+      (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
+    return true;
+
+  const bool BothNotScalable =
+      !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
+
   // Try to prove that there is aliasing, or that there is no aliasing. Either
   // way, we can return now. If nothing can be proved, proceed with more tests.
   bool IsAlias;
-  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+  if (BothNotScalable &&
+      GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
     return IsAlias;
 
   // The following all rely on MMO0 and MMO1 being valid.
@@ -267,12 +277,18 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
       Size1.hasValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
-    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
-                                     MUC0.MMO->getAAInfo()),
-                      MemoryLocation(MUC1.MMO->getValue(), Overlap1,
-                                     MUC1.MMO->getAAInfo())))
+    int64_t Overlap0 =
+        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 =
+        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+    LocationSize Loc0 =
+        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+    LocationSize Loc1 =
+        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
+
+    if (AA->isNoAlias(
+            MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
+            MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
       return false;
   }
 
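
The !isScalable() guards above exist because only the minimum size of a
scalable access is known at compile time. A minimal sketch of the reasoning
(illustrative helper, not from the patch; assumes llvm/Support/TypeSize.h):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// For fixed sizes, an access of Size1 bytes at BasePtr0 cannot reach
// BasePtr0 + PtrDiff when Size1 <= PtrDiff. For scalable sizes the real
// extent is Size1.getKnownMinValue() * vscale, which may exceed PtrDiff for
// vscale > 1, so the test must conservatively fail.
static bool knownDisjoint(int64_t PtrDiff, TypeSize Size1) {
  if (Size1.isScalable())
    return false; // e.g. <vscale x 4 x i32>: 16 bytes at vscale 1, 32 at 2
  return static_cast<int64_t>(Size1.getFixedValue()) <= PtrDiff;
}
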
37 changes: 27 additions & 10 deletions llvm/lib/CodeGen/MachineInstr.cpp
@@ -1306,6 +1306,7 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
   LocationSize WidthB = MMOb->getSize();
   bool KnownWidthA = WidthA.hasValue();
   bool KnownWidthB = WidthB.hasValue();
+  bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable();
 
   const Value *ValA = MMOa->getValue();
   const Value *ValB = MMOb->getValue();
@@ -1321,12 +1322,14 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
     SameVal = true;
   }
 
-  if (SameVal) {
+  if (SameVal && BothMMONonScalable) {
     if (!KnownWidthA || !KnownWidthB)
       return true;
     int64_t MaxOffset = std::max(OffsetA, OffsetB);
-    LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
-    return (MinOffset + (int)LowWidth.getValue() > MaxOffset);
+    int64_t LowWidth = (MinOffset == OffsetA)
+                           ? WidthA.getValue().getKnownMinValue()
+                           : WidthB.getValue().getKnownMinValue();
+    return (MinOffset + LowWidth > MaxOffset);
   }
 
   if (!AA)
@@ -1338,15 +1341,29 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
   assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
   assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
 
-  int64_t OverlapA = KnownWidthA ? WidthA.getValue() + OffsetA - MinOffset
-                                 : MemoryLocation::UnknownSize;
-  int64_t OverlapB = KnownWidthB ? WidthB.getValue() + OffsetB - MinOffset
-                                 : MemoryLocation::UnknownSize;
+  // If Scalable Location Size has non-zero offset, Width + Offset does not work
+  // at the moment
+  if ((WidthA.isScalable() && OffsetA > 0) ||
+      (WidthB.isScalable() && OffsetB > 0))
+    return true;
+
+  int64_t OverlapA =
+      KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset
+                  : MemoryLocation::UnknownSize;
+  int64_t OverlapB =
+      KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset
+                  : MemoryLocation::UnknownSize;
+
+  LocationSize LocA = (WidthA.isScalable() || !KnownWidthA)
+                          ? WidthA
+                          : LocationSize::precise(OverlapA);
+  LocationSize LocB = (WidthB.isScalable() || !KnownWidthB)
+                          ? WidthB
+                          : LocationSize::precise(OverlapB);
 
   return !AA->isNoAlias(
-      MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
-      MemoryLocation(ValB, OverlapB,
-                     UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+      MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+      MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
 }
 
 bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
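
The net effect in MemOperandsHaveAlias: fixed, known widths are folded into
precise overlap extents, while scalable or unknown widths are forwarded
unchanged so BasicAA's scalable-aware logic can run. A condensed sketch of
that selection (hypothetical helper, not from the diff):

// Pick the MemoryLocation size for the AA query: a precise overlap extent
// for fixed known widths, the original (scalable or unknown) LocationSize
// otherwise.
static llvm::LocationSize
queryLocation(llvm::LocationSize Width, bool KnownWidth, int64_t Overlap) {
  return (Width.isScalable() || !KnownWidth)
             ? Width
             : llvm::LocationSize::precise(Overlap);
}
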
13 changes: 7 additions & 6 deletions llvm/lib/CodeGen/MachineOperand.cpp
@@ -1107,12 +1107,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F,
                                      const MDNode *Ranges, SyncScope::ID SSID,
                                      AtomicOrdering Ordering,
                                      AtomicOrdering FailureOrdering)
-    : MachineMemOperand(ptrinfo, F,
-                        !TS.hasValue() || TS.isScalable()
-                            ? LLT()
-                            : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
-                        BaseAlignment, AAInfo, Ranges, SSID, Ordering,
-                        FailureOrdering) {}
+    : MachineMemOperand(
+          ptrinfo, F,
+          !TS.hasValue() ? LLT()
+          : TS.isScalable()
+              ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue())
+              : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
+          BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
 
 void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
   // The Value and Offset may differ due to CSE. But the flags and size
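
A hypothetical use of the updated constructor path: requesting an MMO for a
full SVE vector store now yields a scalable MemoryType rather than an empty
LLT. PtrInfo and MF are assumed to be in scope; the values are illustrative:

// 16 * vscale bytes; the resulting MMO MemoryType is
// LLT::scalable_vector(1, 128), i.e. <vscale x 1 x s128>.
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore,
    LocationSize::precise(TypeSize::getScalable(16)), Align(16));
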
51 changes: 32 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24200,7 +24200,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   // TODO: Use "BaseIndexOffset" to make this more effective.
   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
 
-  LocationSize StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
+  LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
   MachineFunction &MF = DAG.getMachineFunction();
   MachineMemOperand *MMO;
   if (Offset.isScalable()) {
@@ -27845,14 +27845,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
                          ? -1 * C->getSExtValue()
                          : 0;
-    LocationSize Size =
-        MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
-    return {LSN->isVolatile(),
-            LSN->isAtomic(),
-            LSN->getBasePtr(),
-            Offset /*base offset*/,
-            Size,
-            LSN->getMemOperand()};
+    TypeSize Size = LSN->getMemoryVT().getStoreSize();
+    return {LSN->isVolatile(),           LSN->isAtomic(),
+            LSN->getBasePtr(),           Offset /*base offset*/,
+            LocationSize::precise(Size), LSN->getMemOperand()};
   }
   if (const auto *LN = cast<LifetimeSDNode>(N))
     return {false /*isVolatile*/,
@@ -27894,6 +27890,13 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
     return false;
   }
 
+  // If NumBytes is scalable and offset is not 0, conservatively return may
+  // alias
+  if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
+       MUC0.Offset != 0) ||
+      (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
+       MUC1.Offset != 0))
+    return true;
   // Try to prove that there is aliasing, or that there is no aliasing. Either
   // way, we can return now. If nothing can be proved, proceed with more tests.
   bool IsAlias;
@@ -27924,18 +27927,22 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
   LocationSize Size0 = MUC0.NumBytes;
   LocationSize Size1 = MUC1.NumBytes;
+
   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
-      Size0.hasValue() && Size1.hasValue() && Size0 == Size1 &&
-      OrigAlignment0 > Size0.getValue() &&
-      SrcValOffset0 % Size0.getValue() == 0 &&
-      SrcValOffset1 % Size1.getValue() == 0) {
+      Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
+      !Size1.isScalable() && Size0 == Size1 &&
+      OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
+      SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
+      SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
 
     // There is no overlap between these relatively aligned accesses of
     // similar size. Return no alias.
-    if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 ||
-        (OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0)
+    if ((OffAlign0 + static_cast<int64_t>(
+                         Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
+        (OffAlign1 + static_cast<int64_t>(
+                         Size1.getValue().getKnownMinValue())) <= OffAlign0)
       return false;
   }
 
@@ -27952,12 +27959,18 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
       Size0.hasValue() && Size1.hasValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
+    int64_t Overlap0 =
+        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 =
+        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+    LocationSize Loc0 =
+        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+    LocationSize Loc1 =
+        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
     if (AA->isNoAlias(
-            MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+            MemoryLocation(MUC0.MMO->getValue(), Loc0,
                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
-            MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+            MemoryLocation(MUC1.MMO->getValue(), Loc1,
                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
       return false;
   }
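
The alignment-based disambiguation above only fires for fixed sizes, because
a scalable access's true extent is a multiple of vscale and the modular
argument breaks down. A minimal sketch of the guarded test with a worked
instance (illustrative helper and numbers, not from the patch):

// Two accesses of Size bytes sharing base alignment BaseAlign are disjoint
// when their offsets-within-a-block cannot overlap.
// e.g. Size = 4, BaseAlign = 8, Off0 = 0, Off1 = 4:
//   OffAlign0 = 0, OffAlign1 = 4, and 0 + 4 <= 4 -> no alias.
static bool alignedDisjoint(int64_t Off0, int64_t Off1, uint64_t Size,
                            uint64_t BaseAlign) {
  int64_t OffAlign0 = Off0 % static_cast<int64_t>(BaseAlign);
  int64_t OffAlign1 = Off1 % static_cast<int64_t>(BaseAlign);
  return OffAlign0 + static_cast<int64_t>(Size) <= OffAlign1 ||
         OffAlign1 + static_cast<int64_t>(Size) <= OffAlign0;
}
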
22 changes: 9 additions & 13 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8406,9 +8406,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
     EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
     MachineMemOperand::Flags Flags, LocationSize Size,
     const AAMDNodes &AAInfo) {
-  if (Size.hasValue() && MemVT.isScalableVector())
-    Size = LocationSize::beforeOrAfterPointer();
-  else if (Size.hasValue() && !Size.getValue())
+  if (Size.hasValue() && !Size.getValue())
     Size = LocationSize::precise(MemVT.getStoreSize());
 
   MachineFunction &MF = getMachineFunction();
@@ -8571,7 +8569,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
 
-  LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+  LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                    Alignment, AAInfo, Ranges);
@@ -8692,8 +8690,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
 
   MachineFunction &MF = getMachineFunction();
-  LocationSize Size =
-      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+  LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize());
   MachineMemOperand *MMO =
       MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
   return getStore(Chain, dl, Val, Ptr, MMO);
@@ -8746,8 +8743,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
-      Alignment, AAInfo);
+      PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+      AAInfo);
   return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
 }
 
@@ -8841,7 +8838,7 @@ SDValue SelectionDAG::getLoadVP(
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
 
-  LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+  LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                    Alignment, AAInfo, Ranges);
@@ -8994,8 +8991,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
-      Alignment, AAInfo);
+      PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+      AAInfo);
   return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
                          IsCompressing);
 }
@@ -11734,10 +11731,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
   // We check here that the size of the memory operand fits within the size of
   // the MMO. This is because the MMO might indicate only a possible address
   // range instead of specifying the affected memory addresses precisely.
-  // TODO: Make MachineMemOperands aware of scalable vectors.
   assert(
       (!MMO->getType().isValid() ||
-       memvt.getStoreSize().getKnownMinValue() <= MMO->getSize().getValue()) &&
+       TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) &&
       "Size mismatch!");
 }
 
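
The relaxed assertion relies on TypeSize::isKnownLE, which only returns true
when the ordering holds for every possible vscale. Illustrative values
(assuming llvm/Support/TypeSize.h):

using namespace llvm;
TypeSize A = TypeSize::getScalable(16);                   // 16 * vscale bytes
TypeSize B = TypeSize::getScalable(32);                   // 32 * vscale bytes
bool R1 = TypeSize::isKnownLE(A, B);                      // true for all vscale
bool R2 = TypeSize::isKnownLE(TypeSize::getFixed(16), A); // true: vscale >= 1
bool R3 = TypeSize::isKnownLE(A, TypeSize::getFixed(32)); // false: fails when
                                                          // vscale > 2
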
2 changes: 0 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -106,8 +106,6 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
   int64_t PtrDiff;
   if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
     // If the size of memory access is unknown, do not use it to analysis.
-    // One example of unknown size memory access is to load/store scalable
-    // vector objects on the stack.
     // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
     // following situations arise:
     if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) {
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4962,7 +4962,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
   unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(AS), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges);
+      LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(),
+      Ranges);
 
   if (!UniformBase) {
     Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
5 changes: 1 addition & 4 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2687,10 +2687,7 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
     return false;
   // The maximum vscale is 16 under AArch64, return the maximal extent for the
   // vector.
-  Width = WidthN.isScalable()
-              ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
-                    AArch64::SVEBitsPerBlock
-              : WidthN.getKnownMinValue();
+  Width = LocationSize::precise(WidthN);
   BaseOps.push_back(BaseOp);
   return true;
 }
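
Where the old code widened a scalable width to the architectural maximum
(vscale = 16), the width is now carried as-is in a LocationSize. An
illustrative contrast for a single SVE vector register, using the constants
from the removed formula (AArch64::SVEMaxBitsPerVector = 2048,
AArch64::SVEBitsPerBlock = 128):

TypeSize WidthN = TypeSize::getScalable(16); // one SVE vector register
// Old: 16 * (2048 / 128) = 256 fixed bytes, the vscale = 16 upper bound.
// New: a precise scalable size of 16 * vscale bytes.
LocationSize Width = LocationSize::precise(WidthN);
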
