summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/NVPTX
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2018-05-09 23:46:19 +0000
committerArtem Belevich <tra@google.com>2018-05-09 23:46:19 +0000
commit09001f8a0d183db1935d37f9a4346b8bc893efdc (patch)
treed39d9a193cc3bc66673bcfb421ad7c113f6dc9bd /llvm/lib/Target/NVPTX
parent06d11cdb2166a75806e2fded41e3aff932c0400b (diff)
[NVPTX] Added a feature to use short pointers for const/local/shared AS.
Const/local/shared address spaces are all < 4GB, so we can always use 32-bit pointers to access them. This has a substantial performance impact on kernels that use shared memory for intermediate results. The feature is disabled by default. Differential Revision: https://reviews.llvm.org/D46147
Diffstat (limited to 'llvm/lib/Target/NVPTX')
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp127
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td12
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp18
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.h3
8 files changed, 108 insertions, 61 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 9538d795cc9..a6b7807b4c0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -66,6 +66,10 @@ bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
return TL->allowUnsafeFPMath(*MF);
}
+bool NVPTXDAGToDAGISel::useShortPointers() const {
+ return TM.useShortPointers();
+}
+
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
@@ -732,7 +736,6 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
unsigned DstAddrSpace = CastN->getDestAddressSpace();
-
assert(SrcAddrSpace != DstAddrSpace &&
"addrspacecast must be between different address spaces");
@@ -745,13 +748,19 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
+ : NVPTX::cvta_shared_yes_64)
+ : NVPTX::cvta_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
+ : NVPTX::cvta_const_yes_64)
+ : NVPTX::cvta_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
+ : NVPTX::cvta_local_yes_64)
+ : NVPTX::cvta_local_yes;
break;
}
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
@@ -769,16 +778,19 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
: NVPTX::cvta_to_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
+ : NVPTX::cvta_to_shared_yes_64)
: NVPTX::cvta_to_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc =
- TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
+ : NVPTX::cvta_to_const_yes_64)
+ : NVPTX::cvta_to_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc =
- TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
+ Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
+ : NVPTX::cvta_to_local_yes_64)
+ : NVPTX::cvta_to_local_yes;
break;
case ADDRESS_SPACE_PARAM:
Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
@@ -834,18 +846,20 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
return false;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(LD);
-
- if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
+ unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
+ if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
+ unsigned int PointerSize =
+ CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
+
// Volatile Setting
// - .volatile is only availalble for .global and .shared
bool isVolatile = LD->isVolatile();
- if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
isVolatile = false;
// Type Setting: fromType + fromTypeWidth
@@ -892,27 +906,27 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
MVT::Other, Ops);
- } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
- : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
MVT::Other, Ops);
- } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (TM.is64Bit())
+ } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (PointerSize == 64)
Opcode = pickOpcodeForVT(
TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
@@ -924,13 +938,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
MVT::Other, Ops);
} else {
- if (TM.is64Bit())
+ if (PointerSize == 64)
Opcode = pickOpcodeForVT(
TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
@@ -943,7 +957,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), N1, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
@@ -977,11 +991,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
-
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
+ unsigned int PointerSize =
+ CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
+
// Volatile Setting
// - .volatile is only availalble for .global and .shared
bool IsVolatile = MemSD->isVolatile();
@@ -1064,8 +1080,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
- } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (PointerSize == 64
+ ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
@@ -1090,9 +1107,10 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
- } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (TM.is64Bit()) {
+ } else if (PointerSize == 64
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
return false;
@@ -1140,7 +1158,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else {
- if (TM.is64Bit()) {
+ if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
return false;
@@ -1685,14 +1703,16 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
return false;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(ST);
+ unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
+ unsigned int PointerSize =
+ CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
// Volatile Setting
// - .volatile is only availalble for .global and .shared
bool isVolatile = ST->isVolatile();
- if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
isVolatile = false;
// Vector Setting
@@ -1735,12 +1755,12 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
Chain };
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
- } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
@@ -1748,13 +1768,13 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
- } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (TM.is64Bit())
+ } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (PointerSize == 64)
Opcode = pickOpcodeForVT(
SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
@@ -1768,12 +1788,12 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else {
- if (TM.is64Bit())
+ if (PointerSize == 64)
Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
@@ -1787,7 +1807,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
Chain };
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
@@ -1816,11 +1836,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
// Address Space Setting
unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
-
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
report_fatal_error("Cannot store to pointer that points to constant "
"memory space");
}
+ unsigned int PointerSize =
+ CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
// Volatile Setting
// - .volatile is only availalble for .global and .shared
@@ -1901,8 +1922,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
break;
}
StOps.push_back(Addr);
- } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
@@ -1923,9 +1944,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
}
StOps.push_back(Base);
StOps.push_back(Offset);
- } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (TM.is64Bit()) {
+ } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
return false;
@@ -1968,7 +1989,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
StOps.push_back(Base);
StOps.push_back(Offset);
} else {
- if (TM.is64Bit()) {
+ if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
return false;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index b49e1573fff..9b16bd96b98 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -35,6 +35,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool useF32FTZ() const;
bool allowFMA() const;
bool allowUnsafeFPMath() const;
+ bool useShortPointers() const;
public:
explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 159023170f9..6ccd9eb90e0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1233,9 +1233,9 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- auto PtrVT = getPointerTy(DAG.getDataLayout());
- Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
+ Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 7b2bf386d62..443b077184c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -147,6 +147,7 @@ def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
+def useShortPtr : Predicate<"useShortPointers()">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 66419f034f6..31bed350f38 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1937,6 +1937,12 @@ multiclass NG_TO_G<string Str, Intrinsic Intrin> {
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
!strconcat("cvta.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+ def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
+ "{{ .reg .b64 %tmp;\n\t"
+ #" cvt.u64.u32 \t%tmp, $src;\n\t"
+ #" cvta." # Str # ".u64 \t$result, %tmp; }}",
+ [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
+ Requires<[useShortPtr]>;
}
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
@@ -1946,6 +1952,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
!strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+ def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
+ "{{ .reg .b64 %tmp;\n\t"
+ #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
+ #" cvt.u32.u64 \t$result, %tmp; }}",
+ [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
+ Requires<[useShortPtr]>;
}
defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index fa35673bbbb..7030fe52432 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -79,7 +79,6 @@ public:
bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
bool allowFP16Math() const;
-
unsigned int getSmVersion() const { return SmVersion; }
std::string getTargetName() const { return TargetName; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a3cd99e37cb..a1b160441df 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -52,6 +52,12 @@ static cl::opt<bool> DisableRequireStructuredCFG(
"unexpected regressions happen."),
cl::init(false), cl::Hidden);
+static cl::opt<bool> UseShortPointersOpt(
+ "nvptx-short-ptr",
+ cl::desc(
+ "Use 32-bit pointers for accessing const/local/shared address spaces."),
+ cl::init(false), cl::Hidden);
+
namespace llvm {
void initializeNVVMIntrRangePass(PassRegistry&);
@@ -83,11 +89,13 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXLowerAggrCopiesPass(PR);
}
-static std::string computeDataLayout(bool is64Bit) {
+static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
std::string Ret = "e";
if (!is64Bit)
Ret += "-p:32:32";
+ else if (UseShortPointers)
+ Ret += "-p3:32:32-p4:32:32-p5:32:32";
Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
@@ -108,9 +116,11 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool is64bit)
// The pic relocation model is used regardless of what the client has
// specified, as it is the only relocation model currently supported.
- : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
- Reloc::PIC_, getEffectiveCodeModel(CM), OL),
- is64bit(is64bit), TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
+ : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
+ CPU, FS, Options, Reloc::PIC_,
+ getEffectiveCodeModel(CM), OL),
+ is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
+ TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index eeebf64d39c..ca540b8e038 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -26,6 +26,8 @@ namespace llvm {
///
class NVPTXTargetMachine : public LLVMTargetMachine {
bool is64bit;
+ // Use 32-bit pointers for accessing const/local/shared AS.
+ bool UseShortPointers;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
@@ -45,6 +47,7 @@ public:
}
const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
bool is64Bit() const { return is64bit; }
+ bool useShortPointers() const { return UseShortPointers; }
NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);