Skip to content

Commit

Permalink
JIT: Move internal reserved registers to a side table (dotnet#101647)
Browse files Browse the repository at this point in the history
This gets rid of `GenTree::gtRsvdRegs` by moving internal registers to a side
table. We generally use internal registers very rarely, so making the lookup
more costly seems worth the trade-off (especially to make it easier to expand
`regMaskTP` to 16 bytes).

There was one exception where we used internal registers a lot, which was
`GT_CALL` for R2R codegen on arm64/arm32. For those nodes we always allocate an
internal register to load the target into (the target is obtained by loading the
R2R indirection cell that is passed in an argument register).

For arm64 it was simple to avoid this internal register: we can simply use LR
always, since that register is going to be overwritten by the call anyway. This
results in -2% TP for crossgen2 arm64 just from avoiding building this extra
interval. This is also the cause of the asm diffs.

For arm32 the same strategy doesn't work as well because loading into LR is a
4-byte instruction while loading into other registers is a 2-byte instruction.
So for arm32 we still use an internal register and take the small throughput hit.

This change reduces JIT memory usage by ~1.5%. The throughput cost (when
discounting some spurious inlining decision changes) seems to be around 0.1%.
  • Loading branch information
jakobbotsch authored and michaelgsharp committed May 8, 2024
1 parent d50d190 commit bd88779
Show file tree
Hide file tree
Showing 23 changed files with 416 additions and 310 deletions.
24 changes: 12 additions & 12 deletions src/coreclr/jit/codegenarm.cpp
Expand Up @@ -280,7 +280,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
if (targetType == TYP_FLOAT)
{
// Get a temp integer register
regNumber tmpReg = tree->GetSingleTempReg();
regNumber tmpReg = internalRegisters.GetSingle(tree);

float f = forceCastToFloat(constValue);
instGen_Set_Reg_To_Imm(EA_4BYTE, tmpReg, *((int*)(&f)));
Expand All @@ -293,8 +293,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
unsigned* cv = (unsigned*)&constValue;

// Get two temp integer registers
regNumber tmpReg1 = tree->ExtractTempReg();
regNumber tmpReg2 = tree->GetSingleTempReg();
regNumber tmpReg1 = internalRegisters.Extract(tree);
regNumber tmpReg2 = internalRegisters.GetSingle(tree);

instGen_Set_Reg_To_Imm(EA_4BYTE, tmpReg1, cv[0]);
instGen_Set_Reg_To_Imm(EA_4BYTE, tmpReg2, cv[1]);
Expand Down Expand Up @@ -431,9 +431,9 @@ void CodeGen::genLclHeap(GenTree* tree)
}

// Setup the regTmp, if there is one.
if (tree->AvailableTempRegCount() > 0)
if (internalRegisters.Count(tree) > 0)
{
regTmp = tree->ExtractTempReg();
regTmp = internalRegisters.Extract(tree);
}

// If we have an outgoing arg area then we must adjust the SP by popping off the
Expand Down Expand Up @@ -833,7 +833,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());

// Temp register used to perform the sequence of loads and stores.
regNumber tmpReg = cpObjNode->ExtractTempReg();
regNumber tmpReg = internalRegisters.Extract(cpObjNode);
assert(genIsValidIntReg(tmpReg));

if (cpObjNode->IsVolatile())
Expand Down Expand Up @@ -1026,18 +1026,18 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
{
// Arm supports unaligned access only for integer types,
// convert the storing floating data into 1 or 2 integer registers and write them as int.
regNumber addr = tree->ExtractTempReg();
regNumber addr = internalRegisters.Extract(tree);
emit->emitIns_R_S(INS_lea, EA_PTRSIZE, addr, varNum, offset);
if (targetType == TYP_FLOAT)
{
regNumber floatAsInt = tree->GetSingleTempReg();
regNumber floatAsInt = internalRegisters.GetSingle(tree);
emit->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, floatAsInt, dataReg, /* canSkip */ false);
emit->emitIns_R_R(INS_str, EA_4BYTE, floatAsInt, addr);
}
else
{
regNumber halfdoubleAsInt1 = tree->ExtractTempReg();
regNumber halfdoubleAsInt2 = tree->GetSingleTempReg();
regNumber halfdoubleAsInt1 = internalRegisters.Extract(tree);
regNumber halfdoubleAsInt2 = internalRegisters.GetSingle(tree);
emit->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, halfdoubleAsInt1, halfdoubleAsInt2, dataReg);
emit->emitIns_R_R_I(INS_str, EA_4BYTE, halfdoubleAsInt1, addr, 0);
emit->emitIns_R_R_I(INS_str, EA_4BYTE, halfdoubleAsInt1, addr, 4);
Expand Down Expand Up @@ -1209,7 +1209,7 @@ void CodeGen::genCkfinite(GenTree* treeNode)

emitter* emit = GetEmitter();
var_types targetType = treeNode->TypeGet();
regNumber intReg = treeNode->GetSingleTempReg();
regNumber intReg = internalRegisters.GetSingle(treeNode);
regNumber fpReg = genConsumeReg(treeNode->AsOp()->gtOp1);
regNumber targetReg = treeNode->GetRegNum();

Expand Down Expand Up @@ -1592,7 +1592,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)

genConsumeOperands(treeNode->AsOp());

regNumber tmpReg = treeNode->GetSingleTempReg();
regNumber tmpReg = internalRegisters.GetSingle(treeNode);

assert(insVcvt != INS_invalid);
GetEmitter()->emitIns_R_R(insVcvt, dstSize, tmpReg, op1->GetRegNum());
Expand Down
45 changes: 23 additions & 22 deletions src/coreclr/jit/codegenarm64.cpp
Expand Up @@ -2372,7 +2372,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
else
{
// Get a temp integer register to compute long address.
regNumber addrReg = tree->GetSingleTempReg();
regNumber addrReg = internalRegisters.GetSingle(tree);

// We must load the FP constant from the constant pool
// Emit a data section constant for the float or double constant.
Expand Down Expand Up @@ -2407,7 +2407,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
else
{
// Get a temp integer register to compute long address.
regNumber addrReg = tree->GetSingleTempReg();
regNumber addrReg = internalRegisters.GetSingle(tree);

simd8_t constValue;
memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd8_t));
Expand All @@ -2431,7 +2431,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
else
{
// Get a temp integer register to compute long address.
regNumber addrReg = tree->GetSingleTempReg();
regNumber addrReg = internalRegisters.GetSingle(tree);

simd16_t constValue = {};
memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd12_t));
Expand All @@ -2455,7 +2455,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
else
{
// Get a temp integer register to compute long address.
regNumber addrReg = tree->GetSingleTempReg();
regNumber addrReg = internalRegisters.GetSingle(tree);

simd16_t constValue;
memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd16_t));
Expand Down Expand Up @@ -3132,12 +3132,12 @@ void CodeGen::genLclHeap(GenTree* tree)
// since we don't need any internal registers.
if (compiler->info.compInitMem)
{
assert(tree->AvailableTempRegCount() == 0);
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = internalRegisters.Extract(tree);
inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true);
}

Expand Down Expand Up @@ -3254,12 +3254,12 @@ void CodeGen::genLclHeap(GenTree* tree)
assert(regCnt == REG_NA);
if (compiler->info.compInitMem)
{
assert(tree->AvailableTempRegCount() == 0);
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = internalRegisters.Extract(tree);
}
instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}
Expand Down Expand Up @@ -3323,7 +3323,7 @@ void CodeGen::genLclHeap(GenTree* tree)
//

// Setup the regTmp
regNumber regTmp = tree->GetSingleTempReg();
regNumber regTmp = internalRegisters.GetSingle(tree);

BasicBlock* loop = genCreateTempLabel();
BasicBlock* done = genCreateTempLabel();
Expand Down Expand Up @@ -3668,7 +3668,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
unsigned slots = layout->GetSlotCount();

// Temp register(s) used to perform the sequence of loads and stores.
regNumber tmpReg = cpObjNode->ExtractTempReg(RBM_ALLINT);
regNumber tmpReg = internalRegisters.Extract(cpObjNode, RBM_ALLINT);
regNumber tmpReg2 = REG_NA;

assert(genIsValidIntReg(tmpReg));
Expand All @@ -3677,7 +3677,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)

if (slots > 1)
{
tmpReg2 = cpObjNode->ExtractTempReg(RBM_ALLINT);
tmpReg2 = internalRegisters.Extract(cpObjNode, RBM_ALLINT);
assert(tmpReg2 != tmpReg);
assert(genIsValidIntReg(tmpReg2));
assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
Expand Down Expand Up @@ -3730,8 +3730,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
regNumber tmpSimdReg2 = REG_NA;
if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported())
{
tmpSimdReg1 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
tmpSimdReg2 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
tmpSimdReg1 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT);
tmpSimdReg2 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT);
}

unsigned i = 0;
Expand Down Expand Up @@ -3810,7 +3810,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum();
regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum();

regNumber tmpReg = treeNode->GetSingleTempReg();
regNumber tmpReg = internalRegisters.GetSingle(treeNode);

// load the ip-relative offset (which is relative to start of fgFirstBB)
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
Expand Down Expand Up @@ -3869,7 +3869,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
case GT_XAND:
{
// Grab a temp reg to perform `MVN` for dataReg first.
regNumber tempReg = treeNode->GetSingleTempReg();
regNumber tempReg = internalRegisters.GetSingle(treeNode);
GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg);
GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
Expand Down Expand Up @@ -3902,9 +3902,10 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
// These are imported normally if Atomics aren't supported.
assert(!treeNode->OperIs(GT_XORR, GT_XAND));

regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT);
regNumber storeDataReg =
(treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT);
regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;

// Check allocator assumptions
//
Expand Down Expand Up @@ -4055,7 +4056,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
}
else
{
regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT);

// Check allocator assumptions
//
Expand Down Expand Up @@ -4600,7 +4601,7 @@ void CodeGen::genCkfinite(GenTree* treeNode)
emitter* emit = GetEmitter();

// Extract exponent into a register.
regNumber intReg = treeNode->GetSingleTempReg();
regNumber intReg = internalRegisters.GetSingle(treeNode);
regNumber fpReg = genConsumeReg(op1);

inst_Mov(targetType, intReg, fpReg, /* canSkip */ false, emitActualTypeSize(treeNode));
Expand Down Expand Up @@ -5351,7 +5352,7 @@ void CodeGen::genStoreIndTypeSimd12(GenTreeStoreInd* treeNode)
regNumber dataReg = genConsumeReg(data);

// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = treeNode->GetSingleTempReg();
regNumber tmpReg = internalRegisters.GetSingle(treeNode);

// 8-byte write
GetEmitter()->emitIns_R_R(INS_str, EA_8BYTE, dataReg, addrReg);
Expand Down Expand Up @@ -5386,7 +5387,7 @@ void CodeGen::genLoadIndTypeSimd12(GenTreeIndir* treeNode)
regNumber addrReg = genConsumeReg(addr);

// Need an additional int register to read upper 4 bytes, which is different from targetReg
regNumber tmpReg = treeNode->GetSingleTempReg();
regNumber tmpReg = internalRegisters.GetSingle(treeNode);

// 8-byte read
GetEmitter()->emitIns_R_R(INS_ldr, EA_8BYTE, tgtReg, addrReg);
Expand Down

0 comments on commit bd88779

Please sign in to comment.