[GR-49418] Fix AArch64AtomicMove.
PullRequest: graal/17601
teshull committed Apr 26, 2024
2 parents c9c290b + b6e870a commit 5f68ad3
Showing 1 changed file with 54 additions and 51 deletions.
@@ -111,7 +111,7 @@ public CompareAndSwapOp(AArch64Kind accessKind, MemoryOrderMode memoryOrder, boo
         }

         /**
-         * Both cas and ld(a)xr produce a zero-extended value. Since comparisons must be at minimum
+         * Both cas and ldxr produce a zero-extended value. Since comparisons must be at minimum
          * 32-bits, the expected value must also be zero-extended to produce an accurate comparison.
          */
         private static void emitCompare(AArch64MacroAssembler masm, int memAccessSize, Register result, Register expected) {
@@ -168,53 +168,64 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                     throw GraalError.shouldNotReachHereUnexpectedValue(memoryOrder); // ExcludeFromJacocoGeneratedReport
             }

+            int moveSize = Math.max(memAccessSize, 32);
             if (AArch64LIRFlags.useLSE(masm)) {
-                masm.mov(Math.max(memAccessSize, 32), result, expected);
+                masm.mov(moveSize, result, expected);
                 moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
                     masm.cas(memAccessSize, result, newVal, address, acquire, release);
                 });
                 if (setConditionFlags) {
                     emitCompare(masm, memAccessSize, result, expected);
                 }
             } else {
+
+                try (ScratchRegister scratchRegister1 = masm.getScratchRegister(); ScratchRegister scratchRegister2 = masm.getScratchRegister()) {
+                    Label retry = new Label();
+                    masm.bind(retry);
+                    Register scratch2 = scratchRegister2.getRegister();
+                    Register newValueReg = asRegister(newValue);
+                    if (newValueReg.equals(sp)) {
+                        /*
+                         * SP cannot be used in csel or stl(x)r.
+                         *
+                         * Since csel overwrites scratch2, newValue must be newly loaded each loop
+                         * iteration. However, unless under heavy contention, the storeExclusive
+                         * should rarely fail.
+                         */
+                        masm.mov(moveSize, scratch2, newValueReg);
+                        newValueReg = scratch2;
+                    }
+                    masm.loadExclusive(memAccessSize, result, address, false);
+
+                    emitCompare(masm, memAccessSize, result, expected);
+                    masm.csel(moveSize, scratch2, newValueReg, result, AArch64Assembler.ConditionFlag.EQ);
+
+                    /*
+                     * STLXR must be used also if acquire is set to ensure prior ldaxr/stlxr
+                     * instructions are not reordered after it.
+                     */
+                    Register scratch1 = scratchRegister1.getRegister();
+                    masm.storeExclusive(memAccessSize, scratch1, scratch2, address, acquire || release);
+                    // if scratch1 == 0 then write successful, else retry.
+                    masm.cbnz(32, scratch1, retry);
+                }
+
                 /*
-                 * Because the store is only conditionally emitted, a dmb is needed for performing a
-                 * release.
-                 *
-                 * Furthermore, even if the stlxr is emitted, if both acquire and release semantics
-                 * are required, then a dmb is anyways needed to ensure that the instruction
-                 * sequence:
+                 * From the Java perspective, the (ldxr, cmp, csel, stl(x)r) is a single atomic
+                 * operation which must abide by all requested semantics. Therefore, when acquire
+                 * semantics are needed, we use a full barrier after the store to guarantee that
+                 * instructions following the store cannot execute before it and violate acquire
+                 * semantics.
                  *
-                 * A -> ldaxr -> stlxr -> B
-                 *
-                 * cannot be executed as:
-                 *
-                 * ldaxr -> B -> A -> stlxr
+                 * Note we could instead perform a conditional branch and when the comparison fails
+                 * skip the store, but this introduces an opportunity for branch mispredictions, and
+                 * also, when release semantics are needed, requires a barrier to be inserted before
+                 * the operation.
                  */
-                if (release) {
+
+                if (acquire) {
                     masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
                 }
-
-                moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
-                    try (ScratchRegister scratchRegister = masm.getScratchRegister()) {
-                        Register scratch = scratchRegister.getRegister();
-                        Label retry = new Label();
-                        Label fail = new Label();
-                        masm.bind(retry);
-                        masm.loadExclusive(memAccessSize, result, address, acquire);
-                        emitCompare(masm, memAccessSize, result, expected);
-                        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, fail);
-                        /*
-                         * Even with the prior dmb, for releases it is still necessary to use stlxr
-                         * instead of stxr to guarantee subsequent lda(x)r/stl(x)r cannot be hoisted
-                         * above this instruction and thereby violate volatile semantics.
-                         */
-                        masm.storeExclusive(memAccessSize, scratch, newVal, address, release);
-                        // if scratch == 0 then write successful, else retry.
-                        masm.cbnz(32, scratch, retry);
-                        masm.bind(fail);
-                    }
-                });
             }
         }
     }
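
For reference, the ldxr/csel/stlxr loop added above reduces to the sequence below. This is a condensed sketch, not part of the commit: it reuses the helper and constant names visible in the diff, and it leaves out scratch-register management and the special case where the new value lives in SP.

    // Condensed sketch of the non-LSE CAS path added above; helper names match the diff.
    static void casLoopSketch(AArch64MacroAssembler masm, int memAccessSize, int moveSize,
                    Register result, Register expected, Register newValueReg,
                    Register scratch1, Register scratch2, Register address,
                    boolean acquire, boolean release) {
        Label retry = new Label();
        masm.bind(retry);
        masm.loadExclusive(memAccessSize, result, address, false);                             // ldxr: plain load-exclusive
        emitCompare(masm, memAccessSize, result, expected);                                    // zero-extended compare against expected
        masm.csel(moveSize, scratch2, newValueReg, result, AArch64Assembler.ConditionFlag.EQ); // new value on match, otherwise store back the loaded value
        masm.storeExclusive(memAccessSize, scratch1, scratch2, address, acquire || release);   // stl(x)r whenever any ordering is requested
        masm.cbnz(32, scratch1, retry);                                                        // non-zero status: exclusive store failed, retry
        if (acquire) {
            masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);                                    // trailing full barrier supplies the acquire half
        }
    }

The csel makes the exclusive store unconditional, so the stlxr plus the trailing dmb can carry the release and acquire ordering without a barrier in front of the loop.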
@@ -290,14 +301,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                 }
             }
             /*
-             * Use a full barrier for the acquire semantics instead of ldaxr to guarantee that the
-             * instruction sequence:
-             *
-             * A -> ldaxr -> stlxr -> B
-             *
-             * cannot be executed as:
-             *
-             * ldaxr -> B -> A -> stlxr
+             * From the Java perspective, the (ldxr, add, stlxr) is a single atomic operation which
+             * must abide by both acquire and release semantics. Therefore, we use a full barrier
+             * after the store to guarantee that instructions following the store cannot execute
+             * before it and violate acquire semantics.
              */
             masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
         }
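
The updated comment above refers to the enclosing read-and-add loop, whose load and add lines sit outside this hunk. A hedged sketch of that (ldxr, add, stlxr) shape, with illustrative register names and assuming the add(int, Register, Register, Register) macro-assembler overload:

    // Hedged sketch of the (ldxr, add, stlxr) loop the comment describes; not taken from the diff.
    static void getAndAddSketch(AArch64MacroAssembler masm, int memAccessSize, int addSize,
                    Register result, Register delta, Register newValue, Register status, Register address) {
        Label retry = new Label();
        masm.bind(retry);
        masm.loadExclusive(memAccessSize, result, address, false);           // ldxr: read the current value
        masm.add(addSize, newValue, result, delta);                          // result + delta (assumed register-add overload)
        masm.storeExclusive(memAccessSize, status, newValue, address, true); // stlxr: release half of the ordering
        masm.cbnz(32, status, retry);                                        // retry if the exclusive store failed
        masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);                      // full barrier after the store supplies acquire
    }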
@@ -405,14 +412,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                         // if scratch == 0 then write successful, else retry
                         masm.cbnz(32, scratch, retry);
                         /*
-                         * Use a full barrier for the acquire semantics instead of ldaxr to
-                         * guarantee that the instruction sequence:
-                         *
-                         * A -> ldaxr -> stlxr -> B
-                         *
-                         * cannot be executed as:
-                         *
-                         * ldaxr -> B -> A -> stlxr
+                         * From the Java perspective, the (ldxr, stlxr) is a single atomic operation
+                         * which must abide by both acquire and release semantics. Therefore, we use
+                         * a full barrier after the store to guarantee that instructions following
+                         * the store cannot execute before it and violate acquire semantics.
                          */
                         masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
                     }
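
For context on the "from the Java perspective" wording: at the Java level these LIR operations implement single atomic accesses with volatile (acquire plus release) ordering. A minimal, runnable illustration of that class of operations:

    import java.util.concurrent.atomic.AtomicInteger;

    public class AtomicOpsDemo {
        public static void main(String[] args) {
            AtomicInteger v = new AtomicInteger(41);
            // Each call below is one indivisible operation with volatile semantics; on AArch64
            // without LSE, operations of this kind compile to LL/SC loops like the ones in this file
            // (compare-and-swap, read-and-add, read-and-write).
            boolean swapped = v.compareAndSet(41, 42); // CAS: succeeds, v == 42
            int before = v.getAndAdd(8);               // read-and-add: returns 42, v == 50
            int old = v.getAndSet(7);                  // read-and-write: returns 50, v == 7
            System.out.println(swapped + " " + before + " " + old); // prints: true 42 50
        }
    }

The trailing dmb is what keeps instructions that follow such an access from executing before the stlxr when the access is compiled to an LL/SC loop on a core without LSE.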
