diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64AtomicMove.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64AtomicMove.java
index c73b60ea673c..ebef4c4a76d5 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64AtomicMove.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64AtomicMove.java
@@ -111,7 +111,7 @@ public CompareAndSwapOp(AArch64Kind accessKind, MemoryOrderMode memoryOrder, boo
         }
 
         /**
-         * Both cas and ld(a)xr produce a zero-extended value. Since comparisons must be at minimum
+         * Both cas and ldxr produce a zero-extended value. Since comparisons must be at minimum
          * 32-bits, the expected value must also be zero-extended to produce an accurate comparison.
          */
         private static void emitCompare(AArch64MacroAssembler masm, int memAccessSize, Register result, Register expected) {
@@ -168,8 +168,9 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                     throw GraalError.shouldNotReachHereUnexpectedValue(memoryOrder); // ExcludeFromJacocoGeneratedReport
             }
 
+            int moveSize = Math.max(memAccessSize, 32);
             if (AArch64LIRFlags.useLSE(masm)) {
-                masm.mov(Math.max(memAccessSize, 32), result, expected);
+                masm.mov(moveSize, result, expected);
                 moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
                     masm.cas(memAccessSize, result, newVal, address, acquire, release);
                 });
@@ -177,44 +178,54 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                     emitCompare(masm, memAccessSize, result, expected);
                 }
             } else {
+
+                try (ScratchRegister scratchRegister1 = masm.getScratchRegister(); ScratchRegister scratchRegister2 = masm.getScratchRegister()) {
+                    Label retry = new Label();
+                    masm.bind(retry);
+                    Register scratch2 = scratchRegister2.getRegister();
+                    Register newValueReg = asRegister(newValue);
+                    if (newValueReg.equals(sp)) {
+                        /*
+                         * SP cannot be used in csel or stl(x)r.
+                         *
+                         * Since csel overwrites scratch2, newValue must be newly loaded each loop
+                         * iteration. However, unless under heavy contention, the storeExclusive
+                         * should rarely fail.
+                         */
+                        masm.mov(moveSize, scratch2, newValueReg);
+                        newValueReg = scratch2;
+                    }
+                    masm.loadExclusive(memAccessSize, result, address, false);
+
+                    emitCompare(masm, memAccessSize, result, expected);
+                    masm.csel(moveSize, scratch2, newValueReg, result, AArch64Assembler.ConditionFlag.EQ);
+
+                    /*
+                     * STLXR must be used also if acquire is set to ensure prior ldaxr/stlxr
+                     * instructions are not reordered after it.
+                     */
+                    Register scratch1 = scratchRegister1.getRegister();
+                    masm.storeExclusive(memAccessSize, scratch1, scratch2, address, acquire || release);
+                    // if scratch1 == 0 then write successful, else retry.
+                    masm.cbnz(32, scratch1, retry);
+                }
+
                 /*
-                 * Because the store is only conditionally emitted, a dmb is needed for performing a
-                 * release.
-                 *
-                 * Furthermore, even if the stlxr is emitted, if both acquire and release semantics
-                 * are required, then a dmb is anyways needed to ensure that the instruction
-                 * sequence:
+                 * From the Java perspective, the (ldxr, cmp, csel, stl(x)r) is a single atomic
+                 * operation which must abide by all requested semantics. Therefore, when acquire
+                 * semantics are needed, we use a full barrier after the store to guarantee that
+                 * instructions following the store cannot execute before it and violate acquire
+                 * semantics.
                  *
-                 * A -> ldaxr -> stlxr -> B
-                 *
-                 * cannot be executed as:
-                 *
-                 * ldaxr -> B -> A -> stlxr
+                 * Note we could instead perform a conditional branch and when the comparison fails
+                 * skip the store, but this introduces an opportunity for branch mispredictions, and
+                 * also, when release semantics are needed, requires a barrier to be inserted before
+                 * the operation.
                  */
-                if (release) {
+
+                if (acquire) {
                     masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
                 }
-
-                moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
-                    try (ScratchRegister scratchRegister = masm.getScratchRegister()) {
-                        Register scratch = scratchRegister.getRegister();
-                        Label retry = new Label();
-                        Label fail = new Label();
-                        masm.bind(retry);
-                        masm.loadExclusive(memAccessSize, result, address, acquire);
-                        emitCompare(masm, memAccessSize, result, expected);
-                        masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, fail);
-                        /*
-                         * Even with the prior dmb, for releases it is still necessary to use stlxr
-                         * instead of stxr to guarantee subsequent lda(x)r/stl(x)r cannot be hoisted
-                         * above this instruction and thereby violate volatile semantics.
-                         */
-                        masm.storeExclusive(memAccessSize, scratch, newVal, address, release);
-                        // if scratch == 0 then write successful, else retry.
-                        masm.cbnz(32, scratch, retry);
-                        masm.bind(fail);
-                    }
-                });
             }
         }
     }
@@ -290,14 +301,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                 }
             }
             /*
-             * Use a full barrier for the acquire semantics instead of ldaxr to guarantee that the
-             * instruction sequence:
-             *
-             * A -> ldaxr -> stlxr -> B
-             *
-             * cannot be executed as:
-             *
-             * ldaxr -> B -> A -> stlxr
+             * From the Java perspective, the (ldxr, add, stlxr) is a single atomic operation which
+             * must abide by both acquire and release semantics. Therefore, we use a full barrier
+             * after the store to guarantee that instructions following the store cannot execute
+             * before it and violate acquire semantics.
              */
             masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
         }
@@ -405,14 +412,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
                         // if scratch == 0 then write successful, else retry
                         masm.cbnz(32, scratch, retry);
                         /*
-                         * Use a full barrier for the acquire semantics instead of ldaxr to
-                         * guarantee that the instruction sequence:
-                         *
-                         * A -> ldaxr -> stlxr -> B
-                         *
-                         * cannot be executed as:
-                         *
-                         * ldaxr -> B -> A -> stlxr
+                         * From the Java perspective, the (ldxr, stlxr) is a single atomic operation
+                         * which must abide by both acquire and release semantics. Therefore, we use
+                         * a full barrier after the store to guarantee that instructions following
+                         * the store cannot execute before it and violate acquire semantics.
                          */
                         masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
                     }
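Note (not part of the patch): the new comments reason about what the emitted ldxr/cmp/csel/stlxr/dmb sequence must guarantee "from the Java perspective". Below is a minimal sketch of the kind of Java-level operation that is lowered to CompareAndSwapOp with both acquire and release semantics; the class name CasExample and the field value are hypothetical and chosen only for illustration. A volatile-mode VarHandle.compareAndExchange must behave as one atomic operation with full ordering, which is the contract the unconditional store-exclusive plus trailing dmb in this change preserves.

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;

class CasExample {
    private volatile int value;

    private static final VarHandle VALUE;

    static {
        try {
            // Hypothetical example field; any volatile int field is handled the same way.
            VALUE = MethodHandles.lookup().findVarHandle(CasExample.class, "value", int.class);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Volatile-mode compare-and-exchange: requests both acquire and release semantics.
     * On AArch64 without LSE, Graal emits an exclusive-load/store loop like the one in
     * this patch for it. Returns the witness value; it equals {@code expected} iff the
     * swap succeeded.
     */
    int compareAndExchange(int expected, int newValue) {
        return (int) VALUE.compareAndExchange(this, expected, newValue);
    }
}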