Skip to content

Commit

Permalink
Rename isIntelBridgeFamily to isIntelERMSGoodCPU.
Browse files Browse the repository at this point in the history
Ensure that ERMS REP MOVSB is used only for aligned, non-overlapping data copies.

Signed-off-by: TangYang <yang.tang@intel.com>
  • Loading branch information
cocotyty committed May 13, 2024
1 parent f3f5b0f commit 8c00c8b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 24 deletions.
11 changes: 7 additions & 4 deletions src/runtime/cpuflags_amd64.go
Expand Up @@ -8,20 +8,23 @@ import (
"internal/cpu"
)

var useAVXmemmove bool
var (
useAVXmemmove bool
useERMS bool
)

func init() {
// Let's remove stepping and reserved fields
processor := processorVersionInfo & 0x0FFF3FF0

isIntelBridgeFamily := isIntel &&
isIntelERMSGoodCPU := isIntel &&
processor == 0x206A0 || // Sandy Bridge (Client)
processor == 0x206D0 || // Sandy Bridge (Server)
processor == 0x306A0 || // Ivy Bridge (Client)
processor == 0x306E0 || // Ivy Bridge (Server)
processor == 0x606A0 || // Ice Lake (Server) SP
processor == 0x606C0 || // Ice Lake (Server) DE
processor == 0x806F0 // Sapphire Rapids

useAVXmemmove = cpu.X86.HasAVX && !isIntelBridgeFamily
useERMS = isIntelERMSGoodCPU && cpu.X86.HasERMS
useAVXmemmove = cpu.X86.HasAVX
}
41 changes: 21 additions & 20 deletions src/runtime/memmove_amd64.s
Expand Up @@ -72,45 +72,43 @@ tail:
CMPQ BX, $256
JBE move_129through256

TESTB $1, runtime·useAVXmemmove(SB)
JNZ avxUnaligned

/*
* check and set for backwards
*/
CMPQ SI, DI
JLS back

/*
* forward copy loop
*/
* forward copy loop
*/
forward:
CMPQ BX, $2048
JLS move_256through2048
// ERMS is slow if destination address is unaligned.
TESTQ $15, DI
JZ check_avx

TESTB $1, runtime·useERMS(SB)
JNZ erms

// If REP MOVSB isn't fast, don't use it
CMPB internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
JNE fwdBy8
check_avx:
TESTB $1, runtime·useAVXmemmove(SB)
JNZ avxUnaligned

// Check alignment
MOVL SI, AX
ORL DI, AX
TESTL $7, AX
JEQ fwdBy8

// Do 1 byte at a time
MOVQ BX, CX
REP; MOVSB
RET
CMPQ BX, $2048
JLS move_256through2048

fwdBy8:
// Do 8 bytes at a time
MOVQ BX, CX
SHRQ $3, CX
ANDQ $7, BX
REP; MOVSQ
JMP tail

erms:
MOVQ BX, CX
REP; MOVSB
RET

back:
/*
* check overlap
Expand All @@ -119,6 +117,9 @@ back:
ADDQ BX, CX
CMPQ CX, DI
JLS forward

TESTB $1, runtime·useAVXmemmove(SB)
JNZ avxUnaligned
/*
* whole thing backwards has
* adjusted addresses
Expand Down

0 comments on commit 8c00c8b

Please sign in to comment.