diff --git a/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf b/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf index b4c9a793cc..8aefe4fe15 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf +++ b/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf @@ -109,6 +109,7 @@ X64/SetMem32.asm X64/SetMem16.nasm X64/SetMem16.asm + X64/SetMem.nasm X64/SetMem.asm X64/CopyMem.asm X64/ScanMem64.nasm @@ -129,6 +130,7 @@ X64/SetMem32.S X64/SetMem16.nasm X64/SetMem16.S + X64/SetMem.nasm X64/SetMem.S X64/CopyMem.S diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm new file mode 100644 index 0000000000..1bb8ca6b45 --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm @@ -0,0 +1,71 @@ +;------------------------------------------------------------------------------ +; +; Copyright (c) 2006, Intel Corporation. All rights reserved.
+; This program and the accompanying materials +; are licensed and made available under the terms and conditions of the BSD License +; which accompanies this distribution. The full text of the license may be found at +; http://opensource.org/licenses/bsd-license.php. +; +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +; +; Module Name: +; +; SetMem.nasm +; +; Abstract: +; +; SetMem function +; +; Notes: +; +;------------------------------------------------------------------------------ + + DEFAULT REL + SECTION .text + +;------------------------------------------------------------------------------ +; VOID * +; InternalMemSetMem ( +; IN VOID *Buffer, +; IN UINTN Count, +; IN UINT8 Value +; ) +;------------------------------------------------------------------------------ +global ASM_PFX(InternalMemSetMem) +ASM_PFX(InternalMemSetMem): + push rdi + mov rdi, rcx ; rdi <- Buffer + mov al, r8b ; al <- Value + mov r9, rdi ; r9 <- Buffer as return value + xor rcx, rcx + sub rcx, rdi + and rcx, 15 ; rcx + rdi aligns on 16-byte boundary + jz .0 + cmp rcx, rdx + cmova rcx, rdx + sub rdx, rcx + rep stosb +.0: + mov rcx, rdx + and rdx, 15 + shr rcx, 4 + jz @SetBytes + mov ah, al ; ax <- Value repeats twice + movdqa [rsp + 0x10], xmm0 ; save xmm0 + movd xmm0, eax ; xmm0[0..16] <- Value repeats twice + pshuflw xmm0, xmm0, 0 ; xmm0[0..63] <- Value repeats 8 times + movlhps xmm0, xmm0 ; xmm0 <- Value repeats 16 times +.1: + movntdq [rdi], xmm0 ; rdi should be 16-byte aligned + add rdi, 16 + loop .1 + mfence + movdqa xmm0, [rsp + 0x10] ; restore xmm0 +@SetBytes: + mov ecx, edx ; high 32 bits of rcx are always zero + rep stosb + mov rax, r9 ; rax <- Return value + pop rdi + ret +