MdePkg BaseMemoryLibSse2: Add SSE2 implementation of API IsZeroBuffer()
Add the implementation of the IsZeroBuffer() API in assembly to BaseMemoryLibSse2. The assembly code uses SSE2 XMM registers and related instructions. Cc: Michael D Kinney <michael.d.kinney@intel.com> Cc: Liming Gao <liming.gao@intel.com> Cc: Jiewen Yao <jiewen.yao@intel.com> Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Hao Wu <hao.a.wu@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com>
This commit is contained in:
76
MdePkg/Library/BaseMemoryLibSse2/X64/IsZeroBuffer.nasm
Normal file
76
MdePkg/Library/BaseMemoryLibSse2/X64/IsZeroBuffer.nasm
Normal file
@@ -0,0 +1,76 @@
|
||||
;------------------------------------------------------------------------------
|
||||
;
|
||||
; Copyright (c) 2016, Intel Corporation. All rights reserved.<BR>
|
||||
; This program and the accompanying materials
|
||||
; are licensed and made available under the terms and conditions of the BSD License
|
||||
; which accompanies this distribution. The full text of the license may be found at
|
||||
; http://opensource.org/licenses/bsd-license.php.
|
||||
;
|
||||
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
||||
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
||||
;
|
||||
; Module Name:
|
||||
;
|
||||
; IsZeroBuffer.nasm
|
||||
;
|
||||
; Abstract:
|
||||
;
|
||||
; IsZeroBuffer function
|
||||
;
|
||||
; Notes:
|
||||
;
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
DEFAULT REL
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
;  BOOLEAN
;  EFIAPI
;  InternalMemIsZeroBuffer (
;    IN CONST VOID  *Buffer,
;    IN UINTN       Length
;    );
;
;  Returns TRUE (1) when every one of the Length bytes at Buffer is zero and
;  FALSE (0) as soon as a non-zero byte is found. A Length of 0 returns TRUE.
;
;  In:    rcx = Buffer, rdx = Length
;  Out:   rax = 1 (TRUE) or 0 (FALSE)
;  Uses:  rcx, rdx, xmm0, flags; rdi is saved on entry and restored on exit
;
;  The scan runs in three phases:
;    1. byte-wise (repe scasb) up to the next 16-byte boundary,
;    2. 16 bytes per iteration with SSE2 on the now-aligned pointer,
;    3. byte-wise (repe scasb) over the remaining 0..15 bytes.
;
;  scasb only walks upward through memory when the direction flag is clear,
;  so the routine relies on DF = 0 at entry.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemIsZeroBuffer)
ASM_PFX(InternalMemIsZeroBuffer):
    push    rdi
    mov     rdi, rcx                    ; rdi <- Buffer (scan pointer)

    ;
    ; Phase 1: unaligned head.
    ;
    neg     rcx
    and     rcx, 15                     ; rcx <- (-Buffer) & 15, i.e. the bytes
                                        ; needed to reach a 16-byte boundary
    jz      .CheckAlignedBlocks         ; Buffer already aligned: no head scan
    cmp     rcx, rdx                    ; Length is still in rdx
    cmova   rcx, rdx                    ; rcx <- min (head bytes, Length), unsigned
    sub     rdx, rcx                    ; rdx <- bytes left after the head
    xor     eax, eax                    ; al <- 0 for scasb; also sets ZF so that
                                        ; a zero-count scan falls through
    repe    scasb
    jnz     .ReturnFalse                ; ZF=0 means a non-zero byte was found

    ;
    ; Phase 2: aligned 16-byte blocks. rdi is 16-byte aligned whenever the
    ; block count is non-zero, which the memory operand of pcmpeqb requires.
    ;
.CheckAlignedBlocks:
    mov     rcx, rdx
    and     rdx, 15                     ; rdx <- tail bytes left for phase 3
    shr     rcx, 4                      ; rcx <- number of whole 16-byte blocks
    jz      .CheckTail
.BlockLoop:
    pxor    xmm0, xmm0                  ; xmm0 <- 0
    pcmpeqb xmm0, [rdi]                 ; each byte lane <- 0xFF where memory is 0
    pmovmskb eax, xmm0                  ; eax <- one bit per byte lane
                                        ; (32-bit destination: nasm doesn't
                                        ; support a 64-bit one for pmovmskb)
    cmp     eax, 0xffff                 ; all 16 lanes must have matched zero
    jnz     .ReturnFalse
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the legacy "loop";
    jnz     .BlockLoop                  ; flags written here are not consumed later

    ;
    ; Phase 3: remaining 0..15 bytes.
    ;
.CheckTail:
    mov     rcx, rdx
    xor     eax, eax                    ; al <- 0 for scasb; also sets ZF so that
                                        ; a zero-count scan falls through
    repe    scasb
    jnz     .ReturnFalse                ; ZF=0 means a non-zero byte was found

    pop     rdi
    mov     eax, 1                      ; return TRUE (upper half of rax is zeroed)
    ret

.ReturnFalse:
    pop     rdi
    xor     eax, eax                    ; return FALSE
    ret
|
||||
|
Reference in New Issue
Block a user