Hao Wu 102b4c7cdd MdePkg BaseMemoryLibSse2: Add SSE2 implementation of API IsZeroBuffer()
Add the implementation of API IsZeroBuffer() via assembly in
BaseMemoryLibSse2.

The assembly code uses SSE2 XMM registers and related instructions.

Cc: Michael D Kinney <michael.d.kinney@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Hao Wu <hao.a.wu@intel.com>
Reviewed-by: Liming Gao <liming.gao@intel.com>
2016-08-22 18:54:31 +08:00

77 lines
2.7 KiB
NASM

;------------------------------------------------------------------------------
;
; Copyright (c) 2016, Intel Corporation. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php.
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
; IsZeroBuffer.nasm
;
; Abstract:
;
; IsZeroBuffer function
;
; Notes:
;
;------------------------------------------------------------------------------
; Make RIP-relative addressing the default for memory operands that
; reference a symbol without a base/index register.
DEFAULT REL
; Everything that follows is assembled into the code section.
SECTION .text
;------------------------------------------------------------------------------
;  BOOLEAN
;  EFIAPI
;  InternalMemIsZeroBuffer (
;    IN CONST VOID  *Buffer,
;    IN UINTN       Length
;    );
;
;  Checks whether every byte of Buffer[0 .. Length-1] is zero.
;
;  In:    rcx = Buffer
;         rdx = Length (in bytes)
;  Out:   rax = 1 (TRUE) if all bytes are zero, 0 (FALSE) otherwise.
;         A Length of 0 returns TRUE.
;  Clobb: rcx, rdx, xmm0, flags.
;         rdi is used as the scan pointer and is saved/restored on the stack.
;  Notes: scasb is used without touching the direction flag, so DF is assumed
;         to be clear on entry.
;
;  Strategy:
;    1. Head:   byte-scan up to the next 16-byte boundary (or Length bytes,
;               whichever is smaller) so the SSE2 compare below only ever
;               reads 16-byte aligned memory.
;    2. Blocks: compare 16 bytes per iteration against an all-zero xmm0.
;    3. Tail:   byte-scan the remaining (Length mod 16) bytes.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemIsZeroBuffer)
ASM_PFX(InternalMemIsZeroBuffer):
    push    rdi                         ; rdi is restored before every return
    mov     rdi, rcx                    ; rdi <- Buffer (scan pointer)

    ;
    ; Head: rcx <- (-Buffer) & 15 = bytes needed to reach a 16-byte boundary.
    ;
    neg     rcx
    and     rcx, 15
    jz      .checkBlocks                ; Buffer is already 16-byte aligned
    cmp     rcx, rdx
    cmova   rcx, rdx                    ; never scan more than Length bytes
    sub     rdx, rcx                    ; rdx <- bytes left after the head
    xor     eax, eax                    ; al <- 0 (value to scan for); also sets
                                        ; ZF so rcx == 0 falls through as "zero"
    repe    scasb                       ; advance rdi while [rdi] == 0
    jnz     .returnFalse                ; ZF=0 means a non-zero byte was found

    ;
    ; Blocks: rcx <- number of whole 16-byte blocks, rdx <- tail byte count.
    ;
.checkBlocks:
    mov     rcx, rdx
    and     rdx, 15
    shr     rcx, 4
    jz      .checkTail                  ; fewer than 16 bytes left

.nextBlock:
    pxor    xmm0, xmm0                  ; xmm0 <- 0 (pcmpeqb overwrites it)
    pcmpeqb xmm0, [rdi]                 ; each byte <- 0xFF where [rdi] byte == 0
    pmovmskb eax, xmm0                  ; eax[15:0] <- one bit per compared byte
    cmp     eax, 0xffff                 ; all 16 bits set <=> all 16 bytes zero
    jnz     .returnFalse
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the legacy 'loop'
    jnz     .nextBlock

    ;
    ; Tail: remaining 0..15 bytes.
    ;
.checkTail:
    mov     rcx, rdx
    xor     eax, eax                    ; al <- 0; also sets ZF for rcx == 0
    repe    scasb
    jnz     .returnFalse                ; ZF=0 means a non-zero byte was found

    mov     eax, 1                      ; return TRUE
    pop     rdi
    ret

.returnFalse:
    xor     eax, eax                    ; return FALSE
    pop     rdi
    ret