Add some ldm/vldm optimized CopyMem routines. Add performance macros to BDS

git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@10388 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
andrewfish
2010-04-21 22:04:35 +00:00
parent d02b28d736
commit d39eb83cc5
47 changed files with 3962 additions and 2 deletions

View File

@@ -0,0 +1,112 @@
#------------------------------------------------------------------------------
#
# CopyMem() worker for ARM
#
# This file started out as C code that did 64 bit moves if the buffer was
# 32-bit aligned, else it does a byte copy. It also does a byte copy for
# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
#
# Copyright (c) 2008-2010 Apple Inc. All rights reserved.<BR>
# All rights reserved. This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
/**
  Copy Length bytes from Source to Destination. Overlap is OK.

  This implementation picks the copy direction from the relative position of
  the two buffers:

    - Destination == Source, or Length == 0: nothing is copied.
    - Destination <  Source, or the gap (Destination - Source) is larger than
      Length: copy forward. While the fast-path flag is set, 32 bytes are
      moved per iteration with ldm/stm; the remainder is moved one byte at a
      time.
    - Otherwise (Destination > Source and the regions overlap): copy backward
      starting from the end of both buffers. While the fast-path flag is set,
      8 bytes are moved per iteration with ldrd/strd; the remainder is moved
      one byte at a time.

  The fast-path flag is set only when both start addresses are word aligned
  and Length is at least 32. For the backward copy the flag is additionally
  cleared unless both END addresses are doubleword aligned, because ldrd/strd
  are issued relative to the end pointers and must not be handed a misaligned
  address (for example aligned buffers with Length == 33).

  Register use:
    r0      fast-path flag (1 = wide copy allowed), return value on exit
    r2-r9   data registers for ldm/stm (r2/r3 for ldrd/strd, r3 also scratch)
    r10     destination cursor
    r11     original Destination, restored into r0 on return
    ip      bytes remaining
    lr      source cursor

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination

VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
  .text
  .align 2
  .globl ASM_PFX(InternalMemCopyMem)

ASM_PFX(InternalMemCopyMem):
  stmfd   sp!, {r4-r11, lr}
  tst     r0, #3                  @ is Destination word aligned?
  mov     r11, r0                 @ keep Destination for the return value
  mov     r10, r0                 @ destination cursor
  mov     ip, r2                  @ bytes remaining
  mov     lr, r1                  @ source cursor
  movne   r0, #0                  @ unaligned Destination: no fast path
  bne     LCopyCheckOverlap
  tst     r1, #3                  @ is Source word aligned?
  movne   r3, #0
  moveq   r3, #1
  cmp     r2, #31
  movls   r0, #0                  @ Length < 32: no fast path
  andhi   r0, r3, #1              @ Length >= 32: fast path iff Source aligned

LCopyCheckOverlap:
  cmp     r11, r1
  bcc     LCopyForward            @ Destination < Source
  bls     LCopyDone               @ Destination == Source
  rsb     r3, r1, r11             @ r3 = Destination - Source
  cmp     ip, r3
  bcc     LCopyForward            @ Length < gap: regions do not overlap
  cmp     ip, #0
  beq     LCopyDone
  add     r10, r11, ip            @ r10 = one past the end of Destination
  add     lr, ip, r1              @ lr  = one past the end of Source
  orr     r3, r10, lr
  tst     r3, #7                  @ ldrd/strd run from the END pointers, so
  movne   r0, #0                  @ those must be aligned, not just the starts
  b       LCopyBackward

LCopyBackward8:
  sub     ip, ip, #8
  cmp     ip, #7
  ldrd    r2, [lr, #-8]!
  movls   r0, #0                  @ fewer than 8 bytes left: finish bytewise
  cmp     ip, #0
  strd    r2, [r10, #-8]!
  beq     LCopyDone

LCopyBackward:
  cmp     r0, #0
  bne     LCopyBackward8
  ldrb    r3, [lr, #-1]!          @ bytewise backward copy
  subs    ip, ip, #1
  strb    r3, [r10, #-1]!
  bne     LCopyBackward
  b       LCopyDone

LCopyForwardByte:
  ldrb    r3, [lr], #1
  sub     ip, ip, #1
  strb    r3, [r10], #1

LCopyForward:
  cmp     ip, #0
  beq     LCopyDone

LCopyForwardLoop:
  cmp     r0, #0
  beq     LCopyForwardByte
  sub     ip, ip, #32
  cmp     ip, #31
  ldmia   lr!, {r2-r9}            @ all 32 bytes are loaded before any store
  movls   r0, #0                  @ fewer than 32 bytes left: finish bytewise
  cmp     ip, #0
  stmia   r10!, {r2-r9}
  bne     LCopyForwardLoop

LCopyDone:
  mov     r0, r11                 @ return Destination
  ldmfd   sp!, {r4-r11, pc}

View File

@@ -0,0 +1,114 @@
;------------------------------------------------------------------------------
;
; CopyMem() worker for ARM
;
; This file started out as C code that did 64 bit moves if the buffer was
; 32-bit aligned, else it does a byte copy. It also does a byte copy for
; any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
;
; Copyright (c) 2008-2010 Apple Inc. All rights reserved.<BR>
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
;------------------------------------------------------------------------------
/**
  Copy Length bytes from Source to Destination. Overlap is OK.

  This implementation picks the copy direction from the relative position of
  the two buffers:

    - Destination == Source, or Length == 0: nothing is copied.
    - Destination <  Source, or the gap (Destination - Source) is larger than
      Length: copy forward. While the fast-path flag is set, 32 bytes are
      moved per iteration with ldm/stm; the remainder is moved one byte at a
      time.
    - Otherwise (Destination > Source and the regions overlap): copy backward
      starting from the end of both buffers. While the fast-path flag is set,
      8 bytes are moved per iteration with ldrd/strd; the remainder is moved
      one byte at a time.

  The fast-path flag is set only when both start addresses are word aligned
  and Length is at least 32. For the backward copy the flag is additionally
  cleared unless both END addresses are doubleword aligned, because ldrd/strd
  are issued relative to the end pointers and must not be handed a misaligned
  address (for example aligned buffers with Length == 33).

  Register use:
    r0      fast-path flag (1 = wide copy allowed), return value on exit
    r2-r9   data registers for ldm/stm (r2/r3 for ldrd/strd, r3 also scratch)
    r10     destination cursor
    r11     original Destination, restored into r0 on return
    ip      bytes remaining
    lr      source cursor

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination

VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
  EXPORT InternalMemCopyMem

  AREA AsmMemStuff, CODE, READONLY

InternalMemCopyMem
  stmfd   sp!, {r4-r11, lr}
  tst     r0, #3                  ; is Destination word aligned?
  mov     r11, r0                 ; keep Destination for the return value
  mov     r10, r0                 ; destination cursor
  mov     ip, r2                  ; bytes remaining
  mov     lr, r1                  ; source cursor
  movne   r0, #0                  ; unaligned Destination: no fast path
  bne     LCopyCheckOverlap
  tst     r1, #3                  ; is Source word aligned?
  movne   r3, #0
  moveq   r3, #1
  cmp     r2, #31
  movls   r0, #0                  ; Length < 32: no fast path
  andhi   r0, r3, #1              ; Length >= 32: fast path iff Source aligned

LCopyCheckOverlap
  cmp     r11, r1
  bcc     LCopyForward            ; Destination < Source
  bls     LCopyDone               ; Destination == Source
  rsb     r3, r1, r11             ; r3 = Destination - Source
  cmp     ip, r3
  bcc     LCopyForward            ; Length < gap: regions do not overlap
  cmp     ip, #0
  beq     LCopyDone
  add     r10, r11, ip            ; r10 = one past the end of Destination
  add     lr, ip, r1              ; lr  = one past the end of Source
  orr     r3, r10, lr
  tst     r3, #7                  ; ldrd/strd run from the END pointers, so
  movne   r0, #0                  ; those must be aligned, not just the starts
  b       LCopyBackward

LCopyBackward8
  sub     ip, ip, #8
  cmp     ip, #7
  ldrd    r2, [lr, #-8]!
  movls   r0, #0                  ; fewer than 8 bytes left: finish bytewise
  cmp     ip, #0
  strd    r2, [r10, #-8]!
  beq     LCopyDone

LCopyBackward
  cmp     r0, #0
  bne     LCopyBackward8
  ldrb    r3, [lr, #-1]!          ; bytewise backward copy
  subs    ip, ip, #1
  strb    r3, [r10, #-1]!
  bne     LCopyBackward
  b       LCopyDone

LCopyForwardByte
  ldrb    r3, [lr], #1
  sub     ip, ip, #1
  strb    r3, [r10], #1

LCopyForward
  cmp     ip, #0
  beq     LCopyDone

LCopyForwardLoop
  cmp     r0, #0
  beq     LCopyForwardByte
  sub     ip, ip, #32
  cmp     ip, #31
  ldmia   lr!, {r2-r9}            ; all 32 bytes are loaded before any store
  movls   r0, #0                  ; fewer than 32 bytes left: finish bytewise
  cmp     ip, #0
  stmia   r10!, {r2-r9}
  bne     LCopyForwardLoop

LCopyDone
  mov     r0, r11                 ; return Destination
  ldmfd   sp!, {r4-r11, pc}

  END

View File

@@ -0,0 +1,82 @@
#------------------------------------------------------------------------------
#
# SetMem() worker for ARM
#
# This file started out as C code that did 64 bit stores if the buffer was
# 32-bit aligned, else it did a byte-by-byte fill. It also fills any trailing
# bytes one byte at a time. It was updated to set 32 bytes at a time using stm.
#
# Copyright (c) 2008-2010 Apple Inc. All rights reserved.<BR>
# All rights reserved. This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
/**
  Set Buffer to Value for Size bytes.

  When Buffer is word aligned and Length is at least 32, the low byte of
  Value is replicated into r4-r11 and stored 32 bytes at a time with stm
  until fewer than 32 bytes remain. Everything else (unaligned buffers,
  short lengths and the trailing remainder) is written one byte at a time.

  Register use:
    r0      Buffer, never modified, so it is also the return value
    r1      bytes remaining
    r2      Value (only the low byte is stored)
    r3      scratch while computing the alignment flag
    r4-r11  Value replicated into every byte, source for stm
    r12     write cursor
    lr      fast-path flag (1 = 32-byte stores allowed)

  @param  Buffer      Memory to set.
  @param  Length      Number of bytes to set
  @param  Value       Value of the set operation.

  @return Buffer

VOID *
EFIAPI
InternalMemSetMem (
  OUT     VOID                      *Buffer,
  IN      UINTN                     Length,
  IN      UINT8                     Value
  )
**/
  .text
  .align 2
  .globl ASM_PFX(InternalMemSetMem)

ASM_PFX(InternalMemSetMem):
  stmfd   sp!, {r4-r11, lr}
  mov     r12, r0                 @ write cursor; r0 stays put as return value
  tst     r0, #3                  @ is Buffer word aligned?
  movne   r3, #0
  moveq   r3, #1
  cmp     r1, #31
  movls   lr, #0                  @ Length < 32: byte stores only
  andhi   lr, r3, #1              @ Length >= 32: fast path iff Buffer aligned
  cmp     lr, #0
  beq     LSetCheck

  and     r4, r2, #0xff           @ replicate the fill byte across a word ...
  orr     r4, r4, r4, LSL #8
  orr     r4, r4, r4, LSL #16
  mov     r5, r4                  @ ... and across all eight stm registers
  mov     r6, r4
  mov     r7, r4
  mov     r8, r4
  mov     r9, r4
  mov     r10, r4
  mov     r11, r4
  b       LSetCheck

LSetLoop:
  cmp     lr, #0
  streqb  r2, [r12], #1           @ slow path: one byte per iteration
  subeq   r1, r1, #1
  beq     LSetCheck
  sub     r1, r1, #32
  cmp     r1, #31
  movls   lr, #0                  @ fewer than 32 bytes left: finish bytewise
  stmia   r12!, {r4-r11}

LSetCheck:
  cmp     r1, #0
  bne     LSetLoop
  ldmfd   sp!, {r4-r11, pc}

View File

@@ -0,0 +1,83 @@
;------------------------------------------------------------------------------
;
; SetMem() worker for ARM
;
; This file started out as C code that did 64 bit stores if the buffer was
; 32-bit aligned, else it did a byte-by-byte fill. It also fills any trailing
; bytes one byte at a time. It was updated to set 32 bytes at a time using stm.
;
; Copyright (c) 2008-2010 Apple Inc. All rights reserved.<BR>
; All rights reserved. This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
/**
  Set Buffer to Value for Size bytes.

  When Buffer is word aligned and Length is at least 32, the low byte of
  Value is replicated into r4-r11 and stored 32 bytes at a time with stm
  until fewer than 32 bytes remain. Everything else (unaligned buffers,
  short lengths and the trailing remainder) is written one byte at a time.

  Register use:
    r0      Buffer, never modified, so it is also the return value
    r1      bytes remaining
    r2      Value (only the low byte is stored)
    r3      scratch while computing the alignment flag
    r4-r11  Value replicated into every byte, source for stm
    r12     write cursor
    lr      fast-path flag (1 = 32-byte stores allowed)

  @param  Buffer      Memory to set.
  @param  Length      Number of bytes to set
  @param  Value       Value of the set operation.

  @return Buffer

VOID *
EFIAPI
InternalMemSetMem (
  OUT     VOID                      *Buffer,
  IN      UINTN                     Length,
  IN      UINT8                     Value
  )
**/
  EXPORT InternalMemSetMem

  AREA AsmMemStuff, CODE, READONLY

InternalMemSetMem
  stmfd   sp!, {r4-r11, lr}
  mov     r12, r0                 ; write cursor; r0 stays put as return value
  tst     r0, #3                  ; is Buffer word aligned?
  movne   r3, #0
  moveq   r3, #1
  cmp     r1, #31
  movls   lr, #0                  ; Length < 32: byte stores only
  andhi   lr, r3, #1              ; Length >= 32: fast path iff Buffer aligned
  cmp     lr, #0
  beq     LSetCheck

  and     r4, r2, #0xff           ; replicate the fill byte across a word ...
  orr     r4, r4, r4, LSL #8
  orr     r4, r4, r4, LSL #16
  mov     r5, r4                  ; ... and across all eight stm registers
  mov     r6, r4
  mov     r7, r4
  mov     r8, r4
  mov     r9, r4
  mov     r10, r4
  mov     r11, r4
  b       LSetCheck

LSetLoop
  cmp     lr, #0
  streqb  r2, [r12], #1           ; slow path: one byte per iteration
  subeq   r1, r1, #1
  beq     LSetCheck
  sub     r1, r1, #32
  cmp     r1, #31
  movls   lr, #0                  ; fewer than 32 bytes left: finish bytewise
  stmia   r12!, {r4-r11}

LSetCheck
  cmp     r1, #0
  bne     LSetLoop
  ldmfd   sp!, {r4-r11, pc}

  END