arm: Update mem* functions to newer versions

The memcpy/memset/memmove assembly implementations have been taken from
U-Boot, which originally got them from Linux. It turns out that they are
actually not that bad, but they could use an update. This patch pulls in
the current Linux upstream versions of those files, removing some old
U-Boot cruft such as checking whether the two pointers in a memcpy() are
equal (really now?) or side-stepping the R8 register because it was used
for special purposes. It also returns to the good old Linux
ENTRY/ENDPROC macros since we have them now anyway, and straightens out
the W() macro in preparation for unified thumb support.

Change-Id: I138af269b423bef0a237759ac29f1ee58ca206a0
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/182179
Reviewed-by: Vincent Palatin <vpalatin@chromium.org>
(cherry picked from commit 777127997bde5785b21d422d0b6eb04c4328b478)
Signed-off-by: Isaac Christensen <isaac.christensen@se-eng.com>
Reviewed-on: http://review.coreboot.org/6918
Tested-by: build bot (Jenkins)
Reviewed-by: David Hendricks <dhendrix@chromium.org>
11 changed files with 366 additions and 152 deletions
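
For orientation before the hunks below: the "unified thumb support" preparation mentioned above boils down to a W() macro added alongside the existing ARM()/THUMB() helpers (in what appears to be arch/asm.h, judging by the new includes). The condensed sketch below just restates those definitions; the rationale in the comment is an inference, not something the patch states.

/*
 * Condensed sketch of the W() definitions visible in the hunks below.
 * In ARM mode every instruction is 4 bytes, so W(instr) is a no-op; in
 * Thumb mode the ".w" suffix forces the 32-bit (wide) encoding, which
 * presumably keeps the "add pc, ..." jump tables of W(ldr)/W(str)
 * entries in memcpy.S/memmove.S at a fixed 4-byte stride.
 */
#if defined __arm__
# define W(instr)	instr
#elif defined __thumb__
# define W(instr)	instr.w
#else
# error Not in ARM or thumb mode!
#endif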


@@ -33,7 +33,7 @@ head.o-y += head.S
 libc-y += main.c sysinfo.c
 libc-y += timer.c coreboot.c util.S
 libc-y += virtual.c
-libc-y += memcpy.S memset.S
+libc-y += memcpy.S memset.S memmove.S
 libc-y += exception_asm.S exception.c
 libc-y += cache.c
 libcbfs-$(CONFIG_LP_CBFS) += dummy_media.c


@@ -1,5 +1,7 @@
 /*
- * arch/arm/include/asm/assembler.h
+ * arch/arm/asmlib.h
+ *
+ * Adapted from Linux arch/arm/include/assembler.h
  *
  * Copyright (C) 1996-2000 Russell King
  *
@@ -14,6 +16,16 @@
  * assembler source.
  */

+/*
+ * WARNING: This file is *only* meant for memcpy.S and friends which were copied
+ * from Linux and require some weird macros. It does unspeakable things like
+ * redefining "push", so do *not* try to turn it into a general assembly macro
+ * file, and keep it out of global include directories.
+ */
+
+#ifndef __ARM_ASMLIB_H__
+#define __ARM_ASMLIB_H__
+
 /*
  * Endian independent macros for shifting bytes within registers.
  */
@@ -44,17 +56,17 @@
 /*
  * Data preload for architectures that support it
  */
-#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
-	defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
-	defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || \
-	defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7A__) || \
-	defined(__ARM_ARCH_7R__)
+#if 1 /* TODO: differentiate once libpayload supports more ARM versions */
 #define PLD(code...)	code
 #else
 #define PLD(code...)
 #endif

 /*
- * Cache alligned
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory. Linux doesn't enable this except
+ * for the "Feroceon" processor, so we better just leave it out.
  */
-#define CALGN(code...) code
+#define CALGN(code...)
+
+#endif /* __ARM_ASMLIB_H */
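
The PLD() and CALGN() wrappers defined above rely on a GNU C named-variadic-macro trick: one definition passes its whole argument list through, the other swallows it. The toy C program below (invented names, not from the patch) shows the mechanism in isolation.

#include <stdio.h>

/* Toy demonstration of the variadic-macro trick the header uses: one
 * definition forwards its entire argument list, the other discards it,
 * so whole statements can be compiled in or out. */
#define KEEP(code...)	code
#define DROP(code...)

int main(void)
{
	KEEP( puts("emitted, like PLD(...) on cores with prefetch"); )
	DROP( puts("compiled away, like every CALGN(...) block here"); )
	return 0;
}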


@@ -10,9 +10,8 @@
  * published by the Free Software Foundation.
  */

-#include "assembler.h"
-
-#define W(instr)	instr
+#include <arch/asm.h>
+#include "asmlib.h"

 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -57,12 +56,7 @@

 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */

-	.type memcpy, function
-	.globl memcpy
-memcpy:
-
-	cmp	r0, r1
-	moveq	pc, lr
+ENTRY(memcpy)

 		enter	r4, lr
@@ -242,3 +236,4 @@ memcpy:
 17:		forward_copy_shift	pull=16	push=16

 18:		forward_copy_shift	pull=24	push=8
+ENDPROC(memcpy)


@@ -0,0 +1,199 @@
/*
* linux/arch/arm/lib/memmove.S
*
* Author: Nicolas Pitre
* Created: Sep 28, 2005
* Copyright: (C) MontaVista Software Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <arch/asm.h>
#include "asmlib.h"
.text
/*
* Prototype: void *memmove(void *dest, const void *src, size_t n);
*
* Note:
*
* If the memory regions don't overlap, we simply branch to memcpy which is
* normally a bit faster. Otherwise the copy is done going downwards. This
* is a transposition of the code from copy_template.S but with the copy
* occurring in the opposite direction.
*/
ENTRY(memmove)
subs ip, r0, r1
cmphi r2, ip
bls memcpy
stmfd sp!, {r0, r4, lr}
add r1, r1, r2
add r0, r0, r2
subs r2, r2, #4
blt 8f
ands ip, r0, #3
PLD( pld [r1, #-4] )
bne 9f
ands ip, r1, #3
bne 10f
1: subs r2, r2, #(28)
stmfd sp!, {r5 - r8}
blt 5f
CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
CALGN( rsb ip, ip, #32 )
CALGN( add pc, r4, ip )
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
PLD( pld [r1, #-32] )
PLD( blt 4f )
PLD( pld [r1, #-64] )
PLD( pld [r1, #-96] )
3: PLD( pld [r1, #-128] )
4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
subs r2, r2, #32
stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
bge 3b
PLD( cmn r2, #96 )
PLD( bge 4b )
5: ands ip, r2, #28
rsb ip, ip, #32
addne pc, pc, ip @ C is always clear here
b 7f
6: W(nop)
W(ldr) r3, [r1, #-4]!
W(ldr) r4, [r1, #-4]!
W(ldr) r5, [r1, #-4]!
W(ldr) r6, [r1, #-4]!
W(ldr) r7, [r1, #-4]!
W(ldr) r8, [r1, #-4]!
W(ldr) lr, [r1, #-4]!
add pc, pc, ip
nop
W(nop)
W(str) r3, [r0, #-4]!
W(str) r4, [r0, #-4]!
W(str) r5, [r0, #-4]!
W(str) r6, [r0, #-4]!
W(str) r7, [r0, #-4]!
W(str) r8, [r0, #-4]!
W(str) lr, [r0, #-4]!
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
ldrneb r3, [r1, #-1]!
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
strneb r3, [r0, #-1]!
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
9: cmp ip, #2
ldrgtb r3, [r1, #-1]!
ldrgeb r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
strgtb r3, [r0, #-1]!
strgeb r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
blt 8b
ands ip, r1, #3
beq 1b
10: bic r1, r1, #3
cmp ip, #2
ldr r3, [r1, #0]
beq 17f
blt 18f
.macro backward_copy_shift push pull
subs r2, r2, #28
blt 14f
CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
11: stmfd sp!, {r5 - r9}
PLD( pld [r1, #-4] )
PLD( subs r2, r2, #96 )
PLD( pld [r1, #-32] )
PLD( blt 13f )
PLD( pld [r1, #-64] )
PLD( pld [r1, #-96] )
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
mov lr, r3, push #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
orr lr, lr, ip, pull #\pull
mov ip, ip, push #\push
orr ip, ip, r9, pull #\pull
mov r9, r9, push #\push
orr r9, r9, r8, pull #\pull
mov r8, r8, push #\push
orr r8, r8, r7, pull #\pull
mov r7, r7, push #\push
orr r7, r7, r6, pull #\pull
mov r6, r6, push #\push
orr r6, r6, r5, pull #\pull
mov r5, r5, push #\push
orr r5, r5, r4, pull #\pull
mov r4, r4, push #\push
orr r4, r4, r3, pull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
PLD( bge 13b )
ldmfd sp!, {r5 - r9}
14: ands ip, r2, #28
beq 16f
15: mov lr, r3, push #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
orr lr, lr, r3, pull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
16: add r1, r1, #(\pull / 8)
b 8b
.endm
backward_copy_shift push=8 pull=24
17: backward_copy_shift push=16 pull=16
18: backward_copy_shift push=24 pull=8
ENDPROC(memmove)
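
The three-instruction dispatch at the top of memmove (subs/cmphi/bls) is easy to misread, so here is a rough C model of the same idea; the function and variable names are invented and the byte loop stands in for the optimized backwards copy.

#include <stdint.h>
#include <string.h>

/* Rough C model of the entry test above: treat dest - src as an unsigned
 * distance; if it is at least n, a plain forward copy (memcpy) cannot
 * clobber unread source bytes, otherwise copy downwards from the end. */
void *memmove_model(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if ((uintptr_t)d - (uintptr_t)s >= n)	/* the subs/cmphi/bls test */
		return memcpy(dest, src, n);

	while (n--)				/* overlapping tail: go backwards */
		d[n] = s[n];
	return dest;
}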


@@ -9,33 +9,21 @@
  *
  * ASM optimised string functions
  */
-#include "assembler.h"
+#include <arch/asm.h>
+#include "asmlib.h"

 	.text
 	.align	5
-	.word	0

-1:	subs	r2, r2, #4		@ 1 do we have enough
-	blt	5f			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r1, [r0], #1		@ 1
-	strleb	r1, [r0], #1		@ 1
-	strb	r1, [r0], #1		@ 1
-	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
-
-	.type memset, function
-	.globl memset
-memset:
+ENTRY(memset)
 	ands	r3, r0, #3		@ 1 unaligned?
-	bne	1b			@ 1
+	mov	ip, r0			@ preserve r0 as return value
+	bne	6f			@ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
-	orr	r1, r1, r1, lsl #8
+1:	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
 	mov	r3, r1
 	cmp	r2, #16
@@ -44,29 +32,28 @@ memset:
 #if ! CALGN(1)+0

 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-	str	lr, [sp, #-4]!
-	mov	ip, r1
+	stmfd	sp!, {r8, lr}
+	mov	r8, r1
 	mov	lr, r1

 2:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
+	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	r0!, {r1, r3, ip, lr}
-	stmneia	r0!, {r1, r3, ip, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r1, r3, ip, lr}
-	ldr	lr, [sp], #4
+	stmneia	ip!, {r1, r3, r8, lr}
+	ldmfd	sp!, {r8, lr}

 #else
@@ -75,53 +62,63 @@ memset:
  * whole cache lines at once.
  */

-	stmfd	sp!, {r4-r7, lr}
+	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
 	mov	r7, r1
-	mov	ip, r1
+	mov	r8, r1
 	mov	lr, r1

 	cmp	r2, #96
-	tstgt	r0, #31
+	tstgt	ip, #31
 	ble	3f

-	and	ip, r0, #31
-	rsb	ip, ip, #32
-	sub	r2, r2, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	tst	ip, #(1 << 30)
-	mov	ip, r1
-	strne	r1, [r0], #4
+	and	r8, ip, #31
+	rsb	r8, r8, #32
+	sub	r2, r2, r8
+	movs	r8, r8, lsl #(32 - 4)
+	stmcsia	ip!, {r4, r5, r6, r7}
+	stmmiia	ip!, {r4, r5}
+	tst	r8, #(1 << 30)
+	mov	r8, r1
+	strne	r1, [ip], #4

 3:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
+	ldmeqfd	sp!, {r4-r8, pc}

 	tst	r2, #32
-	stmneia	r0!, {r1, r3-r7, ip, lr}
+	stmneia	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r4-r7}
+	stmneia	ip!, {r4-r7}

-	ldmfd	sp!, {r4-r7, lr}
+	ldmfd	sp!, {r4-r8, lr}

 #endif

 4:	tst	r2, #8
-	stmneia	r0!, {r1, r3}
+	stmneia	ip!, {r1, r3}
 	tst	r2, #4
-	strne	r1, [r0], #4
+	strne	r1, [ip], #4

 /*
  * When we get here, we've got less than 4 bytes to zero. We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [r0], #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
+	strneb	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
 	mov	pc, lr
+
+6:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5b			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r1, [ip], #1		@ 1
+	strleb	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+	b	1b
+ENDPROC(memset)
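
The restructured memset is easier to follow once its overall shape is clear: byte stores until the pointer is word-aligned, the fill byte replicated across a word, bulk word stores, then a short byte tail. The simplified C outline below (invented names, not the patch's code) traces that shape; the real routine additionally unrolls into 64-byte stm bursts and writes through ip so the original pointer can be returned in r0 unchanged.

#include <stddef.h>
#include <stdint.h>

/* Simplified C outline of the assembly above. */
void *memset_outline(void *dest, int c, size_t n)
{
	unsigned char *p = dest;
	uint32_t v = (unsigned char)c;

	while (((uintptr_t)p & 3) && n) {	/* label 6: align to a word */
		*p++ = (unsigned char)c;
		n--;
	}

	v |= v << 8;				/* replicate the byte ... */
	v |= v << 16;				/* ... across all four lanes */

	while (n >= 4) {			/* bulk word stores */
		*(uint32_t *)p = v;
		p += 4;
		n -= 4;
	}

	while (n--)				/* labels 4:/5:: the short tail */
		*p++ = (unsigned char)c;

	return dest;
}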


@@ -23,9 +23,11 @@
 #if defined __arm__
 # define ARM(x...)	x
 # define THUMB(x...)
+# define W(instr)	instr
 #elif defined __thumb__
 # define ARM(x...)
 # define THUMB(x...)	x
+# define W(instr)	instr.w
 #else
 # error Not in ARM or thumb mode!
 #endif


@@ -1,5 +1,7 @@
 /*
- * arch/arm/include/asm/assembler.h
+ * arch/arm/asmlib.h
+ *
+ * Adapted from Linux arch/arm/include/assembler.h
  *
  * Copyright (C) 1996-2000 Russell King
  *
@@ -14,6 +16,16 @@
  * assembler source.
  */

+/*
+ * WARNING: This file is *only* meant for memcpy.S and friends which were copied
+ * from Linux and require some weird macros. It does unspeakable things like
+ * redefining "push", so do *not* try to turn it into a general assembly macro
+ * file, and keep it out of global include directories.
+ */
+
+#ifndef __ARM_ASMLIB_H__
+#define __ARM_ASMLIB_H__
+
 /*
  * Endian independent macros for shifting bytes within registers.
  */
@@ -44,19 +56,17 @@
 /*
  * Data preload for architectures that support it
  */
-#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
-	defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
-	defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || \
-	defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7A__) || \
-	defined(__ARM_ARCH_7R__)
+#if __COREBOOT_ARM_ARCH__ >= 5
 #define PLD(code...)	code
 #else
 #define PLD(code...)
 #endif

 /*
- * Cache aligned
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory. Linux doesn't enable this except
+ * for the "Feroceon" processor, so we better just leave it out.
  */
-#define CALGN(code...) code
-
-#define W(instr)	instr
+#define CALGN(code...)
+
+#endif /* __ARM_ASMLIB_H */


@@ -23,9 +23,14 @@
 #if defined __arm__
 # define ARM(x...)	x
 # define THUMB(x...)
+# define W(instr)	instr
 #elif defined __thumb__
 # define ARM(x...)
 # define THUMB(x...)	x
+# define W(instr)	instr.w
+# if __COREBOOT_ARM_ARCH__ < 7
+# error thumb mode has not been tested with ARM < v7!
+# endif
 #else
 # error Not in ARM or thumb mode!
 #endif


@@ -10,9 +10,8 @@
  * published by the Free Software Foundation.
  */

-#include <assembler.h>
-
-#define W(instr)	instr
+#include <arch/asm.h>
+#include "asmlib.h"

 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -57,12 +56,7 @@

 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */

-	.type memcpy, function
-	.globl memcpy
-memcpy:
-
-	cmp	r0, r1
-	moveq	pc, lr
+ENTRY(memcpy)

 		enter	r4, lr
@@ -242,3 +236,4 @@ memcpy:
 17:		forward_copy_shift	pull=16	push=16

 18:		forward_copy_shift	pull=24	push=8
+ENDPROC(memcpy)


@@ -10,7 +10,8 @@
  * published by the Free Software Foundation.
  */

-#include <assembler.h>
+#include <arch/asm.h>
+#include "asmlib.h"

 		.text
@@ -25,9 +26,8 @@
  * occurring in the opposite direction.
  */

-	.type memmove, function
-	.globl memmove
-memmove:
+ENTRY(memmove)

 		subs	ip, r0, r1
 		cmphi	r2, ip
 		bls	memcpy
@@ -195,3 +195,5 @@ memmove:
 17:		backward_copy_shift	push=16	pull=16

 18:		backward_copy_shift	push=24	pull=8
+
+ENDPROC(memmove)


@@ -9,33 +9,21 @@
  *
  * ASM optimised string functions
  */
-#include <assembler.h>
+#include <arch/asm.h>
+#include "asmlib.h"

 	.text
 	.align	5
-	.word	0

-1:	subs	r2, r2, #4		@ 1 do we have enough
-	blt	5f			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r1, [r0], #1		@ 1
-	strleb	r1, [r0], #1		@ 1
-	strb	r1, [r0], #1		@ 1
-	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
-
-	.type memset, function
-	.globl memset
-memset:
+ENTRY(memset)
 	ands	r3, r0, #3		@ 1 unaligned?
-	bne	1b			@ 1
+	mov	ip, r0			@ preserve r0 as return value
+	bne	6f			@ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
-	orr	r1, r1, r1, lsl #8
+1:	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
 	mov	r3, r1
 	cmp	r2, #16
@@ -44,29 +32,28 @@ memset:
 #if ! CALGN(1)+0

 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-	str	lr, [sp, #-4]!
-	mov	ip, r1
+	stmfd	sp!, {r8, lr}
+	mov	r8, r1
 	mov	lr, r1

 2:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
+	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	r0!, {r1, r3, ip, lr}
-	stmneia	r0!, {r1, r3, ip, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r1, r3, ip, lr}
-	ldr	lr, [sp], #4
+	stmneia	ip!, {r1, r3, r8, lr}
+	ldmfd	sp!, {r8, lr}

 #else
@@ -75,53 +62,63 @@ memset:
  * whole cache lines at once.
  */

-	stmfd	sp!, {r4-r7, lr}
+	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
 	mov	r7, r1
-	mov	ip, r1
+	mov	r8, r1
 	mov	lr, r1

 	cmp	r2, #96
-	tstgt	r0, #31
+	tstgt	ip, #31
 	ble	3f

-	and	ip, r0, #31
-	rsb	ip, ip, #32
-	sub	r2, r2, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	tst	ip, #(1 << 30)
-	mov	ip, r1
-	strne	r1, [r0], #4
+	and	r8, ip, #31
+	rsb	r8, r8, #32
+	sub	r2, r2, r8
+	movs	r8, r8, lsl #(32 - 4)
+	stmcsia	ip!, {r4, r5, r6, r7}
+	stmmiia	ip!, {r4, r5}
+	tst	r8, #(1 << 30)
+	mov	r8, r1
+	strne	r1, [ip], #4

 3:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
+	ldmeqfd	sp!, {r4-r8, pc}

 	tst	r2, #32
-	stmneia	r0!, {r1, r3-r7, ip, lr}
+	stmneia	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r4-r7}
+	stmneia	ip!, {r4-r7}

-	ldmfd	sp!, {r4-r7, lr}
+	ldmfd	sp!, {r4-r8, lr}

 #endif

 4:	tst	r2, #8
-	stmneia	r0!, {r1, r3}
+	stmneia	ip!, {r1, r3}
 	tst	r2, #4
-	strne	r1, [r0], #4
+	strne	r1, [ip], #4

 /*
  * When we get here, we've got less than 4 bytes to zero. We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [r0], #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
+	strneb	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
 	mov	pc, lr
+
+6:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5b			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r1, [ip], #1		@ 1
+	strleb	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+	b	1b
+ENDPROC(memset)