Add a version of memset() optimized for speed

git-svn-id: http://svn.code.sf.net/p/nuttx/code/trunk@5242 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
patacongo 2012-10-21 00:41:44 +00:00
parent 4de5307aa3
commit f16ae329fd
6 changed files with 134 additions and 23 deletions

View File

@ -3490,4 +3490,7 @@
the ARMv7-M family contributed by Mike Smith.
* lib/strings/lib_vikmemcpy.c: As an option, the larger but faster
implementation of memcpy from Daniel Vik is now available (this is
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
version of memset() optimized for speed. By default, memset() is
optimized for size.

View File

@ -8,7 +8,7 @@
<tr align="center" bgcolor="#e4e4e4">
<td>
<h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
<p>Last Updated: August 28, 2012</p>
<p>Last Updated: October 20, 2012</p>
</td>
</tr>
</table>

View File

@ -12,7 +12,7 @@
<h1><big><font color="#3c34ec">
<i>NuttX RTOS Porting Guide</i>
</font></big></h1>
<p>Last Updated: August 28, 2012</p>
<p>Last Updated: October 20, 2012</p>
</td>
</tr>
</table>
@ -4449,12 +4449,12 @@ build
If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel
Vik's optimized implementation of <code>memcpy()</code>:
</p>
<ul><li>
<code>CONFIG_MEMCPY_VIK</code>:
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
See licensing information in the top-level <code>COPYING</code> file.
Default: <code>n</code>.
</li></ul>
<ul><li>
<code>CONFIG_MEMCPY_VIK</code>:
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
See licensing information in the top-level <code>COPYING</code> file.
Default: <code>n</code>.
</li></ul>
<p>
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
@ -4474,6 +4474,15 @@ build
Compiles memcpy for 64 bit architectures
</li></ul>
<li><p>
If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available:
</p>
<ul><li>
<code>CONFIG_MEMSET_OPTSPEED</code>:
Select this option to use a version of <code>memset()</code> optimized for speed.
Default: <code>memset()</code> is optimized for size.
</li></ul>
<li>
<p>
The architecture may provide custom versions of certain standard header files:

View File

@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux
CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
If CONFIG_ARCH_MEMSET is not selected, then the following option is
also available:
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
optimized for speed. Default: memset() is optimized for size.
The architecture may provide custom versions of certain standard header
files:

View File

@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
if ARCH_OPTIMIZED_FUNCTIONS
config ARCH_MEMCPY
bool "memcpy"
bool "memcpy()"
default n
---help---
Select this option if the architecture provides an optimized version
of memcpy().
config MEMCPY_VIK
bool "Vik memcpy"
bool "Vik memcpy()"
default n
depends on !ARCH_MEMCPY
---help---
Select this option to use the optimized memcpy() function by Daniel Vik.
See licensing information in the top-level COPYING file.
Select this option to use the optimized memcpy() function by Daniel Vik.
Select this option to optimize for speed at the expense of increased size.
See licensing information in the top-level COPYING file.
if MEMCPY_VIK
config MEMCPY_PRE_INC_PTRS
@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
MEMCPY_PRE_INC_PTRS option.
config MEMCPY_64BIT
bool "64-bit memcpy"
bool "64-bit memcpy()"
default n
---help---
Compiles memcpy for 64 bit architectures
Compiles memcpy() for 64 bit architectures
endif
config ARCH_MEMCMP
bool "memcmp"
bool "memcmp()"
default n
---help---
Select this option if the architecture provides an optimized version
of memcmp().
config ARCH_MEMMOVE
bool "memmove"
bool "memmove()"
default n
---help---
Select this option if the architecture provides an optimized version
of memmove().
config ARCH_MEMSET
bool "memset"
bool "memset()"
default n
---help---
Select this option if the architecture provides an optimized version
of memset().
config MEMSET_OPTSPEED
bool "Optimize memset() for speed"
default n
depends on !ARCH_MEMSET
---help---
Select this option to use a version of memset() optimized for speed.
Default: memset() is optimized for size.
config ARCH_STRCMP
bool "strcmp"
bool "strcmp()"
default n
---help---
Select this option if the architecture provides an optimized version
of strcmp().
config ARCH_STRCPY
bool "strcpy"
bool "strcpy()"
default n
---help---
Select this option if the architecture provides an optimized version
of strcpy().
config ARCH_STRNCPY
bool "strncpy"
bool "strncpy()"
default n
---help---
Select this option if the architecture provides an optimized version
@ -239,14 +248,14 @@ config ARCH_STRLEN
of strlen().
config ARCH_STRNLEN
bool "strlen"
bool "strnlen()"
default n
---help---
Select this option if the architecture provides an optimized version
of strnlen().
config ARCH_BZERO
bool "bzero"
bool "bzero()"
default n
---help---
Select this option if the architecture provides an optimized version

View File

@ -42,8 +42,12 @@
************************************************************/
#include <nuttx/config.h>
#include <sys/types.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
/************************************************************
* Global Functions
@ -52,8 +56,88 @@
#ifndef CONFIG_ARCH_MEMSET
/************************************************************
 * Name: memset
 *
 * Description:
 *   Fill the first n bytes of the memory area at s with the
 *   value c converted to an unsigned char.
 *
 * Input Parameters:
 *   s - Start of the memory area to fill
 *   c - Fill value; only its low-order byte is used
 *   n - Number of bytes to fill
 *
 * Returned Value:
 *   The original pointer s.
 *
 ************************************************************/

void *memset(void *s, int c, size_t n)
{
#ifdef CONFIG_MEMSET_OPTSPEED
  /* This version is optimized for speed (you could do better
   * still by exploiting processor caching or memory burst
   * knowledge).  64-bit support might improve performance as
   * well.
   *
   * The fill value is reduced to a single byte BEFORE it is
   * replicated.  Building the wider patterns directly from the
   * int argument lets bits above bit 7 (e.g. from a negative c)
   * leak into the upper byte of the 16-bit pattern, so the wide
   * stores would write different bytes than the byte stores.
   *
   * NOTE(review): the 16-/32-bit stores go through casted
   * pointers; this assumes the toolchain tolerates that form of
   * type punning -- confirm for new compilers/options.
   */

  uintptr_t addr  = (uintptr_t)s;
  uint8_t   val8  = (uint8_t)c;
  uint16_t  val16 = (uint16_t)(((uint16_t)val8 << 8) | (uint16_t)val8);
  uint32_t  val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;

  /* Make sure that there is something to be set */

  if (n > 0)
    {
      /* Align to a 16-bit boundary */

      if ((addr & 1) != 0)
        {
          *(uint8_t *)addr = val8;
          addr += 1;
          n    -= 1;
        }

      /* Check if there are at least 16-bits left to be set */

      if (n >= 2)
        {
          /* Align to a 32-bit boundary (we know that the destination
           * address is already aligned to at least a 16-bit boundary).
           */

          if ((addr & 3) != 0)
            {
              *(uint16_t *)addr = val16;
              addr += 2;
              n    -= 2;
            }

          /* Loop while there are at least 32-bits left to be set */

          while (n >= 4)
            {
              *(uint32_t *)addr = val32;
              addr += 4;
              n    -= 4;
            }
        }

      /* We may get here under the following conditions:
       *
       *   n = 0, addr may or may not be aligned
       *   n = 1, addr may or may not be aligned
       *   n = 2, addr is aligned to a 32-bit boundary
       *   n = 3, addr is aligned to a 32-bit boundary
       */

      switch (n)
        {
          default:
          case 0:
            DEBUGASSERT(n == 0);
            break;

          case 2:
            *(uint16_t *)addr = val16;
            break;

          case 3:
            *(uint16_t *)addr = val16;
            addr += 2;

            /* Fall through: exactly one trailing byte remains */

          case 1:
            *(uint8_t *)addr = val8;
            break;
        }
    }
#else
  /* This version is optimized for size */

  unsigned char *p = (unsigned char *)s;

  while (n-- > 0)
    {
      *p++ = (unsigned char)c;
    }
#endif

  return s;
}
#endif