From 80c9bda9005d750ad8c5e7825dbd35f593c58895 Mon Sep 17 00:00:00 2001 From: patacongo Date: Sat, 20 Oct 2012 20:59:44 +0000 Subject: [PATCH] Add Daniel Vik's optimized memcpy as a configuration option git-svn-id: http://svn.code.sf.net/p/nuttx/code/trunk@5240 42af7a65-404d-4744-a932-0658087f49c3 --- nuttx/COPYING | 29 +++ nuttx/ChangeLog | 3 + nuttx/lib/Kconfig | 61 ++++++ nuttx/lib/string/Make.defs | 10 +- nuttx/lib/string/lib_vikmemcpy.c | 348 +++++++++++++++++++++++++++++++ 5 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 nuttx/lib/string/lib_vikmemcpy.c diff --git a/nuttx/COPYING b/nuttx/COPYING index 863b81a2fc..1956fb6c72 100644 --- a/nuttx/COPYING +++ b/nuttx/COPYING @@ -163,6 +163,35 @@ dtoa(): "This product includes software developed by the University of California, Berkeley and its contributors." +lib/string/lib_vikmemcpy.c +^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If you enable CONFIG_MEMCPY_VIK, then you will build with the optimized + version of memcpy from Daniel Vik. Licensing information for that version + of memcpy() follows: + + Copyright (C) 1999-2010 Daniel Vik + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any + damages arising from the use of this software. + Permission is granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you + must not claim that you wrote the original software. If you + use this software in a product, an acknowledgment in the + use this software in a product, an acknowledgment in the + product documentation would be appreciated but is not + required. + + 2. Altered source versions must be plainly marked as such, and + must not be misrepresented as being the original software. + + 3. 
This notice may not be removed or altered from any source + distribution. + Documents/rss.gif ^^^^^^^^^^^^^^^^^ diff --git a/nuttx/ChangeLog b/nuttx/ChangeLog index 47174f3ba9..313d8865f7 100644 --- a/nuttx/ChangeLog +++ b/nuttx/ChangeLog @@ -3488,3 +3488,6 @@ USB_CONFIG_ATT_SELFPOWER (contributed by Petteri Aimonen). * arch/arm/src/armv7-m/up_memcpy.S: An optimized memcpy() function for the ARMv7-M family contributed by Mike Smith. + * lib/string/lib_vikmemcpy.c: As an option, the larger but faster + implementation of memcpy from Daniel Vik is now available (this is + from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). \ No newline at end of file diff --git a/nuttx/lib/Kconfig b/nuttx/lib/Kconfig index 69a55d09cf..b3f743db28 100644 --- a/nuttx/lib/Kconfig +++ b/nuttx/lib/Kconfig @@ -155,40 +155,101 @@ if ARCH_OPTIMIZED_FUNCTIONS config ARCH_MEMCPY bool "memcpy" default n + ---help--- + Select this option if the architecture provides an optimized version + of memcpy(). + +config MEMCPY_VIK + bool "Vik memcpy" + default n + depends on !ARCH_MEMCPY + ---help--- + Select this option to use the optimized memcpy() function by Daniel Vik. + See licensing information in the top-level COPYING file. + +if MEMCPY_VIK +config MEMCPY_PRE_INC_PTRS + bool "Pre-increment pointers" + default n + ---help--- + Use pre-increment of pointers. Default is post increment of pointers. + +config MEMCPY_INDEXED_COPY + bool "Array indexing" + default y + ---help--- + Copying data using array indexing. Using this option, disables the + MEMCPY_PRE_INC_PTRS option. + +config MEMCPY_64BIT + bool "64-bit memcpy" + default n + ---help--- + Compiles memcpy for 64 bit architectures + +endif config ARCH_MEMCMP bool "memcmp" default n + ---help--- + Select this option if the architecture provides an optimized version + of memcmp(). config ARCH_MEMMOVE bool "memmove" default n + ---help--- + Select this option if the architecture provides an optimized version + of memmove(). 
config ARCH_MEMSET bool "memset" default n + ---help--- + Select this option if the architecture provides an optimized version + of memset(). config ARCH_STRCMP bool "strcmp" default n + ---help--- + Select this option if the architecture provides an optimized version + of strcmp(). config ARCH_STRCPY bool "strcpy" default n + ---help--- + Select this option if the architecture provides an optimized version + of strcpy(). config ARCH_STRNCPY bool "strncpy" default n + ---help--- + Select this option if the architecture provides an optimized version + of strncpy(). config ARCH_STRLEN bool "strlen" default n + ---help--- + Select this option if the architecture provides an optimized version + of strlen(). config ARCH_STRNLEN bool "strlen" default n + ---help--- + Select this option if the architecture provides an optimized version + of strnlen(). config ARCH_BZERO bool "bzero" default n + ---help--- + Select this option if the architecture provides an optimized version + of bzero(). + endif diff --git a/nuttx/lib/string/Make.defs b/nuttx/lib/string/Make.defs index 6b21c7f146..16add6e89e 100644 --- a/nuttx/lib/string/Make.defs +++ b/nuttx/lib/string/Make.defs @@ -36,7 +36,7 @@ # Add the string C files to the build CSRCS += lib_checkbase.c lib_isbasedigit.c lib_memset.c lib_memchr.c \ - lib_memccpy.c lib_memcpy.c lib_memcmp.c lib_memmove.c lib_skipspace.c \ + lib_memccpy.c lib_memcmp.c lib_memmove.c lib_skipspace.c \ lib_strcasecmp.c lib_strcat.c lib_strchr.c lib_strcpy.c lib_strcmp.c \ lib_strcspn.c lib_strdup.c lib_strerror.c lib_strlen.c lib_strnlen.c \ lib_strncasecmp.c lib_strncat.c lib_strncmp.c lib_strncpy.c \ @@ -44,6 +44,14 @@ CSRCS += lib_checkbase.c lib_isbasedigit.c lib_memset.c lib_memchr.c \ lib_strspn.c lib_strstr.c lib_strtok.c lib_strtokr.c lib_strtol.c \ lib_strtoll.c lib_strtoul.c lib_strtoull.c lib_strtod.c +ifneq ($(CONFIG_ARCH_MEMCPY),y) +ifeq ($(CONFIG_MEMCPY_VIK),y) +CSRCS += lib_vikmemcpy.c +else +CSRCS += lib_memcpy.c +endif +endif + # Add
the string directory to the build DEPPATH += --dep-path string diff --git a/nuttx/lib/string/lib_vikmemcpy.c b/nuttx/lib/string/lib_vikmemcpy.c new file mode 100644 index 0000000000..b50942aaa1 --- /dev/null +++ b/nuttx/lib/string/lib_vikmemcpy.c @@ -0,0 +1,348 @@ +/**************************************************************************** + * File: lib/string/lib_vikmemcpy.c + * + * This is a version of the optimized memcpy by Daniel Vik, adapted to the + * NuttX environment. + * + * Copyright (C) 1999-2010 Daniel Vik + * + * Adaptations include: + * - File name change + * - Use of types defined in stdint.h + * - Integration with the NuttX configuration system + * - Other cosmetic changes for consistency with NuttX coding standards + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any + * damages arising from the use of this software. + * Permission is granted to anyone to use this software for any + * purpose, including commercial applications, and to alter it and + * redistribute it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you + * must not claim that you wrote the original software. If you + * use this software in a product, an acknowledgment in the + * use this software in a product, an acknowledgment in the + * product documentation would be appreciated but is not + * required. + * + * 2. Altered source versions must be plainly marked as such, and + * must not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + * + * Description: Implementation of the standard library function memcpy. + * This implementation of memcpy() is ANSI-C89 compatible. + * + * The following configuration options can be set: + * + * CONFIG_ENDIAN_BIG + * Uses processor with big endian addressing. Default is little endian. 
+ * + * CONFIG_MEMCPY_PRE_INC_PTRS + * Use pre increment of pointers. Default is post increment of pointers. + * + * CONFIG_MEMCPY_INDEXED_COPY + * Copying data using array indexing. Using this option, disables the + * CONFIG_MEMCPY_PRE_INC_PTRS option. + * + * CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures + * + ****************************************************************************/ + +/**************************************************************************** + * Configuration definitions. + ****************************************************************************/ + +#define CONFIG_MEMCPY_INDEXED_COPY + +/******************************************************************** + * Included Files + *******************************************************************/ + +#include <nuttx/config.h> +#include <nuttx/compiler.h> + +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +/******************************************************************** + * Pre-processor Definitions + *******************************************************************/ + +/* Can't support CONFIG_MEMCPY_64BIT if the platform does not have 64-bit + * integer types. 
+ */ + +#ifndef CONFIG_HAVE_LONG_LONG +# undef CONFIG_MEMCPY_64BIT +#endif + +/* Remove definitions when CONFIG_MEMCPY_INDEXED_COPY is defined */ + +#if defined (CONFIG_MEMCPY_INDEXED_COPY) +# if defined (CONFIG_MEMCPY_PRE_INC_PTRS) +# undef CONFIG_MEMCPY_PRE_INC_PTRS +# endif /* CONFIG_MEMCPY_PRE_INC_PTRS */ +#endif /* CONFIG_MEMCPY_INDEXED_COPY */ + +/* Definitions for pre and post increment of pointers */ + +#if defined (CONFIG_MEMCPY_PRE_INC_PTRS) + +# define START_VAL(x) (x)-- +# define INC_VAL(x) *++(x) +# define CAST_TO_U8(p, o) ((uint8_t*)p + o + TYPE_WIDTH) +# define WHILE_DEST_BREAK (TYPE_WIDTH - 1) +# define PRE_LOOP_ADJUST - (TYPE_WIDTH - 1) +# define PRE_SWITCH_ADJUST + 1 + +#else /* CONFIG_MEMCPY_PRE_INC_PTRS */ + +# define START_VAL(x) +# define INC_VAL(x) *(x)++ +# define CAST_TO_U8(p, o) ((uint8_t*)p + o) +# define WHILE_DEST_BREAK 0 +# define PRE_LOOP_ADJUST +# define PRE_SWITCH_ADJUST + +#endif /* CONFIG_MEMCPY_PRE_INC_PTRS */ + +/* Definitions for endian-ness */ + +#ifdef CONFIG_ENDIAN_BIG + +# define SHL << +# define SHR >> + +#else /* CONFIG_ENDIAN_BIG */ + +# define SHL >> +# define SHR << + +#endif /* CONFIG_ENDIAN_BIG */ + +/******************************************************************** + * Macros for copying words of different alignment. + * Uses incrementing pointers. + *******************************************************************/ + +#define CP_INCR() \ +{ \ + INC_VAL(dstN) = INC_VAL(srcN); \ +} + +#define CP_INCR_SH(shl, shr) \ +{ \ + dstWord = srcWord SHL shl; \ + srcWord = INC_VAL(srcN); \ + dstWord |= srcWord SHR shr; \ + INC_VAL(dstN) = dstWord; \ +} + +/******************************************************************** + * Macros for copying words of different alignment. + * Uses array indexes. 
+ *******************************************************************/ + +#define CP_INDEX(idx) \ +{ \ + dstN[idx] = srcN[idx]; \ +} + +#define CP_INDEX_SH(x, shl, shr) \ +{ \ + dstWord = srcWord SHL shl; \ + srcWord = srcN[x]; \ + dstWord |= srcWord SHR shr; \ + dstN[x] = dstWord; \ +} + +/******************************************************************** + * Macros for copying words of different alignment. + * Uses incrementing pointers or array indexes depending on + * configuration. + *******************************************************************/ + +#if defined (CONFIG_MEMCPY_INDEXED_COPY) + +# define CP(idx) CP_INDEX(idx) +# define CP_SH(idx, shl, shr) CP_INDEX_SH(idx, shl, shr) + +# define INC_INDEX(p, o) ((p) += (o)) + +#else /* CONFIG_MEMCPY_INDEXED_COPY */ + +# define CP(idx) CP_INCR() +# define CP_SH(idx, shl, shr) CP_INCR_SH(shl, shr) + +# define INC_INDEX(p, o) + +#endif /* CONFIG_MEMCPY_INDEXED_COPY */ + +#define COPY_REMAINING(count) \ +{ \ + START_VAL(dst8); \ + START_VAL(src8); \ + \ + switch (count) \ + { \ + case 7: INC_VAL(dst8) = INC_VAL(src8); \ + case 6: INC_VAL(dst8) = INC_VAL(src8); \ + case 5: INC_VAL(dst8) = INC_VAL(src8); \ + case 4: INC_VAL(dst8) = INC_VAL(src8); \ + case 3: INC_VAL(dst8) = INC_VAL(src8); \ + case 2: INC_VAL(dst8) = INC_VAL(src8); \ + case 1: INC_VAL(dst8) = INC_VAL(src8); \ + case 0: \ + default: break; \ + } \ +} + +#define COPY_NO_SHIFT() \ +{ \ + UIntN* dstN = (UIntN*)(dst8 PRE_LOOP_ADJUST); \ + UIntN* srcN = (UIntN*)(src8 PRE_LOOP_ADJUST); \ + size_t length = count / TYPE_WIDTH; \ + \ + while (length & 7) \ + { \ + CP_INCR(); \ + length--; \ + } \ + \ + length /= 8; \ + \ + while (length--) \ + { \ + CP(0); \ + CP(1); \ + CP(2); \ + CP(3); \ + CP(4); \ + CP(5); \ + CP(6); \ + CP(7); \ + \ + INC_INDEX(dstN, 8); \ + INC_INDEX(srcN, 8); \ + } \ + \ + src8 = CAST_TO_U8(srcN, 0); \ + dst8 = CAST_TO_U8(dstN, 0); \ + \ + COPY_REMAINING(count & (TYPE_WIDTH - 1)); \ + \ + return dest; \ +} + +#define COPY_SHIFT(shift) 
\ +{ \ + UIntN* dstN = (UIntN*)((((uintptr_t)dst8) PRE_LOOP_ADJUST) & \ + ~(TYPE_WIDTH - 1)); \ + UIntN* srcN = (UIntN*)((((uintptr_t)src8) PRE_LOOP_ADJUST) & \ + ~(TYPE_WIDTH - 1)); \ + size_t length = count / TYPE_WIDTH; \ + UIntN srcWord = INC_VAL(srcN); \ + UIntN dstWord; \ + \ + while (length & 7) \ + { \ + CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift)); \ + length--; \ + } \ + \ + length /= 8; \ + \ + while (length--) \ + { \ + CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(2, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + \ + INC_INDEX(dstN, 8); \ + INC_INDEX(srcN, 8); \ + } \ + \ + src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH)); \ + dst8 = CAST_TO_U8(dstN, 0); \ + \ + COPY_REMAINING(count & (TYPE_WIDTH - 1)); \ + \ + return dest; \ +} + +/******************************************************************** + * Type Definitions + *******************************************************************/ + +#ifdef CONFIG_MEMCPY_64BIT +typedef uint64_t UIntN; +# define TYPE_WIDTH 8L +#else +typedef uint32_t UIntN; +# define TYPE_WIDTH 4L +#endif + +/******************************************************************** + * Public Functions + *******************************************************************/ +/******************************************************************** + * Name: memcpy + * + * Description: + * Copies count bytes from src to dest. No overlap check is performed. 
+ * + * Input Parameters: + * dest - pointer to destination buffer + * src - pointer to source buffer + * count - number of bytes to copy + * + * Returned Value: + * A pointer to destination buffer + * + *******************************************************************/ + +void *memcpy(void *dest, const void *src, size_t count) +{ + uint8_t *dst8 = (uint8_t*)dest; + uint8_t *src8 = (uint8_t*)src; + + if (count < 8) + { + COPY_REMAINING(count); + return dest; + } + + START_VAL(dst8); + START_VAL(src8); + + while (((uintptr_t)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) + { + INC_VAL(dst8) = INC_VAL(src8); + count--; + } + + switch ((((uintptr_t)src8) PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) + { + case 0: COPY_NO_SHIFT(); break; + case 1: COPY_SHIFT(1); break; + case 2: COPY_SHIFT(2); break; + case 3: COPY_SHIFT(3); break; +#if TYPE_WIDTH > 4 + case 4: COPY_SHIFT(4); break; + case 5: COPY_SHIFT(5); break; + case 6: COPY_SHIFT(6); break; + case 7: COPY_SHIFT(7); break; +#endif + } + + return dest; +}